rustdoc-search: update to stringdex 0.0.6

This update includes a few optimizations that reduce the size and the index building time:

- The wire format uses two bits to store four possibilities, instead of only handling three.
  https://gitlab.com/notriddle/stringdex/-/merge_requests/34
- The hashes themselves are 40 bits instead of 48, and inlining is still able to fit enough data by storing runs.
  https://gitlab.com/notriddle/stringdex/-/merge_requests/35
- Scanning for duplicates takes advantage of the rarity of conflicts, using an array of 32-bit numbers and only pulling in the other 8 bits when actually needed.
  https://gitlab.com/notriddle/stringdex/-/merge_requests/37
2026-04-26 13:01:27 +03:00 · 2026-03-12 19:01:39 -07:00
parent 3102493c71
commit 949570f801
3 changed files with 691 additions and 384 deletions
@@ -5372,9 +5372,9 @@ dependencies = [

 [[package]]
 name = "stringdex"
-version = "0.0.5"
+version = "0.0.6"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "07ab85c3f308f022ce6861ab57576b5b6ebc4835f9577e67e0f35f6c351e3f0a"
+checksum = "155cb460a7ede06f71ac9961e28d3ba4b3408355e233f8edd158b957ceba3950"
 dependencies = [
 "stacker",
 ]
@@ -22,7 +22,7 @@ rustdoc-json-types = { path = "../rustdoc-json-types" }
 serde = { version = "1.0", features = ["derive"] }
 serde_json = "1.0"
 smallvec = "1.8.1"
-stringdex = "=0.0.5"
+stringdex = "=0.0.6"
 tempfile = "3"
 threadpool = "1.8.1"
 tracing = "0.1"
@@ -55,21 +55,66 @@ class RoaringBitmap {
            this.consumed_len_bytes = pspecial - i;
            return this;
        } else if (u8array[i] > 0xe0) {
-            // Special representation of tiny sets that are runs
-            const lspecial = u8array[i] & 0x0f;
-            this.keysAndCardinalities = new Uint8Array(lspecial * 4);
-            i += 1;
-            const key = u8array[i + 2] | (u8array[i + 3] << 8);
-            const value = u8array[i] | (u8array[i + 1] << 8);
-            const container = new RoaringBitmapRun(1, new Uint8Array(4));
-            container.array[0] = value & 0xFF;
-            container.array[1] = (value >> 8) & 0xFF;
-            container.array[2] = lspecial - 1;
-            this.containers.push(container);
-            this.keysAndCardinalities[0] = key & 0xFF;
-            this.keysAndCardinalities[1] = (key >> 8) & 0xFF;
-            this.keysAndCardinalities[2] = lspecial - 1;
-            this.consumed_len_bytes = 5;
+            // Special representation of a node with multiple runs
+            const run_count_m1 = (u8array[i] & 0x0f);
+            const run_count = run_count_m1 + 1;
+            this.keysAndCardinalities = new Uint8Array(run_count * 4);
+            // the run keys and values
+            let pspecial = i + 1;
+            // the run lengths
+            let pnspecial = pspecial + (run_count * 4);
+            /** @type {number|null} */
+            let previous_key = null;
+            /** @type {RoaringBitmapRun|null} */
+            let previous_container = null;
+            for (let j = 0; j < run_count; j += 1) {
+                const key = u8array[pspecial + 2] | (u8array[pspecial + 3] << 8);
+                const value = u8array[pspecial] | (u8array[pspecial + 1] << 8);
+                const run_length_m1 = j % 2 === 0 ?
+                    (u8array[pnspecial] >> 4) :
+                    (u8array[pnspecial] & 0x0f);
+                if (j % 2 !== 0) {
+                    pnspecial += 1;
+                }
+                pspecial += 4;
+                if (key === previous_key && previous_container !== null) {
+                    const new_container_array = new Uint8Array(
+                        (previous_container.runcount + 1) * 4,
+                    );
+                    new_container_array.set(previous_container.array);
+                    new_container_array[previous_container.runcount * 4] = value & 0xFF;
+                    new_container_array[(previous_container.runcount * 4) + 1] =
+                        (value >> 8) & 0xFF;
+                    new_container_array[(previous_container.runcount * 4) + 2] =
+                        run_length_m1;
+                    previous_container.array = new_container_array;
+                    previous_container.runcount += 1;
+                    let cardinalitym1 =
+                        this.keysAndCardinalities[(this.containers.length * 4) - 2] |
+                        (this.keysAndCardinalities[(this.containers.length * 4) - 1] << 8);
+                    cardinalitym1 += run_length_m1 + 1;
+                    this.keysAndCardinalities[(this.containers.length * 4) - 2] =
+                        cardinalitym1 & 0xFF;
+                    this.keysAndCardinalities[(this.containers.length * 4) - 1] =
+                        (cardinalitym1 >> 8) & 0xFF;
+                } else {
+                    previous_key = key;
+                    previous_container = new RoaringBitmapRun(1, Uint8Array.of(
+                        value & 0xFF,
+                        (value >> 8) & 0xFF,
+                        run_length_m1,
+                        0,
+                    ));
+                    this.containers.push(previous_container);
+                    this.keysAndCardinalities[(this.containers.length * 4) - 4] = key & 0xFF;
+                    this.keysAndCardinalities[(this.containers.length * 4) - 3] = (key >> 8) & 0xFF;
+                    this.keysAndCardinalities[(this.containers.length * 4) - 2] = run_length_m1;
+                }
+            }
+            if (run_count % 2 !== 0) {
+                pnspecial += 1;
+            }
+            this.consumed_len_bytes = pnspecial - i;
            return this;
        } else if (u8array[i] > 0xd0) {
            // Special representation of tiny sets that are close together
@@ -100,6 +145,26 @@ class RoaringBitmap {
            }
            this.consumed_len_bytes = pspecial - i;
            return this;
+        } else if (u8array[i] > 0x80) {
+            // Special representation of tiny sets that are runs
+            const lspecial = u8array[i] & 0x3f;
+            const lspecialm1 = lspecial - 1;
+            this.keysAndCardinalities = new Uint8Array(4);
+            i += 1;
+            const key = u8array[i + 2] | (u8array[i + 3] << 8);
+            const value = u8array[i] | (u8array[i + 1] << 8);
+            const container = new RoaringBitmapRun(1, new Uint8Array(4));
+            container.array[0] = value & 0xFF;
+            container.array[1] = (value >> 8) & 0xFF;
+            container.array[2] = lspecialm1 & 0xFF;
+            container.array[3] = lspecialm1 >> 8;
+            this.containers.push(container);
+            this.keysAndCardinalities[0] = key & 0xFF;
+            this.keysAndCardinalities[1] = (key >> 8) & 0xFF;
+            this.keysAndCardinalities[2] = lspecialm1 & 0xFF;
+            this.keysAndCardinalities[3] = lspecialm1 >> 8;
+            this.consumed_len_bytes = 5;
+            return this;
        } else if (u8array[i] < 0x3a) {
            // Special representation of tiny sets with arbitrary 32-bit integers
            const lspecial = u8array[i];
@@ -801,7 +866,7 @@ class HashTable {
        for (let i = 0; i < l; i += 1) {
            const value = values[i];
            if (value !== undefined) {
-                yield [keys.subarray(i * 6, (i + 1) * 6), value];
+                yield [keys.subarray(i * 5, (i + 1) * 5), value];
            }
        }
    }
@@ -818,7 +883,7 @@ class HashTable {
            const l = values.length;
            this.capacityClass += 1;
            const capacity = 1 << this.capacityClass;
-            this.keys = new Uint8Array(capacity * 6);
+            this.keys = new Uint8Array(capacity * 5);
            this.values = [];
            for (let i = 0; i < capacity; i += 1) {
                this.values.push(undefined);
@@ -827,7 +892,7 @@ class HashTable {
            for (let i = 0; i < l; i += 1) {
                const oldValue = values[i];
                if (oldValue !== undefined) {
-                    this.setNoGrow(keys, i * 6, oldValue);
+                    this.setNoGrow(keys, i * 5, oldValue);
                }
            }
        }
@@ -844,25 +909,24 @@ class HashTable {
        const values = this.values;
        const l = 1 << this.capacityClass;
        // because we know that our values are already hashed,
-        // just chop off the lower four bytes
+        // just chop off the first byte
        let slot = (
-            (key[start + 2] << 24) |
-            (key[start + 3] << 16) |
-            (key[start + 4] << 8) |
-            key[start + 5]
+            (key[start + 1] << 24) |
+            (key[start + 2] << 16) |
+            (key[start + 3] << 8) |
+            key[start + 4]
        ) & mask;
        for (let distance = 0; distance < l; ) {
-            const j = slot * 6;
+            const j = slot * 5;
            const otherValue = values[slot];
            if (otherValue === undefined) {
                values[slot] = value;
-                const keysStart = slot * 6;
+                const keysStart = slot * 5;
                keys[keysStart + 0] = key[start + 0];
                keys[keysStart + 1] = key[start + 1];
                keys[keysStart + 2] = key[start + 2];
                keys[keysStart + 3] = key[start + 3];
                keys[keysStart + 4] = key[start + 4];
-                keys[keysStart + 5] = key[start + 5];
                this.size += 1;
                break;
            } else if (
@@ -870,15 +934,14 @@ class HashTable {
                key[start + 1] === keys[j + 1] &&
                key[start + 2] === keys[j + 2] &&
                key[start + 3] === keys[j + 3] &&
-                key[start + 4] === keys[j + 4] &&
-                key[start + 5] === keys[j + 5]
+                key[start + 4] === keys[j + 4]
            ) {
                values[slot] = value;
                break;
            } else {
                const otherPreferredSlot = (
-                    (keys[j + 2] << 24) | (keys[j + 3] << 16) |
-                    (keys[j + 4] << 8) | keys[j + 5]
+                    (keys[j + 1] << 24) | (keys[j + 2] << 16) |
+                    (keys[j + 3] << 8) | keys[j + 4]
                ) & mask;
                const otherDistance = otherPreferredSlot <= slot ?
                    slot - otherPreferredSlot :
@@ -888,7 +951,7 @@ class HashTable {
                    // then insert our node in its place and swap
                    //
                    // https://cglab.ca/~abeinges/blah/robinhood-part-1/
-                    const otherKey = keys.slice(j, j + 6);
+                    const otherKey = keys.slice(j, j + 5);
                    values[slot] = value;
                    value = otherValue;
                    keys[j + 0] = key[start + 0];
@@ -896,7 +959,6 @@ class HashTable {
                    keys[j + 2] = key[start + 2];
                    keys[j + 3] = key[start + 3];
                    keys[j + 4] = key[start + 4];
-                    keys[j + 5] = key[start + 5];
                    key = otherKey;
                    start = 0;
                    distance = otherDistance;
@@ -912,7 +974,7 @@ class HashTable {
     * @returns {T|undefined}
     */
    get(key) {
-        if (key.length !== 6) {
+        if (key.length !== 5) {
            throw "invalid key";
        }
        return this.getWithOffsetKey(key, 0);
@@ -931,13 +993,13 @@ class HashTable {
        // because we know that our values are already hashed,
        // just chop off the lower four bytes
        let slot = (
-            (key[start + 2] << 24) |
-            (key[start + 3] << 16) |
-            (key[start + 4] << 8) |
-            key[start + 5]
+            (key[start + 1] << 24) |
+            (key[start + 2] << 16) |
+            (key[start + 3] << 8) |
+            key[start + 4]
        ) & mask;
        for (let distance = 0; distance < l; distance += 1) {
-            const j = slot * 6;
+            const j = slot * 5;
            const value = values[slot];
            if (value === undefined) {
                break;
@@ -946,14 +1008,13 @@ class HashTable {
                key[start + 1] === keys[j + 1] &&
                key[start + 2] === keys[j + 2] &&
                key[start + 3] === keys[j + 3] &&
-                key[start + 4] === keys[j + 4] &&
-                key[start + 5] === keys[j + 5]
+                key[start + 4] === keys[j + 4]
            ) {
                return value;
            } else {
                const otherPreferredSlot = (
-                    (keys[j + 2] << 24) | (keys[j + 3] << 16) |
-                    (keys[j + 4] << 8) | keys[j + 5]
+                    (keys[j + 1] << 24) | (keys[j + 2] << 16) |
+                    (keys[j + 3] << 8) | keys[j + 4]
                ) & mask;
                const otherDistance = otherPreferredSlot <= slot ?
                    slot - otherPreferredSlot :
@@ -1133,61 +1194,56 @@ function loadDatabase(hooks) {
        dataColumns: new Map(),
        dataColumnsBuckets: new HashTable(),
        searchTreeLoadByNodeID: function(nodeid) {
-            const existingPromise = registry.searchTreePromises.get(nodeid);
-            if (existingPromise) {
-                return existingPromise;
-            }
            /** @type {Promise<SearchTree>} */
            let newPromise;
            if ((nodeid[0] & 0x80) !== 0) {
-                const isWhole = (nodeid[0] & 0x40) !== 0;
-                let leaves;
-                if ((nodeid[0] & 0x10) !== 0) {
-                    let id1 = (nodeid[2] << 8) | nodeid[3];
-                    if ((nodeid[0] & 0x20) !== 0) {
-                        // when data is present, id1 can be up to 20 bits
-                        id1 |= ((nodeid[1] & 0x0f) << 16);
-                    } else {
-                        // otherwise, we fit in 28
-                        id1 |= ((nodeid[0] & 0x0f) << 24) | (nodeid[1] << 16);
-                    }
-                    const id2 = id1 + ((nodeid[4] << 8) | nodeid[5]);
-                    leaves = RoaringBitmap.makeSingleton(id1)
-                        .union(RoaringBitmap.makeSingleton(id2));
-                } else if (!isWhole && (nodeid[0] & 0xf0) === 0x80) {
-                    const id1 = ((nodeid[0] & 0x0f) << 16) | (nodeid[1] << 8) | nodeid[2];
-                    const id2 = id1 + ((nodeid[3] << 4) | ((nodeid[4] >> 4) & 0x0f));
-                    const id3 = id2 + (((nodeid[4] & 0x0f) << 8) | nodeid[5]);
-                    leaves = RoaringBitmap.makeSingleton(id1)
-                        .union(RoaringBitmap.makeSingleton(id2))
-                        .union(RoaringBitmap.makeSingleton(id3));
+                const isSuffixOnly = (nodeid[0] & 0x40) !== 0;
+                const isRun = (nodeid[0] & 0x20) !== 0;
+                const lengthOrData = nodeid[0] & 0x1F;
+                const id = (nodeid[1] << 24) | (nodeid[2] << 16) | (nodeid[3] << 8) | nodeid[4];
+                let bitmap;
+                if (isRun) {
+                    bitmap = new RoaringBitmap(null);
+                    bitmap.containers.push(new RoaringBitmapRun(
+                        1,
+                        Uint8Array.of(
+                            id & 0xFF,
+                            (id >> 8) & 0xFF,
+                            lengthOrData,
+                            0,
+                        ),
+                    ));
+                    bitmap.keysAndCardinalities = Uint8Array.of(
+                        (id >> 16) & 0xff,
+                        (id >> 24) & 0xff,
+                        lengthOrData,
+                        0,
+                    );
                } else {
-                    leaves = RoaringBitmap.makeSingleton(
-                        (nodeid[2] << 24) | (nodeid[3] << 16) |
-                        (nodeid[4] << 8) | nodeid[5],
+                    bitmap = RoaringBitmap.makeSingleton(id);
+                }
+                let tree;
+                if (isSuffixOnly) {
+                    tree = new SuffixSearchTree(
+                        EMPTY_SEARCH_TREE_BRANCHES,
+                        isRun ? 0 : (lengthOrData + 1),
+                        bitmap,
+                    );
+                } else {
+                    tree = new PrefixSearchTree(
+                        EMPTY_SEARCH_TREE_BRANCHES,
+                        EMPTY_SEARCH_TREE_BRANCHES,
+                        isRun ? EMPTY_UINT8 : Uint8Array.of(LONG_ALPHABET.chars[lengthOrData]),
+                        bitmap,
+                        EMPTY_BITMAP,
                    );
                }
-                if (isWhole) {
-                    const data = (nodeid[0] & 0x20) !== 0 ?
-                        Uint8Array.of(((nodeid[0] & 0x0f) << 4) | (nodeid[1] >> 4)) :
-                        EMPTY_UINT8;
-                    newPromise = Promise.resolve(new PrefixSearchTree(
-                        EMPTY_SEARCH_TREE_BRANCHES,
-                        EMPTY_SEARCH_TREE_BRANCHES,
-                        data,
-                        leaves,
-                        EMPTY_BITMAP,
-                    ));
-                } else {
-                    const data = (nodeid[0] & 0xf0) === 0x80 ? 0 : (
-                        ((nodeid[0] & 0x0f) << 4) | (nodeid[1] >> 4));
-                    newPromise = Promise.resolve(new SuffixSearchTree(
-                        EMPTY_SEARCH_TREE_BRANCHES,
-                        data,
-                        leaves,
-                    ));
-                }
+                newPromise = Promise.resolve(tree);
            } else {
+                const existingPromise = registry.searchTreePromises.get(nodeid);
+                if (existingPromise) {
+                    return existingPromise;
+                }
                const hashHex = makeHexFromUint8Array(nodeid);
                newPromise = new Promise((resolve, reject) => {
                    const cb = registry.searchTreeLoadPromiseCallbacks.get(nodeid);
@@ -1211,8 +1267,8 @@ function loadDatabase(hooks) {
                        hooks.loadTreeByHash(hashHex);
                    }
                });
+                registry.searchTreePromises.set(nodeid, newPromise);
            }
-            registry.searchTreePromises.set(nodeid, newPromise);
            return newPromise;
        },
        dataLoadByNameAndHash: function(name, hash) {
@@ -1277,8 +1333,8 @@ function loadDatabase(hooks) {
        getNodeID(i) {
            return new Uint8Array(
                this.nodeids.buffer,
-                this.nodeids.byteOffset + (i * 6),
-                6,
+                this.nodeids.byteOffset + (i * 5),
+                5,
            );
        }
        // https://github.com/microsoft/TypeScript/issues/17227
@@ -1385,113 +1441,310 @@ function loadDatabase(hooks) {
        EMPTY_UINT8,
    );

-    /** @type {number[]} */
-    const SHORT_ALPHABITMAP_CHARS = [];
+    class Alphabet {
+        constructor() {
+            /** @type {number[]} */
+            this.chars = [];
+            /** @type {number} */
+            this.len = 0;
+            /** @type {number} */
+            this.bytes = 0;
+            /** @type {number} */
+            this.flag = 0;
+            /** @type {number} */
+            this.bitwidth = 0;
+        }
+        /**
+         * @param {number} c
+         * @returns {boolean}
+         */
+        contains(c) {
+            return this.chars.indexOf(c) !== -1;
+        }
+        /**
+         * @param {number} c
+         * @returns {number}
+         */
+        index(c) {
+            return this.chars.indexOf(c);
+        }
+    }
+
+    /** @type {Alphabet} */
+    const VOWELONLY_ALPHABITMAP = Object.assign(
+        new Alphabet(),
+        {
+            chars: [0x61, 0x65, 0x69, 0x6f, 0x75],
+            len: 5,
+            bytes: 0,
+            flag: 0x80,
+        },
+    );
+
+    /** @type {Alphabet} */
+    const CONSONANTSONLY_ALPHABET = Object.assign(
+        new Alphabet(),
+        {
+            chars: [],
+            len: 21,
+            bytes: 2,
+            flag: 0xc0,
+        },
+    );
+    for (let i = 0x61; i <= 0x7A; ++i) {
+        if (i === 0x61 || i === 0x65 || i === 0x69 || i === 0x6f || i === 0x75) {
+            // 21 bits, 26 letters, so skip aeiou
+            continue;
+        }
+        CONSONANTSONLY_ALPHABET.chars.push(i);
+    }
+
+    /** @type {Alphabet} */
+    const HEX_ALPHABET = Object.assign(
+        new Alphabet(),
+        {
+            chars: [],
+            len: 16,
+            bytes: 2,
+            flag: 0xfc,
+        },
+    );
+    for (let i = 0x30; i <= 0x39; ++i) {
+        HEX_ALPHABET.chars.push(i);
+    }
+    for (let i = 0x61; i <= 0x66; ++i) {
+        HEX_ALPHABET.chars.push(i);
+    }
+
+    /** @type {Alphabet} */
+    const SHORT_ALPHABET = Object.assign(
+        new Alphabet(),
+        {
+            chars: [],
+            len: 24,
+            bytes: 3,
+            flag: 0xfd,
+        },
+    );
    for (let i = 0x61; i <= 0x7A; ++i) {
        if (i === 0x76 || i === 0x71) {
            // 24 entries, 26 letters, so we skip q and v
            continue;
        }
-        SHORT_ALPHABITMAP_CHARS.push(i);
+        SHORT_ALPHABET.chars.push(i);
    }

-    /** @type {number[]} */
-    const LONG_ALPHABITMAP_CHARS = [0x31, 0x32, 0x33, 0x34, 0x35, 0x36];
+    /** @type {Alphabet} */
+    const LONG_ALPHABET = Object.assign(
+        new Alphabet(),
+        {
+            chars: [0x31, 0x32, 0x33, 0x34, 0x35, 0x36],
+            len: 32,
+            bytes: 4,
+            flag: 0xfe,
+        },
+    );
    for (let i = 0x61; i <= 0x7A; ++i) {
-        LONG_ALPHABITMAP_CHARS.push(i);
+        LONG_ALPHABET.chars.push(i);
+    }
+
+    /** @type {Alphabet} */
+    const ASCII_ALPHABET = Object.assign(
+        new Alphabet(),
+        {
+            chars: [],
+            len: 128,
+            bytes: 16,
+            flag: 0xf0,
+            /**
+             * @param {number} c
+             * @returns {boolean}
+             */
+            contains(c) {
+                return c <= 0x7f;
+            },
+            /**
+             * @param {number} c
+             * @returns {number}
+             */
+            index(c) {
+                return c;
+            },
+        },
+    );
+    for (let i = 0x00; i <= 0x7f; ++i) {
+        ASCII_ALPHABET.chars.push(i);
+    }
+
+    /** @type {Alphabet} */
+    const RAWBYTE_ALPHABET = Object.assign(
+        new Alphabet(),
+        {
+            chars: [],
+            len: 256,
+            bytes: 32,
+            flag: 0xff,
+            /**
+             * @param {number} _c
+             * @returns {boolean}
+             */
+            contains(_c) {
+                return true;
+            },
+            /**
+             * @param {number} c
+             * @returns {number}
+             */
+            index(c) {
+                return c;
+            },
+        },
+    );
+    for (let i = 0x00; i <= 0xff; ++i) {
+        RAWBYTE_ALPHABET.chars.push(i);
    }

    /**
-     * @template ST
-     * @param {number[]} alphabitmap_chars
-     * @param {number} width
-     * @return {(typeof SearchTreeBranches<ST>)&{"ALPHABITMAP_CHARS": number[], "width": number}}
+     * Parse an alphabet and buffer where the flag is right at the beginning.
+     * @param {number} start
+     * @param {Uint8Array} buf
+     * @returns {{"alphabet": Alphabet, "consumed_len_bytes": number, "len": number}?}
     */
-    function makeSearchTreeBranchesAlphaBitmapClass(alphabitmap_chars, width) {
-        const bitwidth = width * 8;
+    Alphabet.parse = function(start, buf) {
+        const flag = buf[start];
+        const parsed = Alphabet.parseFlag(flag, start + 1, buf);
+        if (!parsed) {
+            return null;
+        }
+        parsed.consumed_len_bytes += 1;
+        return parsed;
+    };
+
+    /**
+     * Parse an alphabet and buffer where the flag is not at the beginning.
+     * @param {number} flag
+     * @param {number} i
+     * @param {Uint8Array} buf
+     * @returns {{"alphabet": Alphabet, "consumed_len_bytes": number, "len": number}?}
+     */
+    Alphabet.parseFlag = function(flag, i, buf) {
+        if (flag <= 0x80) {
+            return null;
+        }
+        const alphabet = flag === RAWBYTE_ALPHABET.flag ? RAWBYTE_ALPHABET : (
+            flag === ASCII_ALPHABET.flag ? ASCII_ALPHABET : (
+            flag === LONG_ALPHABET.flag ? LONG_ALPHABET : (
+            flag === SHORT_ALPHABET.flag ? SHORT_ALPHABET : (
+            flag === HEX_ALPHABET.flag ? HEX_ALPHABET : (
+            flag >= CONSONANTSONLY_ALPHABET.flag ? CONSONANTSONLY_ALPHABET : VOWELONLY_ALPHABITMAP
+        )))));
+        let len = alphabet === CONSONANTSONLY_ALPHABET || alphabet === VOWELONLY_ALPHABITMAP ?
+            bitCount(flag & 0x1f) : 0;
+        for (let ix = 0; ix < alphabet.bytes; ++ix) {
+            len += bitCount(buf[i]);
+            i += 1;
+        }
+        return {alphabet, consumed_len_bytes: alphabet.bytes, len};
+    };
+
+    /**
+     * @template ST
+     * @extends SearchTreeBranches<ST>
+     */
+    class SearchTreeBranchesAlphaBitmap extends SearchTreeBranches {
        /**
-         * @extends SearchTreeBranches<ST>
+         * @param {Alphabet} alphabet
+         * @param {Uint8Array} buffer
+         * @param {Uint8Array} nodeids
         */
-        const cls = class SearchTreeBranchesAlphaBitmap extends SearchTreeBranches {
-            /**
-             * @param {number} bitmap
-             * @param {Uint8Array} nodeids
-             */
-            constructor(bitmap, nodeids) {
-                super(nodeids.length / 6, nodeids);
-                if (nodeids.length / 6 !== bitCount(bitmap)) {
-                    throw new Error(`mismatch ${bitmap} ${nodeids}`);
+        constructor(alphabet, buffer, nodeids) {
+            let bitmap;
+            if (alphabet === VOWELONLY_ALPHABITMAP) {
+                bitmap = new Uint8Array(1);
+                bitmap[0] = buffer[0] & 0x1f;
+            } else if (alphabet === CONSONANTSONLY_ALPHABET) {
+                bitmap = new Uint8Array(3);
+                bitmap[0] = buffer[1];
+                bitmap[1] = buffer[2];
+                bitmap[2] = buffer[0] & 0x1f;
+            } else {
+                bitmap = buffer.subarray(1);
+            }
+            let cardinality = 0;
+            for (let i = 0; i < bitmap.length; ++i) {
+                cardinality += bitCount(bitmap[i]);
+            }
+            super(cardinality, nodeids);
+            this.bitmap = bitmap;
+            this.alphabet = alphabet;
+        }
+        /**
+         * Yields [character, SearchTree] pairs.
+         * @returns {Generator<[number, Promise<ST>|null]>}
+         */
+        * entries() {
+            let i = 0;
+            let j = 0;
+            while (i < this.alphabet.len) {
+                if (this.bitmap[i >> 3] & (1 << (i & 0x07))) {
+                    yield [this.alphabet.chars[i], this.subtrees[j]];
+                    j += 1;
                }
-                this.bitmap = bitmap;
-                this.nodeids = nodeids;
+                i += 1;
            }
-            /** @returns {Generator<[number, Promise<ST>|null]>} */
-            * entries() {
-                let i = 0;
-                let j = 0;
-                while (i < bitwidth) {
-                    if (this.bitmap & (1 << i)) {
-                        yield [alphabitmap_chars[i], this.subtrees[j]];
-                        j += 1;
-                    }
-                    i += 1;
-                }
+        }
+        /**
+         * Given a character, returns the numbered index of the search
+         * tree, or -1 if there isn't one.
+         * @param {number} c
+         * @returns {number}
+         */
+        getIndex(c) {
+            //return this.getKeys().indexOf(c);
+            if (!this.alphabet.contains(c)) {
+                return -1;
            }
-            /**
-             * @param {number} k
-             * @returns {number}
-             */
-            getIndex(k) {
-                //return this.getKeys().indexOf(k);
-                const ix = alphabitmap_chars.indexOf(k);
-                if (ix < 0) {
-                    return ix;
-                }
-                const result = bitCount(~(0xffffffff << ix) & this.bitmap);
-                return result >= this.subtrees.length ? -1 : result;
-            }
-            /**
-             * @param {number} branch_index
-             * @returns {number}
-             */
-            getKey(branch_index) {
-                return this.getKeys()[branch_index];
-            }
-            /**
-             * @returns {Uint8Array}
-             */
-            getKeys() {
-                const length = bitCount(this.bitmap);
-                const result = new Uint8Array(length);
-                let result_index = 0;
-                for (let alpha_index = 0; alpha_index < bitwidth; ++alpha_index) {
-                    if (this.bitmap & (1 << alpha_index)) {
-                        result[result_index] = alphabitmap_chars[alpha_index];
-                        result_index += 1;
-                    }
+            const k = this.alphabet.index(c);
+            if (this.bitmap[k >> 3] & (1 << (k & 0x07))) {
+                let result = bitCount(~(0xff << (k & 0x07)) & this.bitmap[k >> 3]);
+                for (let ix = 0; ix < (k >> 3); ++ix) {
+                    result += bitCount(this.bitmap[ix]);
                }
                return result;
+            } else {
+                return -1;
            }
-        };
-        cls.ALPHABITMAP_CHARS = alphabitmap_chars;
-        cls.width = width;
-        return cls;
+        }
+        /**
+         * Given the numbered index of a search tree, returns the key.
+         * This is the exact opposite of getIndex().
+         * @param {number} branch_index
+         * @returns {number}
+         */
+        getKey(branch_index) {
+            return this.getKeys()[branch_index];
+        }
+        /**
+         * Returns a list of one-byte keys.
+         * @returns {Uint8Array}
+         */
+        getKeys() {
+            let length = 0;
+            for (let i = 0; i < this.bitmap.length; ++i) {
+                length += bitCount(this.bitmap[i]);
+            }
+            const result = new Uint8Array(length);
+            let result_index = 0;
+            for (let ix = 0; ix < this.alphabet.len; ++ix) {
+                if (this.bitmap[ix >> 3] & (1 << (ix & 0x07))) {
+                    result[result_index] = this.alphabet.chars[ix];
+                    result_index += 1;
+                }
+            }
+            return result;
+        }
    }

-    /**
-     * @template ST
-     * @type {(typeof SearchTreeBranches<any>)&{"ALPHABITMAP_CHARS": number[], "width": number}}
-     */
-    const SearchTreeBranchesShortAlphaBitmap =
-        makeSearchTreeBranchesAlphaBitmapClass(SHORT_ALPHABITMAP_CHARS, 3);
-
-    /**
-     * @template ST
-     * @type {(typeof SearchTreeBranches<any>)&{"ALPHABITMAP_CHARS": number[], "width": number}}
-     */
-    const SearchTreeBranchesLongAlphaBitmap =
-        makeSearchTreeBranchesAlphaBitmapClass(LONG_ALPHABITMAP_CHARS, 4);
-
    /**
     * @typedef {PrefixSearchTree|SuffixSearchTree|InlineNeighborsTree} SearchTree
     * @typedef {PrefixTrie|SuffixTrie} Trie
@@ -2347,19 +2600,15 @@ function loadDatabase(hooks) {
            const has_branches = (encoded[i] & 0x04) !== 0;
            /** @type {boolean} */
            const is_suffixes_only = (encoded[i] & 0x01) !== 0;
-            let leaves_count = ((encoded[i] >> 4) & 0x07) + 1;
-            let leaves_is_run = (encoded[i] >> 7) !== 0;
+            const leaves_count = (encoded[i] >> 4) & 0x07;
+            const leaves_is_run = (encoded[i] >> 7) !== 0;
            i += 1;
-            let branch_count = 0;
+            let branch_flag = 0;
            if (has_branches) {
-                branch_count = encoded[i] + 1;
+                branch_flag = encoded[i];
                i += 1;
            }
            const dlen = encoded[i] & 0x3f;
-            if ((encoded[i] & 0x80) !== 0) {
-                leaves_count = 0;
-                leaves_is_run = false;
-            }
            i += 1;
            /** @type {Uint8Array} */
            let data = EMPTY_UINT8;
@@ -2367,13 +2616,25 @@ function loadDatabase(hooks) {
                data = encoded.subarray(i, i + dlen);
                i += dlen;
            }
+            const branch_flag_alphabet = Alphabet.parseFlag(branch_flag, i, encoded);
+            let branch_alphabitmap = EMPTY_UINT8;
+            if (branch_flag_alphabet) {
+                branch_alphabitmap = new Uint8Array(branch_flag_alphabet.consumed_len_bytes + 1);
+                branch_alphabitmap[0] = branch_flag;
+                branch_alphabitmap.set(
+                    encoded.subarray(i, i + branch_flag_alphabet.consumed_len_bytes),
+                    1,
+                );
+                i += branch_flag_alphabet.consumed_len_bytes;
+            }
+            const branch_count = branch_flag_alphabet ? branch_flag_alphabet.len : branch_flag;
            const leaf_value_upper = encoded[i] | (encoded[i + 1] << 8);
            i += 2;
            /** @type {Promise<SearchTree>[]} */
            const branch_nodes = [];
            for (let j = 0; j < branch_count; j += 1) {
                const branch_dlen = encoded[i] & 0x0f;
-                const branch_leaves_count = ((encoded[i] >> 4) & 0x07) + 1;
+                const branch_leaves_count = (encoded[i] >> 4) & 0x07;
                const branch_leaves_is_run = (encoded[i] >> 7) !== 0;
                i += 1;
                /** @type {Uint8Array} */
@@ -2427,14 +2688,22 @@ function loadDatabase(hooks) {
                        ),
                ));
            }
-            /** @type {SearchTreeBranchesArray<SearchTree>} */
+            /** @type {SearchTreeBranches<SearchTree>} */
            const branches = branch_count === 0 ?
                EMPTY_SEARCH_TREE_BRANCHES :
-                new SearchTreeBranchesArray(
-                    encoded.subarray(i, i + branch_count),
-                    EMPTY_UINT8,
-                );
-            i += branch_count;
+                branch_flag_alphabet ?
+                    new SearchTreeBranchesAlphaBitmap(
+                        branch_flag_alphabet.alphabet,
+                        branch_alphabitmap,
+                        EMPTY_UINT8,
+                    ) :
+                    new SearchTreeBranchesArray(
+                        encoded.subarray(i, i + branch_count),
+                        EMPTY_UINT8,
+                    );
+            if (!branch_flag_alphabet) {
+                i += branch_count;
+            }
            branches.subtrees = branch_nodes;
            let leaves = EMPTY_BITMAP;
            if (leaves_count !== 0) {
@@ -2556,7 +2825,7 @@ function loadDatabase(hooks) {
                    k += 1;
                }
                const end = k;
-                const bucket = {hash: hashes.subarray(i * 6, (i + 1) * 6), data: null, end, count};
+                const bucket = {hash: hashes.subarray(i * 5, (i + 1) * 5), data: null, end, count};
                this.buckets.push(bucket);
                this.bucket_keys.push(start);
            }
@@ -2698,7 +2967,7 @@ function loadDatabase(hooks) {
     */
    function loadColumnFromBytes(data) {
        const hashBuf = Uint8Array.of(0, 0, 0, 0, 0, 0, 0, 0);
-        const truncatedHash = hashBuf.subarray(2, 8);
+        const truncatedHash = hashBuf.subarray(3, 8);
        siphashOfBytes(data, 0, 0, 0, 0, hashBuf);
        const cb = registry.dataColumnLoadPromiseCallbacks.get(truncatedHash);
        if (cb) {
@@ -2744,7 +3013,7 @@ function loadDatabase(hooks) {
        /** @type {HashTable<SearchTree>} */
        const stash = new HashTable();
        const hash = Uint8Array.of(0, 0, 0, 0, 0, 0, 0, 0);
-        const truncatedHash = new Uint8Array(hash.buffer, 2, 6);
+        const truncatedHash = hash.subarray(3, 8);
        // used for handling compressed (that is, relative-offset) nodes
        /** @type {{hash: Uint8Array, used: boolean}[]} */
        const hash_history = [];
@@ -2764,12 +3033,12 @@ function loadDatabase(hooks) {
         * @param {number} i
         * @param {number} compression_tag
         * @returns {{
-         *     "cpbranches": Uint8Array,
-         *     "csbranches": Uint8Array,
         *     "might_have_prefix_branches": SearchTreeBranches<SearchTree>,
         *     "branches": SearchTreeBranches<SearchTree>,
+         *     "branches_header": Uint8Array,
         *     "cpnodes": Uint8Array,
         *     "csnodes": Uint8Array,
+         *     "branches_keys": Uint8Array,
         *     "consumed_len_bytes": number,
         * }}
         */
@@ -2786,49 +3055,63 @@ function loadDatabase(hooks) {
            const any_children_are_compressed =
                (compression_tag & 0xF0) !== 0x00 || is_long_compressed;
            const start_point = i;
-            let cplen;
-            let cslen;
            /**
-             * @type {(
-             *   typeof SearchTreeBranches<SearchTree> &
-             *   {"ALPHABITMAP_CHARS": number[], "width": number}
-             * )?}
+             * @type {Alphabet|null}
             */
-            let alphabitmap = null;
+            let cpalphabet = null;
+            /**
+             * @type {Uint8Array}
+             */
+            let cpalphabitmap = EMPTY_UINT8;
+            /**
+             * @type {number}
+             */
+            let cplen;
+            /**
+             * @type {Alphabet|null}
+             */
+            let csalphabet = null;
+            /**
+             * @type {Uint8Array}
+             */
+            let csalphabitmap = EMPTY_UINT8;
+            /**
+             * @type {number}
+             */
+            let cslen;
+            // might-have-prefix nodes
            if (is_pure_suffixes_only_node) {
                cplen = 0;
-                cslen = input[i];
-                i += 1;
-                if (cslen >= 0xc0) {
-                    alphabitmap = SearchTreeBranchesLongAlphaBitmap;
-                    cslen = cslen & 0x3F;
-                } else if (cslen >= 0x80) {
-                    alphabitmap = SearchTreeBranchesShortAlphaBitmap;
-                    cslen = cslen & 0x7F;
-                }
            } else {
-                cplen = input[i];
-                i += 1;
-                cslen = input[i];
-                i += 1;
-                if (cplen === 0xff && cslen === 0xff) {
-                    cplen = 0x100;
-                    cslen = 0;
-                } else if (cplen >= 0xc0 && cslen >= 0xc0) {
-                    alphabitmap = SearchTreeBranchesLongAlphaBitmap;
-                    cplen = cplen & 0x3F;
-                    cslen = cslen & 0x3F;
-                } else if (cplen >= 0x80 && cslen >= 0x80) {
-                    alphabitmap = SearchTreeBranchesShortAlphaBitmap;
-                    cplen = cplen & 0x7F;
-                    cslen = cslen & 0x7F;
+                const parsed = Alphabet.parse(i, input);
+                if (parsed) {
+                    cpalphabitmap = input.subarray(i, i + parsed.consumed_len_bytes);
+                    cpalphabet = parsed.alphabet;
+                    cplen = parsed.len;
+                    i += parsed.consumed_len_bytes;
+                } else {
+                    cplen = input[i];
+                    i += 1;
                }
            }
+            // suffix-only nodes
+            const parsed = Alphabet.parse(i, input);
+            if (parsed) {
+                csalphabitmap = input.subarray(i, i + parsed.consumed_len_bytes);
+                csalphabet = parsed.alphabet;
+                cslen = parsed.len;
+                i += parsed.consumed_len_bytes;
+            } else {
+                cslen = input[i];
+                i += 1;
+            }
+            const branches_header = input.subarray(start_point, i);
+            // now process the hashes, offsets, or stack
            let j = 0;
            /** @type {Uint8Array} */
            let cpnodes;
            if (any_children_are_compressed) {
-                cpnodes = cplen === 0 ? EMPTY_UINT8 : new Uint8Array(cplen * 6);
+                cpnodes = cplen === 0 ? EMPTY_UINT8 : new Uint8Array(cplen * 5);
                while (j < cplen) {
                    const is_compressed = all_children_are_compressed ||
                        ((0x10 << j) & compression_tag) !== 0;
@@ -2845,29 +3128,28 @@ function loadDatabase(hooks) {
                        hash_history[slot].used = true;
                        cpnodes.set(
                            hash_history[slot].hash,
-                            j * 6,
+                            j * 5,
                        );
                    } else {
-                        const joff = j * 6;
+                        const joff = j * 5;
                        cpnodes[joff + 0] = input[i + 0];
                        cpnodes[joff + 1] = input[i + 1];
                        cpnodes[joff + 2] = input[i + 2];
                        cpnodes[joff + 3] = input[i + 3];
                        cpnodes[joff + 4] = input[i + 4];
-                        cpnodes[joff + 5] = input[i + 5];
-                        i += 6;
+                        i += 5;
                    }
                    j += 1;
                }
            } else {
-                cpnodes = cplen === 0 ? EMPTY_UINT8 : input.subarray(i, i + (cplen * 6));
-                i += cplen * 6;
+                cpnodes = cplen === 0 ? EMPTY_UINT8 : input.subarray(i, i + (cplen * 5));
+                i += cplen * 5;
            }
            j = 0;
            /** @type {Uint8Array} */
            let csnodes;
            if (any_children_are_compressed) {
-                csnodes = cslen === 0 ? EMPTY_UINT8 : new Uint8Array(cslen * 6);
+                csnodes = cslen === 0 ? EMPTY_UINT8 : new Uint8Array(cslen * 5);
                while (j < cslen) {
                    const is_compressed = all_children_are_compressed ||
                        ((0x10 << (cplen + j)) & compression_tag) !== 0;
@@ -2884,138 +3166,146 @@ function loadDatabase(hooks) {
                        hash_history[slot].used = true;
                        csnodes.set(
                            hash_history[slot].hash,
-                            j * 6,
+                            j * 5,
                        );
                    } else {
-                        const joff = j * 6;
+                        const joff = j * 5;
                        csnodes[joff + 0] = input[i + 0];
                        csnodes[joff + 1] = input[i + 1];
                        csnodes[joff + 2] = input[i + 2];
                        csnodes[joff + 3] = input[i + 3];
                        csnodes[joff + 4] = input[i + 4];
-                        csnodes[joff + 5] = input[i + 5];
-                        i += 6;
+                        i += 5;
                    }
                    j += 1;
                }
            } else {
-                csnodes = cslen === 0 ? EMPTY_UINT8 : input.subarray(i, i + (cslen * 6));
-                i += cslen * 6;
+                csnodes = cslen === 0 ? EMPTY_UINT8 : input.subarray(i, i + (cslen * 5));
+                i += cslen * 5;
            }
-            let cpbranches;
+            const start_point_keys = i;
            let might_have_prefix_branches;
            if (cplen === 0) {
-                cpbranches = EMPTY_UINT8;
                might_have_prefix_branches = EMPTY_SEARCH_TREE_BRANCHES;
-            } else if (alphabitmap) {
-                cpbranches = new Uint8Array(input.buffer, i + input.byteOffset, alphabitmap.width);
-                const branchset = (alphabitmap.width === 4 ? (input[i + 3] << 24) : 0) |
-                    (input[i + 2] << 16) |
-                    (input[i + 1] << 8) |
-                    input[i];
-                might_have_prefix_branches = new alphabitmap(branchset, cpnodes);
-                i += alphabitmap.width;
+            } else if (cpalphabet) {
+                might_have_prefix_branches = new SearchTreeBranchesAlphaBitmap(
+                    cpalphabet,
+                    cpalphabitmap,
+                    cpnodes,
+                );
            } else {
-                cpbranches = new Uint8Array(input.buffer, i + input.byteOffset, cplen);
-                might_have_prefix_branches = new SearchTreeBranchesArray(cpbranches, cpnodes);
+                might_have_prefix_branches = new SearchTreeBranchesArray(
+                    new Uint8Array(input.buffer, i + input.byteOffset, cplen),
+                    cpnodes,
+                );
                i += cplen;
            }
-            let csbranches;
            let branches;
            if (cslen === 0) {
-                csbranches = EMPTY_UINT8;
                branches = might_have_prefix_branches;
-            } else if (alphabitmap) {
-                csbranches = new Uint8Array(input.buffer, i + input.byteOffset, alphabitmap.width);
-                const branchset = (alphabitmap.width === 4 ? (input[i + 3] << 24) : 0) |
-                    (input[i + 2] << 16) |
-                    (input[i + 1] << 8) |
-                    input[i];
-                if (cplen === 0) {
-                    branches = new alphabitmap(branchset, csnodes);
-                } else {
-                    const cpoffset = i - alphabitmap.width;
-                    const cpbranchset =
-                        (alphabitmap.width === 4 ? (input[cpoffset + 3] << 24) : 0) |
-                        (input[cpoffset + 2] << 16) |
-                        (input[cpoffset + 1] << 8) |
-                        input[cpoffset];
-                    const hashes = new Uint8Array((cplen + cslen) * 6);
-                    let cpi = 0;
-                    let csi = 0;
-                    let j = 0;
-                    for (let k = 0; k < alphabitmap.ALPHABITMAP_CHARS.length; k += 1) {
-                        if (branchset & (1 << k)) {
-                            hashes[j + 0] = csnodes[csi + 0];
-                            hashes[j + 1] = csnodes[csi + 1];
-                            hashes[j + 2] = csnodes[csi + 2];
-                            hashes[j + 3] = csnodes[csi + 3];
-                            hashes[j + 4] = csnodes[csi + 4];
-                            hashes[j + 5] = csnodes[csi + 5];
-                            j += 6;
-                            csi += 6;
-                        } else if (cpbranchset & (1 << k)) {
-                            hashes[j + 0] = cpnodes[cpi + 0];
-                            hashes[j + 1] = cpnodes[cpi + 1];
-                            hashes[j + 2] = cpnodes[cpi + 2];
-                            hashes[j + 3] = cpnodes[cpi + 3];
-                            hashes[j + 4] = cpnodes[cpi + 4];
-                            hashes[j + 5] = cpnodes[cpi + 5];
-                            j += 6;
-                            cpi += 6;
-                        }
-                    }
-                    branches = new alphabitmap(branchset | cpbranchset, hashes);
-                }
-                i += alphabitmap.width;
            } else {
-                csbranches = new Uint8Array(input.buffer, i + input.byteOffset, cslen);
-                if (cplen === 0) {
-                    branches = new SearchTreeBranchesArray(csbranches, csnodes);
+                if (csalphabet) {
+                    branches = new SearchTreeBranchesAlphaBitmap(
+                        csalphabet,
+                        csalphabitmap,
+                        csnodes,
+                    );
                } else {
-                    const branchset = new Uint8Array(cplen + cslen);
-                    const hashes = new Uint8Array((cplen + cslen) * 6);
-                    let cpi = 0;
-                    let csi = 0;
-                    let j = 0;
-                    while (cpi < cplen || csi < cslen) {
-                        if (cpi >= cplen || (csi < cslen && cpbranches[cpi] > csbranches[csi])) {
-                            branchset[j] = csbranches[csi];
-                            const joff = j * 6;
-                            const csioff = csi * 6;
-                            hashes[joff + 0] = csnodes[csioff + 0];
-                            hashes[joff + 1] = csnodes[csioff + 1];
-                            hashes[joff + 2] = csnodes[csioff + 2];
-                            hashes[joff + 3] = csnodes[csioff + 3];
-                            hashes[joff + 4] = csnodes[csioff + 4];
-                            hashes[joff + 5] = csnodes[csioff + 5];
-                            csi += 1;
-                        } else {
-                            branchset[j] = cpbranches[cpi];
-                            const joff = j * 6;
-                            const cpioff = cpi * 6;
-                            hashes[joff + 0] = cpnodes[cpioff + 0];
-                            hashes[joff + 1] = cpnodes[cpioff + 1];
-                            hashes[joff + 2] = cpnodes[cpioff + 2];
-                            hashes[joff + 3] = cpnodes[cpioff + 3];
-                            hashes[joff + 4] = cpnodes[cpioff + 4];
-                            hashes[joff + 5] = cpnodes[cpioff + 5];
-                            cpi += 1;
-                        }
-                        j += 1;
-                    }
-                    branches = new SearchTreeBranchesArray(branchset, hashes);
+                    branches = new SearchTreeBranchesArray(
+                        new Uint8Array(input.buffer, i + input.byteOffset, cslen),
+                        csnodes,
+                    );
+                    i += cslen;
+                }
+                if (cplen !== 0) {
+                    const hashes = new Uint8Array((cplen + cslen) * 5);
+                    if (cplen + cslen > 32) {
+                        const raw_bits = new Uint8Array(RAWBYTE_ALPHABET.bytes + 1);
+                        raw_bits[0] = RAWBYTE_ALPHABET.flag;
+                        const bits = raw_bits.subarray(1);
+                        const mhp_keys = might_have_prefix_branches.getKeys();
+                        const so_keys = branches.getKeys();
+                        let mhp_i = 0;
+                        let so_i = 0;
+                        let j = 0;
+                        while (mhp_i < cplen || so_i < cslen) {
+                            if (so_i === cslen || mhp_keys[mhp_i] < so_keys[so_i]) {
+                                const joff = j * 5;
+                                const mhp_off = mhp_i * 5;
+                                hashes[joff + 0] = cpnodes[mhp_off + 0];
+                                hashes[joff + 1] = cpnodes[mhp_off + 1];
+                                hashes[joff + 2] = cpnodes[mhp_off + 2];
+                                hashes[joff + 3] = cpnodes[mhp_off + 3];
+                                hashes[joff + 4] = cpnodes[mhp_off + 4];
+                                const ix = mhp_keys[mhp_i];
+                                bits[ix >> 3] |= 1 << (ix & 0x07);
+                                mhp_i += 1;
+                            } else {
+                                const joff = j * 5;
+                                const so_off = so_i * 5;
+                                hashes[joff + 0] = csnodes[so_off + 0];
+                                hashes[joff + 1] = csnodes[so_off + 1];
+                                hashes[joff + 2] = csnodes[so_off + 2];
+                                hashes[joff + 3] = csnodes[so_off + 3];
+                                hashes[joff + 4] = csnodes[so_off + 4];
+                                const ix = so_keys[so_i];
+                                bits[ix >> 3] |= 1 << (ix & 0x07);
+                                so_i += 1;
+                            }
+                            j += 1;
+                        }
+                        branches = new SearchTreeBranchesAlphaBitmap(
+                            RAWBYTE_ALPHABET,
+                            raw_bits,
+                            hashes,
+                        );
+                    } else {
+                        const merged_keys = new Uint8Array(cplen + cslen);
+                        const mhp_keys = might_have_prefix_branches.getKeys();
+                        const so_keys = branches.getKeys();
+                        let mhp_i = 0;
+                        let so_i = 0;
+                        let j = 0;
+                        while (mhp_i < cplen || so_i < cslen) {
+                            if (so_i === cslen || mhp_keys[mhp_i] < so_keys[so_i]) {
+                                const joff = j * 5;
+                                const mhp_off = mhp_i * 5;
+                                hashes[joff + 0] = cpnodes[mhp_off + 0];
+                                hashes[joff + 1] = cpnodes[mhp_off + 1];
+                                hashes[joff + 2] = cpnodes[mhp_off + 2];
+                                hashes[joff + 3] = cpnodes[mhp_off + 3];
+                                hashes[joff + 4] = cpnodes[mhp_off + 4];
+                                merged_keys[j] = mhp_keys[mhp_i];
+                                mhp_i += 1;
+                            } else {
+                                const joff = j * 5;
+                                const so_off = so_i * 5;
+                                hashes[joff + 0] = csnodes[so_off + 0];
+                                hashes[joff + 1] = csnodes[so_off + 1];
+                                hashes[joff + 2] = csnodes[so_off + 2];
+                                hashes[joff + 3] = csnodes[so_off + 3];
+                                hashes[joff + 4] = csnodes[so_off + 4];
+                                merged_keys[j] = so_keys[so_i];
+                                so_i += 1;
+                            }
+                            j += 1;
+                        }
+                        branches = new SearchTreeBranchesArray(
+                            merged_keys,
+                            hashes,
+                        );
+                    }
                }
-                i += cslen;
            }
+            const branches_keys = input.subarray(start_point_keys, i);
            return {
                consumed_len_bytes: i - start_point,
-                cpbranches,
-                csbranches,
                cpnodes,
                csnodes,
                branches,
                might_have_prefix_branches,
+                branches_header,
+                branches_keys,
            };
        }
        while (i < l) {
@@ -3038,26 +3328,23 @@ function loadDatabase(hooks) {
            /** @type {number} */
            let no_leaves_flag;
            /** @type {number} */
-            let inline_neighbors_flag;
+            let no_branches_flag;
            if (is_data_compressed && is_pure_suffixes_only_node) {
                dlen = 0;
                no_leaves_flag = 0x80;
-                inline_neighbors_flag = 0;
+                no_branches_flag = 0;
            } else {
                dlen = input[i] & 0x3F;
                no_leaves_flag = input[i] & 0x80;
-                inline_neighbors_flag = input[i] & 0x40;
+                no_branches_flag = input[i] & 0x40;
                i += 1;
            }
-            if (inline_neighbors_flag !== 0) {
+            if (no_leaves_flag !== 0 && no_branches_flag !== 0) {
                // node with packed leaves and common 16bit prefix
-                const leaves_count = no_leaves_flag !== 0 ?
-                    0 :
-                    ((compression_tag >> 4) & 0x07) + 1;
-                const leaves_is_run = no_leaves_flag === 0 &&
-                    ((compression_tag >> 4) & 0x08) !== 0;
-                const branch_count = is_long_compressed ?
-                    ((compression_tag >> 8) & 0xff) + 1 :
+                const leaves_count = (compression_tag >> 4) & 0x07;
+                const leaves_is_run = ((compression_tag >> 4) & 0x08) !== 0;
+                const branch_flag = is_long_compressed ?
+                    (compression_tag >> 8) & 0xff :
                    0;
                if (is_data_compressed) {
                    data = data_history[data_history.length - dlen - 1];
@@ -3071,12 +3358,17 @@ function loadDatabase(hooks) {
                    i += dlen;
                }
                const branches_start = i;
+                const branch_flag_alphabet = Alphabet.parseFlag(branch_flag, i, input);
+                const branch_count = branch_flag_alphabet ? branch_flag_alphabet.len : branch_flag;
+                if (branch_flag_alphabet) {
+                    i += branch_flag_alphabet.consumed_len_bytes;
+                }
                // leaf_value_upper
                i += 2;
                // branch_nodes
                for (let j = 0; j < branch_count; j += 1) {
                    const branch_dlen = input[i] & 0x0f;
-                    const branch_leaves_count = ((input[i] >> 4) & 0x0f) + 1;
+                    const branch_leaves_count = (input[i] >> 4) & 0x0f;
                    const branch_leaves_is_run = (input[i] >> 7) !== 0;
                    i += 1;
                    if (!is_pure_suffixes_only_node) {
@@ -3089,7 +3381,9 @@ function loadDatabase(hooks) {
                    }
                }
                // branch keys
-                i += branch_count;
+                if (!branch_flag_alphabet) {
+                    i += branch_count;
+                }
                // leaves
                if (leaves_is_run) {
                    i += 2;
@@ -3099,7 +3393,7 @@ function loadDatabase(hooks) {
                if (is_data_compressed) {
                    const clen = (
                        1 + // first compression header byte
-                        (is_long_compressed ? 1 : 0) + // branch count
+                        (is_long_compressed ? 1 : 0) + // branch flag
                        1 + // data length and other flags
                        dlen + // data
                        (i - branches_start) // branches and leaves
@@ -3112,7 +3406,7 @@ function loadDatabase(hooks) {
                        canonical[ci] = input[start + ci];
                        ci += 1;
                    }
-                    canonical[ci] = dlen | no_leaves_flag | 0x40;
+                    canonical[ci] = dlen | 0xc0;
                    ci += 1;
                    for (let j = 0; j < dlen; j += 1) {
                        canonical[ci] = data[j];
@@ -3145,16 +3439,25 @@ function loadDatabase(hooks) {
                        new Uint8Array(input.buffer, i + input.byteOffset, dlen);
                    i += dlen;
                }
-                const coffset = i;
                const {
-                    cpbranches,
-                    csbranches,
                    cpnodes,
                    csnodes,
                    consumed_len_bytes: branches_consumed_len_bytes,
                    branches,
                    might_have_prefix_branches,
-                } = makeBranchesFromBinaryData(input, i, compression_tag);
+                    branches_header,
+                    branches_keys,
+                } = no_branches_flag !== 0 ?
+                    {
+                        cpnodes: EMPTY_UINT8,
+                        csnodes: EMPTY_UINT8,
+                        consumed_len_bytes: 0,
+                        branches: EMPTY_SEARCH_TREE_BRANCHES,
+                        might_have_prefix_branches: EMPTY_SEARCH_TREE_BRANCHES,
+                        branches_header: EMPTY_UINT8,
+                        branches_keys: EMPTY_UINT8,
+                    } :
+                    makeBranchesFromBinaryData(input, i, compression_tag);
                i += branches_consumed_len_bytes;
                let whole;
                let suffix;
@@ -3175,10 +3478,11 @@ function loadDatabase(hooks) {
                    );
                    const clen = (
                        // lengths of children and data
-                        (is_data_compressed ? 2 : 3) +
+                        (is_data_compressed ? 1 : 2) +
                        // branches
+                        branches_header.length +
                        csnodes.length +
-                        csbranches.length +
+                        branches_keys.length +
                        // leaves
                        suffix.consumed_len_bytes
                    );
@@ -3192,15 +3496,15 @@ function loadDatabase(hooks) {
                    } else {
                        canonical[ci] = 1;
                        ci += 1;
-                        canonical[ci] = dlen | no_leaves_flag;
+                        canonical[ci] = dlen | no_leaves_flag | no_branches_flag;
                        ci += 1;
                    }
-                    canonical[ci] = input[coffset]; // suffix child count
-                    ci += 1;
+                    canonical.set(branches_header, ci);
+                    ci += branches_header.length;
                    canonical.set(csnodes, ci);
                    ci += csnodes.length;
-                    canonical.set(csbranches, ci);
-                    ci += csbranches.length;
+                    canonical.set(branches_keys, ci);
+                    ci += branches_keys.length;
                    const leavesOffset = i - suffix.consumed_len_bytes;
                    for (let j = leavesOffset; j < i; j += 1) {
                        canonical[ci + j - leavesOffset] = input[j];
@@ -3228,10 +3532,11 @@ function loadDatabase(hooks) {
                        suffix,
                    );
                    const clen = (
-                        4 + // lengths of children and data
+                        2 + // lengths of children and data
                        dlen +
+                        branches_header.length +
                        cpnodes.length + csnodes.length +
-                        cpbranches.length + csbranches.length +
+                        branches_keys.length +
                        whole.consumed_len_bytes +
                        suffix.consumed_len_bytes
                    );
@@ -3241,22 +3546,18 @@ function loadDatabase(hooks) {
                    let ci = 0;
                    canonical[ci] = 0;
                    ci += 1;
-                    canonical[ci] = dlen | no_leaves_flag;
+                    canonical[ci] = dlen | no_leaves_flag | no_branches_flag;
                    ci += 1;
                    canonical.set(data, ci);
                    ci += data.length;
-                    canonical[ci] = input[coffset]; // prefix child count
-                    ci += 1;
-                    canonical[ci] = input[coffset + 1]; // suffix child count
-                    ci += 1;
+                    canonical.set(branches_header, ci);
+                    ci += branches_header.length;
                    canonical.set(cpnodes, ci);
                    ci += cpnodes.length;
                    canonical.set(csnodes, ci);
                    ci += csnodes.length;
-                    canonical.set(cpbranches, ci);
-                    ci += cpbranches.length;
-                    canonical.set(csbranches, ci);
-                    ci += csbranches.length;
+                    canonical.set(branches_keys, ci);
+                    ci += branches_keys.length;
                    const leavesOffset = i - whole.consumed_len_bytes - suffix.consumed_len_bytes;
                    for (let j = leavesOffset; j < i; j += 1) {
                        canonical[ci + j - leavesOffset] = input[j];
@@ -3275,7 +3576,13 @@ function loadDatabase(hooks) {
                    consumed_len_bytes: branches_consumed_len_bytes,
                    branches,
                    might_have_prefix_branches,
-                } = makeBranchesFromBinaryData(input, i, compression_tag);
+                } = no_branches_flag !== 0 ?
+                    {
+                        consumed_len_bytes: 0,
+                        branches: EMPTY_SEARCH_TREE_BRANCHES,
+                        might_have_prefix_branches: EMPTY_SEARCH_TREE_BRANCHES,
+                    } :
+                    makeBranchesFromBinaryData(input, i, compression_tag);
                i += branches_consumed_len_bytes;
                let whole;
                let suffix;
@@ -3317,7 +3624,7 @@ function loadDatabase(hooks) {
                        suffix,
                    );
            }
-            hash[2] &= 0x7f;
+            hash[3] &= 0x7f;
            hash_history.push({hash: truncatedHash.slice(), used: false});
            if (data.length !== 0) {
                data_history.push(data);
@@ -3330,8 +3637,8 @@ function loadDatabase(hooks) {
                while (j < lb) {
                    // node id with a 1 in its most significant bit is inlined, and, so
                    // it won't be in the stash
-                    if ((tree_branch_nodeids[j * 6] & 0x80) === 0) {
-                        const subtree = stash.getWithOffsetKey(tree_branch_nodeids, j * 6);
+                    if ((tree_branch_nodeids[j * 5] & 0x80) === 0) {
+                        const subtree = stash.getWithOffsetKey(tree_branch_nodeids, j * 5);
                        if (subtree !== undefined) {
                            tree_branch_subtrees[j] = Promise.resolve(subtree);
                        }
@@ -3347,8 +3654,8 @@ function loadDatabase(hooks) {
                while (j < lb) {
                    // node id with a 1 in its most significant bit is inlined, and, so
                    // it won't be in the stash
-                    if ((tree_mhp_branch_nodeids[j * 6] & 0x80) === 0) {
-                        const subtree = stash.getWithOffsetKey(tree_mhp_branch_nodeids, j * 6);
+                    if ((tree_mhp_branch_nodeids[j * 5] & 0x80) === 0) {
+                        const subtree = stash.getWithOffsetKey(tree_mhp_branch_nodeids, j * 5);
                        if (subtree !== undefined) {
                            tree_mhp_branch_subtrees[j] = Promise.resolve(subtree);
                        }