From 949570f8016391b3182361c8fba34ae0eef3e154 Mon Sep 17 00:00:00 2001 From: Michael Howell Date: Thu, 12 Mar 2026 19:01:39 -0700 Subject: [PATCH] rustdoc-search: update to stringdex 0.0.6 This update includes a few optimizations that reduce the size and index building time: - the wire format uses two bits to store four possibilities, instead of only handling three https://gitlab.com/notriddle/stringdex/-/merge_requests/34 - the hashes themselves are 40 bits instead of 48, and inlining is able to still fit enough data by storing runs https://gitlab.com/notriddle/stringdex/-/merge_requests/35 - scanning for duplicates takes advantage of the rarity of conflicts, using an array with 32 bit numbers and only pulling in the other 8 bits when actually needed https://gitlab.com/notriddle/stringdex/-/merge_requests/37 --- Cargo.lock | 4 +- src/librustdoc/Cargo.toml | 2 +- src/librustdoc/html/static/js/stringdex.js | 1069 +++++++++++++------- 3 files changed, 691 insertions(+), 384 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index ba41143ff61d..2230ea023bcf 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -5372,9 +5372,9 @@ dependencies = [ [[package]] name = "stringdex" -version = "0.0.5" +version = "0.0.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "07ab85c3f308f022ce6861ab57576b5b6ebc4835f9577e67e0f35f6c351e3f0a" +checksum = "155cb460a7ede06f71ac9961e28d3ba4b3408355e233f8edd158b957ceba3950" dependencies = [ "stacker", ] diff --git a/src/librustdoc/Cargo.toml b/src/librustdoc/Cargo.toml index 033d3ecdd03d..440d54b007a1 100644 --- a/src/librustdoc/Cargo.toml +++ b/src/librustdoc/Cargo.toml @@ -22,7 +22,7 @@ rustdoc-json-types = { path = "../rustdoc-json-types" } serde = { version = "1.0", features = ["derive"] } serde_json = "1.0" smallvec = "1.8.1" -stringdex = "=0.0.5" +stringdex = "=0.0.6" tempfile = "3" threadpool = "1.8.1" tracing = "0.1" diff --git a/src/librustdoc/html/static/js/stringdex.js b/src/librustdoc/html/static/js/stringdex.js index 720ac58e76e0..f5384e0fd58a 100644 --- a/src/librustdoc/html/static/js/stringdex.js +++ b/src/librustdoc/html/static/js/stringdex.js @@ -55,21 +55,66 @@ class RoaringBitmap { this.consumed_len_bytes = pspecial - i; return this; } else if (u8array[i] > 0xe0) { - // Special representation of tiny sets that are runs - const lspecial = u8array[i] & 0x0f; - this.keysAndCardinalities = new Uint8Array(lspecial * 4); - i += 1; - const key = u8array[i + 2] | (u8array[i + 3] << 8); - const value = u8array[i] | (u8array[i + 1] << 8); - const container = new RoaringBitmapRun(1, new Uint8Array(4)); - container.array[0] = value & 0xFF; - container.array[1] = (value >> 8) & 0xFF; - container.array[2] = lspecial - 1; - this.containers.push(container); - this.keysAndCardinalities[0] = key & 0xFF; - this.keysAndCardinalities[1] = (key >> 8) & 0xFF; - this.keysAndCardinalities[2] = lspecial - 1; - this.consumed_len_bytes = 5; + // Special representation of a node with multiple runs + const run_count_m1 = (u8array[i] & 0x0f); + const run_count = run_count_m1 + 1; + this.keysAndCardinalities = new Uint8Array(run_count * 4); + // the run keys and values + let pspecial = i + 1; + // the run lengths + let pnspecial = pspecial + (run_count * 4); + /** @type {number|null} */ + let previous_key = null; + /** @type {RoaringBitmapRun|null} */ + let previous_container = null; + for (let j = 0; j < run_count; j += 1) { + const key = u8array[pspecial + 2] | (u8array[pspecial + 3] << 8); + const value = u8array[pspecial] | (u8array[pspecial + 1] << 8); + const run_length_m1 = j % 2 === 0 ? + (u8array[pnspecial] >> 4) : + (u8array[pnspecial] & 0x0f); + if (j % 2 !== 0) { + pnspecial += 1; + } + pspecial += 4; + if (key === previous_key && previous_container !== null) { + const new_container_array = new Uint8Array( + (previous_container.runcount + 1) * 4, + ); + new_container_array.set(previous_container.array); + new_container_array[previous_container.runcount * 4] = value & 0xFF; + new_container_array[(previous_container.runcount * 4) + 1] = + (value >> 8) & 0xFF; + new_container_array[(previous_container.runcount * 4) + 2] = + run_length_m1; + previous_container.array = new_container_array; + previous_container.runcount += 1; + let cardinalitym1 = + this.keysAndCardinalities[(this.containers.length * 4) - 2] | + (this.keysAndCardinalities[(this.containers.length * 4) - 1] << 8); + cardinalitym1 += run_length_m1 + 1; + this.keysAndCardinalities[(this.containers.length * 4) - 2] = + cardinalitym1 & 0xFF; + this.keysAndCardinalities[(this.containers.length * 4) - 1] = + (cardinalitym1 >> 8) & 0xFF; + } else { + previous_key = key; + previous_container = new RoaringBitmapRun(1, Uint8Array.of( + value & 0xFF, + (value >> 8) & 0xFF, + run_length_m1, + 0, + )); + this.containers.push(previous_container); + this.keysAndCardinalities[(this.containers.length * 4) - 4] = key & 0xFF; + this.keysAndCardinalities[(this.containers.length * 4) - 3] = (key >> 8) & 0xFF; + this.keysAndCardinalities[(this.containers.length * 4) - 2] = run_length_m1; + } + } + if (run_count % 2 !== 0) { + pnspecial += 1; + } + this.consumed_len_bytes = pnspecial - i; return this; } else if (u8array[i] > 0xd0) { // Special representation of tiny sets that are close together @@ -100,6 +145,26 @@ class RoaringBitmap { } this.consumed_len_bytes = pspecial - i; return this; + } else if (u8array[i] > 0x80) { + // Special representation of tiny sets that are runs + const lspecial = u8array[i] & 0x3f; + const lspecialm1 = lspecial - 1; + this.keysAndCardinalities = new Uint8Array(4); + i += 1; + const key = u8array[i + 2] | (u8array[i + 3] << 8); + const value = u8array[i] | (u8array[i + 1] << 8); + const container = new RoaringBitmapRun(1, new Uint8Array(4)); + container.array[0] = value & 0xFF; + container.array[1] = (value >> 8) & 0xFF; + container.array[2] = lspecialm1 & 0xFF; + container.array[3] = lspecialm1 >> 8; + this.containers.push(container); + this.keysAndCardinalities[0] = key & 0xFF; + this.keysAndCardinalities[1] = (key >> 8) & 0xFF; + this.keysAndCardinalities[2] = lspecialm1 & 0xFF; + this.keysAndCardinalities[3] = lspecialm1 >> 8; + this.consumed_len_bytes = 5; + return this; } else if (u8array[i] < 0x3a) { // Special representation of tiny sets with arbitrary 32-bit integers const lspecial = u8array[i]; @@ -801,7 +866,7 @@ class HashTable { for (let i = 0; i < l; i += 1) { const value = values[i]; if (value !== undefined) { - yield [keys.subarray(i * 6, (i + 1) * 6), value]; + yield [keys.subarray(i * 5, (i + 1) * 5), value]; } } } @@ -818,7 +883,7 @@ class HashTable { const l = values.length; this.capacityClass += 1; const capacity = 1 << this.capacityClass; - this.keys = new Uint8Array(capacity * 6); + this.keys = new Uint8Array(capacity * 5); this.values = []; for (let i = 0; i < capacity; i += 1) { this.values.push(undefined); @@ -827,7 +892,7 @@ class HashTable { for (let i = 0; i < l; i += 1) { const oldValue = values[i]; if (oldValue !== undefined) { - this.setNoGrow(keys, i * 6, oldValue); + this.setNoGrow(keys, i * 5, oldValue); } } } @@ -844,25 +909,24 @@ class HashTable { const values = this.values; const l = 1 << this.capacityClass; // because we know that our values are already hashed, - // just chop off the lower four bytes + // just chop off the first byte let slot = ( - (key[start + 2] << 24) | - (key[start + 3] << 16) | - (key[start + 4] << 8) | - key[start + 5] + (key[start + 1] << 24) | + (key[start + 2] << 16) | + (key[start + 3] << 8) | + key[start + 4] ) & mask; for (let distance = 0; distance < l; ) { - const j = slot * 6; + const j = slot * 5; const otherValue = values[slot]; if (otherValue === undefined) { values[slot] = value; - const keysStart = slot * 6; + const keysStart = slot * 5; keys[keysStart + 0] = key[start + 0]; keys[keysStart + 1] = key[start + 1]; keys[keysStart + 2] = key[start + 2]; keys[keysStart + 3] = key[start + 3]; keys[keysStart + 4] = key[start + 4]; - keys[keysStart + 5] = key[start + 5]; this.size += 1; break; } else if ( @@ -870,15 +934,14 @@ class HashTable { key[start + 1] === keys[j + 1] && key[start + 2] === keys[j + 2] && key[start + 3] === keys[j + 3] && - key[start + 4] === keys[j + 4] && - key[start + 5] === keys[j + 5] + key[start + 4] === keys[j + 4] ) { values[slot] = value; break; } else { const otherPreferredSlot = ( - (keys[j + 2] << 24) | (keys[j + 3] << 16) | - (keys[j + 4] << 8) | keys[j + 5] + (keys[j + 1] << 24) | (keys[j + 2] << 16) | + (keys[j + 3] << 8) | keys[j + 4] ) & mask; const otherDistance = otherPreferredSlot <= slot ? slot - otherPreferredSlot : @@ -888,7 +951,7 @@ class HashTable { // then insert our node in its place and swap // // https://cglab.ca/~abeinges/blah/robinhood-part-1/ - const otherKey = keys.slice(j, j + 6); + const otherKey = keys.slice(j, j + 5); values[slot] = value; value = otherValue; keys[j + 0] = key[start + 0]; @@ -896,7 +959,6 @@ class HashTable { keys[j + 2] = key[start + 2]; keys[j + 3] = key[start + 3]; keys[j + 4] = key[start + 4]; - keys[j + 5] = key[start + 5]; key = otherKey; start = 0; distance = otherDistance; @@ -912,7 +974,7 @@ class HashTable { * @returns {T|undefined} */ get(key) { - if (key.length !== 6) { + if (key.length !== 5) { throw "invalid key"; } return this.getWithOffsetKey(key, 0); @@ -931,13 +993,13 @@ class HashTable { // because we know that our values are already hashed, // just chop off the lower four bytes let slot = ( - (key[start + 2] << 24) | - (key[start + 3] << 16) | - (key[start + 4] << 8) | - key[start + 5] + (key[start + 1] << 24) | + (key[start + 2] << 16) | + (key[start + 3] << 8) | + key[start + 4] ) & mask; for (let distance = 0; distance < l; distance += 1) { - const j = slot * 6; + const j = slot * 5; const value = values[slot]; if (value === undefined) { break; @@ -946,14 +1008,13 @@ class HashTable { key[start + 1] === keys[j + 1] && key[start + 2] === keys[j + 2] && key[start + 3] === keys[j + 3] && - key[start + 4] === keys[j + 4] && - key[start + 5] === keys[j + 5] + key[start + 4] === keys[j + 4] ) { return value; } else { const otherPreferredSlot = ( - (keys[j + 2] << 24) | (keys[j + 3] << 16) | - (keys[j + 4] << 8) | keys[j + 5] + (keys[j + 1] << 24) | (keys[j + 2] << 16) | + (keys[j + 3] << 8) | keys[j + 4] ) & mask; const otherDistance = otherPreferredSlot <= slot ? slot - otherPreferredSlot : @@ -1133,61 +1194,56 @@ function loadDatabase(hooks) { dataColumns: new Map(), dataColumnsBuckets: new HashTable(), searchTreeLoadByNodeID: function(nodeid) { - const existingPromise = registry.searchTreePromises.get(nodeid); - if (existingPromise) { - return existingPromise; - } /** @type {Promise} */ let newPromise; if ((nodeid[0] & 0x80) !== 0) { - const isWhole = (nodeid[0] & 0x40) !== 0; - let leaves; - if ((nodeid[0] & 0x10) !== 0) { - let id1 = (nodeid[2] << 8) | nodeid[3]; - if ((nodeid[0] & 0x20) !== 0) { - // when data is present, id1 can be up to 20 bits - id1 |= ((nodeid[1] & 0x0f) << 16); - } else { - // otherwise, we fit in 28 - id1 |= ((nodeid[0] & 0x0f) << 24) | (nodeid[1] << 16); - } - const id2 = id1 + ((nodeid[4] << 8) | nodeid[5]); - leaves = RoaringBitmap.makeSingleton(id1) - .union(RoaringBitmap.makeSingleton(id2)); - } else if (!isWhole && (nodeid[0] & 0xf0) === 0x80) { - const id1 = ((nodeid[0] & 0x0f) << 16) | (nodeid[1] << 8) | nodeid[2]; - const id2 = id1 + ((nodeid[3] << 4) | ((nodeid[4] >> 4) & 0x0f)); - const id3 = id2 + (((nodeid[4] & 0x0f) << 8) | nodeid[5]); - leaves = RoaringBitmap.makeSingleton(id1) - .union(RoaringBitmap.makeSingleton(id2)) - .union(RoaringBitmap.makeSingleton(id3)); + const isSuffixOnly = (nodeid[0] & 0x40) !== 0; + const isRun = (nodeid[0] & 0x20) !== 0; + const lengthOrData = nodeid[0] & 0x1F; + const id = (nodeid[1] << 24) | (nodeid[2] << 16) | (nodeid[3] << 8) | nodeid[4]; + let bitmap; + if (isRun) { + bitmap = new RoaringBitmap(null); + bitmap.containers.push(new RoaringBitmapRun( + 1, + Uint8Array.of( + id & 0xFF, + (id >> 8) & 0xFF, + lengthOrData, + 0, + ), + )); + bitmap.keysAndCardinalities = Uint8Array.of( + (id >> 16) & 0xff, + (id >> 24) & 0xff, + lengthOrData, + 0, + ); } else { - leaves = RoaringBitmap.makeSingleton( - (nodeid[2] << 24) | (nodeid[3] << 16) | - (nodeid[4] << 8) | nodeid[5], + bitmap = RoaringBitmap.makeSingleton(id); + } + let tree; + if (isSuffixOnly) { + tree = new SuffixSearchTree( + EMPTY_SEARCH_TREE_BRANCHES, + isRun ? 0 : (lengthOrData + 1), + bitmap, + ); + } else { + tree = new PrefixSearchTree( + EMPTY_SEARCH_TREE_BRANCHES, + EMPTY_SEARCH_TREE_BRANCHES, + isRun ? EMPTY_UINT8 : Uint8Array.of(LONG_ALPHABET.chars[lengthOrData]), + bitmap, + EMPTY_BITMAP, ); } - if (isWhole) { - const data = (nodeid[0] & 0x20) !== 0 ? - Uint8Array.of(((nodeid[0] & 0x0f) << 4) | (nodeid[1] >> 4)) : - EMPTY_UINT8; - newPromise = Promise.resolve(new PrefixSearchTree( - EMPTY_SEARCH_TREE_BRANCHES, - EMPTY_SEARCH_TREE_BRANCHES, - data, - leaves, - EMPTY_BITMAP, - )); - } else { - const data = (nodeid[0] & 0xf0) === 0x80 ? 0 : ( - ((nodeid[0] & 0x0f) << 4) | (nodeid[1] >> 4)); - newPromise = Promise.resolve(new SuffixSearchTree( - EMPTY_SEARCH_TREE_BRANCHES, - data, - leaves, - )); - } + newPromise = Promise.resolve(tree); } else { + const existingPromise = registry.searchTreePromises.get(nodeid); + if (existingPromise) { + return existingPromise; + } const hashHex = makeHexFromUint8Array(nodeid); newPromise = new Promise((resolve, reject) => { const cb = registry.searchTreeLoadPromiseCallbacks.get(nodeid); @@ -1211,8 +1267,8 @@ function loadDatabase(hooks) { hooks.loadTreeByHash(hashHex); } }); + registry.searchTreePromises.set(nodeid, newPromise); } - registry.searchTreePromises.set(nodeid, newPromise); return newPromise; }, dataLoadByNameAndHash: function(name, hash) { @@ -1277,8 +1333,8 @@ function loadDatabase(hooks) { getNodeID(i) { return new Uint8Array( this.nodeids.buffer, - this.nodeids.byteOffset + (i * 6), - 6, + this.nodeids.byteOffset + (i * 5), + 5, ); } // https://github.com/microsoft/TypeScript/issues/17227 @@ -1385,113 +1441,310 @@ function loadDatabase(hooks) { EMPTY_UINT8, ); - /** @type {number[]} */ - const SHORT_ALPHABITMAP_CHARS = []; + class Alphabet { + constructor() { + /** @type {number[]} */ + this.chars = []; + /** @type {number} */ + this.len = 0; + /** @type {number} */ + this.bytes = 0; + /** @type {number} */ + this.flag = 0; + /** @type {number} */ + this.bitwidth = 0; + } + /** + * @param {number} c + * @returns {boolean} + */ + contains(c) { + return this.chars.indexOf(c) !== -1; + } + /** + * @param {number} c + * @returns {number} + */ + index(c) { + return this.chars.indexOf(c); + } + } + + /** @type {Alphabet} */ + const VOWELONLY_ALPHABITMAP = Object.assign( + new Alphabet(), + { + chars: [0x61, 0x65, 0x69, 0x6f, 0x75], + len: 5, + bytes: 0, + flag: 0x80, + }, + ); + + /** @type {Alphabet} */ + const CONSONANTSONLY_ALPHABET = Object.assign( + new Alphabet(), + { + chars: [], + len: 21, + bytes: 2, + flag: 0xc0, + }, + ); + for (let i = 0x61; i <= 0x7A; ++i) { + if (i === 0x61 || i === 0x65 || i === 0x69 || i === 0x6f || i === 0x75) { + // 21 bits, 26 letters, so skip aeiou + continue; + } + CONSONANTSONLY_ALPHABET.chars.push(i); + } + + /** @type {Alphabet} */ + const HEX_ALPHABET = Object.assign( + new Alphabet(), + { + chars: [], + len: 16, + bytes: 2, + flag: 0xfc, + }, + ); + for (let i = 0x30; i <= 0x39; ++i) { + HEX_ALPHABET.chars.push(i); + } + for (let i = 0x61; i <= 0x66; ++i) { + HEX_ALPHABET.chars.push(i); + } + + /** @type {Alphabet} */ + const SHORT_ALPHABET = Object.assign( + new Alphabet(), + { + chars: [], + len: 24, + bytes: 3, + flag: 0xfd, + }, + ); for (let i = 0x61; i <= 0x7A; ++i) { if (i === 0x76 || i === 0x71) { // 24 entries, 26 letters, so we skip q and v continue; } - SHORT_ALPHABITMAP_CHARS.push(i); + SHORT_ALPHABET.chars.push(i); } - /** @type {number[]} */ - const LONG_ALPHABITMAP_CHARS = [0x31, 0x32, 0x33, 0x34, 0x35, 0x36]; + /** @type {Alphabet} */ + const LONG_ALPHABET = Object.assign( + new Alphabet(), + { + chars: [0x31, 0x32, 0x33, 0x34, 0x35, 0x36], + len: 32, + bytes: 4, + flag: 0xfe, + }, + ); for (let i = 0x61; i <= 0x7A; ++i) { - LONG_ALPHABITMAP_CHARS.push(i); + LONG_ALPHABET.chars.push(i); + } + + /** @type {Alphabet} */ + const ASCII_ALPHABET = Object.assign( + new Alphabet(), + { + chars: [], + len: 128, + bytes: 16, + flag: 0xf0, + /** + * @param {number} c + * @returns {boolean} + */ + contains(c) { + return c <= 0x7f; + }, + /** + * @param {number} c + * @returns {number} + */ + index(c) { + return c; + }, + }, + ); + for (let i = 0x00; i <= 0x7f; ++i) { + ASCII_ALPHABET.chars.push(i); + } + + /** @type {Alphabet} */ + const RAWBYTE_ALPHABET = Object.assign( + new Alphabet(), + { + chars: [], + len: 256, + bytes: 32, + flag: 0xff, + /** + * @param {number} _c + * @returns {boolean} + */ + contains(_c) { + return true; + }, + /** + * @param {number} c + * @returns {number} + */ + index(c) { + return c; + }, + }, + ); + for (let i = 0x00; i <= 0xff; ++i) { + RAWBYTE_ALPHABET.chars.push(i); } /** - * @template ST - * @param {number[]} alphabitmap_chars - * @param {number} width - * @return {(typeof SearchTreeBranches)&{"ALPHABITMAP_CHARS": number[], "width": number}} + * Parse an alphabet and buffer where the flag is right at the beginning. + * @param {number} start + * @param {Uint8Array} buf + * @returns {{"alphabet": Alphabet, "consumed_len_bytes": number, "len": number}?} */ - function makeSearchTreeBranchesAlphaBitmapClass(alphabitmap_chars, width) { - const bitwidth = width * 8; + Alphabet.parse = function(start, buf) { + const flag = buf[start]; + const parsed = Alphabet.parseFlag(flag, start + 1, buf); + if (!parsed) { + return null; + } + parsed.consumed_len_bytes += 1; + return parsed; + }; + + /** + * Parse an alphabet and buffer where the flag is not at the beginning. + * @param {number} flag + * @param {number} i + * @param {Uint8Array} buf + * @returns {{"alphabet": Alphabet, "consumed_len_bytes": number, "len": number}?} + */ + Alphabet.parseFlag = function(flag, i, buf) { + if (flag <= 0x80) { + return null; + } + const alphabet = flag === RAWBYTE_ALPHABET.flag ? RAWBYTE_ALPHABET : ( + flag === ASCII_ALPHABET.flag ? ASCII_ALPHABET : ( + flag === LONG_ALPHABET.flag ? LONG_ALPHABET : ( + flag === SHORT_ALPHABET.flag ? SHORT_ALPHABET : ( + flag === HEX_ALPHABET.flag ? HEX_ALPHABET : ( + flag >= CONSONANTSONLY_ALPHABET.flag ? CONSONANTSONLY_ALPHABET : VOWELONLY_ALPHABITMAP + ))))); + let len = alphabet === CONSONANTSONLY_ALPHABET || alphabet === VOWELONLY_ALPHABITMAP ? + bitCount(flag & 0x1f) : 0; + for (let ix = 0; ix < alphabet.bytes; ++ix) { + len += bitCount(buf[i]); + i += 1; + } + return {alphabet, consumed_len_bytes: alphabet.bytes, len}; + }; + + /** + * @template ST + * @extends SearchTreeBranches + */ + class SearchTreeBranchesAlphaBitmap extends SearchTreeBranches { /** - * @extends SearchTreeBranches + * @param {Alphabet} alphabet + * @param {Uint8Array} buffer + * @param {Uint8Array} nodeids */ - const cls = class SearchTreeBranchesAlphaBitmap extends SearchTreeBranches { - /** - * @param {number} bitmap - * @param {Uint8Array} nodeids - */ - constructor(bitmap, nodeids) { - super(nodeids.length / 6, nodeids); - if (nodeids.length / 6 !== bitCount(bitmap)) { - throw new Error(`mismatch ${bitmap} ${nodeids}`); + constructor(alphabet, buffer, nodeids) { + let bitmap; + if (alphabet === VOWELONLY_ALPHABITMAP) { + bitmap = new Uint8Array(1); + bitmap[0] = buffer[0] & 0x1f; + } else if (alphabet === CONSONANTSONLY_ALPHABET) { + bitmap = new Uint8Array(3); + bitmap[0] = buffer[1]; + bitmap[1] = buffer[2]; + bitmap[2] = buffer[0] & 0x1f; + } else { + bitmap = buffer.subarray(1); + } + let cardinality = 0; + for (let i = 0; i < bitmap.length; ++i) { + cardinality += bitCount(bitmap[i]); + } + super(cardinality, nodeids); + this.bitmap = bitmap; + this.alphabet = alphabet; + } + /** + * Yields [character, SearchTree] pairs. + * @returns {Generator<[number, Promise|null]>} + */ + * entries() { + let i = 0; + let j = 0; + while (i < this.alphabet.len) { + if (this.bitmap[i >> 3] & (1 << (i & 0x07))) { + yield [this.alphabet.chars[i], this.subtrees[j]]; + j += 1; } - this.bitmap = bitmap; - this.nodeids = nodeids; + i += 1; } - /** @returns {Generator<[number, Promise|null]>} */ - * entries() { - let i = 0; - let j = 0; - while (i < bitwidth) { - if (this.bitmap & (1 << i)) { - yield [alphabitmap_chars[i], this.subtrees[j]]; - j += 1; - } - i += 1; - } + } + /** + * Given a character, returns the numbered index of the search + * tree, or -1 if there isn't one. + * @param {number} c + * @returns {number} + */ + getIndex(c) { + //return this.getKeys().indexOf(c); + if (!this.alphabet.contains(c)) { + return -1; } - /** - * @param {number} k - * @returns {number} - */ - getIndex(k) { - //return this.getKeys().indexOf(k); - const ix = alphabitmap_chars.indexOf(k); - if (ix < 0) { - return ix; - } - const result = bitCount(~(0xffffffff << ix) & this.bitmap); - return result >= this.subtrees.length ? -1 : result; - } - /** - * @param {number} branch_index - * @returns {number} - */ - getKey(branch_index) { - return this.getKeys()[branch_index]; - } - /** - * @returns {Uint8Array} - */ - getKeys() { - const length = bitCount(this.bitmap); - const result = new Uint8Array(length); - let result_index = 0; - for (let alpha_index = 0; alpha_index < bitwidth; ++alpha_index) { - if (this.bitmap & (1 << alpha_index)) { - result[result_index] = alphabitmap_chars[alpha_index]; - result_index += 1; - } + const k = this.alphabet.index(c); + if (this.bitmap[k >> 3] & (1 << (k & 0x07))) { + let result = bitCount(~(0xff << (k & 0x07)) & this.bitmap[k >> 3]); + for (let ix = 0; ix < (k >> 3); ++ix) { + result += bitCount(this.bitmap[ix]); } return result; + } else { + return -1; } - }; - cls.ALPHABITMAP_CHARS = alphabitmap_chars; - cls.width = width; - return cls; + } + /** + * Given the numbered index of a search tree, returns the key. + * This is the exact opposite of getIndex(). + * @param {number} branch_index + * @returns {number} + */ + getKey(branch_index) { + return this.getKeys()[branch_index]; + } + /** + * Returns a list of one-byte keys. + * @returns {Uint8Array} + */ + getKeys() { + let length = 0; + for (let i = 0; i < this.bitmap.length; ++i) { + length += bitCount(this.bitmap[i]); + } + const result = new Uint8Array(length); + let result_index = 0; + for (let ix = 0; ix < this.alphabet.len; ++ix) { + if (this.bitmap[ix >> 3] & (1 << (ix & 0x07))) { + result[result_index] = this.alphabet.chars[ix]; + result_index += 1; + } + } + return result; + } } - /** - * @template ST - * @type {(typeof SearchTreeBranches)&{"ALPHABITMAP_CHARS": number[], "width": number}} - */ - const SearchTreeBranchesShortAlphaBitmap = - makeSearchTreeBranchesAlphaBitmapClass(SHORT_ALPHABITMAP_CHARS, 3); - - /** - * @template ST - * @type {(typeof SearchTreeBranches)&{"ALPHABITMAP_CHARS": number[], "width": number}} - */ - const SearchTreeBranchesLongAlphaBitmap = - makeSearchTreeBranchesAlphaBitmapClass(LONG_ALPHABITMAP_CHARS, 4); - /** * @typedef {PrefixSearchTree|SuffixSearchTree|InlineNeighborsTree} SearchTree * @typedef {PrefixTrie|SuffixTrie} Trie @@ -2347,19 +2600,15 @@ function loadDatabase(hooks) { const has_branches = (encoded[i] & 0x04) !== 0; /** @type {boolean} */ const is_suffixes_only = (encoded[i] & 0x01) !== 0; - let leaves_count = ((encoded[i] >> 4) & 0x07) + 1; - let leaves_is_run = (encoded[i] >> 7) !== 0; + const leaves_count = (encoded[i] >> 4) & 0x07; + const leaves_is_run = (encoded[i] >> 7) !== 0; i += 1; - let branch_count = 0; + let branch_flag = 0; if (has_branches) { - branch_count = encoded[i] + 1; + branch_flag = encoded[i]; i += 1; } const dlen = encoded[i] & 0x3f; - if ((encoded[i] & 0x80) !== 0) { - leaves_count = 0; - leaves_is_run = false; - } i += 1; /** @type {Uint8Array} */ let data = EMPTY_UINT8; @@ -2367,13 +2616,25 @@ function loadDatabase(hooks) { data = encoded.subarray(i, i + dlen); i += dlen; } + const branch_flag_alphabet = Alphabet.parseFlag(branch_flag, i, encoded); + let branch_alphabitmap = EMPTY_UINT8; + if (branch_flag_alphabet) { + branch_alphabitmap = new Uint8Array(branch_flag_alphabet.consumed_len_bytes + 1); + branch_alphabitmap[0] = branch_flag; + branch_alphabitmap.set( + encoded.subarray(i, i + branch_flag_alphabet.consumed_len_bytes), + 1, + ); + i += branch_flag_alphabet.consumed_len_bytes; + } + const branch_count = branch_flag_alphabet ? branch_flag_alphabet.len : branch_flag; const leaf_value_upper = encoded[i] | (encoded[i + 1] << 8); i += 2; /** @type {Promise[]} */ const branch_nodes = []; for (let j = 0; j < branch_count; j += 1) { const branch_dlen = encoded[i] & 0x0f; - const branch_leaves_count = ((encoded[i] >> 4) & 0x07) + 1; + const branch_leaves_count = (encoded[i] >> 4) & 0x07; const branch_leaves_is_run = (encoded[i] >> 7) !== 0; i += 1; /** @type {Uint8Array} */ @@ -2427,14 +2688,22 @@ function loadDatabase(hooks) { ), )); } - /** @type {SearchTreeBranchesArray} */ + /** @type {SearchTreeBranches} */ const branches = branch_count === 0 ? EMPTY_SEARCH_TREE_BRANCHES : - new SearchTreeBranchesArray( - encoded.subarray(i, i + branch_count), - EMPTY_UINT8, - ); - i += branch_count; + branch_flag_alphabet ? + new SearchTreeBranchesAlphaBitmap( + branch_flag_alphabet.alphabet, + branch_alphabitmap, + EMPTY_UINT8, + ) : + new SearchTreeBranchesArray( + encoded.subarray(i, i + branch_count), + EMPTY_UINT8, + ); + if (!branch_flag_alphabet) { + i += branch_count; + } branches.subtrees = branch_nodes; let leaves = EMPTY_BITMAP; if (leaves_count !== 0) { @@ -2556,7 +2825,7 @@ function loadDatabase(hooks) { k += 1; } const end = k; - const bucket = {hash: hashes.subarray(i * 6, (i + 1) * 6), data: null, end, count}; + const bucket = {hash: hashes.subarray(i * 5, (i + 1) * 5), data: null, end, count}; this.buckets.push(bucket); this.bucket_keys.push(start); } @@ -2698,7 +2967,7 @@ function loadDatabase(hooks) { */ function loadColumnFromBytes(data) { const hashBuf = Uint8Array.of(0, 0, 0, 0, 0, 0, 0, 0); - const truncatedHash = hashBuf.subarray(2, 8); + const truncatedHash = hashBuf.subarray(3, 8); siphashOfBytes(data, 0, 0, 0, 0, hashBuf); const cb = registry.dataColumnLoadPromiseCallbacks.get(truncatedHash); if (cb) { @@ -2744,7 +3013,7 @@ function loadDatabase(hooks) { /** @type {HashTable} */ const stash = new HashTable(); const hash = Uint8Array.of(0, 0, 0, 0, 0, 0, 0, 0); - const truncatedHash = new Uint8Array(hash.buffer, 2, 6); + const truncatedHash = hash.subarray(3, 8); // used for handling compressed (that is, relative-offset) nodes /** @type {{hash: Uint8Array, used: boolean}[]} */ const hash_history = []; @@ -2764,12 +3033,12 @@ function loadDatabase(hooks) { * @param {number} i * @param {number} compression_tag * @returns {{ - * "cpbranches": Uint8Array, - * "csbranches": Uint8Array, * "might_have_prefix_branches": SearchTreeBranches, * "branches": SearchTreeBranches, + * "branches_header": Uint8Array, * "cpnodes": Uint8Array, * "csnodes": Uint8Array, + * "branches_keys": Uint8Array, * "consumed_len_bytes": number, * }} */ @@ -2786,49 +3055,63 @@ function loadDatabase(hooks) { const any_children_are_compressed = (compression_tag & 0xF0) !== 0x00 || is_long_compressed; const start_point = i; - let cplen; - let cslen; /** - * @type {( - * typeof SearchTreeBranches & - * {"ALPHABITMAP_CHARS": number[], "width": number} - * )?} + * @type {Alphabet|null} */ - let alphabitmap = null; + let cpalphabet = null; + /** + * @type {Uint8Array} + */ + let cpalphabitmap = EMPTY_UINT8; + /** + * @type {number} + */ + let cplen; + /** + * @type {Alphabet|null} + */ + let csalphabet = null; + /** + * @type {Uint8Array} + */ + let csalphabitmap = EMPTY_UINT8; + /** + * @type {number} + */ + let cslen; + // might-have-prefix nodes if (is_pure_suffixes_only_node) { cplen = 0; - cslen = input[i]; - i += 1; - if (cslen >= 0xc0) { - alphabitmap = SearchTreeBranchesLongAlphaBitmap; - cslen = cslen & 0x3F; - } else if (cslen >= 0x80) { - alphabitmap = SearchTreeBranchesShortAlphaBitmap; - cslen = cslen & 0x7F; - } } else { - cplen = input[i]; - i += 1; - cslen = input[i]; - i += 1; - if (cplen === 0xff && cslen === 0xff) { - cplen = 0x100; - cslen = 0; - } else if (cplen >= 0xc0 && cslen >= 0xc0) { - alphabitmap = SearchTreeBranchesLongAlphaBitmap; - cplen = cplen & 0x3F; - cslen = cslen & 0x3F; - } else if (cplen >= 0x80 && cslen >= 0x80) { - alphabitmap = SearchTreeBranchesShortAlphaBitmap; - cplen = cplen & 0x7F; - cslen = cslen & 0x7F; + const parsed = Alphabet.parse(i, input); + if (parsed) { + cpalphabitmap = input.subarray(i, i + parsed.consumed_len_bytes); + cpalphabet = parsed.alphabet; + cplen = parsed.len; + i += parsed.consumed_len_bytes; + } else { + cplen = input[i]; + i += 1; } } + // suffix-only nodes + const parsed = Alphabet.parse(i, input); + if (parsed) { + csalphabitmap = input.subarray(i, i + parsed.consumed_len_bytes); + csalphabet = parsed.alphabet; + cslen = parsed.len; + i += parsed.consumed_len_bytes; + } else { + cslen = input[i]; + i += 1; + } + const branches_header = input.subarray(start_point, i); + // now process the hashes, offsets, or stack let j = 0; /** @type {Uint8Array} */ let cpnodes; if (any_children_are_compressed) { - cpnodes = cplen === 0 ? EMPTY_UINT8 : new Uint8Array(cplen * 6); + cpnodes = cplen === 0 ? EMPTY_UINT8 : new Uint8Array(cplen * 5); while (j < cplen) { const is_compressed = all_children_are_compressed || ((0x10 << j) & compression_tag) !== 0; @@ -2845,29 +3128,28 @@ function loadDatabase(hooks) { hash_history[slot].used = true; cpnodes.set( hash_history[slot].hash, - j * 6, + j * 5, ); } else { - const joff = j * 6; + const joff = j * 5; cpnodes[joff + 0] = input[i + 0]; cpnodes[joff + 1] = input[i + 1]; cpnodes[joff + 2] = input[i + 2]; cpnodes[joff + 3] = input[i + 3]; cpnodes[joff + 4] = input[i + 4]; - cpnodes[joff + 5] = input[i + 5]; - i += 6; + i += 5; } j += 1; } } else { - cpnodes = cplen === 0 ? EMPTY_UINT8 : input.subarray(i, i + (cplen * 6)); - i += cplen * 6; + cpnodes = cplen === 0 ? EMPTY_UINT8 : input.subarray(i, i + (cplen * 5)); + i += cplen * 5; } j = 0; /** @type {Uint8Array} */ let csnodes; if (any_children_are_compressed) { - csnodes = cslen === 0 ? EMPTY_UINT8 : new Uint8Array(cslen * 6); + csnodes = cslen === 0 ? EMPTY_UINT8 : new Uint8Array(cslen * 5); while (j < cslen) { const is_compressed = all_children_are_compressed || ((0x10 << (cplen + j)) & compression_tag) !== 0; @@ -2884,138 +3166,146 @@ function loadDatabase(hooks) { hash_history[slot].used = true; csnodes.set( hash_history[slot].hash, - j * 6, + j * 5, ); } else { - const joff = j * 6; + const joff = j * 5; csnodes[joff + 0] = input[i + 0]; csnodes[joff + 1] = input[i + 1]; csnodes[joff + 2] = input[i + 2]; csnodes[joff + 3] = input[i + 3]; csnodes[joff + 4] = input[i + 4]; - csnodes[joff + 5] = input[i + 5]; - i += 6; + i += 5; } j += 1; } } else { - csnodes = cslen === 0 ? EMPTY_UINT8 : input.subarray(i, i + (cslen * 6)); - i += cslen * 6; + csnodes = cslen === 0 ? EMPTY_UINT8 : input.subarray(i, i + (cslen * 5)); + i += cslen * 5; } - let cpbranches; + const start_point_keys = i; let might_have_prefix_branches; if (cplen === 0) { - cpbranches = EMPTY_UINT8; might_have_prefix_branches = EMPTY_SEARCH_TREE_BRANCHES; - } else if (alphabitmap) { - cpbranches = new Uint8Array(input.buffer, i + input.byteOffset, alphabitmap.width); - const branchset = (alphabitmap.width === 4 ? (input[i + 3] << 24) : 0) | - (input[i + 2] << 16) | - (input[i + 1] << 8) | - input[i]; - might_have_prefix_branches = new alphabitmap(branchset, cpnodes); - i += alphabitmap.width; + } else if (cpalphabet) { + might_have_prefix_branches = new SearchTreeBranchesAlphaBitmap( + cpalphabet, + cpalphabitmap, + cpnodes, + ); } else { - cpbranches = new Uint8Array(input.buffer, i + input.byteOffset, cplen); - might_have_prefix_branches = new SearchTreeBranchesArray(cpbranches, cpnodes); + might_have_prefix_branches = new SearchTreeBranchesArray( + new Uint8Array(input.buffer, i + input.byteOffset, cplen), + cpnodes, + ); i += cplen; } - let csbranches; let branches; if (cslen === 0) { - csbranches = EMPTY_UINT8; branches = might_have_prefix_branches; - } else if (alphabitmap) { - csbranches = new Uint8Array(input.buffer, i + input.byteOffset, alphabitmap.width); - const branchset = (alphabitmap.width === 4 ? (input[i + 3] << 24) : 0) | - (input[i + 2] << 16) | - (input[i + 1] << 8) | - input[i]; - if (cplen === 0) { - branches = new alphabitmap(branchset, csnodes); - } else { - const cpoffset = i - alphabitmap.width; - const cpbranchset = - (alphabitmap.width === 4 ? (input[cpoffset + 3] << 24) : 0) | - (input[cpoffset + 2] << 16) | - (input[cpoffset + 1] << 8) | - input[cpoffset]; - const hashes = new Uint8Array((cplen + cslen) * 6); - let cpi = 0; - let csi = 0; - let j = 0; - for (let k = 0; k < alphabitmap.ALPHABITMAP_CHARS.length; k += 1) { - if (branchset & (1 << k)) { - hashes[j + 0] = csnodes[csi + 0]; - hashes[j + 1] = csnodes[csi + 1]; - hashes[j + 2] = csnodes[csi + 2]; - hashes[j + 3] = csnodes[csi + 3]; - hashes[j + 4] = csnodes[csi + 4]; - hashes[j + 5] = csnodes[csi + 5]; - j += 6; - csi += 6; - } else if (cpbranchset & (1 << k)) { - hashes[j + 0] = cpnodes[cpi + 0]; - hashes[j + 1] = cpnodes[cpi + 1]; - hashes[j + 2] = cpnodes[cpi + 2]; - hashes[j + 3] = cpnodes[cpi + 3]; - hashes[j + 4] = cpnodes[cpi + 4]; - hashes[j + 5] = cpnodes[cpi + 5]; - j += 6; - cpi += 6; - } - } - branches = new alphabitmap(branchset | cpbranchset, hashes); - } - i += alphabitmap.width; } else { - csbranches = new Uint8Array(input.buffer, i + input.byteOffset, cslen); - if (cplen === 0) { - branches = new SearchTreeBranchesArray(csbranches, csnodes); + if (csalphabet) { + branches = new SearchTreeBranchesAlphaBitmap( + csalphabet, + csalphabitmap, + csnodes, + ); } else { - const branchset = new Uint8Array(cplen + cslen); - const hashes = new Uint8Array((cplen + cslen) * 6); - let cpi = 0; - let csi = 0; - let j = 0; - while (cpi < cplen || csi < cslen) { - if (cpi >= cplen || (csi < cslen && cpbranches[cpi] > csbranches[csi])) { - branchset[j] = csbranches[csi]; - const joff = j * 6; - const csioff = csi * 6; - hashes[joff + 0] = csnodes[csioff + 0]; - hashes[joff + 1] = csnodes[csioff + 1]; - hashes[joff + 2] = csnodes[csioff + 2]; - hashes[joff + 3] = csnodes[csioff + 3]; - hashes[joff + 4] = csnodes[csioff + 4]; - hashes[joff + 5] = csnodes[csioff + 5]; - csi += 1; - } else { - branchset[j] = cpbranches[cpi]; - const joff = j * 6; - const cpioff = cpi * 6; - hashes[joff + 0] = cpnodes[cpioff + 0]; - hashes[joff + 1] = cpnodes[cpioff + 1]; - hashes[joff + 2] = cpnodes[cpioff + 2]; - hashes[joff + 3] = cpnodes[cpioff + 3]; - hashes[joff + 4] = cpnodes[cpioff + 4]; - hashes[joff + 5] = cpnodes[cpioff + 5]; - cpi += 1; - } - j += 1; - } - branches = new SearchTreeBranchesArray(branchset, hashes); + branches = new SearchTreeBranchesArray( + new Uint8Array(input.buffer, i + input.byteOffset, cslen), + csnodes, + ); + i += cslen; + } + if (cplen !== 0) { + const hashes = new Uint8Array((cplen + cslen) * 5); + if (cplen + cslen > 32) { + const raw_bits = new Uint8Array(RAWBYTE_ALPHABET.bytes + 1); + raw_bits[0] = RAWBYTE_ALPHABET.flag; + const bits = raw_bits.subarray(1); + const mhp_keys = might_have_prefix_branches.getKeys(); + const so_keys = branches.getKeys(); + let mhp_i = 0; + let so_i = 0; + let j = 0; + while (mhp_i < cplen || so_i < cslen) { + if (so_i === cslen || mhp_keys[mhp_i] < so_keys[so_i]) { + const joff = j * 5; + const mhp_off = mhp_i * 5; + hashes[joff + 0] = cpnodes[mhp_off + 0]; + hashes[joff + 1] = cpnodes[mhp_off + 1]; + hashes[joff + 2] = cpnodes[mhp_off + 2]; + hashes[joff + 3] = cpnodes[mhp_off + 3]; + hashes[joff + 4] = cpnodes[mhp_off + 4]; + const ix = mhp_keys[mhp_i]; + bits[ix >> 3] |= 1 << (ix & 0x07); + mhp_i += 1; + } else { + const joff = j * 5; + const so_off = so_i * 5; + hashes[joff + 0] = csnodes[so_off + 0]; + hashes[joff + 1] = csnodes[so_off + 1]; + hashes[joff + 2] = csnodes[so_off + 2]; + hashes[joff + 3] = csnodes[so_off + 3]; + hashes[joff + 4] = csnodes[so_off + 4]; + const ix = so_keys[so_i]; + bits[ix >> 3] |= 1 << (ix & 0x07); + so_i += 1; + } + j += 1; + } + branches = new SearchTreeBranchesAlphaBitmap( + RAWBYTE_ALPHABET, + raw_bits, + hashes, + ); + } else { + const merged_keys = new Uint8Array(cplen + cslen); + const mhp_keys = might_have_prefix_branches.getKeys(); + const so_keys = branches.getKeys(); + let mhp_i = 0; + let so_i = 0; + let j = 0; + while (mhp_i < cplen || so_i < cslen) { + if (so_i === cslen || mhp_keys[mhp_i] < so_keys[so_i]) { + const joff = j * 5; + const mhp_off = mhp_i * 5; + hashes[joff + 0] = cpnodes[mhp_off + 0]; + hashes[joff + 1] = cpnodes[mhp_off + 1]; + hashes[joff + 2] = cpnodes[mhp_off + 2]; + hashes[joff + 3] = cpnodes[mhp_off + 3]; + hashes[joff + 4] = cpnodes[mhp_off + 4]; + merged_keys[j] = mhp_keys[mhp_i]; + mhp_i += 1; + } else { + const joff = j * 5; + const so_off = so_i * 5; + hashes[joff + 0] = csnodes[so_off + 0]; + hashes[joff + 1] = csnodes[so_off + 1]; + hashes[joff + 2] = csnodes[so_off + 2]; + hashes[joff + 3] = csnodes[so_off + 3]; + hashes[joff + 4] = csnodes[so_off + 4]; + merged_keys[j] = so_keys[so_i]; + so_i += 1; + } + j += 1; + } + branches = new SearchTreeBranchesArray( + merged_keys, + hashes, + ); + } } - i += cslen; } + const branches_keys = input.subarray(start_point_keys, i); return { consumed_len_bytes: i - start_point, - cpbranches, - csbranches, cpnodes, csnodes, branches, might_have_prefix_branches, + branches_header, + branches_keys, }; } while (i < l) { @@ -3038,26 +3328,23 @@ function loadDatabase(hooks) { /** @type {number} */ let no_leaves_flag; /** @type {number} */ - let inline_neighbors_flag; + let no_branches_flag; if (is_data_compressed && is_pure_suffixes_only_node) { dlen = 0; no_leaves_flag = 0x80; - inline_neighbors_flag = 0; + no_branches_flag = 0; } else { dlen = input[i] & 0x3F; no_leaves_flag = input[i] & 0x80; - inline_neighbors_flag = input[i] & 0x40; + no_branches_flag = input[i] & 0x40; i += 1; } - if (inline_neighbors_flag !== 0) { + if (no_leaves_flag !== 0 && no_branches_flag !== 0) { // node with packed leaves and common 16bit prefix - const leaves_count = no_leaves_flag !== 0 ? - 0 : - ((compression_tag >> 4) & 0x07) + 1; - const leaves_is_run = no_leaves_flag === 0 && - ((compression_tag >> 4) & 0x08) !== 0; - const branch_count = is_long_compressed ? - ((compression_tag >> 8) & 0xff) + 1 : + const leaves_count = (compression_tag >> 4) & 0x07; + const leaves_is_run = ((compression_tag >> 4) & 0x08) !== 0; + const branch_flag = is_long_compressed ? + (compression_tag >> 8) & 0xff : 0; if (is_data_compressed) { data = data_history[data_history.length - dlen - 1]; @@ -3071,12 +3358,17 @@ function loadDatabase(hooks) { i += dlen; } const branches_start = i; + const branch_flag_alphabet = Alphabet.parseFlag(branch_flag, i, input); + const branch_count = branch_flag_alphabet ? branch_flag_alphabet.len : branch_flag; + if (branch_flag_alphabet) { + i += branch_flag_alphabet.consumed_len_bytes; + } // leaf_value_upper i += 2; // branch_nodes for (let j = 0; j < branch_count; j += 1) { const branch_dlen = input[i] & 0x0f; - const branch_leaves_count = ((input[i] >> 4) & 0x0f) + 1; + const branch_leaves_count = (input[i] >> 4) & 0x0f; const branch_leaves_is_run = (input[i] >> 7) !== 0; i += 1; if (!is_pure_suffixes_only_node) { @@ -3089,7 +3381,9 @@ function loadDatabase(hooks) { } } // branch keys - i += branch_count; + if (!branch_flag_alphabet) { + i += branch_count; + } // leaves if (leaves_is_run) { i += 2; @@ -3099,7 +3393,7 @@ function loadDatabase(hooks) { if (is_data_compressed) { const clen = ( 1 + // first compression header byte - (is_long_compressed ? 1 : 0) + // branch count + (is_long_compressed ? 1 : 0) + // branch flag 1 + // data length and other flags dlen + // data (i - branches_start) // branches and leaves @@ -3112,7 +3406,7 @@ function loadDatabase(hooks) { canonical[ci] = input[start + ci]; ci += 1; } - canonical[ci] = dlen | no_leaves_flag | 0x40; + canonical[ci] = dlen | 0xc0; ci += 1; for (let j = 0; j < dlen; j += 1) { canonical[ci] = data[j]; @@ -3145,16 +3439,25 @@ function loadDatabase(hooks) { new Uint8Array(input.buffer, i + input.byteOffset, dlen); i += dlen; } - const coffset = i; const { - cpbranches, - csbranches, cpnodes, csnodes, consumed_len_bytes: branches_consumed_len_bytes, branches, might_have_prefix_branches, - } = makeBranchesFromBinaryData(input, i, compression_tag); + branches_header, + branches_keys, + } = no_branches_flag !== 0 ? + { + cpnodes: EMPTY_UINT8, + csnodes: EMPTY_UINT8, + consumed_len_bytes: 0, + branches: EMPTY_SEARCH_TREE_BRANCHES, + might_have_prefix_branches: EMPTY_SEARCH_TREE_BRANCHES, + branches_header: EMPTY_UINT8, + branches_keys: EMPTY_UINT8, + } : + makeBranchesFromBinaryData(input, i, compression_tag); i += branches_consumed_len_bytes; let whole; let suffix; @@ -3175,10 +3478,11 @@ function loadDatabase(hooks) { ); const clen = ( // lengths of children and data - (is_data_compressed ? 2 : 3) + + (is_data_compressed ? 1 : 2) + // branches + branches_header.length + csnodes.length + - csbranches.length + + branches_keys.length + // leaves suffix.consumed_len_bytes ); @@ -3192,15 +3496,15 @@ function loadDatabase(hooks) { } else { canonical[ci] = 1; ci += 1; - canonical[ci] = dlen | no_leaves_flag; + canonical[ci] = dlen | no_leaves_flag | no_branches_flag; ci += 1; } - canonical[ci] = input[coffset]; // suffix child count - ci += 1; + canonical.set(branches_header, ci); + ci += branches_header.length; canonical.set(csnodes, ci); ci += csnodes.length; - canonical.set(csbranches, ci); - ci += csbranches.length; + canonical.set(branches_keys, ci); + ci += branches_keys.length; const leavesOffset = i - suffix.consumed_len_bytes; for (let j = leavesOffset; j < i; j += 1) { canonical[ci + j - leavesOffset] = input[j]; @@ -3228,10 +3532,11 @@ function loadDatabase(hooks) { suffix, ); const clen = ( - 4 + // lengths of children and data + 2 + // lengths of children and data dlen + + branches_header.length + cpnodes.length + csnodes.length + - cpbranches.length + csbranches.length + + branches_keys.length + whole.consumed_len_bytes + suffix.consumed_len_bytes ); @@ -3241,22 +3546,18 @@ function loadDatabase(hooks) { let ci = 0; canonical[ci] = 0; ci += 1; - canonical[ci] = dlen | no_leaves_flag; + canonical[ci] = dlen | no_leaves_flag | no_branches_flag; ci += 1; canonical.set(data, ci); ci += data.length; - canonical[ci] = input[coffset]; // prefix child count - ci += 1; - canonical[ci] = input[coffset + 1]; // suffix child count - ci += 1; + canonical.set(branches_header, ci); + ci += branches_header.length; canonical.set(cpnodes, ci); ci += cpnodes.length; canonical.set(csnodes, ci); ci += csnodes.length; - canonical.set(cpbranches, ci); - ci += cpbranches.length; - canonical.set(csbranches, ci); - ci += csbranches.length; + canonical.set(branches_keys, ci); + ci += branches_keys.length; const leavesOffset = i - whole.consumed_len_bytes - suffix.consumed_len_bytes; for (let j = leavesOffset; j < i; j += 1) { canonical[ci + j - leavesOffset] = input[j]; @@ -3275,7 +3576,13 @@ function loadDatabase(hooks) { consumed_len_bytes: branches_consumed_len_bytes, branches, might_have_prefix_branches, - } = makeBranchesFromBinaryData(input, i, compression_tag); + } = no_branches_flag !== 0 ? + { + consumed_len_bytes: 0, + branches: EMPTY_SEARCH_TREE_BRANCHES, + might_have_prefix_branches: EMPTY_SEARCH_TREE_BRANCHES, + } : + makeBranchesFromBinaryData(input, i, compression_tag); i += branches_consumed_len_bytes; let whole; let suffix; @@ -3317,7 +3624,7 @@ function loadDatabase(hooks) { suffix, ); } - hash[2] &= 0x7f; + hash[3] &= 0x7f; hash_history.push({hash: truncatedHash.slice(), used: false}); if (data.length !== 0) { data_history.push(data); @@ -3330,8 +3637,8 @@ function loadDatabase(hooks) { while (j < lb) { // node id with a 1 in its most significant bit is inlined, and, so // it won't be in the stash - if ((tree_branch_nodeids[j * 6] & 0x80) === 0) { - const subtree = stash.getWithOffsetKey(tree_branch_nodeids, j * 6); + if ((tree_branch_nodeids[j * 5] & 0x80) === 0) { + const subtree = stash.getWithOffsetKey(tree_branch_nodeids, j * 5); if (subtree !== undefined) { tree_branch_subtrees[j] = Promise.resolve(subtree); } @@ -3347,8 +3654,8 @@ function loadDatabase(hooks) { while (j < lb) { // node id with a 1 in its most significant bit is inlined, and, so // it won't be in the stash - if ((tree_mhp_branch_nodeids[j * 6] & 0x80) === 0) { - const subtree = stash.getWithOffsetKey(tree_mhp_branch_nodeids, j * 6); + if ((tree_mhp_branch_nodeids[j * 5] & 0x80) === 0) { + const subtree = stash.getWithOffsetKey(tree_mhp_branch_nodeids, j * 5); if (subtree !== undefined) { tree_mhp_branch_subtrees[j] = Promise.resolve(subtree); }