mirror of
https://github.com/rust-lang/rust.git
synced 2026-05-08 01:28:18 +03:00
Auto merge of #70499 - Dylan-DPC:rollup-f9je1l8, r=Dylan-DPC
Rollup of 5 pull requests Successful merges: - #70418 (Add long error explanation for E0703) - #70448 (Create output dir in rustdoc markdown render) - #70486 (Shrink Unicode tables (even more)) - #70493 (Fix rustdoc.css CSS tab-size property) - #70495 (Replace last mention of IRC with Discord) Failed merges: r? @ghost
This commit is contained in:
@@ -32,28 +32,3 @@ pub mod derived_property {
|
||||
pub use unicode_data::n::lookup as N;
|
||||
pub use unicode_data::uppercase::lookup as Uppercase;
|
||||
pub use unicode_data::white_space::lookup as White_Space;
|
||||
|
||||
/// Looks up the bit for `needle` in a two-level, chunk-indexed bitset.
///
/// The needle is split into a 64-bit word number (`needle / 64`) and a bit
/// position (`needle % 64`). Word numbers are grouped sixteen to a chunk:
/// `chunk_idx_map` selects a row of `bitset_chunk_idx`, that row yields an
/// index into `bitset`, and the selected word is tested at the bit position.
///
/// Groups past the end of `chunk_idx_map` are reported as absent (`false`),
/// except for the single group numbered `last_chunk_idx`, which uses
/// `last_chunk_mapping` as its row number.
///
/// Panics if a table entry points outside `bitset_chunk_idx` or `bitset`.
#[inline(always)]
fn range_search<const N: usize, const N1: usize, const N2: usize>(
    needle: u32,
    chunk_idx_map: &[u8; N],
    (last_chunk_idx, last_chunk_mapping): (u16, u8),
    bitset_chunk_idx: &[[u8; 16]; N1],
    bitset: &[u64; N2],
) -> bool {
    let word_no = (needle / 64) as usize;
    let (map_slot, piece) = (word_no / 16, word_no % 16);

    // `get` yields `None` exactly when `map_slot >= N`.
    let row = match chunk_idx_map.get(map_slot) {
        Some(&mapped) => mapped,
        None if map_slot == usize::from(last_chunk_idx) => last_chunk_mapping,
        None => return false,
    };

    let word_idx = bitset_chunk_idx[usize::from(row)][piece];
    let bits = bitset[usize::from(word_idx)];
    (bits >> (needle % 64)) & 1 == 1
}
|
||||
|
||||
+443
-514
@@ -1,618 +1,547 @@
|
||||
//! This file is generated by src/tools/unicode-table-generator; do not edit manually!
|
||||
use super::range_search;
|
||||
|
||||
/// Looks up the bit for `needle` in a chunked bitset whose words are partly
/// stored verbatim and partly derived from other words.
///
/// The needle is split into a 64-bit word number (`needle / 64`) and a bit
/// position (`needle % 64`). Word numbers are grouped `CHUNK_SIZE` to a chunk:
/// `chunk_idx_map` selects a row of `bitset_chunk_idx`, and that row yields a
/// word index. Groups past the end of `chunk_idx_map` are absent (`false`).
///
/// Word indices below `CANONICAL` read `bitset_canonical` directly. Larger
/// indices select a `(real_idx, mapping)` pair from `bitset_canonicalized`;
/// the word is rebuilt from `bitset_canonical[real_idx]` according to the
/// bits of `mapping`:
///
/// * bit 6 set: invert the word first;
/// * bits 0-5: an amount in `0..64`;
/// * bit 7 set: shift right by the amount, otherwise rotate left by it.
///
/// Panics if a table entry points outside the table it indexes.
#[inline(always)]
fn bitset_search<
    const N: usize,
    const CHUNK_SIZE: usize,
    const N1: usize,
    const CANONICAL: usize,
    const CANONICALIZED: usize,
>(
    needle: u32,
    chunk_idx_map: &[u8; N],
    bitset_chunk_idx: &[[u8; CHUNK_SIZE]; N1],
    bitset_canonical: &[u64; CANONICAL],
    bitset_canonicalized: &[(u8, u8); CANONICALIZED],
) -> bool {
    const INVERT_FLAG: u8 = 1 << 6;
    const SHIFT_FLAG: u8 = 1 << 7;
    const AMOUNT_MASK: u8 = (1 << 6) - 1;

    let word_no = (needle / 64) as usize;
    let (map_slot, piece) = (word_no / CHUNK_SIZE, word_no % CHUNK_SIZE);

    let row = match chunk_idx_map.get(map_slot) {
        Some(&mapped) => usize::from(mapped),
        None => return false,
    };
    let word_idx = usize::from(bitset_chunk_idx[row][piece]);

    let bits = match bitset_canonical.get(word_idx) {
        Some(&stored) => stored,
        None => {
            let (real_idx, mapping) = bitset_canonicalized[word_idx - bitset_canonical.len()];
            let base = bitset_canonical[usize::from(real_idx)];
            let base = if mapping & INVERT_FLAG != 0 { !base } else { base };
            let amount = u32::from(mapping & AMOUNT_MASK);
            if mapping & SHIFT_FLAG != 0 {
                base >> amount
            } else {
                base.rotate_left(amount)
            }
        }
    };

    (bits >> (needle % 64)) & 1 == 1
}
|
||||
|
||||
/// Returns the prefix-sum field of a short-offset-run header, i.e. its low
/// 21 bits.
fn decode_prefix_sum(short_offset_run_header: u32) -> u32 {
    const PREFIX_SUM_MASK: u32 = 0x001F_FFFF;
    short_offset_run_header & PREFIX_SUM_MASK
}
|
||||
|
||||
/// Returns the field stored above the 21-bit prefix sum of a short-offset-run
/// header, i.e. its top 11 bits, widened to `usize`.
fn decode_length(short_offset_run_header: u32) -> usize {
    const PREFIX_SUM_BITS: u32 = 21;
    let upper = short_offset_run_header >> PREFIX_SUM_BITS;
    upper as usize
}
|
||||
|
||||
#[inline(always)]
|
||||
fn skip_search<const SOR: usize, const OFFSETS: usize>(
|
||||
needle: u32,
|
||||
short_offset_runs: &[u32; SOR],
|
||||
offsets: &[u8; OFFSETS],
|
||||
) -> bool {
|
||||
// Note that this *cannot* be past the end of the array, as the last
|
||||
// element is greater than std::char::MAX (the largest possible needle).
|
||||
//
|
||||
// So, we cannot have found it (i.e. Ok(idx) + 1 != length) and the correct
|
||||
// location cannot be past it, so Err(idx) != length either.
|
||||
//
|
||||
// This means that we can avoid bounds checking for the accesses below, too.
|
||||
let last_idx =
|
||||
match short_offset_runs.binary_search_by_key(&(needle << 11), |header| header << 11) {
|
||||
Ok(idx) => idx + 1,
|
||||
Err(idx) => idx,
|
||||
};
|
||||
|
||||
let mut offset_idx = decode_length(short_offset_runs[last_idx]);
|
||||
let length = if let Some(next) = short_offset_runs.get(last_idx + 1) {
|
||||
decode_length(*next) - offset_idx
|
||||
} else {
|
||||
offsets.len() - offset_idx
|
||||
};
|
||||
let prev =
|
||||
last_idx.checked_sub(1).map(|prev| decode_prefix_sum(short_offset_runs[prev])).unwrap_or(0);
|
||||
|
||||
let total = needle - prev;
|
||||
let mut prefix_sum = 0;
|
||||
for _ in 0..(length - 1) {
|
||||
let offset = offsets[offset_idx];
|
||||
prefix_sum += offset as u32;
|
||||
if prefix_sum > total {
|
||||
break;
|
||||
}
|
||||
offset_idx += 1;
|
||||
}
|
||||
offset_idx % 2 == 1
|
||||
}
|
||||
|
||||
/// The Unicode version as a three-component tuple, here `(13, 0, 0)`.
// NOTE(review): presumably the version of the Unicode data that the tables in
// this file were generated from — confirm against the table generator.
pub const UNICODE_VERSION: (u32, u32, u32) = (13, 0, 0);
|
||||
|
||||
#[rustfmt::skip]
|
||||
pub mod alphabetic {
|
||||
static BITSET_LAST_CHUNK_MAP: (u16, u8) = (196, 44);
|
||||
static BITSET_CHUNKS_MAP: [u8; 196] = [
|
||||
6, 32, 10, 18, 19, 23, 21, 12, 7, 5, 0, 20, 14, 50, 50, 50, 50, 50, 50, 37, 50, 50, 50, 50,
|
||||
50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 49, 50, 30, 8, 50, 50, 50, 50,
|
||||
50, 50, 50, 50, 50, 50, 46, 0, 0, 0, 0, 0, 0, 0, 0, 4, 36, 17, 31, 16, 25, 24, 26, 13, 15,
|
||||
45, 27, 0, 0, 50, 11, 0, 0, 0, 40, 0, 0, 0, 0, 0, 0, 0, 0, 39, 1, 50, 50, 50, 50, 50, 48,
|
||||
50, 34, 0, 0, 0, 0, 0, 0, 0, 0, 35, 0, 0, 28, 0, 0, 0, 0, 0, 29, 0, 0, 9, 0, 33, 2, 3, 0, 0,
|
||||
0, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50,
|
||||
50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 42, 50, 50, 50,
|
||||
43, 22, 50, 50, 50, 50, 41, 50, 50, 50, 50, 50, 50, 47, 0, 0, 0, 38, 0, 50, 50, 50, 50,
|
||||
static SHORT_OFFSET_RUNS: [u32; 52] = [
|
||||
706, 33559113, 868226669, 947920662, 1157637302, 1306536960, 1310732293, 1398813696,
|
||||
1449151936, 1451270141, 1455465613, 1459660301, 1468061604, 1648425216, 1658911342,
|
||||
1661009214, 1707147904, 1793132343, 1853951616, 1994464256, 2330009312, 2418090906,
|
||||
2428579840, 2439066671, 2441167872, 2443265607, 2445371392, 2447469113, 2449567296,
|
||||
2476836856, 2508295382, 2512498688, 2518790431, 2520888060, 2533473280, 2535576576,
|
||||
2556548774, 2634145792, 2682380992, 2715936768, 2720132608, 2736910640, 2875326464,
|
||||
2887952094, 2890053429, 2894253730, 2902649825, 2906847232, 2908944926, 2911043584,
|
||||
2913145675, 2916356939,
|
||||
];
|
||||
static BITSET_INDEX_CHUNKS: [[u8; 16]; 51] = [
|
||||
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
|
||||
[0, 0, 0, 0, 0, 0, 0, 0, 0, 254, 0, 0, 254, 247, 39, 68],
|
||||
[0, 0, 0, 0, 0, 0, 0, 0, 111, 135, 113, 0, 0, 0, 0, 0],
|
||||
[0, 0, 0, 0, 195, 205, 10, 0, 0, 0, 0, 0, 0, 0, 0, 0],
|
||||
[0, 0, 0, 0, 254, 254, 254, 254, 254, 210, 254, 25, 136, 251, 71, 243],
|
||||
[0, 0, 182, 52, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
|
||||
[0, 107, 103, 180, 254, 254, 254, 254, 254, 254, 254, 61, 0, 155, 222, 181],
|
||||
[0, 148, 30, 0, 172, 226, 9, 0, 0, 0, 0, 0, 0, 0, 0, 0],
|
||||
[48, 80, 254, 169, 206, 123, 189, 139, 95, 179, 145, 86, 211, 204, 254, 56],
|
||||
[53, 0, 0, 0, 129, 17, 0, 0, 0, 0, 0, 58, 0, 0, 0, 0],
|
||||
[59, 54, 185, 203, 171, 191, 161, 117, 158, 87, 164, 118, 162, 67, 159, 23],
|
||||
[62, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
|
||||
[95, 131, 168, 105, 254, 254, 254, 82, 254, 254, 254, 254, 236, 130, 137, 120],
|
||||
[101, 0, 225, 146, 151, 2, 217, 45, 144, 246, 32, 101, 0, 0, 0, 0],
|
||||
[119, 253, 224, 175, 193, 254, 227, 195, 0, 0, 0, 0, 0, 0, 0, 0],
|
||||
[143, 190, 91, 0, 153, 218, 24, 0, 0, 0, 0, 92, 0, 0, 66, 0],
|
||||
[150, 94, 37, 85, 102, 0, 157, 0, 88, 122, 31, 46, 89, 74, 20, 0],
|
||||
[154, 34, 254, 110, 0, 84, 0, 0, 0, 0, 233, 19, 216, 108, 237, 21],
|
||||
[166, 42, 165, 72, 167, 177, 126, 76, 109, 16, 127, 38, 1, 192, 124, 0],
|
||||
[176, 246, 234, 174, 254, 254, 254, 254, 254, 235, 140, 241, 240, 26, 228, 128],
|
||||
[213, 239, 254, 77, 209, 64, 142, 238, 63, 0, 0, 0, 0, 0, 0, 0],
|
||||
[225, 101, 207, 89, 98, 81, 208, 10, 232, 83, 147, 1, 188, 13, 178, 70],
|
||||
[237, 254, 254, 254, 254, 254, 254, 254, 254, 254, 254, 254, 254, 254, 254, 254],
|
||||
[253, 254, 254, 254, 254, 254, 254, 254, 254, 214, 231, 99, 79, 78, 183, 27],
|
||||
[254, 6, 100, 50, 75, 90, 254, 28, 134, 0, 202, 51, 163, 43, 0, 0],
|
||||
[254, 9, 75, 75, 49, 0, 0, 0, 0, 0, 69, 0, 199, 6, 195, 93],
|
||||
[254, 41, 254, 8, 0, 0, 141, 33, 145, 4, 97, 0, 55, 0, 0, 0],
|
||||
[254, 62, 254, 254, 254, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
|
||||
[254, 121, 36, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
|
||||
[254, 242, 170, 252, 138, 245, 254, 254, 254, 254, 220, 173, 186, 212, 219, 14],
|
||||
[254, 254, 15, 132, 254, 254, 254, 254, 57, 149, 254, 65, 223, 254, 249, 187],
|
||||
[254, 254, 196, 114, 201, 44, 0, 0, 254, 254, 254, 254, 95, 47, 0, 0],
|
||||
[254, 254, 250, 254, 194, 229, 156, 73, 230, 215, 254, 152, 246, 248, 71, 104],
|
||||
[254, 254, 254, 5, 254, 12, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
|
||||
[254, 254, 254, 22, 9, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
|
||||
[254, 254, 254, 254, 37, 200, 254, 254, 254, 254, 254, 116, 0, 0, 0, 0],
|
||||
[254, 254, 254, 254, 133, 246, 244, 112, 0, 184, 254, 125, 106, 221, 145, 29],
|
||||
[254, 254, 254, 254, 254, 254, 254, 0, 254, 254, 254, 254, 254, 254, 254, 254],
|
||||
[254, 254, 254, 254, 254, 254, 254, 254, 35, 0, 0, 0, 0, 0, 0, 0],
|
||||
[254, 254, 254, 254, 254, 254, 254, 254, 101, 37, 0, 60, 65, 160, 18, 0],
|
||||
[254, 254, 254, 254, 254, 254, 254, 254, 254, 7, 0, 0, 0, 0, 0, 0],
|
||||
[254, 254, 254, 254, 254, 254, 254, 254, 254, 254, 197, 254, 254, 254, 254, 254],
|
||||
[254, 254, 254, 254, 254, 254, 254, 254, 254, 254, 254, 35, 254, 254, 254, 254],
|
||||
[254, 254, 254, 254, 254, 254, 254, 254, 254, 254, 254, 254, 84, 254, 254, 254],
|
||||
[254, 254, 254, 254, 254, 254, 254, 254, 254, 254, 254, 254, 254, 11, 0, 0],
|
||||
[254, 254, 254, 254, 254, 254, 254, 254, 254, 254, 254, 254, 254, 254, 25, 0],
|
||||
[254, 254, 254, 254, 254, 254, 254, 254, 254, 254, 254, 254, 254, 254, 198, 115],
|
||||
[254, 254, 254, 254, 254, 254, 254, 254, 254, 254, 254, 254, 254, 254, 254, 40],
|
||||
[254, 254, 254, 254, 254, 254, 254, 254, 254, 254, 254, 254, 254, 254, 254, 96],
|
||||
[254, 254, 254, 254, 254, 254, 254, 254, 254, 254, 254, 254, 254, 254, 254, 125],
|
||||
[254, 254, 254, 254, 254, 254, 254, 254, 254, 254, 254, 254, 254, 254, 254, 254],
|
||||
static OFFSETS: [u8; 1391] = [
|
||||
65, 26, 6, 26, 47, 1, 10, 1, 4, 1, 5, 23, 1, 31, 1, 0, 4, 12, 14, 5, 7, 1, 1, 1, 86, 1, 42,
|
||||
5, 1, 2, 2, 4, 1, 1, 6, 1, 1, 3, 1, 1, 1, 20, 1, 83, 1, 139, 8, 166, 1, 38, 2, 1, 6, 41, 39,
|
||||
14, 1, 1, 1, 2, 1, 2, 1, 1, 8, 27, 4, 4, 29, 11, 5, 56, 1, 7, 14, 102, 1, 8, 4, 8, 4, 3, 10,
|
||||
3, 2, 1, 16, 48, 13, 101, 24, 33, 9, 2, 4, 1, 5, 24, 2, 19, 19, 25, 7, 11, 53, 21, 1, 18,
|
||||
12, 12, 3, 7, 6, 76, 1, 16, 1, 3, 4, 15, 13, 19, 1, 8, 2, 2, 2, 22, 1, 7, 1, 1, 3, 4, 3, 8,
|
||||
2, 2, 2, 2, 1, 1, 8, 1, 4, 2, 1, 5, 12, 2, 10, 1, 4, 3, 1, 6, 4, 2, 2, 22, 1, 7, 1, 2, 1, 2,
|
||||
1, 2, 4, 5, 4, 2, 2, 2, 4, 1, 7, 4, 1, 1, 17, 6, 11, 3, 1, 9, 1, 3, 1, 22, 1, 7, 1, 2, 1, 5,
|
||||
3, 9, 1, 3, 1, 2, 3, 1, 15, 4, 21, 4, 4, 3, 1, 8, 2, 2, 2, 22, 1, 7, 1, 2, 1, 5, 3, 8, 2, 2,
|
||||
2, 2, 9, 2, 4, 2, 1, 5, 13, 1, 16, 2, 1, 6, 3, 3, 1, 4, 3, 2, 1, 1, 1, 2, 3, 2, 3, 3, 3, 12,
|
||||
4, 5, 3, 3, 1, 3, 3, 1, 6, 1, 40, 4, 1, 8, 1, 3, 1, 23, 1, 16, 3, 8, 1, 3, 1, 3, 8, 2, 1, 3,
|
||||
5, 4, 28, 4, 1, 8, 1, 3, 1, 23, 1, 10, 1, 5, 3, 8, 1, 3, 1, 3, 8, 2, 7, 1, 1, 4, 13, 2, 13,
|
||||
13, 1, 3, 1, 41, 2, 8, 1, 3, 1, 3, 1, 1, 5, 4, 7, 5, 22, 6, 1, 3, 1, 18, 3, 24, 1, 9, 1, 1,
|
||||
2, 7, 8, 6, 1, 1, 1, 8, 18, 2, 13, 58, 5, 7, 6, 1, 51, 2, 1, 1, 1, 5, 1, 24, 1, 1, 1, 19, 1,
|
||||
3, 2, 5, 1, 1, 6, 1, 14, 4, 32, 1, 63, 8, 1, 36, 4, 17, 6, 16, 1, 36, 67, 55, 1, 1, 2, 5,
|
||||
16, 64, 10, 4, 2, 38, 1, 1, 5, 1, 2, 43, 1, 0, 1, 4, 2, 7, 1, 1, 1, 4, 2, 41, 1, 4, 2, 33,
|
||||
1, 4, 2, 7, 1, 1, 1, 4, 2, 15, 1, 57, 1, 4, 2, 67, 37, 16, 16, 86, 2, 6, 3, 0, 2, 17, 1, 26,
|
||||
5, 75, 3, 11, 7, 13, 1, 6, 12, 20, 12, 20, 12, 13, 1, 3, 1, 2, 12, 52, 2, 19, 14, 1, 4, 1,
|
||||
67, 89, 7, 43, 5, 70, 10, 31, 1, 12, 4, 9, 23, 30, 2, 5, 11, 44, 4, 26, 54, 28, 4, 63, 2,
|
||||
20, 50, 1, 23, 2, 63, 52, 1, 15, 1, 7, 52, 42, 2, 4, 10, 44, 1, 11, 14, 55, 22, 3, 10, 36,
|
||||
2, 9, 7, 43, 2, 3, 41, 4, 1, 6, 1, 2, 3, 1, 5, 192, 39, 14, 11, 0, 2, 6, 2, 38, 2, 6, 2, 8,
|
||||
1, 1, 1, 1, 1, 1, 1, 31, 2, 53, 1, 7, 1, 1, 3, 3, 1, 7, 3, 4, 2, 6, 4, 13, 5, 3, 1, 7, 116,
|
||||
1, 13, 1, 16, 13, 101, 1, 4, 1, 2, 10, 1, 1, 3, 5, 6, 1, 1, 1, 1, 1, 1, 4, 1, 11, 2, 4, 5,
|
||||
5, 4, 1, 17, 41, 0, 52, 0, 47, 1, 47, 1, 133, 6, 4, 3, 2, 12, 38, 1, 1, 5, 1, 2, 56, 7, 1,
|
||||
16, 23, 9, 7, 1, 7, 1, 7, 1, 7, 1, 7, 1, 7, 1, 7, 1, 7, 1, 32, 47, 1, 0, 3, 25, 9, 7, 5, 2,
|
||||
5, 4, 86, 6, 3, 1, 90, 1, 4, 5, 43, 1, 94, 17, 32, 48, 16, 0, 0, 64, 0, 3, 0, 67, 46, 2, 0,
|
||||
3, 16, 10, 2, 20, 47, 5, 8, 3, 113, 39, 9, 2, 103, 2, 53, 2, 9, 42, 17, 1, 33, 24, 52, 12,
|
||||
68, 1, 1, 44, 6, 3, 1, 1, 3, 10, 33, 5, 35, 13, 29, 3, 51, 1, 12, 15, 1, 16, 16, 10, 5, 1,
|
||||
55, 9, 14, 18, 23, 3, 69, 1, 1, 1, 1, 24, 3, 2, 16, 2, 4, 11, 6, 2, 6, 2, 6, 9, 7, 1, 7, 1,
|
||||
43, 1, 14, 6, 123, 21, 0, 12, 23, 4, 49, 0, 0, 2, 106, 38, 7, 12, 5, 5, 12, 1, 13, 1, 5, 1,
|
||||
1, 1, 2, 1, 2, 1, 108, 33, 0, 18, 64, 2, 54, 40, 12, 116, 5, 1, 135, 36, 26, 6, 26, 11, 89,
|
||||
3, 6, 2, 6, 2, 6, 2, 3, 35, 12, 1, 26, 1, 19, 1, 2, 1, 15, 2, 14, 34, 123, 69, 53, 0, 29, 3,
|
||||
49, 47, 32, 13, 30, 5, 43, 5, 30, 2, 36, 4, 8, 1, 5, 42, 158, 18, 36, 4, 36, 4, 40, 8, 52,
|
||||
156, 0, 9, 22, 10, 8, 152, 6, 2, 1, 1, 44, 1, 2, 3, 1, 2, 23, 10, 23, 9, 31, 65, 19, 1, 2,
|
||||
10, 22, 10, 26, 70, 56, 6, 2, 64, 4, 1, 2, 5, 8, 1, 3, 1, 29, 42, 29, 3, 29, 35, 8, 1, 28,
|
||||
27, 54, 10, 22, 10, 19, 13, 18, 110, 73, 55, 51, 13, 51, 13, 40, 0, 42, 1, 2, 3, 2, 78, 29,
|
||||
10, 1, 8, 22, 106, 21, 27, 23, 9, 70, 60, 55, 23, 25, 23, 51, 17, 4, 8, 35, 3, 1, 9, 64, 1,
|
||||
4, 9, 2, 10, 1, 1, 1, 35, 18, 1, 34, 2, 1, 6, 1, 65, 7, 1, 1, 1, 4, 1, 15, 1, 10, 7, 57, 23,
|
||||
4, 1, 8, 2, 2, 2, 22, 1, 7, 1, 2, 1, 5, 3, 8, 2, 2, 2, 2, 3, 1, 6, 1, 5, 7, 156, 66, 1, 3,
|
||||
1, 4, 20, 3, 30, 66, 2, 2, 1, 1, 184, 54, 2, 7, 25, 6, 34, 63, 1, 1, 3, 1, 59, 54, 2, 1, 71,
|
||||
27, 2, 14, 213, 57, 103, 64, 31, 8, 2, 1, 2, 8, 1, 2, 1, 30, 1, 2, 2, 2, 2, 4, 93, 8, 2, 46,
|
||||
2, 6, 1, 1, 1, 2, 27, 51, 2, 10, 17, 72, 5, 1, 34, 57, 0, 9, 1, 45, 1, 7, 1, 1, 49, 30, 2,
|
||||
22, 1, 14, 73, 7, 1, 2, 1, 44, 3, 1, 1, 2, 1, 3, 1, 1, 2, 2, 24, 6, 1, 2, 1, 37, 1, 2, 1, 4,
|
||||
1, 1, 0, 23, 185, 1, 79, 0, 102, 111, 17, 196, 0, 0, 0, 0, 0, 0, 7, 31, 113, 30, 18, 48, 16,
|
||||
4, 31, 21, 5, 19, 0, 64, 128, 75, 4, 57, 7, 17, 64, 2, 1, 1, 12, 2, 14, 0, 8, 0, 42, 9, 0,
|
||||
0, 49, 3, 17, 4, 8, 0, 0, 107, 5, 13, 3, 9, 7, 10, 4, 1, 0, 85, 1, 71, 1, 2, 2, 1, 2, 2, 2,
|
||||
4, 1, 12, 1, 1, 1, 7, 1, 65, 1, 4, 2, 8, 1, 7, 1, 28, 1, 4, 1, 5, 1, 1, 3, 7, 1, 0, 2, 25,
|
||||
1, 25, 1, 31, 1, 25, 1, 31, 1, 25, 1, 31, 1, 25, 1, 31, 1, 25, 1, 8, 0, 7, 1, 17, 2, 7, 1,
|
||||
2, 1, 5, 213, 45, 10, 7, 16, 1, 0, 44, 0, 197, 59, 68, 3, 1, 3, 1, 0, 4, 1, 27, 1, 2, 1, 1,
|
||||
2, 1, 1, 10, 1, 4, 1, 1, 1, 1, 6, 1, 4, 1, 1, 1, 1, 1, 1, 3, 1, 2, 1, 1, 2, 1, 1, 1, 1, 1,
|
||||
1, 1, 1, 1, 1, 2, 1, 1, 2, 4, 1, 7, 1, 4, 1, 4, 1, 1, 1, 10, 1, 17, 5, 3, 1, 5, 1, 17, 0,
|
||||
26, 6, 26, 6, 26, 0, 0, 34, 0, 11, 222, 2, 0, 14, 0, 0, 0, 0, 0, 0,
|
||||
];
|
||||
static BITSET: [u64; 255] = [
|
||||
0, 1, 7, 15, 17, 31, 63, 127, 179, 511, 1023, 2047, 2191, 4079, 4087, 8191, 8319, 16384,
|
||||
65535, 131071, 262143, 4128527, 4194303, 8461767, 24870911, 67108863, 134217727, 276824575,
|
||||
335593502, 486341884, 536805376, 536870911, 553648127, 1056964608, 1073692671, 1073741823,
|
||||
1140785663, 2147483647, 4026540127, 4294934783, 8589934591, 15032387515, 64548249055,
|
||||
68191066527, 68719476735, 115913785343, 137438953215, 1095220854783, 1099511627711,
|
||||
1099511627775, 2199023190016, 2199023255551, 4398046511103, 8641373536127, 8791831609343,
|
||||
8795690369023, 8796093022207, 13198434443263, 17592186044415, 35184321757183,
|
||||
70368744112128, 88094074470339, 140737488355327, 140737488355328, 141836999983103,
|
||||
281474976710655, 281474976710656, 563017343310239, 844472174772224, 875211255709695,
|
||||
1125625028935679, 1125899906842623, 1688915364814303, 2119858418286774, 2251795522912255,
|
||||
2251799813685247, 3377704004976767, 3509778554814463, 3905461007941631, 4503595333443583,
|
||||
4503599627370495, 8796093022142464, 9006649498927104, 9007192812290047, 9007199254740991,
|
||||
15762594400829440, 17169970223906821, 17732925109967239, 18014398491652207,
|
||||
18014398509481983, 20266198323101936, 36027697507139583, 36028792723996672,
|
||||
36028792723996703, 36028792728190975, 36028797018963967, 72057594037927935,
|
||||
90071992547409919, 143851303137705983, 144053615424700415, 144115188075855868,
|
||||
144115188075855871, 288230371860938751, 297241973452963840, 301749971126844416,
|
||||
319718190147960832, 576460743713488896, 576460743847706622, 576460752303359999,
|
||||
576460752303423486, 576460752303423487, 790380184120328175, 1152640029630136575,
|
||||
1152917029519358975, 1152921504591118335, 1152921504606845055, 1152921504606846975,
|
||||
1153765996922689951, 2161727885562420159, 2251241253188403424, 2295745090394464220,
|
||||
2305570330330005503, 2305843004918726656, 2305843004919250943, 2305843009196916483,
|
||||
2305843009213693951, 3457638613854978030, 4323455298678290390, 4557642822898941951,
|
||||
4575692405780512767, 4611686017001275199, 4611686018360336384, 4611686018427322368,
|
||||
4611686018427387903, 4656722014700830719, 6843210385291930244, 6881498031078244479,
|
||||
6908521828386340863, 8935141660164089791, 8935423131384840192, 9168765891372858879,
|
||||
9169328841326329855, 9187201948305063935, 9187343239835811327, 9216616637413720063,
|
||||
9223372036854775807, 9223372041149743103, 9223372586610589696, 9223934986808197120,
|
||||
10371930679322607615, 10502394331027995967, 11078855083321979519, 11241233151490523135,
|
||||
13006395723845991295, 13258596753222922239, 13609596598936928288, 13834776580305453567,
|
||||
13907115649320091647, 14082190885810440174, 14123225865944680428, 16212958624174047247,
|
||||
16412803692974677999, 16424062692043104238, 16424062692043104239, 16424062692043243502,
|
||||
16424625641996804079, 16429129241624174575, 16717361816799141887, 16717361816799216127,
|
||||
16788293510930366511, 17005555242810474495, 17293822569102704639, 17581979622616071300,
|
||||
17870283321271910397, 17870283321406070975, 17870283321406128127, 17978369712463020031,
|
||||
18158513764145585631, 18158781978395017215, 18194542490281852927, 18410715276682199039,
|
||||
18428729675200069631, 18428729675200069632, 18433233274827440127, 18437455399478099968,
|
||||
18437736870159843328, 18437736874452713471, 18437736874454812668, 18442240474082181119,
|
||||
18444492273895866367, 18445618173802708993, 18446181192473632767, 18446216308128218879,
|
||||
18446462598732840928, 18446462598732840959, 18446462598732840960, 18446462599806582783,
|
||||
18446462615912710143, 18446462667452317695, 18446463149025525759, 18446463629525450752,
|
||||
18446463698244468735, 18446464796682337663, 18446466966713532671, 18446466996645134335,
|
||||
18446466996779287551, 18446471394825862144, 18446471394825863167, 18446480190918885375,
|
||||
18446498607738650623, 18446532967477018623, 18446602782178705022, 18446603336221163519,
|
||||
18446603336221196287, 18446638520593285119, 18446673709243564031, 18446708893632430079,
|
||||
18446740770879700992, 18446741595513422027, 18446741874686295551, 18446743249075830783,
|
||||
18446743798965862398, 18446744056529672000, 18446744060816261120, 18446744068886102015,
|
||||
18446744069414584320, 18446744069414601696, 18446744069414617087, 18446744069414649855,
|
||||
18446744069456527359, 18446744069548736512, 18446744069548802046, 18446744069683019775,
|
||||
18446744069951455231, 18446744070421282815, 18446744070446333439, 18446744070475743231,
|
||||
18446744070488326143, 18446744071553646463, 18446744071562067967, 18446744073696837631,
|
||||
18446744073701162813, 18446744073707454463, 18446744073709027328, 18446744073709355007,
|
||||
18446744073709419615, 18446744073709486080, 18446744073709520895, 18446744073709543424,
|
||||
18446744073709550079, 18446744073709550595, 18446744073709551579, 18446744073709551599,
|
||||
18446744073709551614, 18446744073709551615,
|
||||
];
|
||||
|
||||
pub fn lookup(c: char) -> bool {
|
||||
super::range_search(
|
||||
super::skip_search(
|
||||
c as u32,
|
||||
&BITSET_CHUNKS_MAP,
|
||||
BITSET_LAST_CHUNK_MAP,
|
||||
&BITSET_INDEX_CHUNKS,
|
||||
&BITSET,
|
||||
&SHORT_OFFSET_RUNS,
|
||||
&OFFSETS,
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
#[rustfmt::skip]
|
||||
pub mod case_ignorable {
|
||||
static BITSET_LAST_CHUNK_MAP: (u16, u8) = (896, 33);
|
||||
static BITSET_CHUNKS_MAP: [u8; 125] = [
|
||||
25, 14, 21, 30, 28, 4, 17, 23, 22, 0, 0, 16, 27, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 18, 13, 20, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 3, 6, 9, 0, 7, 11, 32, 31, 26, 29, 0, 0, 0, 0, 0, 24, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 5, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 15, 0, 0, 0, 0,
|
||||
10, 0, 8, 0, 19, 0, 12, 0, 1,
|
||||
static SHORT_OFFSET_RUNS: [u32; 32] = [
|
||||
688, 44045149, 555751186, 559947709, 794831996, 866136069, 891330581, 916497656, 920692236,
|
||||
924908318, 1122041344, 1130430973, 1193347585, 1205931300, 1231097515, 1235294255,
|
||||
1445009723, 1453399088, 1512120051, 1575040048, 1579248368, 1583443791, 1596046493,
|
||||
1612829031, 1621219840, 1642192896, 1667359024, 1688330988, 1692526800, 1696723963,
|
||||
1705902081, 1711210992,
|
||||
];
|
||||
static BITSET_INDEX_CHUNKS: [[u8; 16]; 34] = [
|
||||
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
|
||||
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 166],
|
||||
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 22, 47, 57],
|
||||
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 40, 0, 173, 3],
|
||||
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 94, 90, 136, 38],
|
||||
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 96, 104, 7, 0, 0],
|
||||
[0, 0, 0, 0, 0, 0, 0, 0, 78, 27, 0, 148, 138, 81, 44, 119],
|
||||
[0, 0, 0, 0, 0, 0, 0, 0, 154, 0, 0, 58, 0, 0, 0, 0],
|
||||
[0, 0, 0, 0, 0, 0, 0, 0, 167, 99, 77, 0, 0, 0, 0, 0],
|
||||
[0, 0, 0, 0, 0, 0, 0, 130, 0, 0, 0, 48, 0, 116, 0, 0],
|
||||
[0, 0, 0, 0, 0, 172, 70, 0, 0, 8, 0, 0, 0, 0, 0, 0],
|
||||
[0, 0, 0, 0, 60, 0, 0, 0, 0, 0, 67, 0, 0, 24, 0, 0],
|
||||
[0, 0, 0, 29, 0, 15, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
|
||||
[0, 0, 0, 135, 0, 0, 0, 0, 16, 162, 46, 86, 51, 80, 13, 111],
|
||||
[0, 0, 12, 0, 0, 43, 163, 92, 35, 82, 0, 71, 175, 14, 83, 131],
|
||||
[0, 0, 56, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
|
||||
[0, 133, 0, 87, 0, 150, 0, 178, 75, 0, 0, 0, 0, 0, 0, 0],
|
||||
[20, 5, 61, 0, 120, 0, 0, 0, 32, 156, 176, 1, 126, 91, 69, 88],
|
||||
[26, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
|
||||
[62, 0, 0, 0, 137, 0, 0, 0, 0, 0, 0, 76, 0, 0, 0, 0],
|
||||
[66, 0, 0, 152, 72, 25, 134, 59, 102, 124, 165, 101, 0, 64, 0, 68],
|
||||
[73, 33, 0, 181, 125, 85, 122, 139, 123, 100, 123, 169, 155, 54, 4, 18],
|
||||
[74, 151, 36, 84, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
|
||||
[106, 135, 0, 112, 177, 107, 180, 168, 0, 0, 0, 0, 0, 0, 157, 142],
|
||||
[109, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
|
||||
[113, 50, 108, 0, 0, 0, 0, 0, 0, 0, 174, 182, 182, 114, 10, 0],
|
||||
[115, 0, 0, 0, 141, 5, 0, 49, 145, 34, 31, 0, 0, 0, 0, 0],
|
||||
[118, 0, 42, 144, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
|
||||
[143, 95, 37, 121, 0, 0, 0, 0, 0, 0, 0, 0, 0, 45, 0, 0],
|
||||
[161, 0, 103, 0, 160, 11, 30, 0, 0, 0, 0, 93, 0, 0, 0, 0],
|
||||
[164, 55, 155, 53, 127, 52, 2, 28, 117, 21, 128, 19, 110, 147, 129, 9],
|
||||
[170, 41, 153, 6, 0, 0, 159, 39, 158, 1, 105, 0, 65, 0, 0, 0],
|
||||
[171, 149, 132, 17, 98, 89, 146, 23, 140, 0, 0, 63, 127, 97, 0, 0],
|
||||
[179, 182, 0, 0, 182, 182, 182, 79, 0, 0, 0, 0, 0, 0, 0, 0],
|
||||
static OFFSETS: [u8; 821] = [
|
||||
39, 1, 6, 1, 11, 1, 35, 1, 1, 1, 71, 1, 4, 1, 1, 1, 4, 1, 2, 2, 0, 192, 4, 2, 4, 1, 9, 2,
|
||||
1, 1, 251, 7, 207, 1, 5, 1, 49, 45, 1, 1, 1, 2, 1, 2, 1, 1, 44, 1, 11, 6, 10, 11, 1, 1, 35,
|
||||
1, 10, 21, 16, 1, 101, 8, 1, 10, 1, 4, 33, 1, 1, 1, 30, 27, 91, 11, 58, 11, 4, 1, 2, 1, 24,
|
||||
24, 43, 3, 119, 48, 55, 1, 1, 1, 4, 8, 4, 1, 3, 7, 10, 2, 13, 1, 15, 1, 58, 1, 4, 4, 8, 1,
|
||||
20, 2, 26, 1, 2, 2, 57, 1, 4, 2, 4, 2, 2, 3, 3, 1, 30, 2, 3, 1, 11, 2, 57, 1, 4, 5, 1, 2, 4,
|
||||
1, 20, 2, 22, 6, 1, 1, 58, 1, 2, 1, 1, 4, 8, 1, 7, 2, 11, 2, 30, 1, 61, 1, 12, 1, 50, 1, 3,
|
||||
1, 57, 3, 5, 3, 1, 4, 7, 2, 11, 2, 29, 1, 58, 1, 2, 1, 6, 1, 5, 2, 20, 2, 28, 2, 57, 2, 4,
|
||||
4, 8, 1, 20, 2, 29, 1, 72, 1, 7, 3, 1, 1, 90, 1, 2, 7, 11, 9, 98, 1, 2, 9, 9, 1, 1, 6, 74,
|
||||
2, 27, 1, 1, 1, 1, 1, 55, 14, 1, 5, 1, 2, 5, 11, 1, 36, 9, 1, 102, 4, 1, 6, 1, 2, 2, 2, 25,
|
||||
2, 4, 3, 16, 4, 13, 1, 2, 2, 6, 1, 15, 1, 94, 1, 0, 3, 0, 3, 29, 3, 29, 2, 30, 2, 64, 2, 1,
|
||||
7, 8, 1, 2, 11, 3, 1, 5, 1, 45, 4, 52, 1, 65, 2, 34, 1, 118, 3, 4, 2, 9, 1, 6, 3, 219, 2, 2,
|
||||
1, 58, 1, 1, 7, 1, 1, 1, 1, 2, 8, 6, 10, 2, 1, 39, 1, 8, 17, 63, 4, 48, 1, 1, 5, 1, 1, 5, 1,
|
||||
40, 9, 12, 2, 32, 4, 2, 2, 1, 3, 56, 1, 1, 2, 3, 1, 1, 3, 58, 8, 2, 2, 64, 6, 82, 3, 1, 13,
|
||||
1, 7, 4, 1, 6, 1, 3, 2, 50, 63, 13, 1, 34, 95, 1, 5, 0, 1, 1, 3, 11, 3, 13, 3, 13, 3, 13, 2,
|
||||
12, 5, 8, 2, 10, 1, 2, 1, 2, 5, 49, 5, 1, 10, 1, 1, 13, 1, 16, 13, 51, 33, 0, 2, 113, 3,
|
||||
125, 1, 15, 1, 96, 32, 47, 1, 0, 1, 36, 4, 3, 5, 5, 1, 93, 6, 93, 3, 0, 1, 0, 6, 0, 1, 98,
|
||||
4, 1, 10, 1, 1, 28, 4, 80, 2, 14, 34, 78, 1, 23, 3, 109, 2, 8, 1, 3, 1, 4, 1, 25, 2, 5, 1,
|
||||
151, 2, 26, 18, 13, 1, 38, 8, 25, 11, 46, 3, 48, 1, 2, 4, 2, 2, 17, 1, 21, 2, 66, 6, 2, 2,
|
||||
2, 2, 12, 1, 8, 1, 35, 1, 11, 1, 51, 1, 1, 3, 2, 2, 5, 2, 1, 1, 27, 1, 14, 2, 5, 2, 1, 1,
|
||||
100, 5, 9, 3, 121, 1, 2, 1, 4, 1, 0, 1, 147, 16, 0, 16, 3, 1, 12, 16, 34, 1, 2, 1, 169, 1,
|
||||
7, 1, 6, 1, 11, 1, 35, 1, 1, 1, 47, 1, 45, 2, 67, 1, 21, 3, 0, 1, 226, 1, 149, 5, 0, 3, 1,
|
||||
2, 5, 4, 40, 3, 4, 1, 165, 2, 0, 4, 0, 2, 153, 11, 176, 1, 54, 15, 56, 3, 49, 4, 2, 2, 2, 1,
|
||||
15, 1, 50, 3, 36, 5, 1, 8, 62, 1, 12, 2, 52, 9, 10, 4, 2, 1, 95, 3, 2, 1, 1, 2, 6, 1, 160,
|
||||
1, 3, 8, 21, 2, 57, 2, 3, 1, 37, 7, 3, 5, 195, 8, 2, 3, 1, 1, 23, 1, 84, 6, 1, 1, 4, 2, 1,
|
||||
2, 238, 4, 6, 2, 1, 2, 27, 2, 85, 8, 2, 1, 1, 2, 106, 1, 1, 1, 2, 6, 1, 1, 101, 3, 2, 4, 1,
|
||||
5, 0, 9, 1, 2, 0, 2, 1, 1, 4, 1, 144, 4, 2, 2, 4, 1, 32, 10, 40, 6, 2, 4, 8, 1, 9, 6, 2, 3,
|
||||
46, 13, 1, 2, 0, 7, 1, 6, 1, 1, 82, 22, 2, 7, 1, 2, 1, 2, 122, 6, 3, 1, 1, 2, 1, 7, 1, 1,
|
||||
72, 2, 3, 1, 1, 1, 0, 2, 0, 9, 0, 5, 59, 7, 9, 4, 0, 1, 63, 17, 64, 2, 1, 2, 0, 2, 1, 4, 0,
|
||||
3, 9, 16, 2, 7, 30, 4, 148, 3, 0, 55, 4, 50, 8, 1, 14, 1, 22, 5, 1, 15, 0, 7, 1, 17, 2, 7,
|
||||
1, 2, 1, 5, 0, 14, 0, 4, 0, 7, 109, 8, 0, 5, 0, 1, 30, 96, 128, 240, 0,
|
||||
];
|
||||
static BITSET: [u64; 183] = [
|
||||
0, 1, 2, 3, 4, 8, 13, 15, 28, 64, 176, 191, 1016, 1792, 2047, 4080, 4096, 8192, 8193,
|
||||
16192, 30720, 32704, 32768, 40448, 131008, 262016, 2097152, 2359296, 6030336, 8323072,
|
||||
10682368, 58719232, 159383552, 234881024, 243138688, 402587711, 536805376, 536879204,
|
||||
546307648, 805306369, 1073741824, 1073741916, 2113929216, 2181038080, 3221225472,
|
||||
3758096384, 4026531840, 4294934528, 4294967296, 4512022528, 5368709120, 17179869183,
|
||||
51539615774, 51539619904, 51545907230, 51545914817, 66035122176, 115964116992, 412316860416,
|
||||
412316893184, 1030792151040, 2199023255648, 8641373536127, 8763880767488, 15397323538432,
|
||||
17303886364672, 18004502906948, 26388279066624, 36421322670080, 65128884076547,
|
||||
65970697670631, 68168642985984, 70093866270720, 70368739983360, 136957967529984,
|
||||
140737488355328, 263882790666240, 281470547525648, 281470682333183, 281474976710655,
|
||||
281474976710656, 281474976710657, 281479271675905, 562675075514368, 562949953355776,
|
||||
563001509683710, 844424930131968, 985162418487296, 1023920203366400, 2251799813685248,
|
||||
3377699721314304, 4494803534348292, 4503599627370678, 6755399441055744, 7881299349733376,
|
||||
8444256867844096, 8725724278030336, 8760633772212225, 8989057312882695, 9042383626829823,
|
||||
9851624185018758, 24822575045541890, 28848986089586688, 30958948903026688,
|
||||
35747322042253312, 53805701016846336, 58529202969772032, 72066390130950143,
|
||||
112767012056334336, 143833713099145216, 189151184399892480, 216172782113783808,
|
||||
220713756545974272, 288301294651703296, 302022650010533887, 504262420777140224,
|
||||
558446353793941504, 572520102629474304, 593978171557150752, 1008806350890729472,
|
||||
1009933895770046464, 1152921504606846976, 1152921504606846978, 1152921504606846982,
|
||||
1153202979583561736, 1441151880758558727, 1715871458028158991, 1729382256910270467,
|
||||
2301902359539744768, 2305843009196908767, 2305843009213693952, 2612078987781865472,
|
||||
2771965570646540291, 3458764513820540928, 3731232291276455943, 4539628424389459968,
|
||||
4589168020290535424, 4611404543450677248, 4611686018494513280, 4611686069967003678,
|
||||
4671217976001691648, 6341068275337658368, 6917775322003857411, 7421334051581067264,
|
||||
8070450532247928832, 8788774672813524990, 9205357638345293827, 9222809086901354496,
|
||||
9223372036854775808, 9223372036854775935, 9223512774343131136, 9224216320050987008,
|
||||
9224497932466651184, 9653465801268658176, 9727775195120332910, 10376293541461622786,
|
||||
11526998316797657088, 11529215046068469760, 12103423998558208000, 12699025049277956096,
|
||||
13005832773892571136, 13798747783286489088, 13832665517980123136, 13835058055282032640,
|
||||
13835058055282163729, 13951307220663664640, 17870283321406128128, 17906312118425092095,
|
||||
18158513697557839871, 18158513749097456062, 18374686479671623680, 18374686479671623682,
|
||||
18444496122186563584, 18445618173802708992, 18446462598732840960, 18446462598733004800,
|
||||
18446463148488654848, 18446726481523507200, 18446744069414584320, 18446744069414584322,
|
||||
18446744073575333888, 18446744073709027328, 18446744073709551615,
|
||||
];
|
||||
|
||||
pub fn lookup(c: char) -> bool {
|
||||
super::range_search(
|
||||
super::skip_search(
|
||||
c as u32,
|
||||
&BITSET_CHUNKS_MAP,
|
||||
BITSET_LAST_CHUNK_MAP,
|
||||
&BITSET_INDEX_CHUNKS,
|
||||
&BITSET,
|
||||
&SHORT_OFFSET_RUNS,
|
||||
&OFFSETS,
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
#[rustfmt::skip]
|
||||
pub mod cased {
|
||||
static BITSET_LAST_CHUNK_MAP: (u16, u8) = (124, 6);
|
||||
static BITSET_CHUNKS_MAP: [u8; 123] = [
|
||||
13, 18, 0, 0, 12, 0, 0, 9, 14, 10, 0, 15, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 1, 2, 0, 16, 0, 8, 0, 0, 11, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 17, 0,
|
||||
0, 0, 0, 7,
|
||||
static SHORT_OFFSET_RUNS: [u32; 19] = [
|
||||
4256, 115348384, 136322176, 144711446, 163587254, 320875520, 325101120, 358656816,
|
||||
392231680, 404815649, 413205504, 421596288, 434182304, 442592832, 446813184, 451008166,
|
||||
528607488, 576844080, 582152586,
|
||||
];
|
||||
static BITSET_INDEX_CHUNKS: [[u8; 16]; 19] = [
|
||||
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
|
||||
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 7, 0, 0, 0],
|
||||
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 21, 8, 0, 0],
|
||||
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 42, 43, 62, 0],
|
||||
[0, 0, 0, 0, 0, 0, 0, 0, 0, 15, 10, 0, 50, 62, 58, 20],
|
||||
[0, 0, 0, 0, 0, 0, 0, 0, 0, 62, 0, 0, 0, 0, 0, 0],
|
||||
[0, 0, 0, 0, 42, 44, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0],
|
||||
[0, 0, 0, 0, 62, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
|
||||
[0, 0, 16, 16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
|
||||
[0, 0, 31, 0, 62, 62, 62, 0, 62, 62, 62, 62, 54, 26, 27, 24],
|
||||
[0, 0, 39, 13, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
|
||||
[0, 0, 51, 11, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
|
||||
[0, 0, 51, 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 51, 25],
|
||||
[0, 22, 19, 37, 62, 62, 36, 61, 62, 62, 18, 12, 0, 30, 49, 38],
|
||||
[0, 29, 9, 0, 34, 52, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0],
|
||||
[46, 55, 62, 17, 14, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
|
||||
[62, 6, 42, 23, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
|
||||
[62, 56, 33, 60, 28, 57, 62, 62, 62, 62, 48, 35, 40, 45, 47, 5],
|
||||
[62, 62, 59, 62, 41, 53, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0],
|
||||
static OFFSETS: [u8; 283] = [
|
||||
65, 26, 6, 26, 47, 1, 10, 1, 4, 1, 5, 23, 1, 31, 1, 195, 1, 4, 4, 208, 1, 36, 7, 2, 30, 5,
|
||||
96, 1, 42, 4, 2, 2, 2, 4, 1, 1, 6, 1, 1, 3, 1, 1, 1, 20, 1, 83, 1, 139, 8, 166, 1, 38, 9,
|
||||
41, 0, 38, 1, 1, 5, 1, 2, 43, 2, 3, 0, 86, 2, 6, 0, 9, 7, 43, 2, 3, 64, 192, 64, 0, 2, 6, 2,
|
||||
38, 2, 6, 2, 8, 1, 1, 1, 1, 1, 1, 1, 31, 2, 53, 1, 7, 1, 1, 3, 3, 1, 7, 3, 4, 2, 6, 4, 13,
|
||||
5, 3, 1, 7, 116, 1, 13, 1, 16, 13, 101, 1, 4, 1, 2, 10, 1, 1, 3, 5, 6, 1, 1, 1, 1, 1, 1, 4,
|
||||
1, 6, 4, 1, 2, 4, 5, 5, 4, 1, 17, 32, 3, 2, 0, 52, 0, 47, 1, 47, 1, 133, 6, 4, 3, 2, 12, 38,
|
||||
1, 1, 5, 1, 0, 46, 18, 30, 132, 102, 3, 4, 1, 48, 2, 9, 42, 2, 1, 3, 0, 43, 1, 13, 7, 80, 0,
|
||||
7, 12, 5, 0, 26, 6, 26, 0, 80, 96, 36, 4, 36, 0, 51, 13, 51, 0, 64, 0, 64, 0, 85, 1, 71, 1,
|
||||
2, 2, 1, 2, 2, 2, 4, 1, 12, 1, 1, 1, 7, 1, 65, 1, 4, 2, 8, 1, 7, 1, 28, 1, 4, 1, 5, 1, 1, 3,
|
||||
7, 1, 0, 2, 25, 1, 25, 1, 31, 1, 25, 1, 31, 1, 25, 1, 31, 1, 25, 1, 31, 1, 25, 1, 8, 0, 68,
|
||||
0, 26, 6, 26, 6, 26, 0,
|
||||
];
|
||||
static BITSET: [u64; 63] = [
|
||||
0, 15, 24, 511, 1023, 4087, 65535, 16253055, 134217726, 536805376, 1073741823, 4294967295,
|
||||
133143986179, 4398046511103, 36009005809663, 70368744177663, 2251799813685247,
|
||||
3509778554814463, 144115188074807295, 297241973452963840, 531424756029720572,
|
||||
576460743713488896, 576460743847706622, 1152921504591118335, 2295745090394464220,
|
||||
4557642822898941951, 4611686017001275199, 6908521828386340863, 8935141660164089791,
|
||||
9223934986808197120, 13605092999309557792, 16717361816799216127, 16717361816799223999,
|
||||
17005555242810474495, 17446871633794956420, 17870283321271910397, 17870283321406128127,
|
||||
18410715276682199039, 18428729675200069631, 18428729675200069632, 18437736874452713471,
|
||||
18446462598732840959, 18446462598732840960, 18446464797621878783, 18446466996779287551,
|
||||
18446603336221163519, 18446603336221196287, 18446741874686295551, 18446743249075830783,
|
||||
18446744056529672000, 18446744056529682432, 18446744069414584320, 18446744069414601696,
|
||||
18446744069422972927, 18446744070475743231, 18446744071562067967, 18446744073707454463,
|
||||
18446744073709419615, 18446744073709517055, 18446744073709550595, 18446744073709551599,
|
||||
18446744073709551600, 18446744073709551615,
|
||||
];
|
||||
|
||||
pub fn lookup(c: char) -> bool {
|
||||
super::range_search(
|
||||
super::skip_search(
|
||||
c as u32,
|
||||
&BITSET_CHUNKS_MAP,
|
||||
BITSET_LAST_CHUNK_MAP,
|
||||
&BITSET_INDEX_CHUNKS,
|
||||
&BITSET,
|
||||
&SHORT_OFFSET_RUNS,
|
||||
&OFFSETS,
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
#[rustfmt::skip]
|
||||
pub mod cc {
|
||||
static BITSET_LAST_CHUNK_MAP: (u16, u8) = (0, 0);
|
||||
static BITSET_CHUNKS_MAP: [u8; 0] = [
|
||||
static SHORT_OFFSET_RUNS: [u32; 1] = [
|
||||
1114272,
|
||||
];
|
||||
static BITSET_INDEX_CHUNKS: [[u8; 16]; 1] = [
|
||||
[1, 2, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
|
||||
static OFFSETS: [u8; 5] = [
|
||||
0, 32, 95, 33, 0,
|
||||
];
|
||||
static BITSET: [u64; 3] = [
|
||||
0, 4294967295, 9223372036854775808,
|
||||
];
|
||||
|
||||
pub fn lookup(c: char) -> bool {
|
||||
super::range_search(
|
||||
super::skip_search(
|
||||
c as u32,
|
||||
&BITSET_CHUNKS_MAP,
|
||||
BITSET_LAST_CHUNK_MAP,
|
||||
&BITSET_INDEX_CHUNKS,
|
||||
&BITSET,
|
||||
&SHORT_OFFSET_RUNS,
|
||||
&OFFSETS,
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
#[rustfmt::skip]
|
||||
pub mod grapheme_extend {
|
||||
static BITSET_LAST_CHUNK_MAP: (u16, u8) = (896, 30);
|
||||
static BITSET_CHUNKS_MAP: [u8; 123] = [
|
||||
4, 15, 21, 27, 25, 3, 18, 23, 17, 0, 0, 14, 22, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 6, 20, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 2, 7, 10, 0, 8, 12, 29, 28, 24, 26, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 5, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 16, 0, 0, 0, 0,
|
||||
11, 0, 9, 0, 19, 0, 13,
|
||||
static SHORT_OFFSET_RUNS: [u32; 31] = [
|
||||
768, 2098307, 6292881, 10490717, 513808146, 518004748, 723528943, 731918378, 744531567,
|
||||
752920578, 769719070, 899743232, 903937950, 912327165, 916523521, 929107236, 954273451,
|
||||
958470191, 1180769328, 1252073203, 1315007216, 1319202639, 1327611037, 1340199269,
|
||||
1344395776, 1373757440, 1398923568, 1419895532, 1424091344, 1429078048, 1438581232,
|
||||
];
|
||||
static BITSET_INDEX_CHUNKS: [[u8; 16]; 31] = [
|
||||
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
|
||||
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 16, 20, 46],
|
||||
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 33, 0, 0, 0],
|
||||
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 77, 74, 106, 31],
|
||||
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 143, 66, 0, 0],
|
||||
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 79, 87, 0, 0, 0],
|
||||
[0, 0, 0, 0, 0, 0, 0, 0, 0, 107, 37, 70, 0, 0, 0, 0],
|
||||
[0, 0, 0, 0, 0, 0, 0, 0, 65, 0, 0, 0, 0, 0, 37, 0],
|
||||
[0, 0, 0, 0, 0, 0, 0, 0, 121, 0, 0, 48, 0, 0, 0, 0],
|
||||
[0, 0, 0, 0, 0, 0, 0, 0, 134, 82, 64, 0, 0, 0, 0, 0],
|
||||
[0, 0, 0, 0, 0, 0, 0, 103, 0, 0, 0, 39, 0, 94, 0, 0],
|
||||
[0, 0, 0, 0, 0, 133, 58, 0, 0, 5, 0, 0, 0, 0, 0, 0],
|
||||
[0, 0, 0, 0, 49, 0, 0, 0, 0, 0, 55, 0, 0, 18, 0, 0],
|
||||
[0, 0, 0, 21, 0, 10, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
|
||||
[0, 0, 0, 71, 0, 118, 0, 142, 0, 0, 0, 0, 0, 0, 0, 0],
|
||||
[0, 0, 9, 0, 0, 0, 129, 7, 26, 67, 0, 59, 140, 11, 68, 104],
|
||||
[0, 0, 35, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
|
||||
[12, 0, 0, 69, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
|
||||
[13, 0, 50, 0, 96, 0, 0, 0, 27, 123, 139, 1, 100, 75, 57, 72],
|
||||
[51, 0, 0, 0, 87, 0, 0, 0, 0, 0, 0, 62, 0, 0, 0, 0],
|
||||
[54, 0, 0, 120, 61, 19, 105, 47, 85, 98, 131, 84, 0, 0, 0, 56],
|
||||
[60, 28, 0, 141, 99, 45, 111, 109, 97, 83, 97, 136, 132, 44, 108, 22],
|
||||
[63, 0, 25, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
|
||||
[89, 0, 0, 91, 0, 0, 0, 135, 0, 0, 0, 0, 0, 0, 0, 0],
|
||||
[93, 0, 0, 0, 113, 3, 0, 40, 115, 29, 24, 0, 0, 0, 0, 0],
|
||||
[114, 78, 30, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 38, 0, 0],
|
||||
[128, 0, 86, 0, 127, 8, 23, 0, 0, 0, 0, 76, 0, 0, 0, 0],
|
||||
[130, 42, 122, 41, 112, 43, 2, 36, 95, 15, 101, 14, 90, 117, 102, 6],
|
||||
[137, 34, 124, 4, 0, 0, 126, 32, 125, 1, 88, 0, 53, 0, 0, 0],
|
||||
[138, 119, 92, 0, 81, 73, 116, 17, 110, 0, 0, 52, 112, 80, 0, 0],
|
||||
[142, 143, 0, 0, 143, 143, 143, 66, 0, 0, 0, 0, 0, 0, 0, 0],
|
||||
static OFFSETS: [u8; 689] = [
|
||||
0, 112, 0, 7, 0, 45, 1, 1, 1, 2, 1, 2, 1, 1, 72, 11, 48, 21, 16, 1, 101, 7, 2, 6, 2, 2, 1,
|
||||
4, 35, 1, 30, 27, 91, 11, 58, 9, 9, 1, 24, 4, 1, 9, 1, 3, 1, 5, 43, 3, 119, 15, 1, 32, 55,
|
||||
1, 1, 1, 4, 8, 4, 1, 3, 7, 10, 2, 29, 1, 58, 1, 1, 1, 2, 4, 8, 1, 9, 1, 10, 2, 26, 1, 2, 2,
|
||||
57, 1, 4, 2, 4, 2, 2, 3, 3, 1, 30, 2, 3, 1, 11, 2, 57, 1, 4, 5, 1, 2, 4, 1, 20, 2, 22, 6, 1,
|
||||
1, 58, 1, 1, 2, 1, 4, 8, 1, 7, 3, 10, 2, 30, 1, 59, 1, 1, 1, 12, 1, 9, 1, 40, 1, 3, 1, 57,
|
||||
3, 5, 3, 1, 4, 7, 2, 11, 2, 29, 1, 58, 1, 2, 1, 2, 1, 3, 1, 5, 2, 7, 2, 11, 2, 28, 2, 57, 2,
|
||||
1, 1, 2, 4, 8, 1, 9, 1, 10, 2, 29, 1, 72, 1, 4, 1, 2, 3, 1, 1, 8, 1, 81, 1, 2, 7, 12, 8, 98,
|
||||
1, 2, 9, 11, 6, 74, 2, 27, 1, 1, 1, 1, 1, 55, 14, 1, 5, 1, 2, 5, 11, 1, 36, 9, 1, 102, 4, 1,
|
||||
6, 1, 2, 2, 2, 25, 2, 4, 3, 16, 4, 13, 1, 2, 2, 6, 1, 15, 1, 0, 3, 0, 3, 29, 3, 29, 2, 30,
|
||||
2, 64, 2, 1, 7, 8, 1, 2, 11, 9, 1, 45, 3, 119, 2, 34, 1, 118, 3, 4, 2, 9, 1, 6, 3, 219, 2,
|
||||
2, 1, 58, 1, 1, 7, 1, 1, 1, 1, 2, 8, 6, 10, 2, 1, 48, 17, 63, 4, 48, 7, 1, 1, 5, 1, 40, 9,
|
||||
12, 2, 32, 4, 2, 2, 1, 3, 56, 1, 1, 2, 3, 1, 1, 3, 58, 8, 2, 2, 152, 3, 1, 13, 1, 7, 4, 1,
|
||||
6, 1, 3, 2, 198, 58, 1, 5, 0, 1, 195, 33, 0, 3, 141, 1, 96, 32, 0, 6, 105, 2, 0, 4, 1, 10,
|
||||
32, 2, 80, 2, 0, 1, 3, 1, 4, 1, 25, 2, 5, 1, 151, 2, 26, 18, 13, 1, 38, 8, 25, 11, 46, 3,
|
||||
48, 1, 2, 4, 2, 2, 39, 1, 67, 6, 2, 2, 2, 2, 12, 1, 8, 1, 47, 1, 51, 1, 1, 3, 2, 2, 5, 2, 1,
|
||||
1, 42, 2, 8, 1, 238, 1, 2, 1, 4, 1, 0, 1, 0, 16, 16, 16, 0, 2, 0, 1, 226, 1, 149, 5, 0, 3,
|
||||
1, 2, 5, 4, 40, 3, 4, 1, 165, 2, 0, 4, 0, 2, 153, 11, 176, 1, 54, 15, 56, 3, 49, 4, 2, 2,
|
||||
69, 3, 36, 5, 1, 8, 62, 1, 12, 2, 52, 9, 10, 4, 2, 1, 95, 3, 2, 1, 1, 2, 6, 1, 160, 1, 3, 8,
|
||||
21, 2, 57, 2, 1, 1, 1, 1, 22, 1, 14, 7, 3, 5, 195, 8, 2, 3, 1, 1, 23, 1, 81, 1, 2, 6, 1, 1,
|
||||
2, 1, 1, 2, 1, 2, 235, 1, 2, 4, 6, 2, 1, 2, 27, 2, 85, 8, 2, 1, 1, 2, 106, 1, 1, 1, 2, 6, 1,
|
||||
1, 101, 3, 2, 4, 1, 5, 0, 9, 1, 2, 245, 1, 10, 2, 1, 1, 4, 1, 144, 4, 2, 2, 4, 1, 32, 10,
|
||||
40, 6, 2, 4, 8, 1, 9, 6, 2, 3, 46, 13, 1, 2, 0, 7, 1, 6, 1, 1, 82, 22, 2, 7, 1, 2, 1, 2,
|
||||
122, 6, 3, 1, 1, 2, 1, 7, 1, 1, 72, 2, 3, 1, 1, 1, 0, 2, 0, 5, 59, 7, 0, 1, 63, 4, 81, 1, 0,
|
||||
2, 0, 1, 1, 3, 4, 5, 8, 8, 2, 7, 30, 4, 148, 3, 0, 55, 4, 50, 8, 1, 14, 1, 22, 5, 1, 15, 0,
|
||||
7, 1, 17, 2, 7, 1, 2, 1, 5, 0, 7, 0, 4, 0, 7, 109, 7, 0, 96, 128, 240, 0,
|
||||
];
|
||||
static BITSET: [u64; 144] = [
|
||||
0, 1, 2, 8, 13, 28, 64, 182, 191, 1016, 2032, 2047, 4096, 14336, 16128, 32640, 32768,
|
||||
40448, 131008, 262016, 491520, 8323072, 8396801, 10682368, 58719232, 100663296, 134152192,
|
||||
159383552, 234881024, 243138688, 536879204, 537919040, 805306369, 1073741824, 1073741916,
|
||||
1610612736, 2153546752, 3221225472, 3758096384, 4294967296, 4512022528, 51545911364,
|
||||
51545914817, 51548004382, 51554295838, 51556262398, 68719476736, 137438953472, 412316860416,
|
||||
1030792151040, 2199023255648, 8641373536127, 8763880767488, 17303886364672, 18004502906948,
|
||||
26388279066624, 36421322670080, 65128884076547, 65970697670631, 67755789254656,
|
||||
69200441769984, 70093866270720, 263882790666240, 277076930199552, 281470547525648,
|
||||
281470681808895, 281474976710655, 281479271675904, 562675075514368, 562949953355776,
|
||||
844424930131968, 985162418487296, 1023920203366400, 2251799813685248, 3377699721314304,
|
||||
4494803534348292, 6755399441055744, 7881299349733376, 8444256867844096, 8725724278030336,
|
||||
8760633780600833, 8989057312882695, 9042383626829823, 9851624185018758, 18067175067615234,
|
||||
28848986089586688, 30958948903026688, 35747322042253312, 53805701016846336,
|
||||
58529202969772032, 189151184399892480, 220713756545974272, 466122561432846339,
|
||||
504262420777140224, 558446353793941504, 572520102629474304, 1009933895770046464,
|
||||
1152921504606846982, 1152921504606851080, 1441151880758558727, 1724878657282899983,
|
||||
2301902359539744768, 2305843009196908767, 2305843009213693952, 2310337812748042240,
|
||||
3731232291276455943, 4589168020290535424, 4609293481125347328, 4611686018427387908,
|
||||
4611686069975392286, 4671217976001691648, 5764607523034234882, 6341068275337658371,
|
||||
6341349750314369024, 7421334051581067264, 8788774672813524990, 9205357638345293827,
|
||||
9222809086901354496, 9223372036854775808, 9223372036854775935, 9224497932466651184,
|
||||
9727775195120332910, 10376293541461622786, 11526998316797657088, 11959590285459062784,
|
||||
12103423998558208000, 12699165786766311424, 13005832773892571136, 13798747783286489088,
|
||||
13835058055282032640, 13835058055282163729, 13951307220663664640, 14987979559889010690,
|
||||
17872468738205286400, 17906312118425092095, 18158513697557839871, 18158513749097456062,
|
||||
18374686479671623680, 18374686479671623682, 18446462598732840960, 18446462598732972032,
|
||||
18446744056529158144, 18446744069414584320, 18446744073709551615,
|
||||
];
|
||||
|
||||
pub fn lookup(c: char) -> bool {
|
||||
super::range_search(
|
||||
super::skip_search(
|
||||
c as u32,
|
||||
&BITSET_CHUNKS_MAP,
|
||||
BITSET_LAST_CHUNK_MAP,
|
||||
&BITSET_INDEX_CHUNKS,
|
||||
&BITSET,
|
||||
&SHORT_OFFSET_RUNS,
|
||||
&OFFSETS,
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
#[rustfmt::skip]
|
||||
pub mod lowercase {
|
||||
static BITSET_LAST_CHUNK_MAP: (u16, u8) = (122, 6);
|
||||
static BITSET_CHUNKS_MAP: [u8; 118] = [
|
||||
12, 16, 0, 0, 10, 0, 0, 11, 13, 8, 0, 15, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 2, 1, 0, 17, 0, 9, 0, 0, 7, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 14,
|
||||
static BITSET_CHUNKS_MAP: [u8; 123] = [
|
||||
13, 16, 0, 0, 8, 0, 0, 11, 12, 9, 0, 15, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 5, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 3, 1, 0, 14, 0, 7, 0, 0, 10, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 17, 0, 0,
|
||||
0, 0, 6,
|
||||
];
|
||||
static BITSET_INDEX_CHUNKS: [[u8; 16]; 18] = [
|
||||
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
|
||||
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 7, 0, 0],
|
||||
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 6, 0, 0, 0],
|
||||
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 59, 62, 71, 0],
|
||||
[0, 0, 0, 0, 0, 0, 0, 0, 0, 17, 9, 0, 50, 42, 44, 28],
|
||||
[0, 0, 0, 0, 0, 0, 0, 0, 0, 69, 0, 0, 0, 0, 0, 0],
|
||||
[0, 0, 0, 0, 68, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
|
||||
[0, 0, 0, 10, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
|
||||
[0, 0, 0, 14, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
|
||||
[0, 0, 0, 20, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
|
||||
[0, 0, 0, 53, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 35],
|
||||
[0, 0, 3, 0, 71, 71, 71, 0, 46, 46, 48, 46, 24, 37, 38, 23],
|
||||
[0, 29, 27, 57, 39, 51, 52, 43, 41, 70, 26, 11, 0, 34, 64, 32],
|
||||
[0, 40, 8, 0, 33, 60, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0],
|
||||
[22, 13, 54, 66, 25, 15, 56, 63, 30, 19, 12, 55, 58, 61, 65, 4],
|
||||
[59, 36, 46, 21, 16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
|
||||
[59, 49, 45, 47, 18, 69, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0],
|
||||
[67, 5, 0, 31, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
|
||||
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 56, 0, 0],
|
||||
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 16, 14, 52, 0],
|
||||
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 40, 0, 0, 0],
|
||||
[0, 0, 0, 0, 0, 0, 0, 0, 0, 9, 0, 0, 0, 0, 0, 0],
|
||||
[0, 0, 0, 0, 0, 0, 0, 0, 0, 62, 39, 0, 47, 43, 45, 30],
|
||||
[0, 0, 0, 0, 10, 53, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
|
||||
[0, 0, 0, 7, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
|
||||
[0, 0, 0, 50, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 26],
|
||||
[0, 0, 0, 57, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
|
||||
[0, 0, 0, 67, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
|
||||
[0, 0, 54, 0, 52, 52, 52, 0, 21, 21, 64, 21, 33, 24, 23, 34],
|
||||
[0, 5, 71, 0, 28, 15, 69, 0, 0, 0, 0, 0, 0, 0, 0, 0],
|
||||
[0, 61, 31, 17, 22, 48, 49, 44, 42, 8, 32, 38, 0, 27, 13, 29],
|
||||
[11, 55, 0, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
|
||||
[16, 25, 21, 35, 36, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
|
||||
[16, 46, 2, 20, 63, 9, 54, 0, 0, 0, 0, 0, 0, 0, 0, 0],
|
||||
[60, 37, 51, 12, 70, 58, 18, 1, 6, 59, 68, 19, 65, 66, 3, 41],
|
||||
];
|
||||
static BITSET: [u64; 72] = [
|
||||
0, 15, 16, 511, 3063, 65535, 16253055, 134217726, 536805376, 984263338, 4294967295,
|
||||
133143986179, 274877905920, 1099509514240, 4398046445568, 17592185782272, 36009005809663,
|
||||
46912496118442, 187649984473770, 281474972516352, 2251799813685247, 2339875276368554,
|
||||
4503599560261632, 61925590106570972, 71777214282006783, 72057592964186127,
|
||||
144115188074807295, 297241973452963840, 522417556774978824, 576460743713488896,
|
||||
1152921487426978047, 1152921504590069760, 1814856824841797631, 3607524039012697088,
|
||||
4362299189061746720, 4539628424389459968, 4601013482110844927, 4611405638684049471,
|
||||
4674456033467236607, 6172933889249159850, 9223934986808197120, 10663022717737544362,
|
||||
10808545280696953514, 12261519110656315968, 12294970652241842346, 12297829382473033730,
|
||||
12297829382473034410, 12297829382473045332, 12297829382829550250, 12297829383904690175,
|
||||
12298110845996498944, 15324248332066007893, 16596095761559859497, 16717361816799215616,
|
||||
16987577794709946364, 17293822586148356092, 18158513701852807104, 18410715274543104000,
|
||||
18428729675466407935, 18446462598732840960, 18446462598732858304, 18446462598737002495,
|
||||
18446464797621878783, 18446673704966422527, 18446726481523572736, 18446739675663105535,
|
||||
18446739675663106031, 18446742974197923840, 18446744056529682432, 18446744069414584320,
|
||||
18446744073709529733, 18446744073709551615,
|
||||
static BITSET_CANONICAL: [u64; 52] = [
|
||||
0b0000000000000000000000000000000000000000000000000000000000000000,
|
||||
0b1111111111111111110000000000000000000000000011111111111111111111,
|
||||
0b1010101010101010101010101010101010101010101010101010100000000010,
|
||||
0b1111111111111111111111000000000000000000000000001111110111111111,
|
||||
0b0000111111111111111111111111111111111111000000000000000000000000,
|
||||
0b1000000000000010000000000000000000000000000000000000000000000000,
|
||||
0b0000111111111111111111111111110000000000000000000000000011111111,
|
||||
0b0000000000000111111111111111111111111111111111111111111111111111,
|
||||
0b1111111111111111111111111111111111111111111111111010101010000101,
|
||||
0b1111111111111111111111111111111100000000000000000000000000000000,
|
||||
0b1111111111111111111111111111110000000000000000000000000000000000,
|
||||
0b1111111111111111111111110000000000000000000000000000000000000000,
|
||||
0b1111111111111111111111000000000000000000000000001111111111101111,
|
||||
0b1111111111111111111100000000000000000000000000010000000000000000,
|
||||
0b1111111111111111000000011111111111110111111111111111111111111111,
|
||||
0b1111111111111111000000000000000000000000000000000100001111000000,
|
||||
0b1111111111111111000000000000000000000000000000000000000000000000,
|
||||
0b1111111101111111111111111111111110000000000000000000000000000000,
|
||||
0b1111110000000000000000000000000011111111111111111111111111000000,
|
||||
0b1111000000000000000000000000001111110111111111111111111111111100,
|
||||
0b1010101010101010101010101010101010101010101010101101010101010100,
|
||||
0b1010101010101010101010101010101010101010101010101010101010101010,
|
||||
0b0101010110101010101010101010101010101010101010101010101010101010,
|
||||
0b0100000011011111000000001111111100000000111111110000000011111111,
|
||||
0b0011111111111111000000001111111100000000111111110000000000111111,
|
||||
0b0011111111011010000101010110001001111111111111111111111111111111,
|
||||
0b0011111100000000000000000000000000000000000000000000000000000000,
|
||||
0b0011110010001010000000000000000000000000000000000000000000100000,
|
||||
0b0011001000010000100000000000000000000000000010001100010000000000,
|
||||
0b0001100100101111101010101010101010101010111000110111111111111111,
|
||||
0b0000011101000000000000000000000000000000000000000000010100001000,
|
||||
0b0000010000100000000001000000000000000000000000000000000000000000,
|
||||
0b0000000111111111111111111111111111111111111011111111111111111111,
|
||||
0b0000000011111111000000001111111100000000001111110000000011111111,
|
||||
0b0000000011011100000000001111111100000000110011110000000011011100,
|
||||
0b0000000000001000010100000001101010101010101010101010101010101010,
|
||||
0b0000000000000000001000001011111111111111111111111111111111111111,
|
||||
0b0000000000000000000000001111111111111111110111111100000000000000,
|
||||
0b0000000000000000000000000001111100000000000000000000000000000011,
|
||||
0b0000000000000000000000000000000000111010101010101010101010101010,
|
||||
0b0000000000000000000000000000000000000000111110000000000001111111,
|
||||
0b0000000000000000000000000000000000000000000000000000101111110111,
|
||||
0b1001001111111010101010101010101010101010101010101010101010101010,
|
||||
0b1001010111111111101010101010101010101010101010101010101010101010,
|
||||
0b1010101000101001101010101010101010110101010101010101001001000000,
|
||||
0b1010101010100000100000101010101010101010101110100101000010101010,
|
||||
0b1010101010101010101010101010101011111111111111111111111111111111,
|
||||
0b1010101010101011101010101010100000000000000000000000000000000000,
|
||||
0b1101010010101010101010101010101010101010101010101010101101010101,
|
||||
0b1110011001010001001011010010101001001110001001000011000100101001,
|
||||
0b1110011111111111111111111111111111111111111111110000000000000000,
|
||||
0b1110101111000000000000000000000000001111111111111111111111111100,
|
||||
];
|
||||
static BITSET_MAPPING: [(u8, u8); 20] = [
|
||||
(0, 64), (1, 188), (1, 183), (1, 176), (1, 109), (1, 124), (1, 126), (1, 66), (1, 70),
|
||||
(1, 77), (2, 146), (2, 144), (2, 83), (3, 12), (3, 6), (4, 156), (4, 78), (5, 187),
|
||||
(6, 132), (7, 93),
|
||||
];
|
||||
|
||||
pub fn lookup(c: char) -> bool {
|
||||
super::range_search(
|
||||
super::bitset_search(
|
||||
c as u32,
|
||||
&BITSET_CHUNKS_MAP,
|
||||
BITSET_LAST_CHUNK_MAP,
|
||||
&BITSET_INDEX_CHUNKS,
|
||||
&BITSET,
|
||||
&BITSET_CANONICAL,
|
||||
&BITSET_MAPPING,
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
#[rustfmt::skip]
|
||||
pub mod n {
|
||||
static BITSET_LAST_CHUNK_MAP: (u16, u8) = (127, 0);
|
||||
static BITSET_CHUNKS_MAP: [u8; 127] = [
|
||||
31, 8, 11, 25, 19, 4, 29, 21, 24, 28, 0, 16, 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 9, 30, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 3, 13, 18, 26, 17, 23, 20, 15, 22, 0, 33, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 7, 6, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
5, 2, 0, 0, 10, 0, 14, 27, 12, 0, 1,
|
||||
static SHORT_OFFSET_RUNS: [u32; 38] = [
|
||||
1632, 18876774, 31461440, 102765417, 111154926, 115349830, 132128880, 165684320, 186656630,
|
||||
195046653, 199241735, 203436434, 216049184, 241215536, 249605104, 274792208, 278987015,
|
||||
283181793, 295766104, 320933114, 383848032, 392238160, 434181712, 442570976, 455154768,
|
||||
463544256, 476128256, 480340576, 484535936, 497144544, 501340110, 509731136, 513925872,
|
||||
518121671, 522316913, 530706688, 551681008, 556989434,
|
||||
];
|
||||
static BITSET_INDEX_CHUNKS: [[u8; 16]; 34] = [
|
||||
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
|
||||
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 47],
|
||||
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 72],
|
||||
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 15, 0, 0, 0],
|
||||
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 33, 0, 0, 0, 49],
|
||||
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 36, 0, 43, 0, 0],
|
||||
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 14, 0, 0, 0, 0, 0],
|
||||
[0, 0, 0, 0, 0, 0, 0, 0, 0, 25, 0, 0, 0, 22, 0, 0],
|
||||
[0, 0, 0, 0, 0, 0, 0, 0, 0, 25, 0, 47, 0, 0, 0, 2],
|
||||
[0, 0, 0, 0, 0, 0, 0, 0, 25, 0, 0, 31, 0, 0, 0, 0],
|
||||
[0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 47, 0, 0, 0, 0],
|
||||
[0, 0, 0, 0, 0, 31, 0, 45, 0, 31, 0, 31, 0, 41, 0, 34],
|
||||
[0, 0, 0, 0, 6, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
|
||||
[0, 0, 0, 0, 37, 44, 4, 0, 0, 0, 0, 52, 23, 3, 0, 13],
|
||||
[0, 0, 0, 7, 0, 15, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
|
||||
[0, 0, 0, 35, 0, 15, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
|
||||
[0, 0, 0, 54, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
|
||||
[0, 0, 0, 62, 47, 0, 0, 0, 0, 60, 0, 0, 24, 10, 0, 5],
|
||||
[0, 0, 25, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
|
||||
[0, 2, 15, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 53, 0, 0],
|
||||
[0, 15, 0, 15, 0, 0, 0, 0, 0, 15, 0, 2, 51, 0, 0, 0],
|
||||
[0, 16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
|
||||
[0, 26, 0, 0, 0, 15, 25, 0, 0, 0, 0, 0, 0, 0, 0, 11],
|
||||
[0, 32, 0, 47, 65, 0, 0, 39, 0, 0, 0, 47, 0, 0, 0, 0],
|
||||
[0, 46, 2, 0, 0, 71, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0],
|
||||
[0, 59, 0, 31, 0, 42, 0, 31, 0, 15, 0, 15, 36, 0, 0, 0],
|
||||
[0, 63, 30, 61, 18, 0, 55, 70, 0, 57, 20, 28, 0, 64, 29, 0],
|
||||
[0, 66, 38, 0, 56, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
|
||||
[0, 69, 19, 68, 0, 0, 0, 0, 0, 0, 0, 0, 0, 65, 9, 0],
|
||||
[15, 0, 0, 0, 0, 8, 0, 17, 0, 0, 16, 0, 0, 15, 47, 0],
|
||||
[40, 0, 0, 15, 2, 0, 0, 48, 0, 15, 0, 0, 0, 0, 0, 47],
|
||||
[47, 0, 58, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
|
||||
[50, 0, 0, 0, 0, 0, 12, 0, 25, 21, 67, 0, 0, 0, 0, 0],
|
||||
[73, 27, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
|
||||
static OFFSETS: [u8; 267] = [
|
||||
48, 10, 120, 2, 5, 1, 2, 3, 0, 10, 134, 10, 198, 10, 0, 10, 118, 10, 4, 6, 108, 10, 118,
|
||||
10, 118, 10, 2, 6, 110, 13, 115, 10, 8, 7, 103, 10, 104, 7, 7, 19, 109, 10, 96, 10, 118, 10,
|
||||
70, 20, 0, 10, 70, 10, 0, 20, 0, 3, 239, 10, 6, 10, 22, 10, 0, 10, 128, 11, 165, 10, 6, 10,
|
||||
182, 10, 86, 10, 134, 10, 6, 10, 0, 1, 3, 6, 6, 10, 198, 51, 2, 5, 0, 60, 78, 22, 0, 30, 0,
|
||||
1, 0, 1, 25, 9, 14, 3, 0, 4, 138, 10, 30, 8, 1, 15, 32, 10, 39, 15, 0, 10, 188, 10, 0, 6,
|
||||
154, 10, 38, 10, 198, 10, 22, 10, 86, 10, 0, 10, 0, 10, 0, 45, 12, 57, 17, 2, 0, 27, 36, 4,
|
||||
29, 1, 8, 1, 134, 5, 202, 10, 0, 8, 25, 7, 39, 9, 75, 5, 22, 6, 160, 2, 2, 16, 2, 46, 64, 9,
|
||||
52, 2, 30, 3, 75, 5, 104, 8, 24, 8, 41, 7, 0, 6, 48, 10, 0, 31, 158, 10, 42, 4, 112, 7, 134,
|
||||
30, 128, 10, 60, 10, 144, 10, 7, 20, 251, 10, 0, 10, 118, 10, 0, 10, 102, 10, 102, 12, 0,
|
||||
19, 93, 10, 0, 29, 227, 10, 70, 10, 0, 21, 0, 111, 0, 10, 230, 10, 1, 7, 0, 23, 0, 20, 108,
|
||||
25, 0, 50, 0, 10, 0, 10, 0, 9, 128, 10, 0, 59, 1, 3, 1, 4, 76, 45, 1, 15, 0, 13, 0, 10, 0,
|
||||
];
|
||||
static BITSET: [u64; 74] = [
|
||||
0, 999, 1023, 1026, 3072, 4064, 8191, 65408, 65472, 1048575, 1966080, 2097151, 3932160,
|
||||
4063232, 8388607, 67043328, 67044351, 134152192, 264241152, 268435455, 3758096384,
|
||||
4294901504, 17112694784, 64424509440, 549218942976, 4393751543808, 35184372023296,
|
||||
140737488355327, 272678883688448, 279275953455104, 280925220896768, 281200098803712,
|
||||
281474976448512, 492581209243648, 2251524935778304, 2251795518717952, 4503595332403200,
|
||||
4503599627370368, 8708132091985919, 9007190731849728, 17732923532771328, 71212894229889024,
|
||||
144114915328655360, 144115183780888576, 144115188075855871, 284007976623144960,
|
||||
284008251501051904, 287948901175001088, 287948901242044416, 287953294926544896,
|
||||
504407547722072192, 1152640029630136320, 1152921496016912384, 2305840810190438400,
|
||||
2305843009213693952, 3458764513820540928, 4611615649683210238, 6917529027641082367,
|
||||
8217943420044312576, 9151595642915651584, 9223372032559808512, 17870283321406128128,
|
||||
18158513697557839872, 18302628889911885824, 18374686483949813760, 18428729675200069632,
|
||||
18446181123756130304, 18446181123756131327, 18446739675663040512, 18446744069414584320,
|
||||
18446744073709355007, 18446744073709486080, 18446744073709535232, 18446744073709551615,
|
||||
];
|
||||
|
||||
pub fn lookup(c: char) -> bool {
|
||||
super::range_search(
|
||||
super::skip_search(
|
||||
c as u32,
|
||||
&BITSET_CHUNKS_MAP,
|
||||
BITSET_LAST_CHUNK_MAP,
|
||||
&BITSET_INDEX_CHUNKS,
|
||||
&BITSET,
|
||||
&SHORT_OFFSET_RUNS,
|
||||
&OFFSETS,
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
#[rustfmt::skip]
|
||||
pub mod uppercase {
|
||||
static BITSET_LAST_CHUNK_MAP: (u16, u8) = (124, 6);
|
||||
static BITSET_CHUNKS_MAP: [u8; 123] = [
|
||||
12, 15, 0, 0, 11, 0, 0, 8, 5, 9, 0, 14, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 1, 0, 13, 0, 7, 0, 0, 10, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 16, 0, 0,
|
||||
0, 0, 4,
|
||||
static BITSET_CHUNKS_MAP: [u8; 125] = [
|
||||
12, 15, 5, 5, 0, 5, 5, 2, 4, 11, 5, 14, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
|
||||
5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 8, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
|
||||
5, 5, 5, 6, 5, 13, 5, 10, 5, 5, 1, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
|
||||
5, 7, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 16, 5, 5,
|
||||
5, 5, 9, 5, 3,
|
||||
];
|
||||
static BITSET_INDEX_CHUNKS: [[u8; 16]; 17] = [
|
||||
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
|
||||
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 33, 0, 0, 0],
|
||||
[0, 0, 0, 0, 0, 0, 0, 0, 0, 12, 0, 0, 0, 0, 0, 0],
|
||||
[0, 0, 0, 0, 0, 0, 0, 0, 0, 18, 9, 0, 38, 46, 44, 28],
|
||||
[0, 0, 0, 0, 13, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
|
||||
[0, 0, 0, 0, 51, 23, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0],
|
||||
[0, 0, 0, 0, 60, 62, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0],
|
||||
[0, 0, 26, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
|
||||
[0, 0, 54, 0, 0, 0, 0, 0, 43, 43, 40, 43, 56, 22, 34, 35],
|
||||
[0, 0, 57, 6, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
|
||||
[0, 0, 66, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
|
||||
[0, 0, 66, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 66, 30],
|
||||
[0, 10, 0, 11, 50, 37, 36, 45, 47, 5, 0, 0, 0, 49, 17, 53],
|
||||
[14, 0, 60, 7, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
|
||||
[21, 52, 43, 25, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
|
||||
[24, 39, 42, 41, 59, 8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
|
||||
[58, 65, 29, 16, 48, 63, 31, 19, 55, 61, 64, 32, 27, 20, 15, 3],
|
||||
[41, 41, 5, 33, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 5, 0],
|
||||
[41, 41, 5, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41],
|
||||
[41, 41, 38, 41, 41, 41, 41, 41, 17, 17, 61, 17, 40, 29, 24, 23],
|
||||
[41, 41, 41, 41, 9, 8, 42, 41, 41, 41, 41, 41, 41, 41, 41, 41],
|
||||
[41, 41, 41, 41, 35, 28, 65, 41, 41, 41, 41, 41, 41, 41, 41, 41],
|
||||
[41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41],
|
||||
[41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 56, 41, 41, 41],
|
||||
[41, 41, 41, 41, 41, 41, 41, 41, 41, 46, 41, 41, 41, 41, 41, 41],
|
||||
[41, 41, 41, 41, 41, 41, 41, 41, 41, 60, 59, 41, 20, 14, 16, 4],
|
||||
[41, 41, 41, 41, 47, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41],
|
||||
[41, 41, 51, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41],
|
||||
[41, 41, 52, 43, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41],
|
||||
[41, 53, 41, 31, 34, 21, 22, 15, 13, 32, 41, 41, 41, 11, 30, 37],
|
||||
[48, 41, 9, 44, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41],
|
||||
[49, 36, 17, 27, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41],
|
||||
[50, 19, 2, 18, 10, 45, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41],
|
||||
[57, 1, 26, 54, 12, 7, 25, 55, 39, 58, 6, 3, 64, 63, 62, 66],
|
||||
];
|
||||
static BITSET: [u64; 67] = [
|
||||
0, 8, 1023, 1024, 8383, 21882, 65535, 1048575, 8388607, 89478485, 134217726, 2139095039,
|
||||
4294967295, 17179869183, 1099511627775, 2199023190016, 4398046445568, 17575006099264,
|
||||
23456248059221, 70368743129088, 140737484161024, 140737488355327, 280378317225728,
|
||||
281470681743392, 281474976710655, 1169903278445909, 2251799813685247, 9007198986305536,
|
||||
9007199254741748, 17977448100528131, 18014398509481983, 288230371856744511,
|
||||
576460735123554305, 576460743713488896, 1080863910568919040, 1080897995681042176,
|
||||
1274187559846268630, 3122495741643543722, 6148633210533183488, 6148914689804861440,
|
||||
6148914690880001365, 6148914691236506283, 6148914691236516865, 6148914691236517205,
|
||||
6151773421467674709, 6184099063146390672, 7638198793012598101, 7783721355972007253,
|
||||
8863084067199903664, 9242793810247811072, 12273810184460391765, 13839347594782259332,
|
||||
13845730589451223040, 16613872850358272000, 16717361816799215616, 17293822586282573568,
|
||||
18374966856193736448, 18428729675200069632, 18442240474149289983, 18446274948748367189,
|
||||
18446462598732840960, 18446462598737035263, 18446466996779287551, 18446726481523637343,
|
||||
18446742974197924863, 18446742974197940223, 18446744069414584320,
|
||||
static BITSET_CANONICAL: [u64; 41] = [
|
||||
0b0000000000111111111111111111111111111111111111111111111111111111,
|
||||
0b1111111111111111111111110000000000000000000000000011111111111111,
|
||||
0b0101010101010101010101010101010101010101010101010101010000000001,
|
||||
0b0000011111111111111111111111110000000000000000000000000000000001,
|
||||
0b0000000000100000000000000000000000000000000000000000001011110100,
|
||||
0b1111111111111111111111111111111100000000000000000000000000000000,
|
||||
0b1111111111111111111111110000000000000000000000000000001111111111,
|
||||
0b1111111111111111111100000000000000000000000000011111110001011111,
|
||||
0b1111111111111111000000111111111111111111111111110000001111111111,
|
||||
0b1111111111111111000000000000000000000000000000000000000000000000,
|
||||
0b1111111111111110010101010101010101010101010101010101010101010101,
|
||||
0b1000000001000101000000000000000000000000000000000000000000000000,
|
||||
0b0111101100000000000000000000000000011111110111111110011110110000,
|
||||
0b0110110000000101010101010101010101010101010101010101010101010101,
|
||||
0b0110101000000000010101010101010101010101010101010101010101010101,
|
||||
0b0101010111010010010101010101010101001010101010101010010010010000,
|
||||
0b0101010101011111011111010101010101010101010001010010100001010101,
|
||||
0b0101010101010101010101010101010101010101010101010101010101010101,
|
||||
0b0101010101010101010101010101010101010101010101010010101010101011,
|
||||
0b0101010101010101010101010101010100000000000000000000000000000000,
|
||||
0b0101010101010100010101010101010000000000000000000000000000000000,
|
||||
0b0010101101010101010101010101010101010101010101010101010010101010,
|
||||
0b0001000110101110110100101101010110110001110110111100111011010110,
|
||||
0b0000111100000000000111110000000000001111000000000000111100000000,
|
||||
0b0000111100000000000000000000000000000000000000000000000000000000,
|
||||
0b0000001111111111111111111111111100000000000000000000000000111111,
|
||||
0b0000000000111111110111100110010011010000000000000000000000000011,
|
||||
0b0000000000000100001010000000010101010101010101010101010101010101,
|
||||
0b0000000000000000111111111111111100000000000000000000000000100000,
|
||||
0b0000000000000000111111110000000010101010000000000011111100000000,
|
||||
0b0000000000000000000011111111101111111111111111101101011101000000,
|
||||
0b0000000000000000000000000000000001111111011111111111111111111111,
|
||||
0b0000000000000000000000000000000000000000000000000101010101111010,
|
||||
0b0000000000000000000000000000000000000000000000000010000010111111,
|
||||
0b1010101001010101010101010101010101010101010101010101010101010101,
|
||||
0b1100000000001111001111010101000000111110001001110011100010000100,
|
||||
0b1100000000100101111010101001110100000000000000000000000000000000,
|
||||
0b1110011010010000010101010101010101010101000111001000000000000000,
|
||||
0b1110011111111111111111111111111111111111111111110000000000000000,
|
||||
0b1111000000000000000000000000001111111111111111111111111100000000,
|
||||
0b1111111100000000111111110000000000111111000000001111111100000000,
|
||||
];
|
||||
static BITSET_MAPPING: [(u8, u8); 26] = [
|
||||
(0, 182), (0, 74), (0, 166), (0, 162), (0, 159), (0, 150), (0, 148), (0, 142), (0, 135),
|
||||
(0, 134), (0, 131), (0, 64), (1, 115), (1, 66), (1, 70), (1, 83), (1, 12), (1, 8), (2, 164),
|
||||
(2, 146), (2, 20), (3, 146), (3, 140), (3, 134), (4, 178), (4, 171),
|
||||
];
|
||||
|
||||
pub fn lookup(c: char) -> bool {
|
||||
super::range_search(
|
||||
super::bitset_search(
|
||||
c as u32,
|
||||
&BITSET_CHUNKS_MAP,
|
||||
BITSET_LAST_CHUNK_MAP,
|
||||
&BITSET_INDEX_CHUNKS,
|
||||
&BITSET,
|
||||
&BITSET_CANONICAL,
|
||||
&BITSET_MAPPING,
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
#[rustfmt::skip]
|
||||
pub mod white_space {
|
||||
static BITSET_LAST_CHUNK_MAP: (u16, u8) = (12, 2);
|
||||
static BITSET_CHUNKS_MAP: [u8; 9] = [
|
||||
3, 0, 0, 0, 0, 1, 0, 0, 4,
|
||||
static SHORT_OFFSET_RUNS: [u32; 4] = [
|
||||
5760, 18882560, 23080960, 40972289,
|
||||
];
|
||||
static BITSET_INDEX_CHUNKS: [[u8; 16]; 5] = [
|
||||
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
|
||||
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0],
|
||||
[1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
|
||||
[4, 0, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
|
||||
[5, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
|
||||
static OFFSETS: [u8; 21] = [
|
||||
9, 5, 18, 1, 100, 1, 26, 1, 0, 1, 0, 11, 29, 2, 5, 1, 47, 1, 0, 1, 0,
|
||||
];
|
||||
static BITSET: [u64; 6] = [
|
||||
0, 1, 2147483648, 4294967328, 4294983168, 144036023240703,
|
||||
];
|
||||
|
||||
pub fn lookup(c: char) -> bool {
|
||||
super::range_search(
|
||||
super::skip_search(
|
||||
c as u32,
|
||||
&BITSET_CHUNKS_MAP,
|
||||
BITSET_LAST_CHUNK_MAP,
|
||||
&BITSET_INDEX_CHUNKS,
|
||||
&BITSET,
|
||||
&SHORT_OFFSET_RUNS,
|
||||
&OFFSETS,
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -389,6 +389,7 @@
|
||||
E0699: include_str!("./error_codes/E0699.md"),
|
||||
E0700: include_str!("./error_codes/E0700.md"),
|
||||
E0701: include_str!("./error_codes/E0701.md"),
|
||||
E0703: include_str!("./error_codes/E0703.md"),
|
||||
E0704: include_str!("./error_codes/E0704.md"),
|
||||
E0705: include_str!("./error_codes/E0705.md"),
|
||||
E0706: include_str!("./error_codes/E0706.md"),
|
||||
@@ -603,7 +604,6 @@
|
||||
// E0694, // an unknown tool name found in scoped attributes
|
||||
E0696, // `continue` pointing to a labeled block
|
||||
// E0702, // replaced with a generic attribute input check
|
||||
E0703, // invalid ABI
|
||||
// E0707, // multiple elided lifetimes used in arguments of `async fn`
|
||||
E0708, // `async` non-`move` closures with parameters are not currently
|
||||
// supported
|
||||
|
||||
@@ -0,0 +1,17 @@
|
||||
Invalid ABI (Application Binary Interface) used in the code.
|
||||
|
||||
Erroneous code example:
|
||||
|
||||
```compile_fail,E0703
|
||||
extern "invalid" fn foo() {} // error!
|
||||
# fn main() {}
|
||||
```
|
||||
|
||||
At present, only a few predefined ABIs (like Rust, C, system, etc.) can be
|
||||
used in Rust. Verify that the ABI is predefined. For example you can
|
||||
replace the given ABI with 'Rust'.
|
||||
|
||||
```
|
||||
extern "Rust" fn foo() {} // ok!
|
||||
# fn main() { }
|
||||
```
|
||||
@@ -1082,8 +1082,8 @@ h3 > .collapse-toggle, h4 > .collapse-toggle {
|
||||
|
||||
pre.rust {
|
||||
position: relative;
|
||||
tab-width: 4;
|
||||
-moz-tab-width: 4;
|
||||
tab-size: 4;
|
||||
-moz-tab-size: 4;
|
||||
}
|
||||
|
||||
.search-failed {
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
use std::fs::File;
|
||||
use std::fs::{create_dir_all, File};
|
||||
use std::io::prelude::*;
|
||||
use std::path::PathBuf;
|
||||
|
||||
@@ -40,6 +40,11 @@ pub fn render(
|
||||
diag: &rustc_errors::Handler,
|
||||
edition: Edition,
|
||||
) -> i32 {
|
||||
if let Err(e) = create_dir_all(&options.output) {
|
||||
diag.struct_err(&format!("{}: {}", options.output.display(), e)).emit();
|
||||
return 4;
|
||||
}
|
||||
|
||||
let mut output = options.output;
|
||||
output.push(input.file_name().unwrap());
|
||||
output.set_extension("html");
|
||||
|
||||
+3
-1
@@ -91,7 +91,8 @@
|
||||
//! pull-requests for your suggested changes.
|
||||
//!
|
||||
//! Contributions are appreciated! If you see a part of the docs that can be
|
||||
//! improved, submit a PR, or chat with us first on irc.mozilla.org #rust-docs.
|
||||
//! improved, submit a PR, or chat with us first on [Discord][rust-discord]
|
||||
//! #docs.
|
||||
//!
|
||||
//! # A Tour of The Rust Standard Library
|
||||
//!
|
||||
@@ -194,6 +195,7 @@
|
||||
//! [multithreading]: thread/index.html
|
||||
//! [other]: #what-is-in-the-standard-library-documentation
|
||||
//! [primitive types]: ../book/ch03-02-data-types.html
|
||||
//! [rust-discord]: https://discord.gg/rust-lang
|
||||
|
||||
#![stable(feature = "rust1", since = "1.0.0")]
|
||||
#![doc(
|
||||
|
||||
@@ -8,3 +8,4 @@ LL | extern "路濫狼á́́" fn foo() {}
|
||||
|
||||
error: aborting due to previous error
|
||||
|
||||
For more information about this error, try `rustc --explain E0703`.
|
||||
|
||||
@@ -8,3 +8,4 @@ LL | "invalid-ab_isize"
|
||||
|
||||
error: aborting due to previous error
|
||||
|
||||
For more information about this error, try `rustc --explain E0703`.
|
||||
|
||||
@@ -1,9 +1,83 @@
|
||||
//! This implements the core logic of the compression scheme used to compactly
|
||||
//! encode Unicode properties.
|
||||
//!
|
||||
//! We have two primary goals with the encoding: we want to be compact, because
|
||||
//! these tables often end up in ~every Rust program (especially the
|
||||
//! grapheme_extend table, used for str debugging), including those for embedded
|
||||
//! targets (where space is important). We also want to be relatively fast,
|
||||
//! though this is more of a nice to have rather than a key design constraint.
|
||||
//! It is expected that libraries/applications which are performance-sensitive
|
||||
//! to Unicode property lookups are extremely rare, and those that care may find
|
||||
//! the tradeoff of the raw bitsets worth it. For most applications, a
|
||||
//! relatively fast but much smaller (and as such less cache-impacting, etc.)
|
||||
//! data set is likely preferable.
|
||||
//!
|
||||
//! We have two separate encoding schemes: a skiplist-like approach, and a
|
||||
//! compressed bitset. The datasets we consider mostly use the skiplist (it's
|
||||
//! smaller) but the lowercase and uppercase sets are sufficiently sparse for
|
||||
//! the bitset to be worthwhile -- for those sets the bitset is a 2x size win.
|
||||
//! Since the bitset is also faster, this seems an obvious choice. (As a
|
||||
//! historical note, the bitset was also the prior implementation, so its
|
||||
//! relative complexity had already been paid).
|
||||
//!
|
||||
//! ## The bitset
|
||||
//!
|
||||
//! The primary idea is that we 'flatten' the Unicode ranges into an enormous
|
||||
//! bitset. To represent any arbitrary codepoint in a raw bitset, we would need
|
||||
//! over 130 kilobytes of data per character set -- way too much for our
|
||||
//! purposes.
|
||||
//!
|
||||
//! First, the raw bitset (one bit for every valid `char`, from 0 to 0x10FFFF,
|
||||
//! not skipping the small 'gap') is associated into words (u64) and
|
||||
//! deduplicated. On random data, this would be useless; on our data, this is
|
||||
//! incredibly beneficial -- our data sets have (far) less than 256 unique
|
||||
//! words.
|
||||
//!
|
||||
//! This gives us an array that maps `u8 -> word`; the current algorithm does
|
||||
//! not handle the case of more than 256 unique words, but we are relatively far
|
||||
//! from coming that close.
|
||||
//!
|
||||
//! With that scheme, we now have a single byte for every 64 codepoints.
|
||||
//!
|
||||
//! We further chunk these by some constant N (between 1 and 64 per group,
|
||||
//! dynamically chosen for smallest size), and again deduplicate and store in an
|
||||
//! array (u8 -> [u8; N]).
|
||||
//!
|
||||
//! The bytes of this array map into the words from the bitset above, but we
|
||||
//! apply another trick here: some of these words are similar enough that they
|
||||
//! can be represented by some function of another word. The particular
|
||||
//! functions chosen are rotation, inversion, and shifting (right).
|
||||
//!
|
||||
//! ## The skiplist
|
||||
//!
|
||||
//! The skip list arose out of the desire for an even smaller encoding than the
|
||||
//! bitset -- and was the answer to the question "what is the smallest
|
||||
//! representation we can imagine?". However, it is not necessarily the
|
||||
//! smallest, and if you have a better proposal, please do suggest it!
|
||||
//!
|
||||
//! This is a relatively straightforward encoding. First, we break up all the
|
||||
//! ranges in the input data into offsets from each other, essentially a gap
|
||||
//! encoding. In practice, most gaps are small -- less than u8::MAX -- so we
|
||||
//! store those directly. We make use of the larger gaps (which are nicely
|
||||
//! interspersed already) throughout the dataset to index this data set.
|
||||
//!
|
||||
//! In particular, each run of small gaps (terminating in a large gap) is
|
||||
//! indexed in a separate dataset. That data set stores an index into the
|
||||
//! primary offset list and a prefix sum of that offset list. These are packed
|
||||
//! into a single u32 (11 bits for the offset, 21 bits for the prefix sum).
|
||||
//!
|
||||
//! Lookup proceeds via a binary search in the index and then a straightforward
|
||||
//! linear scan (adding up the offsets) until we reach the needle, and then the
|
||||
//! index of that offset is utilized as the answer to whether we're in the set
|
||||
//! or not.
|
||||
|
||||
use std::collections::{BTreeMap, HashMap};
|
||||
use std::ops::Range;
|
||||
use ucd_parse::Codepoints;
|
||||
|
||||
mod case_mapping;
|
||||
mod raw_emitter;
|
||||
mod skiplist;
|
||||
mod unicode_download;
|
||||
|
||||
use raw_emitter::{emit_codepoints, RawEmitter};
|
||||
@@ -152,9 +226,17 @@ fn main() {
|
||||
std::process::exit(1);
|
||||
});
|
||||
|
||||
// Optional test path, which is a Rust source file testing that the unicode
|
||||
// property lookups are correct.
|
||||
let test_path = std::env::args().nth(2);
|
||||
|
||||
let unicode_data = load_data();
|
||||
let ranges_by_property = &unicode_data.ranges;
|
||||
|
||||
if let Some(path) = test_path {
|
||||
std::fs::write(&path, generate_tests(&write_location, &ranges_by_property)).unwrap();
|
||||
}
|
||||
|
||||
let mut total_bytes = 0;
|
||||
let mut modules = Vec::new();
|
||||
for (property, ranges) in ranges_by_property {
|
||||
@@ -163,7 +245,16 @@ fn main() {
|
||||
emit_codepoints(&mut emitter, &ranges);
|
||||
|
||||
modules.push((property.to_lowercase().to_string(), emitter.file));
|
||||
println!("{:15}: {} bytes, {} codepoints", property, emitter.bytes_used, datapoints,);
|
||||
println!(
|
||||
"{:15}: {} bytes, {} codepoints in {} ranges ({} - {}) using {}",
|
||||
property,
|
||||
emitter.bytes_used,
|
||||
datapoints,
|
||||
ranges.len(),
|
||||
ranges.first().unwrap().start,
|
||||
ranges.last().unwrap().end,
|
||||
emitter.desc,
|
||||
);
|
||||
total_bytes += emitter.bytes_used;
|
||||
}
|
||||
|
||||
@@ -173,7 +264,10 @@ fn main() {
|
||||
"///! This file is generated by src/tools/unicode-table-generator; do not edit manually!\n",
|
||||
);
|
||||
|
||||
table_file.push_str("use super::range_search;\n\n");
|
||||
// Include the range search function
|
||||
table_file.push('\n');
|
||||
table_file.push_str(include_str!("range_search.rs"));
|
||||
table_file.push('\n');
|
||||
|
||||
table_file.push_str(&version());
|
||||
|
||||
@@ -236,21 +330,97 @@ fn fmt_list<V: std::fmt::Debug>(values: impl IntoIterator<Item = V>) -> String {
|
||||
out
|
||||
}
|
||||
|
||||
fn generate_tests(data_path: &str, ranges: &[(&str, Vec<Range<u32>>)]) -> String {
|
||||
let mut s = String::new();
|
||||
s.push_str("#![allow(incomplete_features, unused)]\n");
|
||||
s.push_str("#![feature(const_generics)]\n\n");
|
||||
s.push_str("\n#[allow(unused)]\nuse std::hint;\n");
|
||||
s.push_str(&format!("#[path = \"{}\"]\n", data_path));
|
||||
s.push_str("mod unicode_data;\n\n");
|
||||
|
||||
s.push_str("\nfn main() {\n");
|
||||
|
||||
for (property, ranges) in ranges {
|
||||
s.push_str(&format!(r#" println!("Testing {}");"#, property));
|
||||
s.push('\n');
|
||||
s.push_str(&format!(" {}_true();\n", property.to_lowercase()));
|
||||
s.push_str(&format!(" {}_false();\n", property.to_lowercase()));
|
||||
let mut is_true = Vec::new();
|
||||
let mut is_false = Vec::new();
|
||||
for ch_num in 0..(std::char::MAX as u32) {
|
||||
if std::char::from_u32(ch_num).is_none() {
|
||||
continue;
|
||||
}
|
||||
if ranges.iter().any(|r| r.contains(&ch_num)) {
|
||||
is_true.push(ch_num);
|
||||
} else {
|
||||
is_false.push(ch_num);
|
||||
}
|
||||
}
|
||||
|
||||
s.push_str(&format!(" fn {}_true() {{\n", property.to_lowercase()));
|
||||
generate_asserts(&mut s, property, &is_true, true);
|
||||
s.push_str(" }\n\n");
|
||||
s.push_str(&format!(" fn {}_false() {{\n", property.to_lowercase()));
|
||||
generate_asserts(&mut s, property, &is_false, false);
|
||||
s.push_str(" }\n\n");
|
||||
}
|
||||
|
||||
s.push_str("}");
|
||||
s
|
||||
}
|
||||
|
||||
fn generate_asserts(s: &mut String, property: &str, points: &[u32], truthy: bool) {
|
||||
for range in ranges_from_set(points) {
|
||||
if range.end == range.start + 1 {
|
||||
s.push_str(&format!(
|
||||
" assert!({}unicode_data::{}::lookup({:?}), \"{}\");\n",
|
||||
if truthy { "" } else { "!" },
|
||||
property.to_lowercase(),
|
||||
std::char::from_u32(range.start).unwrap(),
|
||||
range.start,
|
||||
));
|
||||
} else {
|
||||
s.push_str(&format!(" for chn in {:?}u32 {{\n", range));
|
||||
s.push_str(&format!(
|
||||
" assert!({}unicode_data::{}::lookup(std::char::from_u32(chn).unwrap()), \"{{:?}}\", chn);\n",
|
||||
if truthy { "" } else { "!" },
|
||||
property.to_lowercase(),
|
||||
));
|
||||
s.push_str(" }\n");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn ranges_from_set(set: &[u32]) -> Vec<Range<u32>> {
|
||||
let mut ranges = set.iter().map(|e| (*e)..(*e + 1)).collect::<Vec<Range<u32>>>();
|
||||
merge_ranges(&mut ranges);
|
||||
ranges
|
||||
}
|
||||
|
||||
fn merge_ranges(ranges: &mut Vec<Range<u32>>) {
|
||||
loop {
|
||||
let mut new_ranges = Vec::new();
|
||||
let mut idx_iter = 0..(ranges.len() - 1);
|
||||
let mut should_insert_last = true;
|
||||
while let Some(idx) = idx_iter.next() {
|
||||
let cur = ranges[idx].clone();
|
||||
let next = ranges[idx + 1].clone();
|
||||
if cur.end == next.start {
|
||||
let _ = idx_iter.next(); // skip next as we're merging it in
|
||||
if idx_iter.next().is_none() {
|
||||
// We're merging the last element
|
||||
should_insert_last = false;
|
||||
}
|
||||
new_ranges.push(cur.start..next.end);
|
||||
} else {
|
||||
// We're *not* merging the last element
|
||||
should_insert_last = true;
|
||||
new_ranges.push(cur);
|
||||
}
|
||||
}
|
||||
new_ranges.push(ranges.last().unwrap().clone());
|
||||
if should_insert_last {
|
||||
new_ranges.push(ranges.last().unwrap().clone());
|
||||
}
|
||||
if new_ranges.len() == ranges.len() {
|
||||
*ranges = new_ranges;
|
||||
break;
|
||||
@@ -258,4 +428,12 @@ fn merge_ranges(ranges: &mut Vec<Range<u32>>) {
|
||||
*ranges = new_ranges;
|
||||
}
|
||||
}
|
||||
|
||||
let mut last_end = None;
|
||||
for range in ranges {
|
||||
if let Some(last) = last_end {
|
||||
assert!(range.start > last, "{:?}", range);
|
||||
}
|
||||
last_end = Some(range.end);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -0,0 +1,93 @@
|
||||
/// Tests whether `needle` is set in a two-level, deduplicated bitset.
///
/// * `chunk_idx_map` maps each group of `CHUNK_SIZE` 64-bit words to a row of
///   `bitset_chunk_idx`; a needle past its end is reported as not in the set.
/// * `bitset_chunk_idx` rows hold one word index per 64-codepoint word.
/// * A word index below `CANONICAL` selects a word from `bitset_canonical`
///   directly. A larger index selects `bitset_canonicalized[idx - CANONICAL]`,
///   a `(base_index, mapping)` pair that derives the word from a canonical
///   one.
///
/// The `mapping` byte is decoded as: bit 6 set means invert the base word
/// first; the low 6 bits are an amount; bit 7 set means shift right by that
/// amount, otherwise rotate left by it.
///
/// Returns `true` if the bit for `needle` is set in the selected word.
#[inline(always)]
fn bitset_search<
    const N: usize,
    const CHUNK_SIZE: usize,
    const N1: usize,
    const CANONICAL: usize,
    const CANONICALIZED: usize,
>(
    needle: u32,
    chunk_idx_map: &[u8; N],
    bitset_chunk_idx: &[[u8; CHUNK_SIZE]; N1],
    bitset_canonical: &[u64; CANONICAL],
    bitset_canonicalized: &[(u8, u8); CANONICALIZED],
) -> bool {
    // Index of the 64-bit word that would hold `needle` in a flat bitset.
    let bucket_idx = (needle / 64) as usize;
    let chunk_map_idx = bucket_idx / CHUNK_SIZE;
    let chunk_piece = bucket_idx % CHUNK_SIZE;
    let chunk_idx = if let Some(&v) = chunk_idx_map.get(chunk_map_idx) {
        v
    } else {
        // Beyond the stored chunks: not a member.
        return false;
    };
    let idx = bitset_chunk_idx[chunk_idx as usize][chunk_piece] as usize;
    let word = if let Some(word) = bitset_canonical.get(idx) {
        *word
    } else {
        // Not a canonical word: rebuild it from a canonical one.
        let (real_idx, mapping) = bitset_canonicalized[idx - bitset_canonical.len()];
        let mut word = bitset_canonical[real_idx as usize];
        let should_invert = mapping & (1 << 6) != 0;
        if should_invert {
            word = !word;
        }
        // Lower 6 bits
        let quantity = mapping & ((1 << 6) - 1);
        if mapping & (1 << 7) != 0 {
            // shift
            word >>= quantity as u64;
        } else {
            word = word.rotate_left(quantity as u32);
        }
        word
    };
    (word & (1 << (needle % 64) as u64)) != 0
}
|
||||
|
||||
/// Extracts the prefix sum stored in the low 21 bits of a packed
/// short-offset-run header.
fn decode_prefix_sum(short_offset_run_header: u32) -> u32 {
    short_offset_run_header & ((1 << 21) - 1)
}
|
||||
|
||||
/// Extracts the value stored in the upper 11 bits (above the 21-bit prefix
/// sum) of a packed short-offset-run header, as a `usize`.
fn decode_length(short_offset_run_header: u32) -> usize {
    (short_offset_run_header >> 21) as usize
}
|
||||
|
||||
#[inline(always)]
|
||||
fn skip_search<const SOR: usize, const OFFSETS: usize>(
|
||||
needle: u32,
|
||||
short_offset_runs: &[u32; SOR],
|
||||
offsets: &[u8; OFFSETS],
|
||||
) -> bool {
|
||||
// Note that this *cannot* be past the end of the array, as the last
|
||||
// element is greater than std::char::MAX (the largest possible needle).
|
||||
//
|
||||
// So, we cannot have found it (i.e. Ok(idx) + 1 != length) and the correct
|
||||
// location cannot be past it, so Err(idx) != length either.
|
||||
//
|
||||
// This means that we can avoid bounds checking for the accesses below, too.
|
||||
let last_idx =
|
||||
match short_offset_runs.binary_search_by_key(&(needle << 11), |header| header << 11) {
|
||||
Ok(idx) => idx + 1,
|
||||
Err(idx) => idx,
|
||||
};
|
||||
|
||||
let mut offset_idx = decode_length(short_offset_runs[last_idx]);
|
||||
let length = if let Some(next) = short_offset_runs.get(last_idx + 1) {
|
||||
decode_length(*next) - offset_idx
|
||||
} else {
|
||||
offsets.len() - offset_idx
|
||||
};
|
||||
let prev =
|
||||
last_idx.checked_sub(1).map(|prev| decode_prefix_sum(short_offset_runs[prev])).unwrap_or(0);
|
||||
|
||||
let total = needle - prev;
|
||||
let mut prefix_sum = 0;
|
||||
for _ in 0..(length - 1) {
|
||||
let offset = offsets[offset_idx];
|
||||
prefix_sum += offset as u32;
|
||||
if prefix_sum > total {
|
||||
break;
|
||||
}
|
||||
offset_idx += 1;
|
||||
}
|
||||
offset_idx % 2 == 1
|
||||
}
|
||||
@@ -1,55 +1,19 @@
|
||||
//! This implements the core logic of the compression scheme used to compactly
|
||||
//! encode the Unicode character classes.
|
||||
//!
|
||||
//! The primary idea is that we 'flatten' the Unicode ranges into an enormous
|
||||
//! bitset. To represent any arbitrary codepoint in a raw bitset, we would need
|
||||
//! over 17 kilobytes of data per character set -- way too much for our
|
||||
//! purposes.
|
||||
//!
|
||||
//! We have two primary goals with the encoding: we want to be compact, because
|
||||
//! these tables often end up in ~every Rust program (especially the
|
||||
//! grapheme_extend table, used for str debugging), including those for embedded
|
||||
//! targets (where space is important). We also want to be relatively fast,
|
||||
//! though this is more of a nice to have rather than a key design constraint.
|
||||
//! In practice, due to modern processor design these two are closely related.
|
||||
//!
|
||||
//! The encoding scheme here compresses the bitset by first deduplicating the
|
||||
//! "words" (64 bits on all platforms). In practice very few words are present
|
||||
//! in most data sets.
|
||||
//!
|
||||
//! This gives us an array that maps `u8 -> word` (if we ever went beyond 256
|
||||
//! words, we could go to u16 -> word or have some dual compression scheme
|
||||
//! mapping into two separate sets; currently this is not dealt with).
|
||||
//!
|
||||
//! With that scheme, we now have a single byte for every 64 codepoints. We
|
||||
//! further group these by 16 (arbitrarily chosen), and again deduplicate and
|
||||
//! store in an array (u8 -> [u8; 16]).
|
||||
//!
|
||||
//! The indices into this array represent ranges of 64*16 = 1024 codepoints.
|
||||
//!
|
||||
//! This already reduces the top-level array to at most 1,086 bytes, but in
|
||||
//! practice we usually can encode in far fewer (the first couple Unicode planes
|
||||
//! are dense).
|
||||
//!
|
||||
//! The last byte of this top-level array is pulled out to a separate static
|
||||
//! and trailing zeros are dropped; this is simply because grapheme_extend and
|
||||
//! case_ignorable have a single entry in the 896th entry, so this shrinks them
|
||||
//! down considerably.
|
||||
|
||||
use crate::fmt_list;
|
||||
use std::collections::{BTreeSet, HashMap};
|
||||
use std::collections::{BTreeMap, BTreeSet, HashMap};
|
||||
use std::convert::TryFrom;
|
||||
use std::fmt::Write;
|
||||
use std::fmt::{self, Write};
|
||||
use std::ops::Range;
|
||||
|
||||
/// Accumulates the generated source and size accounting for one property's
/// lookup table.
#[derive(Clone)]
pub struct RawEmitter {
    /// Generated Rust source text emitted so far.
    pub file: String,
    /// Short description printed by the generator's summary output.
    // NOTE(review): presumably names the encoding chosen for this property;
    // the code that fills it in is not visible here -- confirm.
    pub desc: String,
    /// Running total of the bytes occupied by the emitted static tables.
    pub bytes_used: usize,
}
|
||||
|
||||
impl RawEmitter {
|
||||
pub fn new() -> RawEmitter {
|
||||
RawEmitter { file: String::new(), bytes_used: 0 }
|
||||
RawEmitter { file: String::new(), bytes_used: 0, desc: String::new() }
|
||||
}
|
||||
|
||||
fn blank_line(&mut self) {
|
||||
@@ -59,30 +23,100 @@ fn blank_line(&mut self) {
|
||||
writeln!(&mut self.file, "").unwrap();
|
||||
}
|
||||
|
||||
fn emit_bitset(&mut self, words: &[u64]) {
|
||||
fn emit_bitset(&mut self, ranges: &[Range<u32>]) {
|
||||
let last_code_point = ranges.last().unwrap().end;
|
||||
// bitset for every bit in the codepoint range
|
||||
//
|
||||
// + 2 to ensure an all zero word to use for padding
|
||||
let mut buckets = vec![0u64; (last_code_point as usize / 64) + 2];
|
||||
for range in ranges {
|
||||
for codepoint in range.clone() {
|
||||
let bucket = codepoint as usize / 64;
|
||||
let bit = codepoint as u64 % 64;
|
||||
buckets[bucket] |= 1 << bit;
|
||||
}
|
||||
}
|
||||
|
||||
let mut words = buckets;
|
||||
// Ensure that there's a zero word in the dataset, used for padding and
|
||||
// such.
|
||||
words.push(0);
|
||||
let unique_words =
|
||||
words.iter().cloned().collect::<BTreeSet<_>>().into_iter().collect::<Vec<_>>();
|
||||
if unique_words.len() > u8::max_value() as usize {
|
||||
panic!("cannot pack {} into 8 bits", unique_words.len());
|
||||
}
|
||||
// needed for the chunk mapping to work
|
||||
assert_eq!(unique_words[0], 0, "has a zero word");
|
||||
let canonicalized = Canonicalized::canonicalize(&unique_words);
|
||||
|
||||
let word_indices = unique_words
|
||||
.iter()
|
||||
.cloned()
|
||||
.enumerate()
|
||||
.map(|(idx, word)| (word, u8::try_from(idx).unwrap()))
|
||||
.collect::<HashMap<_, _>>();
|
||||
let word_indices = canonicalized.unique_mapping.clone();
|
||||
let compressed_words = words.iter().map(|w| word_indices[w]).collect::<Vec<u8>>();
|
||||
|
||||
let mut idx = words.iter().map(|w| word_indices[w]).collect::<Vec<u8>>();
|
||||
let chunk_length = 16;
|
||||
for _ in 0..(chunk_length - (idx.len() % chunk_length)) {
|
||||
assert_eq!(unique_words[0], 0, "first word is all zeros");
|
||||
// pad out bitset index with zero words so we have all chunks of 16
|
||||
idx.push(0);
|
||||
let mut best = None;
|
||||
for length in 1..=64 {
|
||||
let mut temp = self.clone();
|
||||
temp.emit_chunk_map(word_indices[&0], &compressed_words, length);
|
||||
if let Some((_, size)) = best {
|
||||
if temp.bytes_used < size {
|
||||
best = Some((length, temp.bytes_used));
|
||||
}
|
||||
} else {
|
||||
best = Some((length, temp.bytes_used));
|
||||
}
|
||||
}
|
||||
self.emit_chunk_map(word_indices[&0], &compressed_words, best.unwrap().0);
|
||||
|
||||
struct Bits(u64);
|
||||
impl fmt::Debug for Bits {
|
||||
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
||||
write!(f, "0b{:064b}", self.0)
|
||||
}
|
||||
}
|
||||
|
||||
writeln!(
|
||||
&mut self.file,
|
||||
"static BITSET_CANONICAL: [u64; {}] = [{}];",
|
||||
canonicalized.canonical_words.len(),
|
||||
fmt_list(canonicalized.canonical_words.iter().map(|v| Bits(*v))),
|
||||
)
|
||||
.unwrap();
|
||||
self.bytes_used += 8 * canonicalized.canonical_words.len();
|
||||
writeln!(
|
||||
&mut self.file,
|
||||
"static BITSET_MAPPING: [(u8, u8); {}] = [{}];",
|
||||
canonicalized.canonicalized_words.len(),
|
||||
fmt_list(&canonicalized.canonicalized_words),
|
||||
)
|
||||
.unwrap();
|
||||
// 8 bit index into shifted words, 7 bits for shift + optional flip
|
||||
// We only need it for the words that we removed by applying a shift and
|
||||
// flip to them.
|
||||
self.bytes_used += 2 * canonicalized.canonicalized_words.len();
|
||||
|
||||
self.blank_line();
|
||||
|
||||
writeln!(&mut self.file, "pub fn lookup(c: char) -> bool {{").unwrap();
|
||||
writeln!(&mut self.file, " super::bitset_search(",).unwrap();
|
||||
writeln!(&mut self.file, " c as u32,").unwrap();
|
||||
writeln!(&mut self.file, " &BITSET_CHUNKS_MAP,").unwrap();
|
||||
writeln!(&mut self.file, " &BITSET_INDEX_CHUNKS,").unwrap();
|
||||
writeln!(&mut self.file, " &BITSET_CANONICAL,").unwrap();
|
||||
writeln!(&mut self.file, " &BITSET_MAPPING,").unwrap();
|
||||
writeln!(&mut self.file, " )").unwrap();
|
||||
writeln!(&mut self.file, "}}").unwrap();
|
||||
}
|
||||
|
||||
fn emit_chunk_map(&mut self, zero_at: u8, compressed_words: &[u8], chunk_length: usize) {
|
||||
let mut compressed_words = compressed_words.to_vec();
|
||||
for _ in 0..(chunk_length - (compressed_words.len() % chunk_length)) {
|
||||
// pad out bitset index with zero words so we have all chunks of
|
||||
// chunk_length
|
||||
compressed_words.push(zero_at);
|
||||
}
|
||||
|
||||
let mut chunks = BTreeSet::new();
|
||||
for chunk in idx.chunks(chunk_length) {
|
||||
for chunk in compressed_words.chunks(chunk_length) {
|
||||
chunks.insert(chunk);
|
||||
}
|
||||
let chunk_map = chunks
|
||||
@@ -92,23 +126,10 @@ fn emit_bitset(&mut self, words: &[u64]) {
|
||||
.map(|(idx, chunk)| (chunk, idx))
|
||||
.collect::<HashMap<_, _>>();
|
||||
let mut chunk_indices = Vec::new();
|
||||
for chunk in idx.chunks(chunk_length) {
|
||||
for chunk in compressed_words.chunks(chunk_length) {
|
||||
chunk_indices.push(chunk_map[chunk]);
|
||||
}
|
||||
writeln!(
|
||||
&mut self.file,
|
||||
"static BITSET_LAST_CHUNK_MAP: (u16, u8) = ({}, {});",
|
||||
chunk_indices.len() - 1,
|
||||
chunk_indices.pop().unwrap(),
|
||||
)
|
||||
.unwrap();
|
||||
self.bytes_used += 3;
|
||||
// Strip out the empty pieces, presuming our above pop() made us now
|
||||
// have some trailing zeros.
|
||||
assert_eq!(unique_words[0], 0, "first word is all zeros");
|
||||
while let Some(0) = chunk_indices.last() {
|
||||
chunk_indices.pop();
|
||||
}
|
||||
|
||||
writeln!(
|
||||
&mut self.file,
|
||||
"static BITSET_CHUNKS_MAP: [u8; {}] = [{}];",
|
||||
@@ -119,52 +140,253 @@ fn emit_bitset(&mut self, words: &[u64]) {
|
||||
self.bytes_used += chunk_indices.len();
|
||||
writeln!(
|
||||
&mut self.file,
|
||||
"static BITSET_INDEX_CHUNKS: [[u8; 16]; {}] = [{}];",
|
||||
"static BITSET_INDEX_CHUNKS: [[u8; {}]; {}] = [{}];",
|
||||
chunk_length,
|
||||
chunks.len(),
|
||||
fmt_list(chunks.iter()),
|
||||
)
|
||||
.unwrap();
|
||||
self.bytes_used += 16 * chunks.len();
|
||||
writeln!(
|
||||
&mut self.file,
|
||||
"static BITSET: [u64; {}] = [{}];",
|
||||
unique_words.len(),
|
||||
fmt_list(&unique_words),
|
||||
)
|
||||
.unwrap();
|
||||
self.bytes_used += 8 * unique_words.len();
|
||||
}
|
||||
|
||||
pub fn emit_lookup(&mut self) {
|
||||
writeln!(&mut self.file, "pub fn lookup(c: char) -> bool {{").unwrap();
|
||||
writeln!(&mut self.file, " super::range_search(",).unwrap();
|
||||
writeln!(&mut self.file, " c as u32,").unwrap();
|
||||
writeln!(&mut self.file, " &BITSET_CHUNKS_MAP,").unwrap();
|
||||
writeln!(&mut self.file, " BITSET_LAST_CHUNK_MAP,").unwrap();
|
||||
writeln!(&mut self.file, " &BITSET_INDEX_CHUNKS,").unwrap();
|
||||
writeln!(&mut self.file, " &BITSET,").unwrap();
|
||||
writeln!(&mut self.file, " )").unwrap();
|
||||
writeln!(&mut self.file, "}}").unwrap();
|
||||
self.bytes_used += chunk_length * chunks.len();
|
||||
}
|
||||
}
|
||||
|
||||
pub fn emit_codepoints(emitter: &mut RawEmitter, ranges: &[Range<u32>]) {
|
||||
emitter.blank_line();
|
||||
|
||||
let last_code_point = ranges.last().unwrap().end;
|
||||
// bitset for every bit in the codepoint range
|
||||
//
|
||||
// + 2 to ensure an all zero word to use for padding
|
||||
let mut buckets = vec![0u64; (last_code_point as usize / 64) + 2];
|
||||
for range in ranges {
|
||||
for codepoint in range.clone() {
|
||||
let bucket = codepoint as usize / 64;
|
||||
let bit = codepoint as u64 % 64;
|
||||
buckets[bucket] |= 1 << bit;
|
||||
}
|
||||
}
|
||||
let mut bitset = emitter.clone();
|
||||
bitset.emit_bitset(&ranges);
|
||||
|
||||
emitter.emit_bitset(&buckets);
|
||||
emitter.blank_line();
|
||||
emitter.emit_lookup();
|
||||
let mut skiplist = emitter.clone();
|
||||
skiplist.emit_skiplist(&ranges);
|
||||
|
||||
if bitset.bytes_used <= skiplist.bytes_used {
|
||||
*emitter = bitset;
|
||||
emitter.desc = format!("bitset");
|
||||
} else {
|
||||
*emitter = skiplist;
|
||||
emitter.desc = format!("skiplist");
|
||||
}
|
||||
}
|
||||
|
||||
/// Result of compressing a set of distinct bitset words: some words are kept
/// verbatim ("canonical"), the rest are expressed as a cheap transformation
/// (rotate / invert / shift) of one of the canonical words.
struct Canonicalized {
    /// Words stored raw, as full `u64`s.
    canonical_words: Vec<u64>,
    /// Words stored as `(index into canonical_words, encoded mapping byte)`.
    ///
    /// Mapping byte layout (as produced by `canonicalize`): the low 6 bits hold
    /// the rotate/shift amount, bit 6 is set when the canonical word must be
    /// inverted, and bit 7 is set when the amount is a right shift rather than
    /// a rotation.
    canonicalized_words: Vec<(u8, u8)>,

    /// Maps an input unique word to the associated index (u8) which is into
    /// canonical_words or canonicalized_words (in order).
    unique_mapping: HashMap<u64, u8>,
}

impl Canonicalized {
    /// Partitions `unique_words` into canonical words and words derived from
    /// them, returning the tables plus the per-word index.
    ///
    /// # Panics
    ///
    /// Panics if `unique_words` contains duplicates, or if the number of words
    /// (or any canonical index) does not fit in a `u8`.
    fn canonicalize(unique_words: &[u64]) -> Self {
        #[derive(Copy, Clone, Debug)]
        enum Mapping {
            Rotate(u32),
            Invert,
            RotateAndInvert(u32),
            ShiftRight(u32),
        }

        /// Finds how `a` can be derived from `b`, if at all.
        ///
        /// Candidates are tried in a fixed priority order (rotation, inversion,
        /// rotation + inversion, right shift), smallest amount first; only the
        /// first hit is reported since we are not interested in further
        /// mappings between the same pair.
        fn find_mapping(a: u64, b: u64) -> Option<Mapping> {
            // All possible distinct rotations.
            (1..64u32)
                .find(|&rotation| a.rotate_right(rotation) == b)
                .map(Mapping::Rotate)
                .or_else(|| if !a == b { Some(Mapping::Invert) } else { None })
                // All possible distinct rotations, inverted.
                .or_else(|| {
                    (1..64u32)
                        .find(|&rotation| !a.rotate_right(rotation) == b)
                        .map(Mapping::RotateAndInvert)
                })
                // All possible shifts.
                .or_else(|| {
                    (1..64u32).find(|&shift_by| a == (b >> shift_by)).map(Mapping::ShiftRight)
                })
        }

        // key is the word being mapped to
        let mut mappings: BTreeMap<u64, Vec<(u64, Mapping)>> = BTreeMap::new();
        for &a in unique_words {
            for &b in unique_words {
                // skip self
                if a == b {
                    continue;
                }
                if let Some(how) = find_mapping(a, b) {
                    mappings.entry(b).or_default().push((a, how));
                }
            }
        }

        // These are the bitset words which will be represented "raw" (as a u64)
        let mut canonical_words = Vec::new();
        // These are mapped words, which will be represented by an index into
        // the canonical_words and a Mapping; u16 when encoded.
        let mut canonicalized_words = Vec::new();
        let mut unique_mapping = HashMap::new();

        #[derive(Debug, PartialEq, Eq)]
        enum UniqueMapping {
            Canonical(usize),
            Canonicalized(usize),
        }

        loop {
            // Map 0 first, so that it is the first canonical word whenever it
            // is a mapping target: 0s are quite common in the overall dataset,
            // and it is wasteful to have to go through a mapping function to
            // determine that we have a zero.
            //
            // Otherwise, take the target with the most entries (the last such
            // target on ties). Every derived word is mapped directly onto the
            // chosen target, so no transitive chains need to be resolved.
            //
            // FIXME: Experiment with choosing most common words in overall data
            // set for canonical when possible. In the future, we may need a
            // more sophisticated algorithm to identify which keys to prefer as
            // canonical.
            let to = if mappings.contains_key(&0) {
                0
            } else {
                match mappings.iter().max_by_key(|(_, sources)| sources.len()) {
                    Some((&word, _)) => word,
                    None => break,
                }
            };

            let mapped_from = mappings.remove(&to).unwrap();
            for (from, how) in &mapped_from {
                // Remove the entries which mapped to this one.
                //
                // We do not assert that this is present, because there may be
                // no mappings to the `from` word; that's fine.
                mappings.remove(from);
                assert_eq!(
                    unique_mapping
                        .insert(*from, UniqueMapping::Canonicalized(canonicalized_words.len())),
                    None
                );
                // `to` is pushed onto `canonical_words` below, so the current
                // length is the index it will receive.
                canonicalized_words.push((canonical_words.len(), *how));

                // Remove the now-canonicalized word from other mappings, to
                // ensure that we deprioritize them in the next iteration of
                // the outer loop.
                for sources in mappings.values_mut() {
                    sources.retain(|&(word, _)| word != *from);
                }
            }
            assert!(
                unique_mapping
                    .insert(to, UniqueMapping::Canonical(canonical_words.len()))
                    .is_none()
            );
            canonical_words.push(to);

            // Remove the now-canonical word from other mappings, to ensure that
            // we deprioritize them in the next iteration of the outer loop.
            for sources in mappings.values_mut() {
                sources.retain(|&(word, _)| word != to);
            }
        }

        // Any words which we couldn't shrink, just stick into the canonical
        // words.
        //
        // FIXME: work harder -- there are more possibilities for mapping
        // functions (e.g., multiplication, shifting instead of rotation, etc.)
        // We'll probably always have some slack though so this loop will still
        // be needed.
        for &w in unique_words {
            if let std::collections::hash_map::Entry::Vacant(slot) = unique_mapping.entry(w) {
                slot.insert(UniqueMapping::Canonical(canonical_words.len()));
                canonical_words.push(w);
            }
        }
        assert_eq!(canonicalized_words.len() + canonical_words.len(), unique_words.len());
        assert_eq!(unique_mapping.len(), unique_words.len());

        // Flatten the two index spaces into one: canonical words come first,
        // canonicalized words follow.
        let canonical_count = canonical_words.len();
        let unique_mapping = unique_mapping
            .into_iter()
            .map(|(word, slot)| {
                let flat = match slot {
                    UniqueMapping::Canonical(idx) => idx,
                    UniqueMapping::Canonicalized(idx) => canonical_count + idx,
                };
                (word, u8::try_from(flat).unwrap())
            })
            .collect::<HashMap<_, _>>();

        // Sanity check: every input word received its own index.
        let mut distinct_indices = BTreeSet::new();
        for &w in unique_words {
            let idx = unique_mapping.get(&w).unwrap();
            assert!(distinct_indices.insert(idx));
        }

        const LOWER_6: u32 = (1 << 6) - 1;
        const INVERT_FLAG: u8 = 1 << 6;
        const SHIFT_FLAG: u8 = 1 << 7;

        // Checks that the amount fits the 6 bits reserved for it.
        let low_bits = |amount: u32| -> u8 {
            assert_eq!(amount, amount & LOWER_6);
            amount as u8
        };

        let canonicalized_words = canonicalized_words
            .into_iter()
            .map(|(canonical_idx, how)| {
                let encoded = match how {
                    Mapping::RotateAndInvert(amount) => INVERT_FLAG | low_bits(amount),
                    Mapping::Rotate(amount) => low_bits(amount),
                    Mapping::Invert => INVERT_FLAG,
                    Mapping::ShiftRight(shift_by) => SHIFT_FLAG | low_bits(shift_by),
                };
                (u8::try_from(canonical_idx).unwrap(), encoded)
            })
            .collect::<Vec<(u8, u8)>>();
        Canonicalized { unique_mapping, canonical_words, canonicalized_words }
    }
}
|
||||
|
||||
@@ -0,0 +1,98 @@
|
||||
use crate::fmt_list;
|
||||
use crate::raw_emitter::RawEmitter;
|
||||
use std::convert::TryInto;
|
||||
use std::fmt::Write as _;
|
||||
use std::ops::Range;
|
||||
|
||||
/// This will get packed into a single u32 before inserting into the data set.
#[derive(Debug, PartialEq)]
struct ShortOffsetRunHeader {
    /// Note, we only allow for 21 bits here.
    prefix_sum: u32,

    /// Note, we actually only allow for 11 bits here. This should be enough --
    /// our largest sets are around ~1400 offsets long.
    start_idx: u16,
}

impl ShortOffsetRunHeader {
    /// Number of low bits of the packed value holding `prefix_sum`.
    const PREFIX_SUM_BITS: u32 = 21;
    /// Number of high bits of the packed value holding `start_idx`.
    const START_IDX_BITS: u32 = 11;

    /// Packs the header into one `u32`: `start_idx` occupies the upper 11 bits
    /// and `prefix_sum` the lower 21 bits.
    ///
    /// # Panics
    ///
    /// Panics if either field does not fit in its bit budget.
    fn pack(&self) -> u32 {
        assert!(
            self.start_idx < (1 << Self::START_IDX_BITS),
            "start_idx {} does not fit in {} bits",
            self.start_idx,
            Self::START_IDX_BITS
        );
        assert!(
            self.prefix_sum < (1 << Self::PREFIX_SUM_BITS),
            "prefix_sum {} does not fit in {} bits",
            self.prefix_sum,
            Self::PREFIX_SUM_BITS
        );

        u32::from(self.start_idx) << Self::PREFIX_SUM_BITS | self.prefix_sum
    }
}
|
||||
|
||||
impl RawEmitter {
|
||||
pub fn emit_skiplist(&mut self, ranges: &[Range<u32>]) {
|
||||
let mut offsets = Vec::<u32>::new();
|
||||
let points = ranges.iter().flat_map(|r| vec![r.start, r.end]).collect::<Vec<u32>>();
|
||||
let mut offset = 0;
|
||||
for pt in points {
|
||||
let delta = pt - offset;
|
||||
offsets.push(delta);
|
||||
offset = pt;
|
||||
}
|
||||
// Guaranteed to terminate, as it's impossible to subtract a value this
|
||||
// large from a valid char.
|
||||
offsets.push(std::char::MAX as u32 + 1);
|
||||
let mut coded_offsets: Vec<u8> = Vec::new();
|
||||
let mut short_offset_runs: Vec<ShortOffsetRunHeader> = vec![];
|
||||
let mut iter = offsets.iter().cloned();
|
||||
let mut prefix_sum = 0;
|
||||
loop {
|
||||
let mut any_elements = false;
|
||||
let mut inserted = false;
|
||||
let start = coded_offsets.len();
|
||||
for offset in iter.by_ref() {
|
||||
any_elements = true;
|
||||
prefix_sum += offset;
|
||||
if let Ok(offset) = offset.try_into() {
|
||||
coded_offsets.push(offset);
|
||||
} else {
|
||||
short_offset_runs.push(ShortOffsetRunHeader {
|
||||
start_idx: start.try_into().unwrap(),
|
||||
prefix_sum,
|
||||
});
|
||||
// This is just needed to maintain indices even/odd
|
||||
// correctly.
|
||||
coded_offsets.push(0);
|
||||
inserted = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if !any_elements {
|
||||
break;
|
||||
}
|
||||
// We always append the huge char::MAX offset to the end which
|
||||
// should never be able to fit into the u8 offsets.
|
||||
assert!(inserted);
|
||||
}
|
||||
|
||||
writeln!(
|
||||
&mut self.file,
|
||||
"static SHORT_OFFSET_RUNS: [u32; {}] = [{}];",
|
||||
short_offset_runs.len(),
|
||||
fmt_list(short_offset_runs.iter().map(|v| v.pack()))
|
||||
)
|
||||
.unwrap();
|
||||
self.bytes_used += 4 * short_offset_runs.len();
|
||||
writeln!(
|
||||
&mut self.file,
|
||||
"static OFFSETS: [u8; {}] = [{}];",
|
||||
coded_offsets.len(),
|
||||
fmt_list(&coded_offsets)
|
||||
)
|
||||
.unwrap();
|
||||
self.bytes_used += coded_offsets.len();
|
||||
|
||||
writeln!(&mut self.file, "pub fn lookup(c: char) -> bool {{").unwrap();
|
||||
writeln!(&mut self.file, " super::skip_search(",).unwrap();
|
||||
writeln!(&mut self.file, " c as u32,").unwrap();
|
||||
writeln!(&mut self.file, " &SHORT_OFFSET_RUNS,").unwrap();
|
||||
writeln!(&mut self.file, " &OFFSETS,").unwrap();
|
||||
writeln!(&mut self.file, " )").unwrap();
|
||||
writeln!(&mut self.file, "}}").unwrap();
|
||||
}
|
||||
}
|
||||
@@ -11,10 +11,15 @@
|
||||
|
||||
pub fn fetch_latest() {
|
||||
let directory = Path::new(UNICODE_DIRECTORY);
|
||||
if directory.exists() {
|
||||
eprintln!(
|
||||
"Not refetching unicode data, already exists, please delete {:?} to regenerate",
|
||||
directory
|
||||
);
|
||||
return;
|
||||
}
|
||||
if let Err(e) = std::fs::create_dir_all(directory) {
|
||||
if e.kind() != std::io::ErrorKind::AlreadyExists {
|
||||
panic!("Failed to create {:?}: {}", UNICODE_DIRECTORY, e);
|
||||
}
|
||||
panic!("Failed to create {:?}: {}", UNICODE_DIRECTORY, e);
|
||||
}
|
||||
let output = Command::new("curl").arg(URL_PREFIX.to_owned() + README).output().unwrap();
|
||||
if !output.status.success() {
|
||||
|
||||
Reference in New Issue
Block a user