mirror of
https://github.com/rust-lang/rust.git
synced 2026-04-27 18:57:42 +03:00
Auto merge of #145479 - Kmeakin:km/hardcode-char-is-control, r=joboet
Hard-code `char::is_control`
Split off from https://github.com/rust-lang/rust/pull/145219. According to https://www.unicode.org/policies/stability_policy.html#Property_Value, the set of codepoints in `Cc` will never change, so we can hard-code the patterns to match against instead of using a table. This doesn't change the generated assembly, since the lookup table is small enough that [LLVM is able to inline the whole search](https://godbolt.org/z/bG8dM37YG). But this does reduce the chance of regressions if LLVM's heuristics change in the future, and means less generated Rust code checked in to `unicode_data.rs`.
This commit is contained in:
@@ -950,7 +950,11 @@ pub fn is_alphanumeric(self) -> bool {
|
||||
#[stable(feature = "rust1", since = "1.0.0")]
|
||||
#[inline]
|
||||
pub fn is_control(self) -> bool {
|
||||
unicode::Cc(self)
|
||||
// According to
|
||||
// https://www.unicode.org/policies/stability_policy.html#Property_Value,
|
||||
// the set of codepoints in `Cc` will never change.
|
||||
// So we can just hard-code the patterns to match against instead of using a table.
|
||||
matches!(self, '\0'..='\x1f' | '\x7f'..='\u{9f}')
|
||||
}
|
||||
|
||||
/// Returns `true` if this `char` has the `Grapheme_Extend` property.
|
||||
|
||||
@@ -10,7 +10,6 @@
|
||||
|
||||
#[rustfmt::skip]
|
||||
pub(crate) use unicode_data::alphabetic::lookup as Alphabetic;
|
||||
pub(crate) use unicode_data::cc::lookup as Cc;
|
||||
pub(crate) use unicode_data::grapheme_extend::lookup as Grapheme_Extend;
|
||||
pub(crate) use unicode_data::lowercase::lookup as Lowercase;
|
||||
pub(crate) use unicode_data::n::lookup as N;
|
||||
|
||||
@@ -358,31 +358,6 @@ pub fn lookup(c: char) -> bool {
|
||||
}
|
||||
}
|
||||
|
||||
#[rustfmt::skip]
|
||||
pub mod cc {
|
||||
use super::ShortOffsetRunHeader;
|
||||
|
||||
static SHORT_OFFSET_RUNS: [ShortOffsetRunHeader; 1] = [
|
||||
ShortOffsetRunHeader::new(0, 1114272),
|
||||
];
|
||||
static OFFSETS: [u8; 5] = [
|
||||
0, 32, 95, 33, 0,
|
||||
];
|
||||
pub fn lookup(c: char) -> bool {
|
||||
const {
|
||||
assert!(SHORT_OFFSET_RUNS.last().unwrap().0 > char::MAX as u32);
|
||||
let mut i = 0;
|
||||
while i < SHORT_OFFSET_RUNS.len() {
|
||||
assert!(SHORT_OFFSET_RUNS[i].start_index() < OFFSETS.len());
|
||||
i += 1;
|
||||
}
|
||||
}
|
||||
// SAFETY: We just ensured the last element of `SHORT_OFFSET_RUNS` is greater than `std::char::MAX`
|
||||
// and the start indices of all elements in `SHORT_OFFSET_RUNS` are smaller than `OFFSETS.len()`.
|
||||
unsafe { super::skip_search(c, &SHORT_OFFSET_RUNS, &OFFSETS) }
|
||||
}
|
||||
}
|
||||
|
||||
#[rustfmt::skip]
|
||||
pub mod grapheme_extend {
|
||||
use super::ShortOffsetRunHeader;
|
||||
|
||||
@@ -92,7 +92,6 @@
|
||||
"Case_Ignorable",
|
||||
"Grapheme_Extend",
|
||||
"White_Space",
|
||||
"Cc",
|
||||
"N",
|
||||
];
|
||||
|
||||
|
||||
Reference in New Issue
Block a user