Auto merge of #145479 - Kmeakin:km/hardcode-char-is-control, r=joboet

Hard-code `char::is_control`

Split off from https://github.com/rust-lang/rust/pull/145219

According to
https://www.unicode.org/policies/stability_policy.html#Property_Value, the set of codepoints in `Cc` will never change. So we can hard-code the patterns to match against instead of using a table.

This doesn't change the generated assembly, since the lookup table is small enough that [LLVM is able to inline the whole search](https://godbolt.org/z/bG8dM37YG). But this does reduce the chance of regressions if LLVM's heuristics change in the future, and means less generated Rust code checked in to `unicode-data.rs`.
This commit is contained in:
bors
2025-08-30 14:18:21 +00:00
4 changed files with 5 additions and 28 deletions
+5 -1
View File
@@ -950,7 +950,11 @@ pub fn is_alphanumeric(self) -> bool {
#[stable(feature = "rust1", since = "1.0.0")]
#[inline]
pub fn is_control(self) -> bool {
// NOTE(review): this span reads like a diff hunk whose +/- markers are not shown. The
// table lookup on the next line appears to be the pre-change body that the hard-coded
// `matches!` further down replaces — confirm against the actual file before compiling,
// since the two expressions cannot both be the function's tail.
unicode::Cc(self)
// According to
// https://www.unicode.org/policies/stability_policy.html#Property_Value,
// the set of codepoints in `Cc` will never change.
// So we can just hard-code the patterns to match against instead of using a table.
//
// The pattern covers exactly two ranges: U+0000..=U+001F and U+007F..=U+009F.
matches!(self, '\0'..='\x1f' | '\x7f'..='\u{9f}')
}
/// Returns `true` if this `char` has the `Grapheme_Extend` property.
-1
View File
@@ -10,7 +10,6 @@
#[rustfmt::skip]
pub(crate) use unicode_data::alphabetic::lookup as Alphabetic;
pub(crate) use unicode_data::cc::lookup as Cc;
pub(crate) use unicode_data::grapheme_extend::lookup as Grapheme_Extend;
pub(crate) use unicode_data::lowercase::lookup as Lowercase;
pub(crate) use unicode_data::n::lookup as N;
-25
View File
@@ -358,31 +358,6 @@ pub fn lookup(c: char) -> bool {
}
}
#[rustfmt::skip]
/// Table-driven lookup for the `cc` data set: two static tables plus a `lookup`
/// function that hands them to `super::skip_search`.
pub mod cc {
use super::ShortOffsetRunHeader;
// Run headers consumed by `skip_search`. There is a single entry; the compile-time
// check in `lookup` requires the last header's `.0` value to be greater than `char::MAX`.
// NOTE(review): the arguments are presumably (start index into `OFFSETS`, code point
// bound) — confirm against `ShortOffsetRunHeader::new`.
static SHORT_OFFSET_RUNS: [ShortOffsetRunHeader; 1] = [
ShortOffsetRunHeader::new(0, 1114272),
];
// Offset data consumed by `skip_search`.
// NOTE(review): presumably alternating run lengths (outside the set / inside the set);
// read that way, 0, 32, 95, 33 would describe U+0000..=U+001F and U+007F..=U+009F —
// confirm against `skip_search`.
static OFFSETS: [u8; 5] = [
0, 32, 95, 33, 0,
];
/// Returns the result of `super::skip_search` for `c` over this module's tables
/// (presumably: whether `c` belongs to the set — confirm against `skip_search`).
///
/// The table invariants that the `unsafe` call depends on are asserted inside a
/// `const` block, so a malformed table fails the build instead of misbehaving at run time.
pub fn lookup(c: char) -> bool {
// Evaluated at compile time: these assertions establish the preconditions stated in
// the SAFETY comment below.
const {
// The last header must lie beyond every valid `char`.
assert!(SHORT_OFFSET_RUNS.last().unwrap().0 > char::MAX as u32);
// Every header's start index must be a valid index into `OFFSETS`.
// A `while` loop is used because `for` loops are not available in const evaluation.
let mut i = 0;
while i < SHORT_OFFSET_RUNS.len() {
assert!(SHORT_OFFSET_RUNS[i].start_index() < OFFSETS.len());
i += 1;
}
}
// SAFETY: We just ensured the last element of `SHORT_OFFSET_RUNS` is greater than `std::char::MAX`
// and the start indices of all elements in `SHORT_OFFSET_RUNS` are smaller than `OFFSETS.len()`.
unsafe { super::skip_search(c, &SHORT_OFFSET_RUNS, &OFFSETS) }
}
}
#[rustfmt::skip]
pub mod grapheme_extend {
use super::ShortOffsetRunHeader;
@@ -92,7 +92,6 @@
"Case_Ignorable",
"Grapheme_Extend",
"White_Space",
"Cc",
"N",
];