mirror of
https://github.com/rust-lang/rust.git
synced 2026-04-27 18:57:42 +03:00
char: µoptimise UTF-16 surrogates decoding
According to Godbolt¹, on x86_64 using a bitwise AND (`&`) produces slightly better code than using subtraction. The readability of both is pretty much equivalent, so we might just as well use the shorter option. ¹ https://rust.godbolt.org/z/9jM3ejbMx
This commit is contained in:
@@ -67,7 +67,7 @@ fn next(&mut self) -> Option<Result<char, DecodeUtf16Error>> {
|
||||
}
|
||||
|
||||
// all ok, so lets decode it.
|
||||
let c = (((u - 0xD800) as u32) << 10 | (u2 - 0xDC00) as u32) + 0x1_0000;
|
||||
let c = (((u & 0x3ff) as u32) << 10 | (u2 & 0x3ff) as u32) + 0x1_0000;
|
||||
// SAFETY: we checked that it's a legal unicode value
|
||||
Some(Ok(unsafe { from_u32_unchecked(c) }))
|
||||
}
|
||||
|
||||
@@ -306,6 +306,10 @@ fn check(s: &[u16], expected: &[Result<char, u16>]) {
|
||||
}
|
||||
check(&[0xD800, 0x41, 0x42], &[Err(0xD800), Ok('A'), Ok('B')]);
|
||||
check(&[0xD800, 0], &[Err(0xD800), Ok('\0')]);
|
||||
check(&[0xD800], &[Err(0xD800)]);
|
||||
check(&[0xD840, 0xDC00], &[Ok('\u{20000}')]);
|
||||
check(&[0xD840, 0xD840, 0xDC00], &[Err(0xD840), Ok('\u{20000}')]);
|
||||
check(&[0xDC00, 0xD840], &[Err(0xDC00), Err(0xD840)]);
|
||||
}
|
||||
|
||||
#[test]
|
||||
|
||||
Reference in New Issue
Block a user