diff --git a/library/core/src/char/methods.rs b/library/core/src/char/methods.rs index ce33fd1d8f9d..6f6557510243 100644 --- a/library/core/src/char/methods.rs +++ b/library/core/src/char/methods.rs @@ -1932,6 +1932,9 @@ pub const fn is_ascii_graphic(&self) -> bool { /// U+0020 SPACE, U+0009 HORIZONTAL TAB, U+000A LINE FEED, /// U+000C FORM FEED, or U+000D CARRIAGE RETURN. /// + /// **Warning:** Because the list above excludes U+000B VERTICAL TAB, + /// `c.is_ascii_whitespace()` is **not** equivalent to `c.is_ascii() && c.is_whitespace()`. + /// /// Rust uses the WhatWG Infra Standard's [definition of ASCII /// whitespace][infra-aw]. There are several other definitions in /// wide use. For instance, [the POSIX locale][pct] includes diff --git a/library/core/src/num/mod.rs b/library/core/src/num/mod.rs index 5ba3695bc45c..e305cff31189 100644 --- a/library/core/src/num/mod.rs +++ b/library/core/src/num/mod.rs @@ -1065,6 +1065,9 @@ pub const fn is_ascii_graphic(&self) -> bool { /// U+0020 SPACE, U+0009 HORIZONTAL TAB, U+000A LINE FEED, /// U+000C FORM FEED, or U+000D CARRIAGE RETURN. /// + /// **Warning:** Because the list above excludes U+000B VERTICAL TAB, + /// `b.is_ascii_whitespace()` is **not** equivalent to `char::from(b).is_whitespace()`. + /// /// Rust uses the WhatWG Infra Standard's [definition of ASCII /// whitespace][infra-aw]. There are several other definitions in /// wide use. For instance, [the POSIX locale][pct] includes diff --git a/library/core/src/slice/ascii.rs b/library/core/src/slice/ascii.rs index edf058c96a52..9db07d8abbbe 100644 --- a/library/core/src/slice/ascii.rs +++ b/library/core/src/slice/ascii.rs @@ -222,7 +222,11 @@ pub fn escape_ascii(&self) -> EscapeAscii<'_> { /// Returns a byte slice with leading ASCII whitespace bytes removed. /// /// 'Whitespace' refers to the definition used by - /// [`u8::is_ascii_whitespace`]. + /// [`u8::is_ascii_whitespace`]. 
Importantly, this definition excludes + /// the `\x0B` byte even though it has the Unicode [`White_Space`] property + /// and is removed by [`str::trim_start`]. + /// + /// [`White_Space`]: https://www.unicode.org/reports/tr44/#White_Space /// /// # Examples /// @@ -251,7 +255,11 @@ pub const fn trim_ascii_start(&self) -> &[u8] { /// Returns a byte slice with trailing ASCII whitespace bytes removed. /// /// 'Whitespace' refers to the definition used by - /// [`u8::is_ascii_whitespace`]. + /// [`u8::is_ascii_whitespace`]. Importantly, this definition excludes + /// the `\x0B` byte even though it has the Unicode [`White_Space`] property + /// and is removed by [`str::trim_end`]. + /// + /// [`White_Space`]: https://www.unicode.org/reports/tr44/#White_Space /// /// # Examples /// @@ -281,7 +289,11 @@ pub const fn trim_ascii_end(&self) -> &[u8] { /// removed. /// /// 'Whitespace' refers to the definition used by - /// [`u8::is_ascii_whitespace`]. + /// [`u8::is_ascii_whitespace`]. Importantly, this definition excludes + /// the `\x0B` byte even though it has the Unicode [`White_Space`] property + /// and is removed by [`str::trim`]. + /// + /// [`White_Space`]: https://www.unicode.org/reports/tr44/#White_Space /// /// # Examples /// diff --git a/library/core/src/str/mod.rs b/library/core/src/str/mod.rs index 1a4493de30a1..73fb4c6b2c87 100644 --- a/library/core/src/str/mod.rs +++ b/library/core/src/str/mod.rs @@ -1202,6 +1202,9 @@ pub fn split_whitespace(&self) -> SplitWhitespace<'_> { /// /// This uses the same definition as [`char::is_ascii_whitespace`]. /// To split by Unicode `Whitespace` instead, use [`split_whitespace`]. + /// Note that because of this difference in definition, even if `s.is_ascii()` + /// is `true`, `s.split_ascii_whitespace()` behavior will differ from `s.split_whitespace()` + /// if `s` contains U+000B VERTICAL TAB. 
/// /// [`split_whitespace`]: str::split_whitespace /// @@ -2896,9 +2899,12 @@ pub const fn make_ascii_lowercase(&mut self) { /// Returns a string slice with leading ASCII whitespace removed. /// /// 'Whitespace' refers to the definition used by - /// [`u8::is_ascii_whitespace`]. + /// [`u8::is_ascii_whitespace`]. Importantly, this definition excludes + /// the U+000B code point even though it has the Unicode [`White_Space`] property + /// and is removed by [`str::trim_start`]. /// /// [`u8::is_ascii_whitespace`]: u8::is_ascii_whitespace + /// [`White_Space`]: https://www.unicode.org/reports/tr44/#White_Space /// /// # Examples /// @@ -2921,9 +2927,12 @@ pub const fn trim_ascii_start(&self) -> &str { /// Returns a string slice with trailing ASCII whitespace removed. /// /// 'Whitespace' refers to the definition used by - /// [`u8::is_ascii_whitespace`]. + /// [`u8::is_ascii_whitespace`]. Importantly, this definition excludes + /// the U+000B code point even though it has the Unicode [`White_Space`] property + /// and is removed by [`str::trim_end`]. /// /// [`u8::is_ascii_whitespace`]: u8::is_ascii_whitespace + /// [`White_Space`]: https://www.unicode.org/reports/tr44/#White_Space /// /// # Examples /// @@ -2947,9 +2956,12 @@ pub const fn trim_ascii_end(&self) -> &str { /// removed. /// /// 'Whitespace' refers to the definition used by - /// [`u8::is_ascii_whitespace`]. + /// [`u8::is_ascii_whitespace`]. Importantly, this definition excludes + /// the U+000B code point even though it has the Unicode [`White_Space`] property + /// and is removed by [`str::trim`]. /// /// [`u8::is_ascii_whitespace`]: u8::is_ascii_whitespace + /// [`White_Space`]: https://www.unicode.org/reports/tr44/#White_Space /// /// # Examples ///