Rollup merge of #154765 - krtab:doc_ascii_whitespace, r=Mark-Simulacrum,WaffleLapkin

Clarify ascii whitespace exclusion of vertical tab in the doc

This especially means that for `c: char`, `c.is_ascii() && c.is_whitespace()` does **not** imply `c.is_ascii_whitespace()`, which can cause bugs and is highly counterintuitive.
This commit is contained in:
Jacob Pratt
2026-04-14 23:02:33 -04:00
committed by GitHub
4 changed files with 36 additions and 6 deletions
+3
View File
@@ -1932,6 +1932,9 @@ pub const fn is_ascii_graphic(&self) -> bool {
/// U+0020 SPACE, U+0009 HORIZONTAL TAB, U+000A LINE FEED,
/// U+000C FORM FEED, or U+000D CARRIAGE RETURN.
///
/// **Warning:** Because the list above excludes U+000B VERTICAL TAB,
/// `c.is_ascii_whitespace()` is **not** equivalent to `c.is_ascii() && c.is_whitespace()`.
///
/// Rust uses the WhatWG Infra Standard's [definition of ASCII
/// whitespace][infra-aw]. There are several other definitions in
/// wide use. For instance, [the POSIX locale][pct] includes
+3
View File
@@ -1065,6 +1065,9 @@ pub const fn is_ascii_graphic(&self) -> bool {
/// U+0020 SPACE, U+0009 HORIZONTAL TAB, U+000A LINE FEED,
/// U+000C FORM FEED, or U+000D CARRIAGE RETURN.
///
/// **Warning:** Because the list above excludes U+000B VERTICAL TAB,
/// `b.is_ascii_whitespace()` is **not** equivalent to `char::from(b).is_whitespace()`.
///
/// Rust uses the WhatWG Infra Standard's [definition of ASCII
/// whitespace][infra-aw]. There are several other definitions in
/// wide use. For instance, [the POSIX locale][pct] includes
+15 -3
View File
@@ -222,7 +222,11 @@ pub fn escape_ascii(&self) -> EscapeAscii<'_> {
/// Returns a byte slice with leading ASCII whitespace bytes removed.
///
/// 'Whitespace' refers to the definition used by
/// [`u8::is_ascii_whitespace`].
/// [`u8::is_ascii_whitespace`]. Importantly, this definition excludes
/// the `\x0B` byte (vertical tab) even though it has the Unicode [`White_Space`] property
/// and is removed by [`str::trim_start`].
///
/// [`White_Space`]: https://www.unicode.org/reports/tr44/#White_Space
///
/// # Examples
///
@@ -251,7 +255,11 @@ pub const fn trim_ascii_start(&self) -> &[u8] {
/// Returns a byte slice with trailing ASCII whitespace bytes removed.
///
/// 'Whitespace' refers to the definition used by
/// [`u8::is_ascii_whitespace`].
/// [`u8::is_ascii_whitespace`]. Importantly, this definition excludes
/// the `\x0B` byte (vertical tab) even though it has the Unicode [`White_Space`] property
/// and is removed by [`str::trim_end`].
///
/// [`White_Space`]: https://www.unicode.org/reports/tr44/#White_Space
///
/// # Examples
///
@@ -281,7 +289,11 @@ pub const fn trim_ascii_end(&self) -> &[u8] {
/// removed.
///
/// 'Whitespace' refers to the definition used by
/// [`u8::is_ascii_whitespace`].
/// [`u8::is_ascii_whitespace`]. Importantly, this definition excludes
/// the `\x0B` byte (vertical tab) even though it has the Unicode [`White_Space`] property
/// and is removed by [`str::trim`].
///
/// [`White_Space`]: https://www.unicode.org/reports/tr44/#White_Space
///
/// # Examples
///
+15 -3
View File
@@ -1202,6 +1202,9 @@ pub fn split_whitespace(&self) -> SplitWhitespace<'_> {
///
/// This uses the same definition as [`char::is_ascii_whitespace`].
/// To split by Unicode `White_Space` instead, use [`split_whitespace`].
/// Note that because the two definitions differ, `s.split_ascii_whitespace()` and
/// `s.split_whitespace()` can produce different results even when `s.is_ascii()`
/// is `true`: this happens whenever `s` contains U+000B VERTICAL TAB.
///
/// [`split_whitespace`]: str::split_whitespace
///
@@ -2896,9 +2899,12 @@ pub const fn make_ascii_lowercase(&mut self) {
/// Returns a string slice with leading ASCII whitespace removed.
///
/// 'Whitespace' refers to the definition used by
/// [`u8::is_ascii_whitespace`].
/// [`u8::is_ascii_whitespace`]. Importantly, this definition excludes
/// the U+000B code point even though it has the Unicode [`White_Space`] property
/// and is removed by [`str::trim_start`].
///
/// [`u8::is_ascii_whitespace`]: u8::is_ascii_whitespace
/// [`White_Space`]: https://www.unicode.org/reports/tr44/#White_Space
///
/// # Examples
///
@@ -2921,9 +2927,12 @@ pub const fn trim_ascii_start(&self) -> &str {
/// Returns a string slice with trailing ASCII whitespace removed.
///
/// 'Whitespace' refers to the definition used by
/// [`u8::is_ascii_whitespace`].
/// [`u8::is_ascii_whitespace`]. Importantly, this definition excludes
/// the U+000B code point even though it has the Unicode [`White_Space`] property
/// and is removed by [`str::trim_end`].
///
/// [`u8::is_ascii_whitespace`]: u8::is_ascii_whitespace
/// [`White_Space`]: https://www.unicode.org/reports/tr44/#White_Space
///
/// # Examples
///
@@ -2947,9 +2956,12 @@ pub const fn trim_ascii_end(&self) -> &str {
/// removed.
///
/// 'Whitespace' refers to the definition used by
/// [`u8::is_ascii_whitespace`].
/// [`u8::is_ascii_whitespace`]. Importantly, this definition excludes
/// the U+000B code point even though it has the Unicode [`White_Space`] property
/// and is removed by [`str::trim`].
///
/// [`u8::is_ascii_whitespace`]: u8::is_ascii_whitespace
/// [`White_Space`]: https://www.unicode.org/reports/tr44/#White_Space
///
/// # Examples
///