From 7058d23ab230dcafe63e9cbe12ac3a5d64c5a876 Mon Sep 17 00:00:00 2001 From: Yotam Ofek Date: Fri, 14 Feb 2025 07:31:00 +0000 Subject: [PATCH 1/4] Simplify test for escaping with tags --- src/librustdoc/html/escape/tests.rs | 17 ++--------------- 1 file changed, 2 insertions(+), 15 deletions(-) diff --git a/src/librustdoc/html/escape/tests.rs b/src/librustdoc/html/escape/tests.rs index de702e160635..0891249baf70 100644 --- a/src/librustdoc/html/escape/tests.rs +++ b/src/librustdoc/html/escape/tests.rs @@ -47,21 +47,8 @@ fn escape_body_text_with_wbr_makes_sense() { use itertools::Itertools as _; use super::EscapeBodyTextWithWbr as E; - const C: [u8; 3] = [b'a', b'A', b'_']; - for chars in [ - C.into_iter(), - C.into_iter(), - C.into_iter(), - C.into_iter(), - C.into_iter(), - C.into_iter(), - C.into_iter(), - C.into_iter(), - ] - .into_iter() - .multi_cartesian_product() - { - let s = String::from_utf8(chars).unwrap(); + for chars in iter::repeat("aA_").take(8).map(str::chars).multi_cartesian_product() { + let s = chars.into_iter().collect::(); assert_eq!(s.len(), 8); let esc = E(&s).to_string(); assert!(!esc.contains("")); From 9445359ef1013325a8f39cf837fb0eece0b24b97 Mon Sep 17 00:00:00 2001 From: Yotam Ofek Date: Fri, 14 Feb 2025 07:33:45 +0000 Subject: [PATCH 2/4] De-dup escaping logic --- src/librustdoc/html/escape.rs | 32 ++++++++++++++++++++++++++++++-- 1 file changed, 30 insertions(+), 2 deletions(-) diff --git a/src/librustdoc/html/escape.rs b/src/librustdoc/html/escape.rs index ac9e2f42cc6d..ac027e1b4b65 100644 --- a/src/librustdoc/html/escape.rs +++ b/src/librustdoc/html/escape.rs @@ -8,13 +8,41 @@ use pulldown_cmark_escape::FmtWriter; use unicode_segmentation::UnicodeSegmentation; +#[inline(always)] +fn escape(s: &str, mut w: impl fmt::Write, escape_quotes: bool) -> fmt::Result { + // Because the internet is always right, turns out there's not that many + // characters to escape: http://stackoverflow.com/questions/7381974 + let pile_o_bits = s; + let mut last = 0; + for (i, ch) in s.char_indices() { + let s = match ch { + '>' => ">", + '<' => "<", + '&' => "&", + '\'' if escape_quotes => "'", + '"' if escape_quotes => """, + _ => continue, + }; + w.write_str(&pile_o_bits[last..i])?; + w.write_str(s)?; + // NOTE: we only expect single byte characters here - which is fine as long as we + // only match single byte characters + last = i + 1; + } + + if last < s.len() { + w.write_str(&pile_o_bits[last..])?; + } + Ok(()) +} + /// Wrapper struct which will emit the HTML-escaped version of the contained /// string when passed to a format string. pub(crate) struct Escape<'a>(pub &'a str); impl fmt::Display for Escape<'_> { fn fmt(&self, fmt: &mut fmt::Formatter<'_>) -> fmt::Result { - pulldown_cmark_escape::escape_html(FmtWriter(fmt), self.0) + escape(self.0, fmt, true) } } @@ -28,7 +56,7 @@ fn fmt(&self, fmt: &mut fmt::Formatter<'_>) -> fmt::Result { impl fmt::Display for EscapeBodyText<'_> { fn fmt(&self, fmt: &mut fmt::Formatter<'_>) -> fmt::Result { - pulldown_cmark_escape::escape_html_body_text(FmtWriter(fmt), self.0) + escape(self.0, fmt, false) } } From 36b570dcde94caf069d22cef07172577079d820f Mon Sep 17 00:00:00 2001 From: Yotam Ofek Date: Wed, 19 Feb 2025 13:34:58 +0000 Subject: [PATCH 3/4] Make `Escape` and `EscapeBodyText` support lazy processing --- src/librustdoc/html/escape.rs | 35 +++++++++++++++++++----- src/librustdoc/html/escape/tests.rs | 8 ++++++ src/librustdoc/html/format.rs | 2 +- src/librustdoc/html/render/mod.rs | 4 +-- src/librustdoc/html/render/print_item.rs | 8 ++++-- src/librustdoc/lib.rs | 3 ++ 6 files changed, 48 insertions(+), 12 deletions(-) diff --git a/src/librustdoc/html/escape.rs b/src/librustdoc/html/escape.rs index ac027e1b4b65..bdd63a46dabe 100644 --- a/src/librustdoc/html/escape.rs +++ b/src/librustdoc/html/escape.rs @@ -8,7 +8,7 @@ use pulldown_cmark_escape::FmtWriter; use unicode_segmentation::UnicodeSegmentation; -#[inline(always)] +#[inline] fn escape(s: &str, mut w: impl fmt::Write, escape_quotes: bool) -> fmt::Result { // Because the internet is always right, turns out there's not that many // characters to escape: http://stackoverflow.com/questions/7381974 @@ -36,13 +36,30 @@ fn escape(s: &str, mut w: impl fmt::Write, escape_quotes: bool) -> fmt::Result { Ok(()) } +struct WriteEscaped { + writer: W, + escape_quotes: bool, +} + +impl fmt::Write for WriteEscaped { + #[inline] + fn write_str(&mut self, s: &str) -> fmt::Result { + escape(s, &mut self.writer, self.escape_quotes) + } +} + /// Wrapper struct which will emit the HTML-escaped version of the contained /// string when passed to a format string. -pub(crate) struct Escape<'a>(pub &'a str); +pub(crate) struct Escape(pub T); -impl fmt::Display for Escape<'_> { +impl fmt::Display for Escape { + #[inline] fn fmt(&self, fmt: &mut fmt::Formatter<'_>) -> fmt::Result { - escape(self.0, fmt, true) + self.0.fmt( + &mut fmt + .options() + .create_formatter(&mut WriteEscaped { writer: fmt, escape_quotes: true }), + ) } } @@ -52,11 +69,15 @@ fn fmt(&self, fmt: &mut fmt::Formatter<'_>) -> fmt::Result { /// This is only safe to use for text nodes. If you need your output to be /// safely contained in an attribute, use [`Escape`]. If you don't know the /// difference, use [`Escape`]. -pub(crate) struct EscapeBodyText<'a>(pub &'a str); +pub(crate) struct EscapeBodyText(pub T); -impl fmt::Display for EscapeBodyText<'_> { +impl fmt::Display for EscapeBodyText { fn fmt(&self, fmt: &mut fmt::Formatter<'_>) -> fmt::Result { - escape(self.0, fmt, false) + self.0.fmt( + &mut fmt + .options() + .create_formatter(&mut WriteEscaped { writer: fmt, escape_quotes: false }), + ) } } diff --git a/src/librustdoc/html/escape/tests.rs b/src/librustdoc/html/escape/tests.rs index 0891249baf70..9beb137f973e 100644 --- a/src/librustdoc/html/escape/tests.rs +++ b/src/librustdoc/html/escape/tests.rs @@ -1,3 +1,11 @@ +use std::iter; + +#[test] +fn escape() { + use super::Escape as E; + assert_eq!(format!(" {}", E("")), " <World>"); +} + // basic examples #[test] fn escape_body_text_with_wbr() { diff --git a/src/librustdoc/html/format.rs b/src/librustdoc/html/format.rs index 35212d480cfd..4d86a815935c 100644 --- a/src/librustdoc/html/format.rs +++ b/src/librustdoc/html/format.rs @@ -875,7 +875,7 @@ pub(crate) fn print_anchor(did: DefId, text: Symbol, cx: &Context<'_>) -> impl D r#"{text}"#, anchor = fragment(did, cx.tcx()), path = join_path_syms(rust_path), - text = EscapeBodyText(text.as_str()), + text = EscapeBodyText(text), ) } else { f.write_str(text.as_str()) diff --git a/src/librustdoc/html/render/mod.rs b/src/librustdoc/html/render/mod.rs index 8108316a856b..702121be2247 100644 --- a/src/librustdoc/html/render/mod.rs +++ b/src/librustdoc/html/render/mod.rs @@ -901,7 +901,7 @@ fn short_item_info( } DeprecatedSince::Future => String::from("Deprecating in a future version"), DeprecatedSince::NonStandard(since) => { - format!("Deprecated since {}", Escape(since.as_str())) + format!("Deprecated since {}", Escape(since)) } DeprecatedSince::Unspecified | DeprecatedSince::Err => String::from("Deprecated"), }; @@ -1682,7 +1682,7 @@ fn notable_traits_button(ty: &clean::Type, cx: &Context<'_>) -> Optionⓘ", - ty = Escape(&format!("{:#}", print_type(ty, cx))), + ty = Escape(format_args!("{:#}", print_type(ty, cx))), ) }) }) diff --git a/src/librustdoc/html/render/print_item.rs b/src/librustdoc/html/render/print_item.rs index 45e38d67c402..2afe6c53bb55 100644 --- a/src/librustdoc/html/render/print_item.rs +++ b/src/librustdoc/html/render/print_item.rs @@ -486,12 +486,16 @@ fn print_extra_info_tags( import_def_id: Option, ) -> impl Display { fmt::from_fn(move |f| { - fn tag_html(class: &str, title: &str, contents: &str) -> impl Display { + fn tag_html<'a>( + class: impl fmt::Display + 'a, + title: impl fmt::Display + 'a, + contents: impl fmt::Display + 'a, + ) -> impl Display + 'a { fmt::from_fn(move |f| { write!( f, r#"{contents}"#, - title = Escape(title), + title = Escape(&title), ) }) } diff --git a/src/librustdoc/lib.rs b/src/librustdoc/lib.rs index 08777e8f40ac..bd7747e92954 100644 --- a/src/librustdoc/lib.rs +++ b/src/librustdoc/lib.rs @@ -7,7 +7,10 @@ #![feature(ascii_char_variants)] #![feature(deref_patterns)] #![feature(file_buffered)] +#![feature(format_args_nl)] #![feature(formatting_options)] +#![feature(if_let_guard)] +#![feature(impl_trait_in_assoc_type)] #![feature(iter_intersperse)] #![feature(iter_order_by)] #![feature(rustc_private)] From 66bb5826e73d02970dbad54c9fb79c903ccacb99 Mon Sep 17 00:00:00 2001 From: Yotam Ofek Date: Wed, 19 Feb 2025 13:35:16 +0000 Subject: [PATCH 4/4] Make `render_long_plain` return an `impl fmt::Display` --- src/librustdoc/clean/cfg.rs | 10 ++++------ src/librustdoc/html/escape.rs | 1 - src/librustdoc/html/render/mod.rs | 9 ++++++--- src/librustdoc/lib.rs | 2 -- 4 files changed, 10 insertions(+), 12 deletions(-) diff --git a/src/librustdoc/clean/cfg.rs b/src/librustdoc/clean/cfg.rs index 5dc9f17c15fd..764fd2957c55 100644 --- a/src/librustdoc/clean/cfg.rs +++ b/src/librustdoc/clean/cfg.rs @@ -111,7 +111,7 @@ pub(crate) fn render_short_html(&self) -> String { msg } - fn render_long_inner(&self, format: Format) -> String { + fn render_long_inner(&self, format: Format) -> impl fmt::Display { let on = if self.omit_preposition() { " " } else if self.should_use_with_in_description() { @@ -132,14 +132,12 @@ fn render_long_inner(&self, format: Format) -> String { } /// Renders the configuration for long display, as a long HTML description. - pub(crate) fn render_long_html(&self) -> String { - let mut msg = self.render_long_inner(Format::LongHtml); - msg.push('.'); - msg + pub(crate) fn render_long_html(&self) -> impl fmt::Display { + fmt::from_fn(|f| write!(f, "{}.", self.render_long_inner(Format::LongHtml))) } /// Renders the configuration for long display, as a long plain text description. - pub(crate) fn render_long_plain(&self) -> String { + pub(crate) fn render_long_plain(&self) -> impl fmt::Display { self.render_long_inner(Format::LongPlain) } diff --git a/src/librustdoc/html/escape.rs b/src/librustdoc/html/escape.rs index bdd63a46dabe..51663c7aae00 100644 --- a/src/librustdoc/html/escape.rs +++ b/src/librustdoc/html/escape.rs @@ -5,7 +5,6 @@ use std::fmt; -use pulldown_cmark_escape::FmtWriter; use unicode_segmentation::UnicodeSegmentation; #[inline] diff --git a/src/librustdoc/html/render/mod.rs b/src/librustdoc/html/render/mod.rs index 702121be2247..cefa42e15783 100644 --- a/src/librustdoc/html/render/mod.rs +++ b/src/librustdoc/html/render/mod.rs @@ -845,7 +845,10 @@ fn document_item_info( ItemInfo { items } } -fn portability(item: &clean::Item, parent: Option<&clean::Item>) -> Option { +fn portability<'a>( + item: &'a clean::Item, + parent: Option<&'a clean::Item>, +) -> Option { let cfg = match (&item.cfg, parent.and_then(|p| p.cfg.as_ref())) { (Some(cfg), Some(parent_cfg)) => cfg.simplify_with(parent_cfg), (cfg, _) => cfg.as_deref().cloned(), @@ -858,7 +861,7 @@ fn portability(item: &clean::Item, parent: Option<&clean::Item>) -> Option