From c30e20a04910153b121a82c058948e18e09614da Mon Sep 17 00:00:00 2001 From: Alan Egerton Date: Mon, 16 Feb 2026 13:26:00 +0000 Subject: [PATCH] Use shell-words to parse output from llvm-config llvm-config might output paths that contain spaces, in which case the naive approach of splitting on whitespace breaks; instead we ask llvm-config to quote any paths and use the shell-words crate to parse the output. --- Cargo.lock | 7 +++ compiler/rustc_llvm/Cargo.toml | 1 + compiler/rustc_llvm/build.rs | 90 ++++++++++++++++++++++++++++------ src/tools/tidy/src/deps.rs | 1 + 4 files changed, 85 insertions(+), 14 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 53cac09b7a7a..34f66cc06d67 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -4163,6 +4163,7 @@ version = "0.0.0" dependencies = [ "cc", "libc", + "shell-words", ] [[package]] @@ -5174,6 +5175,12 @@ dependencies = [ "lazy_static", ] +[[package]] +name = "shell-words" +version = "1.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dc6fe69c597f9c37bfeeeeeb33da3530379845f10be461a66d16d03eca2ded77" + [[package]] name = "shlex" version = "1.3.0" diff --git a/compiler/rustc_llvm/Cargo.toml b/compiler/rustc_llvm/Cargo.toml index ad93c7453813..b618bc199d88 100644 --- a/compiler/rustc_llvm/Cargo.toml +++ b/compiler/rustc_llvm/Cargo.toml @@ -12,6 +12,7 @@ libc = "0.2.73" # tidy-alphabetical-start # `cc` updates often break things, so we pin it here. 
cc = "=1.2.16" +shell-words = "1.1.1" # tidy-alphabetical-end [features] diff --git a/compiler/rustc_llvm/build.rs b/compiler/rustc_llvm/build.rs index c58dd64cca5f..7a1c95da29ea 100644 --- a/compiler/rustc_llvm/build.rs +++ b/compiler/rustc_llvm/build.rs @@ -1,8 +1,11 @@ +use std::borrow::Cow; use std::env; use std::ffi::{OsStr, OsString}; use std::fmt::Display; use std::path::{Path, PathBuf}; -use std::process::{Command, Stdio}; +use std::process::{Command, Output, Stdio}; +use std::str::SplitWhitespace; +use std::vec::IntoIter; const OPTIONAL_COMPONENTS: &[&str] = &[ "x86", @@ -86,8 +89,8 @@ fn rerun_if_changed_anything_in_dir(dir: &Path) { } #[track_caller] -fn output(cmd: &mut Command) -> String { - let output = match cmd.stderr(Stdio::inherit()).output() { +fn execute(cmd: &mut Command) -> Output { + let output = match cmd.output() { Ok(status) => status, Err(e) => { println!("\n\nfailed to execute command: {cmd:?}\nerror: {e}\n\n"); @@ -101,7 +104,52 @@ fn output(cmd: &mut Command) -> String { cmd, output.status ); } - String::from_utf8(output.stdout).unwrap() + output +} + +#[track_caller] +fn output(cmd: &mut Command) -> String { + String::from_utf8(execute(cmd.stderr(Stdio::inherit())).stdout).unwrap() +} +#[track_caller] +fn stderr(cmd: &mut Command) -> String { + String::from_utf8(execute(cmd).stderr).unwrap() +} + +enum LlvmConfigOutput { + QuotedPaths(String), + UnquotedPaths(String), +} + +#[derive(Clone)] +enum SplitLlvmConfigOutput<'a> { + QuotedPaths(IntoIter<String>), + UnquotedPaths(SplitWhitespace<'a>), +} + +impl<'a> Iterator for SplitLlvmConfigOutput<'a> { + type Item = Cow<'a, str>; + fn next(&mut self) -> Option<Cow<'a, str>> { + match self { + Self::QuotedPaths(iter) => iter.next().map(Cow::Owned), + Self::UnquotedPaths(iter) => iter.next().map(Cow::Borrowed), + } + } +} + +impl<'a> IntoIterator for &'a LlvmConfigOutput { + type Item = Cow<'a, str>; + type IntoIter = SplitLlvmConfigOutput<'a>; + fn into_iter(self) -> Self::IntoIter { + match self { + 
LlvmConfigOutput::QuotedPaths(output) => SplitLlvmConfigOutput::QuotedPaths( + shell_words::split(&output).expect("matched quotes").into_iter(), + ), + LlvmConfigOutput::UnquotedPaths(output) => { + SplitLlvmConfigOutput::UnquotedPaths(output.split_whitespace()) + } + } + } } fn main() { @@ -125,6 +173,19 @@ fn main() { println!("cargo:rerun-if-changed={}", llvm_config.display()); + // FIXME: `--quote-paths` was added to llvm-config in LLVM 22, so this test (and all its ensuing + // fallback paths) can be removed once we bump the minimum llvm_version >= (22, 0, 0). + let llvm_config_supports_quote_paths = + stderr(Command::new(&llvm_config).arg("--help")).contains("quote-paths"); + + let quoted_split = |mut cmd: Command| { + if llvm_config_supports_quote_paths { + LlvmConfigOutput::QuotedPaths(output(cmd.arg("--quote-paths"))) + } else { + LlvmConfigOutput::UnquotedPaths(output(&mut cmd)) + } + }; + // Test whether we're cross-compiling LLVM. This is a pretty rare case // currently where we're producing an LLVM for a different platform than // what this build script is currently running on. 
@@ -167,7 +228,8 @@ fn main() { // Link in our own LLVM shims, compiled with the same flags as LLVM let mut cmd = Command::new(&llvm_config); cmd.arg("--cxxflags"); - let cxxflags = output(&mut cmd); + let cxxflags = quoted_split(cmd); + let mut cxxflags_iter = cxxflags.into_iter(); let mut cfg = cc::Build::new(); cfg.warnings(false); @@ -180,7 +242,7 @@ fn main() { if std::env::var_os("CI").is_some() && !target.contains("msvc") { cfg.warnings_into_errors(true); } - for flag in cxxflags.split_whitespace() { + for flag in cxxflags_iter.clone() { // Ignore flags like `-m64` when we're doing a cross build if is_crossed && flag.starts_with("-m") { continue; @@ -201,7 +263,7 @@ fn main() { continue; } - cfg.flag(flag); + cfg.flag(&*flag); } for component in &components { @@ -289,13 +351,13 @@ fn main() { } cmd.args(&components); - for lib in output(&mut cmd).split_whitespace() { + for lib in &quoted_split(cmd) { let mut is_static = false; let name = if let Some(stripped) = lib.strip_prefix("-l") { stripped } else if let Some(stripped) = lib.strip_prefix('-') { stripped - } else if Path::new(lib).exists() { + } else if Path::new(&*lib).exists() { // On MSVC llvm-config will print the full name to libraries, but // we're only interested in the name part // On Unix when we get a static library llvm-config will print the @@ -306,7 +368,7 @@ fn main() { // and we transform the zstd part into // cargo:rustc-link-search-native=/usr/local/lib // cargo:rustc-link-lib=static=zstd - let path = Path::new(lib); + let path = Path::new(&*lib); if lib.ends_with(".a") { is_static = true; println!("cargo:rustc-link-search=native={}", path.parent().unwrap().display()); @@ -351,7 +413,7 @@ fn main() { // that those -L directories are the same! 
let mut cmd = Command::new(&llvm_config); cmd.arg(llvm_link_arg).arg("--ldflags"); - for lib in output(&mut cmd).split_whitespace() { + for lib in &quoted_split(cmd) { if is_crossed { if let Some(stripped) = lib.strip_prefix("-LIBPATH:") { println!("cargo:rustc-link-search=native={}", stripped.replace(&host, &target)); @@ -373,7 +435,7 @@ fn main() { // dependencies. let llvm_linker_flags = tracked_env_var_os("LLVM_LINKER_FLAGS"); if let Some(s) = llvm_linker_flags { - for lib in s.into_string().unwrap().split_whitespace() { + for lib in shell_words::split(&s.into_string().unwrap()).expect("matched quotes") { if let Some(stripped) = lib.strip_prefix("-l") { println!("cargo:rustc-link-lib={stripped}"); } else if let Some(stripped) = lib.strip_prefix("-L") { @@ -414,7 +476,7 @@ fn main() { // C++ runtime library if !target.contains("msvc") { if let Some(s) = llvm_static_stdcpp { - assert!(!cxxflags.contains("stdlib=libc++")); + assert!(cxxflags_iter.all(|flag| flag != "stdlib=libc++")); let path = PathBuf::from(s); println!("cargo:rustc-link-search=native={}", path.parent().unwrap().display()); if target.contains("windows") { @@ -422,7 +484,7 @@ fn main() { } else { println!("cargo:rustc-link-lib=static={stdcppname}"); } - } else if cxxflags.contains("stdlib=libc++") { + } else if cxxflags_iter.any(|flag| flag == "stdlib=libc++") { println!("cargo:rustc-link-lib=c++"); } else { println!("cargo:rustc-link-lib={stdcppname}"); diff --git a/src/tools/tidy/src/deps.rs b/src/tools/tidy/src/deps.rs index 24c610b41f3a..551abeab6f6a 100644 --- a/src/tools/tidy/src/deps.rs +++ b/src/tools/tidy/src/deps.rs @@ -426,6 +426,7 @@ pub(crate) struct WorkspaceInfo<'a> { "sha1", "sha2", "sharded-slab", + "shell-words", "shlex", "simd-adler32", "smallvec",