From 1303caee2b420b796bfeed32e49ee56b3b256be9 Mon Sep 17 00:00:00 2001 From: mehdiakiki Date: Sun, 5 Apr 2026 19:59:30 -0400 Subject: [PATCH] Add rlib digest to identify Rust object files --- compiler/rustc_codegen_gcc/src/back/lto.rs | 6 +- compiler/rustc_codegen_llvm/src/back/lto.rs | 6 +- .../rustc_codegen_ssa/src/back/archive.rs | 8 ++- compiler/rustc_codegen_ssa/src/back/link.rs | 45 ++++++++------- compiler/rustc_codegen_ssa/src/back/mod.rs | 1 + .../rustc_codegen_ssa/src/back/rmeta_link.rs | 56 +++++++++++++++++++ compiler/rustc_codegen_ssa/src/lib.rs | 19 +------ 7 files changed, 97 insertions(+), 44 deletions(-) create mode 100644 compiler/rustc_codegen_ssa/src/back/rmeta_link.rs diff --git a/compiler/rustc_codegen_gcc/src/back/lto.rs b/compiler/rustc_codegen_gcc/src/back/lto.rs index 401d4c244d5a..9eea59676d86 100644 --- a/compiler/rustc_codegen_gcc/src/back/lto.rs +++ b/compiler/rustc_codegen_gcc/src/back/lto.rs @@ -24,9 +24,10 @@ use gccjit::OutputKind; use object::read::archive::ArchiveFile; use rustc_codegen_ssa::back::lto::SerializedModule; +use rustc_codegen_ssa::back::rmeta_link; use rustc_codegen_ssa::back::write::{CodegenContext, FatLtoInput, SharedEmitter}; use rustc_codegen_ssa::traits::*; -use rustc_codegen_ssa::{CompiledModule, ModuleCodegen, ModuleKind, looks_like_rust_object_file}; +use rustc_codegen_ssa::{CompiledModule, ModuleCodegen, ModuleKind}; use rustc_data_structures::memmap::Mmap; use rustc_data_structures::profiling::SelfProfilerRef; use rustc_errors::{DiagCtxt, DiagCtxtHandle}; @@ -63,6 +64,7 @@ fn prepare_lto(each_linked_rlib_for_lto: &[PathBuf], dcx: DiagCtxtHandle<'_>) -> let archive_data = unsafe { Mmap::map(File::open(path).expect("couldn't open rlib")).expect("couldn't map rlib") }; + let digest = rmeta_link::read_from_data(&archive_data, path).unwrap(); let archive = ArchiveFile::parse(&*archive_data).expect("wanted an rlib"); let obj_files = archive .members() @@ -71,7 +73,7 @@ fn prepare_lto(each_linked_rlib_for_lto: 
&[PathBuf], dcx: DiagCtxtHandle<'_>) -> .ok() .and_then(|c| std::str::from_utf8(c.name()).ok().map(|name| (name.trim(), c))) }) - .filter(|&(name, _)| looks_like_rust_object_file(name)); + .filter(|&(name, _)| digest.rust_object_files.iter().any(|f| f == name)); for (name, child) in obj_files { info!("adding bitcode from {}", name); let path = tmp_path.path().join(name); diff --git a/compiler/rustc_codegen_llvm/src/back/lto.rs b/compiler/rustc_codegen_llvm/src/back/lto.rs index 09863961c9d6..b88dc949783a 100644 --- a/compiler/rustc_codegen_llvm/src/back/lto.rs +++ b/compiler/rustc_codegen_llvm/src/back/lto.rs @@ -8,11 +8,12 @@ use object::read::archive::ArchiveFile; use object::{Object, ObjectSection}; use rustc_codegen_ssa::back::lto::{SerializedModule, ThinModule, ThinShared}; +use rustc_codegen_ssa::back::rmeta_link; use rustc_codegen_ssa::back::write::{ CodegenContext, FatLtoInput, SharedEmitter, TargetMachineFactoryFn, ThinLtoInput, }; use rustc_codegen_ssa::traits::*; -use rustc_codegen_ssa::{CompiledModule, ModuleCodegen, ModuleKind, looks_like_rust_object_file}; +use rustc_codegen_ssa::{CompiledModule, ModuleCodegen, ModuleKind}; use rustc_data_structures::fx::FxHashMap; use rustc_data_structures::memmap::Mmap; use rustc_data_structures::profiling::SelfProfilerRef; @@ -96,6 +97,7 @@ fn prepare_lto( .expect("couldn't map rlib") }; let archive = ArchiveFile::parse(&*archive_data).expect("wanted an rlib"); + let digest = rmeta_link::read(&archive, &archive_data, &path).unwrap(); let obj_files = archive .members() .filter_map(|child| { @@ -103,7 +105,7 @@ fn prepare_lto( .ok() .and_then(|c| std::str::from_utf8(c.name()).ok().map(|name| (name.trim(), c))) }) - .filter(|&(name, _)| looks_like_rust_object_file(name)); + .filter(|&(name, _)| digest.rust_object_files.iter().any(|f| f == name)); for (name, child) in obj_files { info!("adding bitcode from {}", name); match get_bitcode_slice_from_object_data( diff --git a/compiler/rustc_codegen_ssa/src/back/archive.rs 
b/compiler/rustc_codegen_ssa/src/back/archive.rs index 3f12e857391b..473a91be21a6 100644 --- a/compiler/rustc_codegen_ssa/src/back/archive.rs +++ b/compiler/rustc_codegen_ssa/src/back/archive.rs @@ -21,6 +21,7 @@ use tracing::trace; use super::metadata::{create_compressed_metadata_file, search_for_section}; +use super::rmeta_link::{self, RmetaLink}; use crate::common; // Public for ArchiveBuilderBuilder::extract_bundled_libs pub use crate::errors::ExtractBundledLibsError; @@ -314,7 +315,7 @@ pub trait ArchiveBuilder { fn add_archive( &mut self, archive: &Path, - skip: Box<dyn FnMut(&str) -> bool + 'static>, + skip: Box<dyn FnMut(&str, Option<&RmetaLink>) -> bool + 'static>, ) -> io::Result<()>; fn build(self: Box<Self>, output: &Path) -> bool; @@ -402,7 +403,7 @@ impl<'a> ArchiveBuilder for ArArchiveBuilder<'a> { fn add_archive( &mut self, archive_path: &Path, - mut skip: Box<dyn FnMut(&str) -> bool + 'static>, + mut skip: Box<dyn FnMut(&str, Option<&RmetaLink>) -> bool + 'static>, ) -> io::Result<()> { let mut archive_path = archive_path.to_path_buf(); if self.sess.target.llvm_target.contains("-apple-macosx") @@ -418,13 +419,14 @@ fn add_archive( let archive_map = unsafe { Mmap::map(File::open(&archive_path)?)? 
}; let archive = ArchiveFile::parse(&*archive_map) .map_err(|err| io::Error::new(io::ErrorKind::InvalidData, err))?; + let digest = rmeta_link::read(&archive, &archive_map, &archive_path); let archive_index = self.src_archives.len(); for entry in archive.members() { let entry = entry.map_err(|err| io::Error::new(io::ErrorKind::InvalidData, err))?; let file_name = String::from_utf8(entry.name().to_vec()) .map_err(|err| io::Error::new(io::ErrorKind::InvalidData, err))?; - if !skip(&file_name) { + if !skip(&file_name, digest.as_ref()) { if entry.is_thin() { let member_path = archive_path.parent().unwrap().join(Path::new(&file_name)); self.entries.push((file_name.into_bytes(), ArchiveEntry::File(member_path))); diff --git a/compiler/rustc_codegen_ssa/src/back/link.rs b/compiler/rustc_codegen_ssa/src/back/link.rs index cb22aac4e952..be68be7e2f91 100644 --- a/compiler/rustc_codegen_ssa/src/back/link.rs +++ b/compiler/rustc_codegen_ssa/src/back/link.rs @@ -58,12 +58,9 @@ use super::linker::{self, Linker}; use super::metadata::{MetadataPosition, create_wrapper_file}; use super::rpath::{self, RPathConfig}; -use super::{apple, versioned_llvm_target}; +use super::{apple, rmeta_link, versioned_llvm_target}; use crate::base::needs_allocator_shim_for_linking; -use crate::{ - CodegenLintLevels, CompiledModule, CompiledModules, CrateInfo, NativeLib, errors, - looks_like_rust_object_file, -}; +use crate::{CodegenLintLevels, CompiledModule, CompiledModules, CrateInfo, NativeLib, errors}; pub fn ensure_removed(dcx: DiagCtxtHandle<'_>, path: &Path) { if let Err(e) = fs::remove_file(path) { @@ -329,8 +326,11 @@ fn link_rlib<'a>( RlibFlavor::StaticlibBase => None, }; + let mut rust_object_files: Vec<String> = Vec::new(); + for m in &compiled_modules.modules { if let Some(obj) = m.object.as_ref() { + rust_object_files.push(obj.file_name().unwrap().to_str().unwrap().to_string()); ab.add_file(obj); } @@ -383,7 +383,7 @@ fn link_rlib<'a>( packed_bundled_libs.push(wrapper_file); } else { let path = 
find_native_static_library(lib.name.as_str(), lib.verbatim, sess); - ab.add_archive(&path, Box::new(|_| false)).unwrap_or_else(|error| { + ab.add_archive(&path, Box::new(|_, _| false)).unwrap_or_else(|error| { sess.dcx().emit_fatal(errors::AddNativeLibrary { library_path: path, error }) }); } @@ -400,7 +400,7 @@ fn link_rlib<'a>( tmpdir.as_ref(), true, ) { - ab.add_archive(&output_path, Box::new(|_| false)).unwrap_or_else(|error| { + ab.add_archive(&output_path, Box::new(|_, _| false)).unwrap_or_else(|error| { sess.dcx() .emit_fatal(errors::AddNativeLibrary { library_path: output_path, error }); }); @@ -442,6 +442,16 @@ fn link_rlib<'a>( ab.add_file(&lib) } + // Add the rlib digest as the very last member. This records which archive + // members are Rust object files, replacing filename-based heuristics. + if matches!(flavor, RlibFlavor::Normal) { + let digest = rmeta_link::RmetaLink { rust_object_files }; + let digest_data = digest.encode(); + let (wrapper, _) = create_wrapper_file(sess, rmeta_link::SECTION.to_string(), &digest_data); + let digest_file = emit_wrapper_file(sess, &wrapper, tmpdir.as_ref(), rmeta_link::FILENAME); + ab.add_file(&digest_file); + } + ab } @@ -488,14 +498,14 @@ fn link_staticlib( let bundled_libs: FxIndexSet<_> = native_libs.filter_map(|lib| lib.filename).collect(); ab.add_archive( path, - Box::new(move |fname: &str| { - // Ignore metadata files, no matter the name. - if fname == METADATA_FILENAME { + Box::new(move |fname: &str, digest| { + // Ignore metadata and rlib digest files. + if fname == METADATA_FILENAME || fname == rmeta_link::FILENAME { return true; } - // Don't include Rust objects if LTO is enabled - if lto && looks_like_rust_object_file(fname) { + // Don't include Rust objects if LTO is enabled. 
+ if lto && digest.is_some_and(|d| d.rust_object_files.iter().any(|f| f == fname)) { return true; } @@ -516,7 +526,7 @@ fn link_staticlib( for filename in relevant_libs.iter() { let joined = tempdir.as_ref().join(filename.as_str()); let path = joined.as_path(); - ab.add_archive(path, Box::new(|_| false)).unwrap(); + ab.add_archive(path, Box::new(|_, _| false)).unwrap(); } all_native_libs.extend(crate_info.native_libraries[&cnum].iter().cloned()); @@ -3146,7 +3156,6 @@ fn add_static_crate( let bundled_lib_file_names = bundled_lib_file_names.clone(); sess.prof.generic_activity_with_arg("link_altering_rlib", name).run(|| { - let canonical_name = name.replace('-', "_"); let upstream_rust_objects_already_included = are_upstream_rust_objects_already_included(sess); let is_builtins = sess.target.no_builtins || !crate_info.is_no_builtins.contains(&cnum); @@ -3154,15 +3163,13 @@ fn add_static_crate( let mut archive = archive_builder_builder.new_archive_builder(sess); if let Err(error) = archive.add_archive( cratepath, - Box::new(move |f| { - if f == METADATA_FILENAME { + Box::new(move |f, digest| { + if f == METADATA_FILENAME || f == rmeta_link::FILENAME { return true; } - let canonical = f.replace('-', "_"); - let is_rust_object = - canonical.starts_with(&canonical_name) && looks_like_rust_object_file(f); + digest.is_some_and(|d| d.rust_object_files.iter().any(|rf| rf == f)); // If we're performing LTO and this is a rust-generated object // file, then we don't need the object file as it's part of the diff --git a/compiler/rustc_codegen_ssa/src/back/mod.rs b/compiler/rustc_codegen_ssa/src/back/mod.rs index 8d1adb999303..17f6faa942c4 100644 --- a/compiler/rustc_codegen_ssa/src/back/mod.rs +++ b/compiler/rustc_codegen_ssa/src/back/mod.rs @@ -9,6 +9,7 @@ pub(crate) mod linker; pub mod lto; pub mod metadata; +pub mod rmeta_link; pub(crate) mod rpath; pub mod symbol_export; pub mod write; diff --git a/compiler/rustc_codegen_ssa/src/back/rmeta_link.rs 
b/compiler/rustc_codegen_ssa/src/back/rmeta_link.rs new file mode 100644 index 000000000000..071e63d3d3d4 --- /dev/null +++ b/compiler/rustc_codegen_ssa/src/back/rmeta_link.rs @@ -0,0 +1,56 @@ +//! Late-metadata archive member that lists which rlib entries are Rust object files, +//! and potentially other data collected and used when building or linking an rlib. +//! See . + +use std::path::Path; + +use object::read::archive::ArchiveFile; +use rustc_serialize::opaque::mem_encoder::MemEncoder; +use rustc_serialize::opaque::{MAGIC_END_BYTES, MemDecoder}; +use rustc_serialize::{Decodable, Encodable}; + +use super::metadata::search_for_section; + +pub(crate) const FILENAME: &str = "lib.rmeta-link"; +pub(crate) const SECTION: &str = ".rmeta-link"; + +pub struct RmetaLink { + pub rust_object_files: Vec<String>, +} + +impl RmetaLink { + pub(crate) fn encode(&self) -> Vec<u8> { + let mut encoder = MemEncoder::new(); + self.rust_object_files.encode(&mut encoder); + let mut data = encoder.finish(); + data.extend_from_slice(MAGIC_END_BYTES); + data + } + + pub(crate) fn decode(data: &[u8]) -> Option<RmetaLink> { + let mut decoder = MemDecoder::new(data, 0).ok()?; + let rust_object_files = Vec::<String>::decode(&mut decoder); + Some(RmetaLink { rust_object_files }) + } +} + +/// Reads the digest from an already-parsed archive. +pub fn read(archive: &ArchiveFile<'_>, archive_data: &[u8], rlib_path: &Path) -> Option<RmetaLink> { + for entry in archive.members() { + let entry = entry.ok()?; + if entry.name() == FILENAME.as_bytes() { + let data = entry.data(archive_data).ok()?; + let section_data = search_for_section(rlib_path, data, SECTION).ok()?; + return RmetaLink::decode(section_data); + } + } + None +} + +/// Like [`read`], but parses the archive from raw bytes. +/// +/// Use this when the caller's `ArchiveFile` comes from a different version of the `object` crate. 
+pub fn read_from_data(archive_data: &[u8], rlib_path: &Path) -> Option<RmetaLink> { + let archive = ArchiveFile::parse(archive_data).ok()?; + read(&archive, archive_data, rlib_path) +} diff --git a/compiler/rustc_codegen_ssa/src/lib.rs b/compiler/rustc_codegen_ssa/src/lib.rs index 1c266382d027..22290d672c61 100644 --- a/compiler/rustc_codegen_ssa/src/lib.rs +++ b/compiler/rustc_codegen_ssa/src/lib.rs @@ -35,7 +35,7 @@ use rustc_serialize::opaque::{FileEncoder, MemDecoder}; use rustc_serialize::{Decodable, Decoder, Encodable, Encoder}; use rustc_session::Session; -use rustc_session::config::{CrateType, OutputFilenames, OutputType, RUST_CGU_EXT}; +use rustc_session::config::{CrateType, OutputFilenames, OutputType}; use rustc_session::cstore::{self, CrateSource}; use rustc_session::lint::builtin::LINKER_MESSAGES; use rustc_span::Symbol; @@ -272,23 +272,6 @@ pub fn provide(providers: &mut Providers) { providers.queries.global_backend_features = |_tcx: TyCtxt<'_>, ()| vec![]; } -/// Checks if the given filename ends with the `.rcgu.o` extension that `rustc` -/// uses for the object files it generates. -pub fn looks_like_rust_object_file(filename: &str) -> bool { - let path = Path::new(filename); - let ext = path.extension().and_then(|s| s.to_str()); - if ext != Some(OutputType::Object.extension()) { - // The file name does not end with ".o", so it can't be an object file. - return false; - } - - // Strip the ".o" at the end - let ext2 = path.file_stem().and_then(|s| Path::new(s).extension()).and_then(|s| s.to_str()); - - // Check if the "inner" extension - ext2 == Some(RUST_CGU_EXT) -} - const RLINK_VERSION: u32 = 1; const RLINK_MAGIC: &[u8] = b"rustlink";