diff --git a/compiler/rustc_codegen_ssa/src/lib.rs b/compiler/rustc_codegen_ssa/src/lib.rs index 4eb24d71009a..9bf8fde55fce 100644 --- a/compiler/rustc_codegen_ssa/src/lib.rs +++ b/compiler/rustc_codegen_ssa/src/lib.rs @@ -195,6 +195,7 @@ pub enum CodegenErrors { EmptyVersionNumber, EncodingVersionMismatch { version_array: String, rlink_version: u32 }, RustcVersionMismatch { rustc_version: String }, + CorruptFile, } pub fn provide(providers: &mut Providers) { @@ -265,7 +266,9 @@ pub fn deserialize_rlink( }); } - let mut decoder = MemDecoder::new(&data[4..], 0); + let Some(mut decoder) = MemDecoder::new(&data[4..], 0) else { + return Err(CodegenErrors::CorruptFile); + }; let rustc_version = decoder.read_str(); if rustc_version != sess.cfg_version { return Err(CodegenErrors::RustcVersionMismatch { diff --git a/compiler/rustc_driver_impl/messages.ftl b/compiler/rustc_driver_impl/messages.ftl index 5b39248302e7..31837e017643 100644 --- a/compiler/rustc_driver_impl/messages.ftl +++ b/compiler/rustc_driver_impl/messages.ftl @@ -10,6 +10,8 @@ driver_impl_ice_path_error = the ICE couldn't be written to `{$path}`: {$error} driver_impl_ice_path_error_env = the environment variable `RUSTC_ICE` is set to `{$env_var}` driver_impl_ice_version = rustc {$version} running on {$triple} +driver_impl_rlink_corrupt_file = corrupt metadata encountered in `{$file}` + driver_impl_rlink_empty_version_number = The input does not contain version number driver_impl_rlink_encoding_version_mismatch = .rlink file was produced with encoding version `{$version_array}`, but the current version is `{$rlink_version}` diff --git a/compiler/rustc_driver_impl/src/lib.rs b/compiler/rustc_driver_impl/src/lib.rs index ba6b9ef07846..30cc41ecea04 100644 --- a/compiler/rustc_driver_impl/src/lib.rs +++ b/compiler/rustc_driver_impl/src/lib.rs @@ -96,7 +96,7 @@ pub(super) fn install() {} use crate::session_diagnostics::{ RLinkEmptyVersionNumber, RLinkEncodingVersionMismatch, RLinkRustcVersionMismatch, - RLinkWrongFileType, RlinkNotAFile, RlinkUnableToRead, + RLinkWrongFileType, RlinkCorruptFile, RlinkNotAFile, RlinkUnableToRead, }; rustc_fluent_macro::fluent_messages! { "../messages.ftl" } @@ -645,8 +645,7 @@ fn process_rlink(sess: &Session, compiler: &interface::Compiler) { match err { CodegenErrors::WrongFileType => dcx.emit_fatal(RLinkWrongFileType), CodegenErrors::EmptyVersionNumber => dcx.emit_fatal(RLinkEmptyVersionNumber), - CodegenErrors::EncodingVersionMismatch { version_array, rlink_version } => sess - .dcx() + CodegenErrors::EncodingVersionMismatch { version_array, rlink_version } => dcx .emit_fatal(RLinkEncodingVersionMismatch { version_array, rlink_version }), CodegenErrors::RustcVersionMismatch { rustc_version } => { dcx.emit_fatal(RLinkRustcVersionMismatch { @@ -654,6 +653,9 @@ fn process_rlink(sess: &Session, compiler: &interface::Compiler) { current_version: sess.cfg_version, }) } + CodegenErrors::CorruptFile => { + dcx.emit_fatal(RlinkCorruptFile { file: file.display().to_string() }); + } }; } }; diff --git a/compiler/rustc_driver_impl/src/session_diagnostics.rs b/compiler/rustc_driver_impl/src/session_diagnostics.rs index 1a9683e840af..864b122233a4 100644 --- a/compiler/rustc_driver_impl/src/session_diagnostics.rs +++ b/compiler/rustc_driver_impl/src/session_diagnostics.rs @@ -32,6 +32,12 @@ pub(crate) struct RLinkRustcVersionMismatch<'a> { #[diag(driver_impl_rlink_no_a_file)] pub(crate) struct RlinkNotAFile; +#[derive(Diagnostic)] +#[diag(driver_impl_rlink_corrupt_file)] +pub(crate) struct RlinkCorruptFile { + pub file: String, +} + #[derive(Diagnostic)] #[diag(driver_impl_ice)] pub(crate) struct Ice; diff --git a/compiler/rustc_incremental/messages.ftl b/compiler/rustc_incremental/messages.ftl index e74173b24a97..de2177ebb6e0 100644 --- a/compiler/rustc_incremental/messages.ftl +++ b/compiler/rustc_incremental/messages.ftl @@ -21,6 +21,8 @@ incremental_cargo_help_2 = incremental_copy_workproduct_to_cache = error copying object file `{$from}` to incremental directory as `{$to}`: {$err} +incremental_corrupt_file = corrupt incremental compilation artifact found at `{$path}`. This file will automatically be ignored and deleted. If you see this message repeatedly or can provoke it without manually manipulating the compiler's artifacts, please file an issue. The incremental compilation system relies on hardlinks and filesystem locks behaving correctly, and may not deal well with OS crashes, so whatever information you can provide about your filesystem or other state may be very relevant. + incremental_create_dep_graph = failed to create dependency graph at `{$path}`: {$err} incremental_create_incr_comp_dir = diff --git a/compiler/rustc_incremental/src/errors.rs b/compiler/rustc_incremental/src/errors.rs index 61bb0353a9f4..e94a7fb876bb 100644 --- a/compiler/rustc_incremental/src/errors.rs +++ b/compiler/rustc_incremental/src/errors.rs @@ -306,3 +306,9 @@ pub struct DeleteWorkProduct<'a> { pub path: &'a Path, pub err: std::io::Error, } + +#[derive(Diagnostic)] +#[diag(incremental_corrupt_file)] +pub struct CorruptFile<'a> { + pub path: &'a Path, +} diff --git a/compiler/rustc_incremental/src/persist/load.rs b/compiler/rustc_incremental/src/persist/load.rs index 26aaa24771fe..c96d22ce45d7 100644 --- a/compiler/rustc_incremental/src/persist/load.rs +++ b/compiler/rustc_incremental/src/persist/load.rs @@ -115,7 +115,12 @@ fn load_dep_graph(sess: &Session) -> LoadResult<(Arc, WorkPr if let LoadResult::Ok { data: (work_products_data, start_pos) } = load_result { // Decode the list of work_products - let mut work_product_decoder = MemDecoder::new(&work_products_data[..], start_pos); + let Some(mut work_product_decoder) = + MemDecoder::new(&work_products_data[..], start_pos) + else { + sess.dcx().emit_warn(errors::CorruptFile { path: &work_products_path }); + return LoadResult::DataOutOfDate; + }; let work_products: Vec = Decodable::decode(&mut work_product_decoder); @@ -145,7 +150,10 @@ fn load_dep_graph(sess: &Session) -> LoadResult<(Arc, WorkPr LoadResult::DataOutOfDate => LoadResult::DataOutOfDate, LoadResult::LoadDepGraph(path, err) => LoadResult::LoadDepGraph(path, err), LoadResult::Ok { data: (bytes, start_pos) } => { - let mut decoder = MemDecoder::new(&bytes, start_pos); + let Some(mut decoder) = MemDecoder::new(&bytes, start_pos) else { + sess.dcx().emit_warn(errors::CorruptFile { path: &path }); + return LoadResult::DataOutOfDate; + }; let prev_commandline_args_hash = u64::decode(&mut decoder); if prev_commandline_args_hash != expected_hash { @@ -181,9 +189,14 @@ pub fn load_query_result_cache(sess: &Session) -> Option> { let _prof_timer = sess.prof.generic_activity("incr_comp_load_query_result_cache"); - match load_data(&query_cache_path(sess), sess) { + let path = query_cache_path(sess); + match load_data(&path, sess) { LoadResult::Ok { data: (bytes, start_pos) } => { - Some(OnDiskCache::new(sess, bytes, start_pos)) + let cache = OnDiskCache::new(sess, bytes, start_pos).unwrap_or_else(|| { + sess.dcx().emit_warn(errors::CorruptFile { path: &path }); + OnDiskCache::new_empty(sess.source_map()) + }); + Some(cache) } _ => Some(OnDiskCache::new_empty(sess.source_map())), } diff --git a/compiler/rustc_metadata/src/locator.rs b/compiler/rustc_metadata/src/locator.rs index 7de03be6da62..fa33eccd2a1c 100644 --- a/compiler/rustc_metadata/src/locator.rs +++ b/compiler/rustc_metadata/src/locator.rs @@ -853,7 +853,12 @@ fn get_metadata_section<'p>( slice_owned(mmap, Deref::deref) } }; - let blob = MetadataBlob(raw_bytes); + let Some(blob) = MetadataBlob::new(raw_bytes) else { + return Err(MetadataError::LoadFailure(format!( + "corrupt metadata encountered in {}", + filename.display() + ))); + }; match blob.check_compatibility(cfg_version) { Ok(()) => Ok(blob), Err(None) => Err(MetadataError::LoadFailure(format!( diff --git a/compiler/rustc_metadata/src/rmeta/decoder.rs b/compiler/rustc_metadata/src/rmeta/decoder.rs index bb68c6eaf092..1058f58da131 100644 --- a/compiler/rustc_metadata/src/rmeta/decoder.rs +++ b/compiler/rustc_metadata/src/rmeta/decoder.rs @@ -40,10 +40,9 @@ mod cstore_impl; /// A reference to the raw binary version of crate metadata. -/// A `MetadataBlob` internally is just a reference counted pointer to -/// the actual data, so cloning it is cheap. -#[derive(Clone)] -pub(crate) struct MetadataBlob(pub(crate) OwnedSlice); +/// This struct applies [`MemDecoder`]'s validation when constructed +/// so that later constructions are guaranteed to succeed. +pub(crate) struct MetadataBlob(OwnedSlice); impl std::ops::Deref for MetadataBlob { type Target = [u8]; @@ -54,6 +53,16 @@ fn deref(&self) -> &[u8] { } } +impl MetadataBlob { + pub fn new(slice: OwnedSlice) -> Option { + if MemDecoder::new(&*slice, 0).is_some() { Some(Self(slice)) } else { None } + } + + pub fn bytes(&self) -> &OwnedSlice { + &self.0 + } +} + /// A map from external crate numbers (as decoded from some crate file) to /// local crate numbers (as generated during this session). Each external /// crate may refer to types in other external crates, and each has their @@ -165,7 +174,14 @@ fn tcx(self) -> Option> { fn decoder(self, pos: usize) -> DecodeContext<'a, 'tcx> { let tcx = self.tcx(); DecodeContext { - opaque: MemDecoder::new(self.blob(), pos), + // FIXME: This unwrap should never panic because we check that it won't when creating + // `MetadataBlob`. Ideally we'd just have a `MetadataDecoder` and hand out subslices of + // it as we do elsewhere in the compiler using `MetadataDecoder::split_at`. But we own + // the data for the decoder so holding onto the `MemDecoder` too would make us a + // self-referential struct which is downright goofy because `MetadataBlob` is already + // self-referential. Probably `MemDecoder` should contain an `OwnedSlice`, but that + // demands a significant refactoring due to our crate graph. + opaque: MemDecoder::new(self.blob(), pos).unwrap(), cdata: self.cdata(), blob: self.blob(), sess: self.sess().or(tcx.map(|tcx| tcx.sess)), @@ -393,7 +409,7 @@ fn with_position(&mut self, pos: usize, f: F) -> R where F: FnOnce(&mut Self) -> R, { - let new_opaque = MemDecoder::new(self.opaque.data(), pos); + let new_opaque = self.opaque.split_at(pos); let old_opaque = mem::replace(&mut self.opaque, new_opaque); let old_state = mem::replace(&mut self.lazy_state, LazyState::NoNode); let r = f(self); diff --git a/compiler/rustc_metadata/src/rmeta/def_path_hash_map.rs b/compiler/rustc_metadata/src/rmeta/def_path_hash_map.rs index 9950bc1c31f7..861bf6b2769e 100644 --- a/compiler/rustc_metadata/src/rmeta/def_path_hash_map.rs +++ b/compiler/rustc_metadata/src/rmeta/def_path_hash_map.rs @@ -48,7 +48,7 @@ impl<'a, 'tcx> Decodable> for DefPathHashMapRef<'static> fn decode(d: &mut DecodeContext<'a, 'tcx>) -> DefPathHashMapRef<'static> { let len = d.read_usize(); let pos = d.position(); - let o = d.blob().clone().0.slice(|blob| &blob[pos..pos + len]); + let o = d.blob().bytes().clone().slice(|blob| &blob[pos..pos + len]); // Although we already have the data we need via the `OwnedSlice`, we still need // to advance the `DecodeContext`'s position so it's in a valid state after diff --git a/compiler/rustc_middle/src/query/on_disk_cache.rs b/compiler/rustc_middle/src/query/on_disk_cache.rs index 2dcb58729ff9..6815e4263d56 100644 --- a/compiler/rustc_middle/src/query/on_disk_cache.rs +++ b/compiler/rustc_middle/src/query/on_disk_cache.rs @@ -154,24 +154,22 @@ fn new(tcx: TyCtxt<'_>, file: &SourceFile) -> EncodedSourceFileId { impl<'sess> OnDiskCache<'sess> { /// Creates a new `OnDiskCache` instance from the serialized data in `data`. - pub fn new(sess: &'sess Session, data: Mmap, start_pos: usize) -> Self { - debug_assert!(sess.opts.incremental.is_some()); + pub fn new(sess: &'sess Session, data: Mmap, start_pos: usize) -> Option { + assert!(sess.opts.incremental.is_some()); - // Wrap in a scope so we can borrow `data`. - let footer: Footer = { - let mut decoder = MemDecoder::new(&data, start_pos); + let mut decoder = MemDecoder::new(&data, start_pos)?; - // Decode the *position* of the footer, which can be found in the - // last 8 bytes of the file. - let footer_pos = decoder - .with_position(decoder.len() - IntEncodedWithFixedSize::ENCODED_SIZE, |decoder| { - IntEncodedWithFixedSize::decode(decoder).0 as usize - }); - // Decode the file footer, which contains all the lookup tables, etc. - decoder.with_position(footer_pos, |decoder| decode_tagged(decoder, TAG_FILE_FOOTER)) - }; + // Decode the *position* of the footer, which can be found in the + // last 8 bytes of the file. + let footer_pos = decoder + .with_position(decoder.len() - IntEncodedWithFixedSize::ENCODED_SIZE, |decoder| { + IntEncodedWithFixedSize::decode(decoder).0 as usize + }); + // Decode the file footer, which contains all the lookup tables, etc. + let footer: Footer = + decoder.with_position(footer_pos, |decoder| decode_tagged(decoder, TAG_FILE_FOOTER)); - Self { + Some(Self { serialized_data: RwLock::new(Some(data)), file_index_to_stable_id: footer.file_index_to_stable_id, file_index_to_file: Default::default(), @@ -184,7 +182,7 @@ pub fn new(sess: &'sess Session, data: Mmap, start_pos: usize) -> Self { expn_data: footer.expn_data, foreign_expn_data: footer.foreign_expn_data, hygiene_context: Default::default(), - } + }) } pub fn new_empty(source_map: &'sess SourceMap) -> Self { @@ -437,7 +435,8 @@ fn with_decoder<'a, 'tcx, T, F: for<'s> FnOnce(&mut CacheDecoder<'s, 'tcx>) -> T let serialized_data = self.serialized_data.read(); let mut decoder = CacheDecoder { tcx, - opaque: MemDecoder::new(serialized_data.as_deref().unwrap_or(&[]), pos.to_usize()), + opaque: MemDecoder::new(serialized_data.as_deref().unwrap_or(&[]), pos.to_usize()) + .unwrap(), source_map: self.source_map, file_index_to_file: &self.file_index_to_file, file_index_to_stable_id: &self.file_index_to_stable_id, @@ -558,7 +557,7 @@ fn with_position(&mut self, pos: usize, f: F) -> R { debug_assert!(pos < self.opaque.len()); - let new_opaque = MemDecoder::new(self.opaque.data(), pos); + let new_opaque = self.opaque.split_at(pos); let old_opaque = mem::replace(&mut self.opaque, new_opaque); let r = f(self); self.opaque = old_opaque; diff --git a/compiler/rustc_query_system/src/dep_graph/serialized.rs b/compiler/rustc_query_system/src/dep_graph/serialized.rs index b426bb888f46..8e91d9dd60b3 100644 --- a/compiler/rustc_query_system/src/dep_graph/serialized.rs +++ b/compiler/rustc_query_system/src/dep_graph/serialized.rs @@ -182,15 +182,13 @@ impl SerializedDepGraph { pub fn decode(d: &mut MemDecoder<'_>) -> Arc { // The last 16 bytes are the node count and edge count. debug!("position: {:?}", d.position()); - let (node_count, edge_count, graph_size) = - d.with_position(d.len() - 3 * IntEncodedWithFixedSize::ENCODED_SIZE, |d| { + let (node_count, edge_count) = + d.with_position(d.len() - 2 * IntEncodedWithFixedSize::ENCODED_SIZE, |d| { debug!("position: {:?}", d.position()); let node_count = IntEncodedWithFixedSize::decode(d).0 as usize; let edge_count = IntEncodedWithFixedSize::decode(d).0 as usize; - let graph_size = IntEncodedWithFixedSize::decode(d).0 as usize; - (node_count, edge_count, graph_size) + (node_count, edge_count) }); - assert_eq!(d.len(), graph_size); debug!("position: {:?}", d.position()); debug!(?node_count, ?edge_count); @@ -606,8 +604,6 @@ fn finish(self, profiler: &SelfProfilerRef) -> FileEncodeResult { debug!("position: {:?}", encoder.position()); IntEncodedWithFixedSize(node_count).encode(&mut encoder); IntEncodedWithFixedSize(edge_count).encode(&mut encoder); - let graph_size = encoder.position() + IntEncodedWithFixedSize::ENCODED_SIZE; - IntEncodedWithFixedSize(graph_size as u64).encode(&mut encoder); debug!("position: {:?}", encoder.position()); // Drop the encoder so that nothing is written after the counts. let result = encoder.finish(); diff --git a/compiler/rustc_serialize/src/opaque.rs b/compiler/rustc_serialize/src/opaque.rs index eec83c02d353..1b4b1d2436f4 100644 --- a/compiler/rustc_serialize/src/opaque.rs +++ b/compiler/rustc_serialize/src/opaque.rs @@ -17,6 +17,8 @@ pub type FileEncodeResult = Result; +const FOOTER: &[u8] = b"rust-end-file"; + /// The size of the buffer in `FileEncoder`. const BUF_SIZE: usize = 8192; @@ -181,6 +183,7 @@ fn panic_invalid_write(written: usize) { } pub fn finish(&mut self) -> FileEncodeResult { + self.write_all(FOOTER); self.flush(); #[cfg(debug_assertions)] { @@ -261,15 +264,18 @@ pub struct MemDecoder<'a> { impl<'a> MemDecoder<'a> { #[inline] - pub fn new(data: &'a [u8], position: usize) -> MemDecoder<'a> { + pub fn new(data: &'a [u8], position: usize) -> Option> { + let data = data.strip_suffix(FOOTER)?; let Range { start, end } = data.as_ptr_range(); - MemDecoder { start, current: data[position..].as_ptr(), end, _marker: PhantomData } + Some(MemDecoder { start, current: data[position..].as_ptr(), end, _marker: PhantomData }) } #[inline] - pub fn data(&self) -> &'a [u8] { - // SAFETY: This recovers the original slice, only using members we never modify. - unsafe { std::slice::from_raw_parts(self.start, self.len()) } + pub fn split_at(&self, position: usize) -> MemDecoder<'a> { + assert!(position <= self.len()); + // SAFETY: We checked above that this offset is within the original slice + let current = unsafe { self.start.add(position) }; + MemDecoder { start: self.start, current, end: self.end, _marker: PhantomData } } #[inline] diff --git a/compiler/rustc_serialize/tests/leb128.rs b/compiler/rustc_serialize/tests/leb128.rs index dc9b32a968b5..2bf4f38d3c34 100644 --- a/compiler/rustc_serialize/tests/leb128.rs +++ b/compiler/rustc_serialize/tests/leb128.rs @@ -1,4 +1,5 @@ use rustc_serialize::leb128::*; +use rustc_serialize::opaque::MemDecoder; use rustc_serialize::Decoder; macro_rules! impl_test_unsigned_leb128 { @@ -25,13 +26,15 @@ fn $test_name() { let n = $write_fn_name(&mut buf, x); stream.extend(&buf[..n]); } + let stream_end = stream.len(); + stream.extend(b"rust-end-file"); - let mut decoder = rustc_serialize::opaque::MemDecoder::new(&stream, 0); + let mut decoder = MemDecoder::new(&stream, 0).unwrap(); for &expected in &values { let actual = $read_fn_name(&mut decoder); assert_eq!(expected, actual); } - assert_eq!(stream.len(), decoder.position()); + assert_eq!(stream_end, decoder.position()); } }; } @@ -72,13 +75,15 @@ fn $test_name() { let n = $write_fn_name(&mut buf, x); stream.extend(&buf[..n]); } + let stream_end = stream.len(); + stream.extend(b"rust-end-file"); - let mut decoder = rustc_serialize::opaque::MemDecoder::new(&stream, 0); + let mut decoder = MemDecoder::new(&stream, 0).unwrap(); for &expected in &values { let actual = $read_fn_name(&mut decoder); assert_eq!(expected, actual); } - assert_eq!(stream.len(), decoder.position()); + assert_eq!(stream_end, decoder.position()); } }; } diff --git a/compiler/rustc_serialize/tests/opaque.rs b/compiler/rustc_serialize/tests/opaque.rs index 45ff85f38d2b..833151d82be3 100644 --- a/compiler/rustc_serialize/tests/opaque.rs +++ b/compiler/rustc_serialize/tests/opaque.rs @@ -42,7 +42,7 @@ fn check_round_trip< encoder.finish().unwrap(); let data = fs::read(&tmpfile).unwrap(); - let mut decoder = MemDecoder::new(&data[..], 0); + let mut decoder = MemDecoder::new(&data[..], 0).unwrap(); for value in values { let decoded = Decodable::decode(&mut decoder); assert_eq!(value, decoded); diff --git a/src/librustdoc/scrape_examples.rs b/src/librustdoc/scrape_examples.rs index 9c9b386edda7..0bf98886965c 100644 --- a/src/librustdoc/scrape_examples.rs +++ b/src/librustdoc/scrape_examples.rs @@ -344,7 +344,9 @@ pub(crate) fn load_call_locations( Ok(bytes) => bytes, Err(e) => dcx.fatal(format!("failed to load examples: {e}")), }; - let mut decoder = MemDecoder::new(&bytes, 0); + let Some(mut decoder) = MemDecoder::new(&bytes, 0) else { + dcx.fatal(format!("Corrupt metadata encountered in {path}")) + }; let calls = AllCallLocations::decode(&mut decoder); for (function, fn_calls) in calls.into_iter() {