diff --git a/compiler/rustc_middle/src/query/on_disk_cache.rs b/compiler/rustc_middle/src/query/on_disk_cache.rs index b6fd30647f85..4dbceba92403 100644 --- a/compiler/rustc_middle/src/query/on_disk_cache.rs +++ b/compiler/rustc_middle/src/query/on_disk_cache.rs @@ -231,7 +231,7 @@ pub fn serialize(tcx: TyCtxt<'_>, encoder: FileEncoder) -> FileEncodeResult { type_shorthands: Default::default(), predicate_shorthands: Default::default(), interpret_allocs: Default::default(), - source_map: CachingSourceMapView::new(tcx.sess.source_map()), + caching_source_map_view: CachingSourceMapView::new(tcx.sess.source_map()), file_to_file_index, hygiene_context: &hygiene_encode_context, symbol_index_table: Default::default(), @@ -783,7 +783,7 @@ pub struct CacheEncoder<'a, 'tcx> { type_shorthands: FxHashMap, usize>, predicate_shorthands: FxHashMap, usize>, interpret_allocs: FxIndexSet, - source_map: CachingSourceMapView<'tcx>, + caching_source_map_view: CachingSourceMapView<'tcx>, file_to_file_index: FxHashMap<*const SourceFile, SourceFileIndex>, hygiene_context: &'a HygieneEncodeContext, // Used for both `Symbol`s and `ByteSymbol`s. @@ -900,7 +900,7 @@ fn encode_span(&mut self, span: Span) { } let Some((file_lo, line_lo, col_lo)) = - self.source_map.byte_pos_to_line_and_col(span_data.lo) + self.caching_source_map_view.byte_pos_to_line_and_col(span_data.lo) else { return TAG_PARTIAL_SPAN.encode(self); }; diff --git a/compiler/rustc_span/src/caching_source_map_view.rs b/compiler/rustc_span/src/caching_source_map_view.rs index 11507a3d9e45..d9aa73cefc57 100644 --- a/compiler/rustc_span/src/caching_source_map_view.rs +++ b/compiler/rustc_span/src/caching_source_map_view.rs @@ -4,10 +4,15 @@ use crate::source_map::SourceMap; use crate::{BytePos, Pos, RelativeBytePos, SourceFile, SpanData}; +/// A `SourceMap` wrapper that caches info about a single recent code position. This gives a good +/// speedup when hashing spans, because often multiple spans within a single line are hashed in +/// succession, and this avoids expensive `SourceMap` lookups each time the cache is hit. We used +/// to cache multiple code positions, but caching a single position ended up being simpler and +/// faster. #[derive(Clone)] -struct CacheEntry { - time_stamp: usize, - line_number: usize, +pub struct CachingSourceMapView<'sm> { + source_map: &'sm SourceMap, + file: Arc, // The line's byte position range in the `SourceMap`. This range will fail to contain a valid // position in certain edge cases. Spans often start/end one past something, and when that // something is the last character of a file (this can happen when a file doesn't end in a @@ -20,261 +25,134 @@ struct CacheEntry { // entry contains a position, the only ramification of the above is that we will get cache // misses for these rare positions. A line lookup for the position via `SourceMap::lookup_line` // after a cache miss will produce the last line number, as desired. - line: Range, - file: Arc, - file_index: usize, -} - -impl CacheEntry { - #[inline] - fn update( - &mut self, - new_file_and_idx: Option<(Arc, usize)>, - pos: BytePos, - time_stamp: usize, - ) { - if let Some((file, file_idx)) = new_file_and_idx { - self.file = file; - self.file_index = file_idx; - } - - let pos = self.file.relative_position(pos); - let line_index = self.file.lookup_line(pos).unwrap(); - let line_bounds = self.file.line_bounds(line_index); - self.line_number = line_index + 1; - self.line = line_bounds; - self.touch(time_stamp); - } - - #[inline] - fn touch(&mut self, time_stamp: usize) { - self.time_stamp = time_stamp; - } -} - -#[derive(Clone)] -pub struct CachingSourceMapView<'sm> { - source_map: &'sm SourceMap, - line_cache: [CacheEntry; 3], - time_stamp: usize, + line_bounds: Range, + line_number: usize, } impl<'sm> CachingSourceMapView<'sm> { pub fn new(source_map: &'sm SourceMap) -> CachingSourceMapView<'sm> { let files = source_map.files(); let first_file = Arc::clone(&files[0]); - let entry = CacheEntry { - time_stamp: 0, - line_number: 0, - line: BytePos(0)..BytePos(0), - file: first_file, - file_index: 0, - }; - CachingSourceMapView { source_map, - line_cache: [entry.clone(), entry.clone(), entry], - time_stamp: 0, + file: first_file, + line_bounds: BytePos(0)..BytePos(0), + line_number: 0, } } + #[inline] + fn pos_to_line(&self, pos: BytePos) -> (Range, usize) { + let pos = self.file.relative_position(pos); + let line_index = self.file.lookup_line(pos).unwrap(); + let line_bounds = self.file.line_bounds(line_index); + let line_number = line_index + 1; + (line_bounds, line_number) + } + + #[inline] + fn update(&mut self, new_file: Option>, pos: BytePos) { + if let Some(file) = new_file { + self.file = file; + } + (self.line_bounds, self.line_number) = self.pos_to_line(pos); + } + pub fn byte_pos_to_line_and_col( &mut self, pos: BytePos, ) -> Option<(Arc, usize, RelativeBytePos)> { - self.time_stamp += 1; - - // Check if the position is in one of the cached lines - let cache_idx = self.cache_entry_index(pos); - if cache_idx != -1 { - let cache_entry = &mut self.line_cache[cache_idx as usize]; - cache_entry.touch(self.time_stamp); - - let col = RelativeBytePos(pos.to_u32() - cache_entry.line.start.to_u32()); - return Some((Arc::clone(&cache_entry.file), cache_entry.line_number, col)); - } - - // No cache hit ... - let oldest = self.oldest_cache_entry_index(); - - // If the entry doesn't point to the correct file, get the new file and index. - let new_file_and_idx = if !file_contains(&self.line_cache[oldest].file, pos) { - Some(self.file_for_position(pos)?) + if self.line_bounds.contains(&pos) { + // Cache hit: do nothing. } else { - None + // Cache miss. If the entry doesn't point to the correct file, get the new file and + // index. + let new_file = if !file_contains(&self.file, pos) { + Some(self.file_for_position(pos)?) + } else { + None + }; + self.update(new_file, pos); }; - let cache_entry = &mut self.line_cache[oldest]; - cache_entry.update(new_file_and_idx, pos, self.time_stamp); - - let col = RelativeBytePos(pos.to_u32() - cache_entry.line.start.to_u32()); - Some((Arc::clone(&cache_entry.file), cache_entry.line_number, col)) + let col = RelativeBytePos(pos.to_u32() - self.line_bounds.start.to_u32()); + Some((Arc::clone(&self.file), self.line_number, col)) } pub fn span_data_to_lines_and_cols( &mut self, span_data: &SpanData, ) -> Option<(&SourceFile, usize, BytePos, usize, BytePos)> { - self.time_stamp += 1; - - // Check if lo and hi are in the cached lines. - let lo_cache_idx: isize = self.cache_entry_index(span_data.lo); - let hi_cache_idx = self.cache_entry_index(span_data.hi); - - if lo_cache_idx != -1 && hi_cache_idx != -1 { - // Cache hit for span lo and hi. Check if they belong to the same file. - let lo_file_index = self.line_cache[lo_cache_idx as usize].file_index; - let hi_file_index = self.line_cache[hi_cache_idx as usize].file_index; - - if lo_file_index != hi_file_index { - return None; - } - - self.line_cache[lo_cache_idx as usize].touch(self.time_stamp); - self.line_cache[hi_cache_idx as usize].touch(self.time_stamp); - - let lo = &self.line_cache[lo_cache_idx as usize]; - let hi = &self.line_cache[hi_cache_idx as usize]; + let lo_hit = self.line_bounds.contains(&span_data.lo); + let hi_hit = self.line_bounds.contains(&span_data.hi); + if lo_hit && hi_hit { + // span_data.lo and span_data.hi are cached (i.e. both in the same line). return Some(( - &lo.file, - lo.line_number, - span_data.lo - lo.line.start, - hi.line_number, - span_data.hi - hi.line.start, + &self.file, + self.line_number, + span_data.lo - self.line_bounds.start, + self.line_number, + span_data.hi - self.line_bounds.start, )); } - // No cache hit or cache hit for only one of span lo and hi. - let oldest = if lo_cache_idx != -1 || hi_cache_idx != -1 { - let avoid_idx = if lo_cache_idx != -1 { lo_cache_idx } else { hi_cache_idx }; - self.oldest_cache_entry_index_avoid(avoid_idx as usize) - } else { - self.oldest_cache_entry_index() - }; - - // If the entry doesn't point to the correct file, get the new file and index. - // Return early if the file containing beginning of span doesn't contain end of span. - let new_file_and_idx = if !file_contains(&self.line_cache[oldest].file, span_data.lo) { - let new_file_and_idx = self.file_for_position(span_data.lo)?; - if !file_contains(&new_file_and_idx.0, span_data.hi) { + // If the cached file is wrong, update it. Return early if the span lo and hi are in + // different files. + let new_file = if !file_contains(&self.file, span_data.lo) { + let new_file = self.file_for_position(span_data.lo)?; + if !file_contains(&new_file, span_data.hi) { return None; } - - Some(new_file_and_idx) + Some(new_file) } else { - let file = &self.line_cache[oldest].file; - if !file_contains(file, span_data.hi) { + if !file_contains(&self.file, span_data.hi) { return None; } - None }; - // Update the cache entries. - let (lo_idx, hi_idx) = match (lo_cache_idx, hi_cache_idx) { - // Oldest cache entry is for span_data.lo line. - (-1, -1) => { - let lo = &mut self.line_cache[oldest]; - lo.update(new_file_and_idx, span_data.lo, self.time_stamp); + // If we reach here, lo and hi are in the same file. - if !lo.line.contains(&span_data.hi) { - let new_file_and_idx = Some((Arc::clone(&lo.file), lo.file_index)); - let next_oldest = self.oldest_cache_entry_index_avoid(oldest); - let hi = &mut self.line_cache[next_oldest]; - hi.update(new_file_and_idx, span_data.hi, self.time_stamp); - (oldest, next_oldest) - } else { - (oldest, oldest) - } - } - // Oldest cache entry is for span_data.lo line. - (-1, _) => { - let lo = &mut self.line_cache[oldest]; - lo.update(new_file_and_idx, span_data.lo, self.time_stamp); - let hi = &mut self.line_cache[hi_cache_idx as usize]; - hi.touch(self.time_stamp); - (oldest, hi_cache_idx as usize) - } - // Oldest cache entry is for span_data.hi line. - (_, -1) => { - let hi = &mut self.line_cache[oldest]; - hi.update(new_file_and_idx, span_data.hi, self.time_stamp); - let lo = &mut self.line_cache[lo_cache_idx as usize]; - lo.touch(self.time_stamp); - (lo_cache_idx as usize, oldest) - } - _ => { - panic!( - "the case of neither value being equal to -1 was handled above and the function returns." - ); - } + if !lo_hit { + // We cache the lo information. + self.update(new_file, span_data.lo); + } + let lo_line_bounds = &self.line_bounds; + let lo_line_number = self.line_number.clone(); + + let (hi_line_bounds, hi_line_number) = if !self.line_bounds.contains(&span_data.hi) { + // hi and lo are in different lines. We compute but don't cache the hi information. + self.pos_to_line(span_data.hi) + } else { + // hi and lo are in the same line. + (self.line_bounds.clone(), self.line_number) }; - let lo = &self.line_cache[lo_idx]; - let hi = &self.line_cache[hi_idx]; - // Span lo and hi may equal line end when last line doesn't // end in newline, hence the inclusive upper bounds below. - assert!(span_data.lo >= lo.line.start); - assert!(span_data.lo <= lo.line.end); - assert!(span_data.hi >= hi.line.start); - assert!(span_data.hi <= hi.line.end); - assert!(lo.file.contains(span_data.lo)); - assert!(lo.file.contains(span_data.hi)); - assert_eq!(lo.file_index, hi.file_index); + assert!(span_data.lo >= lo_line_bounds.start); + assert!(span_data.lo <= lo_line_bounds.end); + assert!(span_data.hi >= hi_line_bounds.start); + assert!(span_data.hi <= hi_line_bounds.end); + assert!(self.file.contains(span_data.lo)); + assert!(self.file.contains(span_data.hi)); Some(( - &lo.file, - lo.line_number, - span_data.lo - lo.line.start, - hi.line_number, - span_data.hi - hi.line.start, + &self.file, + lo_line_number, + span_data.lo - lo_line_bounds.start, + hi_line_number, + span_data.hi - hi_line_bounds.start, )) } - fn cache_entry_index(&self, pos: BytePos) -> isize { - for (idx, cache_entry) in self.line_cache.iter().enumerate() { - if cache_entry.line.contains(&pos) { - return idx as isize; - } - } - - -1 - } - - fn oldest_cache_entry_index(&self) -> usize { - let mut oldest = 0; - - for idx in 1..self.line_cache.len() { - if self.line_cache[idx].time_stamp < self.line_cache[oldest].time_stamp { - oldest = idx; - } - } - - oldest - } - - fn oldest_cache_entry_index_avoid(&self, avoid_idx: usize) -> usize { - let mut oldest = if avoid_idx != 0 { 0 } else { 1 }; - - for idx in 0..self.line_cache.len() { - if idx != avoid_idx - && self.line_cache[idx].time_stamp < self.line_cache[oldest].time_stamp - { - oldest = idx; - } - } - - oldest - } - - fn file_for_position(&self, pos: BytePos) -> Option<(Arc, usize)> { + fn file_for_position(&self, pos: BytePos) -> Option> { if !self.source_map.files().is_empty() { let file_idx = self.source_map.lookup_source_file_idx(pos); let file = &self.source_map.files()[file_idx]; if file_contains(file, pos) { - return Some((Arc::clone(file), file_idx)); + return Some(Arc::clone(file)); } }