Commit fd3b22a836: The current code for indexing into bucket arrays is quite tricky and unsafe,
partly because it has to keep manually assuring the compiler that a bucket index is always less
than 21. By encapsulating that knowledge in a 21-value enum, we can make the code clearer and
safer, without giving up performance. Having a dedicated `BucketIndex` type could also help with
further cleanups of `VecCache` indexing.
//! VecCache maintains a mapping from K to (V, I) pairs. K and I must be roughly u32-sized, and V
//! must be Copy.
//!
//! VecCache supports efficient concurrent put/get across the key space, with write-once semantics
//! (i.e., a given key can only be put once). Subsequent puts will panic.
//!
//! This is currently used for query caching.
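//!
//! A minimal usage sketch (`SomeKey` and `SomeDepIdx` are hypothetical stand-ins
//! for the `Idx` key/index types used in practice, not types defined here):
//!
//! ```ignore (illustrative sketch only)
//! let cache: VecCache<SomeKey, u64, SomeDepIdx> = VecCache::default();
//! assert!(cache.lookup(&key).is_none());
//! cache.complete(key, 42, dep_idx); // first (and only allowed) put for `key`
//! assert_eq!(cache.lookup(&key), Some((42, dep_idx)));
//! ```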

use std::fmt::{self, Debug};
use std::marker::PhantomData;
use std::ops::{Index, IndexMut};
use std::sync::atomic::{AtomicPtr, AtomicU32, AtomicUsize, Ordering};

use rustc_index::Idx;

#[cfg(test)]
mod tests;

struct Slot<V> {
    // We never construct a `&Slot<V>`, so it's fine for this to not be in an `UnsafeCell`.
    value: V,
    // This is both an index and a once-lock.
    //
    // 0: not yet initialized.
    // 1: lock held, initializing.
    // 2..=u32::MAX: initialized; the stored value is the associated index plus 2.
    index_and_lock: AtomicU32,
}
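
// A summary of the once-lock protocol described above (not new behavior; this is
// exactly what `put`/`put_unique` and `get` below implement):
//
//     0 --compare_exchange(0, 1)--> 1 --store(index + 2, Release)--> index + 2
//
// Readers treat both 0 and 1 as "no value yet"; only stored values >= 2 count as
// initialized.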

/// This uniquely identifies a single `Slot<V>` entry in the buckets map, and provides accessors for
/// either getting the value or putting a value.
#[derive(Copy, Clone, Debug)]
struct SlotIndex {
    // the index of the bucket in VecCache (0 to 20)
    bucket_idx: BucketIndex,
    // the index of the slot within the bucket
    index_in_bucket: usize,
}

// This makes sure the counts are consistent with what we allocate, precomputing each bucket at
// compile time. Visiting all powers of two is enough to hit all the buckets.
//
// We confirm counts are accurate in the slot_index_exhaustive test.
const ENTRIES_BY_BUCKET: [usize; BUCKETS] = {
    let mut entries = [0; BUCKETS];
    let mut key = 0;
    loop {
        let si = SlotIndex::from_index(key);
        entries[si.bucket_idx.to_usize()] = si.bucket_idx.capacity();
        if key == 0 {
            key = 1;
        } else if key == (1 << 31) {
            break;
        } else {
            key <<= 1;
        }
    }
    entries
};
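
// Why powers of two suffice: bucket boundaries sit exactly at the powers of two
// from 2^12 through 2^31, so the visited keys 0, 1, 2, 4, ..., 2^31 land in
// buckets 0, 0, ..., 0 (everything below 4096), then 1, 2, ..., 20 in turn,
// touching every bucket at least once.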

const BUCKETS: usize = 21;

impl SlotIndex {
    /// Unpacks a flat 32-bit index into a [`BucketIndex`] and a slot offset within that bucket.
    #[inline]
    const fn from_index(idx: u32) -> Self {
        let (bucket_idx, index_in_bucket) = BucketIndex::from_flat_index(idx as usize);
        SlotIndex { bucket_idx, index_in_bucket }
    }

    // SAFETY: Buckets must be managed solely by functions here (i.e., get/put on SlotIndex) and
    // `self` comes from SlotIndex::from_index.
    #[inline]
    unsafe fn get<V: Copy>(&self, buckets: &[AtomicPtr<Slot<V>>; BUCKETS]) -> Option<(V, u32)> {
        let bucket = &buckets[self.bucket_idx];
        let ptr = bucket.load(Ordering::Acquire);
        // The bucket is not yet initialized, so we certainly won't find this entry in it.
        if ptr.is_null() {
            return None;
        }
        debug_assert!(self.index_in_bucket < self.bucket_idx.capacity());
        // SAFETY: `bucket` was allocated (so <= isize in total bytes) to hold `entries`, so this
        // must be inbounds.
        let slot = unsafe { ptr.add(self.index_in_bucket) };

        // SAFETY: initialized bucket has zeroed all memory within the bucket, so we are valid for
        // AtomicU32 access.
        let index_and_lock = unsafe { &(*slot).index_and_lock };
        let current = index_and_lock.load(Ordering::Acquire);
        let index = match current {
            0 => return None,
            // Treat "initializing" as actually just not initialized at all.
            // The only reason this is a separate state is that `complete` calls could race and
            // we can't allow that, but from the load perspective there's no difference.
            1 => return None,
            _ => current - 2,
        };

        // SAFETY:
        // * slot is a valid pointer (buckets are always valid for the index we get).
        // * value is initialized since we saw a >= 2 index above.
        // * `V: Copy`, so safe to read.
        let value = unsafe { (*slot).value };
        Some((value, index))
    }
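
    // Note: `get` is entirely lock-free: one Acquire load of the bucket pointer
    // (pairing with the Release store in `initialize_bucket`) and one Acquire load
    // of the slot's `index_and_lock` (pairing with the Release stores in `put` and
    // `put_unique`).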

    fn bucket_ptr<V>(&self, bucket: &AtomicPtr<Slot<V>>) -> *mut Slot<V> {
        let ptr = bucket.load(Ordering::Acquire);
        if ptr.is_null() { Self::initialize_bucket(bucket, self.bucket_idx) } else { ptr }
    }

    #[cold]
    #[inline(never)]
    fn initialize_bucket<V>(bucket: &AtomicPtr<Slot<V>>, bucket_idx: BucketIndex) -> *mut Slot<V> {
        static LOCK: std::sync::Mutex<()> = std::sync::Mutex::new(());

        // If we are initializing the bucket, then acquire a global lock.
        //
        // This path is quite cold, so it's cheap to use a global lock. This ensures that we never
        // have multiple allocations for the same bucket.
        let _allocator_guard = LOCK.lock().unwrap_or_else(|e| e.into_inner());

        let ptr = bucket.load(Ordering::Acquire);

        // OK, now under the allocator lock, if we're still null then it's definitely us that will
        // initialize this bucket.
        if ptr.is_null() {
            let bucket_layout =
                std::alloc::Layout::array::<Slot<V>>(bucket_idx.capacity()).unwrap();
            // This is more of a sanity check -- this code is very cold, so it's safe to pay a
            // little extra cost here.
            assert!(bucket_layout.size() > 0);
            // SAFETY: Just checked that size is non-zero.
            let allocated = unsafe { std::alloc::alloc_zeroed(bucket_layout).cast::<Slot<V>>() };
            if allocated.is_null() {
                std::alloc::handle_alloc_error(bucket_layout);
            }
            bucket.store(allocated, Ordering::Release);
            allocated
        } else {
            // Otherwise some other thread initialized this bucket after we took the lock. In that
            // case, just return early.
            ptr
        }
    }
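
    // `bucket_ptr` + `initialize_bucket` together form a double-checked lock: the
    // fast path is a single Acquire load, and only a null pointer sends us to the
    // cold path, which re-checks under the global mutex so that each bucket is
    // allocated at most once.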

    /// Returns `true` if this call successfully put into the map.
    #[inline]
    fn put<V>(&self, buckets: &[AtomicPtr<Slot<V>>; BUCKETS], value: V, extra: u32) -> bool {
        let bucket = &buckets[self.bucket_idx];
        let ptr = self.bucket_ptr(bucket);

        debug_assert!(self.index_in_bucket < self.bucket_idx.capacity());
        // SAFETY: `bucket` was allocated (so <= isize in total bytes) to hold `entries`, so this
        // must be inbounds.
        let slot = unsafe { ptr.add(self.index_in_bucket) };

        // SAFETY: initialized bucket has zeroed all memory within the bucket, so we are valid for
        // AtomicU32 access.
        let index_and_lock = unsafe { &(*slot).index_and_lock };
        match index_and_lock.compare_exchange(0, 1, Ordering::AcqRel, Ordering::Acquire) {
            Ok(_) => {
                // We have acquired the initialization lock. It is our job to write `value` and
                // then set the lock to the real index.

                unsafe {
                    (&raw mut (*slot).value).write(value);
                }

                index_and_lock.store(extra.checked_add(2).unwrap(), Ordering::Release);

                true
            }

            // Treat "initializing" as the caller's fault. Callers are responsible for ensuring
            // that there are no races on initialization. In the compiler's current usage for
            // query caches, that's the "active query map" which ensures each query actually runs
            // once (even if concurrently started).
            Err(1) => panic!("caller raced calls to put()"),

            // This slot was already populated. Also ignore, currently this is the same as
            // "initializing".
            Err(_) => false,
        }
    }

    /// Inserts into the map, given that the slot is unique, so it won't race with other threads.
    #[inline]
    unsafe fn put_unique<V>(&self, buckets: &[AtomicPtr<Slot<V>>; BUCKETS], value: V, extra: u32) {
        let bucket = &buckets[self.bucket_idx];
        let ptr = self.bucket_ptr(bucket);

        debug_assert!(self.index_in_bucket < self.bucket_idx.capacity());
        // SAFETY: `bucket` was allocated (so <= isize in total bytes) to hold `entries`, so this
        // must be inbounds.
        let slot = unsafe { ptr.add(self.index_in_bucket) };

        // SAFETY: We know our slot is unique as a precondition of this function, so this can't
        // race.
        unsafe {
            (&raw mut (*slot).value).write(value);
        }

        // SAFETY: initialized bucket has zeroed all memory within the bucket, so we are valid for
        // AtomicU32 access.
        let index_and_lock = unsafe { &(*slot).index_and_lock };

        index_and_lock.store(extra.checked_add(2).unwrap(), Ordering::Release);
    }
}

/// In-memory cache for queries whose keys are densely-numbered IDs
/// (e.g. `CrateNum`, `LocalDefId`), and can therefore be used as indices
/// into a dense vector of cached values.
///
/// (As of [#124780] the underlying storage is not an actual `Vec`, but rather
/// a series of increasingly-large buckets, for improved performance when the
/// parallel frontend is using multiple threads.)
///
/// Each entry in the cache stores the query's return value (`V`), and also
/// an associated index (`I`), which in practice is a `DepNodeIndex` used for
/// query dependency tracking.
///
/// [#124780]: https://github.com/rust-lang/rust/pull/124780
pub struct VecCache<K: Idx, V, I> {
    // Entries per bucket:
    // Bucket  0:       4096 (2^12)
    // Bucket  1:       4096 (2^12)
    // Bucket  2:       8192 (2^13)
    // Bucket  3:      16384 (2^14)
    // ...
    // Bucket 19: 1073741824 (2^30)
    // Bucket 20: 2147483648 (2^31)
    // The total number of entries if all buckets are initialized is 2^32.
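    // (Check: 2^12 + (2^12 + 2^13 + ... + 2^31) = 2^12 + (2^32 - 2^12) = 2^32,
    // exactly one slot per possible u32 key.)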
    buckets: [AtomicPtr<Slot<V>>; BUCKETS],

    // In the compiler's current usage these are only *read* during incremental and self-profiling.
    // They are an optimization over iterating the full buckets array.
    present: [AtomicPtr<Slot<()>>; BUCKETS],
    len: AtomicUsize,

    key: PhantomData<(K, I)>,
}

impl<K: Idx, V, I> Default for VecCache<K, V, I> {
    fn default() -> Self {
        VecCache {
            buckets: Default::default(),
            key: PhantomData,
            len: Default::default(),
            present: Default::default(),
        }
    }
}

// SAFETY: No access to `V` is made.
unsafe impl<K: Idx, #[may_dangle] V, I> Drop for VecCache<K, V, I> {
    fn drop(&mut self) {
        // We have unique ownership, so no locks etc. are needed. Since `K` and `V` are both `Copy`,
        // we are also guaranteed to just need to deallocate any large arrays (not iterate over
        // contents).
        //
        // Confirm no need to deallocate individual entries. Note that `V: Copy` is asserted on
        // insert/lookup but not necessarily construction, primarily to avoid annoyingly propagating
        // the bounds into struct definitions everywhere.
        assert!(!std::mem::needs_drop::<K>());
        assert!(!std::mem::needs_drop::<V>());

        for (idx, bucket) in BucketIndex::enumerate_buckets(&self.buckets) {
            let bucket = bucket.load(Ordering::Acquire);
            if !bucket.is_null() {
                let layout = std::alloc::Layout::array::<Slot<V>>(ENTRIES_BY_BUCKET[idx]).unwrap();
                unsafe {
                    std::alloc::dealloc(bucket.cast(), layout);
                }
            }
        }

        for (idx, bucket) in BucketIndex::enumerate_buckets(&self.present) {
            let bucket = bucket.load(Ordering::Acquire);
            if !bucket.is_null() {
                let layout = std::alloc::Layout::array::<Slot<()>>(ENTRIES_BY_BUCKET[idx]).unwrap();
                unsafe {
                    std::alloc::dealloc(bucket.cast(), layout);
                }
            }
        }
    }
}

impl<K, V, I> VecCache<K, V, I>
where
    K: Eq + Idx + Copy + Debug,
    V: Copy,
    I: Idx + Copy,
{
    #[inline(always)]
    pub fn lookup(&self, key: &K) -> Option<(V, I)> {
        let key = u32::try_from(key.index()).unwrap();
        let slot_idx = SlotIndex::from_index(key);
        match unsafe { slot_idx.get(&self.buckets) } {
            Some((value, idx)) => Some((value, I::new(idx as usize))),
            None => None,
        }
    }

    #[inline]
    pub fn complete(&self, key: K, value: V, index: I) {
        let key = u32::try_from(key.index()).unwrap();
        let slot_idx = SlotIndex::from_index(key);
        if slot_idx.put(&self.buckets, value, index.index() as u32) {
            let present_idx = self.len.fetch_add(1, Ordering::Relaxed);
            let slot = SlotIndex::from_index(u32::try_from(present_idx).unwrap());
            // SAFETY: We should always be uniquely putting due to `len` fetch_add returning
            // unique values. We can't get here if `len` overflows because `put` will not succeed
            // u32::MAX + 1 times as it will run out of slots.
            unsafe { slot.put_unique(&self.present, (), key) };
        }
    }
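
    // How `complete` and `for_each` fit together: every successful `put` into
    // `buckets` also appends the key to the `present` log (at the position handed
    // out by the `len` fetch_add), so `for_each` below can replay keys in
    // completion order and look each one back up in `buckets`.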

    pub fn for_each(&self, f: &mut dyn FnMut(&K, &V, I)) {
        for idx in 0..self.len.load(Ordering::Acquire) {
            let key = SlotIndex::from_index(idx as u32);
            match unsafe { key.get(&self.present) } {
                // This shouldn't happen in our current usage (iter is really only
                // used long after queries are done running), but if we hit this in practice it's
                // probably fine to just break early.
                None => unreachable!(),
                Some(((), key)) => {
                    let key = K::new(key as usize);
                    // unwrap() is OK: present entries are always written only after we put the
                    // real entry.
                    let value = self.lookup(&key).unwrap();
                    f(&key, &value.0, value.1);
                }
            }
        }
    }

    pub fn len(&self) -> usize {
        self.len.load(Ordering::Acquire)
    }
}

/// Index into an array of buckets.
///
/// Using an enum lets us tell the compiler that values range from 0 to 20,
/// allowing array bounds checks to be optimized away,
/// without having to resort to pattern types or other unstable features.
#[derive(Clone, Copy, PartialEq, Eq)]
#[repr(usize)]
enum BucketIndex {
    // tidy-alphabetical-start
    Bucket00,
    Bucket01,
    Bucket02,
    Bucket03,
    Bucket04,
    Bucket05,
    Bucket06,
    Bucket07,
    Bucket08,
    Bucket09,
    Bucket10,
    Bucket11,
    Bucket12,
    Bucket13,
    Bucket14,
    Bucket15,
    Bucket16,
    Bucket17,
    Bucket18,
    Bucket19,
    Bucket20,
    // tidy-alphabetical-end
}

impl Debug for BucketIndex {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        Debug::fmt(&self.to_usize(), f)
    }
}

impl BucketIndex {
    /// Capacity of bucket 0 (and also of bucket 1).
    const BUCKET_0_CAPACITY: usize = 1 << (Self::NONZERO_BUCKET_SHIFT_ADJUST + 1);
    /// Adjustment factor from the highest-set-bit-position of a flat index
    /// to its corresponding bucket number.
    ///
    /// For example, the first flat index in bucket 2 is 8192.
    /// Its highest-set-bit-position is `(8192).ilog2() == 13`, and subtracting
    /// the adjustment factor of 11 gives the bucket number of 2.
    const NONZERO_BUCKET_SHIFT_ADJUST: usize = 11;

    #[inline(always)]
    const fn to_usize(self) -> usize {
        self as usize
    }

    #[inline(always)]
    const fn from_raw(raw: usize) -> Self {
        match raw {
            // tidy-alphabetical-start
            00 => Self::Bucket00,
            01 => Self::Bucket01,
            02 => Self::Bucket02,
            03 => Self::Bucket03,
            04 => Self::Bucket04,
            05 => Self::Bucket05,
            06 => Self::Bucket06,
            07 => Self::Bucket07,
            08 => Self::Bucket08,
            09 => Self::Bucket09,
            10 => Self::Bucket10,
            11 => Self::Bucket11,
            12 => Self::Bucket12,
            13 => Self::Bucket13,
            14 => Self::Bucket14,
            15 => Self::Bucket15,
            16 => Self::Bucket16,
            17 => Self::Bucket17,
            18 => Self::Bucket18,
            19 => Self::Bucket19,
            20 => Self::Bucket20,
            // tidy-alphabetical-end
            _ => panic!("bucket index out of range"),
        }
    }

    /// Total number of slots in this bucket.
    #[inline(always)]
    const fn capacity(self) -> usize {
        match self {
            Self::Bucket00 => Self::BUCKET_0_CAPACITY,
            // Bucket 1 has a capacity of `1 << (1 + 11) == pow(2, 12) == 4096`.
            // Bucket 2 has a capacity of `1 << (2 + 11) == pow(2, 13) == 8192`.
            _ => 1 << (self.to_usize() + Self::NONZERO_BUCKET_SHIFT_ADJUST),
        }
    }

    /// Converts a flat index in the range `0..=u32::MAX` into a bucket index,
    /// and a slot offset within that bucket.
    ///
    /// Panics if `flat > u32::MAX`.
    #[inline(always)]
    const fn from_flat_index(flat: usize) -> (Self, usize) {
        if flat > u32::MAX as usize {
            panic!();
        }

        // If the index is in bucket 0, the conversion is trivial.
        // This also avoids calling `ilog2` when `flat == 0`.
        if flat < Self::BUCKET_0_CAPACITY {
            return (Self::Bucket00, flat);
        }

        // General-case conversion for a non-zero bucket index.
        //
        //      |       | bucket | slot
        // flat | ilog2 | index  | offset
        // -------------------------------
        // 4096 |    12 |      1 |      0
        // 4097 |    12 |      1 |      1
        //  ... |       |        |
        // 8191 |    12 |      1 |   4095
        // 8192 |    13 |      2 |      0
        let highest_bit_pos = flat.ilog2() as usize;
        let bucket_index =
            BucketIndex::from_raw(highest_bit_pos - Self::NONZERO_BUCKET_SHIFT_ADJUST);

        // Clear the highest-set bit (which selects the bucket) to get the
        // slot offset within this bucket.
        let slot_offset = flat - (1 << highest_bit_pos);

        (bucket_index, slot_offset)
    }
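
    // Spot checks for `from_flat_index` (consistent with the table above, and
    // exercised exhaustively by the slot_index_exhaustive test):
    //   from_flat_index(0)    == (Bucket00, 0)
    //   from_flat_index(4095) == (Bucket00, 4095)
    //   from_flat_index(4096) == (Bucket01, 0)      // ilog2 == 12; 12 - 11 == 1
    //   from_flat_index(8192) == (Bucket02, 0)      // ilog2 == 13; 13 - 11 == 2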

    #[inline(always)]
    fn iter_all() -> impl ExactSizeIterator<Item = Self> {
        (0usize..BUCKETS).map(BucketIndex::from_raw)
    }

    #[inline(always)]
    fn enumerate_buckets<T>(buckets: &[T; BUCKETS]) -> impl ExactSizeIterator<Item = (Self, &T)> {
        BucketIndex::iter_all().zip(buckets)
    }
}

impl<T> Index<BucketIndex> for [T; BUCKETS] {
    type Output = T;

    #[inline(always)]
    fn index(&self, index: BucketIndex) -> &Self::Output {
        // The optimizer should be able to see that a bucket index is
        // always in-bounds, and omit the runtime bounds check.
        &self[index.to_usize()]
    }
}

impl<T> IndexMut<BucketIndex> for [T; BUCKETS] {
    #[inline(always)]
    fn index_mut(&mut self, index: BucketIndex) -> &mut Self::Output {
        &mut self[index.to_usize()]
    }
}