Use named fields in ChunkedBitSet's Chunk::Mixed

This helps to clarify that the field with type `ChunkSize` is a count of 1s,
not a length or size.
This commit is contained in:
Zalathar
2026-03-14 21:03:59 +11:00
parent a0e206b566
commit 0a90f8ddc2
2 changed files with 56 additions and 49 deletions
+44 -37
View File
@@ -509,14 +509,20 @@ enum Chunk {
/// to store the length, which would make this type larger. These excess
/// words are always zero, as are any excess bits in the final in-use word.
///
/// The `ChunkSize` field is the count of 1s set in the chunk, and
/// must satisfy `0 < count < chunk_domain_size`.
///
/// The words are within an `Rc` because it's surprisingly common to
/// duplicate an entire chunk, e.g. in `ChunkedBitSet::clone_from()`, or
/// when a `Mixed` chunk is union'd into a `Zeros` chunk. When we do need
/// to modify a chunk we use `Rc::make_mut`.
Mixed(ChunkSize, Rc<[Word; CHUNK_WORDS]>),
Mixed {
/// Count of set bits (1s) in this chunk's words.
///
/// Invariant: `0 < ones_count < chunk_domain_size`.
///
/// Tracking this separately allows individual insert/remove calls to
/// know that the chunk has become all-zeroes or all-ones, in O(1) time.
ones_count: ChunkSize,
words: Rc<[Word; CHUNK_WORDS]>,
},
}
// This type is used a lot. Make sure it doesn't unintentionally get bigger.
@@ -613,7 +619,7 @@ pub fn contains(&self, elem: T) -> bool {
match &chunk {
Zeros => false,
Ones => true,
Mixed(_, words) => {
Mixed { ones_count: _, words } => {
let (word_index, mask) = chunk_word_index_and_mask(elem);
(words[word_index] & mask) != 0
}
@@ -644,19 +650,19 @@ pub fn insert(&mut self, elem: T) -> bool {
let (word_index, mask) = chunk_word_index_and_mask(elem);
words_ref[word_index] |= mask;
*chunk = Mixed(1, words);
*chunk = Mixed { ones_count: 1, words };
} else {
*chunk = Ones;
}
true
}
Ones => false,
Mixed(ref mut count, ref mut words) => {
Mixed { ref mut ones_count, ref mut words } => {
// We skip all the work if the bit is already set.
let (word_index, mask) = chunk_word_index_and_mask(elem);
if (words[word_index] & mask) == 0 {
*count += 1;
if *count < chunk_domain_size {
*ones_count += 1;
if *ones_count < chunk_domain_size {
let words = Rc::make_mut(words);
words[word_index] |= mask;
} else {
@@ -702,18 +708,18 @@ pub fn remove(&mut self, elem: T) -> bool {
);
let (word_index, mask) = chunk_word_index_and_mask(elem);
words_ref[word_index] &= !mask;
*chunk = Mixed(chunk_domain_size - 1, words);
*chunk = Mixed { ones_count: chunk_domain_size - 1, words };
} else {
*chunk = Zeros;
}
true
}
Mixed(ref mut count, ref mut words) => {
Mixed { ref mut ones_count, ref mut words } => {
// We skip all the work if the bit is already clear.
let (word_index, mask) = chunk_word_index_and_mask(elem);
if (words[word_index] & mask) != 0 {
*count -= 1;
if *count > 0 {
*ones_count -= 1;
if *ones_count > 0 {
let words = Rc::make_mut(words);
words[word_index] &= !mask;
} else {
@@ -732,7 +738,7 @@ fn chunk_iter(&self, chunk_index: usize) -> ChunkIter<'_> {
match self.chunks.get(chunk_index) {
Some(Zeros) => ChunkIter::Zeros,
Some(Ones) => ChunkIter::Ones(0..chunk_domain_size as usize),
Some(Mixed(_, words)) => {
Some(Mixed { ones_count: _, words }) => {
let num_words = num_words(chunk_domain_size as usize);
ChunkIter::Mixed(BitIter::new(&words[0..num_words]))
}
@@ -765,14 +771,14 @@ fn union(&mut self, other: &ChunkedBitSet<T>) -> bool {
match (&mut self_chunk, &other_chunk) {
(_, Zeros) | (Ones, _) => {}
(Zeros, _) | (Mixed(..), Ones) => {
(Zeros, _) | (Mixed { .. }, Ones) => {
// `other_chunk` fully overwrites `self_chunk`
*self_chunk = other_chunk.clone();
changed = true;
}
(
Mixed(self_chunk_count, self_chunk_words),
Mixed(_other_chunk_count, other_chunk_words),
Mixed { ones_count: self_chunk_ones, words: self_chunk_words },
Mixed { ones_count: _, words: other_chunk_words },
) => {
// First check if the operation would change
// `self_chunk.words`. If not, we can avoid allocating some
@@ -807,8 +813,8 @@ fn union(&mut self, other: &ChunkedBitSet<T>) -> bool {
op,
);
debug_assert!(has_changed);
*self_chunk_count = count_ones(&self_chunk_words[0..num_words]) as ChunkSize;
if *self_chunk_count == chunk_domain_size {
*self_chunk_ones = count_ones(&self_chunk_words[0..num_words]) as ChunkSize;
if *self_chunk_ones == chunk_domain_size {
*self_chunk = Ones;
}
changed = true;
@@ -839,11 +845,11 @@ fn subtract(&mut self, other: &ChunkedBitSet<T>) -> bool {
match (&mut self_chunk, &other_chunk) {
(Zeros, _) | (_, Zeros) => {}
(Ones | Mixed(..), Ones) => {
(Ones | Mixed { .. }, Ones) => {
changed = true;
*self_chunk = Zeros;
}
(Ones, Mixed(other_chunk_count, other_chunk_words)) => {
(Ones, Mixed { ones_count: other_chunk_ones, words: other_chunk_words }) => {
changed = true;
let num_words = num_words(chunk_domain_size as usize);
debug_assert!(num_words > 0 && num_words <= CHUNK_WORDS);
@@ -854,16 +860,17 @@ fn subtract(&mut self, other: &ChunkedBitSet<T>) -> bool {
*word = !*word & tail_mask;
tail_mask = Word::MAX;
}
let self_chunk_count = chunk_domain_size - *other_chunk_count;
let self_chunk_ones = chunk_domain_size - *other_chunk_ones;
debug_assert_eq!(
self_chunk_count,
self_chunk_ones,
count_ones(&self_chunk_words[0..num_words]) as ChunkSize
);
*self_chunk = Mixed(self_chunk_count, Rc::new(self_chunk_words));
*self_chunk =
Mixed { ones_count: self_chunk_ones, words: Rc::new(self_chunk_words) };
}
(
Mixed(self_chunk_count, self_chunk_words),
Mixed(_other_chunk_count, other_chunk_words),
Mixed { ones_count: self_chunk_ones, words: self_chunk_words },
Mixed { ones_count: _, words: other_chunk_words },
) => {
// See `ChunkedBitSet::union` for details on what is happening here.
let num_words = num_words(chunk_domain_size as usize);
@@ -883,8 +890,8 @@ fn subtract(&mut self, other: &ChunkedBitSet<T>) -> bool {
op,
);
debug_assert!(has_changed);
*self_chunk_count = count_ones(&self_chunk_words[0..num_words]) as ChunkSize;
if *self_chunk_count == 0 {
*self_chunk_ones = count_ones(&self_chunk_words[0..num_words]) as ChunkSize;
if *self_chunk_ones == 0 {
*self_chunk = Zeros;
}
changed = true;
@@ -915,13 +922,13 @@ fn intersect(&mut self, other: &ChunkedBitSet<T>) -> bool {
match (&mut self_chunk, &other_chunk) {
(Zeros, _) | (_, Ones) => {}
(Ones, Zeros | Mixed(..)) | (Mixed(..), Zeros) => {
(Ones, Zeros | Mixed { .. }) | (Mixed { .. }, Zeros) => {
changed = true;
*self_chunk = other_chunk.clone();
}
(
Mixed(self_chunk_count, self_chunk_words),
Mixed(_other_chunk_count, other_chunk_words),
Mixed { ones_count: self_chunk_ones, words: self_chunk_words },
Mixed { ones_count: _, words: other_chunk_words },
) => {
// See `ChunkedBitSet::union` for details on what is happening here.
let num_words = num_words(chunk_domain_size as usize);
@@ -941,8 +948,8 @@ fn intersect(&mut self, other: &ChunkedBitSet<T>) -> bool {
op,
);
debug_assert!(has_changed);
*self_chunk_count = count_ones(&self_chunk_words[0..num_words]) as ChunkSize;
if *self_chunk_count == 0 {
*self_chunk_ones = count_ones(&self_chunk_words[0..num_words]) as ChunkSize;
if *self_chunk_ones == 0 {
*self_chunk = Zeros;
}
changed = true;
@@ -1023,11 +1030,11 @@ fn assert_valid(&self, chunk_domain_size: ChunkSize) {
assert!(chunk_domain_size as usize <= CHUNK_BITS);
match *self {
Zeros | Ones => {}
Mixed(count, ref words) => {
assert!(0 < count && count < chunk_domain_size);
Mixed { ones_count, ref words } => {
assert!(0 < ones_count && ones_count < chunk_domain_size);
// Check the number of set bits matches `count`.
assert_eq!(count_ones(words.as_slice()) as ChunkSize, count);
assert_eq!(count_ones(words.as_slice()) as ChunkSize, ones_count);
// Check the not-in-use words are all zeroed.
let num_words = num_words(chunk_domain_size as usize);
@@ -1043,7 +1050,7 @@ fn count(&self, chunk_domain_size: ChunkSize) -> usize {
match *self {
Zeros => 0,
Ones => chunk_domain_size as usize,
Mixed(count, _) => count as usize,
Mixed { ones_count, words: _ } => usize::from(ones_count),
}
}
}
+12 -12
View File
@@ -162,16 +162,16 @@ fn chunked_bitset() {
assert!(!b100.contains(20) && b100.contains(30) && !b100.contains(99) && b100.contains(50));
assert_eq!(
b100.chunks(),
vec![Mixed(
97,
vec![Mixed {
ones_count: 97,
#[rustfmt::skip]
Rc::new([
words: Rc::new([
0b11111111_11111111_11111110_11111111_11111111_11101111_11111111_11111111,
0b00000000_00000000_00000000_00000111_11111111_11111111_11111111_11111111,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0,
])
)],
}],
);
b100.assert_valid();
let mut num_removed = 0;
@@ -228,14 +228,14 @@ fn chunked_bitset() {
b4096.chunks(),
#[rustfmt::skip]
vec![
Mixed(1, Rc::new([
Mixed { ones_count: 1, words:Rc::new([
1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
])),
Mixed(1, Rc::new([
])},
Mixed { ones_count: 1, words: Rc::new([
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0x8000_0000_0000_0000
])),
])},
],
);
assert_eq!(b4096.count(), 2);
@@ -265,14 +265,14 @@ fn chunked_bitset() {
#[rustfmt::skip]
vec![
Zeros,
Mixed(1, Rc::new([
Mixed { ones_count: 1, words: Rc::new([
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0x0100_0000_0000_0000, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
])),
Mixed(1, Rc::new([
])},
Mixed { ones_count: 1, words: Rc::new([
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0x0100, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
])),
])},
Zeros,
Zeros,
],