mirror of
https://github.com/rust-lang/rust.git
synced 2026-04-27 18:57:42 +03:00
Auto merge of #148737 - zachs18:unit-is-zero, r=joboet
Implement IsZero for (), and optimize `IsZero::is_zero` for arrays
These are probably not super useful optimizations, but they make it so that `vec![expr; LARGE_LENGTH]` has better performance for some `expr`s, e.g.
* array of length zero in debug mode
* tuple containing `()` and zero-valued integers in debug and release mode
* array of `()` or other zero-sized `IsZero` type in debug mode
<details> <summary>very rough benchmarks</summary>
```Rust
use std::time::Instant;
use std::sync::atomic::{AtomicUsize, Ordering::Relaxed};
/// Zero-sized marker type that does not implement `Copy`, so duplicating it
/// has to go through the `Clone` impl below.
struct NonCopyZst;

/// Running total of `NonCopyZst::clone` calls.
static COUNTER: AtomicUsize = AtomicUsize::new(0);

impl Clone for NonCopyZst {
    /// Adds one to `COUNTER` and hands back a new `NonCopyZst`.
    fn clone(&self) -> Self {
        let _calls_before = COUNTER.fetch_add(1, Relaxed);
        NonCopyZst
    }
}
// Evaluates `$e`, discards the result, and prints the expression's source text
// (padded to a minimum width of 56 columns) followed by the elapsed time.
macro_rules! timeit {
($e:expr) => {
let start = Instant::now();
// The value of `$e` is dropped at the end of this statement, i.e. before
// `start.elapsed()` is read below, so the reported time includes the drop.
_ = $e;
println!("{:56}: {:?}", stringify!($e), start.elapsed());
};
}
// Times `vec![elem; n]` for a range of element types, printing one line per
// expression via `timeit!`. The trailing comment on each line records how the
// timing changes with this PR.
fn main() {
// Cases this PR is meant to speed up.
timeit!(vec![[String::from("hello"); 0]; 1_000_000_000]); // gets a lot better in debug mode
timeit!(vec![(0u8, (), 0u16); 1_000_000_000]); // gets a lot better in debug *and* release mode
timeit!(vec![[[(); 37]; 1_000_000_000]; 1_000_000_000]); // gets a lot better in debug mode
timeit!(vec![[NonCopyZst; 0]; 1_000_000_000]); // gets a lot better in debug mode
timeit!(vec![[[1u8; 0]; 1_000_000]; 1_000_000]); // gets a little bit better in debug mode
timeit!(vec![[[(); 37]; 1_000_000]; 1_000_000]); // gets a little bit better in debug mode
timeit!(vec![[[1u128; 0]; 1_000_000]; 1_000_000]); // gets a little bit better in debug mode
// check that we don't regress existing optimizations
timeit!(vec![(0u8, 0u16); 1_000_000_000]); // about the same time
timeit!(vec![0u32; 1_000_000_000]); // about the same time
// check that we still call clone for non-IsZero ZSTs
timeit!(vec![[const { NonCopyZst }; 2]; 1_000]); // about the same time
// 1998 is consistent with 999 clones of a 2-element array (n - 1 clones for
// n = 1_000). It also requires that none of the earlier cases bumped COUNTER.
assert_eq!(COUNTER.load(Relaxed), 1998);
timeit!(vec![NonCopyZst; 10_000]); // about the same time
// Likewise, 9_999 further clones for the 10_000-element vec.
assert_eq!(COUNTER.load(Relaxed), 1998 + 9_999);
}
```
```rs
$ cargo +nightly run
// ...
vec![[String::from("hello"); 0]; 1_000_000_000] : 11.13999724s
vec![(0u8, (), 0u16); 1_000_000_000] : 5.254646651s
vec![[[(); 37]; 1_000_000_000]; 1_000_000_000] : 2.738062531s
vec![[NonCopyZst; 0]; 1_000_000_000] : 9.483690922s
vec![[[1u8; 0]; 1_000_000]; 1_000_000] : 2.919236ms
vec![[[(); 37]; 1_000_000]; 1_000_000] : 2.927755ms
vec![[[1u128; 0]; 1_000_000]; 1_000_000] : 2.931486ms
vec![(0u8, 0u16); 1_000_000_000] : 19.46µs
vec![0u32; 1_000_000_000] : 9.34µs
vec![[const { NonCopyZst }; 2]; 1_000] : 31.88µs
vec![NonCopyZst; 10_000] : 36.519µs
```
```rs
$ cargo +dev run
// ...
vec![[String::from("hello"); 0]; 1_000_000_000] : 4.12µs
vec![(0u8, (), 0u16); 1_000_000_000] : 16.299µs
vec![[[(); 37]; 1_000_000_000]; 1_000_000_000] : 210ns
vec![[NonCopyZst; 0]; 1_000_000_000] : 210ns
vec![[[1u8; 0]; 1_000_000]; 1_000_000] : 170ns
vec![[[(); 37]; 1_000_000]; 1_000_000] : 110ns
vec![[[1u128; 0]; 1_000_000]; 1_000_000] : 140ns
vec![(0u8, 0u16); 1_000_000_000] : 11.56µs
vec![0u32; 1_000_000_000] : 10.71µs
vec![[const { NonCopyZst }; 2]; 1_000] : 36.08µs
vec![NonCopyZst; 10_000] : 73.21µs
```
(checking release mode to make sure this doesn't regress perf there)
```rs
$ cargo +nightly run --release
// ...
vec![[String::from("hello"); 0]; 1_000_000_000] : 70ns
vec![(0u8, (), 0u16); 1_000_000_000] : 1.269457501s
vec![[[(); 37]; 1_000_000_000]; 1_000_000_000] : 10ns
vec![[NonCopyZst; 0]; 1_000_000_000] : 20ns
vec![[[1u8; 0]; 1_000_000]; 1_000_000] : 10ns
vec![[[(); 37]; 1_000_000]; 1_000_000] : 20ns
vec![[[1u128; 0]; 1_000_000]; 1_000_000] : 20ns
vec![(0u8, 0u16); 1_000_000_000] : 20ns
vec![0u32; 1_000_000_000] : 20ns
vec![[const { NonCopyZst }; 2]; 1_000] : 2.66µs
vec![NonCopyZst; 10_000] : 13.39µs
```
```rs
$ cargo +dev run --release
vec![[String::from("hello"); 0]; 1_000_000_000] : 90ns
vec![(0u8, (), 0u16); 1_000_000_000] : 30ns
vec![[[(); 37]; 1_000_000_000]; 1_000_000_000] : 20ns
vec![[NonCopyZst; 0]; 1_000_000_000] : 30ns
vec![[[1u8; 0]; 1_000_000]; 1_000_000] : 20ns
vec![[[(); 37]; 1_000_000]; 1_000_000] : 20ns
vec![[[1u128; 0]; 1_000_000]; 1_000_000] : 20ns
vec![(0u8, 0u16); 1_000_000_000] : 30ns
vec![0u32; 1_000_000_000] : 20ns
vec![[const { NonCopyZst }; 2]; 1_000] : 3.52µs
vec![NonCopyZst; 10_000] : 17.13µs
```
</details>
The specific expression that led me to the perf issue this PR addresses is `vec![[(); LARGE]; LARGE]`. I was trying to demonstrate `Vec::into_flattened` panicking on length overflow in the playground, but got a timeout error instead, because `vec![[(); LARGE]; LARGE]` took so long to run in debug mode (it runs fine on the playground in release mode).
This commit is contained in:
@@ -1,3 +1,4 @@
|
||||
use core::mem::SizedTypeProperties;
|
||||
use core::num::{NonZero, Saturating, Wrapping};
|
||||
|
||||
use crate::boxed::Box;
|
||||
@@ -20,6 +21,8 @@ fn is_zero(&self) -> bool {
|
||||
};
|
||||
}
|
||||
|
||||
impl_is_zero!((), |_: ()| true); // It is needed to impl for arrays and tuples of ().
|
||||
|
||||
impl_is_zero!(i8, |x| x == 0); // It is needed to impl for arrays and tuples of i8.
|
||||
impl_is_zero!(i16, |x| x == 0);
|
||||
impl_is_zero!(i32, |x| x == 0);
|
||||
@@ -43,17 +46,38 @@ fn is_zero(&self) -> bool {
|
||||
// `IsZero` cannot be soundly implemented for pointers because of provenance
|
||||
// (see #135338).
|
||||
|
||||
unsafe impl<T, const N: usize> IsZero for [T; N] {
|
||||
#[inline]
|
||||
default fn is_zero(&self) -> bool {
|
||||
// If the array is of length zero,
|
||||
// then it doesn't actually contain any `T`s,
|
||||
// so `T::clone` doesn't need to be called,
|
||||
// and we can "zero-initialize" all zero bytes of the array.
|
||||
N == 0
|
||||
}
|
||||
}
|
||||
|
||||
unsafe impl<T: IsZero, const N: usize> IsZero for [T; N] {
|
||||
#[inline]
|
||||
fn is_zero(&self) -> bool {
|
||||
// Because this is generated as a runtime check, it's not obvious that
|
||||
// it's worth doing if the array is really long. The threshold here
|
||||
// is largely arbitrary, but was picked because as of 2022-07-01 LLVM
|
||||
// fails to const-fold the check in `vec![[1; 32]; n]`
|
||||
// See https://github.com/rust-lang/rust/pull/97581#issuecomment-1166628022
|
||||
// Feel free to tweak if you have better evidence.
|
||||
if T::IS_ZST {
|
||||
// If T is a ZST, then there is at most one possible value of `T`,
|
||||
// so we only need to check one element for zeroness.
|
||||
// We can't unconditionally return `true` here, since, e.g.
|
||||
// `T = [NonTrivialCloneZst; 5]` is a ZST that implements `IsZero`
|
||||
// due to the generic array impl, but `T::is_zero` returns `false`
|
||||
// since the length is not 0.
|
||||
self.get(0).is_none_or(IsZero::is_zero)
|
||||
} else {
|
||||
// Because this is generated as a runtime check, it's not obvious that
|
||||
// it's worth doing if the array is really long. The threshold here
|
||||
// is largely arbitrary, but was picked because as of 2022-07-01 LLVM
|
||||
// fails to const-fold the check in `vec![[1; 32]; n]`
|
||||
// See https://github.com/rust-lang/rust/pull/97581#issuecomment-1166628022
|
||||
// Feel free to tweak if you have better evidence.
|
||||
|
||||
N <= 16 && self.iter().all(IsZero::is_zero)
|
||||
N <= 16 && self.iter().all(IsZero::is_zero)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -61,7 +85,7 @@ fn is_zero(&self) -> bool {
|
||||
macro_rules! impl_is_zero_tuples {
|
||||
// Stopper
|
||||
() => {
|
||||
// No use for implementing for empty tuple because it is ZST.
|
||||
// We already have an impl for () above.
|
||||
};
|
||||
($first_arg:ident $(,$rest:ident)*) => {
|
||||
unsafe impl <$first_arg: IsZero, $($rest: IsZero,)*> IsZero for ($first_arg, $($rest,)*){
|
||||
|
||||
Reference in New Issue
Block a user