Rollup merge of #145940 - pascaldekloe:int-exp-tune, r=tgross35

single buffer for exponent fmt of integers

No need for fragmented buffers when formatting.

```
orig.txt: fmt::write_i128_exp                                                  143.39ns/iter      +/- 0.32
orig.txt: fmt::write_i64_exp                                                    68.72ns/iter      +/- 0.03
new.txt:  fmt::write_i128_exp                                                  138.29ns/iter      +/- 0.50
new.txt:  fmt::write_i64_exp                                                    58.93ns/iter      +/- 4.62
```

This patch fully eliminates unsafe pointer use (after rust-lang/rust#135265 and rust-lang/rust#136594).

    r? libs
This commit is contained in:
Trevor Gross
2025-09-06 14:39:04 -04:00
committed by GitHub
2 changed files with 240 additions and 179 deletions
+216 -179
View File
@@ -3,7 +3,7 @@
use crate::fmt::NumBuffer;
use crate::mem::MaybeUninit;
use crate::num::fmt as numfmt;
use crate::{fmt, ptr, slice, str};
use crate::{fmt, str};
/// Formatting of integers with a non-decimal radix.
macro_rules! radix_integer {
@@ -96,8 +96,8 @@ fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
};
}
// 2 digit decimal look up table
static DEC_DIGITS_LUT: &[u8; 200] = b"\
// The string of all two-digit numbers in range 00..99 is used as a lookup table.
static DECIMAL_PAIRS: &[u8; 200] = b"\
0001020304050607080910111213141516171819\
2021222324252627282930313233343536373839\
4041424344454647484950515253545556575859\
@@ -123,6 +123,9 @@ macro_rules! impl_Display {
$(
const _: () = {
assert!($Signed::MIN < 0, "need signed");
assert!($Unsigned::MIN == 0, "need unsigned");
assert!($Signed::BITS == $Unsigned::BITS, "need counterparts");
assert!($Signed::BITS <= $T::BITS, "need lossless conversion");
assert!($Unsigned::BITS <= $T::BITS, "need lossless conversion");
};
@@ -207,10 +210,10 @@ unsafe fn _fmt_inner(self, buf: &mut [MaybeUninit::<u8>]) -> usize {
remain /= scale;
let pair1 = (quad / 100) as usize;
let pair2 = (quad % 100) as usize;
buf[offset + 0].write(DEC_DIGITS_LUT[pair1 * 2 + 0]);
buf[offset + 1].write(DEC_DIGITS_LUT[pair1 * 2 + 1]);
buf[offset + 2].write(DEC_DIGITS_LUT[pair2 * 2 + 0]);
buf[offset + 3].write(DEC_DIGITS_LUT[pair2 * 2 + 1]);
buf[offset + 0].write(DECIMAL_PAIRS[pair1 * 2 + 0]);
buf[offset + 1].write(DECIMAL_PAIRS[pair1 * 2 + 1]);
buf[offset + 2].write(DECIMAL_PAIRS[pair2 * 2 + 0]);
buf[offset + 3].write(DECIMAL_PAIRS[pair2 * 2 + 1]);
}
// Format per two digits from the lookup table.
@@ -225,8 +228,8 @@ unsafe fn _fmt_inner(self, buf: &mut [MaybeUninit::<u8>]) -> usize {
let pair = (remain % 100) as usize;
remain /= 100;
buf[offset + 0].write(DEC_DIGITS_LUT[pair * 2 + 0]);
buf[offset + 1].write(DEC_DIGITS_LUT[pair * 2 + 1]);
buf[offset + 0].write(DECIMAL_PAIRS[pair * 2 + 0]);
buf[offset + 1].write(DECIMAL_PAIRS[pair * 2 + 1]);
}
// Format the last remaining digit, if any.
@@ -242,7 +245,7 @@ unsafe fn _fmt_inner(self, buf: &mut [MaybeUninit::<u8>]) -> usize {
// Either the compiler sees that remain < 10, or it prevents
// a boundary check up next.
let last = (remain & 15) as usize;
buf[offset].write(DEC_DIGITS_LUT[last * 2 + 1]);
buf[offset].write(DECIMAL_PAIRS[last * 2 + 1]);
// not used: remain = 0;
}
@@ -335,7 +338,6 @@ pub fn format_into(self, buf: &mut NumBuffer<Self>) -> &str {
}
}
)*
#[cfg(feature = "optimize_for_size")]
@@ -374,178 +376,213 @@ fn ${concat($fmt_fn, _small)}(n: $T, is_nonnegative: bool, f: &mut fmt::Formatte
macro_rules! impl_Exp {
($($Signed:ident, $Unsigned:ident),* ; as $T:ident into $fmt_fn:ident) => {
const _: () = assert!($T::MIN == 0, "need unsigned");
fn $fmt_fn(
mut n: $T,
f: &mut fmt::Formatter<'_>,
n: $T,
is_nonnegative: bool,
upper: bool,
f: &mut fmt::Formatter<'_>
letter_e: u8
) -> fmt::Result {
let (mut n, mut exponent, trailing_zeros, added_precision) = {
let mut exponent = 0;
// count and remove trailing decimal zeroes
while n % 10 == 0 && n >= 10 {
n /= 10;
exponent += 1;
}
let (added_precision, subtracted_precision) = match f.precision() {
Some(fmt_prec) => {
// number of decimal digits minus 1
let mut tmp = n;
let mut prec = 0;
while tmp >= 10 {
tmp /= 10;
prec += 1;
}
(fmt_prec.saturating_sub(prec), prec.saturating_sub(fmt_prec))
debug_assert!(letter_e.is_ascii_alphabetic(), "single-byte character");
// Print the integer as a coefficient in range (-10, 10).
let mut exp = n.checked_ilog10().unwrap_or(0) as usize;
debug_assert!(n / (10 as $T).pow(exp as u32) < 10);
// Precisison is counted as the number of digits in the fraction.
let mut coef_prec = exp;
// Keep the digits as an integer (paired with its coef_prec count).
let mut coef = n;
// A Formatter may set the precision to a fixed number of decimals.
let more_prec = match f.precision() {
None => {
// Omit any and all trailing zeroes.
while coef_prec != 0 && coef % 10 == 0 {
coef /= 10;
coef_prec -= 1;
}
None => (0, 0)
};
for _ in 1..subtracted_precision {
n /= 10;
exponent += 1;
}
if subtracted_precision != 0 {
let rem = n % 10;
n /= 10;
exponent += 1;
// round up last digit, round to even on a tie
if rem > 5 || (rem == 5 && (n % 2 != 0 || subtracted_precision > 1 )) {
n += 1;
// if the digit is rounded to the next power
// instead adjust the exponent
if n.ilog10() > (n - 1).ilog10() {
n /= 10;
exponent += 1;
}
0
},
Some(fmt_prec) if fmt_prec >= coef_prec => {
// Count the number of additional zeroes needed.
fmt_prec - coef_prec
},
Some(fmt_prec) => {
// Count the number of digits to drop.
let less_prec = coef_prec - fmt_prec;
assert!(less_prec > 0);
// Scale down the coefficient/precision pair. For example,
// coef 123456 gets coef_prec 5 (to make 1.23456). To format
// the number with 2 decimals, i.e., fmt_prec 2, coef should
// be scaled by 10⁵⁻²=1000 to get coef 123 with coef_prec 2.
// SAFETY: Any precision less than coef_prec will cause a
// power of ten below the coef value.
let scale = unsafe {
(10 as $T).checked_pow(less_prec as u32).unwrap_unchecked()
};
let floor = coef / scale;
// Round half to even conform documentation.
let over = coef % scale;
let half = scale / 2;
let round_up = if over < half {
0
} else if over > half {
1
} else {
floor & 1 // round odd up to even
};
// Adding one to a scale down of at least 10 won't overflow.
coef = floor + round_up;
coef_prec = fmt_prec;
// The round_up may have caused the coefficient to reach 10
// (which is not permitted). For example, anything in range
// [9.95, 10) becomes 10.0 when adjusted to precision 1.
if round_up != 0 && coef.checked_ilog10().unwrap_or(0) as usize > coef_prec {
debug_assert_eq!(coef, (10 as $T).pow(coef_prec as u32 + 1));
coef /= 10; // drop one trailing zero
exp += 1; // one power of ten higher
}
}
(n, exponent, exponent, added_precision)
0
},
};
// Since `curr` always decreases by the number of digits copied, this means
// that `curr >= 0`.
let mut buf = [MaybeUninit::<u8>::uninit(); 40];
let mut curr = buf.len(); //index for buf
let buf_ptr = MaybeUninit::slice_as_mut_ptr(&mut buf);
let lut_ptr = DEC_DIGITS_LUT.as_ptr();
// Allocate a text buffer with lazy initialization.
const MAX_DEC_N: usize = $T::MAX.ilog10() as usize + 1;
const MAX_COEF_LEN: usize = MAX_DEC_N + ".".len();
const MAX_TEXT_LEN: usize = MAX_COEF_LEN + "e99".len();
let mut buf = [MaybeUninit::<u8>::uninit(); MAX_TEXT_LEN];
// decode 2 chars at a time
while n >= 100 {
let d1 = ((n % 100) as usize) << 1;
curr -= 2;
// SAFETY: `d1 <= 198`, so we can copy from `lut_ptr[d1..d1 + 2]` since
// `DEC_DIGITS_LUT` has a length of 200.
unsafe {
ptr::copy_nonoverlapping(lut_ptr.add(d1), buf_ptr.add(curr), 2);
}
n /= 100;
exponent += 2;
}
// n is <= 99, so at most 2 chars long
let mut n = n as isize; // possibly reduce 64bit math
// decode second-to-last character
if n >= 10 {
curr -= 1;
// SAFETY: Safe since `40 > curr >= 0` (see comment)
unsafe {
*buf_ptr.add(curr) = (n as u8 % 10_u8) + b'0';
}
n /= 10;
exponent += 1;
}
// add decimal point iff >1 mantissa digit will be printed
if exponent != trailing_zeros || added_precision != 0 {
curr -= 1;
// SAFETY: Safe since `40 > curr >= 0`
unsafe {
*buf_ptr.add(curr) = b'.';
}
}
// SAFETY: Safe since `40 > curr >= 0`
let buf_slice = unsafe {
// decode last character
curr -= 1;
*buf_ptr.add(curr) = (n as u8) + b'0';
let len = buf.len() - curr as usize;
slice::from_raw_parts(buf_ptr.add(curr), len)
};
// stores 'e' (or 'E') and the up to 2-digit exponent
let mut exp_buf = [MaybeUninit::<u8>::uninit(); 3];
let exp_ptr = MaybeUninit::slice_as_mut_ptr(&mut exp_buf);
// SAFETY: In either case, `exp_buf` is written within bounds and `exp_ptr[..len]`
// is contained within `exp_buf` since `len <= 3`.
let exp_slice = unsafe {
*exp_ptr.add(0) = if upper { b'E' } else { b'e' };
let len = if exponent < 10 {
*exp_ptr.add(1) = (exponent as u8) + b'0';
2
} else {
let off = exponent << 1;
ptr::copy_nonoverlapping(lut_ptr.add(off), exp_ptr.add(1), 2);
3
};
slice::from_raw_parts(exp_ptr, len)
};
let parts = &[
numfmt::Part::Copy(buf_slice),
numfmt::Part::Zero(added_precision),
numfmt::Part::Copy(exp_slice),
];
let sign = if !is_nonnegative {
"-"
} else if f.sign_plus() {
"+"
// Encode the coefficient in buf[..coef_len].
let (lead_dec, coef_len) = if coef_prec == 0 && more_prec == 0 {
(coef, 1_usize) // single digit; no fraction
} else {
""
buf[1].write(b'.');
let fraction_range = 2..(2 + coef_prec);
// Consume the least-significant decimals from a working copy.
let mut remain = coef;
#[cfg(feature = "optimize_for_size")] {
for i in fraction_range.clone().rev() {
let digit = (remain % 10) as usize;
remain /= 10;
buf[i].write(b'0' + digit as u8);
}
}
#[cfg(not(feature = "optimize_for_size"))] {
// Write digits per two at a time with a lookup table.
for i in fraction_range.clone().skip(1).rev().step_by(2) {
let pair = (remain % 100) as usize;
remain /= 100;
buf[i - 1].write(DECIMAL_PAIRS[pair * 2 + 0]);
buf[i - 0].write(DECIMAL_PAIRS[pair * 2 + 1]);
}
// An odd number of digits leave one digit remaining.
if coef_prec & 1 != 0 {
let digit = (remain % 10) as usize;
remain /= 10;
buf[fraction_range.start].write(b'0' + digit as u8);
}
}
(remain, fraction_range.end)
};
let formatted = numfmt::Formatted { sign, parts };
// SAFETY: `buf_slice` and `exp_slice` contain only ASCII characters.
unsafe { f.pad_formatted_parts(&formatted) }
debug_assert!(lead_dec < 10);
debug_assert!(lead_dec != 0 || coef == 0, "significant digits only");
buf[0].write(b'0' + lead_dec as u8);
// SAFETY: The number of decimals is limited, captured by MAX.
unsafe { core::hint::assert_unchecked(coef_len <= MAX_COEF_LEN) }
// Encode the scale factor in buf[coef_len..text_len].
buf[coef_len].write(letter_e);
let text_len: usize = match exp {
..10 => {
buf[coef_len + 1].write(b'0' + exp as u8);
coef_len + 2
},
10..100 => {
#[cfg(feature = "optimize_for_size")] {
buf[coef_len + 1].write(b'0' + (exp / 10) as u8);
buf[coef_len + 2].write(b'0' + (exp % 10) as u8);
}
#[cfg(not(feature = "optimize_for_size"))] {
buf[coef_len + 1].write(DECIMAL_PAIRS[exp * 2 + 0]);
buf[coef_len + 2].write(DECIMAL_PAIRS[exp * 2 + 1]);
}
coef_len + 3
},
_ => {
const { assert!($T::MAX.ilog10() < 100) };
// SAFETY: A `u256::MAX` would get exponent 77.
unsafe { core::hint::unreachable_unchecked() }
}
};
// SAFETY: All bytes up until text_len have been set.
let text = unsafe { buf[..text_len].assume_init_ref() };
if more_prec == 0 {
// SAFETY: Text is set with ASCII exclusively: either a decimal,
// or a LETTER_E, or a dot. ASCII implies valid UTF-8.
let as_str = unsafe { str::from_utf8_unchecked(text) };
f.pad_integral(is_nonnegative, "", as_str)
} else {
let parts = &[
numfmt::Part::Copy(&text[..coef_len]),
numfmt::Part::Zero(more_prec),
numfmt::Part::Copy(&text[coef_len..]),
];
let sign = if !is_nonnegative {
"-"
} else if f.sign_plus() {
"+"
} else {
""
};
// SAFETY: Text is set with ASCII exclusively: either a decimal,
// or a LETTER_E, or a dot. ASCII implies valid UTF-8.
unsafe { f.pad_formatted_parts(&numfmt::Formatted { sign, parts }) }
}
}
$(
#[stable(feature = "integer_exp_format", since = "1.42.0")]
impl fmt::LowerExp for $Signed {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
let is_nonnegative = *self >= 0;
let n = if is_nonnegative {
*self as $T
} else {
self.unsigned_abs() as $T
};
$fmt_fn(n, is_nonnegative, false, f)
}
const _: () = {
assert!($Signed::MIN < 0, "need signed");
assert!($Unsigned::MIN == 0, "need unsigned");
assert!($Signed::BITS == $Unsigned::BITS, "need counterparts");
assert!($Signed::BITS <= $T::BITS, "need lossless conversion");
assert!($Unsigned::BITS <= $T::BITS, "need lossless conversion");
};
#[stable(feature = "integer_exp_format", since = "1.42.0")]
impl fmt::LowerExp for $Signed {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
$fmt_fn(f, self.unsigned_abs() as $T, *self >= 0, b'e')
}
#[stable(feature = "integer_exp_format", since = "1.42.0")]
impl fmt::LowerExp for $Unsigned {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
$fmt_fn(*self as $T, true, false, f)
}
})*
}
#[stable(feature = "integer_exp_format", since = "1.42.0")]
impl fmt::LowerExp for $Unsigned {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
$fmt_fn(f, *self as $T, true, b'e')
}
}
#[stable(feature = "integer_exp_format", since = "1.42.0")]
impl fmt::UpperExp for $Signed {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
$fmt_fn(f, self.unsigned_abs() as $T, *self >= 0, b'E')
}
}
#[stable(feature = "integer_exp_format", since = "1.42.0")]
impl fmt::UpperExp for $Unsigned {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
$fmt_fn(f, *self as $T, true, b'E')
}
}
)*
$(
#[stable(feature = "integer_exp_format", since = "1.42.0")]
impl fmt::UpperExp for $Signed {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
let is_nonnegative = *self >= 0;
let n = if is_nonnegative {
*self as $T
} else {
self.unsigned_abs() as $T
};
$fmt_fn(n, is_nonnegative, true, f)
}
}
#[stable(feature = "integer_exp_format", since = "1.42.0")]
impl fmt::UpperExp for $Unsigned {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
$fmt_fn(*self as $T, true, true, f)
}
})*
};
}
@@ -658,10 +695,10 @@ unsafe fn _fmt_inner(self, buf: &mut [MaybeUninit<u8>]) -> usize {
remain /= 1_00_00;
let pair1 = (quad / 100) as usize;
let pair2 = (quad % 100) as usize;
buf[offset + 0].write(DEC_DIGITS_LUT[pair1 * 2 + 0]);
buf[offset + 1].write(DEC_DIGITS_LUT[pair1 * 2 + 1]);
buf[offset + 2].write(DEC_DIGITS_LUT[pair2 * 2 + 0]);
buf[offset + 3].write(DEC_DIGITS_LUT[pair2 * 2 + 1]);
buf[offset + 0].write(DECIMAL_PAIRS[pair1 * 2 + 0]);
buf[offset + 1].write(DECIMAL_PAIRS[pair1 * 2 + 1]);
buf[offset + 2].write(DECIMAL_PAIRS[pair2 * 2 + 0]);
buf[offset + 3].write(DECIMAL_PAIRS[pair2 * 2 + 1]);
}
// Format per two digits from the lookup table.
@@ -676,8 +713,8 @@ unsafe fn _fmt_inner(self, buf: &mut [MaybeUninit<u8>]) -> usize {
let pair = (remain % 100) as usize;
remain /= 100;
buf[offset + 0].write(DEC_DIGITS_LUT[pair * 2 + 0]);
buf[offset + 1].write(DEC_DIGITS_LUT[pair * 2 + 1]);
buf[offset + 0].write(DECIMAL_PAIRS[pair * 2 + 0]);
buf[offset + 1].write(DECIMAL_PAIRS[pair * 2 + 1]);
}
// Format the last remaining digit, if any.
@@ -693,7 +730,7 @@ unsafe fn _fmt_inner(self, buf: &mut [MaybeUninit<u8>]) -> usize {
// Either the compiler sees that remain < 10, or it prevents
// a boundary check up next.
let last = (remain & 15) as usize;
buf[offset].write(DEC_DIGITS_LUT[last * 2 + 1]);
buf[offset].write(DECIMAL_PAIRS[last * 2 + 1]);
// not used: remain = 0;
}
offset
@@ -792,10 +829,10 @@ fn enc_16lsd<const OFFSET: usize>(buf: &mut [MaybeUninit<u8>], n: u64) {
remain /= 1_00_00;
let pair1 = (quad / 100) as usize;
let pair2 = (quad % 100) as usize;
buf[quad_index * 4 + OFFSET + 0].write(DEC_DIGITS_LUT[pair1 * 2 + 0]);
buf[quad_index * 4 + OFFSET + 1].write(DEC_DIGITS_LUT[pair1 * 2 + 1]);
buf[quad_index * 4 + OFFSET + 2].write(DEC_DIGITS_LUT[pair2 * 2 + 0]);
buf[quad_index * 4 + OFFSET + 3].write(DEC_DIGITS_LUT[pair2 * 2 + 1]);
buf[quad_index * 4 + OFFSET + 0].write(DECIMAL_PAIRS[pair1 * 2 + 0]);
buf[quad_index * 4 + OFFSET + 1].write(DECIMAL_PAIRS[pair1 * 2 + 1]);
buf[quad_index * 4 + OFFSET + 2].write(DECIMAL_PAIRS[pair2 * 2 + 0]);
buf[quad_index * 4 + OFFSET + 3].write(DECIMAL_PAIRS[pair2 * 2 + 1]);
}
}
+24
View File
@@ -342,3 +342,27 @@ fn write_i128_hex(bh: &mut Bencher) {
black_box(&mut buf).clear();
});
}
#[bench]
fn write_i64_exp(bh: &mut Bencher) {
let mut buf = String::with_capacity(1024);
bh.iter(|| {
write!(black_box(&mut buf), "{:e}", black_box(0_i64)).unwrap();
write!(black_box(&mut buf), "{:e}", black_box(100_i64)).unwrap();
write!(black_box(&mut buf), "{:e}", black_box(-100_i64)).unwrap();
write!(black_box(&mut buf), "{:e}", black_box(1_i64 << 32)).unwrap();
black_box(&mut buf).clear();
});
}
#[bench]
fn write_i128_exp(bh: &mut Bencher) {
let mut buf = String::with_capacity(1024);
bh.iter(|| {
write!(black_box(&mut buf), "{:e}", black_box(0_i128)).unwrap();
write!(black_box(&mut buf), "{:e}", black_box(100_i128)).unwrap();
write!(black_box(&mut buf), "{:e}", black_box(-100_i128)).unwrap();
write!(black_box(&mut buf), "{:e}", black_box(1_i128 << 64)).unwrap();
black_box(&mut buf).clear();
});
}