From b963d298528db5881c1c594bb4aed8ffb1f57cbb Mon Sep 17 00:00:00 2001 From: sayantn Date: Tue, 11 Nov 2025 03:43:37 +0530 Subject: [PATCH] Use ordered reduction intrinsics for integer reductions only ordered intrinsics have implementation in rustc-const-eval --- .../crates/core_arch/src/x86/avx512bw.rs | 32 +++++++++---------- .../crates/core_arch/src/x86/avx512f.rs | 30 ++++++++--------- 2 files changed, 30 insertions(+), 32 deletions(-) diff --git a/library/stdarch/crates/core_arch/src/x86/avx512bw.rs b/library/stdarch/crates/core_arch/src/x86/avx512bw.rs index 5d17d42532f1..c76ec3cb290e 100644 --- a/library/stdarch/crates/core_arch/src/x86/avx512bw.rs +++ b/library/stdarch/crates/core_arch/src/x86/avx512bw.rs @@ -4778,7 +4778,7 @@ pub fn _mm_mask_cmp_epi8_mask(k1: __mmask16, a: __m128i, b: __m #[target_feature(enable = "avx512bw,avx512vl")] #[stable(feature = "stdarch_x86_avx512", since = "1.89")] pub fn _mm256_reduce_add_epi16(a: __m256i) -> i16 { - unsafe { simd_reduce_add_unordered(a.as_i16x16()) } + unsafe { simd_reduce_add_ordered(a.as_i16x16(), 0) } } /// Reduce the packed 16-bit integers in a by addition using mask k. Returns the sum of all active elements in a. @@ -4788,7 +4788,7 @@ pub fn _mm256_reduce_add_epi16(a: __m256i) -> i16 { #[target_feature(enable = "avx512bw,avx512vl")] #[stable(feature = "stdarch_x86_avx512", since = "1.89")] pub fn _mm256_mask_reduce_add_epi16(k: __mmask16, a: __m256i) -> i16 { - unsafe { simd_reduce_add_unordered(simd_select_bitmask(k, a.as_i16x16(), i16x16::ZERO)) } + unsafe { simd_reduce_add_ordered(simd_select_bitmask(k, a.as_i16x16(), i16x16::ZERO), 0) } } /// Reduce the packed 16-bit integers in a by addition. Returns the sum of all elements in a. @@ -4798,7 +4798,7 @@ pub fn _mm256_mask_reduce_add_epi16(k: __mmask16, a: __m256i) -> i16 { #[target_feature(enable = "avx512bw,avx512vl")] #[stable(feature = "stdarch_x86_avx512", since = "1.89")] pub fn _mm_reduce_add_epi16(a: __m128i) -> i16 { - unsafe { simd_reduce_add_unordered(a.as_i16x8()) } + unsafe { simd_reduce_add_ordered(a.as_i16x8(), 0) } } /// Reduce the packed 16-bit integers in a by addition using mask k. Returns the sum of all active elements in a. @@ -4808,7 +4808,7 @@ pub fn _mm_reduce_add_epi16(a: __m128i) -> i16 { #[target_feature(enable = "avx512bw,avx512vl")] #[stable(feature = "stdarch_x86_avx512", since = "1.89")] pub fn _mm_mask_reduce_add_epi16(k: __mmask8, a: __m128i) -> i16 { - unsafe { simd_reduce_add_unordered(simd_select_bitmask(k, a.as_i16x8(), i16x8::ZERO)) } + unsafe { simd_reduce_add_ordered(simd_select_bitmask(k, a.as_i16x8(), i16x8::ZERO), 0) } } /// Reduce the packed 8-bit integers in a by addition. Returns the sum of all elements in a. @@ -4818,7 +4818,7 @@ pub fn _mm_mask_reduce_add_epi16(k: __mmask8, a: __m128i) -> i16 { #[target_feature(enable = "avx512bw,avx512vl")] #[stable(feature = "stdarch_x86_avx512", since = "1.89")] pub fn _mm256_reduce_add_epi8(a: __m256i) -> i8 { - unsafe { simd_reduce_add_unordered(a.as_i8x32()) } + unsafe { simd_reduce_add_ordered(a.as_i8x32(), 0) } } /// Reduce the packed 8-bit integers in a by addition using mask k. Returns the sum of all active elements in a. @@ -4828,7 +4828,7 @@ pub fn _mm256_reduce_add_epi8(a: __m256i) -> i8 { #[target_feature(enable = "avx512bw,avx512vl")] #[stable(feature = "stdarch_x86_avx512", since = "1.89")] pub fn _mm256_mask_reduce_add_epi8(k: __mmask32, a: __m256i) -> i8 { - unsafe { simd_reduce_add_unordered(simd_select_bitmask(k, a.as_i8x32(), i8x32::ZERO)) } + unsafe { simd_reduce_add_ordered(simd_select_bitmask(k, a.as_i8x32(), i8x32::ZERO), 0) } } /// Reduce the packed 8-bit integers in a by addition. Returns the sum of all elements in a. @@ -4838,7 +4838,7 @@ pub fn _mm256_mask_reduce_add_epi8(k: __mmask32, a: __m256i) -> i8 { #[target_feature(enable = "avx512bw,avx512vl")] #[stable(feature = "stdarch_x86_avx512", since = "1.89")] pub fn _mm_reduce_add_epi8(a: __m128i) -> i8 { - unsafe { simd_reduce_add_unordered(a.as_i8x16()) } + unsafe { simd_reduce_add_ordered(a.as_i8x16(), 0) } } /// Reduce the packed 8-bit integers in a by addition using mask k. Returns the sum of all active elements in a. @@ -4848,7 +4848,7 @@ pub fn _mm_reduce_add_epi8(a: __m128i) -> i8 { #[target_feature(enable = "avx512bw,avx512vl")] #[stable(feature = "stdarch_x86_avx512", since = "1.89")] pub fn _mm_mask_reduce_add_epi8(k: __mmask16, a: __m128i) -> i8 { - unsafe { simd_reduce_add_unordered(simd_select_bitmask(k, a.as_i8x16(), i8x16::ZERO)) } + unsafe { simd_reduce_add_ordered(simd_select_bitmask(k, a.as_i8x16(), i8x16::ZERO), 0) } } /// Reduce the packed 16-bit integers in a by bitwise AND. Returns the bitwise AND of all elements in a. @@ -5282,7 +5282,7 @@ pub fn _mm_mask_reduce_min_epu8(k: __mmask16, a: __m128i) -> u8 { #[target_feature(enable = "avx512bw,avx512vl")] #[stable(feature = "stdarch_x86_avx512", since = "1.89")] pub fn _mm256_reduce_mul_epi16(a: __m256i) -> i16 { - unsafe { simd_reduce_mul_unordered(a.as_i16x16()) } + unsafe { simd_reduce_mul_ordered(a.as_i16x16(), 1) } } /// Reduce the packed 16-bit integers in a by multiplication using mask k. Returns the product of all active elements in a. @@ -5292,7 +5292,7 @@ pub fn _mm256_reduce_mul_epi16(a: __m256i) -> i16 { #[target_feature(enable = "avx512bw,avx512vl")] #[stable(feature = "stdarch_x86_avx512", since = "1.89")] pub fn _mm256_mask_reduce_mul_epi16(k: __mmask16, a: __m256i) -> i16 { - unsafe { simd_reduce_mul_unordered(simd_select_bitmask(k, a.as_i16x16(), i16x16::splat(1))) } + unsafe { simd_reduce_mul_ordered(simd_select_bitmask(k, a.as_i16x16(), i16x16::splat(1)), 1) } } /// Reduce the packed 16-bit integers in a by multiplication. Returns the product of all elements in a. @@ -5302,7 +5302,7 @@ pub fn _mm256_mask_reduce_mul_epi16(k: __mmask16, a: __m256i) -> i16 { #[target_feature(enable = "avx512bw,avx512vl")] #[stable(feature = "stdarch_x86_avx512", since = "1.89")] pub fn _mm_reduce_mul_epi16(a: __m128i) -> i16 { - unsafe { simd_reduce_mul_unordered(a.as_i16x8()) } + unsafe { simd_reduce_mul_ordered(a.as_i16x8(), 1) } } /// Reduce the packed 16-bit integers in a by multiplication using mask k. Returns the product of all active elements in a. @@ -5312,7 +5312,7 @@ pub fn _mm_reduce_mul_epi16(a: __m128i) -> i16 { #[target_feature(enable = "avx512bw,avx512vl")] #[stable(feature = "stdarch_x86_avx512", since = "1.89")] pub fn _mm_mask_reduce_mul_epi16(k: __mmask8, a: __m128i) -> i16 { - unsafe { simd_reduce_mul_unordered(simd_select_bitmask(k, a.as_i16x8(), i16x8::splat(1))) } + unsafe { simd_reduce_mul_ordered(simd_select_bitmask(k, a.as_i16x8(), i16x8::splat(1)), 1) } } /// Reduce the packed 8-bit integers in a by multiplication. Returns the product of all elements in a. @@ -5322,7 +5322,7 @@ pub fn _mm_mask_reduce_mul_epi16(k: __mmask8, a: __m128i) -> i16 { #[target_feature(enable = "avx512bw,avx512vl")] #[stable(feature = "stdarch_x86_avx512", since = "1.89")] pub fn _mm256_reduce_mul_epi8(a: __m256i) -> i8 { - unsafe { simd_reduce_mul_unordered(a.as_i8x32()) } + unsafe { simd_reduce_mul_ordered(a.as_i8x32(), 1) } } /// Reduce the packed 8-bit integers in a by multiplication using mask k. Returns the product of all active elements in a. @@ -5332,7 +5332,7 @@ pub fn _mm256_reduce_mul_epi8(a: __m256i) -> i8 { #[target_feature(enable = "avx512bw,avx512vl")] #[stable(feature = "stdarch_x86_avx512", since = "1.89")] pub fn _mm256_mask_reduce_mul_epi8(k: __mmask32, a: __m256i) -> i8 { - unsafe { simd_reduce_mul_unordered(simd_select_bitmask(k, a.as_i8x32(), i8x32::splat(1))) } + unsafe { simd_reduce_mul_ordered(simd_select_bitmask(k, a.as_i8x32(), i8x32::splat(1)), 1) } } /// Reduce the packed 8-bit integers in a by multiplication. Returns the product of all elements in a. @@ -5342,7 +5342,7 @@ pub fn _mm256_mask_reduce_mul_epi8(k: __mmask32, a: __m256i) -> i8 { #[target_feature(enable = "avx512bw,avx512vl")] #[stable(feature = "stdarch_x86_avx512", since = "1.89")] pub fn _mm_reduce_mul_epi8(a: __m128i) -> i8 { - unsafe { simd_reduce_mul_unordered(a.as_i8x16()) } + unsafe { simd_reduce_mul_ordered(a.as_i8x16(), 1) } } /// Reduce the packed 8-bit integers in a by multiplication using mask k. Returns the product of all active elements in a. @@ -5352,7 +5352,7 @@ pub fn _mm_reduce_mul_epi8(a: __m128i) -> i8 { #[target_feature(enable = "avx512bw,avx512vl")] #[stable(feature = "stdarch_x86_avx512", since = "1.89")] pub fn _mm_mask_reduce_mul_epi8(k: __mmask16, a: __m128i) -> i8 { - unsafe { simd_reduce_mul_unordered(simd_select_bitmask(k, a.as_i8x16(), i8x16::splat(1))) } + unsafe { simd_reduce_mul_ordered(simd_select_bitmask(k, a.as_i8x16(), i8x16::splat(1)), 1) } } /// Reduce the packed 16-bit integers in a by bitwise OR. Returns the bitwise OR of all elements in a. diff --git a/library/stdarch/crates/core_arch/src/x86/avx512f.rs b/library/stdarch/crates/core_arch/src/x86/avx512f.rs index 8d3ddbd964bb..d6b30d2d7d86 100644 --- a/library/stdarch/crates/core_arch/src/x86/avx512f.rs +++ b/library/stdarch/crates/core_arch/src/x86/avx512f.rs @@ -33101,7 +33101,7 @@ pub fn _mm_mask_cmp_epi64_mask( #[target_feature(enable = "avx512f")] #[stable(feature = "stdarch_x86_avx512", since = "1.89")] pub fn _mm512_reduce_add_epi32(a: __m512i) -> i32 { - unsafe { simd_reduce_add_unordered(a.as_i32x16()) } + unsafe { simd_reduce_add_ordered(a.as_i32x16(), 0) } } /// Reduce the packed 32-bit integers in a by addition using mask k. Returns the sum of all active elements in a. @@ -33111,7 +33111,7 @@ pub fn _mm512_reduce_add_epi32(a: __m512i) -> i32 { #[target_feature(enable = "avx512f")] #[stable(feature = "stdarch_x86_avx512", since = "1.89")] pub fn _mm512_mask_reduce_add_epi32(k: __mmask16, a: __m512i) -> i32 { - unsafe { simd_reduce_add_unordered(simd_select_bitmask(k, a.as_i32x16(), i32x16::ZERO)) } + unsafe { simd_reduce_add_ordered(simd_select_bitmask(k, a.as_i32x16(), i32x16::ZERO), 0) } } /// Reduce the packed 64-bit integers in a by addition. Returns the sum of all elements in a. @@ -33121,7 +33121,7 @@ pub fn _mm512_mask_reduce_add_epi32(k: __mmask16, a: __m512i) -> i32 { #[target_feature(enable = "avx512f")] #[stable(feature = "stdarch_x86_avx512", since = "1.89")] pub fn _mm512_reduce_add_epi64(a: __m512i) -> i64 { - unsafe { simd_reduce_add_unordered(a.as_i64x8()) } + unsafe { simd_reduce_add_ordered(a.as_i64x8(), 0) } } /// Reduce the packed 64-bit integers in a by addition using mask k. Returns the sum of all active elements in a. @@ -33131,7 +33131,7 @@ pub fn _mm512_reduce_add_epi64(a: __m512i) -> i64 { #[target_feature(enable = "avx512f")] #[stable(feature = "stdarch_x86_avx512", since = "1.89")] pub fn _mm512_mask_reduce_add_epi64(k: __mmask8, a: __m512i) -> i64 { - unsafe { simd_reduce_add_unordered(simd_select_bitmask(k, a.as_i64x8(), i64x8::ZERO)) } + unsafe { simd_reduce_add_ordered(simd_select_bitmask(k, a.as_i64x8(), i64x8::ZERO), 0) } } /// Reduce the packed single-precision (32-bit) floating-point elements in a by addition. Returns the sum of all elements in a. @@ -33197,7 +33197,7 @@ pub fn _mm512_mask_reduce_add_pd(k: __mmask8, a: __m512d) -> f64 { #[target_feature(enable = "avx512f")] #[stable(feature = "stdarch_x86_avx512", since = "1.89")] pub fn _mm512_reduce_mul_epi32(a: __m512i) -> i32 { - unsafe { simd_reduce_mul_unordered(a.as_i32x16()) } + unsafe { simd_reduce_mul_ordered(a.as_i32x16(), 1) } } /// Reduce the packed 32-bit integers in a by multiplication using mask k. Returns the product of all active elements in a. @@ -33208,11 +33208,10 @@ pub fn _mm512_reduce_mul_epi32(a: __m512i) -> i32 { #[stable(feature = "stdarch_x86_avx512", since = "1.89")] pub fn _mm512_mask_reduce_mul_epi32(k: __mmask16, a: __m512i) -> i32 { unsafe { - simd_reduce_mul_unordered(simd_select_bitmask( - k, - a.as_i32x16(), - _mm512_set1_epi32(1).as_i32x16(), - )) + simd_reduce_mul_ordered( + simd_select_bitmask(k, a.as_i32x16(), _mm512_set1_epi32(1).as_i32x16()), + 1, + ) } } @@ -33223,7 +33222,7 @@ pub fn _mm512_mask_reduce_mul_epi32(k: __mmask16, a: __m512i) -> i32 { #[target_feature(enable = "avx512f")] #[stable(feature = "stdarch_x86_avx512", since = "1.89")] pub fn _mm512_reduce_mul_epi64(a: __m512i) -> i64 { - unsafe { simd_reduce_mul_unordered(a.as_i64x8()) } + unsafe { simd_reduce_mul_ordered(a.as_i64x8(), 1) } } /// Reduce the packed 64-bit integers in a by multiplication using mask k. Returns the product of all active elements in a. @@ -33234,11 +33233,10 @@ pub fn _mm512_reduce_mul_epi64(a: __m512i) -> i64 { #[stable(feature = "stdarch_x86_avx512", since = "1.89")] pub fn _mm512_mask_reduce_mul_epi64(k: __mmask8, a: __m512i) -> i64 { unsafe { - simd_reduce_mul_unordered(simd_select_bitmask( - k, - a.as_i64x8(), - _mm512_set1_epi64(1).as_i64x8(), - )) + simd_reduce_mul_ordered( + simd_select_bitmask(k, a.as_i64x8(), _mm512_set1_epi64(1).as_i64x8()), + 1, + ) } }