Make avx2 functions const

This commit is contained in:
sayantn
2025-09-23 19:13:18 +05:30
parent 0864ad202c
commit 2d768ea921
+399 -262
View File
@@ -31,7 +31,8 @@
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpabsd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_abs_epi32(a: __m256i) -> __m256i {
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_abs_epi32(a: __m256i) -> __m256i {
unsafe {
let a = a.as_i32x8();
let r = simd_select::<m32x8, _>(simd_lt(a, i32x8::ZERO), simd_neg(a), a);
@@ -46,7 +47,8 @@ pub fn _mm256_abs_epi32(a: __m256i) -> __m256i {
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpabsw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_abs_epi16(a: __m256i) -> __m256i {
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_abs_epi16(a: __m256i) -> __m256i {
unsafe {
let a = a.as_i16x16();
let r = simd_select::<m16x16, _>(simd_lt(a, i16x16::ZERO), simd_neg(a), a);
@@ -61,7 +63,8 @@ pub fn _mm256_abs_epi16(a: __m256i) -> __m256i {
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpabsb))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_abs_epi8(a: __m256i) -> __m256i {
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_abs_epi8(a: __m256i) -> __m256i {
unsafe {
let a = a.as_i8x32();
let r = simd_select::<m8x32, _>(simd_lt(a, i8x32::ZERO), simd_neg(a), a);
@@ -76,7 +79,8 @@ pub fn _mm256_abs_epi8(a: __m256i) -> __m256i {
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpaddq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_add_epi64(a: __m256i, b: __m256i) -> __m256i {
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_add_epi64(a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        // View both operands as i64x4 vectors before handing them to `simd_add`.
        let lhs = a.as_i64x4();
        let rhs = b.as_i64x4();
        // The sum is reinterpreted back into the `__m256i` return type.
        transmute(simd_add(lhs, rhs))
    }
}
@@ -87,7 +91,8 @@ pub fn _mm256_add_epi64(a: __m256i, b: __m256i) -> __m256i {
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpaddd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_add_epi32(a: __m256i, b: __m256i) -> __m256i {
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_add_epi32(a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        // View both operands as i32x8 vectors before handing them to `simd_add`.
        let lhs = a.as_i32x8();
        let rhs = b.as_i32x8();
        // The sum is reinterpreted back into the `__m256i` return type.
        transmute(simd_add(lhs, rhs))
    }
}
@@ -98,7 +103,8 @@ pub fn _mm256_add_epi32(a: __m256i, b: __m256i) -> __m256i {
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpaddw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_add_epi16(a: __m256i, b: __m256i) -> __m256i {
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_add_epi16(a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        // View both operands as i16x16 vectors before handing them to `simd_add`.
        let lhs = a.as_i16x16();
        let rhs = b.as_i16x16();
        // The sum is reinterpreted back into the `__m256i` return type.
        transmute(simd_add(lhs, rhs))
    }
}
@@ -109,7 +115,8 @@ pub fn _mm256_add_epi16(a: __m256i, b: __m256i) -> __m256i {
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpaddb))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_add_epi8(a: __m256i, b: __m256i) -> __m256i {
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_add_epi8(a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        // View both operands as i8x32 vectors before handing them to `simd_add`.
        let lhs = a.as_i8x32();
        let rhs = b.as_i8x32();
        // The sum is reinterpreted back into the `__m256i` return type.
        transmute(simd_add(lhs, rhs))
    }
}
@@ -120,7 +127,8 @@ pub fn _mm256_add_epi8(a: __m256i, b: __m256i) -> __m256i {
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpaddsb))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_adds_epi8(a: __m256i, b: __m256i) -> __m256i {
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_adds_epi8(a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        // Signed 8-bit view of both operands, combined with `simd_saturating_add`.
        let lhs = a.as_i8x32();
        let rhs = b.as_i8x32();
        // The result is reinterpreted back into the `__m256i` return type.
        transmute(simd_saturating_add(lhs, rhs))
    }
}
@@ -131,7 +139,8 @@ pub fn _mm256_adds_epi8(a: __m256i, b: __m256i) -> __m256i {
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpaddsw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_adds_epi16(a: __m256i, b: __m256i) -> __m256i {
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_adds_epi16(a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        // Signed 16-bit view of both operands, combined with `simd_saturating_add`.
        let lhs = a.as_i16x16();
        let rhs = b.as_i16x16();
        // The result is reinterpreted back into the `__m256i` return type.
        transmute(simd_saturating_add(lhs, rhs))
    }
}
@@ -142,7 +151,8 @@ pub fn _mm256_adds_epi16(a: __m256i, b: __m256i) -> __m256i {
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpaddusb))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_adds_epu8(a: __m256i, b: __m256i) -> __m256i {
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_adds_epu8(a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        // Unsigned 8-bit view of both operands, combined with `simd_saturating_add`.
        let lhs = a.as_u8x32();
        let rhs = b.as_u8x32();
        // The result is reinterpreted back into the `__m256i` return type.
        transmute(simd_saturating_add(lhs, rhs))
    }
}
@@ -153,7 +163,8 @@ pub fn _mm256_adds_epu8(a: __m256i, b: __m256i) -> __m256i {
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpaddusw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_adds_epu16(a: __m256i, b: __m256i) -> __m256i {
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_adds_epu16(a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        // Unsigned 16-bit view of both operands, combined with `simd_saturating_add`.
        let lhs = a.as_u16x16();
        let rhs = b.as_u16x16();
        // The result is reinterpreted back into the `__m256i` return type.
        transmute(simd_saturating_add(lhs, rhs))
    }
}
@@ -166,7 +177,8 @@ pub fn _mm256_adds_epu16(a: __m256i, b: __m256i) -> __m256i {
#[cfg_attr(test, assert_instr(vpalignr, IMM8 = 7))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_alignr_epi8<const IMM8: i32>(a: __m256i, b: __m256i) -> __m256i {
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_alignr_epi8<const IMM8: i32>(a: __m256i, b: __m256i) -> __m256i {
static_assert_uimm_bits!(IMM8, 8);
// If palignr is shifting the pair of vectors more than the size of two
@@ -247,7 +259,8 @@ const fn mask(shift: u32, i: u32) -> u32 {
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vandps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_and_si256(a: __m256i, b: __m256i) -> __m256i {
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_and_si256(a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        // Both operands are viewed as i64x4 vectors and combined with `simd_and`.
        let lhs = a.as_i64x4();
        let rhs = b.as_i64x4();
        // The result is reinterpreted back into the `__m256i` return type.
        transmute(simd_and(lhs, rhs))
    }
}
@@ -259,7 +272,8 @@ pub fn _mm256_and_si256(a: __m256i, b: __m256i) -> __m256i {
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vandnps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_andnot_si256(a: __m256i, b: __m256i) -> __m256i {
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_andnot_si256(a: __m256i, b: __m256i) -> __m256i {
unsafe {
let all_ones = _mm256_set1_epi8(-1);
transmute(simd_and(
@@ -276,7 +290,8 @@ pub fn _mm256_andnot_si256(a: __m256i, b: __m256i) -> __m256i {
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpavgw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_avg_epu16(a: __m256i, b: __m256i) -> __m256i {
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_avg_epu16(a: __m256i, b: __m256i) -> __m256i {
unsafe {
let a = simd_cast::<_, u32x16>(a.as_u16x16());
let b = simd_cast::<_, u32x16>(b.as_u16x16());
@@ -292,7 +307,8 @@ pub fn _mm256_avg_epu16(a: __m256i, b: __m256i) -> __m256i {
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpavgb))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_avg_epu8(a: __m256i, b: __m256i) -> __m256i {
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_avg_epu8(a: __m256i, b: __m256i) -> __m256i {
unsafe {
let a = simd_cast::<_, u16x32>(a.as_u8x32());
let b = simd_cast::<_, u16x32>(b.as_u8x32());
@@ -309,7 +325,8 @@ pub fn _mm256_avg_epu8(a: __m256i, b: __m256i) -> __m256i {
#[cfg_attr(test, assert_instr(vblendps, IMM4 = 9))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_blend_epi32<const IMM4: i32>(a: __m128i, b: __m128i) -> __m128i {
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_blend_epi32<const IMM4: i32>(a: __m128i, b: __m128i) -> __m128i {
static_assert_uimm_bits!(IMM4, 4);
unsafe {
let a = a.as_i32x4();
@@ -336,7 +353,8 @@ pub fn _mm_blend_epi32<const IMM4: i32>(a: __m128i, b: __m128i) -> __m128i {
#[cfg_attr(test, assert_instr(vblendps, IMM8 = 9))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_blend_epi32<const IMM8: i32>(a: __m256i, b: __m256i) -> __m256i {
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_blend_epi32<const IMM8: i32>(a: __m256i, b: __m256i) -> __m256i {
static_assert_uimm_bits!(IMM8, 8);
unsafe {
let a = a.as_i32x8();
@@ -367,7 +385,8 @@ pub fn _mm256_blend_epi32<const IMM8: i32>(a: __m256i, b: __m256i) -> __m256i {
#[cfg_attr(test, assert_instr(vpblendw, IMM8 = 9))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_blend_epi16<const IMM8: i32>(a: __m256i, b: __m256i) -> __m256i {
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_blend_epi16<const IMM8: i32>(a: __m256i, b: __m256i) -> __m256i {
static_assert_uimm_bits!(IMM8, 8);
unsafe {
let a = a.as_i16x16();
@@ -406,7 +425,8 @@ pub fn _mm256_blend_epi16<const IMM8: i32>(a: __m256i, b: __m256i) -> __m256i {
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpblendvb))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_blendv_epi8(a: __m256i, b: __m256i, mask: __m256i) -> __m256i {
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_blendv_epi8(a: __m256i, b: __m256i, mask: __m256i) -> __m256i {
unsafe {
let mask: i8x32 = simd_lt(mask.as_i8x32(), i8x32::ZERO);
transmute(simd_select(mask, b.as_i8x32(), a.as_i8x32()))
@@ -421,7 +441,8 @@ pub fn _mm256_blendv_epi8(a: __m256i, b: __m256i, mask: __m256i) -> __m256i {
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpbroadcastb))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_broadcastb_epi8(a: __m128i) -> __m128i {
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_broadcastb_epi8(a: __m128i) -> __m128i {
unsafe {
let ret = simd_shuffle!(a.as_i8x16(), i8x16::ZERO, [0_u32; 16]);
transmute::<i8x16, _>(ret)
@@ -436,7 +457,8 @@ pub fn _mm_broadcastb_epi8(a: __m128i) -> __m128i {
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpbroadcastb))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_broadcastb_epi8(a: __m128i) -> __m256i {
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_broadcastb_epi8(a: __m128i) -> __m256i {
unsafe {
let ret = simd_shuffle!(a.as_i8x16(), i8x16::ZERO, [0_u32; 32]);
transmute::<i8x32, _>(ret)
@@ -453,7 +475,8 @@ pub fn _mm256_broadcastb_epi8(a: __m128i) -> __m256i {
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vbroadcastss))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_broadcastd_epi32(a: __m128i) -> __m128i {
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_broadcastd_epi32(a: __m128i) -> __m128i {
unsafe {
let ret = simd_shuffle!(a.as_i32x4(), i32x4::ZERO, [0_u32; 4]);
transmute::<i32x4, _>(ret)
@@ -470,7 +493,8 @@ pub fn _mm_broadcastd_epi32(a: __m128i) -> __m128i {
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vbroadcastss))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_broadcastd_epi32(a: __m128i) -> __m256i {
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_broadcastd_epi32(a: __m128i) -> __m256i {
unsafe {
let ret = simd_shuffle!(a.as_i32x4(), i32x4::ZERO, [0_u32; 8]);
transmute::<i32x8, _>(ret)
@@ -487,7 +511,8 @@ pub fn _mm256_broadcastd_epi32(a: __m128i) -> __m256i {
// See https://github.com/rust-lang/stdarch/issues/791
#[cfg_attr(test, assert_instr(vmovddup))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_broadcastq_epi64(a: __m128i) -> __m128i {
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_broadcastq_epi64(a: __m128i) -> __m128i {
unsafe {
let ret = simd_shuffle!(a.as_i64x2(), a.as_i64x2(), [0_u32; 2]);
transmute::<i64x2, _>(ret)
@@ -502,7 +527,8 @@ pub fn _mm_broadcastq_epi64(a: __m128i) -> __m128i {
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vbroadcastsd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_broadcastq_epi64(a: __m128i) -> __m256i {
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_broadcastq_epi64(a: __m128i) -> __m256i {
unsafe {
let ret = simd_shuffle!(a.as_i64x2(), a.as_i64x2(), [0_u32; 4]);
transmute::<i64x4, _>(ret)
@@ -517,7 +543,8 @@ pub fn _mm256_broadcastq_epi64(a: __m128i) -> __m256i {
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vmovddup))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_broadcastsd_pd(a: __m128d) -> __m128d {
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_broadcastsd_pd(a: __m128d) -> __m128d {
unsafe { simd_shuffle!(a, _mm_setzero_pd(), [0_u32; 2]) }
}
@@ -529,7 +556,8 @@ pub fn _mm_broadcastsd_pd(a: __m128d) -> __m128d {
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vbroadcastsd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_broadcastsd_pd(a: __m128d) -> __m256d {
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_broadcastsd_pd(a: __m128d) -> __m256d {
unsafe { simd_shuffle!(a, _mm_setzero_pd(), [0_u32; 4]) }
}
@@ -540,7 +568,8 @@ pub fn _mm256_broadcastsd_pd(a: __m128d) -> __m256d {
#[inline]
#[target_feature(enable = "avx2")]
#[stable(feature = "simd_x86_updates", since = "1.82.0")]
pub fn _mm_broadcastsi128_si256(a: __m128i) -> __m256i {
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_broadcastsi128_si256(a: __m128i) -> __m256i {
unsafe {
let ret = simd_shuffle!(a.as_i64x2(), i64x2::ZERO, [0, 1, 0, 1]);
transmute::<i64x4, _>(ret)
@@ -556,7 +585,8 @@ pub fn _mm_broadcastsi128_si256(a: __m128i) -> __m256i {
#[inline]
#[target_feature(enable = "avx2")]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_broadcastsi128_si256(a: __m128i) -> __m256i {
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_broadcastsi128_si256(a: __m128i) -> __m256i {
unsafe {
let ret = simd_shuffle!(a.as_i64x2(), i64x2::ZERO, [0, 1, 0, 1]);
transmute::<i64x4, _>(ret)
@@ -571,7 +601,8 @@ pub fn _mm256_broadcastsi128_si256(a: __m128i) -> __m256i {
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vbroadcastss))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_broadcastss_ps(a: __m128) -> __m128 {
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_broadcastss_ps(a: __m128) -> __m128 {
unsafe { simd_shuffle!(a, _mm_setzero_ps(), [0_u32; 4]) }
}
@@ -583,7 +614,8 @@ pub fn _mm_broadcastss_ps(a: __m128) -> __m128 {
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vbroadcastss))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_broadcastss_ps(a: __m128) -> __m256 {
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_broadcastss_ps(a: __m128) -> __m256 {
unsafe { simd_shuffle!(a, _mm_setzero_ps(), [0_u32; 8]) }
}
@@ -595,7 +627,8 @@ pub fn _mm256_broadcastss_ps(a: __m128) -> __m256 {
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpbroadcastw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_broadcastw_epi16(a: __m128i) -> __m128i {
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_broadcastw_epi16(a: __m128i) -> __m128i {
unsafe {
let ret = simd_shuffle!(a.as_i16x8(), i16x8::ZERO, [0_u32; 8]);
transmute::<i16x8, _>(ret)
@@ -610,7 +643,8 @@ pub fn _mm_broadcastw_epi16(a: __m128i) -> __m128i {
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpbroadcastw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_broadcastw_epi16(a: __m128i) -> __m256i {
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_broadcastw_epi16(a: __m128i) -> __m256i {
unsafe {
let ret = simd_shuffle!(a.as_i16x8(), i16x8::ZERO, [0_u32; 16]);
transmute::<i16x16, _>(ret)
@@ -624,7 +658,8 @@ pub fn _mm256_broadcastw_epi16(a: __m128i) -> __m256i {
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpcmpeqq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_cmpeq_epi64(a: __m256i, b: __m256i) -> __m256i {
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_cmpeq_epi64(a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        // `simd_eq` over the i64x4 views; the comparison result is itself an i64x4.
        let eq_lanes: i64x4 = simd_eq(a.as_i64x4(), b.as_i64x4());
        // Reinterpret the comparison result as the `__m256i` return type.
        transmute(eq_lanes)
    }
}
@@ -635,7 +670,8 @@ pub fn _mm256_cmpeq_epi64(a: __m256i, b: __m256i) -> __m256i {
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpcmpeqd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_cmpeq_epi32(a: __m256i, b: __m256i) -> __m256i {
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_cmpeq_epi32(a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        // `simd_eq` over the i32x8 views; the comparison result is itself an i32x8.
        let eq_lanes: i32x8 = simd_eq(a.as_i32x8(), b.as_i32x8());
        // Reinterpret the comparison result as the `__m256i` return type.
        transmute(eq_lanes)
    }
}
@@ -646,7 +682,8 @@ pub fn _mm256_cmpeq_epi32(a: __m256i, b: __m256i) -> __m256i {
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpcmpeqw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_cmpeq_epi16(a: __m256i, b: __m256i) -> __m256i {
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_cmpeq_epi16(a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        // `simd_eq` over the i16x16 views; the comparison result is itself an i16x16.
        let eq_lanes: i16x16 = simd_eq(a.as_i16x16(), b.as_i16x16());
        // Reinterpret the comparison result as the `__m256i` return type.
        transmute(eq_lanes)
    }
}
@@ -657,7 +694,8 @@ pub fn _mm256_cmpeq_epi16(a: __m256i, b: __m256i) -> __m256i {
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpcmpeqb))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_cmpeq_epi8(a: __m256i, b: __m256i) -> __m256i {
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_cmpeq_epi8(a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        // `simd_eq` over the i8x32 views; the comparison result is itself an i8x32.
        let eq_lanes: i8x32 = simd_eq(a.as_i8x32(), b.as_i8x32());
        // Reinterpret the comparison result as the `__m256i` return type.
        transmute(eq_lanes)
    }
}
@@ -668,7 +706,8 @@ pub fn _mm256_cmpeq_epi8(a: __m256i, b: __m256i) -> __m256i {
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpcmpgtq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_cmpgt_epi64(a: __m256i, b: __m256i) -> __m256i {
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_cmpgt_epi64(a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        // `simd_gt` over the signed i64x4 views (`a` on the left, `b` on the right).
        let gt_lanes: i64x4 = simd_gt(a.as_i64x4(), b.as_i64x4());
        // Reinterpret the comparison result as the `__m256i` return type.
        transmute(gt_lanes)
    }
}
@@ -679,7 +718,8 @@ pub fn _mm256_cmpgt_epi64(a: __m256i, b: __m256i) -> __m256i {
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpcmpgtd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_cmpgt_epi32(a: __m256i, b: __m256i) -> __m256i {
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_cmpgt_epi32(a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        // `simd_gt` over the signed i32x8 views (`a` on the left, `b` on the right).
        let gt_lanes: i32x8 = simd_gt(a.as_i32x8(), b.as_i32x8());
        // Reinterpret the comparison result as the `__m256i` return type.
        transmute(gt_lanes)
    }
}
@@ -690,7 +730,8 @@ pub fn _mm256_cmpgt_epi32(a: __m256i, b: __m256i) -> __m256i {
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpcmpgtw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_cmpgt_epi16(a: __m256i, b: __m256i) -> __m256i {
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_cmpgt_epi16(a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        // `simd_gt` over the signed i16x16 views (`a` on the left, `b` on the right).
        let gt_lanes: i16x16 = simd_gt(a.as_i16x16(), b.as_i16x16());
        // Reinterpret the comparison result as the `__m256i` return type.
        transmute(gt_lanes)
    }
}
@@ -701,7 +742,8 @@ pub fn _mm256_cmpgt_epi16(a: __m256i, b: __m256i) -> __m256i {
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpcmpgtb))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_cmpgt_epi8(a: __m256i, b: __m256i) -> __m256i {
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_cmpgt_epi8(a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        // `simd_gt` over the signed i8x32 views (`a` on the left, `b` on the right).
        let gt_lanes: i8x32 = simd_gt(a.as_i8x32(), b.as_i8x32());
        // Reinterpret the comparison result as the `__m256i` return type.
        transmute(gt_lanes)
    }
}
@@ -712,7 +754,8 @@ pub fn _mm256_cmpgt_epi8(a: __m256i, b: __m256i) -> __m256i {
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpmovsxwd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_cvtepi16_epi32(a: __m128i) -> __m256i {
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_cvtepi16_epi32(a: __m128i) -> __m256i {
    unsafe {
        // Convert the signed i16x8 view of `a` to an i32x8 with `simd_cast`.
        let widened: i32x8 = simd_cast(a.as_i16x8());
        // Reinterpret the converted vector as the `__m256i` return type.
        transmute(widened)
    }
}
@@ -723,7 +766,8 @@ pub fn _mm256_cvtepi16_epi32(a: __m128i) -> __m256i {
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpmovsxwq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_cvtepi16_epi64(a: __m128i) -> __m256i {
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_cvtepi16_epi64(a: __m128i) -> __m256i {
unsafe {
let a = a.as_i16x8();
let v64: i16x4 = simd_shuffle!(a, a, [0, 1, 2, 3]);
@@ -738,7 +782,8 @@ pub fn _mm256_cvtepi16_epi64(a: __m128i) -> __m256i {
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpmovsxdq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_cvtepi32_epi64(a: __m128i) -> __m256i {
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_cvtepi32_epi64(a: __m128i) -> __m256i {
    unsafe {
        // Convert the signed i32x4 view of `a` to an i64x4 with `simd_cast`.
        let widened: i64x4 = simd_cast(a.as_i32x4());
        // Reinterpret the converted vector as the `__m256i` return type.
        transmute(widened)
    }
}
@@ -749,7 +794,8 @@ pub fn _mm256_cvtepi32_epi64(a: __m128i) -> __m256i {
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpmovsxbw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_cvtepi8_epi16(a: __m128i) -> __m256i {
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_cvtepi8_epi16(a: __m128i) -> __m256i {
    unsafe {
        // Convert the signed i8x16 view of `a` to an i16x16 with `simd_cast`.
        let widened: i16x16 = simd_cast(a.as_i8x16());
        // Reinterpret the converted vector as the `__m256i` return type.
        transmute(widened)
    }
}
@@ -760,7 +806,8 @@ pub fn _mm256_cvtepi8_epi16(a: __m128i) -> __m256i {
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpmovsxbd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_cvtepi8_epi32(a: __m128i) -> __m256i {
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_cvtepi8_epi32(a: __m128i) -> __m256i {
unsafe {
let a = a.as_i8x16();
let v64: i8x8 = simd_shuffle!(a, a, [0, 1, 2, 3, 4, 5, 6, 7]);
@@ -775,7 +822,8 @@ pub fn _mm256_cvtepi8_epi32(a: __m128i) -> __m256i {
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpmovsxbq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_cvtepi8_epi64(a: __m128i) -> __m256i {
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_cvtepi8_epi64(a: __m128i) -> __m256i {
unsafe {
let a = a.as_i8x16();
let v32: i8x4 = simd_shuffle!(a, a, [0, 1, 2, 3]);
@@ -791,7 +839,8 @@ pub fn _mm256_cvtepi8_epi64(a: __m128i) -> __m256i {
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpmovzxwd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_cvtepu16_epi32(a: __m128i) -> __m256i {
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_cvtepu16_epi32(a: __m128i) -> __m256i {
    unsafe {
        // Convert the unsigned u16x8 view of `a` to an i32x8 with `simd_cast`.
        let widened: i32x8 = simd_cast(a.as_u16x8());
        // Reinterpret the converted vector as the `__m256i` return type.
        transmute(widened)
    }
}
@@ -803,7 +852,8 @@ pub fn _mm256_cvtepu16_epi32(a: __m128i) -> __m256i {
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpmovzxwq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_cvtepu16_epi64(a: __m128i) -> __m256i {
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_cvtepu16_epi64(a: __m128i) -> __m256i {
unsafe {
let a = a.as_u16x8();
let v64: u16x4 = simd_shuffle!(a, a, [0, 1, 2, 3]);
@@ -818,7 +868,8 @@ pub fn _mm256_cvtepu16_epi64(a: __m128i) -> __m256i {
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpmovzxdq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_cvtepu32_epi64(a: __m128i) -> __m256i {
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_cvtepu32_epi64(a: __m128i) -> __m256i {
    unsafe {
        // Convert the unsigned u32x4 view of `a` to an i64x4 with `simd_cast`.
        let widened: i64x4 = simd_cast(a.as_u32x4());
        // Reinterpret the converted vector as the `__m256i` return type.
        transmute(widened)
    }
}
@@ -829,7 +880,8 @@ pub fn _mm256_cvtepu32_epi64(a: __m128i) -> __m256i {
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpmovzxbw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_cvtepu8_epi16(a: __m128i) -> __m256i {
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_cvtepu8_epi16(a: __m128i) -> __m256i {
    unsafe {
        // Convert the unsigned u8x16 view of `a` to an i16x16 with `simd_cast`.
        let widened: i16x16 = simd_cast(a.as_u8x16());
        // Reinterpret the converted vector as the `__m256i` return type.
        transmute(widened)
    }
}
@@ -841,7 +893,8 @@ pub fn _mm256_cvtepu8_epi16(a: __m128i) -> __m256i {
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpmovzxbd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_cvtepu8_epi32(a: __m128i) -> __m256i {
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_cvtepu8_epi32(a: __m128i) -> __m256i {
unsafe {
let a = a.as_u8x16();
let v64: u8x8 = simd_shuffle!(a, a, [0, 1, 2, 3, 4, 5, 6, 7]);
@@ -857,7 +910,8 @@ pub fn _mm256_cvtepu8_epi32(a: __m128i) -> __m256i {
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpmovzxbq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_cvtepu8_epi64(a: __m128i) -> __m256i {
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_cvtepu8_epi64(a: __m128i) -> __m256i {
unsafe {
let a = a.as_u8x16();
let v32: u8x4 = simd_shuffle!(a, a, [0, 1, 2, 3]);
@@ -873,7 +927,8 @@ pub fn _mm256_cvtepu8_epi64(a: __m128i) -> __m256i {
#[cfg_attr(test, assert_instr(vextractf128, IMM1 = 1))]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_extracti128_si256<const IMM1: i32>(a: __m256i) -> __m128i {
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_extracti128_si256<const IMM1: i32>(a: __m256i) -> __m128i {
static_assert_uimm_bits!(IMM1, 1);
unsafe {
let a = a.as_i64x4();
@@ -890,7 +945,8 @@ pub fn _mm256_extracti128_si256<const IMM1: i32>(a: __m256i) -> __m128i {
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vphaddw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_hadd_epi16(a: __m256i, b: __m256i) -> __m256i {
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_hadd_epi16(a: __m256i, b: __m256i) -> __m256i {
let a = a.as_i16x16();
let b = b.as_i16x16();
unsafe {
@@ -915,7 +971,8 @@ pub fn _mm256_hadd_epi16(a: __m256i, b: __m256i) -> __m256i {
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vphaddd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_hadd_epi32(a: __m256i, b: __m256i) -> __m256i {
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_hadd_epi32(a: __m256i, b: __m256i) -> __m256i {
let a = a.as_i32x8();
let b = b.as_i32x8();
unsafe {
@@ -944,7 +1001,8 @@ pub fn _mm256_hadds_epi16(a: __m256i, b: __m256i) -> __m256i {
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vphsubw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_hsub_epi16(a: __m256i, b: __m256i) -> __m256i {
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_hsub_epi16(a: __m256i, b: __m256i) -> __m256i {
let a = a.as_i16x16();
let b = b.as_i16x16();
unsafe {
@@ -969,7 +1027,8 @@ pub fn _mm256_hsub_epi16(a: __m256i, b: __m256i) -> __m256i {
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vphsubd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_hsub_epi32(a: __m256i, b: __m256i) -> __m256i {
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_hsub_epi32(a: __m256i, b: __m256i) -> __m256i {
let a = a.as_i32x8();
let b = b.as_i32x8();
unsafe {
@@ -1734,7 +1793,8 @@ pub unsafe fn _mm256_mask_i64gather_pd<const SCALE: i32>(
#[cfg_attr(test, assert_instr(vinsertf128, IMM1 = 1))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_inserti128_si256<const IMM1: i32>(a: __m256i, b: __m128i) -> __m256i {
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_inserti128_si256<const IMM1: i32>(a: __m256i, b: __m128i) -> __m256i {
static_assert_uimm_bits!(IMM1, 1);
unsafe {
let a = a.as_i64x4();
@@ -1753,7 +1813,8 @@ pub fn _mm256_inserti128_si256<const IMM1: i32>(a: __m256i, b: __m128i) -> __m25
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpmaddwd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_madd_epi16(a: __m256i, b: __m256i) -> __m256i {
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_madd_epi16(a: __m256i, b: __m256i) -> __m256i {
unsafe {
let r: i32x16 = simd_mul(simd_cast(a.as_i16x16()), simd_cast(b.as_i16x16()));
let even: i32x8 = simd_shuffle!(r, r, [0, 2, 4, 6, 8, 10, 12, 14]);
@@ -1785,7 +1846,8 @@ pub fn _mm256_maddubs_epi16(a: __m256i, b: __m256i) -> __m256i {
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpmaskmovd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_maskload_epi32(mem_addr: *const i32, mask: __m128i) -> __m128i {
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const unsafe fn _mm_maskload_epi32(mem_addr: *const i32, mask: __m128i) -> __m128i {
    // Shifting each 32-bit lane right by 31 leaves only the top bit's contribution,
    // so the per-lane selector depends solely on the highest bit of `mask`.
    let shift = i32x4::splat(31);
    let lane_mask = simd_shr(mask.as_i32x4(), shift);
    // Unaligned masked load; `i32x4::ZERO` is passed as the fallback vector.
    let loaded = simd_masked_load!(SimdAlign::Unaligned, lane_mask, mem_addr, i32x4::ZERO);
    loaded.as_m128i()
}
@@ -1799,7 +1861,8 @@ pub unsafe fn _mm_maskload_epi32(mem_addr: *const i32, mask: __m128i) -> __m128i
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpmaskmovd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_maskload_epi32(mem_addr: *const i32, mask: __m256i) -> __m256i {
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const unsafe fn _mm256_maskload_epi32(mem_addr: *const i32, mask: __m256i) -> __m256i {
    // Shifting each 32-bit lane right by 31 leaves only the top bit's contribution,
    // so the per-lane selector depends solely on the highest bit of `mask`.
    let shift = i32x8::splat(31);
    let lane_mask = simd_shr(mask.as_i32x8(), shift);
    // Unaligned masked load; `i32x8::ZERO` is passed as the fallback vector.
    let loaded = simd_masked_load!(SimdAlign::Unaligned, lane_mask, mem_addr, i32x8::ZERO);
    loaded.as_m256i()
}
@@ -1813,7 +1876,8 @@ pub unsafe fn _mm256_maskload_epi32(mem_addr: *const i32, mask: __m256i) -> __m2
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpmaskmovq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_maskload_epi64(mem_addr: *const i64, mask: __m128i) -> __m128i {
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const unsafe fn _mm_maskload_epi64(mem_addr: *const i64, mask: __m128i) -> __m128i {
    // Shifting each 64-bit lane right by 63 leaves only the top bit's contribution,
    // so the per-lane selector depends solely on the highest bit of `mask`.
    let shift = i64x2::splat(63);
    let lane_mask = simd_shr(mask.as_i64x2(), shift);
    // Unaligned masked load; `i64x2::ZERO` is passed as the fallback vector.
    let loaded = simd_masked_load!(SimdAlign::Unaligned, lane_mask, mem_addr, i64x2::ZERO);
    loaded.as_m128i()
}
@@ -1827,7 +1891,8 @@ pub unsafe fn _mm_maskload_epi64(mem_addr: *const i64, mask: __m128i) -> __m128i
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpmaskmovq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_maskload_epi64(mem_addr: *const i64, mask: __m256i) -> __m256i {
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const unsafe fn _mm256_maskload_epi64(mem_addr: *const i64, mask: __m256i) -> __m256i {
    // Shifting each 64-bit lane right by 63 leaves only the top bit's contribution,
    // so the per-lane selector depends solely on the highest bit of `mask`.
    let shift = i64x4::splat(63);
    let lane_mask = simd_shr(mask.as_i64x4(), shift);
    // Unaligned masked load; `i64x4::ZERO` is passed as the fallback vector.
    let loaded = simd_masked_load!(SimdAlign::Unaligned, lane_mask, mem_addr, i64x4::ZERO);
    loaded.as_m256i()
}
@@ -1841,7 +1906,8 @@ pub unsafe fn _mm256_maskload_epi64(mem_addr: *const i64, mask: __m256i) -> __m2
// Masked store of the `i32` lanes of `a` to `mem_addr`; returns nothing.
// Under `cfg(test)`, `assert_instr` names `vpmaskmovd` as the expected instruction.
// NOTE(review): the non-const `pub unsafe fn` line is the pre-change side of this diff;
// the `pub const unsafe fn` line after `rustc_const_unstable` is its replacement.
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpmaskmovd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_maskstore_epi32(mem_addr: *mut i32, mask: __m128i, a: __m128i) {
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const unsafe fn _mm_maskstore_epi32(mem_addr: *mut i32, mask: __m128i, a: __m128i) {
// Shift every 32-bit lane of `mask` right by 31 bits.
// NOTE(review): presumably an arithmetic shift that spreads each lane's top bit across the
// whole lane — confirm against `simd_shr` on signed lanes.
let mask = simd_shr(mask.as_i32x4(), i32x4::splat(31));
// Hand the shifted mask, the destination pointer and `a` (as `i32x4`) to the store macro.
simd_masked_store!(SimdAlign::Unaligned, mask, mem_addr, a.as_i32x4())
}
@@ -1855,7 +1921,8 @@ pub unsafe fn _mm_maskstore_epi32(mem_addr: *mut i32, mask: __m128i, a: __m128i)
// Masked store of the `i32` lanes of the 256-bit `a` to `mem_addr`; returns nothing.
// Under `cfg(test)`, `assert_instr` names `vpmaskmovd` as the expected instruction.
// NOTE(review): the non-const `pub unsafe fn` line is the pre-change side of this diff;
// the `pub const unsafe fn` line after `rustc_const_unstable` is its replacement.
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpmaskmovd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_maskstore_epi32(mem_addr: *mut i32, mask: __m256i, a: __m256i) {
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const unsafe fn _mm256_maskstore_epi32(mem_addr: *mut i32, mask: __m256i, a: __m256i) {
// Shift every 32-bit lane of `mask` right by 31 bits.
// NOTE(review): presumably an arithmetic shift that spreads each lane's top bit across the
// whole lane — confirm against `simd_shr` on signed lanes.
let mask = simd_shr(mask.as_i32x8(), i32x8::splat(31));
// Hand the shifted mask, the destination pointer and `a` (as `i32x8`) to the store macro.
simd_masked_store!(SimdAlign::Unaligned, mask, mem_addr, a.as_i32x8())
}
@@ -1869,7 +1936,8 @@ pub unsafe fn _mm256_maskstore_epi32(mem_addr: *mut i32, mask: __m256i, a: __m25
// Masked store of the `i64` lanes of `a` to `mem_addr`; returns nothing.
// Under `cfg(test)`, `assert_instr` names `vpmaskmovq` as the expected instruction.
// NOTE(review): the non-const `pub unsafe fn` line is the pre-change side of this diff;
// the `pub const unsafe fn` line after `rustc_const_unstable` is its replacement.
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpmaskmovq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_maskstore_epi64(mem_addr: *mut i64, mask: __m128i, a: __m128i) {
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const unsafe fn _mm_maskstore_epi64(mem_addr: *mut i64, mask: __m128i, a: __m128i) {
// Shift both 64-bit lanes of `mask` right by 63 bits.
// NOTE(review): presumably an arithmetic shift that spreads each lane's top bit across the
// whole lane — confirm against `simd_shr` on signed lanes.
let mask = simd_shr(mask.as_i64x2(), i64x2::splat(63));
// Hand the shifted mask, the destination pointer and `a` (as `i64x2`) to the store macro.
simd_masked_store!(SimdAlign::Unaligned, mask, mem_addr, a.as_i64x2())
}
@@ -1883,7 +1951,8 @@ pub unsafe fn _mm_maskstore_epi64(mem_addr: *mut i64, mask: __m128i, a: __m128i)
// Masked store of the `i64` lanes of the 256-bit `a` to `mem_addr`; returns nothing.
// Under `cfg(test)`, `assert_instr` names `vpmaskmovq` as the expected instruction.
// NOTE(review): the non-const `pub unsafe fn` line is the pre-change side of this diff;
// the `pub const unsafe fn` line after `rustc_const_unstable` is its replacement.
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpmaskmovq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_maskstore_epi64(mem_addr: *mut i64, mask: __m256i, a: __m256i) {
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const unsafe fn _mm256_maskstore_epi64(mem_addr: *mut i64, mask: __m256i, a: __m256i) {
// Shift all four 64-bit lanes of `mask` right by 63 bits.
// NOTE(review): presumably an arithmetic shift that spreads each lane's top bit across the
// whole lane — confirm against `simd_shr` on signed lanes.
let mask = simd_shr(mask.as_i64x4(), i64x4::splat(63));
// Hand the shifted mask, the destination pointer and `a` (as `i64x4`) to the store macro.
simd_masked_store!(SimdAlign::Unaligned, mask, mem_addr, a.as_i64x4())
}
@@ -1896,7 +1965,8 @@ pub unsafe fn _mm256_maskstore_epi64(mem_addr: *mut i64, mask: __m256i, a: __m25
// Applies `simd_imax` to `a` and `b` viewed as `i16x16` and converts the result to `__m256i`.
// Under `cfg(test)`, `assert_instr` names `vpmaxsw` as the expected instruction.
// NOTE(review): the plain `pub fn` line is the pre-change side of this diff; the
// `pub const fn` line after `rustc_const_unstable` replaces it.
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpmaxsw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_max_epi16(a: __m256i, b: __m256i) -> __m256i {
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_max_epi16(a: __m256i, b: __m256i) -> __m256i {
unsafe { simd_imax(a.as_i16x16(), b.as_i16x16()).as_m256i() }
}
@@ -1908,7 +1978,8 @@ pub fn _mm256_max_epi16(a: __m256i, b: __m256i) -> __m256i {
// Applies `simd_imax` to `a` and `b` viewed as `i32x8` and converts the result to `__m256i`.
// Under `cfg(test)`, `assert_instr` names `vpmaxsd` as the expected instruction.
// NOTE(review): the plain `pub fn` line is the pre-change side of this diff; the
// `pub const fn` line after `rustc_const_unstable` replaces it.
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpmaxsd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_max_epi32(a: __m256i, b: __m256i) -> __m256i {
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_max_epi32(a: __m256i, b: __m256i) -> __m256i {
unsafe { simd_imax(a.as_i32x8(), b.as_i32x8()).as_m256i() }
}
@@ -1920,7 +1991,8 @@ pub fn _mm256_max_epi32(a: __m256i, b: __m256i) -> __m256i {
// Applies `simd_imax` to `a` and `b` viewed as `i8x32` and converts the result to `__m256i`.
// Under `cfg(test)`, `assert_instr` names `vpmaxsb` as the expected instruction.
// NOTE(review): the plain `pub fn` line is the pre-change side of this diff; the
// `pub const fn` line after `rustc_const_unstable` replaces it.
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpmaxsb))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_max_epi8(a: __m256i, b: __m256i) -> __m256i {
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_max_epi8(a: __m256i, b: __m256i) -> __m256i {
unsafe { simd_imax(a.as_i8x32(), b.as_i8x32()).as_m256i() }
}
@@ -1932,7 +2004,8 @@ pub fn _mm256_max_epi8(a: __m256i, b: __m256i) -> __m256i {
// Applies `simd_imax` to `a` and `b` viewed as unsigned `u16x16` and converts the result to
// `__m256i`. Under `cfg(test)`, `assert_instr` names `vpmaxuw` as the expected instruction.
// NOTE(review): the plain `pub fn` line is the pre-change side of this diff; the
// `pub const fn` line after `rustc_const_unstable` replaces it.
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpmaxuw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_max_epu16(a: __m256i, b: __m256i) -> __m256i {
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_max_epu16(a: __m256i, b: __m256i) -> __m256i {
unsafe { simd_imax(a.as_u16x16(), b.as_u16x16()).as_m256i() }
}
@@ -1944,7 +2017,8 @@ pub fn _mm256_max_epu16(a: __m256i, b: __m256i) -> __m256i {
// Applies `simd_imax` to `a` and `b` viewed as unsigned `u32x8` and converts the result to
// `__m256i`. Under `cfg(test)`, `assert_instr` names `vpmaxud` as the expected instruction.
// NOTE(review): the plain `pub fn` line is the pre-change side of this diff; the
// `pub const fn` line after `rustc_const_unstable` replaces it.
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpmaxud))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_max_epu32(a: __m256i, b: __m256i) -> __m256i {
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_max_epu32(a: __m256i, b: __m256i) -> __m256i {
unsafe { simd_imax(a.as_u32x8(), b.as_u32x8()).as_m256i() }
}
@@ -1956,7 +2030,8 @@ pub fn _mm256_max_epu32(a: __m256i, b: __m256i) -> __m256i {
// Applies `simd_imax` to `a` and `b` viewed as unsigned `u8x32` and converts the result to
// `__m256i`. Under `cfg(test)`, `assert_instr` names `vpmaxub` as the expected instruction.
// NOTE(review): the plain `pub fn` line is the pre-change side of this diff; the
// `pub const fn` line after `rustc_const_unstable` replaces it.
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpmaxub))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_max_epu8(a: __m256i, b: __m256i) -> __m256i {
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_max_epu8(a: __m256i, b: __m256i) -> __m256i {
unsafe { simd_imax(a.as_u8x32(), b.as_u8x32()).as_m256i() }
}
@@ -1968,7 +2043,8 @@ pub fn _mm256_max_epu8(a: __m256i, b: __m256i) -> __m256i {
// Applies `simd_imin` to `a` and `b` viewed as `i16x16` and converts the result to `__m256i`.
// Under `cfg(test)`, `assert_instr` names `vpminsw` as the expected instruction.
// NOTE(review): the plain `pub fn` line is the pre-change side of this diff; the
// `pub const fn` line after `rustc_const_unstable` replaces it.
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpminsw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_min_epi16(a: __m256i, b: __m256i) -> __m256i {
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_min_epi16(a: __m256i, b: __m256i) -> __m256i {
unsafe { simd_imin(a.as_i16x16(), b.as_i16x16()).as_m256i() }
}
@@ -1980,7 +2056,8 @@ pub fn _mm256_min_epi16(a: __m256i, b: __m256i) -> __m256i {
// Applies `simd_imin` to `a` and `b` viewed as `i32x8` and converts the result to `__m256i`.
// Under `cfg(test)`, `assert_instr` names `vpminsd` as the expected instruction.
// NOTE(review): the plain `pub fn` line is the pre-change side of this diff; the
// `pub const fn` line after `rustc_const_unstable` replaces it.
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpminsd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_min_epi32(a: __m256i, b: __m256i) -> __m256i {
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_min_epi32(a: __m256i, b: __m256i) -> __m256i {
unsafe { simd_imin(a.as_i32x8(), b.as_i32x8()).as_m256i() }
}
@@ -1992,7 +2069,8 @@ pub fn _mm256_min_epi32(a: __m256i, b: __m256i) -> __m256i {
// Applies `simd_imin` to `a` and `b` viewed as `i8x32` and converts the result to `__m256i`.
// Under `cfg(test)`, `assert_instr` names `vpminsb` as the expected instruction.
// NOTE(review): the plain `pub fn` line is the pre-change side of this diff; the
// `pub const fn` line after `rustc_const_unstable` replaces it.
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpminsb))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_min_epi8(a: __m256i, b: __m256i) -> __m256i {
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_min_epi8(a: __m256i, b: __m256i) -> __m256i {
unsafe { simd_imin(a.as_i8x32(), b.as_i8x32()).as_m256i() }
}
@@ -2004,7 +2082,8 @@ pub fn _mm256_min_epi8(a: __m256i, b: __m256i) -> __m256i {
// Applies `simd_imin` to `a` and `b` viewed as unsigned `u16x16` and converts the result to
// `__m256i`. Under `cfg(test)`, `assert_instr` names `vpminuw` as the expected instruction.
// NOTE(review): the plain `pub fn` line is the pre-change side of this diff; the
// `pub const fn` line after `rustc_const_unstable` replaces it.
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpminuw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_min_epu16(a: __m256i, b: __m256i) -> __m256i {
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_min_epu16(a: __m256i, b: __m256i) -> __m256i {
unsafe { simd_imin(a.as_u16x16(), b.as_u16x16()).as_m256i() }
}
@@ -2016,7 +2095,8 @@ pub fn _mm256_min_epu16(a: __m256i, b: __m256i) -> __m256i {
// Applies `simd_imin` to `a` and `b` viewed as unsigned `u32x8` and converts the result to
// `__m256i`. Under `cfg(test)`, `assert_instr` names `vpminud` as the expected instruction.
// NOTE(review): the plain `pub fn` line is the pre-change side of this diff; the
// `pub const fn` line after `rustc_const_unstable` replaces it.
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpminud))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_min_epu32(a: __m256i, b: __m256i) -> __m256i {
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_min_epu32(a: __m256i, b: __m256i) -> __m256i {
unsafe { simd_imin(a.as_u32x8(), b.as_u32x8()).as_m256i() }
}
@@ -2028,7 +2108,8 @@ pub fn _mm256_min_epu32(a: __m256i, b: __m256i) -> __m256i {
// Applies `simd_imin` to `a` and `b` viewed as unsigned `u8x32` and converts the result to
// `__m256i`. Under `cfg(test)`, `assert_instr` names `vpminub` as the expected instruction.
// NOTE(review): the plain `pub fn` line is the pre-change side of this diff; the
// `pub const fn` line after `rustc_const_unstable` replaces it.
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpminub))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_min_epu8(a: __m256i, b: __m256i) -> __m256i {
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_min_epu8(a: __m256i, b: __m256i) -> __m256i {
unsafe { simd_imin(a.as_u8x32(), b.as_u8x32()).as_m256i() }
}
@@ -2040,7 +2121,8 @@ pub fn _mm256_min_epu8(a: __m256i, b: __m256i) -> __m256i {
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpmovmskb))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_movemask_epi8(a: __m256i) -> i32 {
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_movemask_epi8(a: __m256i) -> i32 {
unsafe {
let z = i8x32::ZERO;
let m: i8x32 = simd_lt(a.as_i8x32(), z);
@@ -2077,7 +2159,8 @@ pub fn _mm256_mpsadbw_epu8<const IMM8: i32>(a: __m256i, b: __m256i) -> __m256i {
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpmuldq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_mul_epi32(a: __m256i, b: __m256i) -> __m256i {
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mul_epi32(a: __m256i, b: __m256i) -> __m256i {
unsafe {
let a = simd_cast::<_, i64x4>(simd_cast::<_, i32x4>(a.as_i64x4()));
let b = simd_cast::<_, i64x4>(simd_cast::<_, i32x4>(b.as_i64x4()));
@@ -2095,11 +2178,12 @@ pub fn _mm256_mul_epi32(a: __m256i, b: __m256i) -> __m256i {
// Multiplies the low 32 bits of each 64-bit lane of `a` and `b`: both inputs are viewed as
// `u64x4`, ANDed with a `u32::MAX` mask, and multiplied lane-wise with `simd_mul`.
// Under `cfg(test)`, `assert_instr` names `vpmuludq` as the expected instruction.
// NOTE(review): the plain `pub fn` line is the pre-change side of this diff; the
// `pub const fn` line after `rustc_const_unstable` replaces it.
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpmuludq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_mul_epu32(a: __m256i, b: __m256i) -> __m256i {
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mul_epu32(a: __m256i, b: __m256i) -> __m256i {
unsafe {
let a = a.as_u64x4();
let b = b.as_u64x4();
// NOTE(review): diff residue — the `.into()` line is the pre-change form and the `as u64`
// line is its replacement (presumably because the trait call is not usable in a
// `const fn` — confirm). Both produce a mask with only the low 32 bits of each lane set.
let mask = u64x4::splat(u32::MAX.into());
let mask = u64x4::splat(u32::MAX as u64);
// Masking first keeps only the low halves, so each 64-bit product is low32(a) * low32(b).
transmute(simd_mul(simd_and(a, mask), simd_and(b, mask)))
}
}
@@ -2113,7 +2197,8 @@ pub fn _mm256_mul_epu32(a: __m256i, b: __m256i) -> __m256i {
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpmulhw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_mulhi_epi16(a: __m256i, b: __m256i) -> __m256i {
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mulhi_epi16(a: __m256i, b: __m256i) -> __m256i {
unsafe {
let a = simd_cast::<_, i32x16>(a.as_i16x16());
let b = simd_cast::<_, i32x16>(b.as_i16x16());
@@ -2131,7 +2216,8 @@ pub fn _mm256_mulhi_epi16(a: __m256i, b: __m256i) -> __m256i {
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpmulhuw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_mulhi_epu16(a: __m256i, b: __m256i) -> __m256i {
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mulhi_epu16(a: __m256i, b: __m256i) -> __m256i {
unsafe {
let a = simd_cast::<_, u32x16>(a.as_u16x16());
let b = simd_cast::<_, u32x16>(b.as_u16x16());
@@ -2149,7 +2235,8 @@ pub fn _mm256_mulhi_epu16(a: __m256i, b: __m256i) -> __m256i {
// Lane-wise `simd_mul` of `a` and `b` viewed as `i16x16`; the product vector is transmuted
// to `__m256i`. Under `cfg(test)`, `assert_instr` names `vpmullw` as the expected instruction.
// NOTE(review): the plain `pub fn` line is the pre-change side of this diff; the
// `pub const fn` line after `rustc_const_unstable` replaces it.
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpmullw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_mullo_epi16(a: __m256i, b: __m256i) -> __m256i {
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mullo_epi16(a: __m256i, b: __m256i) -> __m256i {
unsafe { transmute(simd_mul(a.as_i16x16(), b.as_i16x16())) }
}
@@ -2162,7 +2249,8 @@ pub fn _mm256_mullo_epi16(a: __m256i, b: __m256i) -> __m256i {
// Lane-wise `simd_mul` of `a` and `b` viewed as `i32x8`; the product vector is transmuted
// to `__m256i`. Under `cfg(test)`, `assert_instr` names `vpmulld` as the expected instruction.
// NOTE(review): the plain `pub fn` line is the pre-change side of this diff; the
// `pub const fn` line after `rustc_const_unstable` replaces it.
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpmulld))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_mullo_epi32(a: __m256i, b: __m256i) -> __m256i {
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mullo_epi32(a: __m256i, b: __m256i) -> __m256i {
unsafe { transmute(simd_mul(a.as_i32x8(), b.as_i32x8())) }
}
@@ -2188,7 +2276,8 @@ pub fn _mm256_mulhrs_epi16(a: __m256i, b: __m256i) -> __m256i {
// Applies `simd_or` to `a` and `b` viewed as `i32x8` and transmutes the result to `__m256i`.
// Under `cfg(test)`, `assert_instr` names `vorps` as the expected instruction.
// NOTE(review): the plain `pub fn` line is the pre-change side of this diff; the
// `pub const fn` line after `rustc_const_unstable` replaces it.
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vorps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_or_si256(a: __m256i, b: __m256i) -> __m256i {
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_or_si256(a: __m256i, b: __m256i) -> __m256i {
unsafe { transmute(simd_or(a.as_i32x8(), b.as_i32x8())) }
}
@@ -2262,7 +2351,8 @@ pub fn _mm256_permutevar8x32_epi32(a: __m256i, b: __m256i) -> __m256i {
#[cfg_attr(test, assert_instr(vpermpd, IMM8 = 9))]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_permute4x64_epi64<const IMM8: i32>(a: __m256i) -> __m256i {
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_permute4x64_epi64<const IMM8: i32>(a: __m256i) -> __m256i {
static_assert_uimm_bits!(IMM8, 8);
unsafe {
let zero = i64x4::ZERO;
@@ -2288,7 +2378,8 @@ pub fn _mm256_permute4x64_epi64<const IMM8: i32>(a: __m256i) -> __m256i {
// Thin wrapper: checks `IMM8` with `static_assert_uimm_bits!(IMM8, 8)` and forwards `a`, `b`
// and the same `IMM8` to `_mm256_permute2f128_si256`.
// Under `cfg(test)`, `assert_instr` names `vperm2f128` (with `IMM8 = 9`) as the expected
// instruction.
// NOTE(review): the plain `pub fn` line is the pre-change side of this diff; the
// `pub const fn` line after `rustc_const_unstable` replaces it.
#[cfg_attr(test, assert_instr(vperm2f128, IMM8 = 9))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_permute2x128_si256<const IMM8: i32>(a: __m256i, b: __m256i) -> __m256i {
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_permute2x128_si256<const IMM8: i32>(a: __m256i, b: __m256i) -> __m256i {
static_assert_uimm_bits!(IMM8, 8);
_mm256_permute2f128_si256::<IMM8>(a, b)
}
@@ -2302,7 +2393,8 @@ pub fn _mm256_permute2x128_si256<const IMM8: i32>(a: __m256i, b: __m256i) -> __m
#[cfg_attr(test, assert_instr(vpermpd, IMM8 = 1))]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_permute4x64_pd<const IMM8: i32>(a: __m256d) -> __m256d {
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_permute4x64_pd<const IMM8: i32>(a: __m256d) -> __m256d {
static_assert_uimm_bits!(IMM8, 8);
unsafe {
simd_shuffle!(
@@ -2417,7 +2509,8 @@ pub fn _mm256_shuffle_epi8(a: __m256i, b: __m256i) -> __m256i {
#[cfg_attr(test, assert_instr(vshufps, MASK = 9))]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_shuffle_epi32<const MASK: i32>(a: __m256i) -> __m256i {
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_shuffle_epi32<const MASK: i32>(a: __m256i) -> __m256i {
static_assert_uimm_bits!(MASK, 8);
unsafe {
let r: i32x8 = simd_shuffle!(
@@ -2448,7 +2541,8 @@ pub fn _mm256_shuffle_epi32<const MASK: i32>(a: __m256i) -> __m256i {
#[cfg_attr(test, assert_instr(vpshufhw, IMM8 = 9))]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_shufflehi_epi16<const IMM8: i32>(a: __m256i) -> __m256i {
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_shufflehi_epi16<const IMM8: i32>(a: __m256i) -> __m256i {
static_assert_uimm_bits!(IMM8, 8);
unsafe {
let a = a.as_i16x16();
@@ -2488,7 +2582,8 @@ pub fn _mm256_shufflehi_epi16<const IMM8: i32>(a: __m256i) -> __m256i {
#[cfg_attr(test, assert_instr(vpshuflw, IMM8 = 9))]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_shufflelo_epi16<const IMM8: i32>(a: __m256i) -> __m256i {
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_shufflelo_epi16<const IMM8: i32>(a: __m256i) -> __m256i {
static_assert_uimm_bits!(IMM8, 8);
unsafe {
let a = a.as_i16x16();
@@ -2602,7 +2697,8 @@ pub fn _mm256_sll_epi64(a: __m256i, count: __m128i) -> __m256i {
#[cfg_attr(test, assert_instr(vpsllw, IMM8 = 7))]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_slli_epi16<const IMM8: i32>(a: __m256i) -> __m256i {
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_slli_epi16<const IMM8: i32>(a: __m256i) -> __m256i {
static_assert_uimm_bits!(IMM8, 8);
unsafe {
if IMM8 >= 16 {
@@ -2622,7 +2718,8 @@ pub fn _mm256_slli_epi16<const IMM8: i32>(a: __m256i) -> __m256i {
#[cfg_attr(test, assert_instr(vpslld, IMM8 = 7))]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_slli_epi32<const IMM8: i32>(a: __m256i) -> __m256i {
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_slli_epi32<const IMM8: i32>(a: __m256i) -> __m256i {
unsafe {
static_assert_uimm_bits!(IMM8, 8);
if IMM8 >= 32 {
@@ -2642,7 +2739,8 @@ pub fn _mm256_slli_epi32<const IMM8: i32>(a: __m256i) -> __m256i {
#[cfg_attr(test, assert_instr(vpsllq, IMM8 = 7))]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_slli_epi64<const IMM8: i32>(a: __m256i) -> __m256i {
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_slli_epi64<const IMM8: i32>(a: __m256i) -> __m256i {
unsafe {
static_assert_uimm_bits!(IMM8, 8);
if IMM8 >= 64 {
@@ -2661,7 +2759,8 @@ pub fn _mm256_slli_epi64<const IMM8: i32>(a: __m256i) -> __m256i {
// Thin wrapper: checks `IMM8` with `static_assert_uimm_bits!(IMM8, 8)` and forwards `a` and
// the same `IMM8` to `_mm256_bslli_epi128`.
// Under `cfg(test)`, `assert_instr` names `vpslldq` (with `IMM8 = 3`) as the expected
// instruction.
// NOTE(review): the plain `pub fn` line is the pre-change side of this diff; the
// `pub const fn` line after `rustc_const_unstable` replaces it.
#[cfg_attr(test, assert_instr(vpslldq, IMM8 = 3))]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_slli_si256<const IMM8: i32>(a: __m256i) -> __m256i {
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_slli_si256<const IMM8: i32>(a: __m256i) -> __m256i {
static_assert_uimm_bits!(IMM8, 8);
_mm256_bslli_epi128::<IMM8>(a)
}
@@ -2674,7 +2773,8 @@ pub fn _mm256_slli_si256<const IMM8: i32>(a: __m256i) -> __m256i {
#[cfg_attr(test, assert_instr(vpslldq, IMM8 = 3))]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_bslli_epi128<const IMM8: i32>(a: __m256i) -> __m256i {
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_bslli_epi128<const IMM8: i32>(a: __m256i) -> __m256i {
static_assert_uimm_bits!(IMM8, 8);
const fn mask(shift: i32, i: u32) -> u32 {
let shift = shift as u32 & 0xff;
@@ -2737,7 +2837,8 @@ const fn mask(shift: i32, i: u32) -> u32 {
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpsllvd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_sllv_epi32(a: __m128i, count: __m128i) -> __m128i {
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_sllv_epi32(a: __m128i, count: __m128i) -> __m128i {
unsafe {
let count = count.as_u32x4();
let no_overflow: u32x4 = simd_lt(count, u32x4::splat(u32::BITS));
@@ -2755,7 +2856,8 @@ pub fn _mm_sllv_epi32(a: __m128i, count: __m128i) -> __m128i {
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpsllvd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_sllv_epi32(a: __m256i, count: __m256i) -> __m256i {
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_sllv_epi32(a: __m256i, count: __m256i) -> __m256i {
unsafe {
let count = count.as_u32x8();
let no_overflow: u32x8 = simd_lt(count, u32x8::splat(u32::BITS));
@@ -2773,7 +2875,8 @@ pub fn _mm256_sllv_epi32(a: __m256i, count: __m256i) -> __m256i {
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpsllvq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_sllv_epi64(a: __m128i, count: __m128i) -> __m128i {
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_sllv_epi64(a: __m128i, count: __m128i) -> __m128i {
unsafe {
let count = count.as_u64x2();
let no_overflow: u64x2 = simd_lt(count, u64x2::splat(u64::BITS as u64));
@@ -2791,7 +2894,8 @@ pub fn _mm_sllv_epi64(a: __m128i, count: __m128i) -> __m128i {
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpsllvq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_sllv_epi64(a: __m256i, count: __m256i) -> __m256i {
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_sllv_epi64(a: __m256i, count: __m256i) -> __m256i {
unsafe {
let count = count.as_u64x4();
let no_overflow: u64x4 = simd_lt(count, u64x4::splat(u64::BITS as u64));
@@ -2833,7 +2937,8 @@ pub fn _mm256_sra_epi32(a: __m256i, count: __m128i) -> __m256i {
// Shifts each lane of `a`, viewed as `i16x16`, right by `IMM8` with the count capped at 15.
// Under `cfg(test)`, `assert_instr` names `vpsraw` (with `IMM8 = 7`) as the expected
// instruction.
// NOTE(review): the plain `pub fn` line is the pre-change side of this diff; the
// `pub const fn` line after `rustc_const_unstable` replaces it.
#[cfg_attr(test, assert_instr(vpsraw, IMM8 = 7))]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_srai_epi16<const IMM8: i32>(a: __m256i) -> __m256i {
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_srai_epi16<const IMM8: i32>(a: __m256i) -> __m256i {
static_assert_uimm_bits!(IMM8, 8);
// `IMM8.min(15)` clamps the count, so any `IMM8` of 16 or more shifts by 15 instead.
unsafe { transmute(simd_shr(a.as_i16x16(), i16x16::splat(IMM8.min(15) as i16))) }
}
@@ -2847,7 +2952,8 @@ pub fn _mm256_srai_epi16<const IMM8: i32>(a: __m256i) -> __m256i {
// Shifts each lane of `a`, viewed as `i32x8`, right by `IMM8` with the count capped at 31.
// Under `cfg(test)`, `assert_instr` names `vpsrad` (with `IMM8 = 7`) as the expected
// instruction.
// NOTE(review): the plain `pub fn` line is the pre-change side of this diff; the
// `pub const fn` line after `rustc_const_unstable` replaces it.
#[cfg_attr(test, assert_instr(vpsrad, IMM8 = 7))]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_srai_epi32<const IMM8: i32>(a: __m256i) -> __m256i {
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_srai_epi32<const IMM8: i32>(a: __m256i) -> __m256i {
static_assert_uimm_bits!(IMM8, 8);
// `IMM8.min(31)` clamps the count, so any `IMM8` of 32 or more shifts by 31 instead.
unsafe { transmute(simd_shr(a.as_i32x8(), i32x8::splat(IMM8.min(31)))) }
}
@@ -2860,7 +2966,8 @@ pub fn _mm256_srai_epi32<const IMM8: i32>(a: __m256i) -> __m256i {
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpsravd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_srav_epi32(a: __m128i, count: __m128i) -> __m128i {
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_srav_epi32(a: __m128i, count: __m128i) -> __m128i {
unsafe {
let count = count.as_u32x4();
let no_overflow: u32x4 = simd_lt(count, u32x4::splat(u32::BITS));
@@ -2877,7 +2984,8 @@ pub fn _mm_srav_epi32(a: __m128i, count: __m128i) -> __m128i {
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpsravd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_srav_epi32(a: __m256i, count: __m256i) -> __m256i {
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_srav_epi32(a: __m256i, count: __m256i) -> __m256i {
unsafe {
let count = count.as_u32x8();
let no_overflow: u32x8 = simd_lt(count, u32x8::splat(u32::BITS));
@@ -2894,7 +3002,8 @@ pub fn _mm256_srav_epi32(a: __m256i, count: __m256i) -> __m256i {
// Thin wrapper: checks `IMM8` with `static_assert_uimm_bits!(IMM8, 8)` and forwards `a` and
// the same `IMM8` to `_mm256_bsrli_epi128`.
// Under `cfg(test)`, `assert_instr` names `vpsrldq` (with `IMM8 = 1`) as the expected
// instruction.
// NOTE(review): the plain `pub fn` line is the pre-change side of this diff; the
// `pub const fn` line after `rustc_const_unstable` replaces it.
#[cfg_attr(test, assert_instr(vpsrldq, IMM8 = 1))]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_srli_si256<const IMM8: i32>(a: __m256i) -> __m256i {
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_srli_si256<const IMM8: i32>(a: __m256i) -> __m256i {
static_assert_uimm_bits!(IMM8, 8);
_mm256_bsrli_epi128::<IMM8>(a)
}
@@ -2907,7 +3016,8 @@ pub fn _mm256_srli_si256<const IMM8: i32>(a: __m256i) -> __m256i {
#[cfg_attr(test, assert_instr(vpsrldq, IMM8 = 1))]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_bsrli_epi128<const IMM8: i32>(a: __m256i) -> __m256i {
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_bsrli_epi128<const IMM8: i32>(a: __m256i) -> __m256i {
static_assert_uimm_bits!(IMM8, 8);
const fn mask(shift: i32, i: u32) -> u32 {
let shift = shift as u32 & 0xff;
@@ -3006,7 +3116,8 @@ pub fn _mm256_srl_epi64(a: __m256i, count: __m128i) -> __m256i {
#[cfg_attr(test, assert_instr(vpsrlw, IMM8 = 7))]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_srli_epi16<const IMM8: i32>(a: __m256i) -> __m256i {
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_srli_epi16<const IMM8: i32>(a: __m256i) -> __m256i {
static_assert_uimm_bits!(IMM8, 8);
unsafe {
if IMM8 >= 16 {
@@ -3026,7 +3137,8 @@ pub fn _mm256_srli_epi16<const IMM8: i32>(a: __m256i) -> __m256i {
#[cfg_attr(test, assert_instr(vpsrld, IMM8 = 7))]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_srli_epi32<const IMM8: i32>(a: __m256i) -> __m256i {
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_srli_epi32<const IMM8: i32>(a: __m256i) -> __m256i {
static_assert_uimm_bits!(IMM8, 8);
unsafe {
if IMM8 >= 32 {
@@ -3046,7 +3158,8 @@ pub fn _mm256_srli_epi32<const IMM8: i32>(a: __m256i) -> __m256i {
#[cfg_attr(test, assert_instr(vpsrlq, IMM8 = 7))]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_srli_epi64<const IMM8: i32>(a: __m256i) -> __m256i {
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_srli_epi64<const IMM8: i32>(a: __m256i) -> __m256i {
static_assert_uimm_bits!(IMM8, 8);
unsafe {
if IMM8 >= 64 {
@@ -3065,7 +3178,8 @@ pub fn _mm256_srli_epi64<const IMM8: i32>(a: __m256i) -> __m256i {
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpsrlvd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_srlv_epi32(a: __m128i, count: __m128i) -> __m128i {
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_srlv_epi32(a: __m128i, count: __m128i) -> __m128i {
unsafe {
let count = count.as_u32x4();
let no_overflow: u32x4 = simd_lt(count, u32x4::splat(u32::BITS));
@@ -3082,7 +3196,8 @@ pub fn _mm_srlv_epi32(a: __m128i, count: __m128i) -> __m128i {
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpsrlvd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_srlv_epi32(a: __m256i, count: __m256i) -> __m256i {
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_srlv_epi32(a: __m256i, count: __m256i) -> __m256i {
unsafe {
let count = count.as_u32x8();
let no_overflow: u32x8 = simd_lt(count, u32x8::splat(u32::BITS));
@@ -3099,7 +3214,8 @@ pub fn _mm256_srlv_epi32(a: __m256i, count: __m256i) -> __m256i {
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpsrlvq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_srlv_epi64(a: __m128i, count: __m128i) -> __m128i {
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_srlv_epi64(a: __m128i, count: __m128i) -> __m128i {
unsafe {
let count = count.as_u64x2();
let no_overflow: u64x2 = simd_lt(count, u64x2::splat(u64::BITS as u64));
@@ -3116,7 +3232,8 @@ pub fn _mm_srlv_epi64(a: __m128i, count: __m128i) -> __m128i {
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpsrlvq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_srlv_epi64(a: __m256i, count: __m256i) -> __m256i {
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_srlv_epi64(a: __m256i, count: __m256i) -> __m256i {
unsafe {
let count = count.as_u64x4();
let no_overflow: u64x4 = simd_lt(count, u64x4::splat(u64::BITS as u64));
@@ -3152,7 +3269,8 @@ pub unsafe fn _mm256_stream_load_si256(mem_addr: *const __m256i) -> __m256i {
// Lane-wise `simd_sub` of `b` from `a`, both viewed as `i16x16`; the result is transmuted to
// `__m256i`. Under `cfg(test)`, `assert_instr` names `vpsubw` as the expected instruction.
// NOTE(review): the plain `pub fn` line is the pre-change side of this diff; the
// `pub const fn` line after `rustc_const_unstable` replaces it.
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpsubw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_sub_epi16(a: __m256i, b: __m256i) -> __m256i {
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_sub_epi16(a: __m256i, b: __m256i) -> __m256i {
unsafe { transmute(simd_sub(a.as_i16x16(), b.as_i16x16())) }
}
@@ -3163,7 +3281,8 @@ pub fn _mm256_sub_epi16(a: __m256i, b: __m256i) -> __m256i {
// Lane-wise `simd_sub` of `b` from `a`, both viewed as `i32x8`; the result is transmuted to
// `__m256i`. Under `cfg(test)`, `assert_instr` names `vpsubd` as the expected instruction.
// NOTE(review): the plain `pub fn` line is the pre-change side of this diff; the
// `pub const fn` line after `rustc_const_unstable` replaces it.
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpsubd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_sub_epi32(a: __m256i, b: __m256i) -> __m256i {
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_sub_epi32(a: __m256i, b: __m256i) -> __m256i {
unsafe { transmute(simd_sub(a.as_i32x8(), b.as_i32x8())) }
}
@@ -3174,7 +3293,8 @@ pub fn _mm256_sub_epi32(a: __m256i, b: __m256i) -> __m256i {
// Lane-wise `simd_sub` of `b` from `a`, both viewed as `i64x4`; the result is transmuted to
// `__m256i`. Under `cfg(test)`, `assert_instr` names `vpsubq` as the expected instruction.
// NOTE(review): the plain `pub fn` line is the pre-change side of this diff; the
// `pub const fn` line after `rustc_const_unstable` replaces it.
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpsubq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_sub_epi64(a: __m256i, b: __m256i) -> __m256i {
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_sub_epi64(a: __m256i, b: __m256i) -> __m256i {
unsafe { transmute(simd_sub(a.as_i64x4(), b.as_i64x4())) }
}
@@ -3185,7 +3305,8 @@ pub fn _mm256_sub_epi64(a: __m256i, b: __m256i) -> __m256i {
// Lane-wise `simd_sub` of `b` from `a`, both viewed as `i8x32`; the result is transmuted to
// `__m256i`. Under `cfg(test)`, `assert_instr` names `vpsubb` as the expected instruction.
// NOTE(review): the plain `pub fn` line is the pre-change side of this diff; the
// `pub const fn` line after `rustc_const_unstable` replaces it.
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpsubb))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_sub_epi8(a: __m256i, b: __m256i) -> __m256i {
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_sub_epi8(a: __m256i, b: __m256i) -> __m256i {
unsafe { transmute(simd_sub(a.as_i8x32(), b.as_i8x32())) }
}
@@ -3197,7 +3318,8 @@ pub fn _mm256_sub_epi8(a: __m256i, b: __m256i) -> __m256i {
// Lane-wise `simd_saturating_sub` of `b` from `a`, both viewed as signed `i16x16`; the result
// is transmuted to `__m256i`.
// Under `cfg(test)`, `assert_instr` names `vpsubsw` as the expected instruction.
// NOTE(review): the plain `pub fn` line is the pre-change side of this diff; the
// `pub const fn` line after `rustc_const_unstable` replaces it.
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpsubsw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_subs_epi16(a: __m256i, b: __m256i) -> __m256i {
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_subs_epi16(a: __m256i, b: __m256i) -> __m256i {
unsafe { transmute(simd_saturating_sub(a.as_i16x16(), b.as_i16x16())) }
}
@@ -3209,7 +3331,8 @@ pub fn _mm256_subs_epi16(a: __m256i, b: __m256i) -> __m256i {
// Lane-wise `simd_saturating_sub` of `b` from `a`, both viewed as signed `i8x32`; the result
// is transmuted to `__m256i`.
// Under `cfg(test)`, `assert_instr` names `vpsubsb` as the expected instruction.
// NOTE(review): the plain `pub fn` line is the pre-change side of this diff; the
// `pub const fn` line after `rustc_const_unstable` replaces it.
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpsubsb))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_subs_epi8(a: __m256i, b: __m256i) -> __m256i {
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_subs_epi8(a: __m256i, b: __m256i) -> __m256i {
unsafe { transmute(simd_saturating_sub(a.as_i8x32(), b.as_i8x32())) }
}
@@ -3221,7 +3344,8 @@ pub fn _mm256_subs_epi8(a: __m256i, b: __m256i) -> __m256i {
// Lane-wise `simd_saturating_sub` of `b` from `a`, both viewed as unsigned `u16x16`; the
// result is transmuted to `__m256i`.
// Under `cfg(test)`, `assert_instr` names `vpsubusw` as the expected instruction.
// NOTE(review): the plain `pub fn` line is the pre-change side of this diff; the
// `pub const fn` line after `rustc_const_unstable` replaces it.
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpsubusw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_subs_epu16(a: __m256i, b: __m256i) -> __m256i {
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_subs_epu16(a: __m256i, b: __m256i) -> __m256i {
unsafe { transmute(simd_saturating_sub(a.as_u16x16(), b.as_u16x16())) }
}
@@ -3233,7 +3357,8 @@ pub fn _mm256_subs_epu16(a: __m256i, b: __m256i) -> __m256i {
// Lane-wise `simd_saturating_sub` of `b` from `a`, both viewed as unsigned `u8x32`; the
// result is transmuted to `__m256i`.
// Under `cfg(test)`, `assert_instr` names `vpsubusb` as the expected instruction.
// NOTE(review): the plain `pub fn` line is the pre-change side of this diff; the
// `pub const fn` line after `rustc_const_unstable` replaces it.
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpsubusb))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_subs_epu8(a: __m256i, b: __m256i) -> __m256i {
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_subs_epu8(a: __m256i, b: __m256i) -> __m256i {
unsafe { transmute(simd_saturating_sub(a.as_u8x32(), b.as_u8x32())) }
}
@@ -3280,7 +3405,8 @@ pub fn _mm256_subs_epu8(a: __m256i, b: __m256i) -> __m256i {
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpunpckhbw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_unpackhi_epi8(a: __m256i, b: __m256i) -> __m256i {
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_unpackhi_epi8(a: __m256i, b: __m256i) -> __m256i {
unsafe {
#[rustfmt::skip]
let r: i8x32 = simd_shuffle!(a.as_i8x32(), b.as_i8x32(), [
@@ -3335,7 +3461,8 @@ pub fn _mm256_unpackhi_epi8(a: __m256i, b: __m256i) -> __m256i {
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpunpcklbw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_unpacklo_epi8(a: __m256i, b: __m256i) -> __m256i {
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_unpacklo_epi8(a: __m256i, b: __m256i) -> __m256i {
unsafe {
#[rustfmt::skip]
let r: i8x32 = simd_shuffle!(a.as_i8x32(), b.as_i8x32(), [
@@ -3386,7 +3513,8 @@ pub fn _mm256_unpacklo_epi8(a: __m256i, b: __m256i) -> __m256i {
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpunpckhwd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_unpackhi_epi16(a: __m256i, b: __m256i) -> __m256i {
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_unpackhi_epi16(a: __m256i, b: __m256i) -> __m256i {
unsafe {
let r: i16x16 = simd_shuffle!(
a.as_i16x16(),
@@ -3436,7 +3564,8 @@ pub fn _mm256_unpackhi_epi16(a: __m256i, b: __m256i) -> __m256i {
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpunpcklwd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_unpacklo_epi16(a: __m256i, b: __m256i) -> __m256i {
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_unpacklo_epi16(a: __m256i, b: __m256i) -> __m256i {
unsafe {
let r: i16x16 = simd_shuffle!(
a.as_i16x16(),
@@ -3479,7 +3608,8 @@ pub fn _mm256_unpacklo_epi16(a: __m256i, b: __m256i) -> __m256i {
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vunpckhps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_unpackhi_epi32(a: __m256i, b: __m256i) -> __m256i {
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_unpackhi_epi32(a: __m256i, b: __m256i) -> __m256i {
unsafe {
let r: i32x8 = simd_shuffle!(a.as_i32x8(), b.as_i32x8(), [2, 10, 3, 11, 6, 14, 7, 15]);
transmute(r)
@@ -3518,7 +3648,8 @@ pub fn _mm256_unpackhi_epi32(a: __m256i, b: __m256i) -> __m256i {
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vunpcklps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_unpacklo_epi32(a: __m256i, b: __m256i) -> __m256i {
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_unpacklo_epi32(a: __m256i, b: __m256i) -> __m256i {
unsafe {
let r: i32x8 = simd_shuffle!(a.as_i32x8(), b.as_i32x8(), [0, 8, 1, 9, 4, 12, 5, 13]);
transmute(r)
@@ -3557,7 +3688,8 @@ pub fn _mm256_unpacklo_epi32(a: __m256i, b: __m256i) -> __m256i {
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vunpckhpd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_unpackhi_epi64(a: __m256i, b: __m256i) -> __m256i {
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_unpackhi_epi64(a: __m256i, b: __m256i) -> __m256i {
unsafe {
let r: i64x4 = simd_shuffle!(a.as_i64x4(), b.as_i64x4(), [1, 5, 3, 7]);
transmute(r)
@@ -3596,7 +3728,8 @@ pub fn _mm256_unpackhi_epi64(a: __m256i, b: __m256i) -> __m256i {
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vunpcklpd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_unpacklo_epi64(a: __m256i, b: __m256i) -> __m256i {
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_unpacklo_epi64(a: __m256i, b: __m256i) -> __m256i {
unsafe {
let r: i64x4 = simd_shuffle!(a.as_i64x4(), b.as_i64x4(), [0, 4, 2, 6]);
transmute(r)
@@ -3611,7 +3744,8 @@ pub fn _mm256_unpacklo_epi64(a: __m256i, b: __m256i) -> __m256i {
// Bitwise XOR of two 256-bit vectors: both operands are viewed as `i64x4`,
// combined with `simd_xor`, and the result is transmuted back to `__m256i`.
// The `assert_instr` attribute names `vxorps` as the instruction expected in
// test builds.
// Diff listing: the plain `pub fn` line is the old signature; the
// `rustc_const_unstable` attribute and the `pub const fn` line replace it.
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vxorps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_xor_si256(a: __m256i, b: __m256i) -> __m256i {
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_xor_si256(a: __m256i, b: __m256i) -> __m256i {
unsafe { transmute(simd_xor(a.as_i64x4(), b.as_i64x4())) }
}
@@ -3626,7 +3760,8 @@ pub fn _mm256_xor_si256(a: __m256i, b: __m256i) -> __m256i {
// Returns the 8-bit lane of `a` selected by the const generic `INDEX`.
// `a` is viewed as `u8x32`, so the extracted element is a `u8`; the final
// `as i32` cast therefore zero-extends it (the result is never negative).
// Diff listing: the plain `pub fn` line is the old signature; the
// `rustc_const_unstable` attribute and the `pub const fn` line replace it.
// This intrinsic has no corresponding instruction.
#[rustc_legacy_const_generics(1)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_extract_epi8<const INDEX: i32>(a: __m256i) -> i32 {
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_extract_epi8<const INDEX: i32>(a: __m256i) -> i32 {
// Presumably rejects any INDEX that does not fit in 5 unsigned bits (0..=31),
// matching the 32 lanes of `u8x32` - confirm against the macro definition.
static_assert_uimm_bits!(INDEX, 5);
unsafe { simd_extract!(a.as_u8x32(), INDEX as u32, u8) as i32 }
}
@@ -3642,7 +3777,8 @@ pub fn _mm256_extract_epi8<const INDEX: i32>(a: __m256i) -> i32 {
// Returns the 16-bit lane of `a` selected by the const generic `INDEX`.
// `a` is viewed as `u16x16`, so the extracted element is a `u16`; the final
// `as i32` cast therefore zero-extends it (the result is never negative).
// Diff listing: the plain `pub fn` line is the old signature; the
// `rustc_const_unstable` attribute and the `pub const fn` line replace it.
// This intrinsic has no corresponding instruction.
#[rustc_legacy_const_generics(1)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_extract_epi16<const INDEX: i32>(a: __m256i) -> i32 {
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_extract_epi16<const INDEX: i32>(a: __m256i) -> i32 {
// Presumably rejects any INDEX that does not fit in 4 unsigned bits (0..=15),
// matching the 16 lanes of `u16x16` - confirm against the macro definition.
static_assert_uimm_bits!(INDEX, 4);
unsafe { simd_extract!(a.as_u16x16(), INDEX as u32, u16) as i32 }
}
@@ -3771,13 +3907,14 @@ fn vpgatherqps(
#[cfg(test)]
mod tests {
use crate::core_arch::assert_eq_const as assert_eq;
use stdarch_test::simd_test;
use crate::core_arch::x86::*;
#[simd_test(enable = "avx2")]
unsafe fn test_mm256_abs_epi32() {
const unsafe fn test_mm256_abs_epi32() {
#[rustfmt::skip]
let a = _mm256_setr_epi32(
0, 1, -1, i32::MAX,
@@ -3793,7 +3930,7 @@ unsafe fn test_mm256_abs_epi32() {
}
#[simd_test(enable = "avx2")]
unsafe fn test_mm256_abs_epi16() {
const unsafe fn test_mm256_abs_epi16() {
#[rustfmt::skip]
let a = _mm256_setr_epi16(
0, 1, -1, 2, -2, 3, -3, 4,
@@ -3809,7 +3946,7 @@ unsafe fn test_mm256_abs_epi16() {
}
#[simd_test(enable = "avx2")]
unsafe fn test_mm256_abs_epi8() {
const unsafe fn test_mm256_abs_epi8() {
#[rustfmt::skip]
let a = _mm256_setr_epi8(
0, 1, -1, 2, -2, 3, -3, 4,
@@ -3829,7 +3966,7 @@ unsafe fn test_mm256_abs_epi8() {
}
#[simd_test(enable = "avx2")]
unsafe fn test_mm256_add_epi64() {
const unsafe fn test_mm256_add_epi64() {
let a = _mm256_setr_epi64x(-10, 0, 100, 1_000_000_000);
let b = _mm256_setr_epi64x(-1, 0, 1, 2);
let r = _mm256_add_epi64(a, b);
@@ -3838,7 +3975,7 @@ unsafe fn test_mm256_add_epi64() {
}
#[simd_test(enable = "avx2")]
unsafe fn test_mm256_add_epi32() {
const unsafe fn test_mm256_add_epi32() {
let a = _mm256_setr_epi32(-1, 0, 1, 2, 3, 4, 5, 6);
let b = _mm256_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8);
let r = _mm256_add_epi32(a, b);
@@ -3847,7 +3984,7 @@ unsafe fn test_mm256_add_epi32() {
}
#[simd_test(enable = "avx2")]
unsafe fn test_mm256_add_epi16() {
const unsafe fn test_mm256_add_epi16() {
#[rustfmt::skip]
let a = _mm256_setr_epi16(
0, 1, 2, 3, 4, 5, 6, 7,
@@ -3868,7 +4005,7 @@ unsafe fn test_mm256_add_epi16() {
}
#[simd_test(enable = "avx2")]
unsafe fn test_mm256_add_epi8() {
const unsafe fn test_mm256_add_epi8() {
#[rustfmt::skip]
let a = _mm256_setr_epi8(
0, 1, 2, 3, 4, 5, 6, 7,
@@ -3895,7 +4032,7 @@ unsafe fn test_mm256_add_epi8() {
}
#[simd_test(enable = "avx2")]
unsafe fn test_mm256_adds_epi8() {
const unsafe fn test_mm256_adds_epi8() {
#[rustfmt::skip]
let a = _mm256_setr_epi8(
0, 1, 2, 3, 4, 5, 6, 7,
@@ -3938,7 +4075,7 @@ unsafe fn test_mm256_adds_epi8_saturate_negative() {
}
#[simd_test(enable = "avx2")]
unsafe fn test_mm256_adds_epi16() {
const unsafe fn test_mm256_adds_epi16() {
#[rustfmt::skip]
let a = _mm256_setr_epi16(
0, 1, 2, 3, 4, 5, 6, 7,
@@ -3976,7 +4113,7 @@ unsafe fn test_mm256_adds_epi16_saturate_negative() {
}
#[simd_test(enable = "avx2")]
unsafe fn test_mm256_adds_epu8() {
const unsafe fn test_mm256_adds_epu8() {
#[rustfmt::skip]
let a = _mm256_setr_epi8(
0, 1, 2, 3, 4, 5, 6, 7,
@@ -4011,7 +4148,7 @@ unsafe fn test_mm256_adds_epu8_saturate() {
}
#[simd_test(enable = "avx2")]
unsafe fn test_mm256_adds_epu16() {
const unsafe fn test_mm256_adds_epu16() {
#[rustfmt::skip]
let a = _mm256_setr_epi16(
0, 1, 2, 3, 4, 5, 6, 7,
@@ -4041,7 +4178,7 @@ unsafe fn test_mm256_adds_epu16_saturate() {
}
#[simd_test(enable = "avx2")]
unsafe fn test_mm256_and_si256() {
const unsafe fn test_mm256_and_si256() {
let a = _mm256_set1_epi8(5);
let b = _mm256_set1_epi8(3);
let got = _mm256_and_si256(a, b);
@@ -4049,7 +4186,7 @@ unsafe fn test_mm256_and_si256() {
}
#[simd_test(enable = "avx2")]
unsafe fn test_mm256_andnot_si256() {
const unsafe fn test_mm256_andnot_si256() {
let a = _mm256_set1_epi8(5);
let b = _mm256_set1_epi8(3);
let got = _mm256_andnot_si256(a, b);
@@ -4057,21 +4194,21 @@ unsafe fn test_mm256_andnot_si256() {
}
// Checks `_mm256_avg_epu8` with every 8-bit lane of `a` set to 3 and of `b`
// set to 9; every lane of the result must equal 6.
// Diff listing: the `unsafe fn` line is the old signature, the
// `const unsafe fn` line is its replacement.
#[simd_test(enable = "avx2")]
unsafe fn test_mm256_avg_epu8() {
const unsafe fn test_mm256_avg_epu8() {
let (a, b) = (_mm256_set1_epi8(3), _mm256_set1_epi8(9));
let r = _mm256_avg_epu8(a, b);
assert_eq_m256i(r, _mm256_set1_epi8(6));
}
// Checks `_mm256_avg_epu16` with every 16-bit lane of `a` set to 3 and of `b`
// set to 9; every lane of the result must equal 6.
// Diff listing: the `unsafe fn` line is the old signature, the
// `const unsafe fn` line is its replacement.
#[simd_test(enable = "avx2")]
unsafe fn test_mm256_avg_epu16() {
const unsafe fn test_mm256_avg_epu16() {
let (a, b) = (_mm256_set1_epi16(3), _mm256_set1_epi16(9));
let r = _mm256_avg_epu16(a, b);
assert_eq_m256i(r, _mm256_set1_epi16(6));
}
#[simd_test(enable = "avx2")]
unsafe fn test_mm_blend_epi32() {
const unsafe fn test_mm_blend_epi32() {
let (a, b) = (_mm_set1_epi32(3), _mm_set1_epi32(9));
let e = _mm_setr_epi32(9, 3, 3, 3);
let r = _mm_blend_epi32::<0x01>(a, b);
@@ -4082,7 +4219,7 @@ unsafe fn test_mm_blend_epi32() {
}
#[simd_test(enable = "avx2")]
unsafe fn test_mm256_blend_epi32() {
const unsafe fn test_mm256_blend_epi32() {
let (a, b) = (_mm256_set1_epi32(3), _mm256_set1_epi32(9));
let e = _mm256_setr_epi32(9, 3, 3, 3, 3, 3, 3, 3);
let r = _mm256_blend_epi32::<0x01>(a, b);
@@ -4098,7 +4235,7 @@ unsafe fn test_mm256_blend_epi32() {
}
#[simd_test(enable = "avx2")]
unsafe fn test_mm256_blend_epi16() {
const unsafe fn test_mm256_blend_epi16() {
let (a, b) = (_mm256_set1_epi16(3), _mm256_set1_epi16(9));
let e = _mm256_setr_epi16(9, 3, 3, 3, 3, 3, 3, 3, 9, 3, 3, 3, 3, 3, 3, 3);
let r = _mm256_blend_epi16::<0x01>(a, b);
@@ -4109,7 +4246,7 @@ unsafe fn test_mm256_blend_epi16() {
}
#[simd_test(enable = "avx2")]
unsafe fn test_mm256_blendv_epi8() {
const unsafe fn test_mm256_blendv_epi8() {
let (a, b) = (_mm256_set1_epi8(4), _mm256_set1_epi8(2));
let mask = _mm256_insert_epi8::<2>(_mm256_set1_epi8(0), -1);
let e = _mm256_insert_epi8::<2>(_mm256_set1_epi8(4), 2);
@@ -4118,63 +4255,63 @@ unsafe fn test_mm256_blendv_epi8() {
}
// Checks `_mm_broadcastb_epi8`: the input is all-zero bytes with 0x2a inserted
// at index 0, and the 128-bit result must hold 0x2a in every byte.
// Diff listing: the `unsafe fn` line is the old signature, the
// `const unsafe fn` line is its replacement.
#[simd_test(enable = "avx2")]
unsafe fn test_mm_broadcastb_epi8() {
const unsafe fn test_mm_broadcastb_epi8() {
let a = _mm_insert_epi8::<0>(_mm_set1_epi8(0x00), 0x2a);
let res = _mm_broadcastb_epi8(a);
assert_eq_m128i(res, _mm_set1_epi8(0x2a));
}
// Checks `_mm256_broadcastb_epi8`: the 128-bit input is all-zero bytes with
// 0x2a inserted at index 0, and the 256-bit result must hold 0x2a in every byte.
// Diff listing: the `unsafe fn` line is the old signature, the
// `const unsafe fn` line is its replacement.
#[simd_test(enable = "avx2")]
unsafe fn test_mm256_broadcastb_epi8() {
const unsafe fn test_mm256_broadcastb_epi8() {
let a = _mm_insert_epi8::<0>(_mm_set1_epi8(0x00), 0x2a);
let res = _mm256_broadcastb_epi8(a);
assert_eq_m256i(res, _mm256_set1_epi8(0x2a));
}
// Checks `_mm_broadcastd_epi32`: only the first element passed to
// `_mm_setr_epi32` (0x2a) may appear in the result; the distinct second
// element (0x8000000) must not leak into any lane.
// Diff listing: the `unsafe fn` line is the old signature, the
// `const unsafe fn` line is its replacement.
#[simd_test(enable = "avx2")]
unsafe fn test_mm_broadcastd_epi32() {
const unsafe fn test_mm_broadcastd_epi32() {
let a = _mm_setr_epi32(0x2a, 0x8000000, 0, 0);
let res = _mm_broadcastd_epi32(a);
assert_eq_m128i(res, _mm_set1_epi32(0x2a));
}
// Checks `_mm256_broadcastd_epi32`: only the first element passed to
// `_mm_setr_epi32` (0x2a) may appear in the 256-bit result; the distinct
// second element (0x8000000) must not leak into any lane.
// Diff listing: the `unsafe fn` line is the old signature, the
// `const unsafe fn` line is its replacement.
#[simd_test(enable = "avx2")]
unsafe fn test_mm256_broadcastd_epi32() {
const unsafe fn test_mm256_broadcastd_epi32() {
let a = _mm_setr_epi32(0x2a, 0x8000000, 0, 0);
let res = _mm256_broadcastd_epi32(a);
assert_eq_m256i(res, _mm256_set1_epi32(0x2a));
}
// Checks `_mm_broadcastq_epi64`: the first element passed to
// `_mm_setr_epi64x` (0x1ffffffff, a value wider than 32 bits) must fill both
// 64-bit lanes of the result.
// Diff listing: the `unsafe fn` line is the old signature, the
// `const unsafe fn` line is its replacement.
#[simd_test(enable = "avx2")]
unsafe fn test_mm_broadcastq_epi64() {
const unsafe fn test_mm_broadcastq_epi64() {
let a = _mm_setr_epi64x(0x1ffffffff, 0);
let res = _mm_broadcastq_epi64(a);
assert_eq_m128i(res, _mm_set1_epi64x(0x1ffffffff));
}
// Checks `_mm256_broadcastq_epi64`: the first element passed to
// `_mm_setr_epi64x` (0x1ffffffff, a value wider than 32 bits) must fill all
// four 64-bit lanes of the 256-bit result.
// Diff listing: the `unsafe fn` line is the old signature, the
// `const unsafe fn` line is its replacement.
#[simd_test(enable = "avx2")]
unsafe fn test_mm256_broadcastq_epi64() {
const unsafe fn test_mm256_broadcastq_epi64() {
let a = _mm_setr_epi64x(0x1ffffffff, 0);
let res = _mm256_broadcastq_epi64(a);
assert_eq_m256i(res, _mm256_set1_epi64x(0x1ffffffff));
}
// Checks `_mm_broadcastsd_pd`: with input (6.88, 3.44) as given to
// `_mm_setr_pd`, both f64 lanes of the result must equal 6.88.
// Diff listing: the `unsafe fn` line is the old signature, the
// `const unsafe fn` line is its replacement.
#[simd_test(enable = "avx2")]
unsafe fn test_mm_broadcastsd_pd() {
const unsafe fn test_mm_broadcastsd_pd() {
let a = _mm_setr_pd(6.88, 3.44);
let res = _mm_broadcastsd_pd(a);
assert_eq_m128d(res, _mm_set1_pd(6.88));
}
// Checks `_mm256_broadcastsd_pd`: with 128-bit input (6.88, 3.44) as given to
// `_mm_setr_pd`, all four f64 lanes of the 256-bit result must equal 6.88.
// Diff listing: the `unsafe fn` line is the old signature, the
// `const unsafe fn` line is its replacement.
#[simd_test(enable = "avx2")]
unsafe fn test_mm256_broadcastsd_pd() {
const unsafe fn test_mm256_broadcastsd_pd() {
let a = _mm_setr_pd(6.88, 3.44);
let res = _mm256_broadcastsd_pd(a);
assert_eq_m256d(res, _mm256_set1_pd(6.88f64));
}
#[simd_test(enable = "avx2")]
unsafe fn test_mm_broadcastsi128_si256() {
const unsafe fn test_mm_broadcastsi128_si256() {
let a = _mm_setr_epi64x(0x0987654321012334, 0x5678909876543210);
let res = _mm_broadcastsi128_si256(a);
let retval = _mm256_setr_epi64x(
@@ -4187,7 +4324,7 @@ unsafe fn test_mm_broadcastsi128_si256() {
}
#[simd_test(enable = "avx2")]
unsafe fn test_mm256_broadcastsi128_si256() {
const unsafe fn test_mm256_broadcastsi128_si256() {
let a = _mm_setr_epi64x(0x0987654321012334, 0x5678909876543210);
let res = _mm256_broadcastsi128_si256(a);
let retval = _mm256_setr_epi64x(
@@ -4200,35 +4337,35 @@ unsafe fn test_mm256_broadcastsi128_si256() {
}
// Checks `_mm_broadcastss_ps`: with input (6.88, 3.44, 0.0, 0.0) as given to
// `_mm_setr_ps`, all four f32 lanes of the result must equal 6.88.
// Diff listing: the `unsafe fn` line is the old signature, the
// `const unsafe fn` line is its replacement.
#[simd_test(enable = "avx2")]
unsafe fn test_mm_broadcastss_ps() {
const unsafe fn test_mm_broadcastss_ps() {
let a = _mm_setr_ps(6.88, 3.44, 0.0, 0.0);
let res = _mm_broadcastss_ps(a);
assert_eq_m128(res, _mm_set1_ps(6.88));
}
// Checks `_mm256_broadcastss_ps`: with 128-bit input (6.88, 3.44, 0.0, 0.0) as
// given to `_mm_setr_ps`, every f32 lane of the 256-bit result must equal 6.88.
// Diff listing: the `unsafe fn` line is the old signature, the
// `const unsafe fn` line is its replacement.
#[simd_test(enable = "avx2")]
unsafe fn test_mm256_broadcastss_ps() {
const unsafe fn test_mm256_broadcastss_ps() {
let a = _mm_setr_ps(6.88, 3.44, 0.0, 0.0);
let res = _mm256_broadcastss_ps(a);
assert_eq_m256(res, _mm256_set1_ps(6.88));
}
// Checks `_mm_broadcastw_epi16`: the input has 0x2a in every 16-bit lane
// except index 0, which is overwritten with 0x22b; the result must hold
// 0x22b in every lane, so none of the 0x2a filler may survive.
// Diff listing: the `unsafe fn` line is the old signature, the
// `const unsafe fn` line is its replacement.
#[simd_test(enable = "avx2")]
unsafe fn test_mm_broadcastw_epi16() {
const unsafe fn test_mm_broadcastw_epi16() {
let a = _mm_insert_epi16::<0>(_mm_set1_epi16(0x2a), 0x22b);
let res = _mm_broadcastw_epi16(a);
assert_eq_m128i(res, _mm_set1_epi16(0x22b));
}
// Checks `_mm256_broadcastw_epi16`: the 128-bit input has 0x2a in every 16-bit
// lane except index 0, which is overwritten with 0x22b; the 256-bit result
// must hold 0x22b in every lane, so none of the 0x2a filler may survive.
// Diff listing: the `unsafe fn` line is the old signature, the
// `const unsafe fn` line is its replacement.
#[simd_test(enable = "avx2")]
unsafe fn test_mm256_broadcastw_epi16() {
const unsafe fn test_mm256_broadcastw_epi16() {
let a = _mm_insert_epi16::<0>(_mm_set1_epi16(0x2a), 0x22b);
let res = _mm256_broadcastw_epi16(a);
assert_eq_m256i(res, _mm256_set1_epi16(0x22b));
}
#[simd_test(enable = "avx2")]
unsafe fn test_mm256_cmpeq_epi8() {
const unsafe fn test_mm256_cmpeq_epi8() {
#[rustfmt::skip]
let a = _mm256_setr_epi8(
0, 1, 2, 3, 4, 5, 6, 7,
@@ -4248,7 +4385,7 @@ unsafe fn test_mm256_cmpeq_epi8() {
}
#[simd_test(enable = "avx2")]
unsafe fn test_mm256_cmpeq_epi16() {
const unsafe fn test_mm256_cmpeq_epi16() {
#[rustfmt::skip]
let a = _mm256_setr_epi16(
0, 1, 2, 3, 4, 5, 6, 7,
@@ -4264,7 +4401,7 @@ unsafe fn test_mm256_cmpeq_epi16() {
}
#[simd_test(enable = "avx2")]
unsafe fn test_mm256_cmpeq_epi32() {
const unsafe fn test_mm256_cmpeq_epi32() {
let a = _mm256_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7);
let b = _mm256_setr_epi32(7, 6, 2, 4, 3, 2, 1, 0);
let r = _mm256_cmpeq_epi32(a, b);
@@ -4274,7 +4411,7 @@ unsafe fn test_mm256_cmpeq_epi32() {
}
#[simd_test(enable = "avx2")]
unsafe fn test_mm256_cmpeq_epi64() {
const unsafe fn test_mm256_cmpeq_epi64() {
let a = _mm256_setr_epi64x(0, 1, 2, 3);
let b = _mm256_setr_epi64x(3, 2, 2, 0);
let r = _mm256_cmpeq_epi64(a, b);
@@ -4282,7 +4419,7 @@ unsafe fn test_mm256_cmpeq_epi64() {
}
#[simd_test(enable = "avx2")]
unsafe fn test_mm256_cmpgt_epi8() {
const unsafe fn test_mm256_cmpgt_epi8() {
let a = _mm256_insert_epi8::<0>(_mm256_set1_epi8(0), 5);
let b = _mm256_set1_epi8(0);
let r = _mm256_cmpgt_epi8(a, b);
@@ -4290,7 +4427,7 @@ unsafe fn test_mm256_cmpgt_epi8() {
}
#[simd_test(enable = "avx2")]
unsafe fn test_mm256_cmpgt_epi16() {
const unsafe fn test_mm256_cmpgt_epi16() {
let a = _mm256_insert_epi16::<0>(_mm256_set1_epi16(0), 5);
let b = _mm256_set1_epi16(0);
let r = _mm256_cmpgt_epi16(a, b);
@@ -4298,7 +4435,7 @@ unsafe fn test_mm256_cmpgt_epi16() {
}
#[simd_test(enable = "avx2")]
unsafe fn test_mm256_cmpgt_epi32() {
const unsafe fn test_mm256_cmpgt_epi32() {
let a = _mm256_insert_epi32::<0>(_mm256_set1_epi32(0), 5);
let b = _mm256_set1_epi32(0);
let r = _mm256_cmpgt_epi32(a, b);
@@ -4306,7 +4443,7 @@ unsafe fn test_mm256_cmpgt_epi32() {
}
#[simd_test(enable = "avx2")]
unsafe fn test_mm256_cmpgt_epi64() {
const unsafe fn test_mm256_cmpgt_epi64() {
let a = _mm256_insert_epi64::<0>(_mm256_set1_epi64x(0), 5);
let b = _mm256_set1_epi64x(0);
let r = _mm256_cmpgt_epi64(a, b);
@@ -4314,7 +4451,7 @@ unsafe fn test_mm256_cmpgt_epi64() {
}
#[simd_test(enable = "avx2")]
unsafe fn test_mm256_cvtepi8_epi16() {
const unsafe fn test_mm256_cvtepi8_epi16() {
#[rustfmt::skip]
let a = _mm_setr_epi8(
0, 0, -1, 1, -2, 2, -3, 3,
@@ -4329,7 +4466,7 @@ unsafe fn test_mm256_cvtepi8_epi16() {
}
#[simd_test(enable = "avx2")]
unsafe fn test_mm256_cvtepi8_epi32() {
const unsafe fn test_mm256_cvtepi8_epi32() {
#[rustfmt::skip]
let a = _mm_setr_epi8(
0, 0, -1, 1, -2, 2, -3, 3,
@@ -4340,7 +4477,7 @@ unsafe fn test_mm256_cvtepi8_epi32() {
}
#[simd_test(enable = "avx2")]
unsafe fn test_mm256_cvtepi8_epi64() {
const unsafe fn test_mm256_cvtepi8_epi64() {
#[rustfmt::skip]
let a = _mm_setr_epi8(
0, 0, -1, 1, -2, 2, -3, 3,
@@ -4351,49 +4488,49 @@ unsafe fn test_mm256_cvtepi8_epi64() {
}
// Checks `_mm256_cvtepi16_epi32`: eight 16-bit inputs, including negative
// values, must come out as the same eight values in 32-bit lanes (negative
// inputs stay negative).
// Diff listing: the `unsafe fn` line is the old signature, the
// `const unsafe fn` line is its replacement.
#[simd_test(enable = "avx2")]
unsafe fn test_mm256_cvtepi16_epi32() {
const unsafe fn test_mm256_cvtepi16_epi32() {
let a = _mm_setr_epi16(0, 0, -1, 1, -2, 2, -3, 3);
// `r` is the expected vector; the intrinsic is called inside the assertion.
let r = _mm256_setr_epi32(0, 0, -1, 1, -2, 2, -3, 3);
assert_eq_m256i(r, _mm256_cvtepi16_epi32(a));
}
// Checks `_mm256_cvtepi16_epi64`: of the eight 16-bit inputs, only the first
// four passed to `_mm_setr_epi16` (0, 0, -1, 1) are expected in the four
// 64-bit result lanes, with the negative value preserved.
// Diff listing: the `unsafe fn` line is the old signature, the
// `const unsafe fn` line is its replacement.
#[simd_test(enable = "avx2")]
unsafe fn test_mm256_cvtepi16_epi64() {
const unsafe fn test_mm256_cvtepi16_epi64() {
let a = _mm_setr_epi16(0, 0, -1, 1, -2, 2, -3, 3);
// `r` is the expected vector; the intrinsic is called inside the assertion.
let r = _mm256_setr_epi64x(0, 0, -1, 1);
assert_eq_m256i(r, _mm256_cvtepi16_epi64(a));
}
// Checks `_mm256_cvtepi32_epi64`: four 32-bit inputs (0, 0, -1, 1) must come
// out as the same four values in 64-bit lanes, with -1 staying -1.
// Diff listing: the `unsafe fn` line is the old signature, the
// `const unsafe fn` line is its replacement.
#[simd_test(enable = "avx2")]
unsafe fn test_mm256_cvtepi32_epi64() {
const unsafe fn test_mm256_cvtepi32_epi64() {
let a = _mm_setr_epi32(0, 0, -1, 1);
// `r` is the expected vector; the intrinsic is called inside the assertion.
let r = _mm256_setr_epi64x(0, 0, -1, 1);
assert_eq_m256i(r, _mm256_cvtepi32_epi64(a));
}
// Checks `_mm256_cvtepu16_epi32`: the 16-bit inputs 0..=7 must come out as the
// same values in 32-bit lanes.
// NOTE(review): every input is non-negative, so this data alone cannot tell
// zero-extension apart from sign-extension.
// Diff listing: the `unsafe fn` line is the old signature, the
// `const unsafe fn` line is its replacement.
#[simd_test(enable = "avx2")]
unsafe fn test_mm256_cvtepu16_epi32() {
const unsafe fn test_mm256_cvtepu16_epi32() {
let a = _mm_setr_epi16(0, 1, 2, 3, 4, 5, 6, 7);
// `r` is the expected vector; the intrinsic is called inside the assertion.
let r = _mm256_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7);
assert_eq_m256i(r, _mm256_cvtepu16_epi32(a));
}
// Checks `_mm256_cvtepu16_epi64`: of the eight 16-bit inputs 0..=7, only the
// first four passed to `_mm_setr_epi16` (0, 1, 2, 3) are expected in the four
// 64-bit result lanes.
// NOTE(review): every input is non-negative, so this data alone cannot tell
// zero-extension apart from sign-extension.
// Diff listing: the `unsafe fn` line is the old signature, the
// `const unsafe fn` line is its replacement.
#[simd_test(enable = "avx2")]
unsafe fn test_mm256_cvtepu16_epi64() {
const unsafe fn test_mm256_cvtepu16_epi64() {
let a = _mm_setr_epi16(0, 1, 2, 3, 4, 5, 6, 7);
// `r` is the expected vector; the intrinsic is called inside the assertion.
let r = _mm256_setr_epi64x(0, 1, 2, 3);
assert_eq_m256i(r, _mm256_cvtepu16_epi64(a));
}
// Checks `_mm256_cvtepu32_epi64`: the 32-bit inputs 0..=3 must come out as the
// same values in 64-bit lanes.
// NOTE(review): every input is non-negative, so this data alone cannot tell
// zero-extension apart from sign-extension.
// Diff listing: the `unsafe fn` line is the old signature, the
// `const unsafe fn` line is its replacement.
#[simd_test(enable = "avx2")]
unsafe fn test_mm256_cvtepu32_epi64() {
const unsafe fn test_mm256_cvtepu32_epi64() {
let a = _mm_setr_epi32(0, 1, 2, 3);
// `r` is the expected vector; the intrinsic is called inside the assertion.
let r = _mm256_setr_epi64x(0, 1, 2, 3);
assert_eq_m256i(r, _mm256_cvtepu32_epi64(a));
}
#[simd_test(enable = "avx2")]
unsafe fn test_mm256_cvtepu8_epi16() {
const unsafe fn test_mm256_cvtepu8_epi16() {
#[rustfmt::skip]
let a = _mm_setr_epi8(
0, 1, 2, 3, 4, 5, 6, 7,
@@ -4408,7 +4545,7 @@ unsafe fn test_mm256_cvtepu8_epi16() {
}
#[simd_test(enable = "avx2")]
unsafe fn test_mm256_cvtepu8_epi32() {
const unsafe fn test_mm256_cvtepu8_epi32() {
#[rustfmt::skip]
let a = _mm_setr_epi8(
0, 1, 2, 3, 4, 5, 6, 7,
@@ -4419,7 +4556,7 @@ unsafe fn test_mm256_cvtepu8_epi32() {
}
#[simd_test(enable = "avx2")]
unsafe fn test_mm256_cvtepu8_epi64() {
const unsafe fn test_mm256_cvtepu8_epi64() {
#[rustfmt::skip]
let a = _mm_setr_epi8(
0, 1, 2, 3, 4, 5, 6, 7,
@@ -4430,7 +4567,7 @@ unsafe fn test_mm256_cvtepu8_epi64() {
}
#[simd_test(enable = "avx2")]
unsafe fn test_mm256_extracti128_si256() {
const unsafe fn test_mm256_extracti128_si256() {
let a = _mm256_setr_epi64x(1, 2, 3, 4);
let r = _mm256_extracti128_si256::<1>(a);
let e = _mm_setr_epi64x(3, 4);
@@ -4438,7 +4575,7 @@ unsafe fn test_mm256_extracti128_si256() {
}
#[simd_test(enable = "avx2")]
unsafe fn test_mm256_hadd_epi16() {
const unsafe fn test_mm256_hadd_epi16() {
let a = _mm256_set1_epi16(2);
let b = _mm256_set1_epi16(4);
let r = _mm256_hadd_epi16(a, b);
@@ -4447,7 +4584,7 @@ unsafe fn test_mm256_hadd_epi16() {
}
#[simd_test(enable = "avx2")]
unsafe fn test_mm256_hadd_epi32() {
const unsafe fn test_mm256_hadd_epi32() {
let a = _mm256_set1_epi32(2);
let b = _mm256_set1_epi32(4);
let r = _mm256_hadd_epi32(a, b);
@@ -4471,7 +4608,7 @@ unsafe fn test_mm256_hadds_epi16() {
}
#[simd_test(enable = "avx2")]
unsafe fn test_mm256_hsub_epi16() {
const unsafe fn test_mm256_hsub_epi16() {
let a = _mm256_set1_epi16(2);
let b = _mm256_set1_epi16(4);
let r = _mm256_hsub_epi16(a, b);
@@ -4480,7 +4617,7 @@ unsafe fn test_mm256_hsub_epi16() {
}
#[simd_test(enable = "avx2")]
unsafe fn test_mm256_hsub_epi32() {
const unsafe fn test_mm256_hsub_epi32() {
let a = _mm256_set1_epi32(2);
let b = _mm256_set1_epi32(4);
let r = _mm256_hsub_epi32(a, b);
@@ -4500,7 +4637,7 @@ unsafe fn test_mm256_hsubs_epi16() {
}
#[simd_test(enable = "avx2")]
unsafe fn test_mm256_madd_epi16() {
const unsafe fn test_mm256_madd_epi16() {
let a = _mm256_set1_epi16(2);
let b = _mm256_set1_epi16(4);
let r = _mm256_madd_epi16(a, b);
@@ -4509,7 +4646,7 @@ unsafe fn test_mm256_madd_epi16() {
}
#[simd_test(enable = "avx2")]
unsafe fn test_mm256_inserti128_si256() {
const unsafe fn test_mm256_inserti128_si256() {
let a = _mm256_setr_epi64x(1, 2, 3, 4);
let b = _mm_setr_epi64x(7, 8);
let r = _mm256_inserti128_si256::<1>(a, b);
@@ -4527,7 +4664,7 @@ unsafe fn test_mm256_maddubs_epi16() {
}
#[simd_test(enable = "avx2")]
unsafe fn test_mm_maskload_epi32() {
const unsafe fn test_mm_maskload_epi32() {
let nums = [1, 2, 3, 4];
let a = &nums as *const i32;
let mask = _mm_setr_epi32(-1, 0, 0, -1);
@@ -4537,7 +4674,7 @@ unsafe fn test_mm_maskload_epi32() {
}
#[simd_test(enable = "avx2")]
unsafe fn test_mm256_maskload_epi32() {
const unsafe fn test_mm256_maskload_epi32() {
let nums = [1, 2, 3, 4, 5, 6, 7, 8];
let a = &nums as *const i32;
let mask = _mm256_setr_epi32(-1, 0, 0, -1, 0, -1, -1, 0);
@@ -4547,7 +4684,7 @@ unsafe fn test_mm256_maskload_epi32() {
}
#[simd_test(enable = "avx2")]
unsafe fn test_mm_maskload_epi64() {
const unsafe fn test_mm_maskload_epi64() {
let nums = [1_i64, 2_i64];
let a = &nums as *const i64;
let mask = _mm_setr_epi64x(0, -1);
@@ -4557,7 +4694,7 @@ unsafe fn test_mm_maskload_epi64() {
}
#[simd_test(enable = "avx2")]
unsafe fn test_mm256_maskload_epi64() {
const unsafe fn test_mm256_maskload_epi64() {
let nums = [1_i64, 2_i64, 3_i64, 4_i64];
let a = &nums as *const i64;
let mask = _mm256_setr_epi64x(0, -1, -1, 0);
@@ -4567,7 +4704,7 @@ unsafe fn test_mm256_maskload_epi64() {
}
#[simd_test(enable = "avx2")]
unsafe fn test_mm_maskstore_epi32() {
const unsafe fn test_mm_maskstore_epi32() {
let a = _mm_setr_epi32(1, 2, 3, 4);
let mut arr = [-1, -1, -1, -1];
let mask = _mm_setr_epi32(-1, 0, 0, -1);
@@ -4577,7 +4714,7 @@ unsafe fn test_mm_maskstore_epi32() {
}
#[simd_test(enable = "avx2")]
unsafe fn test_mm256_maskstore_epi32() {
const unsafe fn test_mm256_maskstore_epi32() {
let a = _mm256_setr_epi32(1, 0x6d726f, 3, 42, 0x777161, 6, 7, 8);
let mut arr = [-1, -1, -1, 0x776173, -1, 0x68657265, -1, -1];
let mask = _mm256_setr_epi32(-1, 0, 0, -1, 0, -1, -1, 0);
@@ -4587,7 +4724,7 @@ unsafe fn test_mm256_maskstore_epi32() {
}
#[simd_test(enable = "avx2")]
unsafe fn test_mm_maskstore_epi64() {
const unsafe fn test_mm_maskstore_epi64() {
let a = _mm_setr_epi64x(1_i64, 2_i64);
let mut arr = [-1_i64, -1_i64];
let mask = _mm_setr_epi64x(0, -1);
@@ -4597,7 +4734,7 @@ unsafe fn test_mm_maskstore_epi64() {
}
#[simd_test(enable = "avx2")]
unsafe fn test_mm256_maskstore_epi64() {
const unsafe fn test_mm256_maskstore_epi64() {
let a = _mm256_setr_epi64x(1_i64, 2_i64, 3_i64, 4_i64);
let mut arr = [-1_i64, -1_i64, -1_i64, -1_i64];
let mask = _mm256_setr_epi64x(0, -1, -1, 0);
@@ -4607,7 +4744,7 @@ unsafe fn test_mm256_maskstore_epi64() {
}
#[simd_test(enable = "avx2")]
unsafe fn test_mm256_max_epi16() {
const unsafe fn test_mm256_max_epi16() {
let a = _mm256_set1_epi16(2);
let b = _mm256_set1_epi16(4);
let r = _mm256_max_epi16(a, b);
@@ -4615,7 +4752,7 @@ unsafe fn test_mm256_max_epi16() {
}
#[simd_test(enable = "avx2")]
unsafe fn test_mm256_max_epi32() {
const unsafe fn test_mm256_max_epi32() {
let a = _mm256_set1_epi32(2);
let b = _mm256_set1_epi32(4);
let r = _mm256_max_epi32(a, b);
@@ -4623,7 +4760,7 @@ unsafe fn test_mm256_max_epi32() {
}
#[simd_test(enable = "avx2")]
unsafe fn test_mm256_max_epi8() {
const unsafe fn test_mm256_max_epi8() {
let a = _mm256_set1_epi8(2);
let b = _mm256_set1_epi8(4);
let r = _mm256_max_epi8(a, b);
@@ -4631,7 +4768,7 @@ unsafe fn test_mm256_max_epi8() {
}
#[simd_test(enable = "avx2")]
unsafe fn test_mm256_max_epu16() {
const unsafe fn test_mm256_max_epu16() {
let a = _mm256_set1_epi16(2);
let b = _mm256_set1_epi16(4);
let r = _mm256_max_epu16(a, b);
@@ -4639,7 +4776,7 @@ unsafe fn test_mm256_max_epu16() {
}
#[simd_test(enable = "avx2")]
unsafe fn test_mm256_max_epu32() {
const unsafe fn test_mm256_max_epu32() {
let a = _mm256_set1_epi32(2);
let b = _mm256_set1_epi32(4);
let r = _mm256_max_epu32(a, b);
@@ -4647,7 +4784,7 @@ unsafe fn test_mm256_max_epu32() {
}
#[simd_test(enable = "avx2")]
unsafe fn test_mm256_max_epu8() {
const unsafe fn test_mm256_max_epu8() {
let a = _mm256_set1_epi8(2);
let b = _mm256_set1_epi8(4);
let r = _mm256_max_epu8(a, b);
@@ -4655,7 +4792,7 @@ unsafe fn test_mm256_max_epu8() {
}
#[simd_test(enable = "avx2")]
unsafe fn test_mm256_min_epi16() {
const unsafe fn test_mm256_min_epi16() {
let a = _mm256_set1_epi16(2);
let b = _mm256_set1_epi16(4);
let r = _mm256_min_epi16(a, b);
@@ -4663,7 +4800,7 @@ unsafe fn test_mm256_min_epi16() {
}
#[simd_test(enable = "avx2")]
unsafe fn test_mm256_min_epi32() {
const unsafe fn test_mm256_min_epi32() {
let a = _mm256_set1_epi32(2);
let b = _mm256_set1_epi32(4);
let r = _mm256_min_epi32(a, b);
@@ -4671,7 +4808,7 @@ unsafe fn test_mm256_min_epi32() {
}
#[simd_test(enable = "avx2")]
unsafe fn test_mm256_min_epi8() {
const unsafe fn test_mm256_min_epi8() {
let a = _mm256_set1_epi8(2);
let b = _mm256_set1_epi8(4);
let r = _mm256_min_epi8(a, b);
@@ -4679,7 +4816,7 @@ unsafe fn test_mm256_min_epi8() {
}
#[simd_test(enable = "avx2")]
unsafe fn test_mm256_min_epu16() {
const unsafe fn test_mm256_min_epu16() {
let a = _mm256_set1_epi16(2);
let b = _mm256_set1_epi16(4);
let r = _mm256_min_epu16(a, b);
@@ -4687,7 +4824,7 @@ unsafe fn test_mm256_min_epu16() {
}
#[simd_test(enable = "avx2")]
unsafe fn test_mm256_min_epu32() {
const unsafe fn test_mm256_min_epu32() {
let a = _mm256_set1_epi32(2);
let b = _mm256_set1_epi32(4);
let r = _mm256_min_epu32(a, b);
@@ -4695,7 +4832,7 @@ unsafe fn test_mm256_min_epu32() {
}
#[simd_test(enable = "avx2")]
unsafe fn test_mm256_min_epu8() {
const unsafe fn test_mm256_min_epu8() {
let a = _mm256_set1_epi8(2);
let b = _mm256_set1_epi8(4);
let r = _mm256_min_epu8(a, b);
@@ -4703,7 +4840,7 @@ unsafe fn test_mm256_min_epu8() {
}
#[simd_test(enable = "avx2")]
unsafe fn test_mm256_movemask_epi8() {
const unsafe fn test_mm256_movemask_epi8() {
let a = _mm256_set1_epi8(-1);
let r = _mm256_movemask_epi8(a);
let e = -1;
@@ -4720,7 +4857,7 @@ unsafe fn test_mm256_mpsadbw_epu8() {
}
#[simd_test(enable = "avx2")]
unsafe fn test_mm256_mul_epi32() {
const unsafe fn test_mm256_mul_epi32() {
let a = _mm256_setr_epi32(0, 0, 0, 0, 2, 2, 2, 2);
let b = _mm256_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8);
let r = _mm256_mul_epi32(a, b);
@@ -4729,7 +4866,7 @@ unsafe fn test_mm256_mul_epi32() {
}
#[simd_test(enable = "avx2")]
unsafe fn test_mm256_mul_epu32() {
const unsafe fn test_mm256_mul_epu32() {
let a = _mm256_setr_epi32(0, 0, 0, 0, 2, 2, 2, 2);
let b = _mm256_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8);
let r = _mm256_mul_epu32(a, b);
@@ -4738,7 +4875,7 @@ unsafe fn test_mm256_mul_epu32() {
}
#[simd_test(enable = "avx2")]
unsafe fn test_mm256_mulhi_epi16() {
const unsafe fn test_mm256_mulhi_epi16() {
let a = _mm256_set1_epi16(6535);
let b = _mm256_set1_epi16(6535);
let r = _mm256_mulhi_epi16(a, b);
@@ -4747,7 +4884,7 @@ unsafe fn test_mm256_mulhi_epi16() {
}
#[simd_test(enable = "avx2")]
unsafe fn test_mm256_mulhi_epu16() {
const unsafe fn test_mm256_mulhi_epu16() {
let a = _mm256_set1_epi16(6535);
let b = _mm256_set1_epi16(6535);
let r = _mm256_mulhi_epu16(a, b);
@@ -4756,7 +4893,7 @@ unsafe fn test_mm256_mulhi_epu16() {
}
#[simd_test(enable = "avx2")]
unsafe fn test_mm256_mullo_epi16() {
const unsafe fn test_mm256_mullo_epi16() {
let a = _mm256_set1_epi16(2);
let b = _mm256_set1_epi16(4);
let r = _mm256_mullo_epi16(a, b);
@@ -4765,7 +4902,7 @@ unsafe fn test_mm256_mullo_epi16() {
}
#[simd_test(enable = "avx2")]
unsafe fn test_mm256_mullo_epi32() {
const unsafe fn test_mm256_mullo_epi32() {
let a = _mm256_set1_epi32(2);
let b = _mm256_set1_epi32(4);
let r = _mm256_mullo_epi32(a, b);
@@ -4783,7 +4920,7 @@ unsafe fn test_mm256_mulhrs_epi16() {
}
#[simd_test(enable = "avx2")]
unsafe fn test_mm256_or_si256() {
const unsafe fn test_mm256_or_si256() {
let a = _mm256_set1_epi8(-1);
let b = _mm256_set1_epi8(0);
let r = _mm256_or_si256(a, b);
@@ -4852,7 +4989,7 @@ unsafe fn test_mm256_sad_epu8() {
}
#[simd_test(enable = "avx2")]
unsafe fn test_mm256_shufflehi_epi16() {
const unsafe fn test_mm256_shufflehi_epi16() {
#[rustfmt::skip]
let a = _mm256_setr_epi16(
0, 1, 2, 3, 11, 22, 33, 44,
@@ -4868,7 +5005,7 @@ unsafe fn test_mm256_shufflehi_epi16() {
}
#[simd_test(enable = "avx2")]
unsafe fn test_mm256_shufflelo_epi16() {
const unsafe fn test_mm256_shufflelo_epi16() {
#[rustfmt::skip]
let a = _mm256_setr_epi16(
11, 22, 33, 44, 0, 1, 2, 3,
@@ -4935,7 +5072,7 @@ unsafe fn test_mm256_sll_epi64() {
}
#[simd_test(enable = "avx2")]
unsafe fn test_mm256_slli_epi16() {
const unsafe fn test_mm256_slli_epi16() {
assert_eq_m256i(
_mm256_slli_epi16::<4>(_mm256_set1_epi16(0xFF)),
_mm256_set1_epi16(0xFF0),
@@ -4943,7 +5080,7 @@ unsafe fn test_mm256_slli_epi16() {
}
#[simd_test(enable = "avx2")]
unsafe fn test_mm256_slli_epi32() {
const unsafe fn test_mm256_slli_epi32() {
assert_eq_m256i(
_mm256_slli_epi32::<4>(_mm256_set1_epi32(0xFFFF)),
_mm256_set1_epi32(0xFFFF0),
@@ -4951,7 +5088,7 @@ unsafe fn test_mm256_slli_epi32() {
}
#[simd_test(enable = "avx2")]
unsafe fn test_mm256_slli_epi64() {
const unsafe fn test_mm256_slli_epi64() {
assert_eq_m256i(
_mm256_slli_epi64::<4>(_mm256_set1_epi64x(0xFFFFFFFF)),
_mm256_set1_epi64x(0xFFFFFFFF0),
@@ -4959,14 +5096,14 @@ unsafe fn test_mm256_slli_epi64() {
}
// Checks `_mm256_slli_si256::<3>`: each 64-bit lane starts as 0xFFFFFFFF and
// is expected to become 0xFFFFFFFF000000, i.e. the value moved up by three
// bytes (24 bits) with zeros filling the low bytes.
// Diff listing: the `unsafe fn` line is the old signature, the
// `const unsafe fn` line is its replacement.
#[simd_test(enable = "avx2")]
unsafe fn test_mm256_slli_si256() {
const unsafe fn test_mm256_slli_si256() {
let a = _mm256_set1_epi64x(0xFFFFFFFF);
let r = _mm256_slli_si256::<3>(a);
assert_eq_m256i(r, _mm256_set1_epi64x(0xFFFFFFFF000000));
}
#[simd_test(enable = "avx2")]
unsafe fn test_mm_sllv_epi32() {
const unsafe fn test_mm_sllv_epi32() {
let a = _mm_set1_epi32(2);
let b = _mm_set1_epi32(1);
let r = _mm_sllv_epi32(a, b);
@@ -4975,7 +5112,7 @@ unsafe fn test_mm_sllv_epi32() {
}
#[simd_test(enable = "avx2")]
unsafe fn test_mm256_sllv_epi32() {
const unsafe fn test_mm256_sllv_epi32() {
let a = _mm256_set1_epi32(2);
let b = _mm256_set1_epi32(1);
let r = _mm256_sllv_epi32(a, b);
@@ -4984,7 +5121,7 @@ unsafe fn test_mm256_sllv_epi32() {
}
#[simd_test(enable = "avx2")]
unsafe fn test_mm_sllv_epi64() {
const unsafe fn test_mm_sllv_epi64() {
let a = _mm_set1_epi64x(2);
let b = _mm_set1_epi64x(1);
let r = _mm_sllv_epi64(a, b);
@@ -4993,7 +5130,7 @@ unsafe fn test_mm_sllv_epi64() {
}
#[simd_test(enable = "avx2")]
unsafe fn test_mm256_sllv_epi64() {
const unsafe fn test_mm256_sllv_epi64() {
let a = _mm256_set1_epi64x(2);
let b = _mm256_set1_epi64x(1);
let r = _mm256_sllv_epi64(a, b);
@@ -5018,7 +5155,7 @@ unsafe fn test_mm256_sra_epi32() {
}
#[simd_test(enable = "avx2")]
unsafe fn test_mm256_srai_epi16() {
const unsafe fn test_mm256_srai_epi16() {
assert_eq_m256i(
_mm256_srai_epi16::<1>(_mm256_set1_epi16(-1)),
_mm256_set1_epi16(-1),
@@ -5026,7 +5163,7 @@ unsafe fn test_mm256_srai_epi16() {
}
#[simd_test(enable = "avx2")]
unsafe fn test_mm256_srai_epi32() {
const unsafe fn test_mm256_srai_epi32() {
assert_eq_m256i(
_mm256_srai_epi32::<1>(_mm256_set1_epi32(-1)),
_mm256_set1_epi32(-1),
@@ -5034,7 +5171,7 @@ unsafe fn test_mm256_srai_epi32() {
}
#[simd_test(enable = "avx2")]
unsafe fn test_mm_srav_epi32() {
const unsafe fn test_mm_srav_epi32() {
let a = _mm_set1_epi32(4);
let count = _mm_set1_epi32(1);
let r = _mm_srav_epi32(a, count);
@@ -5043,7 +5180,7 @@ unsafe fn test_mm_srav_epi32() {
}
#[simd_test(enable = "avx2")]
unsafe fn test_mm256_srav_epi32() {
const unsafe fn test_mm256_srav_epi32() {
let a = _mm256_set1_epi32(4);
let count = _mm256_set1_epi32(1);
let r = _mm256_srav_epi32(a, count);
@@ -5052,7 +5189,7 @@ unsafe fn test_mm256_srav_epi32() {
}
#[simd_test(enable = "avx2")]
unsafe fn test_mm256_srli_si256() {
const unsafe fn test_mm256_srli_si256() {
#[rustfmt::skip]
let a = _mm256_setr_epi8(
1, 2, 3, 4, 5, 6, 7, 8,
@@ -5096,7 +5233,7 @@ unsafe fn test_mm256_srl_epi64() {
}
#[simd_test(enable = "avx2")]
unsafe fn test_mm256_srli_epi16() {
const unsafe fn test_mm256_srli_epi16() {
assert_eq_m256i(
_mm256_srli_epi16::<4>(_mm256_set1_epi16(0xFF)),
_mm256_set1_epi16(0xF),
@@ -5104,7 +5241,7 @@ unsafe fn test_mm256_srli_epi16() {
}
#[simd_test(enable = "avx2")]
unsafe fn test_mm256_srli_epi32() {
const unsafe fn test_mm256_srli_epi32() {
assert_eq_m256i(
_mm256_srli_epi32::<4>(_mm256_set1_epi32(0xFFFF)),
_mm256_set1_epi32(0xFFF),
@@ -5112,7 +5249,7 @@ unsafe fn test_mm256_srli_epi32() {
}
#[simd_test(enable = "avx2")]
unsafe fn test_mm256_srli_epi64() {
const unsafe fn test_mm256_srli_epi64() {
assert_eq_m256i(
_mm256_srli_epi64::<4>(_mm256_set1_epi64x(0xFFFFFFFF)),
_mm256_set1_epi64x(0xFFFFFFF),
@@ -5120,7 +5257,7 @@ unsafe fn test_mm256_srli_epi64() {
}
#[simd_test(enable = "avx2")]
unsafe fn test_mm_srlv_epi32() {
const unsafe fn test_mm_srlv_epi32() {
let a = _mm_set1_epi32(2);
let count = _mm_set1_epi32(1);
let r = _mm_srlv_epi32(a, count);
@@ -5129,7 +5266,7 @@ unsafe fn test_mm_srlv_epi32() {
}
#[simd_test(enable = "avx2")]
unsafe fn test_mm256_srlv_epi32() {
const unsafe fn test_mm256_srlv_epi32() {
let a = _mm256_set1_epi32(2);
let count = _mm256_set1_epi32(1);
let r = _mm256_srlv_epi32(a, count);
@@ -5138,7 +5275,7 @@ unsafe fn test_mm256_srlv_epi32() {
}
#[simd_test(enable = "avx2")]
unsafe fn test_mm_srlv_epi64() {
const unsafe fn test_mm_srlv_epi64() {
let a = _mm_set1_epi64x(2);
let count = _mm_set1_epi64x(1);
let r = _mm_srlv_epi64(a, count);
@@ -5147,7 +5284,7 @@ unsafe fn test_mm_srlv_epi64() {
}
#[simd_test(enable = "avx2")]
unsafe fn test_mm256_srlv_epi64() {
const unsafe fn test_mm256_srlv_epi64() {
let a = _mm256_set1_epi64x(2);
let count = _mm256_set1_epi64x(1);
let r = _mm256_srlv_epi64(a, count);
@@ -5163,7 +5300,7 @@ unsafe fn test_mm256_stream_load_si256() {
}
#[simd_test(enable = "avx2")]
unsafe fn test_mm256_sub_epi16() {
const unsafe fn test_mm256_sub_epi16() {
let a = _mm256_set1_epi16(4);
let b = _mm256_set1_epi16(2);
let r = _mm256_sub_epi16(a, b);
@@ -5171,7 +5308,7 @@ unsafe fn test_mm256_sub_epi16() {
}
#[simd_test(enable = "avx2")]
unsafe fn test_mm256_sub_epi32() {
const unsafe fn test_mm256_sub_epi32() {
let a = _mm256_set1_epi32(4);
let b = _mm256_set1_epi32(2);
let r = _mm256_sub_epi32(a, b);
@@ -5179,7 +5316,7 @@ unsafe fn test_mm256_sub_epi32() {
}
#[simd_test(enable = "avx2")]
unsafe fn test_mm256_sub_epi64() {
const unsafe fn test_mm256_sub_epi64() {
let a = _mm256_set1_epi64x(4);
let b = _mm256_set1_epi64x(2);
let r = _mm256_sub_epi64(a, b);
@@ -5187,7 +5324,7 @@ unsafe fn test_mm256_sub_epi64() {
}
#[simd_test(enable = "avx2")]
unsafe fn test_mm256_sub_epi8() {
const unsafe fn test_mm256_sub_epi8() {
let a = _mm256_set1_epi8(4);
let b = _mm256_set1_epi8(2);
let r = _mm256_sub_epi8(a, b);
@@ -5195,7 +5332,7 @@ unsafe fn test_mm256_sub_epi8() {
}
#[simd_test(enable = "avx2")]
unsafe fn test_mm256_subs_epi16() {
const unsafe fn test_mm256_subs_epi16() {
let a = _mm256_set1_epi16(4);
let b = _mm256_set1_epi16(2);
let r = _mm256_subs_epi16(a, b);
@@ -5203,7 +5340,7 @@ unsafe fn test_mm256_subs_epi16() {
}
#[simd_test(enable = "avx2")]
unsafe fn test_mm256_subs_epi8() {
const unsafe fn test_mm256_subs_epi8() {
let a = _mm256_set1_epi8(4);
let b = _mm256_set1_epi8(2);
let r = _mm256_subs_epi8(a, b);
@@ -5211,7 +5348,7 @@ unsafe fn test_mm256_subs_epi8() {
}
#[simd_test(enable = "avx2")]
unsafe fn test_mm256_subs_epu16() {
const unsafe fn test_mm256_subs_epu16() {
let a = _mm256_set1_epi16(4);
let b = _mm256_set1_epi16(2);
let r = _mm256_subs_epu16(a, b);
@@ -5219,7 +5356,7 @@ unsafe fn test_mm256_subs_epu16() {
}
#[simd_test(enable = "avx2")]
unsafe fn test_mm256_subs_epu8() {
const unsafe fn test_mm256_subs_epu8() {
let a = _mm256_set1_epi8(4);
let b = _mm256_set1_epi8(2);
let r = _mm256_subs_epu8(a, b);
@@ -5227,7 +5364,7 @@ unsafe fn test_mm256_subs_epu8() {
}
#[simd_test(enable = "avx2")]
unsafe fn test_mm256_xor_si256() {
const unsafe fn test_mm256_xor_si256() {
let a = _mm256_set1_epi8(5);
let b = _mm256_set1_epi8(3);
let r = _mm256_xor_si256(a, b);
@@ -5235,7 +5372,7 @@ unsafe fn test_mm256_xor_si256() {
}
#[simd_test(enable = "avx2")]
unsafe fn test_mm256_alignr_epi8() {
const unsafe fn test_mm256_alignr_epi8() {
#[rustfmt::skip]
let a = _mm256_setr_epi8(
1, 2, 3, 4, 5, 6, 7, 8,
@@ -5327,7 +5464,7 @@ unsafe fn test_mm256_permutevar8x32_epi32() {
}
#[simd_test(enable = "avx2")]
unsafe fn test_mm256_permute4x64_epi64() {
const unsafe fn test_mm256_permute4x64_epi64() {
let a = _mm256_setr_epi64x(100, 200, 300, 400);
let expected = _mm256_setr_epi64x(400, 100, 200, 100);
let r = _mm256_permute4x64_epi64::<0b00010011>(a);
@@ -5335,7 +5472,7 @@ unsafe fn test_mm256_permute4x64_epi64() {
}
#[simd_test(enable = "avx2")]
unsafe fn test_mm256_permute2x128_si256() {
const unsafe fn test_mm256_permute2x128_si256() {
let a = _mm256_setr_epi64x(100, 200, 500, 600);
let b = _mm256_setr_epi64x(300, 400, 700, 800);
let r = _mm256_permute2x128_si256::<0b00_01_00_11>(a, b);
@@ -5344,7 +5481,7 @@ unsafe fn test_mm256_permute2x128_si256() {
}
#[simd_test(enable = "avx2")]
unsafe fn test_mm256_permute4x64_pd() {
const unsafe fn test_mm256_permute4x64_pd() {
let a = _mm256_setr_pd(1., 2., 3., 4.);
let r = _mm256_permute4x64_pd::<0b00_01_00_11>(a);
let e = _mm256_setr_pd(4., 1., 2., 1.);
@@ -5702,7 +5839,7 @@ unsafe fn test_mm256_mask_i64gather_pd() {
}
#[simd_test(enable = "avx2")]
unsafe fn test_mm256_extract_epi8() {
const unsafe fn test_mm256_extract_epi8() {
#[rustfmt::skip]
let a = _mm256_setr_epi8(
-1, 1, 2, 3, 4, 5, 6, 7,
@@ -5717,7 +5854,7 @@ unsafe fn test_mm256_extract_epi8() {
}
#[simd_test(enable = "avx2")]
unsafe fn test_mm256_extract_epi16() {
const unsafe fn test_mm256_extract_epi16() {
#[rustfmt::skip]
let a = _mm256_setr_epi16(
-1, 1, 2, 3, 4, 5, 6, 7,