diff --git a/src/tools/miri/src/shims/x86/avx512.rs b/src/tools/miri/src/shims/x86/avx512.rs index 8e1d22d723e7..fe4adf971c0d 100644 --- a/src/tools/miri/src/shims/x86/avx512.rs +++ b/src/tools/miri/src/shims/x86/avx512.rs @@ -104,8 +104,8 @@ fn emulate_x86_avx512_intrinsic( pmaddbw(this, left, right, dest)?; } - // Used to implement the _mm512_permutexvar_epi32 function. - "permvar.si.512" => { + // Used to implement the _mm512_permutexvar_epi32/_mm512_permutexvar_epi64 functions. + "permvar.si.512" | "permvar.di.512" => { let [left, right] = this.check_shim_sig_lenient(abi, CanonAbi::C, link_name, args)?; diff --git a/src/tools/miri/src/shims/x86/mod.rs b/src/tools/miri/src/shims/x86/mod.rs index ce6538c8ca27..e6e7f4b6f09f 100644 --- a/src/tools/miri/src/shims/x86/mod.rs +++ b/src/tools/miri/src/shims/x86/mod.rs @@ -1056,12 +1056,22 @@ fn pmaddbw<'tcx>( interp_ok(()) } -/// Shuffle 32-bit integers in `values` across lanes using the corresponding -/// index in `indices`, and store the results in dst. +/// Shuffle elements in `values` across lanes using the corresponding index in +/// `indices`, and store the results in `dest`. +/// +/// This helper is shared by both the 32-bit-lane and 64-bit-lane AVX +/// permute-by-index intrinsics. The element type is taken from `values` and +/// `dest`, while the index lanes are interpreted at their full width (`i32` or +/// `i64`, depending on the intrinsic). +/// +/// For a vector with `N` lanes, only the low `log2(N)` bits of each index are +/// used. Equivalently, lane `i` of the result is copied from +/// `values[indices[i] & (N - 1)]`. /// /// /// /// +/// fn permute<'tcx>( ecx: &mut crate::MiriInterpCx<'tcx>, values: &OpTy<'tcx>, @@ -1075,18 +1085,21 @@ fn permute<'tcx>( // fn permd(a: u32x8, b: u32x8) -> u32x8; // fn permps(a: __m256, b: i32x8) -> __m256; // fn vpermd(a: i32x16, idx: i32x16) -> i32x16; + // fn vpermq(a: i64x8, b: i64x8) -> i64x8; assert_eq!(dest_len, values_len); assert_eq!(dest_len, indices_len); // Only use the lower 3 bits to index into a vector with 8 lanes, // or the lower 4 bits when indexing into a 16-lane vector. assert!(dest_len.is_power_of_two()); - let mask = u32::try_from(dest_len).unwrap().strict_sub(1); + let mask = u128::from(dest_len).strict_sub(1); for i in 0..dest_len { let dest = ecx.project_index(&dest, i)?; - let index = ecx.read_scalar(&ecx.project_index(&indices, i)?)?.to_u32()?; - let element = ecx.project_index(&values, (index & mask).into())?; + let index_place = ecx.project_index(&indices, i)?; + let index = ecx.read_scalar(&index_place)?.to_uint(index_place.layout.size)?; + // `mask` is at most `dest_len - 1` which fits in a `u64`, so this cannot fail. + let element = ecx.project_index(&values, u64::try_from(index & mask).unwrap())?; ecx.copy_op(&element, &dest)?; } diff --git a/src/tools/miri/tests/pass/shims/x86/intrinsics-x86-avx512.rs b/src/tools/miri/tests/pass/shims/x86/intrinsics-x86-avx512.rs index e1e23eda8428..0417a4cbc679 100644 --- a/src/tools/miri/tests/pass/shims/x86/intrinsics-x86-avx512.rs +++ b/src/tools/miri/tests/pass/shims/x86/intrinsics-x86-avx512.rs @@ -219,6 +219,30 @@ unsafe fn test_mm512_permutexvar_epi32() { } test_mm512_permutexvar_epi32(); + #[target_feature(enable = "avx512f")] + unsafe fn test_mm512_permutexvar_epi64() { + let a = _mm512_setr_epi64(100, 200, 300, 400, 500, 600, 700, 800); + + // Mirrors stdarch's basic sanity check. + let idx = _mm512_set1_epi64(1); + let r = _mm512_permutexvar_epi64(idx, a); + let e = _mm512_set1_epi64(200); + assert_eq_m512i(r, e); + + // This must permute across the full 512-bit register, not within 128-bit lanes. + let idx = _mm512_setr_epi64(7, 0, 5, 2, 6, 1, 4, 3); + let r = _mm512_permutexvar_epi64(idx, a); + let e = _mm512_setr_epi64(800, 100, 600, 300, 700, 200, 500, 400); + assert_eq_m512i(r, e); + + // Only the low 3 bits of each 64-bit index are used. + let idx = _mm512_setr_epi64(8, 15, -1, i64::MIN, 0, 1, 2, 3); + let r = _mm512_permutexvar_epi64(idx, a); + let e = _mm512_setr_epi64(100, 800, 800, 100, 100, 200, 300, 400); + assert_eq_m512i(r, e); + } + test_mm512_permutexvar_epi64(); + #[target_feature(enable = "avx512bw")] unsafe fn test_mm512_shuffle_epi8() { #[rustfmt::skip]