mirror of
https://github.com/rust-lang/rust.git
synced 2026-04-27 18:57:42 +03:00
Merge pull request #4962 from elichai/elichai/_mm512_permutexvar_epi64
Implement `_mm512_permutexvar_epi64` shim
This commit is contained in:
@@ -104,8 +104,8 @@ fn emulate_x86_avx512_intrinsic(
|
||||
|
||||
pmaddbw(this, left, right, dest)?;
|
||||
}
|
||||
// Used to implement the _mm512_permutexvar_epi32 function.
|
||||
"permvar.si.512" => {
|
||||
// Used to implement the _mm512_permutexvar_epi32/_mm512_permutexvar_epi64 functions.
|
||||
"permvar.si.512" | "permvar.di.512" => {
|
||||
let [left, right] =
|
||||
this.check_shim_sig_lenient(abi, CanonAbi::C, link_name, args)?;
|
||||
|
||||
|
||||
@@ -1056,12 +1056,22 @@ fn pmaddbw<'tcx>(
|
||||
interp_ok(())
|
||||
}
|
||||
|
||||
/// Shuffle 32-bit integers in `values` across lanes using the corresponding
|
||||
/// index in `indices`, and store the results in dst.
|
||||
/// Shuffle elements in `values` across lanes using the corresponding index in
|
||||
/// `indices`, and store the results in `dest`.
|
||||
///
|
||||
/// This helper is shared by both the 32-bit-lane and 64-bit-lane AVX
|
||||
/// permute-by-index intrinsics. The element type is taken from `values` and
|
||||
/// `dest`, while the index lanes are interpreted at their full width (`i32` or
|
||||
/// `i64`, depending on the intrinsic).
|
||||
///
|
||||
/// For a vector with `N` lanes, only the low `log2(N)` bits of each index are
|
||||
/// used. Equivalently, lane `i` of the result is copied from
|
||||
/// `values[indices[i] & (N - 1)]`.
|
||||
///
|
||||
/// <https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_permutevar8x32_epi32>
|
||||
/// <https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_permutevar8x32_ps>
|
||||
/// <https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_permutexvar_epi32>
|
||||
/// <https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_permutexvar_epi64>
|
||||
fn permute<'tcx>(
|
||||
ecx: &mut crate::MiriInterpCx<'tcx>,
|
||||
values: &OpTy<'tcx>,
|
||||
@@ -1075,18 +1085,21 @@ fn permute<'tcx>(
|
||||
// fn permd(a: u32x8, b: u32x8) -> u32x8;
|
||||
// fn permps(a: __m256, b: i32x8) -> __m256;
|
||||
// fn vpermd(a: i32x16, idx: i32x16) -> i32x16;
|
||||
// fn vpermq(a: i64x8, b: i64x8) -> i64x8;
|
||||
assert_eq!(dest_len, values_len);
|
||||
assert_eq!(dest_len, indices_len);
|
||||
|
||||
// Only use the lower 3 bits to index into a vector with 8 lanes,
|
||||
// or the lower 4 bits when indexing into a 16-lane vector.
|
||||
assert!(dest_len.is_power_of_two());
|
||||
let mask = u32::try_from(dest_len).unwrap().strict_sub(1);
|
||||
let mask = u128::from(dest_len).strict_sub(1);
|
||||
|
||||
for i in 0..dest_len {
|
||||
let dest = ecx.project_index(&dest, i)?;
|
||||
let index = ecx.read_scalar(&ecx.project_index(&indices, i)?)?.to_u32()?;
|
||||
let element = ecx.project_index(&values, (index & mask).into())?;
|
||||
let index_place = ecx.project_index(&indices, i)?;
|
||||
let index = ecx.read_scalar(&index_place)?.to_uint(index_place.layout.size)?;
|
||||
// `mask` is at most `dest_len - 1` which fits in a `u64`, so this cannot fail.
|
||||
let element = ecx.project_index(&values, u64::try_from(index & mask).unwrap())?;
|
||||
|
||||
ecx.copy_op(&element, &dest)?;
|
||||
}
|
||||
|
||||
@@ -219,6 +219,30 @@ unsafe fn test_mm512_permutexvar_epi32() {
|
||||
}
|
||||
test_mm512_permutexvar_epi32();
|
||||
|
||||
#[target_feature(enable = "avx512f")]
|
||||
unsafe fn test_mm512_permutexvar_epi64() {
|
||||
let a = _mm512_setr_epi64(100, 200, 300, 400, 500, 600, 700, 800);
|
||||
|
||||
// Mirrors stdarch's basic sanity check.
|
||||
let idx = _mm512_set1_epi64(1);
|
||||
let r = _mm512_permutexvar_epi64(idx, a);
|
||||
let e = _mm512_set1_epi64(200);
|
||||
assert_eq_m512i(r, e);
|
||||
|
||||
// This must permute across the full 512-bit register, not within 128-bit lanes.
|
||||
let idx = _mm512_setr_epi64(7, 0, 5, 2, 6, 1, 4, 3);
|
||||
let r = _mm512_permutexvar_epi64(idx, a);
|
||||
let e = _mm512_setr_epi64(800, 100, 600, 300, 700, 200, 500, 400);
|
||||
assert_eq_m512i(r, e);
|
||||
|
||||
// Only the low 3 bits of each 64-bit index are used.
|
||||
let idx = _mm512_setr_epi64(8, 15, -1, i64::MIN, 0, 1, 2, 3);
|
||||
let r = _mm512_permutexvar_epi64(idx, a);
|
||||
let e = _mm512_setr_epi64(100, 800, 800, 100, 100, 200, 300, 400);
|
||||
assert_eq_m512i(r, e);
|
||||
}
|
||||
test_mm512_permutexvar_epi64();
|
||||
|
||||
#[target_feature(enable = "avx512bw")]
|
||||
unsafe fn test_mm512_shuffle_epi8() {
|
||||
#[rustfmt::skip]
|
||||
|
||||
Reference in New Issue
Block a user