mirror of
https://github.com/rust-lang/rust.git
synced 2026-04-27 18:57:42 +03:00
Use intrinsics for sse41
This commit is contained in:
@@ -418,7 +418,7 @@ pub const fn _mm_min_epu32(a: __m128i, b: __m128i) -> __m128i {
|
||||
unsafe { simd_imin(a.as_u32x4(), b.as_u32x4()).as_m128i() }
|
||||
}
|
||||
|
||||
/// Converts packed 32-bit integers from `a` and `b` to packed 16-bit integers
|
||||
/// Converts packed signed 32-bit integers from `a` and `b` to packed 16-bit integers
|
||||
/// using unsigned saturation
|
||||
///
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_packus_epi32)
|
||||
@@ -427,7 +427,24 @@ pub const fn _mm_min_epu32(a: __m128i, b: __m128i) -> __m128i {
|
||||
#[cfg_attr(test, assert_instr(packusdw))]
|
||||
#[stable(feature = "simd_x86", since = "1.27.0")]
|
||||
pub fn _mm_packus_epi32(a: __m128i, b: __m128i) -> __m128i {
|
||||
unsafe { transmute(packusdw(a.as_i32x4(), b.as_i32x4())) }
|
||||
unsafe {
|
||||
let max = simd_splat(i32::from(u16::MAX));
|
||||
let min = simd_splat(i32::from(u16::MIN));
|
||||
|
||||
let clamped_a = simd_imax(simd_imin(a.as_i32x4(), max), min)
|
||||
.as_m128i()
|
||||
.as_i16x8();
|
||||
let clamped_b = simd_imax(simd_imin(b.as_i32x4(), max), min)
|
||||
.as_m128i()
|
||||
.as_i16x8();
|
||||
|
||||
// Shuffle the low u16 of each i32 from two concatenated vectors into
|
||||
// the low bits of the result register.
|
||||
const IDXS: [u32; 8] = [0, 2, 4, 6, 8, 10, 12, 14];
|
||||
let result: i16x8 = simd_shuffle!(clamped_a, clamped_b, IDXS);
|
||||
|
||||
result.as_m128i()
|
||||
}
|
||||
}
|
||||
|
||||
/// Compares packed 64-bit integers in `a` and `b` for equality
|
||||
@@ -1166,8 +1183,6 @@ pub unsafe fn _mm_stream_load_si128(mem_addr: *const __m128i) -> __m128i {
|
||||
unsafe extern "C" {
|
||||
#[link_name = "llvm.x86.sse41.insertps"]
|
||||
fn insertps(a: __m128, b: __m128, imm8: u8) -> __m128;
|
||||
#[link_name = "llvm.x86.sse41.packusdw"]
|
||||
fn packusdw(a: i32x4, b: i32x4) -> u16x8;
|
||||
#[link_name = "llvm.x86.sse41.dppd"]
|
||||
fn dppd(a: __m128d, b: __m128d, imm8: u8) -> __m128d;
|
||||
#[link_name = "llvm.x86.sse41.dpps"]
|
||||
|
||||
Reference in New Issue
Block a user