mirror of
https://github.com/rust-lang/rust.git
synced 2026-05-28 20:16:58 +03:00
sse4.1 instructions (#98)
* sse4.1: _mm_blendv_ps and _mm_blendv_pd * sse4.1: _mm_blend_ps and _mm_blend_pd - HACK warning: messing with the constify macros - Selecting only one buffer gets optimized away and tests need to take this into account * sse4.1: _mm_blend_epi16 * sse4.1: _mm_extract_ps * sse4.1: _mm_extract_epi8 * sse4.1: _mm_extract_epi32 * sse4.1: _mm_extract_epi64 * sse4.1: _mm_insert_ps * sse4.1: _mm_insert_epi8 * sse4.1: _mm_insert_epi32 and _mm_insert_epi64 * Formatting * sse4.1: _mm_max_epi8, _mm_max_epu16, _mm_max_epi32 and _mm_max_epu32 * Fix wrong compiler flag - avx -> sse4.1 * Fix intrinsics that only work with x86-64 * sse4.1: use appropriate types * Revert '_mm_extract_ps' to return i32 * sse4.1: Use the v128 types for consistency * Try fix for windows * Try "vectorcall" calling convention * Revert "Try "vectorcall" calling convention" This reverts commit 12936e9976bc6b0e4e538d82f55f0ee2d87a7f25. * Revert "Try fix for windows" This reverts commit 9c473808d334acedd46060b32ceea116662bf6a3. * Change tests for windows * Remove useless Windows test
This commit is contained in:
committed by
Alex Crichton
parent
acf919f960
commit
02c89b24ba
@@ -301,3 +301,41 @@ macro_rules! constify_imm6 {
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Invokes `$callback!` with a literal equal to the low four bits of `$value`.
///
/// The input is masked with `0xF` and matched against every possible result,
/// so each arm passes `$callback!` a literal rather than a runtime expression.
macro_rules! constify_imm4 {
    ($value:expr, $callback:ident) => {
        #[allow(overflowing_literals)]
        match $value & 0xF {
            0 => $callback!(0),
            1 => $callback!(1),
            2 => $callback!(2),
            3 => $callback!(3),
            4 => $callback!(4),
            5 => $callback!(5),
            6 => $callback!(6),
            7 => $callback!(7),
            8 => $callback!(8),
            9 => $callback!(9),
            10 => $callback!(10),
            11 => $callback!(11),
            12 => $callback!(12),
            13 => $callback!(13),
            14 => $callback!(14),
            // After masking, the only value left is 15.
            _ => $callback!(15),
        }
    };
}
|
||||
|
||||
/// Invokes `$callback!` with a literal equal to the low two bits of `$value`.
///
/// The input is masked with `0x3` and matched against every possible result,
/// so each arm passes `$callback!` a literal rather than a runtime expression.
macro_rules! constify_imm2 {
    ($value:expr, $callback:ident) => {
        #[allow(overflowing_literals)]
        match $value & 0x3 {
            0 => $callback!(0),
            1 => $callback!(1),
            2 => $callback!(2),
            // After masking, the only value left is 3.
            _ => $callback!(3),
        }
    };
}
|
||||
|
||||
|
||||
|
||||
@@ -1,20 +1,191 @@
|
||||
|
||||
use std::mem;
|
||||
|
||||
#[cfg(test)]
|
||||
use stdsimd_test::assert_instr;
|
||||
|
||||
use v128::*;
|
||||
use x86::__m128i;
|
||||
|
||||
#[inline(always)]
|
||||
#[target_feature = "+sse4.1"]
|
||||
#[cfg_attr(test, assert_instr(pblendvb))]
|
||||
pub unsafe fn _mm_blendv_epi8(
|
||||
a: __m128i,
|
||||
b: __m128i,
|
||||
mask: __m128i,
|
||||
) -> __m128i {
|
||||
pub unsafe fn _mm_blendv_epi8(a: i8x16, b: i8x16, mask: i8x16) -> i8x16 {
|
||||
pblendvb(a, b, mask)
|
||||
}
|
||||
|
||||
#[inline(always)]
|
||||
#[target_feature = "+sse4.1"]
|
||||
#[cfg_attr(test, assert_instr(pblendw, imm8=0xF0))]
|
||||
pub unsafe fn _mm_blend_epi16(a: i16x8, b: i16x8, imm8: u8) -> i16x8 {
|
||||
macro_rules! call {
|
||||
($imm8:expr) => { pblendw(a, b, $imm8) }
|
||||
}
|
||||
constify_imm8!(imm8, call)
|
||||
}
|
||||
|
||||
/// Blend packed double-precision (64-bit) floating-point elements from `a` and `b` using `mask`
|
||||
#[inline(always)]
|
||||
#[target_feature = "+sse4.1"]
|
||||
#[cfg_attr(test, assert_instr(blendvpd))]
|
||||
pub unsafe fn _mm_blendv_pd(a: f64x2, b: f64x2, mask: f64x2) -> f64x2 {
|
||||
blendvpd(a, b, mask)
|
||||
}
|
||||
|
||||
/// Blend packed single-precision (32-bit) floating-point elements from `a` and `b` using `mask`
|
||||
#[inline(always)]
|
||||
#[target_feature = "+sse4.1"]
|
||||
#[cfg_attr(test, assert_instr(blendvps))]
|
||||
pub unsafe fn _mm_blendv_ps(a: f32x4, b: f32x4, mask: f32x4) -> f32x4 {
|
||||
blendvps(a, b, mask)
|
||||
}
|
||||
|
||||
/// Blend packed double-precision (64-bit) floating-point elements from `a` and `b` using control mask `imm2`
|
||||
#[inline(always)]
|
||||
#[target_feature = "+sse4.1"]
|
||||
#[cfg_attr(test, assert_instr(blendpd, imm2=0b10))]
|
||||
pub unsafe fn _mm_blend_pd(a: f64x2, b: f64x2, imm2: u8) -> f64x2 {
|
||||
macro_rules! call {
|
||||
($imm2:expr) => { blendpd(a, b, $imm2) }
|
||||
}
|
||||
constify_imm2!(imm2, call)
|
||||
}
|
||||
|
||||
/// Blend packed single-precision (32-bit) floating-point elements from `a` and `b` using mask `imm4`
|
||||
#[inline(always)]
|
||||
#[target_feature = "+sse4.1"]
|
||||
#[cfg_attr(test, assert_instr(blendps, imm4=0b0101))]
|
||||
pub unsafe fn _mm_blend_ps(a: f32x4, b: f32x4, imm4: u8) -> f32x4 {
|
||||
macro_rules! call {
|
||||
($imm4:expr) => { blendps(a, b, $imm4) }
|
||||
}
|
||||
constify_imm4!(imm4, call)
|
||||
}
|
||||
|
||||
/// Extract a single-precision (32-bit) floating-point element from `a`, selected with `imm8`
|
||||
#[inline(always)]
|
||||
#[target_feature = "+sse4.1"]
|
||||
// TODO: Add test for Windows
|
||||
#[cfg_attr(all(test, not(windows)), assert_instr(extractps, imm8=0))]
|
||||
pub unsafe fn _mm_extract_ps(a: f32x4, imm8: u8) -> i32 {
|
||||
mem::transmute(a.extract(imm8 as u32 & 0b11))
|
||||
}
|
||||
|
||||
/// Extract an 8-bit integer from `a` selected with `imm8`
|
||||
#[inline(always)]
|
||||
#[target_feature = "+sse4.1"]
|
||||
#[cfg_attr(test, assert_instr(pextrb, imm8=0))]
|
||||
pub unsafe fn _mm_extract_epi8(a: i8x16, imm8: u8) -> i8 {
|
||||
a.extract((imm8 & 0b1111) as u32)
|
||||
}
|
||||
|
||||
/// Extract an 32-bit integer from `a` selected with `imm8`
|
||||
#[inline(always)]
|
||||
#[target_feature = "+sse4.1"]
|
||||
// TODO: Add test for Windows
|
||||
#[cfg_attr(all(test, not(windows)), assert_instr(pextrd, imm8=1))]
|
||||
pub unsafe fn _mm_extract_epi32(a: i32x4, imm8: u8) -> i32 {
|
||||
a.extract((imm8 & 0b11) as u32)
|
||||
}
|
||||
|
||||
/// Extract an 64-bit integer from `a` selected with `imm8`
|
||||
#[cfg(target_arch = "x86_64")]
|
||||
#[inline(always)]
|
||||
#[target_feature = "+sse4.1"]
|
||||
// TODO: Add test for Windows
|
||||
#[cfg_attr(all(test, not(windows)), assert_instr(pextrq, imm8=1))]
|
||||
pub unsafe fn _mm_extract_epi64(a: i64x2, imm8: u8) -> i64 {
|
||||
a.extract((imm8 & 0b1) as u32)
|
||||
}
|
||||
|
||||
/// Select a single value in `a` to store at some position in `b`,
|
||||
/// Then zero elements according to `imm8`.
|
||||
///
|
||||
/// `imm8` specifies which bits from operand `a` will be copied, which bits in the
|
||||
/// result they will be copied to, and which bits in the result will be
|
||||
/// cleared. The following assignments are made:
|
||||
///
|
||||
/// * Bits `[7:6]` specify the bits to copy from operand `a`:
|
||||
/// - `00`: Selects bits `[31:0]` from operand `a`.
|
||||
/// - `01`: Selects bits `[63:32]` from operand `a`.
|
||||
/// - `10`: Selects bits `[95:64]` from operand `a`.
|
||||
/// - `11`: Selects bits `[127:96]` from operand `a`.
|
||||
///
|
||||
/// * Bits `[5:4]` specify the bits in the result to which the selected bits
|
||||
/// from operand `a` are copied:
|
||||
/// - `00`: Copies the selected bits from `a` to result bits `[31:0]`.
|
||||
/// - `01`: Copies the selected bits from `a` to result bits `[63:32]`.
|
||||
/// - `10`: Copies the selected bits from `a` to result bits `[95:64]`.
|
||||
/// - `11`: Copies the selected bits from `a` to result bits `[127:96]`.
|
||||
///
|
||||
/// * Bits `[3:0]`: If any of these bits are set, the corresponding result
|
||||
/// element is cleared.
|
||||
#[inline(always)]
|
||||
#[target_feature = "+sse4.1"]
|
||||
#[cfg_attr(test, assert_instr(insertps, imm8=0b1010))]
|
||||
pub unsafe fn _mm_insert_ps(a: f32x4, b: f32x4, imm8: u8) -> f32x4 {
|
||||
macro_rules! call {
|
||||
($imm8:expr) => { insertps(a, b, $imm8) }
|
||||
}
|
||||
constify_imm8!(imm8, call)
|
||||
}
|
||||
|
||||
/// Return a copy of `a` with the 8-bit integer from `i` inserted at a location specified by `imm8`.
|
||||
#[inline(always)]
|
||||
#[target_feature = "+sse4.1"]
|
||||
#[cfg_attr(test, assert_instr(pinsrb, imm8=0))]
|
||||
pub unsafe fn _mm_insert_epi8(a: i8x16, i: i8, imm8: u8) -> i8x16 {
|
||||
a.replace((imm8 & 0b1111) as u32, i)
|
||||
}
|
||||
|
||||
/// Return a copy of `a` with the 32-bit integer from `i` inserted at a location specified by `imm8`.
|
||||
#[inline(always)]
|
||||
#[target_feature = "+sse4.1"]
|
||||
#[cfg_attr(test, assert_instr(pinsrd, imm8=0))]
|
||||
pub unsafe fn _mm_insert_epi32(a: i32x4, i: i32, imm8: u8) -> i32x4 {
|
||||
a.replace((imm8 & 0b11) as u32, i)
|
||||
}
|
||||
|
||||
/// Return a copy of `a` with the 64-bit integer from `i` inserted at a location specified by `imm8`.
|
||||
#[cfg(target_arch = "x86_64")]
|
||||
#[inline(always)]
|
||||
#[target_feature = "+sse4.1"]
|
||||
#[cfg_attr(test, assert_instr(pinsrq, imm8=0))]
|
||||
pub unsafe fn _mm_insert_epi64(a: i64x2, i: i64, imm8: u8) -> i64x2 {
|
||||
a.replace((imm8 & 0b1) as u32, i)
|
||||
}
|
||||
|
||||
/// Compare packed 8-bit integers in `a` and `b`,87 and return packed maximum values in dst.
|
||||
#[inline(always)]
|
||||
#[target_feature = "+sse4.1"]
|
||||
#[cfg_attr(test, assert_instr(pmaxsb, imm8=0))]
|
||||
pub unsafe fn _mm_max_epi8(a: i8x16, b: i8x16) -> i8x16 {
|
||||
pmaxsb(a, b)
|
||||
}
|
||||
|
||||
/// Compare packed unsigned 16-bit integers in `a` and `b`, and return packed maximum.
|
||||
#[inline(always)]
|
||||
#[target_feature = "+sse4.1"]
|
||||
#[cfg_attr(test, assert_instr(pmaxuw, imm8=0))]
|
||||
pub unsafe fn _mm_max_epu16(a: u16x8, b: u16x8) -> u16x8 {
|
||||
pmaxuw(a, b)
|
||||
}
|
||||
|
||||
// Compare packed 32-bit integers in `a` and `b`, and return packed maximum values.
|
||||
#[inline(always)]
|
||||
#[target_feature = "+sse4.1"]
|
||||
#[cfg_attr(test, assert_instr(pmaxsd, imm8=0))]
|
||||
pub unsafe fn _mm_max_epi32(a: i32x4, b: i32x4) -> i32x4 {
|
||||
pmaxsd(a, b)
|
||||
}
|
||||
|
||||
// Compare packed unsigned 32-bit integers in `a` and `b`, and return packed maximum values.
|
||||
#[inline(always)]
|
||||
#[target_feature = "+sse4.1"]
|
||||
#[cfg_attr(test, assert_instr(pmaxud, imm8=0))]
|
||||
pub unsafe fn _mm_max_epu32(a: u32x4, b: u32x4) -> u32x4 {
|
||||
pmaxud(a, b)
|
||||
}
|
||||
|
||||
/// Returns the dot product of two f64x2 vectors.
|
||||
///
|
||||
/// `imm8[1:0]` is the broadcast mask, and `imm8[5:4]` is the condition mask.
|
||||
@@ -52,7 +223,27 @@ macro_rules! call {
|
||||
#[allow(improper_ctypes)]
|
||||
extern {
|
||||
#[link_name = "llvm.x86.sse41.pblendvb"]
|
||||
fn pblendvb(a: __m128i, b: __m128i, mask: __m128i) -> __m128i;
|
||||
fn pblendvb(a: i8x16, b: i8x16, mask: i8x16) -> i8x16;
|
||||
#[link_name = "llvm.x86.sse41.blendvpd"]
|
||||
fn blendvpd(a: f64x2, b: f64x2, mask: f64x2) -> f64x2;
|
||||
#[link_name = "llvm.x86.sse41.blendvps"]
|
||||
fn blendvps(a: f32x4, b: f32x4, mask: f32x4) -> f32x4;
|
||||
#[link_name = "llvm.x86.sse41.blendpd"]
|
||||
fn blendpd(a: f64x2, b: f64x2, imm2: u8) -> f64x2;
|
||||
#[link_name = "llvm.x86.sse41.blendps"]
|
||||
fn blendps(a: f32x4, b: f32x4, imm4: u8) -> f32x4;
|
||||
#[link_name = "llvm.x86.sse41.pblendw"]
|
||||
fn pblendw(a: i16x8, b: i16x8, imm8: u8) -> i16x8;
|
||||
#[link_name = "llvm.x86.sse41.insertps"]
|
||||
fn insertps(a: f32x4, b: f32x4, imm8: u8) -> f32x4;
|
||||
#[link_name = "llvm.x86.sse41.pmaxsb"]
|
||||
fn pmaxsb(a: i8x16, b: i8x16) -> i8x16;
|
||||
#[link_name = "llvm.x86.sse41.pmaxuw"]
|
||||
fn pmaxuw(a: u16x8, b: u16x8) -> u16x8;
|
||||
#[link_name = "llvm.x86.sse41.pmaxsd"]
|
||||
fn pmaxsd(a: i32x4, b: i32x4) -> i32x4;
|
||||
#[link_name = "llvm.x86.sse41.pmaxud"]
|
||||
fn pmaxud(a: u32x4, b: u32x4) -> u32x4;
|
||||
#[link_name = "llvm.x86.sse41.dppd"]
|
||||
fn dppd(a: f64x2, b: f64x2, imm8: u8) -> f64x2;
|
||||
#[link_name = "llvm.x86.sse41.dpps"]
|
||||
@@ -61,6 +252,8 @@ macro_rules! call {
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use std::mem;
|
||||
|
||||
use stdsimd_test::simd_test;
|
||||
|
||||
use v128::*;
|
||||
@@ -79,6 +272,166 @@ unsafe fn _mm_blendv_epi8() {
|
||||
assert_eq!(sse41::_mm_blendv_epi8(a, b, mask), e);
|
||||
}
|
||||
|
||||
#[simd_test = "sse4.1"]
|
||||
unsafe fn _mm_blendv_pd() {
|
||||
let a = f64x2::splat(0.0);
|
||||
let b = f64x2::splat(1.0);
|
||||
let mask = mem::transmute(i64x2::new(0, -1));
|
||||
let r = sse41::_mm_blendv_pd(a, b, mask);
|
||||
let e = f64x2::new(0.0, 1.0);
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
#[simd_test = "sse4.1"]
|
||||
unsafe fn _mm_blendv_ps() {
|
||||
let a = f32x4::splat(0.0);
|
||||
let b = f32x4::splat(1.0);
|
||||
let mask = mem::transmute(i32x4::new(0,-1, 0, -1));
|
||||
let r = sse41::_mm_blendv_ps(a, b, mask);
|
||||
let e = f32x4::new(0.0, 1.0, 0.0, 1.0);
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
#[simd_test = "sse4.1"]
|
||||
unsafe fn _mm_blend_pd() {
|
||||
let a = f64x2::splat(0.0);
|
||||
let b = f64x2::splat(1.0);
|
||||
let r = sse41::_mm_blend_pd(a, b, 0b10);
|
||||
let e = f64x2::new(0.0, 1.0);
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
#[simd_test = "sse4.1"]
|
||||
unsafe fn _mm_blend_ps() {
|
||||
let a = f32x4::splat(0.0);
|
||||
let b = f32x4::splat(1.0);
|
||||
let r = sse41::_mm_blend_ps(a, b, 0b1010);
|
||||
let e = f32x4::new(0.0, 1.0, 0.0, 1.0);
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
#[simd_test = "sse4.1"]
|
||||
unsafe fn _mm_blend_epi16() {
|
||||
let a = i16x8::splat(0);
|
||||
let b = i16x8::splat(1);
|
||||
let r = sse41::_mm_blend_epi16(a, b, 0b1010_1100);
|
||||
let e = i16x8::new(0, 0, 1, 1, 0, 1, 0, 1);
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
#[simd_test = "sse4.1"]
|
||||
unsafe fn _mm_extract_ps() {
|
||||
let a = f32x4::new(0.0, 1.0, 2.0, 3.0);
|
||||
let r: f32 = mem::transmute(sse41::_mm_extract_ps(a, 1));
|
||||
assert_eq!(r, 1.0);
|
||||
let r: f32 = mem::transmute(sse41::_mm_extract_ps(a, 5));
|
||||
assert_eq!(r, 1.0);
|
||||
}
|
||||
|
||||
#[simd_test = "sse4.1"]
|
||||
unsafe fn _mm_extract_epi8() {
|
||||
let a = i8x16::new(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
|
||||
let r = sse41::_mm_extract_epi8(a, 1);
|
||||
assert_eq!(r, 1);
|
||||
let r = sse41::_mm_extract_epi8(a, 17);
|
||||
assert_eq!(r, 1);
|
||||
}
|
||||
|
||||
#[simd_test = "sse4.1"]
|
||||
unsafe fn _mm_extract_epi32() {
|
||||
let a = i32x4::new(0, 1, 2, 3);
|
||||
let r = sse41::_mm_extract_epi32(a, 1);
|
||||
assert_eq!(r, 1);
|
||||
let r = sse41::_mm_extract_epi32(a, 5);
|
||||
assert_eq!(r, 1);
|
||||
}
|
||||
|
||||
#[cfg(target_arch = "x86_64")]
|
||||
#[simd_test = "sse4.1"]
|
||||
unsafe fn _mm_extract_epi64() {
|
||||
let a = i64x2::new(0, 1);
|
||||
let r = sse41::_mm_extract_epi64(a, 1);
|
||||
assert_eq!(r, 1);
|
||||
let r = sse41::_mm_extract_epi64(a, 3);
|
||||
assert_eq!(r, 1);
|
||||
}
|
||||
|
||||
#[simd_test = "sse4.1"]
|
||||
unsafe fn _mm_insert_ps() {
|
||||
let a = f32x4::splat(1.0);
|
||||
let b = f32x4::new(1.0, 2.0, 3.0, 4.0);
|
||||
let r = sse41::_mm_insert_ps(a, b, 0b11_00_1100);
|
||||
let e = f32x4::new(4.0, 1.0, 0.0, 0.0);
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
#[simd_test = "sse4.1"]
|
||||
unsafe fn _mm_insert_epi8() {
|
||||
let a = i8x16::splat(0);
|
||||
let e = i8x16::splat(0).replace(1, 32);
|
||||
let r = sse41::_mm_insert_epi8(a, 32, 1);
|
||||
assert_eq!(r, e);
|
||||
let r = sse41::_mm_insert_epi8(a, 32, 17);
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
#[simd_test = "sse4.1"]
|
||||
unsafe fn _mm_insert_epi32() {
|
||||
let a = i32x4::splat(0);
|
||||
let e = i32x4::splat(0).replace(1, 32);
|
||||
let r = sse41::_mm_insert_epi32(a, 32, 1);
|
||||
assert_eq!(r, e);
|
||||
let r = sse41::_mm_insert_epi32(a, 32, 5);
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
#[cfg(target_arch = "x86_64")]
|
||||
#[simd_test = "sse4.1"]
|
||||
unsafe fn _mm_insert_epi64() {
|
||||
let a = i64x2::splat(0);
|
||||
let e = i64x2::splat(0).replace(1, 32);
|
||||
let r = sse41::_mm_insert_epi64(a, 32, 1);
|
||||
assert_eq!(r, e);
|
||||
let r = sse41::_mm_insert_epi64(a, 32, 3);
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
#[simd_test = "sse4.1"]
|
||||
unsafe fn _mm_max_epi8() {
|
||||
let a = i8x16::new(1, 4, 5, 8, 9, 12, 13, 16, 17, 20, 21, 24, 25, 28, 29, 32);
|
||||
let b = i8x16::new(2, 3, 6, 7, 10, 11, 14, 15, 18, 19, 22, 23, 26, 27, 30, 31);
|
||||
let r = sse41::_mm_max_epi8(a, b);
|
||||
let e = i8x16::new(2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30, 32);
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
#[simd_test = "sse4.1"]
|
||||
unsafe fn _mm_max_epu16() {
|
||||
let a = u16x8::new(1, 4, 5, 8, 9, 12, 13, 16);
|
||||
let b = u16x8::new(2, 3, 6, 7, 10, 11, 14, 15);
|
||||
let r = sse41::_mm_max_epu16(a, b);
|
||||
let e = u16x8::new(2, 4, 6, 8, 10, 12, 14, 16);
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
#[simd_test = "sse4.1"]
|
||||
unsafe fn _mm_max_epi32() {
|
||||
let a = i32x4::new(1, 4, 5, 8);
|
||||
let b = i32x4::new(2, 3, 6, 7);
|
||||
let r = sse41::_mm_max_epi32(a, b);
|
||||
let e = i32x4::new(2, 4, 6, 8);
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
#[simd_test = "sse4.1"]
|
||||
unsafe fn _mm_max_epu32() {
|
||||
let a = u32x4::new(1, 4, 5, 8);
|
||||
let b = u32x4::new(2, 3, 6, 7);
|
||||
let r = sse41::_mm_max_epu32(a, b);
|
||||
let e = u32x4::new(2, 4, 6, 8);
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
#[simd_test = "sse4.1"]
|
||||
unsafe fn _mm_dp_pd() {
|
||||
let a = f64x2::new(2.0, 3.0);
|
||||
|
||||
Reference in New Issue
Block a user