mirror of
https://github.com/rust-lang/rust.git
synced 2026-05-29 20:46:07 +03:00
miri: implement some llvm.x86.sse.* intrinsics and add tests
Implements LLVM intrinsics needed to run most SSE functions from `core::arch::x86{,_64}`.
Also adds miri tests for those functions (mostly copied from core_arch tests).
This commit is contained in:
@@ -19,6 +19,7 @@
|
||||
clippy::enum_variant_names,
|
||||
clippy::field_reassign_with_default,
|
||||
clippy::manual_map,
|
||||
clippy::neg_cmp_op_on_partial_ord,
|
||||
clippy::new_without_default,
|
||||
clippy::single_match,
|
||||
clippy::useless_format,
|
||||
|
||||
@@ -918,6 +918,33 @@ fn emulate_foreign_item_by_name(
|
||||
this.write_scalar(Scalar::from_f64(res), dest)?;
|
||||
}
|
||||
|
||||
"llvm.prefetch" => {
|
||||
let [p, rw, loc, ty] =
|
||||
this.check_shim(abi, Abi::C { unwind: false }, link_name, args)?;
|
||||
|
||||
let _ = this.read_pointer(p)?;
|
||||
let rw = this.read_scalar(rw)?.to_i32()?;
|
||||
let loc = this.read_scalar(loc)?.to_i32()?;
|
||||
let ty = this.read_scalar(ty)?.to_i32()?;
|
||||
|
||||
if ty == 1 {
|
||||
// Data cache prefetch.
|
||||
// Notably, we do not have to check the pointer, this operation is never UB!
|
||||
|
||||
if !matches!(rw, 0 | 1) {
|
||||
throw_unsup_format!("invalid `rw` value passed to `llvm.prefetch`: {}", rw);
|
||||
}
|
||||
if !matches!(loc, 0..=3) {
|
||||
throw_unsup_format!(
|
||||
"invalid `loc` value passed to `llvm.prefetch`: {}",
|
||||
loc
|
||||
);
|
||||
}
|
||||
} else {
|
||||
throw_unsup_format!("unsupported `llvm.prefetch` type argument: {}", ty);
|
||||
}
|
||||
}
|
||||
|
||||
// Architecture-specific shims
|
||||
"llvm.x86.addcarry.64" if this.tcx.sess.target.arch == "x86_64" => {
|
||||
// Computes u8+u64+u64, returning tuple (u8,u64) comprising the output carry and truncated sum.
|
||||
@@ -970,6 +997,12 @@ fn emulate_foreign_item_by_name(
|
||||
}
|
||||
}
|
||||
|
||||
name if name.starts_with("llvm.x86.sse.") => {
|
||||
return shims::x86::sse::EvalContextExt::emulate_x86_sse_intrinsic(
|
||||
this, link_name, abi, args, dest,
|
||||
);
|
||||
}
|
||||
|
||||
// Platform-specific shims
|
||||
_ =>
|
||||
return match this.tcx.sess.target.os.as_ref() {
|
||||
|
||||
@@ -7,6 +7,7 @@
|
||||
pub mod intrinsics;
|
||||
pub mod unix;
|
||||
pub mod windows;
|
||||
mod x86;
|
||||
|
||||
pub mod dlsym;
|
||||
pub mod env;
|
||||
|
||||
@@ -0,0 +1 @@
|
||||
pub(super) mod sse;
|
||||
@@ -0,0 +1,609 @@
|
||||
use rustc_apfloat::{ieee::Single, Float as _};
|
||||
use rustc_middle::mir;
|
||||
use rustc_span::Symbol;
|
||||
use rustc_target::spec::abi::Abi;
|
||||
|
||||
use rand::Rng as _;
|
||||
|
||||
use crate::*;
|
||||
use shims::foreign_items::EmulateByNameResult;
|
||||
|
||||
impl<'mir, 'tcx: 'mir> EvalContextExt<'mir, 'tcx> for crate::MiriInterpCx<'mir, 'tcx> {}
|
||||
pub trait EvalContextExt<'mir, 'tcx: 'mir>: crate::MiriInterpCxExt<'mir, 'tcx> {
|
||||
fn emulate_x86_sse_intrinsic(
|
||||
&mut self,
|
||||
link_name: Symbol,
|
||||
abi: Abi,
|
||||
args: &[OpTy<'tcx, Provenance>],
|
||||
dest: &PlaceTy<'tcx, Provenance>,
|
||||
) -> InterpResult<'tcx, EmulateByNameResult<'mir, 'tcx>> {
|
||||
let this = self.eval_context_mut();
|
||||
// Prefix should have already been checked.
|
||||
let unprefixed_name = link_name.as_str().strip_prefix("llvm.x86.sse.").unwrap();
|
||||
// All these intrinsics operate on 128-bit (f32x4) SIMD vectors unless stated otherwise.
|
||||
// Many intrinsic names are sufixed with "ps" (packed single) or "ss" (scalar single),
|
||||
// where single means single precision floating point (f32). "ps" means thet the operation
|
||||
// is performed on each element of the vector, while "ss" means that the operation is
|
||||
// performed only on the first element, copying the remaining elements from the input
|
||||
// vector (for binary operations, from the left-hand side).
|
||||
match unprefixed_name {
|
||||
// Used to implement _mm_{add,sub,mul,div,min,max}_ss functions.
|
||||
// Performs the operations on the first component of `left` and
|
||||
// `right` and copies the remaining components from `left`.
|
||||
"add.ss" | "sub.ss" | "mul.ss" | "div.ss" | "min.ss" | "max.ss" => {
|
||||
let [left, right] =
|
||||
this.check_shim(abi, Abi::C { unwind: false }, link_name, args)?;
|
||||
|
||||
let which = match unprefixed_name {
|
||||
"add.ss" => FloatBinOp::Arith(mir::BinOp::Add),
|
||||
"sub.ss" => FloatBinOp::Arith(mir::BinOp::Sub),
|
||||
"mul.ss" => FloatBinOp::Arith(mir::BinOp::Mul),
|
||||
"div.ss" => FloatBinOp::Arith(mir::BinOp::Div),
|
||||
"min.ss" => FloatBinOp::Min,
|
||||
"max.ss" => FloatBinOp::Max,
|
||||
_ => unreachable!(),
|
||||
};
|
||||
|
||||
bin_op_ss(this, which, left, right, dest)?;
|
||||
}
|
||||
// Used to implement _mm_min_ps and _mm_max_ps functions.
|
||||
// Note that the semantics are a bit different from Rust simd_min
|
||||
// and simd_max intrinsics regarding handling of NaN and -0.0: Rust
|
||||
// matches the IEEE min/max operations, while x86 has different
|
||||
// semantics.
|
||||
"min.ps" | "max.ps" => {
|
||||
let [left, right] =
|
||||
this.check_shim(abi, Abi::C { unwind: false }, link_name, args)?;
|
||||
|
||||
let which = match unprefixed_name {
|
||||
"min.ps" => FloatBinOp::Min,
|
||||
"max.ps" => FloatBinOp::Max,
|
||||
_ => unreachable!(),
|
||||
};
|
||||
|
||||
bin_op_ps(this, which, left, right, dest)?;
|
||||
}
|
||||
// Used to implement _mm_{sqrt,rcp,rsqrt}_ss functions.
|
||||
// Performs the operations on the first component of `op` and
|
||||
// copies the remaining components from `op`.
|
||||
"sqrt.ss" | "rcp.ss" | "rsqrt.ss" => {
|
||||
let [op] = this.check_shim(abi, Abi::C { unwind: false }, link_name, args)?;
|
||||
|
||||
let which = match unprefixed_name {
|
||||
"sqrt.ss" => FloatUnaryOp::Sqrt,
|
||||
"rcp.ss" => FloatUnaryOp::Rcp,
|
||||
"rsqrt.ss" => FloatUnaryOp::Rsqrt,
|
||||
_ => unreachable!(),
|
||||
};
|
||||
|
||||
unary_op_ss(this, which, op, dest)?;
|
||||
}
|
||||
// Used to implement _mm_{sqrt,rcp,rsqrt}_ss functions.
|
||||
// Performs the operations on all components of `op`.
|
||||
"sqrt.ps" | "rcp.ps" | "rsqrt.ps" => {
|
||||
let [op] = this.check_shim(abi, Abi::C { unwind: false }, link_name, args)?;
|
||||
|
||||
let which = match unprefixed_name {
|
||||
"sqrt.ps" => FloatUnaryOp::Sqrt,
|
||||
"rcp.ps" => FloatUnaryOp::Rcp,
|
||||
"rsqrt.ps" => FloatUnaryOp::Rsqrt,
|
||||
_ => unreachable!(),
|
||||
};
|
||||
|
||||
unary_op_ps(this, which, op, dest)?;
|
||||
}
|
||||
// Used to implement the _mm_cmp_ss function.
|
||||
// Performs a comparison operation on the first component of `left`
|
||||
// and `right`, returning 0 if false or `u32::MAX` if true. The remaining
|
||||
// components are copied from `left`.
|
||||
"cmp.ss" => {
|
||||
let [left, right, imm] =
|
||||
this.check_shim(abi, Abi::C { unwind: false }, link_name, args)?;
|
||||
|
||||
let which = match this.read_scalar(imm)?.to_i8()? {
|
||||
0 => FloatBinOp::Cmp(FloatCmpOp::Eq),
|
||||
1 => FloatBinOp::Cmp(FloatCmpOp::Lt),
|
||||
2 => FloatBinOp::Cmp(FloatCmpOp::Le),
|
||||
3 => FloatBinOp::Cmp(FloatCmpOp::Unord),
|
||||
4 => FloatBinOp::Cmp(FloatCmpOp::Neq),
|
||||
5 => FloatBinOp::Cmp(FloatCmpOp::Nlt),
|
||||
6 => FloatBinOp::Cmp(FloatCmpOp::Nle),
|
||||
7 => FloatBinOp::Cmp(FloatCmpOp::Ord),
|
||||
imm => {
|
||||
throw_unsup_format!(
|
||||
"invalid 3rd parameter of llvm.x86.sse.cmp.ps: {}",
|
||||
imm
|
||||
);
|
||||
}
|
||||
};
|
||||
|
||||
bin_op_ss(this, which, left, right, dest)?;
|
||||
}
|
||||
// Used to implement the _mm_cmp_ps function.
|
||||
// Performs a comparison operation on each component of `left`
|
||||
// and `right`. For each component, returns 0 if false or u32::MAX
|
||||
// if true.
|
||||
"cmp.ps" => {
|
||||
let [left, right, imm] =
|
||||
this.check_shim(abi, Abi::C { unwind: false }, link_name, args)?;
|
||||
|
||||
let which = match this.read_scalar(imm)?.to_i8()? {
|
||||
0 => FloatBinOp::Cmp(FloatCmpOp::Eq),
|
||||
1 => FloatBinOp::Cmp(FloatCmpOp::Lt),
|
||||
2 => FloatBinOp::Cmp(FloatCmpOp::Le),
|
||||
3 => FloatBinOp::Cmp(FloatCmpOp::Unord),
|
||||
4 => FloatBinOp::Cmp(FloatCmpOp::Neq),
|
||||
5 => FloatBinOp::Cmp(FloatCmpOp::Nlt),
|
||||
6 => FloatBinOp::Cmp(FloatCmpOp::Nle),
|
||||
7 => FloatBinOp::Cmp(FloatCmpOp::Ord),
|
||||
imm => {
|
||||
throw_unsup_format!(
|
||||
"invalid 3rd parameter of llvm.x86.sse.cmp.ps: {}",
|
||||
imm
|
||||
);
|
||||
}
|
||||
};
|
||||
|
||||
bin_op_ps(this, which, left, right, dest)?;
|
||||
}
|
||||
// Used to implement _mm_{,u}comi{eq,lt,le,gt,ge,neq}_ps functions.
|
||||
// Compares the first component of `left` and `right` and returns
|
||||
// a scalar value (0 or 1).
|
||||
"comieq.ss" | "comilt.ss" | "comile.ss" | "comigt.ss" | "comige.ss" | "comineq.ss"
|
||||
| "ucomieq.ss" | "ucomilt.ss" | "ucomile.ss" | "ucomigt.ss" | "ucomige.ss"
|
||||
| "ucomineq.ss" => {
|
||||
let [left, right] =
|
||||
this.check_shim(abi, Abi::C { unwind: false }, link_name, args)?;
|
||||
|
||||
let (left, left_len) = this.operand_to_simd(left)?;
|
||||
let (right, right_len) = this.operand_to_simd(right)?;
|
||||
|
||||
assert_eq!(left_len, right_len);
|
||||
|
||||
let left = this.read_scalar(&this.project_index(&left, 0)?)?.to_f32()?;
|
||||
let right = this.read_scalar(&this.project_index(&right, 0)?)?.to_f32()?;
|
||||
// The difference between the com* and *ucom variants is signaling
|
||||
// of exceptions when either argument is a quiet NaN. We do not
|
||||
// support accessing the SSE status register from miri (or from Rust,
|
||||
// for that matter), so we treat equally both variants.
|
||||
let res = match unprefixed_name {
|
||||
"comieq.ss" | "ucomieq.ss" => left == right,
|
||||
"comilt.ss" | "ucomilt.ss" => left < right,
|
||||
"comile.ss" | "ucomile.ss" => left <= right,
|
||||
"comigt.ss" | "ucomigt.ss" => left > right,
|
||||
"comige.ss" | "ucomige.ss" => left >= right,
|
||||
"comineq.ss" | "ucomineq.ss" => left != right,
|
||||
_ => unreachable!(),
|
||||
};
|
||||
this.write_scalar(Scalar::from_i32(i32::from(res)), dest)?;
|
||||
}
|
||||
// Use to implement _mm_cvtss_si32 and _mm_cvttss_si32.
|
||||
// Converts the first component of `op` from f32 to i32.
|
||||
"cvtss2si" | "cvttss2si" => {
|
||||
let [op] = this.check_shim(abi, Abi::C { unwind: false }, link_name, args)?;
|
||||
let (op, _) = this.operand_to_simd(op)?;
|
||||
|
||||
let op = this.read_scalar(&this.project_index(&op, 0)?)?.to_f32()?;
|
||||
|
||||
let rnd = match unprefixed_name {
|
||||
// "current SSE rounding mode", assume nearest
|
||||
// https://www.felixcloutier.com/x86/cvtss2si
|
||||
"cvtss2si" => rustc_apfloat::Round::NearestTiesToEven,
|
||||
// always truncate
|
||||
// https://www.felixcloutier.com/x86/cvttss2si
|
||||
"cvttss2si" => rustc_apfloat::Round::TowardZero,
|
||||
_ => unreachable!(),
|
||||
};
|
||||
|
||||
let mut exact = false;
|
||||
let cvt = op.to_i128_r(32, rnd, &mut exact);
|
||||
let res = if cvt.status.intersects(
|
||||
rustc_apfloat::Status::INVALID_OP
|
||||
| rustc_apfloat::Status::OVERFLOW
|
||||
| rustc_apfloat::Status::UNDERFLOW,
|
||||
) {
|
||||
// Input is NaN (flagged with INVALID_OP) or does not fit
|
||||
// in an i32 (flagged with OVERFLOW or UNDERFLOW), fallback
|
||||
// to minimum acording to SSE semantics. The INEXACT flag
|
||||
// is ignored on purpose because rounding can happen during
|
||||
// float-to-int conversion.
|
||||
i32::MIN
|
||||
} else {
|
||||
i32::try_from(cvt.value).unwrap()
|
||||
};
|
||||
|
||||
this.write_scalar(Scalar::from_i32(res), dest)?;
|
||||
}
|
||||
// Use to implement _mm_cvtss_si64 and _mm_cvttss_si64.
|
||||
// Converts the first component of `op` from f32 to i64.
|
||||
"cvtss2si64" | "cvttss2si64" => {
|
||||
let [op] = this.check_shim(abi, Abi::C { unwind: false }, link_name, args)?;
|
||||
let (op, _) = this.operand_to_simd(op)?;
|
||||
|
||||
let op = this.read_scalar(&this.project_index(&op, 0)?)?.to_f32()?;
|
||||
|
||||
let rnd = match unprefixed_name {
|
||||
// "current SSE rounding mode", assume nearest
|
||||
// https://www.felixcloutier.com/x86/cvtss2si
|
||||
"cvtss2si64" => rustc_apfloat::Round::NearestTiesToEven,
|
||||
// always truncate
|
||||
// https://www.felixcloutier.com/x86/cvttss2si
|
||||
"cvttss2si64" => rustc_apfloat::Round::TowardZero,
|
||||
_ => unreachable!(),
|
||||
};
|
||||
|
||||
let mut exact = false;
|
||||
let cvt = op.to_i128_r(64, rnd, &mut exact);
|
||||
let res = if cvt.status.intersects(
|
||||
rustc_apfloat::Status::INVALID_OP
|
||||
| rustc_apfloat::Status::OVERFLOW
|
||||
| rustc_apfloat::Status::UNDERFLOW,
|
||||
) {
|
||||
// Input is NaN (flagged with INVALID_OP) or does not fit
|
||||
// in an i64 (flagged with OVERFLOW or UNDERFLOW), fallback
|
||||
// to minimum acording to SSE semantics. The INEXACT flag
|
||||
// is ignored on purpose because rounding can happen during
|
||||
// float-to-int conversion.
|
||||
i64::MIN
|
||||
} else {
|
||||
i64::try_from(cvt.value).unwrap()
|
||||
};
|
||||
|
||||
this.write_scalar(Scalar::from_i64(res), dest)?;
|
||||
}
|
||||
// Used to implement the _mm_cvtsi32_ss function.
|
||||
// Converts `right` from i32 to f32. Returns a SIMD vector with
|
||||
// the result in the first component and the remaining components
|
||||
// are copied from `left`.
|
||||
// https://www.felixcloutier.com/x86/cvtsi2ss
|
||||
"cvtsi2ss" => {
|
||||
let [left, right] =
|
||||
this.check_shim(abi, Abi::C { unwind: false }, link_name, args)?;
|
||||
|
||||
let (left, left_len) = this.operand_to_simd(left)?;
|
||||
let (dest, dest_len) = this.place_to_simd(dest)?;
|
||||
|
||||
assert_eq!(dest_len, left_len);
|
||||
|
||||
let right = this.read_scalar(right)?.to_i32()?;
|
||||
|
||||
let res0 = Scalar::from_f32(Single::from_i128(right.into()).value);
|
||||
this.write_scalar(res0, &this.project_index(&dest, 0)?)?;
|
||||
|
||||
for i in 1..dest_len {
|
||||
let left = this.read_immediate(&this.project_index(&left, i)?)?;
|
||||
let dest = this.project_index(&dest, i)?;
|
||||
|
||||
this.write_immediate(*left, &dest)?;
|
||||
}
|
||||
}
|
||||
// Used to implement the _mm_cvtsi64_ss function.
|
||||
// Converts `right` from i64 to f32. Returns a SIMD vector with
|
||||
// the result in the first component and the remaining components
|
||||
// are copied from `left`.
|
||||
// https://www.felixcloutier.com/x86/cvtsi2ss
|
||||
"cvtsi642ss" => {
|
||||
let [left, right] =
|
||||
this.check_shim(abi, Abi::C { unwind: false }, link_name, args)?;
|
||||
|
||||
let (left, left_len) = this.operand_to_simd(left)?;
|
||||
let (dest, dest_len) = this.place_to_simd(dest)?;
|
||||
|
||||
assert_eq!(dest_len, left_len);
|
||||
|
||||
let right = this.read_scalar(right)?.to_i64()?;
|
||||
|
||||
let res0 = Scalar::from_f32(Single::from_i128(right.into()).value);
|
||||
this.write_scalar(res0, &this.project_index(&dest, 0)?)?;
|
||||
|
||||
for i in 1..dest_len {
|
||||
let left = this.read_immediate(&this.project_index(&left, i)?)?;
|
||||
let dest = this.project_index(&dest, i)?;
|
||||
|
||||
this.write_immediate(*left, &dest)?;
|
||||
}
|
||||
}
|
||||
// Used to implement the _mm_movemask_ps function.
|
||||
// Returns a scalar integer where the i-th bit is the highest
|
||||
// bit of the i-th component of `op`.
|
||||
// https://www.felixcloutier.com/x86/movmskps
|
||||
"movmsk.ps" => {
|
||||
let [op] = this.check_shim(abi, Abi::C { unwind: false }, link_name, args)?;
|
||||
let (op, op_len) = this.operand_to_simd(op)?;
|
||||
|
||||
let mut res = 0;
|
||||
for i in 0..op_len {
|
||||
let op = this.read_scalar(&this.project_index(&op, i)?)?;
|
||||
let op = op.to_u32()?;
|
||||
|
||||
res |= (op >> 31) << i;
|
||||
}
|
||||
|
||||
this.write_scalar(Scalar::from_u32(res), dest)?;
|
||||
}
|
||||
_ => return Ok(EmulateByNameResult::NotSupported),
|
||||
}
|
||||
Ok(EmulateByNameResult::NeedsJumping)
|
||||
}
|
||||
}
|
||||
|
||||
/// Comparison predicate used by the `cmp.ss` / `cmp.ps` intrinsics.
///
/// The variants are declared in the order of the immediate values `0..=7`
/// that `emulate_x86_sse_intrinsic` decodes them from.
///
/// <https://www.felixcloutier.com/x86/cmpss>
/// <https://www.felixcloutier.com/x86/cmpps>
#[derive(Clone, Copy)]
enum FloatCmpOp {
    /// Equal
    Eq,
    /// Less-than
    Lt,
    /// Less-or-equal
    Le,
    /// Unordered, i.e. at least one of the operands is NaN
    Unord,
    /// Not equal
    Neq,
    /// Not less-than
    Nlt,
    /// Not less-or-equal
    Nle,
    /// Ordered, i.e. neither of them is NaN
    Ord,
}
|
||||
|
||||
#[derive(Copy, Clone)]
|
||||
enum FloatBinOp {
|
||||
/// Arithmetic operation
|
||||
Arith(mir::BinOp),
|
||||
/// Comparison
|
||||
Cmp(FloatCmpOp),
|
||||
/// Minimum value (with SSE semantics)
|
||||
///
|
||||
/// <https://www.felixcloutier.com/x86/minss>
|
||||
/// <https://www.felixcloutier.com/x86/minps>
|
||||
Min,
|
||||
/// Maximum value (with SSE semantics)
|
||||
///
|
||||
/// <https://www.felixcloutier.com/x86/maxss>
|
||||
/// <https://www.felixcloutier.com/x86/maxps>
|
||||
Max,
|
||||
}
|
||||
|
||||
/// Performs `which` scalar operation on `left` and `right` and returns
|
||||
/// the result.
|
||||
fn bin_op_f32<'tcx>(
|
||||
this: &crate::MiriInterpCx<'_, 'tcx>,
|
||||
which: FloatBinOp,
|
||||
left: &ImmTy<'tcx, Provenance>,
|
||||
right: &ImmTy<'tcx, Provenance>,
|
||||
) -> InterpResult<'tcx, Scalar<Provenance>> {
|
||||
match which {
|
||||
FloatBinOp::Arith(which) => {
|
||||
let (res, _, _) = this.overflowing_binary_op(which, left, right)?;
|
||||
Ok(res)
|
||||
}
|
||||
FloatBinOp::Cmp(which) => {
|
||||
let left = left.to_scalar().to_f32()?;
|
||||
let right = right.to_scalar().to_f32()?;
|
||||
// FIXME: Make sure that these operations match the semantics of cmpps
|
||||
let res = match which {
|
||||
FloatCmpOp::Eq => left == right,
|
||||
FloatCmpOp::Lt => left < right,
|
||||
FloatCmpOp::Le => left <= right,
|
||||
FloatCmpOp::Unord => left.is_nan() || right.is_nan(),
|
||||
FloatCmpOp::Neq => left != right,
|
||||
FloatCmpOp::Nlt => !(left < right),
|
||||
FloatCmpOp::Nle => !(left <= right),
|
||||
FloatCmpOp::Ord => !left.is_nan() && !right.is_nan(),
|
||||
};
|
||||
Ok(Scalar::from_u32(if res { u32::MAX } else { 0 }))
|
||||
}
|
||||
FloatBinOp::Min => {
|
||||
let left = left.to_scalar().to_f32()?;
|
||||
let right = right.to_scalar().to_f32()?;
|
||||
// SSE semantics to handle zero and NaN. Note that `x == Single::ZERO`
|
||||
// is true when `x` is either +0 or -0.
|
||||
if (left == Single::ZERO && right == Single::ZERO)
|
||||
|| left.is_nan()
|
||||
|| right.is_nan()
|
||||
|| left >= right
|
||||
{
|
||||
Ok(Scalar::from_f32(right))
|
||||
} else {
|
||||
Ok(Scalar::from_f32(left))
|
||||
}
|
||||
}
|
||||
FloatBinOp::Max => {
|
||||
let left = left.to_scalar().to_f32()?;
|
||||
let right = right.to_scalar().to_f32()?;
|
||||
// SSE semantics to handle zero and NaN. Note that `x == Single::ZERO`
|
||||
// is true when `x` is either +0 or -0.
|
||||
if (left == Single::ZERO && right == Single::ZERO)
|
||||
|| left.is_nan()
|
||||
|| right.is_nan()
|
||||
|| left <= right
|
||||
{
|
||||
Ok(Scalar::from_f32(right))
|
||||
} else {
|
||||
Ok(Scalar::from_f32(left))
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Performs `which` operation on the first component of `left` and `right`
|
||||
/// and copies the other components from `left`. The result is stored in `dest`.
|
||||
fn bin_op_ss<'tcx>(
|
||||
this: &mut crate::MiriInterpCx<'_, 'tcx>,
|
||||
which: FloatBinOp,
|
||||
left: &OpTy<'tcx, Provenance>,
|
||||
right: &OpTy<'tcx, Provenance>,
|
||||
dest: &PlaceTy<'tcx, Provenance>,
|
||||
) -> InterpResult<'tcx, ()> {
|
||||
let (left, left_len) = this.operand_to_simd(left)?;
|
||||
let (right, right_len) = this.operand_to_simd(right)?;
|
||||
let (dest, dest_len) = this.place_to_simd(dest)?;
|
||||
|
||||
assert_eq!(dest_len, left_len);
|
||||
assert_eq!(dest_len, right_len);
|
||||
|
||||
let res0 = bin_op_f32(
|
||||
this,
|
||||
which,
|
||||
&this.read_immediate(&this.project_index(&left, 0)?)?,
|
||||
&this.read_immediate(&this.project_index(&right, 0)?)?,
|
||||
)?;
|
||||
this.write_scalar(res0, &this.project_index(&dest, 0)?)?;
|
||||
|
||||
for i in 1..dest_len {
|
||||
let left = this.read_immediate(&this.project_index(&left, i)?)?;
|
||||
let dest = this.project_index(&dest, i)?;
|
||||
|
||||
this.write_immediate(*left, &dest)?;
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Performs `which` operation on each component of `left`, and
|
||||
/// `right` storing the result is stored in `dest`.
|
||||
fn bin_op_ps<'tcx>(
|
||||
this: &mut crate::MiriInterpCx<'_, 'tcx>,
|
||||
which: FloatBinOp,
|
||||
left: &OpTy<'tcx, Provenance>,
|
||||
right: &OpTy<'tcx, Provenance>,
|
||||
dest: &PlaceTy<'tcx, Provenance>,
|
||||
) -> InterpResult<'tcx, ()> {
|
||||
let (left, left_len) = this.operand_to_simd(left)?;
|
||||
let (right, right_len) = this.operand_to_simd(right)?;
|
||||
let (dest, dest_len) = this.place_to_simd(dest)?;
|
||||
|
||||
assert_eq!(dest_len, left_len);
|
||||
assert_eq!(dest_len, right_len);
|
||||
|
||||
for i in 0..dest_len {
|
||||
let left = this.read_immediate(&this.project_index(&left, i)?)?;
|
||||
let right = this.read_immediate(&this.project_index(&right, i)?)?;
|
||||
let dest = this.project_index(&dest, i)?;
|
||||
|
||||
let res = bin_op_f32(this, which, &left, &right)?;
|
||||
this.write_scalar(res, &dest)?;
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Unary operation applied to a single `f32` lane by `unary_op_f32`.
#[derive(Clone, Copy)]
enum FloatUnaryOp {
    /// sqrt(x)
    ///
    /// <https://www.felixcloutier.com/x86/sqrtss>
    /// <https://www.felixcloutier.com/x86/sqrtps>
    Sqrt,
    /// Approximation of 1/x
    ///
    /// <https://www.felixcloutier.com/x86/rcpss>
    /// <https://www.felixcloutier.com/x86/rcpps>
    Rcp,
    /// Approximation of 1/sqrt(x)
    ///
    /// <https://www.felixcloutier.com/x86/rsqrtss>
    /// <https://www.felixcloutier.com/x86/rsqrtps>
    Rsqrt,
}
|
||||
|
||||
/// Performs `which` scalar operation on `op` and returns the result.
|
||||
#[allow(clippy::arithmetic_side_effects)] // floating point operations without side effects
|
||||
fn unary_op_f32<'tcx>(
|
||||
this: &mut crate::MiriInterpCx<'_, 'tcx>,
|
||||
which: FloatUnaryOp,
|
||||
op: &ImmTy<'tcx, Provenance>,
|
||||
) -> InterpResult<'tcx, Scalar<Provenance>> {
|
||||
match which {
|
||||
FloatUnaryOp::Sqrt => {
|
||||
let op = op.to_scalar();
|
||||
// FIXME using host floats
|
||||
Ok(Scalar::from_u32(f32::from_bits(op.to_u32()?).sqrt().to_bits()))
|
||||
}
|
||||
FloatUnaryOp::Rcp => {
|
||||
let op = op.to_scalar().to_f32()?;
|
||||
let div = (Single::from_u128(1).value / op).value;
|
||||
// Apply a relative error with a magnitude on the order of 2^-12 to simulate the
|
||||
// inaccuracy of RCP.
|
||||
let res = apply_random_float_error(this, div, -12);
|
||||
Ok(Scalar::from_f32(res))
|
||||
}
|
||||
FloatUnaryOp::Rsqrt => {
|
||||
let op = op.to_scalar().to_u32()?;
|
||||
// FIXME using host floats
|
||||
let sqrt = Single::from_bits(f32::from_bits(op).sqrt().to_bits().into());
|
||||
let rsqrt = (Single::from_u128(1).value / sqrt).value;
|
||||
// Apply a relative error with a magnitude on the order of 2^-12 to simulate the
|
||||
// inaccuracy of RSQRT.
|
||||
let res = apply_random_float_error(this, rsqrt, -12);
|
||||
Ok(Scalar::from_f32(res))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Disturbes a floating-point result by a relative error on the order of (-2^scale, 2^scale).
|
||||
#[allow(clippy::arithmetic_side_effects)] // floating point arithmetic cannot panic
|
||||
fn apply_random_float_error<F: rustc_apfloat::Float>(
|
||||
this: &mut crate::MiriInterpCx<'_, '_>,
|
||||
val: F,
|
||||
err_scale: i32,
|
||||
) -> F {
|
||||
let rng = this.machine.rng.get_mut();
|
||||
// generates rand(0, 2^64) * 2^(scale - 64) = rand(0, 1) * 2^scale
|
||||
let err =
|
||||
F::from_u128(rng.gen::<u64>().into()).value.scalbn(err_scale.checked_sub(64).unwrap());
|
||||
// give it a random sign
|
||||
let err = if rng.gen::<bool>() { -err } else { err };
|
||||
// multiple the value with (1+err)
|
||||
(val * (F::from_u128(1).value + err).value).value
|
||||
}
|
||||
|
||||
/// Performs `which` operation on the first component of `op` and copies
|
||||
/// the other components. The result is stored in `dest`.
|
||||
fn unary_op_ss<'tcx>(
|
||||
this: &mut crate::MiriInterpCx<'_, 'tcx>,
|
||||
which: FloatUnaryOp,
|
||||
op: &OpTy<'tcx, Provenance>,
|
||||
dest: &PlaceTy<'tcx, Provenance>,
|
||||
) -> InterpResult<'tcx, ()> {
|
||||
let (op, op_len) = this.operand_to_simd(op)?;
|
||||
let (dest, dest_len) = this.place_to_simd(dest)?;
|
||||
|
||||
assert_eq!(dest_len, op_len);
|
||||
|
||||
let res0 = unary_op_f32(this, which, &this.read_immediate(&this.project_index(&op, 0)?)?)?;
|
||||
this.write_scalar(res0, &this.project_index(&dest, 0)?)?;
|
||||
|
||||
for i in 1..dest_len {
|
||||
let op = this.read_immediate(&this.project_index(&op, i)?)?;
|
||||
let dest = this.project_index(&dest, i)?;
|
||||
|
||||
this.write_immediate(*op, &dest)?;
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Performs `which` operation on each component of `op`, storing the
|
||||
/// result is stored in `dest`.
|
||||
fn unary_op_ps<'tcx>(
|
||||
this: &mut crate::MiriInterpCx<'_, 'tcx>,
|
||||
which: FloatUnaryOp,
|
||||
op: &OpTy<'tcx, Provenance>,
|
||||
dest: &PlaceTy<'tcx, Provenance>,
|
||||
) -> InterpResult<'tcx, ()> {
|
||||
let (op, op_len) = this.operand_to_simd(op)?;
|
||||
let (dest, dest_len) = this.place_to_simd(dest)?;
|
||||
|
||||
assert_eq!(dest_len, op_len);
|
||||
|
||||
for i in 0..dest_len {
|
||||
let op = this.read_immediate(&this.project_index(&op, i)?)?;
|
||||
let dest = this.project_index(&dest, i)?;
|
||||
|
||||
let res = unary_op_f32(this, which, &op)?;
|
||||
this.write_scalar(res, &dest)?;
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
@@ -0,0 +1,1075 @@
|
||||
//@only-target-x86_64
|
||||
|
||||
use std::arch::x86_64::*;
|
||||
use std::f32::NAN;
|
||||
use std::mem::transmute;
|
||||
|
||||
fn main() {
|
||||
assert!(is_x86_feature_detected!("sse"));
|
||||
|
||||
unsafe {
|
||||
test_sse();
|
||||
}
|
||||
}
|
||||
|
||||
/// Asserts that `$a` and `$b` differ by strictly less than `$eps` in absolute
/// value, panicking with both values and both differences otherwise.
///
/// `$a` and `$b` are each evaluated once; `$eps` is evaluated again when the
/// failure message is built.
macro_rules! assert_approx_eq {
    ($a:expr, $b:expr, $eps:expr) => {{
        let lhs = &$a;
        let rhs = &$b;
        let diff = (*lhs - *rhs).abs();
        assert!(
            diff < $eps,
            "assertion failed: `(left !== right)` \
             (left: `{:?}`, right: `{:?}`, expect diff: `{:?}`, real diff: `{:?}`)",
            *lhs,
            *rhs,
            $eps,
            diff
        );
    }};
}
|
||||
|
||||
#[target_feature(enable = "sse")]
|
||||
unsafe fn test_sse() {
|
||||
// Mostly copied from library/stdarch/crates/core_arch/src/x86{,_64}/sse.rs
|
||||
|
||||
#[target_feature(enable = "sse")]
|
||||
unsafe fn assert_eq_m128(a: __m128, b: __m128) {
|
||||
let r = _mm_cmpeq_ps(a, b);
|
||||
if _mm_movemask_ps(r) != 0b1111 {
|
||||
panic!("{:?} != {:?}", a, b);
|
||||
}
|
||||
}
|
||||
|
||||
#[target_feature(enable = "sse")]
|
||||
unsafe fn test_mm_add_ss() {
|
||||
let a = _mm_set_ps(-1.0, 5.0, 0.0, -10.0);
|
||||
let b = _mm_set_ps(-100.0, 20.0, 0.0, -5.0);
|
||||
let r = _mm_add_ss(a, b);
|
||||
assert_eq_m128(r, _mm_set_ps(-1.0, 5.0, 0.0, -15.0));
|
||||
}
|
||||
test_mm_add_ss();
|
||||
|
||||
#[target_feature(enable = "sse")]
|
||||
unsafe fn test_mm_sub_ss() {
|
||||
let a = _mm_setr_ps(-1.0, 5.0, 0.0, -10.0);
|
||||
let b = _mm_setr_ps(-100.0, 20.0, 0.0, -5.0);
|
||||
let r = _mm_sub_ss(a, b);
|
||||
assert_eq_m128(r, _mm_setr_ps(99.0, 5.0, 0.0, -10.0));
|
||||
}
|
||||
test_mm_sub_ss();
|
||||
|
||||
#[target_feature(enable = "sse")]
|
||||
unsafe fn test_mm_mul_ss() {
|
||||
let a = _mm_setr_ps(-1.0, 5.0, 0.0, -10.0);
|
||||
let b = _mm_setr_ps(-100.0, 20.0, 0.0, -5.0);
|
||||
let r = _mm_mul_ss(a, b);
|
||||
assert_eq_m128(r, _mm_setr_ps(100.0, 5.0, 0.0, -10.0));
|
||||
}
|
||||
test_mm_mul_ss();
|
||||
|
||||
#[target_feature(enable = "sse")]
|
||||
unsafe fn test_mm_div_ss() {
|
||||
let a = _mm_setr_ps(-1.0, 5.0, 0.0, -10.0);
|
||||
let b = _mm_setr_ps(-100.0, 20.0, 0.0, -5.0);
|
||||
let r = _mm_div_ss(a, b);
|
||||
assert_eq_m128(r, _mm_setr_ps(0.01, 5.0, 0.0, -10.0));
|
||||
}
|
||||
test_mm_div_ss();
|
||||
|
||||
#[target_feature(enable = "sse")]
|
||||
unsafe fn test_mm_sqrt_ss() {
|
||||
let a = _mm_setr_ps(4.0, 13.0, 16.0, 100.0);
|
||||
let r = _mm_sqrt_ss(a);
|
||||
let e = _mm_setr_ps(2.0, 13.0, 16.0, 100.0);
|
||||
assert_eq_m128(r, e);
|
||||
}
|
||||
test_mm_sqrt_ss();
|
||||
|
||||
#[target_feature(enable = "sse")]
|
||||
unsafe fn test_mm_sqrt_ps() {
|
||||
let a = _mm_setr_ps(4.0, 13.0, 16.0, 100.0);
|
||||
let r = _mm_sqrt_ps(a);
|
||||
let e = _mm_setr_ps(2.0, 3.6055512, 4.0, 10.0);
|
||||
assert_eq_m128(r, e);
|
||||
}
|
||||
test_mm_sqrt_ps();
|
||||
|
||||
#[target_feature(enable = "sse")]
|
||||
unsafe fn test_mm_rcp_ss() {
|
||||
let a = _mm_setr_ps(4.0, 13.0, 16.0, 100.0);
|
||||
let r = _mm_rcp_ss(a);
|
||||
let e = _mm_setr_ps(0.24993896, 13.0, 16.0, 100.0);
|
||||
let rel_err = 0.00048828125;
|
||||
|
||||
let r: [f32; 4] = transmute(r);
|
||||
let e: [f32; 4] = transmute(e);
|
||||
assert_approx_eq!(r[0], e[0], 2. * rel_err);
|
||||
for i in 1..4 {
|
||||
assert_eq!(r[i], e[i]);
|
||||
}
|
||||
}
|
||||
test_mm_rcp_ss();
|
||||
|
||||
#[target_feature(enable = "sse")]
|
||||
unsafe fn test_mm_rcp_ps() {
|
||||
let a = _mm_setr_ps(4.0, 13.0, 16.0, 100.0);
|
||||
let r = _mm_rcp_ps(a);
|
||||
let e = _mm_setr_ps(0.24993896, 0.0769043, 0.06248474, 0.0099983215);
|
||||
let rel_err = 0.00048828125;
|
||||
|
||||
let r: [f32; 4] = transmute(r);
|
||||
let e: [f32; 4] = transmute(e);
|
||||
for i in 0..4 {
|
||||
assert_approx_eq!(r[i], e[i], 2. * rel_err);
|
||||
}
|
||||
}
|
||||
test_mm_rcp_ps();
|
||||
|
||||
#[target_feature(enable = "sse")]
|
||||
unsafe fn test_mm_rsqrt_ss() {
|
||||
let a = _mm_setr_ps(4.0, 13.0, 16.0, 100.0);
|
||||
let r = _mm_rsqrt_ss(a);
|
||||
let e = _mm_setr_ps(0.49987793, 13.0, 16.0, 100.0);
|
||||
let rel_err = 0.00048828125;
|
||||
|
||||
let r: [f32; 4] = transmute(r);
|
||||
let e: [f32; 4] = transmute(e);
|
||||
assert_approx_eq!(r[0], e[0], 2. * rel_err);
|
||||
for i in 1..4 {
|
||||
assert_eq!(r[i], e[i]);
|
||||
}
|
||||
}
|
||||
test_mm_rsqrt_ss();
|
||||
|
||||
#[target_feature(enable = "sse")]
|
||||
unsafe fn test_mm_rsqrt_ps() {
|
||||
let a = _mm_setr_ps(4.0, 13.0, 16.0, 100.0);
|
||||
let r = _mm_rsqrt_ps(a);
|
||||
let e = _mm_setr_ps(0.49987793, 0.2772827, 0.24993896, 0.099990845);
|
||||
let rel_err = 0.00048828125;
|
||||
|
||||
let r: [f32; 4] = transmute(r);
|
||||
let e: [f32; 4] = transmute(e);
|
||||
for i in 0..4 {
|
||||
assert_approx_eq!(r[i], e[i], 2. * rel_err);
|
||||
}
|
||||
}
|
||||
test_mm_rsqrt_ps();
|
||||
|
||||
#[target_feature(enable = "sse")]
|
||||
unsafe fn test_mm_min_ss() {
|
||||
let a = _mm_setr_ps(-1.0, 5.0, 0.0, -10.0);
|
||||
let b = _mm_setr_ps(-100.0, 20.0, 0.0, -5.0);
|
||||
let r = _mm_min_ss(a, b);
|
||||
assert_eq_m128(r, _mm_setr_ps(-100.0, 5.0, 0.0, -10.0));
|
||||
}
|
||||
test_mm_min_ss();
|
||||
|
||||
#[target_feature(enable = "sse")]
|
||||
unsafe fn test_mm_min_ps() {
|
||||
let a = _mm_setr_ps(-1.0, 5.0, 0.0, -10.0);
|
||||
let b = _mm_setr_ps(-100.0, 20.0, 0.0, -5.0);
|
||||
let r = _mm_min_ps(a, b);
|
||||
assert_eq_m128(r, _mm_setr_ps(-100.0, 5.0, 0.0, -10.0));
|
||||
|
||||
// `_mm_min_ps` can **not** be implemented using the `simd_min` rust intrinsic because
|
||||
// the semantics of `simd_min` are different to those of `_mm_min_ps` regarding handling
|
||||
// of `-0.0`.
|
||||
let a = _mm_setr_ps(-0.0, 0.0, 0.0, 0.0);
|
||||
let b = _mm_setr_ps(0.0, 0.0, 0.0, 0.0);
|
||||
let r1: [u8; 16] = transmute(_mm_min_ps(a, b));
|
||||
let r2: [u8; 16] = transmute(_mm_min_ps(b, a));
|
||||
let a: [u8; 16] = transmute(a);
|
||||
let b: [u8; 16] = transmute(b);
|
||||
assert_eq!(r1, b);
|
||||
assert_eq!(r2, a);
|
||||
assert_ne!(a, b); // sanity check that -0.0 is actually present
|
||||
}
|
||||
test_mm_min_ps();
|
||||
|
||||
#[target_feature(enable = "sse")]
|
||||
unsafe fn test_mm_max_ss() {
|
||||
let a = _mm_setr_ps(-1.0, 5.0, 0.0, -10.0);
|
||||
let b = _mm_setr_ps(-100.0, 20.0, 0.0, -5.0);
|
||||
let r = _mm_max_ss(a, b);
|
||||
assert_eq_m128(r, _mm_setr_ps(-1.0, 5.0, 0.0, -10.0));
|
||||
}
|
||||
test_mm_max_ss();
|
||||
|
||||
#[target_feature(enable = "sse")]
|
||||
unsafe fn test_mm_max_ps() {
|
||||
let a = _mm_setr_ps(-1.0, 5.0, 0.0, -10.0);
|
||||
let b = _mm_setr_ps(-100.0, 20.0, 0.0, -5.0);
|
||||
let r = _mm_max_ps(a, b);
|
||||
assert_eq_m128(r, _mm_setr_ps(-1.0, 20.0, 0.0, -5.0));
|
||||
|
||||
// `_mm_max_ps` can **not** be implemented using the `simd_max` rust intrinsic because
|
||||
// the semantics of `simd_max` are different to those of `_mm_max_ps` regarding handling
|
||||
// of `-0.0`.
|
||||
let a = _mm_setr_ps(-0.0, 0.0, 0.0, 0.0);
|
||||
let b = _mm_setr_ps(0.0, 0.0, 0.0, 0.0);
|
||||
let r1: [u8; 16] = transmute(_mm_max_ps(a, b));
|
||||
let r2: [u8; 16] = transmute(_mm_max_ps(b, a));
|
||||
let a: [u8; 16] = transmute(a);
|
||||
let b: [u8; 16] = transmute(b);
|
||||
assert_eq!(r1, b);
|
||||
assert_eq!(r2, a);
|
||||
assert_ne!(a, b); // sanity check that -0.0 is actually present
|
||||
}
|
||||
test_mm_max_ps();
|
||||
|
||||
#[target_feature(enable = "sse")]
unsafe fn test_mm_cmpeq_ss() {
    // Only lane 0 is compared; the expected vectors keep lanes 1..=3 of the
    // first operand. Each case is (lane 0 of the second operand, expected mask).
    let lhs = _mm_setr_ps(1.0, 2.0, 3.0, 4.0);
    let cases = [
        (-1.0f32, 0u32),      // 1.0 == -1.0 is false
        (1.0, 0xffffffffu32), // 1.0 == 1.0 is true
    ];

    for &(rhs0, mask) in cases.iter() {
        let rhs = _mm_setr_ps(rhs0, 5.0, 6.0, 7.0);
        let got: [u32; 4] = transmute(_mm_cmpeq_ss(lhs, rhs));
        let want: [u32; 4] = transmute(_mm_setr_ps(transmute(mask), 2.0, 3.0, 4.0));
        assert_eq!(got, want);
    }
}
|
||||
test_mm_cmpeq_ss();
|
||||
|
||||
#[target_feature(enable = "sse")]
unsafe fn test_mm_cmplt_ss() {
    // Only lane 0 is compared; the expected vectors keep lanes 1..=3 of the
    // first operand. Each case is (lane 0 of the second operand, expected mask).
    let lhs = _mm_setr_ps(1.0, 2.0, 3.0, 4.0);
    let cases = [
        (0.0f32, 0u32), // 1.0 < 0.0 is false
        (1.0, 0u32),    // 1.0 < 1.0 is false
        (2.0, !0u32),   // 1.0 < 2.0 is true
    ];

    for &(rhs0, mask) in cases.iter() {
        let rhs = _mm_setr_ps(rhs0, 5.0, 6.0, 7.0);
        let got: [u32; 4] = transmute(_mm_cmplt_ss(lhs, rhs));
        let want: [u32; 4] = transmute(_mm_setr_ps(transmute(mask), 2.0, 3.0, 4.0));
        assert_eq!(got, want);
    }
}
|
||||
test_mm_cmplt_ss();
|
||||
|
||||
#[target_feature(enable = "sse")]
unsafe fn test_mm_cmple_ss() {
    // Only lane 0 is compared; the expected vectors keep lanes 1..=3 of the
    // first operand. Each case is (lane 0 of the second operand, expected mask).
    let lhs = _mm_setr_ps(1.0, 2.0, 3.0, 4.0);
    let cases = [
        (0.0f32, 0u32), // 1.0 <= 0.0 is false
        (1.0, !0u32),   // 1.0 <= 1.0 is true
        (2.0, !0u32),   // 1.0 <= 2.0 is true
    ];

    for &(rhs0, mask) in cases.iter() {
        let rhs = _mm_setr_ps(rhs0, 5.0, 6.0, 7.0);
        let got: [u32; 4] = transmute(_mm_cmple_ss(lhs, rhs));
        let want: [u32; 4] = transmute(_mm_setr_ps(transmute(mask), 2.0, 3.0, 4.0));
        assert_eq!(got, want);
    }
}
|
||||
test_mm_cmple_ss();
|
||||
|
||||
#[target_feature(enable = "sse")]
unsafe fn test_mm_cmpgt_ss() {
    // Only lane 0 is compared; the expected vectors keep lanes 1..=3 of the
    // first operand. Each case is (lane 0 of the second operand, expected mask).
    let lhs = _mm_setr_ps(1.0, 2.0, 3.0, 4.0);
    let cases = [
        (0.0f32, !0u32), // 1.0 > 0.0 is true
        (1.0, 0u32),     // 1.0 > 1.0 is false
        (2.0, 0u32),     // 1.0 > 2.0 is false
    ];

    for &(rhs0, mask) in cases.iter() {
        let rhs = _mm_setr_ps(rhs0, 5.0, 6.0, 7.0);
        let got: [u32; 4] = transmute(_mm_cmpgt_ss(lhs, rhs));
        let want: [u32; 4] = transmute(_mm_setr_ps(transmute(mask), 2.0, 3.0, 4.0));
        assert_eq!(got, want);
    }
}
|
||||
test_mm_cmpgt_ss();
|
||||
|
||||
#[target_feature(enable = "sse")]
unsafe fn test_mm_cmpge_ss() {
    // Only lane 0 is compared; the expected vectors keep lanes 1..=3 of the
    // first operand. Each case is (lane 0 of the second operand, expected mask).
    let lhs = _mm_setr_ps(1.0, 2.0, 3.0, 4.0);
    let cases = [
        (0.0f32, !0u32), // 1.0 >= 0.0 is true
        (1.0, !0u32),    // 1.0 >= 1.0 is true
        (2.0, 0u32),     // 1.0 >= 2.0 is false
    ];

    for &(rhs0, mask) in cases.iter() {
        let rhs = _mm_setr_ps(rhs0, 5.0, 6.0, 7.0);
        let got: [u32; 4] = transmute(_mm_cmpge_ss(lhs, rhs));
        let want: [u32; 4] = transmute(_mm_setr_ps(transmute(mask), 2.0, 3.0, 4.0));
        assert_eq!(got, want);
    }
}
|
||||
test_mm_cmpge_ss();
|
||||
|
||||
#[target_feature(enable = "sse")]
unsafe fn test_mm_cmpneq_ss() {
    // Only lane 0 is compared; the expected vectors keep lanes 1..=3 of the
    // first operand. Each case is (lane 0 of the second operand, expected mask).
    let lhs = _mm_setr_ps(1.0, 2.0, 3.0, 4.0);
    let cases = [
        (0.0f32, !0u32), // 1.0 != 0.0 is true
        (1.0, 0u32),     // 1.0 != 1.0 is false
        (2.0, !0u32),    // 1.0 != 2.0 is true
    ];

    for &(rhs0, mask) in cases.iter() {
        let rhs = _mm_setr_ps(rhs0, 5.0, 6.0, 7.0);
        let got: [u32; 4] = transmute(_mm_cmpneq_ss(lhs, rhs));
        let want: [u32; 4] = transmute(_mm_setr_ps(transmute(mask), 2.0, 3.0, 4.0));
        assert_eq!(got, want);
    }
}
|
||||
test_mm_cmpneq_ss();
|
||||
|
||||
#[target_feature(enable = "sse")]
unsafe fn test_mm_cmpnlt_ss() {
    // For ordered operands "not less than" coincides with "greater than or
    // equal", so the first three cases mirror the `_mm_cmpge_ss` test. The two
    // predicates differ when an operand is NaN: `!(a < NaN)` is true, whereas
    // `a >= NaN` is false. The last case pins down that difference.
    //
    // Only lane 0 is compared; the expected vectors keep lanes 1..=3 of the
    // first operand. Each case is (lane 0 of the second operand, expected mask).
    let a = _mm_setr_ps(1.0, 2.0, 3.0, 4.0);
    let cases = [
        (0.0f32, !0u32), // !(1.0 < 0.0)
        (1.0, !0u32),    // !(1.0 < 1.0)
        (2.0, 0u32),     // !(1.0 < 2.0)
        (NAN, !0u32),    // !(1.0 < NaN): unordered, so the negated predicate holds
    ];

    for &(b0, mask) in cases.iter() {
        let b = _mm_setr_ps(b0, 5.0, 6.0, 7.0);
        let r: [u32; 4] = transmute(_mm_cmpnlt_ss(a, b));
        let e: [u32; 4] = transmute(_mm_setr_ps(transmute(mask), 2.0, 3.0, 4.0));
        assert_eq!(r, e);
    }
}
|
||||
test_mm_cmpnlt_ss();
|
||||
|
||||
#[target_feature(enable = "sse")]
unsafe fn test_mm_cmpnle_ss() {
    // For ordered operands "not less than or equal" coincides with "greater
    // than", so the first three cases mirror the `_mm_cmpgt_ss` test. The two
    // predicates differ when an operand is NaN: `!(a <= NaN)` is true, whereas
    // `a > NaN` is false. The last case pins down that difference.
    //
    // Only lane 0 is compared; the expected vectors keep lanes 1..=3 of the
    // first operand. Each case is (lane 0 of the second operand, expected mask).
    let a = _mm_setr_ps(1.0, 2.0, 3.0, 4.0);
    let cases = [
        (0.0f32, !0u32), // !(1.0 <= 0.0)
        (1.0, 0u32),     // !(1.0 <= 1.0)
        (2.0, 0u32),     // !(1.0 <= 2.0)
        (NAN, !0u32),    // !(1.0 <= NaN): unordered, so the negated predicate holds
    ];

    for &(b0, mask) in cases.iter() {
        let b = _mm_setr_ps(b0, 5.0, 6.0, 7.0);
        let r: [u32; 4] = transmute(_mm_cmpnle_ss(a, b));
        let e: [u32; 4] = transmute(_mm_setr_ps(transmute(mask), 2.0, 3.0, 4.0));
        assert_eq!(r, e);
    }
}
|
||||
test_mm_cmpnle_ss();
|
||||
|
||||
#[target_feature(enable = "sse")]
unsafe fn test_mm_cmpngt_ss() {
    // For ordered operands "not greater than" coincides with "less than or
    // equal", so the first three cases mirror the `_mm_cmple_ss` test. The two
    // predicates differ when an operand is NaN: `!(a > NaN)` is true, whereas
    // `a <= NaN` is false. The last case pins down that difference.
    //
    // Only lane 0 is compared; the expected vectors keep lanes 1..=3 of the
    // first operand. Each case is (lane 0 of the second operand, expected mask).
    let a = _mm_setr_ps(1.0, 2.0, 3.0, 4.0);
    let cases = [
        (0.0f32, 0u32), // !(1.0 > 0.0)
        (1.0, !0u32),   // !(1.0 > 1.0)
        (2.0, !0u32),   // !(1.0 > 2.0)
        (NAN, !0u32),   // !(1.0 > NaN): unordered, so the negated predicate holds
    ];

    for &(b0, mask) in cases.iter() {
        let b = _mm_setr_ps(b0, 5.0, 6.0, 7.0);
        let r: [u32; 4] = transmute(_mm_cmpngt_ss(a, b));
        let e: [u32; 4] = transmute(_mm_setr_ps(transmute(mask), 2.0, 3.0, 4.0));
        assert_eq!(r, e);
    }
}
|
||||
test_mm_cmpngt_ss();
|
||||
|
||||
#[target_feature(enable = "sse")]
unsafe fn test_mm_cmpnge_ss() {
    // For ordered operands "not greater than or equal" coincides with "less
    // than", so the first three cases mirror the `_mm_cmplt_ss` test. The two
    // predicates differ when an operand is NaN: `!(a >= NaN)` is true, whereas
    // `a < NaN` is false. The last case pins down that difference.
    //
    // Only lane 0 is compared; the expected vectors keep lanes 1..=3 of the
    // first operand. Each case is (lane 0 of the second operand, expected mask).
    let a = _mm_setr_ps(1.0, 2.0, 3.0, 4.0);
    let cases = [
        (0.0f32, 0u32), // !(1.0 >= 0.0)
        (1.0, 0u32),    // !(1.0 >= 1.0)
        (2.0, !0u32),   // !(1.0 >= 2.0)
        (NAN, !0u32),   // !(1.0 >= NaN): unordered, so the negated predicate holds
    ];

    for &(b0, mask) in cases.iter() {
        let b = _mm_setr_ps(b0, 5.0, 6.0, 7.0);
        let r: [u32; 4] = transmute(_mm_cmpnge_ss(a, b));
        let e: [u32; 4] = transmute(_mm_setr_ps(transmute(mask), 2.0, 3.0, 4.0));
        assert_eq!(r, e);
    }
}
|
||||
test_mm_cmpnge_ss();
|
||||
|
||||
#[target_feature(enable = "sse")]
unsafe fn test_mm_cmpord_ss() {
    // Only lane 0 is compared; the expected vectors keep lanes 1..=3 of the
    // first operand. Each case is (lane 0 of the second operand, expected mask):
    // the mask is expected to be set unless the second operand is NaN.
    let lhs = _mm_setr_ps(1.0, 2.0, 3.0, 4.0);
    let cases = [
        (0.0f32, !0u32), // 1.0 ord 0.0
        (NAN, 0u32),     // 1.0 ord NaN
        (2.0, !0u32),    // 1.0 ord 2.0
    ];

    for &(rhs0, mask) in cases.iter() {
        let rhs = _mm_setr_ps(rhs0, 5.0, 6.0, 7.0);
        let got: [u32; 4] = transmute(_mm_cmpord_ss(lhs, rhs));
        let want: [u32; 4] = transmute(_mm_setr_ps(transmute(mask), 2.0, 3.0, 4.0));
        assert_eq!(got, want);
    }
}
|
||||
test_mm_cmpord_ss();
|
||||
|
||||
#[target_feature(enable = "sse")]
unsafe fn test_mm_cmpunord_ss() {
    // Only lane 0 is compared; the expected vectors keep lanes 1..=3 of the
    // first operand. Each case is (lane 0 of the second operand, expected mask):
    // the mask is expected to be set only when the second operand is NaN.
    let lhs = _mm_setr_ps(1.0, 2.0, 3.0, 4.0);
    let cases = [
        (0.0f32, 0u32), // 1.0 unord 0.0
        (NAN, !0u32),   // 1.0 unord NaN
        (2.0, 0u32),    // 1.0 unord 2.0
    ];

    for &(rhs0, mask) in cases.iter() {
        let rhs = _mm_setr_ps(rhs0, 5.0, 6.0, 7.0);
        let got: [u32; 4] = transmute(_mm_cmpunord_ss(lhs, rhs));
        let want: [u32; 4] = transmute(_mm_setr_ps(transmute(mask), 2.0, 3.0, 4.0));
        assert_eq!(got, want);
    }
}
|
||||
test_mm_cmpunord_ss();
|
||||
|
||||
#[target_feature(enable = "sse")]
unsafe fn test_mm_cmpeq_ps() {
    // Per-lane `==`; the NaN/NaN lane is expected to compare false.
    let (yes, no) = (!0u32, 0u32);
    let lhs = _mm_setr_ps(10.0, 50.0, 1.0, NAN);
    let rhs = _mm_setr_ps(15.0, 20.0, 1.0, NAN);

    let mask: [u32; 4] = transmute(_mm_cmpeq_ps(lhs, rhs));
    assert_eq!(mask, [no, no, yes, no]);
}
|
||||
test_mm_cmpeq_ps();
|
||||
|
||||
#[target_feature(enable = "sse")]
unsafe fn test_mm_cmplt_ps() {
    // Per-lane `<`; the NaN/NaN lane is expected to compare false.
    let (yes, no) = (!0u32, 0u32);
    let lhs = _mm_setr_ps(10.0, 50.0, 1.0, NAN);
    let rhs = _mm_setr_ps(15.0, 20.0, 1.0, NAN);

    let mask: [u32; 4] = transmute(_mm_cmplt_ps(lhs, rhs));
    assert_eq!(mask, [yes, no, no, no]);
}
|
||||
test_mm_cmplt_ps();
|
||||
|
||||
#[target_feature(enable = "sse")]
unsafe fn test_mm_cmple_ps() {
    // Per-lane `<=`; the lane whose second operand is NaN is expected to compare false.
    let (yes, no) = (!0u32, 0u32);
    let lhs = _mm_setr_ps(10.0, 50.0, 1.0, 4.0);
    let rhs = _mm_setr_ps(15.0, 20.0, 1.0, NAN);

    let mask: [u32; 4] = transmute(_mm_cmple_ps(lhs, rhs));
    assert_eq!(mask, [yes, no, yes, no]);
}
|
||||
test_mm_cmple_ps();
|
||||
|
||||
#[target_feature(enable = "sse")]
unsafe fn test_mm_cmpgt_ps() {
    // Per-lane `>`; the lane whose first operand is NaN is expected to compare false.
    let (yes, no) = (!0u32, 0u32);
    let lhs = _mm_setr_ps(10.0, 50.0, 1.0, NAN);
    let rhs = _mm_setr_ps(15.0, 20.0, 1.0, 42.0);

    let mask: [u32; 4] = transmute(_mm_cmpgt_ps(lhs, rhs));
    assert_eq!(mask, [no, yes, no, no]);
}
|
||||
test_mm_cmpgt_ps();
|
||||
|
||||
#[target_feature(enable = "sse")]
unsafe fn test_mm_cmpge_ps() {
    // Per-lane `>=`; the lane whose first operand is NaN is expected to compare false.
    let (yes, no) = (!0u32, 0u32);
    let lhs = _mm_setr_ps(10.0, 50.0, 1.0, NAN);
    let rhs = _mm_setr_ps(15.0, 20.0, 1.0, 42.0);

    let mask: [u32; 4] = transmute(_mm_cmpge_ps(lhs, rhs));
    assert_eq!(mask, [no, yes, yes, no]);
}
|
||||
test_mm_cmpge_ps();
|
||||
|
||||
#[target_feature(enable = "sse")]
unsafe fn test_mm_cmpneq_ps() {
    // Per-lane `!=`; the NaN/NaN lane is expected to compare true.
    let (yes, no) = (!0u32, 0u32);
    let lhs = _mm_setr_ps(10.0, 50.0, 1.0, NAN);
    let rhs = _mm_setr_ps(15.0, 20.0, 1.0, NAN);

    let mask: [u32; 4] = transmute(_mm_cmpneq_ps(lhs, rhs));
    assert_eq!(mask, [yes, yes, no, yes]);
}
|
||||
test_mm_cmpneq_ps();
|
||||
|
||||
#[target_feature(enable = "sse")]
unsafe fn test_mm_cmpnlt_ps() {
    // Per-lane `!(a < b)`; the lane whose first operand is NaN is expected to be set.
    let (yes, no) = (!0u32, 0u32);
    let lhs = _mm_setr_ps(10.0, 50.0, 1.0, NAN);
    let rhs = _mm_setr_ps(15.0, 20.0, 1.0, 5.0);

    let mask: [u32; 4] = transmute(_mm_cmpnlt_ps(lhs, rhs));
    assert_eq!(mask, [no, yes, yes, yes]);
}
|
||||
test_mm_cmpnlt_ps();
|
||||
|
||||
#[target_feature(enable = "sse")]
unsafe fn test_mm_cmpnle_ps() {
    // Per-lane `!(a <= b)`; the lane whose first operand is NaN is expected to be set.
    let (yes, no) = (!0u32, 0u32);
    let lhs = _mm_setr_ps(10.0, 50.0, 1.0, NAN);
    let rhs = _mm_setr_ps(15.0, 20.0, 1.0, 5.0);

    let mask: [u32; 4] = transmute(_mm_cmpnle_ps(lhs, rhs));
    assert_eq!(mask, [no, yes, no, yes]);
}
|
||||
test_mm_cmpnle_ps();
|
||||
|
||||
#[target_feature(enable = "sse")]
unsafe fn test_mm_cmpngt_ps() {
    // Per-lane `!(a > b)`; the lane whose first operand is NaN is expected to be set.
    let (yes, no) = (!0u32, 0u32);
    let lhs = _mm_setr_ps(10.0, 50.0, 1.0, NAN);
    let rhs = _mm_setr_ps(15.0, 20.0, 1.0, 5.0);

    let mask: [u32; 4] = transmute(_mm_cmpngt_ps(lhs, rhs));
    assert_eq!(mask, [yes, no, yes, yes]);
}
|
||||
test_mm_cmpngt_ps();
|
||||
|
||||
#[target_feature(enable = "sse")]
unsafe fn test_mm_cmpnge_ps() {
    // Per-lane `!(a >= b)`; the lane whose first operand is NaN is expected to be set.
    let (yes, no) = (!0u32, 0u32);
    let lhs = _mm_setr_ps(10.0, 50.0, 1.0, NAN);
    let rhs = _mm_setr_ps(15.0, 20.0, 1.0, 5.0);

    let mask: [u32; 4] = transmute(_mm_cmpnge_ps(lhs, rhs));
    assert_eq!(mask, [yes, no, no, yes]);
}
|
||||
test_mm_cmpnge_ps();
|
||||
|
||||
#[target_feature(enable = "sse")]
unsafe fn test_mm_cmpord_ps() {
    // A lane is expected to be set only when neither operand is NaN; the inputs
    // cover NaN in the second, the first, and both operands.
    let (yes, no) = (!0u32, 0u32);
    let lhs = _mm_setr_ps(10.0, 50.0, NAN, NAN);
    let rhs = _mm_setr_ps(15.0, NAN, 1.0, NAN);

    let mask: [u32; 4] = transmute(_mm_cmpord_ps(lhs, rhs));
    assert_eq!(mask, [yes, no, no, no]);
}
|
||||
test_mm_cmpord_ps();
|
||||
|
||||
#[target_feature(enable = "sse")]
unsafe fn test_mm_cmpunord_ps() {
    // A lane is expected to be set when at least one operand is NaN; the inputs
    // cover NaN in the second, the first, and both operands.
    let (yes, no) = (!0u32, 0u32);
    let lhs = _mm_setr_ps(10.0, 50.0, NAN, NAN);
    let rhs = _mm_setr_ps(15.0, NAN, 1.0, NAN);

    let mask: [u32; 4] = transmute(_mm_cmpunord_ps(lhs, rhs));
    assert_eq!(mask, [no, yes, yes, yes]);
}
|
||||
test_mm_cmpunord_ps();
|
||||
|
||||
#[target_feature(enable = "sse")]
unsafe fn test_mm_comieq_ss() {
    // Each case is (lane 0 of `a`, lane 0 of `b`, expected result).
    // The last row has NaN on both sides.
    let cases = [(3.0f32, 3.0f32, 1i32), (12.0, 47.5, 0), (23.0, 1.5, 0), (NAN, NAN, 0)];

    for (i, &(a0, b0, want)) in cases.iter().enumerate() {
        let a = _mm_setr_ps(a0, 1.0, 2.0, 3.0);
        let b = _mm_setr_ps(b0, 0.0, 2.0, 4.0);
        let r = _mm_comieq_ss(a, b);

        assert_eq!(
            want, r,
            "_mm_comieq_ss({:?}, {:?}) = {}, expected: {} (i={})",
            a, b, r, want, i
        );
    }
}
|
||||
test_mm_comieq_ss();
|
||||
|
||||
#[target_feature(enable = "sse")]
unsafe fn test_mm_comilt_ss() {
    // Each case is (lane 0 of `a`, lane 0 of `b`, expected result).
    // The last row has NaN on both sides.
    let cases = [(3.0f32, 3.0f32, 0i32), (12.0, 47.5, 1), (23.0, 1.5, 0), (NAN, NAN, 0)];

    for (i, &(a0, b0, want)) in cases.iter().enumerate() {
        let a = _mm_setr_ps(a0, 1.0, 2.0, 3.0);
        let b = _mm_setr_ps(b0, 0.0, 2.0, 4.0);
        let r = _mm_comilt_ss(a, b);

        assert_eq!(
            want, r,
            "_mm_comilt_ss({:?}, {:?}) = {}, expected: {} (i={})",
            a, b, r, want, i
        );
    }
}
|
||||
test_mm_comilt_ss();
|
||||
|
||||
#[target_feature(enable = "sse")]
unsafe fn test_mm_comile_ss() {
    // Each case is (lane 0 of `a`, lane 0 of `b`, expected result).
    // The last row has NaN on both sides.
    let cases = [(3.0f32, 3.0f32, 1i32), (12.0, 47.5, 1), (23.0, 1.5, 0), (NAN, NAN, 0)];

    for (i, &(a0, b0, want)) in cases.iter().enumerate() {
        let a = _mm_setr_ps(a0, 1.0, 2.0, 3.0);
        let b = _mm_setr_ps(b0, 0.0, 2.0, 4.0);
        let r = _mm_comile_ss(a, b);

        assert_eq!(
            want, r,
            "_mm_comile_ss({:?}, {:?}) = {}, expected: {} (i={})",
            a, b, r, want, i
        );
    }
}
|
||||
test_mm_comile_ss();
|
||||
|
||||
#[target_feature(enable = "sse")]
unsafe fn test_mm_comigt_ss() {
    // This test previously called `_mm_comige_ss` (with the `>=` expectations),
    // so `_mm_comigt_ss` itself was never exercised. It now checks
    // `_mm_comigt_ss`, and keeps the `_mm_comige_ss` check alongside so that
    // coverage is not lost (no separate `_mm_comige_ss` test is visible next to
    // the other `comi*` tests).
    let aa = &[3.0f32, 12.0, 23.0, NAN];
    let bb = &[3.0f32, 47.5, 1.5, NAN];

    // Strictly greater: the equal pair (3.0, 3.0) and the NaN pair both give 0.
    let ee_gt = &[0i32, 0, 1, 0];
    // Greater or equal: differs from the above only for the equal pair.
    let ee_ge = &[1i32, 0, 1, 0];

    for i in 0..4 {
        let a = _mm_setr_ps(aa[i], 1.0, 2.0, 3.0);
        let b = _mm_setr_ps(bb[i], 0.0, 2.0, 4.0);

        let r = _mm_comigt_ss(a, b);
        assert_eq!(
            ee_gt[i], r,
            "_mm_comigt_ss({:?}, {:?}) = {}, expected: {} (i={})",
            a, b, r, ee_gt[i], i
        );

        let r = _mm_comige_ss(a, b);
        assert_eq!(
            ee_ge[i], r,
            "_mm_comige_ss({:?}, {:?}) = {}, expected: {} (i={})",
            a, b, r, ee_ge[i], i
        );
    }
}
|
||||
test_mm_comigt_ss();
|
||||
|
||||
#[target_feature(enable = "sse")]
unsafe fn test_mm_comineq_ss() {
    // Each case is (lane 0 of `a`, lane 0 of `b`, expected result).
    // The last row has NaN on both sides.
    let cases = [(3.0f32, 3.0f32, 0i32), (12.0, 47.5, 1), (23.0, 1.5, 1), (NAN, NAN, 1)];

    for (i, &(a0, b0, want)) in cases.iter().enumerate() {
        let a = _mm_setr_ps(a0, 1.0, 2.0, 3.0);
        let b = _mm_setr_ps(b0, 0.0, 2.0, 4.0);
        let r = _mm_comineq_ss(a, b);

        assert_eq!(
            want, r,
            "_mm_comineq_ss({:?}, {:?}) = {}, expected: {} (i={})",
            a, b, r, want, i
        );
    }
}
|
||||
test_mm_comineq_ss();
|
||||
|
||||
#[target_feature(enable = "sse")]
unsafe fn test_mm_ucomieq_ss() {
    // Each case is (lane 0 of `a`, lane 0 of `b`, expected result).
    // The last row has NaN on both sides.
    let cases = [(3.0f32, 3.0f32, 1i32), (12.0, 47.5, 0), (23.0, 1.5, 0), (NAN, NAN, 0)];

    for (i, &(a0, b0, want)) in cases.iter().enumerate() {
        let a = _mm_setr_ps(a0, 1.0, 2.0, 3.0);
        let b = _mm_setr_ps(b0, 0.0, 2.0, 4.0);
        let r = _mm_ucomieq_ss(a, b);

        assert_eq!(
            want, r,
            "_mm_ucomieq_ss({:?}, {:?}) = {}, expected: {} (i={})",
            a, b, r, want, i
        );
    }
}
|
||||
test_mm_ucomieq_ss();
|
||||
|
||||
#[target_feature(enable = "sse")]
unsafe fn test_mm_ucomilt_ss() {
    // Each case is (lane 0 of `a`, lane 0 of `b`, expected result).
    // The last row has NaN on both sides.
    let cases = [(3.0f32, 3.0f32, 0i32), (12.0, 47.5, 1), (23.0, 1.5, 0), (NAN, NAN, 0)];

    for (i, &(a0, b0, want)) in cases.iter().enumerate() {
        let a = _mm_setr_ps(a0, 1.0, 2.0, 3.0);
        let b = _mm_setr_ps(b0, 0.0, 2.0, 4.0);
        let r = _mm_ucomilt_ss(a, b);

        assert_eq!(
            want, r,
            "_mm_ucomilt_ss({:?}, {:?}) = {}, expected: {} (i={})",
            a, b, r, want, i
        );
    }
}
|
||||
test_mm_ucomilt_ss();
|
||||
|
||||
#[target_feature(enable = "sse")]
unsafe fn test_mm_ucomile_ss() {
    // Each case is (lane 0 of `a`, lane 0 of `b`, expected result).
    // The last row has NaN on both sides.
    let cases = [(3.0f32, 3.0f32, 1i32), (12.0, 47.5, 1), (23.0, 1.5, 0), (NAN, NAN, 0)];

    for (i, &(a0, b0, want)) in cases.iter().enumerate() {
        let a = _mm_setr_ps(a0, 1.0, 2.0, 3.0);
        let b = _mm_setr_ps(b0, 0.0, 2.0, 4.0);
        let r = _mm_ucomile_ss(a, b);

        assert_eq!(
            want, r,
            "_mm_ucomile_ss({:?}, {:?}) = {}, expected: {} (i={})",
            a, b, r, want, i
        );
    }
}
|
||||
test_mm_ucomile_ss();
|
||||
|
||||
#[target_feature(enable = "sse")]
unsafe fn test_mm_ucomigt_ss() {
    // Each case is (lane 0 of `a`, lane 0 of `b`, expected result).
    // The last row has NaN on both sides.
    let cases = [(3.0f32, 3.0f32, 0i32), (12.0, 47.5, 0), (23.0, 1.5, 1), (NAN, NAN, 0)];

    for (i, &(a0, b0, want)) in cases.iter().enumerate() {
        let a = _mm_setr_ps(a0, 1.0, 2.0, 3.0);
        let b = _mm_setr_ps(b0, 0.0, 2.0, 4.0);
        let r = _mm_ucomigt_ss(a, b);

        assert_eq!(
            want, r,
            "_mm_ucomigt_ss({:?}, {:?}) = {}, expected: {} (i={})",
            a, b, r, want, i
        );
    }
}
|
||||
test_mm_ucomigt_ss();
|
||||
|
||||
#[target_feature(enable = "sse")]
unsafe fn test_mm_ucomige_ss() {
    // Each case is (lane 0 of `a`, lane 0 of `b`, expected result).
    // The last row has NaN on both sides.
    let cases = [(3.0f32, 3.0f32, 1i32), (12.0, 47.5, 0), (23.0, 1.5, 1), (NAN, NAN, 0)];

    for (i, &(a0, b0, want)) in cases.iter().enumerate() {
        let a = _mm_setr_ps(a0, 1.0, 2.0, 3.0);
        let b = _mm_setr_ps(b0, 0.0, 2.0, 4.0);
        let r = _mm_ucomige_ss(a, b);

        assert_eq!(
            want, r,
            "_mm_ucomige_ss({:?}, {:?}) = {}, expected: {} (i={})",
            a, b, r, want, i
        );
    }
}
|
||||
test_mm_ucomige_ss();
|
||||
|
||||
#[target_feature(enable = "sse")]
unsafe fn test_mm_ucomineq_ss() {
    // Each case is (lane 0 of `a`, lane 0 of `b`, expected result).
    // The last row has NaN on both sides.
    let cases = [(3.0f32, 3.0f32, 0i32), (12.0, 47.5, 1), (23.0, 1.5, 1), (NAN, NAN, 1)];

    for (i, &(a0, b0, want)) in cases.iter().enumerate() {
        let a = _mm_setr_ps(a0, 1.0, 2.0, 3.0);
        let b = _mm_setr_ps(b0, 0.0, 2.0, 4.0);
        let r = _mm_ucomineq_ss(a, b);

        assert_eq!(
            want, r,
            "_mm_ucomineq_ss({:?}, {:?}) = {}, expected: {} (i={})",
            a, b, r, want, i
        );
    }
}
|
||||
test_mm_ucomineq_ss();
|
||||
|
||||
#[target_feature(enable = "sse")]
unsafe fn test_mm_cvtss_si32() {
    // `expected[k]` is the conversion result for lane 0 set to `inputs[k]`;
    // the out-of-range and NaN inputs are expected to yield `i32::MIN`.
    let inputs = [42.0f32, -3.1, 4.0e10, 4.0e-20, NAN, 2147483500.1];
    let expected = [42i32, -3, i32::MIN, 0, i32::MIN, 2147483520];

    for (i, (&input, &e)) in inputs.iter().zip(expected.iter()).enumerate() {
        let x = _mm_setr_ps(input, 1.0, 3.0, 4.0);
        let r = _mm_cvtss_si32(x);
        assert_eq!(e, r, "TestCase #{} _mm_cvtss_si32({:?}) = {}, expected: {}", i, x, r, e);
    }
}
|
||||
test_mm_cvtss_si32();
|
||||
|
||||
#[target_feature(enable = "sse")]
unsafe fn test_mm_cvttss_si32() {
    // (lane-0 input, expected result). The fractional inputs are expected to
    // be truncated toward zero; the out-of-range and NaN inputs are expected
    // to yield `i32::MIN`.
    let cases = [
        (42.0f32, 42i32),
        (-31.4, -31),
        (-33.5, -33),
        (-34.5, -34),
        (10.999, 10),
        (-5.99, -5),
        (4.0e10, i32::MIN),
        (4.0e-10, 0),
        (NAN, i32::MIN),
        (2147483500.1, 2147483520),
    ];

    for (i, &(xi, e)) in cases.iter().enumerate() {
        let x = _mm_setr_ps(xi, 1.0, 3.0, 4.0);
        let r = _mm_cvttss_si32(x);
        assert_eq!(e, r, "TestCase #{} _mm_cvttss_si32({:?}) = {}, expected: {}", i, x, r, e);
    }
}
|
||||
test_mm_cvttss_si32();
|
||||
|
||||
#[target_feature(enable = "sse")]
unsafe fn test_mm_cvtss_f32() {
    // Lane 0 of the vector is expected back unchanged.
    let lane0 = 312.0134;
    let v = _mm_setr_ps(lane0, 5.0, 6.0, 7.0);
    assert_eq!(_mm_cvtss_f32(v), lane0);
}
|
||||
test_mm_cvtss_f32();
|
||||
|
||||
#[target_feature(enable = "sse")]
|
||||
unsafe fn test_mm_cvtsi32_ss() {
|
||||
let inputs = &[
|
||||
(4555i32, 4555.0f32),
|
||||
(322223333, 322223330.0),
|
||||
(-432, -432.0),
|
||||
(-322223333, -322223330.0),
|
||||
];
|
||||
|
||||
for i in 0..inputs.len() {
|
||||
let (x, f) = inputs[i];
|
||||
let a = _mm_setr_ps(5.0, 6.0, 7.0, 8.0);
|
||||
let r = _mm_cvtsi32_ss(a, x);
|
||||
let e = _mm_setr_ps(f, 6.0, 7.0, 8.0);
|
||||
assert_eq_m128(e, r);
|
||||
}
|
||||
}
|
||||
test_mm_cvtsi32_ss();
|
||||
|
||||
#[target_feature(enable = "sse")]
unsafe fn test_mm_cvtss_si64() {
    // (lane-0 input, expected result). Note the two halfway cases: both -33.5
    // and -34.5 are expected to give -34. NaN is expected to yield `i64::MIN`.
    let cases = [
        (42.0f32, 42i64),
        (-31.4, -31),
        (-33.5, -34),
        (-34.5, -34),
        (4.0e10, 40_000_000_000),
        (4.0e-10, 0),
        (f32::NAN, i64::MIN),
        (2147483500.1, 2147483520),
        (9.223371e18, 9223370937343148032),
    ];

    for (i, &(xi, e)) in cases.iter().enumerate() {
        let x = _mm_setr_ps(xi, 1.0, 3.0, 4.0);
        let r = _mm_cvtss_si64(x);
        assert_eq!(e, r, "TestCase #{} _mm_cvtss_si64({:?}) = {}, expected: {}", i, x, r, e);
    }
}
|
||||
test_mm_cvtss_si64();
|
||||
|
||||
#[target_feature(enable = "sse")]
unsafe fn test_mm_cvttss_si64() {
    // (lane-0 input, expected result). The fractional inputs are expected to
    // be truncated toward zero; NaN and the out-of-range 9.223372e18 are
    // expected to yield `i64::MIN`.
    let cases = [
        (42.0f32, 42i64),
        (-31.4, -31),
        (-33.5, -33),
        (-34.5, -34),
        (10.999, 10),
        (-5.99, -5),
        (4.0e10, 40_000_000_000),
        (4.0e-10, 0),
        (f32::NAN, i64::MIN),
        (2147483500.1, 2147483520),
        (9.223371e18, 9223370937343148032),
        (9.223372e18, i64::MIN),
    ];

    for (i, &(xi, e)) in cases.iter().enumerate() {
        let x = _mm_setr_ps(xi, 1.0, 3.0, 4.0);
        let r = _mm_cvttss_si64(x);
        assert_eq!(e, r, "TestCase #{} _mm_cvttss_si64({:?}) = {}, expected: {}", i, x, r, e);
    }
}
|
||||
test_mm_cvttss_si64();
|
||||
|
||||
#[target_feature(enable = "sse")]
|
||||
unsafe fn test_mm_cvtsi64_ss() {
|
||||
let inputs = &[
|
||||
(4555i64, 4555.0f32),
|
||||
(322223333, 322223330.0),
|
||||
(-432, -432.0),
|
||||
(-322223333, -322223330.0),
|
||||
(9223372036854775807, 9.223372e18),
|
||||
(-9223372036854775808, -9.223372e18),
|
||||
];
|
||||
|
||||
for i in 0..inputs.len() {
|
||||
let (x, f) = inputs[i];
|
||||
let a = _mm_setr_ps(5.0, 6.0, 7.0, 8.0);
|
||||
let r = _mm_cvtsi64_ss(a, x);
|
||||
let e = _mm_setr_ps(f, 6.0, 7.0, 8.0);
|
||||
assert_eq_m128(e, r);
|
||||
}
|
||||
}
|
||||
test_mm_cvtsi64_ss();
|
||||
|
||||
#[target_feature(enable = "sse")]
unsafe fn test_mm_movemask_ps() {
    // (lanes, expected mask): bit `k` of the mask is expected to be set exactly
    // for the negative lanes; the 0.0 lane contributes no bit.
    let cases = [
        ([-1.0f32, 5.0, -5.0, 0.0], 0b0101),
        ([-1.0, -5.0, -5.0, 0.0], 0b0111),
    ];

    for &(lanes, mask) in cases.iter() {
        let r = _mm_movemask_ps(_mm_setr_ps(lanes[0], lanes[1], lanes[2], lanes[3]));
        assert_eq!(r, mask);
    }
}
|
||||
test_mm_movemask_ps();
|
||||
|
||||
let x = 0i8;
|
||||
_mm_prefetch(&x, _MM_HINT_T0);
|
||||
_mm_prefetch(&x, _MM_HINT_T1);
|
||||
_mm_prefetch(&x, _MM_HINT_T2);
|
||||
_mm_prefetch(&x, _MM_HINT_NTA);
|
||||
_mm_prefetch(&x, _MM_HINT_ET0);
|
||||
_mm_prefetch(&x, _MM_HINT_ET1);
|
||||
}
|
||||
Reference in New Issue
Block a user