[neon] reciprocal square-root estimate (#121)

This commit is contained in:
gnzlbg
2017-10-18 20:16:34 +02:00
committed by Alex Crichton
parent 13bc6b8517
commit 2dc965b69a
3 changed files with 54 additions and 0 deletions
+11
View File
@@ -9,5 +9,16 @@ export RUSTFLAGS="$RUSTFLAGS -C codegen-units=1"
# having only one thread increases debuggability to be worth it.
export RUST_TEST_THREADS=1

# FIXME(rust-lang-nursery/stdsimd#120) run-time feature detection for ARM Neon
# Until that exists, NEON is enabled statically for every target whose name
# starts with "aarch", so code behind `cfg(target_feature = "neon")` is built
# and tested there. All other targets keep RUSTFLAGS unchanged.
case "${TARGET}" in
    aarch*)
        export RUSTFLAGS="${RUSTFLAGS} -C target-feature=+neon"
        ;;
esac

# Log the final flags so the CI output shows what the tests were built with.
echo "RUSTFLAGS=${RUSTFLAGS}"

# Run the suite in debug and in release mode. TARGET is quoted so that an
# empty or whitespace-containing value is passed as exactly one argument
# instead of being word-split or glob-expanded by the shell.
cargo test --target "${TARGET}"
cargo test --release --target "${TARGET}"
+5
View File
@@ -3,8 +3,13 @@
pub use self::v7::*;
#[cfg(target_arch = "aarch64")]
pub use self::v8::*;
#[cfg(target_feature = "neon")]
pub use self::neon::*;
mod v6;
mod v7;
#[cfg(target_arch = "aarch64")]
mod v8;
#[cfg(target_feature = "neon")]
mod neon;
+38
View File
@@ -0,0 +1,38 @@
//! ARM NEON intrinsics
//!
//! The reference is [ARM's NEON Intrinsics Reference](http://infocenter.arm.com/help/topic/com.arm.doc.ihi0073a/IHI0073A_arm_neon_intrinsics_ref.pdf). [ARM's NEON Intrinsics Online Database](https://developer.arm.com/technologies/neon/intrinsics) is also useful.
#[cfg(test)]
use stdsimd_test::assert_instr;
use v64::{f32x2};
#[allow(improper_ctypes)]
extern "C" {
#[link_name = "llvm.aarch64.neon.frsqrte.v2f32"]
fn frsqrte_v2f32(a: f32x2) -> f32x2;
}
/// Reciprocal square-root estimate.
#[inline(always)]
#[target_feature = "+neon"]
#[cfg_attr(test, assert_instr(frsqrte))]
pub unsafe fn vrsqrte_f32(a: f32x2) -> f32x2 {
frsqrte_v2f32(a)
}
#[cfg(test)]
mod tests {
    use arm::neon;
    use v64::f32x2;

    /// Checks `neon::vrsqrte_f32` against fixed expected lane values.
    ///
    /// The intrinsic produces an estimate, so the expected vector holds the
    /// estimated values (e.g. `0.9980469` for an input of `1.0`) rather than
    /// the mathematically exact reciprocal square roots.
    #[test]
    fn vrsqrte_f32() {
        let a = f32x2::new(1.0, 2.0);
        let e = f32x2::new(0.9980469, 0.7050781);
        // SAFETY: the `neon` module is declared behind
        // `cfg(target_feature = "neon")`, so this test is only built when
        // NEON is statically enabled for the target.
        let r = unsafe { neon::vrsqrte_f32(a) };
        assert_eq!(r, e);
    }
}