* avx: _mm256_zextps128_ps256

* avx: _mm256_zextpd128_pd256

* avx: _mm256_set_m128

* avx: _mm256_set_m128d

* avx: _mm256_castpd_ps

* avx: _mm256_castps_pd

* avx: _mm256_castps_si256

* avx: _mm256_castsi256_ps

* avx: _mm256_zextsi128_si256

* avx: _mm256_set_m128i
This commit is contained in:
gwenn
2017-10-17 00:14:09 +02:00
committed by Alex Crichton
parent 3286bbbab7
commit 19e7d0ed3e
+178 -1
View File
@@ -7,6 +7,7 @@
use simd_llvm::{simd_cast, simd_shuffle2, simd_shuffle4, simd_shuffle8};
use v128::{f32x4, f64x2, i32x4, i64x2};
use v256::*;
use x86::{__m128i, __m256i};
/// Add packed double-precision (64-bit) floating-point elements
/// in `a` and `b`.
@@ -1827,6 +1828,34 @@ pub unsafe fn _mm256_set1_epi64x(a: i64) -> i64x4 {
i64x4::new(a, a, a, a)
}
/// Cast vector of type __m256d to type __m256.
#[inline(always)]
#[target_feature = "+avx"]
pub unsafe fn _mm256_castpd_ps(a: f64x4) -> f32x8 {
mem::transmute(a)
}
/// Cast vector of type __m256 to type __m256d.
#[inline(always)]
#[target_feature = "+avx"]
pub unsafe fn _mm256_castps_pd(a: f32x8) -> f64x4 {
mem::transmute(a)
}
/// Casts vector of type __m256 to type __m256i.
#[inline(always)]
#[target_feature = "+avx"]
pub unsafe fn _mm256_castps_si256(a: f32x8) -> i64x4 {
mem::transmute(a)
}
/// Casts vector of type __m256i to type __m256.
#[inline(always)]
#[target_feature = "+avx"]
pub unsafe fn _mm256_castsi256_ps(a: i64x4) -> f32x8 {
mem::transmute(a)
}
/// Casts vector of type __m256d to type __m256i.
/// This intrinsic is only used for compilation and does not generate any
/// instructions, thus it has zero latency.
@@ -1899,6 +1928,37 @@ pub unsafe fn _mm256_castsi128_si256(a: i64x2) -> i64x4 {
simd_shuffle4(a, a, [0, 1, 0, 0])
}
/// Constructs a 256-bit floating-point vector of [8 x float] from a
/// 128-bit floating-point vector of [4 x float]. The lower 128 bits contain
/// the value of the source vector. The upper 128 bits are set to zero.
#[inline(always)]
#[target_feature = "+avx,+sse"]
pub unsafe fn _mm256_zextps128_ps256(a: f32x4) -> f32x8 {
use x86::sse::_mm_setzero_ps;
simd_shuffle8(a, _mm_setzero_ps(), [0, 1, 2, 3, 4, 5, 6, 7])
}
/// Constructs a 256-bit integer vector from a 128-bit integer vector.
/// The lower 128 bits contain the value of the source vector. The upper
/// 128 bits are set to zero.
#[inline(always)]
#[target_feature = "+avx,+sse2"]
pub unsafe fn _mm256_zextsi128_si256(a: i64x2) -> i64x4 {
use x86::sse2::_mm_setzero_si128;
simd_shuffle4(a, mem::transmute(_mm_setzero_si128()), [0, 1, 2, 3])
}
/// Constructs a 256-bit floating-point vector of [4 x double] from a
/// 128-bit floating-point vector of [2 x double]. The lower 128 bits
/// contain the value of the source vector. The upper 128 bits are set
/// to zero.
#[inline(always)]
#[target_feature = "+avx,+sse2"]
pub unsafe fn _mm256_zextpd128_pd256(a: f64x2) -> f64x4 {
use x86::sse2::_mm_setzero_pd;
simd_shuffle4(a, _mm_setzero_pd(), [0, 1, 2, 3])
}
/// Return vector of type `f32x8` with undefined elements.
#[inline(always)]
#[target_feature = "+avx"]
@@ -1920,6 +1980,34 @@ pub unsafe fn _mm256_undefined_si256() -> i64x4 {
i64x4::splat(mem::uninitialized())
}
/// Set packed __m256 returned vector with the supplied values.
#[inline(always)]
#[target_feature = "+avx"]
#[cfg_attr(test, assert_instr(vinsertf128))]
pub unsafe fn _mm256_set_m128(hi: f32x4, lo: f32x4) -> f32x8 {
simd_shuffle8(lo, hi, [0, 1, 2, 3, 4, 5, 6, 7])
}
/// Set packed __m256d returned vector with the supplied values.
#[inline(always)]
#[target_feature = "+avx"]
#[cfg_attr(test, assert_instr(vinsertf128))]
pub unsafe fn _mm256_set_m128d(hi: f64x2, lo: f64x2) -> f64x4 {
let hi: f32x4 = mem::transmute(hi);
let lo: f32x4 = mem::transmute(lo);
mem::transmute(_mm256_set_m128(hi, lo))
}
/// Set packed __m256i returned vector with the supplied values.
#[inline(always)]
#[target_feature = "+avx"]
#[cfg_attr(test, assert_instr(vinsertf128))]
pub unsafe fn _mm256_set_m128i(hi: __m128i, lo: __m128i) -> __m256i {
let hi: f32x4 = mem::transmute(hi);
let lo: f32x4 = mem::transmute(lo);
mem::transmute(_mm256_set_m128(hi, lo))
}
/// LLVM intrinsics used in the above functions
#[allow(improper_ctypes)]
extern "C" {
@@ -2070,7 +2158,7 @@ mod tests {
use stdsimd_test::simd_test;
use test::black_box; // Used to inhibit constant-folding.
use v128::{f32x4, f64x2, i32x4, i64x2};
use v128::{f32x4, f64x2, i8x16, i32x4, i64x2};
use v256::*;
use x86::avx;
@@ -3390,6 +3478,38 @@ unsafe fn _mm256_set1_epi64x() {
assert_eq!(r, i64x4::splat(1));
}
#[simd_test = "avx"]
unsafe fn _mm256_castpd_ps() {
let a = f64x4::new(1., 2., 3., 4.);
let r = avx::_mm256_castpd_ps(a);
let e = f32x8::new(0., 1.875, 0., 2., 0., 2.125, 0., 2.25);
assert_eq!(r, e);
}
#[simd_test = "avx"]
unsafe fn _mm256_castps_pd() {
let a = f32x8::new(0., 1.875, 0., 2., 0., 2.125, 0., 2.25);
let r = avx::_mm256_castps_pd(a);
let e = f64x4::new(1., 2., 3., 4.);
assert_eq!(r, e);
}
#[simd_test = "avx"]
unsafe fn _mm256_castps_si256() {
let a = f32x8::new(1., 2., 3., 4., 5., 6., 7., 8.);
let r = avx::_mm256_castps_si256(a);
let e = i64x4::new(4611686019492741120, 4647714816524288000, 4665729215040061440, 4683743613553737728);
assert_eq!(r, e);
}
#[simd_test = "avx"]
unsafe fn _mm256_castsi256_ps() {
let a = i64x4::new(4611686019492741120, 4647714816524288000, 4665729215040061440, 4683743613553737728);
let r = avx::_mm256_castsi256_ps(a);
let e = f32x8::new(1., 2., 3., 4., 5., 6., 7., 8.);
assert_eq!(r, e);
}
#[simd_test = "avx"]
unsafe fn _mm256_castpd_si256() {
let a = f64x4::new(1., 2., 3., 4.);
@@ -3424,4 +3544,61 @@ unsafe fn _mm256_castsi256_si128() {
let r = avx::_mm256_castsi256_si128(a);
assert_eq!(r, i64x2::new(1, 2));
}
#[simd_test = "avx"]
unsafe fn _mm256_zextps128_ps256() {
let a = f32x4::new(1., 2., 3., 4.);
let r = avx::_mm256_zextps128_ps256(a);
let e = f32x8::new(1., 2., 3., 4., 0., 0., 0., 0.);
assert_eq!(r, e);
}
#[simd_test = "avx"]
unsafe fn _mm256_zextsi128_si256() {
let a = i64x2::new(1, 2);
let r = avx::_mm256_zextsi128_si256(a);
let e = i64x4::new(1, 2, 0, 0);
assert_eq!(r, e);
}
#[simd_test = "avx"]
unsafe fn _mm256_zextpd128_pd256() {
let a = f64x2::new(1., 2.);
let r = avx::_mm256_zextpd128_pd256(a);
let e = f64x4::new(1., 2., 0., 0.);
assert_eq!(r, e);
}
#[simd_test = "avx"]
unsafe fn _mm256_set_m128() {
let hi = f32x4::new(5., 6., 7., 8.);
let lo = f32x4::new(1., 2., 3., 4.);
let r = avx::_mm256_set_m128(hi, lo);
let e = f32x8::new(1., 2., 3., 4., 5., 6., 7., 8.);
assert_eq!(r, e);
}
#[simd_test = "avx"]
unsafe fn _mm256_set_m128d() {
let hi = f64x2::new(3., 4.);
let lo = f64x2::new(1., 2.);
let r = avx::_mm256_set_m128d(hi, lo);
let e = f64x4::new(1., 2., 3., 4.);
assert_eq!(r, e);
}
#[simd_test = "avx"]
unsafe fn _mm256_set_m128i() {
let hi = i8x16::new(17, 18, 19, 20, 21, 22, 23, 24,
25, 26, 27, 28, 29, 30, 31, 32);
let lo = i8x16::new(1, 2, 3, 4, 5, 6, 7, 8,
9, 10, 11, 12, 13, 14, 15, 16);
let r = avx::_mm256_set_m128i(hi, lo);
let e = i8x32::new(
1, 2, 3, 4, 5, 6, 7, 8,
9, 10, 11, 12, 13, 14, 15, 16,
17, 18, 19, 20, 21, 22, 23, 24,
25, 26, 27, 28, 29, 30, 31, 32);
assert_eq!(r, e);
}
}