Merge pull request #2032 from folkertdev/interleaving-store

use `intrinsics::simd` for interleaving store
This commit is contained in:
Amanieu d'Antras
2026-02-21 15:33:06 +00:00
committed by GitHub
6 changed files with 164 additions and 491 deletions
@@ -25039,16 +25039,9 @@ pub unsafe fn vst1q_lane_f64<const LANE: i32>(a: *mut f64, b: float64x2_t) {
#[inline(always)]
#[target_feature(enable = "neon")]
#[stable(feature = "neon_intrinsics", since = "1.59.0")]
#[cfg_attr(test, assert_instr(st1))]
#[cfg_attr(test, assert_instr(stp))]
pub unsafe fn vst2_f64(a: *mut f64, b: float64x1x2_t) {
unsafe extern "unadjusted" {
#[cfg_attr(
any(target_arch = "aarch64", target_arch = "arm64ec"),
link_name = "llvm.aarch64.neon.st2.v1f64.p0"
)]
fn _vst2_f64(a: float64x1_t, b: float64x1_t, ptr: *mut i8);
}
_vst2_f64(b.0, b.1, a as _)
core::ptr::write_unaligned(a.cast(), b)
}
#[doc = "Store multiple 2-element structures from two registers"]
#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2_lane_f64)"]
@@ -25125,14 +25118,7 @@ pub unsafe fn vst2_lane_u64<const LANE: i32>(a: *mut u64, b: uint64x1x2_t) {
#[stable(feature = "neon_intrinsics", since = "1.59.0")]
#[cfg_attr(test, assert_instr(st2))]
pub unsafe fn vst2q_f64(a: *mut f64, b: float64x2x2_t) {
unsafe extern "unadjusted" {
#[cfg_attr(
any(target_arch = "aarch64", target_arch = "arm64ec"),
link_name = "llvm.aarch64.neon.st2.v2f64.p0"
)]
fn _vst2q_f64(a: float64x2_t, b: float64x2_t, ptr: *mut i8);
}
_vst2q_f64(b.0, b.1, a as _)
crate::core_arch::macros::interleaving_store!(f64, 2, 2, a, b)
}
#[doc = "Store multiple 2-element structures from two registers"]
#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2q_s64)"]
@@ -25143,14 +25129,7 @@ pub unsafe fn vst2q_f64(a: *mut f64, b: float64x2x2_t) {
#[stable(feature = "neon_intrinsics", since = "1.59.0")]
#[cfg_attr(test, assert_instr(st2))]
pub unsafe fn vst2q_s64(a: *mut i64, b: int64x2x2_t) {
unsafe extern "unadjusted" {
#[cfg_attr(
any(target_arch = "aarch64", target_arch = "arm64ec"),
link_name = "llvm.aarch64.neon.st2.v2i64.p0"
)]
fn _vst2q_s64(a: int64x2_t, b: int64x2_t, ptr: *mut i8);
}
_vst2q_s64(b.0, b.1, a as _)
crate::core_arch::macros::interleaving_store!(i64, 2, 2, a, b)
}
#[doc = "Store multiple 2-element structures from two registers"]
#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2q_lane_f64)"]
@@ -25295,14 +25274,7 @@ pub unsafe fn vst2q_u64(a: *mut u64, b: uint64x2x2_t) {
#[stable(feature = "neon_intrinsics", since = "1.59.0")]
#[cfg_attr(test, assert_instr(nop))]
pub unsafe fn vst3_f64(a: *mut f64, b: float64x1x3_t) {
unsafe extern "unadjusted" {
#[cfg_attr(
any(target_arch = "aarch64", target_arch = "arm64ec"),
link_name = "llvm.aarch64.neon.st3.v1f64.p0"
)]
fn _vst3_f64(a: float64x1_t, b: float64x1_t, c: float64x1_t, ptr: *mut i8);
}
_vst3_f64(b.0, b.1, b.2, a as _)
core::ptr::write_unaligned(a.cast(), b)
}
#[doc = "Store multiple 3-element structures from three registers"]
#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3_lane_f64)"]
@@ -25379,14 +25351,7 @@ pub unsafe fn vst3_lane_u64<const LANE: i32>(a: *mut u64, b: uint64x1x3_t) {
#[stable(feature = "neon_intrinsics", since = "1.59.0")]
#[cfg_attr(test, assert_instr(st3))]
pub unsafe fn vst3q_f64(a: *mut f64, b: float64x2x3_t) {
unsafe extern "unadjusted" {
#[cfg_attr(
any(target_arch = "aarch64", target_arch = "arm64ec"),
link_name = "llvm.aarch64.neon.st3.v2f64.p0"
)]
fn _vst3q_f64(a: float64x2_t, b: float64x2_t, c: float64x2_t, ptr: *mut i8);
}
_vst3q_f64(b.0, b.1, b.2, a as _)
crate::core_arch::macros::interleaving_store!(f64, 2, 3, a, b)
}
#[doc = "Store multiple 3-element structures from three registers"]
#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3q_s64)"]
@@ -25397,14 +25362,7 @@ pub unsafe fn vst3q_f64(a: *mut f64, b: float64x2x3_t) {
#[stable(feature = "neon_intrinsics", since = "1.59.0")]
#[cfg_attr(test, assert_instr(st3))]
pub unsafe fn vst3q_s64(a: *mut i64, b: int64x2x3_t) {
unsafe extern "unadjusted" {
#[cfg_attr(
any(target_arch = "aarch64", target_arch = "arm64ec"),
link_name = "llvm.aarch64.neon.st3.v2i64.p0"
)]
fn _vst3q_s64(a: int64x2_t, b: int64x2_t, c: int64x2_t, ptr: *mut i8);
}
_vst3q_s64(b.0, b.1, b.2, a as _)
crate::core_arch::macros::interleaving_store!(i64, 2, 3, a, b)
}
#[doc = "Store multiple 3-element structures from three registers"]
#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3q_lane_f64)"]
@@ -25549,14 +25507,7 @@ pub unsafe fn vst3q_u64(a: *mut u64, b: uint64x2x3_t) {
#[stable(feature = "neon_intrinsics", since = "1.59.0")]
#[cfg_attr(test, assert_instr(nop))]
pub unsafe fn vst4_f64(a: *mut f64, b: float64x1x4_t) {
unsafe extern "unadjusted" {
#[cfg_attr(
any(target_arch = "aarch64", target_arch = "arm64ec"),
link_name = "llvm.aarch64.neon.st4.v1f64.p0"
)]
fn _vst4_f64(a: float64x1_t, b: float64x1_t, c: float64x1_t, d: float64x1_t, ptr: *mut i8);
}
_vst4_f64(b.0, b.1, b.2, b.3, a as _)
core::ptr::write_unaligned(a.cast(), b)
}
#[doc = "Store multiple 4-element structures from four registers"]
#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4_lane_f64)"]
@@ -25647,14 +25598,7 @@ pub unsafe fn vst4_lane_u64<const LANE: i32>(a: *mut u64, b: uint64x1x4_t) {
#[stable(feature = "neon_intrinsics", since = "1.59.0")]
#[cfg_attr(test, assert_instr(st4))]
pub unsafe fn vst4q_f64(a: *mut f64, b: float64x2x4_t) {
unsafe extern "unadjusted" {
#[cfg_attr(
any(target_arch = "aarch64", target_arch = "arm64ec"),
link_name = "llvm.aarch64.neon.st4.v2f64.p0"
)]
fn _vst4q_f64(a: float64x2_t, b: float64x2_t, c: float64x2_t, d: float64x2_t, ptr: *mut i8);
}
_vst4q_f64(b.0, b.1, b.2, b.3, a as _)
crate::core_arch::macros::interleaving_store!(f64, 2, 4, a, b)
}
#[doc = "Store multiple 4-element structures from four registers"]
#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4q_s64)"]
@@ -25665,14 +25609,7 @@ pub unsafe fn vst4q_f64(a: *mut f64, b: float64x2x4_t) {
#[stable(feature = "neon_intrinsics", since = "1.59.0")]
#[cfg_attr(test, assert_instr(st4))]
pub unsafe fn vst4q_s64(a: *mut i64, b: int64x2x4_t) {
unsafe extern "unadjusted" {
#[cfg_attr(
any(target_arch = "aarch64", target_arch = "arm64ec"),
link_name = "llvm.aarch64.neon.st4.v2i64.p0"
)]
fn _vst4q_s64(a: int64x2_t, b: int64x2_t, c: int64x2_t, d: int64x2_t, ptr: *mut i8);
}
_vst4q_s64(b.0, b.1, b.2, b.3, a as _)
crate::core_arch::macros::interleaving_store!(i64, 2, 4, a, b)
}
#[doc = "Store multiple 4-element structures from four registers"]
#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4q_lane_f64)"]
@@ -1050,6 +1050,14 @@ unsafe fn $name() {
test_vld1q_f16_x2(f16, 16, float16x8x2_t, vst1q_f16_x2, vld1q_f16_x2);
test_vld1q_f16_x3(f16, 24, float16x8x3_t, vst1q_f16_x3, vld1q_f16_x3);
test_vld1q_f16_x4(f16, 32, float16x8x4_t, vst1q_f16_x4, vld1q_f16_x4);
test_vld2_f16_x2(f16, 8, float16x4x2_t, vst2_f16, vld2_f16);
test_vld2_f16_x3(f16, 12, float16x4x3_t, vst3_f16, vld3_f16);
test_vld2_f16_x4(f16, 16, float16x4x4_t, vst4_f16, vld4_f16);
test_vld2q_f16_x2(f16, 16, float16x8x2_t, vst2q_f16, vld2q_f16);
test_vld3q_f16_x3(f16, 24, float16x8x3_t, vst3q_f16, vld3q_f16);
test_vld4q_f16_x4(f16, 32, float16x8x4_t, vst4q_f16, vld4q_f16);
}
macro_rules! wide_store_load_roundtrip_aes {
@@ -65833,14 +65833,7 @@ pub unsafe fn vst2q_f16(a: *mut f16, b: float16x8x2_t) {
#[stable(feature = "neon_intrinsics", since = "1.59.0")]
#[cfg_attr(test, assert_instr(st2))]
pub unsafe fn vst2_f32(a: *mut f32, b: float32x2x2_t) {
unsafe extern "unadjusted" {
#[cfg_attr(
any(target_arch = "aarch64", target_arch = "arm64ec"),
link_name = "llvm.aarch64.neon.st2.v2f32.p0"
)]
fn _vst2_f32(a: float32x2_t, b: float32x2_t, ptr: *mut i8);
}
_vst2_f32(b.0, b.1, a as _)
crate::core_arch::macros::interleaving_store!(f32, 2, 2, a, b)
}
#[doc = "Store multiple 2-element structures from two registers"]
#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2q_f32)"]
@@ -65852,14 +65845,7 @@ pub unsafe fn vst2_f32(a: *mut f32, b: float32x2x2_t) {
#[stable(feature = "neon_intrinsics", since = "1.59.0")]
#[cfg_attr(test, assert_instr(st2))]
pub unsafe fn vst2q_f32(a: *mut f32, b: float32x4x2_t) {
unsafe extern "unadjusted" {
#[cfg_attr(
any(target_arch = "aarch64", target_arch = "arm64ec"),
link_name = "llvm.aarch64.neon.st2.v4f32.p0"
)]
fn _vst2q_f32(a: float32x4_t, b: float32x4_t, ptr: *mut i8);
}
_vst2q_f32(b.0, b.1, a as _)
crate::core_arch::macros::interleaving_store!(f32, 4, 2, a, b)
}
#[doc = "Store multiple 2-element structures from two registers"]
#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2_s8)"]
@@ -65871,14 +65857,7 @@ pub unsafe fn vst2q_f32(a: *mut f32, b: float32x4x2_t) {
#[stable(feature = "neon_intrinsics", since = "1.59.0")]
#[cfg_attr(test, assert_instr(st2))]
pub unsafe fn vst2_s8(a: *mut i8, b: int8x8x2_t) {
unsafe extern "unadjusted" {
#[cfg_attr(
any(target_arch = "aarch64", target_arch = "arm64ec"),
link_name = "llvm.aarch64.neon.st2.v8i8.p0"
)]
fn _vst2_s8(a: int8x8_t, b: int8x8_t, ptr: *mut i8);
}
_vst2_s8(b.0, b.1, a as _)
crate::core_arch::macros::interleaving_store!(i8, 8, 2, a, b)
}
#[doc = "Store multiple 2-element structures from two registers"]
#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2q_s8)"]
@@ -65890,14 +65869,7 @@ pub unsafe fn vst2_s8(a: *mut i8, b: int8x8x2_t) {
#[stable(feature = "neon_intrinsics", since = "1.59.0")]
#[cfg_attr(test, assert_instr(st2))]
pub unsafe fn vst2q_s8(a: *mut i8, b: int8x16x2_t) {
unsafe extern "unadjusted" {
#[cfg_attr(
any(target_arch = "aarch64", target_arch = "arm64ec"),
link_name = "llvm.aarch64.neon.st2.v16i8.p0"
)]
fn _vst2q_s8(a: int8x16_t, b: int8x16_t, ptr: *mut i8);
}
_vst2q_s8(b.0, b.1, a as _)
crate::core_arch::macros::interleaving_store!(i8, 16, 2, a, b)
}
#[doc = "Store multiple 2-element structures from two registers"]
#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2_s16)"]
@@ -65909,14 +65881,7 @@ pub unsafe fn vst2q_s8(a: *mut i8, b: int8x16x2_t) {
#[stable(feature = "neon_intrinsics", since = "1.59.0")]
#[cfg_attr(test, assert_instr(st2))]
pub unsafe fn vst2_s16(a: *mut i16, b: int16x4x2_t) {
unsafe extern "unadjusted" {
#[cfg_attr(
any(target_arch = "aarch64", target_arch = "arm64ec"),
link_name = "llvm.aarch64.neon.st2.v4i16.p0"
)]
fn _vst2_s16(a: int16x4_t, b: int16x4_t, ptr: *mut i8);
}
_vst2_s16(b.0, b.1, a as _)
crate::core_arch::macros::interleaving_store!(i16, 4, 2, a, b)
}
#[doc = "Store multiple 2-element structures from two registers"]
#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2q_s16)"]
@@ -65928,14 +65893,7 @@ pub unsafe fn vst2_s16(a: *mut i16, b: int16x4x2_t) {
#[stable(feature = "neon_intrinsics", since = "1.59.0")]
#[cfg_attr(test, assert_instr(st2))]
pub unsafe fn vst2q_s16(a: *mut i16, b: int16x8x2_t) {
unsafe extern "unadjusted" {
#[cfg_attr(
any(target_arch = "aarch64", target_arch = "arm64ec"),
link_name = "llvm.aarch64.neon.st2.v8i16.p0"
)]
fn _vst2q_s16(a: int16x8_t, b: int16x8_t, ptr: *mut i8);
}
_vst2q_s16(b.0, b.1, a as _)
crate::core_arch::macros::interleaving_store!(i16, 8, 2, a, b)
}
#[doc = "Store multiple 2-element structures from two registers"]
#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2_s32)"]
@@ -65947,14 +65905,7 @@ pub unsafe fn vst2q_s16(a: *mut i16, b: int16x8x2_t) {
#[stable(feature = "neon_intrinsics", since = "1.59.0")]
#[cfg_attr(test, assert_instr(st2))]
pub unsafe fn vst2_s32(a: *mut i32, b: int32x2x2_t) {
unsafe extern "unadjusted" {
#[cfg_attr(
any(target_arch = "aarch64", target_arch = "arm64ec"),
link_name = "llvm.aarch64.neon.st2.v2i32.p0"
)]
fn _vst2_s32(a: int32x2_t, b: int32x2_t, ptr: *mut i8);
}
_vst2_s32(b.0, b.1, a as _)
crate::core_arch::macros::interleaving_store!(i32, 2, 2, a, b)
}
#[doc = "Store multiple 2-element structures from two registers"]
#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2q_s32)"]
@@ -65966,14 +65917,7 @@ pub unsafe fn vst2_s32(a: *mut i32, b: int32x2x2_t) {
#[stable(feature = "neon_intrinsics", since = "1.59.0")]
#[cfg_attr(test, assert_instr(st2))]
pub unsafe fn vst2q_s32(a: *mut i32, b: int32x4x2_t) {
unsafe extern "unadjusted" {
#[cfg_attr(
any(target_arch = "aarch64", target_arch = "arm64ec"),
link_name = "llvm.aarch64.neon.st2.v4i32.p0"
)]
fn _vst2q_s32(a: int32x4_t, b: int32x4_t, ptr: *mut i8);
}
_vst2q_s32(b.0, b.1, a as _)
crate::core_arch::macros::interleaving_store!(i32, 4, 2, a, b)
}
#[doc = "Store multiple 2-element structures from two registers"]
#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2_f32)"]
@@ -66697,11 +66641,7 @@ pub unsafe fn vst2_p64(a: *mut p64, b: poly64x1x2_t) {
#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
#[cfg_attr(test, assert_instr(nop))]
pub unsafe fn vst2_s64(a: *mut i64, b: int64x1x2_t) {
unsafe extern "unadjusted" {
#[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst2.v1i64.p0")]
fn _vst2_s64(ptr: *mut i8, a: int64x1_t, b: int64x1_t, size: i32);
}
_vst2_s64(a as _, b.0, b.1, 8)
core::ptr::write_unaligned(a.cast(), b)
}
#[doc = "Store multiple 2-element structures from two registers"]
#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2_s64)"]
@@ -66713,14 +66653,7 @@ pub unsafe fn vst2_s64(a: *mut i64, b: int64x1x2_t) {
#[stable(feature = "neon_intrinsics", since = "1.59.0")]
#[cfg_attr(test, assert_instr(nop))]
pub unsafe fn vst2_s64(a: *mut i64, b: int64x1x2_t) {
unsafe extern "unadjusted" {
#[cfg_attr(
any(target_arch = "aarch64", target_arch = "arm64ec"),
link_name = "llvm.aarch64.neon.st2.v1i64.p0"
)]
fn _vst2_s64(a: int64x1_t, b: int64x1_t, ptr: *mut i8);
}
_vst2_s64(b.0, b.1, a as _)
core::ptr::write_unaligned(a.cast(), b)
}
#[doc = "Store multiple 2-element structures from two registers"]
#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2_u64)"]
@@ -67065,11 +66998,7 @@ pub unsafe fn vst3q_f16(a: *mut f16, b: float16x8x3_t) {
#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
#[cfg_attr(test, assert_instr(vst3))]
pub unsafe fn vst3_f32(a: *mut f32, b: float32x2x3_t) {
unsafe extern "unadjusted" {
#[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst3.p0.v2f32")]
fn _vst3_f32(ptr: *mut i8, a: float32x2_t, b: float32x2_t, c: float32x2_t, size: i32);
}
_vst3_f32(a as _, b.0, b.1, b.2, 4)
crate::core_arch::macros::interleaving_store!(f32, 2, 3, a, b)
}
#[doc = "Store multiple 3-element structures from three registers"]
#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3q_f32)"]
@@ -67081,11 +67010,7 @@ pub unsafe fn vst3_f32(a: *mut f32, b: float32x2x3_t) {
#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
#[cfg_attr(test, assert_instr(vst3))]
pub unsafe fn vst3q_f32(a: *mut f32, b: float32x4x3_t) {
unsafe extern "unadjusted" {
#[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst3.p0.v4f32")]
fn _vst3q_f32(ptr: *mut i8, a: float32x4_t, b: float32x4_t, c: float32x4_t, size: i32);
}
_vst3q_f32(a as _, b.0, b.1, b.2, 4)
crate::core_arch::macros::interleaving_store!(f32, 4, 3, a, b)
}
#[doc = "Store multiple 3-element structures from three registers"]
#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3_s8)"]
@@ -67097,11 +67022,7 @@ pub unsafe fn vst3q_f32(a: *mut f32, b: float32x4x3_t) {
#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
#[cfg_attr(test, assert_instr(vst3))]
pub unsafe fn vst3_s8(a: *mut i8, b: int8x8x3_t) {
unsafe extern "unadjusted" {
#[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst3.p0.v8i8")]
fn _vst3_s8(ptr: *mut i8, a: int8x8_t, b: int8x8_t, c: int8x8_t, size: i32);
}
_vst3_s8(a as _, b.0, b.1, b.2, 1)
crate::core_arch::macros::interleaving_store!(i8, 8, 3, a, b)
}
#[doc = "Store multiple 3-element structures from three registers"]
#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3q_s8)"]
@@ -67113,11 +67034,7 @@ pub unsafe fn vst3_s8(a: *mut i8, b: int8x8x3_t) {
#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
#[cfg_attr(test, assert_instr(vst3))]
pub unsafe fn vst3q_s8(a: *mut i8, b: int8x16x3_t) {
unsafe extern "unadjusted" {
#[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst3.p0.v16i8")]
fn _vst3q_s8(ptr: *mut i8, a: int8x16_t, b: int8x16_t, c: int8x16_t, size: i32);
}
_vst3q_s8(a as _, b.0, b.1, b.2, 1)
crate::core_arch::macros::interleaving_store!(i8, 16, 3, a, b)
}
#[doc = "Store multiple 3-element structures from three registers"]
#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3_s16)"]
@@ -67129,11 +67046,7 @@ pub unsafe fn vst3q_s8(a: *mut i8, b: int8x16x3_t) {
#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
#[cfg_attr(test, assert_instr(vst3))]
pub unsafe fn vst3_s16(a: *mut i16, b: int16x4x3_t) {
unsafe extern "unadjusted" {
#[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst3.p0.v4i16")]
fn _vst3_s16(ptr: *mut i8, a: int16x4_t, b: int16x4_t, c: int16x4_t, size: i32);
}
_vst3_s16(a as _, b.0, b.1, b.2, 2)
crate::core_arch::macros::interleaving_store!(i16, 4, 3, a, b)
}
#[doc = "Store multiple 3-element structures from three registers"]
#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3q_s16)"]
@@ -67145,11 +67058,7 @@ pub unsafe fn vst3_s16(a: *mut i16, b: int16x4x3_t) {
#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
#[cfg_attr(test, assert_instr(vst3))]
pub unsafe fn vst3q_s16(a: *mut i16, b: int16x8x3_t) {
unsafe extern "unadjusted" {
#[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst3.p0.v8i16")]
fn _vst3q_s16(ptr: *mut i8, a: int16x8_t, b: int16x8_t, c: int16x8_t, size: i32);
}
_vst3q_s16(a as _, b.0, b.1, b.2, 2)
crate::core_arch::macros::interleaving_store!(i16, 8, 3, a, b)
}
#[doc = "Store multiple 3-element structures from three registers"]
#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3_s32)"]
@@ -67161,11 +67070,7 @@ pub unsafe fn vst3q_s16(a: *mut i16, b: int16x8x3_t) {
#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
#[cfg_attr(test, assert_instr(vst3))]
pub unsafe fn vst3_s32(a: *mut i32, b: int32x2x3_t) {
unsafe extern "unadjusted" {
#[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst3.p0.v2i32")]
fn _vst3_s32(ptr: *mut i8, a: int32x2_t, b: int32x2_t, c: int32x2_t, size: i32);
}
_vst3_s32(a as _, b.0, b.1, b.2, 4)
crate::core_arch::macros::interleaving_store!(i32, 2, 3, a, b)
}
#[doc = "Store multiple 3-element structures from three registers"]
#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3q_s32)"]
@@ -67177,11 +67082,7 @@ pub unsafe fn vst3_s32(a: *mut i32, b: int32x2x3_t) {
#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
#[cfg_attr(test, assert_instr(vst3))]
pub unsafe fn vst3q_s32(a: *mut i32, b: int32x4x3_t) {
unsafe extern "unadjusted" {
#[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst3.p0.v4i32")]
fn _vst3q_s32(ptr: *mut i8, a: int32x4_t, b: int32x4_t, c: int32x4_t, size: i32);
}
_vst3q_s32(a as _, b.0, b.1, b.2, 4)
crate::core_arch::macros::interleaving_store!(i32, 4, 3, a, b)
}
#[doc = "Store multiple 3-element structures from three registers"]
#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3_f32)"]
@@ -67985,14 +67886,7 @@ pub unsafe fn vst3_p64(a: *mut p64, b: poly64x1x3_t) {
#[stable(feature = "neon_intrinsics", since = "1.59.0")]
#[cfg_attr(test, assert_instr(nop))]
pub unsafe fn vst3_s64(a: *mut i64, b: int64x1x3_t) {
unsafe extern "unadjusted" {
#[cfg_attr(
any(target_arch = "aarch64", target_arch = "arm64ec"),
link_name = "llvm.aarch64.neon.st3.v1i64.p0"
)]
fn _vst3_s64(a: int64x1_t, b: int64x1_t, c: int64x1_t, ptr: *mut i8);
}
_vst3_s64(b.0, b.1, b.2, a as _)
core::ptr::write_unaligned(a.cast(), b)
}
#[doc = "Store multiple 3-element structures from three registers"]
#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3_s64)"]
@@ -68004,11 +67898,7 @@ pub unsafe fn vst3_s64(a: *mut i64, b: int64x1x3_t) {
#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
#[cfg_attr(test, assert_instr(nop))]
pub unsafe fn vst3_s64(a: *mut i64, b: int64x1x3_t) {
unsafe extern "unadjusted" {
#[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst3.p0.v1i64")]
fn _vst3_s64(ptr: *mut i8, a: int64x1_t, b: int64x1_t, c: int64x1_t, size: i32);
}
_vst3_s64(a as _, b.0, b.1, b.2, 8)
core::ptr::write_unaligned(a.cast(), b)
}
#[doc = "Store multiple 3-element structures from three registers"]
#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3_u64)"]
@@ -68544,14 +68434,7 @@ fn _vst4q_s32(
#[stable(feature = "neon_intrinsics", since = "1.59.0")]
#[cfg_attr(test, assert_instr(st4))]
pub unsafe fn vst4_f32(a: *mut f32, b: float32x2x4_t) {
unsafe extern "unadjusted" {
#[cfg_attr(
any(target_arch = "aarch64", target_arch = "arm64ec"),
link_name = "llvm.aarch64.neon.st4.v2f32.p0"
)]
fn _vst4_f32(a: float32x2_t, b: float32x2_t, c: float32x2_t, d: float32x2_t, ptr: *mut i8);
}
_vst4_f32(b.0, b.1, b.2, b.3, a as _)
crate::core_arch::macros::interleaving_store!(f32, 2, 4, a, b)
}
#[doc = "Store multiple 4-element structures from four registers"]
#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4q_f32)"]
@@ -68563,14 +68446,7 @@ pub unsafe fn vst4_f32(a: *mut f32, b: float32x2x4_t) {
#[stable(feature = "neon_intrinsics", since = "1.59.0")]
#[cfg_attr(test, assert_instr(st4))]
pub unsafe fn vst4q_f32(a: *mut f32, b: float32x4x4_t) {
unsafe extern "unadjusted" {
#[cfg_attr(
any(target_arch = "aarch64", target_arch = "arm64ec"),
link_name = "llvm.aarch64.neon.st4.v4f32.p0"
)]
fn _vst4q_f32(a: float32x4_t, b: float32x4_t, c: float32x4_t, d: float32x4_t, ptr: *mut i8);
}
_vst4q_f32(b.0, b.1, b.2, b.3, a as _)
crate::core_arch::macros::interleaving_store!(f32, 4, 4, a, b)
}
#[doc = "Store multiple 4-element structures from four registers"]
#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4_s8)"]
@@ -68582,14 +68458,7 @@ pub unsafe fn vst4q_f32(a: *mut f32, b: float32x4x4_t) {
#[stable(feature = "neon_intrinsics", since = "1.59.0")]
#[cfg_attr(test, assert_instr(st4))]
pub unsafe fn vst4_s8(a: *mut i8, b: int8x8x4_t) {
unsafe extern "unadjusted" {
#[cfg_attr(
any(target_arch = "aarch64", target_arch = "arm64ec"),
link_name = "llvm.aarch64.neon.st4.v8i8.p0"
)]
fn _vst4_s8(a: int8x8_t, b: int8x8_t, c: int8x8_t, d: int8x8_t, ptr: *mut i8);
}
_vst4_s8(b.0, b.1, b.2, b.3, a as _)
crate::core_arch::macros::interleaving_store!(i8, 8, 4, a, b)
}
#[doc = "Store multiple 4-element structures from four registers"]
#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4q_s8)"]
@@ -68601,14 +68470,7 @@ pub unsafe fn vst4_s8(a: *mut i8, b: int8x8x4_t) {
#[stable(feature = "neon_intrinsics", since = "1.59.0")]
#[cfg_attr(test, assert_instr(st4))]
pub unsafe fn vst4q_s8(a: *mut i8, b: int8x16x4_t) {
unsafe extern "unadjusted" {
#[cfg_attr(
any(target_arch = "aarch64", target_arch = "arm64ec"),
link_name = "llvm.aarch64.neon.st4.v16i8.p0"
)]
fn _vst4q_s8(a: int8x16_t, b: int8x16_t, c: int8x16_t, d: int8x16_t, ptr: *mut i8);
}
_vst4q_s8(b.0, b.1, b.2, b.3, a as _)
crate::core_arch::macros::interleaving_store!(i8, 16, 4, a, b)
}
#[doc = "Store multiple 4-element structures from four registers"]
#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4_s16)"]
@@ -68620,14 +68482,7 @@ pub unsafe fn vst4q_s8(a: *mut i8, b: int8x16x4_t) {
#[stable(feature = "neon_intrinsics", since = "1.59.0")]
#[cfg_attr(test, assert_instr(st4))]
pub unsafe fn vst4_s16(a: *mut i16, b: int16x4x4_t) {
unsafe extern "unadjusted" {
#[cfg_attr(
any(target_arch = "aarch64", target_arch = "arm64ec"),
link_name = "llvm.aarch64.neon.st4.v4i16.p0"
)]
fn _vst4_s16(a: int16x4_t, b: int16x4_t, c: int16x4_t, d: int16x4_t, ptr: *mut i8);
}
_vst4_s16(b.0, b.1, b.2, b.3, a as _)
crate::core_arch::macros::interleaving_store!(i16, 4, 4, a, b)
}
#[doc = "Store multiple 4-element structures from four registers"]
#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4q_s16)"]
@@ -68639,14 +68494,7 @@ pub unsafe fn vst4_s16(a: *mut i16, b: int16x4x4_t) {
#[stable(feature = "neon_intrinsics", since = "1.59.0")]
#[cfg_attr(test, assert_instr(st4))]
pub unsafe fn vst4q_s16(a: *mut i16, b: int16x8x4_t) {
unsafe extern "unadjusted" {
#[cfg_attr(
any(target_arch = "aarch64", target_arch = "arm64ec"),
link_name = "llvm.aarch64.neon.st4.v8i16.p0"
)]
fn _vst4q_s16(a: int16x8_t, b: int16x8_t, c: int16x8_t, d: int16x8_t, ptr: *mut i8);
}
_vst4q_s16(b.0, b.1, b.2, b.3, a as _)
crate::core_arch::macros::interleaving_store!(i16, 8, 4, a, b)
}
#[doc = "Store multiple 4-element structures from four registers"]
#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4_s32)"]
@@ -68658,14 +68506,7 @@ pub unsafe fn vst4q_s16(a: *mut i16, b: int16x8x4_t) {
#[stable(feature = "neon_intrinsics", since = "1.59.0")]
#[cfg_attr(test, assert_instr(st4))]
pub unsafe fn vst4_s32(a: *mut i32, b: int32x2x4_t) {
unsafe extern "unadjusted" {
#[cfg_attr(
any(target_arch = "aarch64", target_arch = "arm64ec"),
link_name = "llvm.aarch64.neon.st4.v2i32.p0"
)]
fn _vst4_s32(a: int32x2_t, b: int32x2_t, c: int32x2_t, d: int32x2_t, ptr: *mut i8);
}
_vst4_s32(b.0, b.1, b.2, b.3, a as _)
crate::core_arch::macros::interleaving_store!(i32, 2, 4, a, b)
}
#[doc = "Store multiple 4-element structures from four registers"]
#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4q_s32)"]
@@ -68677,14 +68518,7 @@ pub unsafe fn vst4_s32(a: *mut i32, b: int32x2x4_t) {
#[stable(feature = "neon_intrinsics", since = "1.59.0")]
#[cfg_attr(test, assert_instr(st4))]
pub unsafe fn vst4q_s32(a: *mut i32, b: int32x4x4_t) {
unsafe extern "unadjusted" {
#[cfg_attr(
any(target_arch = "aarch64", target_arch = "arm64ec"),
link_name = "llvm.aarch64.neon.st4.v4i32.p0"
)]
fn _vst4q_s32(a: int32x4_t, b: int32x4_t, c: int32x4_t, d: int32x4_t, ptr: *mut i8);
}
_vst4q_s32(b.0, b.1, b.2, b.3, a as _)
crate::core_arch::macros::interleaving_store!(i32, 4, 4, a, b)
}
#[doc = "Store multiple 4-element structures from four registers"]
#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4_lane_f16)"]
@@ -69408,18 +69242,7 @@ pub unsafe fn vst4_p64(a: *mut p64, b: poly64x1x4_t) {
#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
#[cfg_attr(test, assert_instr(nop))]
pub unsafe fn vst4_s64(a: *mut i64, b: int64x1x4_t) {
unsafe extern "unadjusted" {
#[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst4.p0.v1i64")]
fn _vst4_s64(
ptr: *mut i8,
a: int64x1_t,
b: int64x1_t,
c: int64x1_t,
d: int64x1_t,
size: i32,
);
}
_vst4_s64(a as _, b.0, b.1, b.2, b.3, 8)
core::ptr::write_unaligned(a.cast(), b)
}
#[doc = "Store multiple 4-element structures from four registers"]
#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4_s64)"]
@@ -69431,14 +69254,7 @@ fn _vst4_s64(
#[stable(feature = "neon_intrinsics", since = "1.59.0")]
#[cfg_attr(test, assert_instr(nop))]
pub unsafe fn vst4_s64(a: *mut i64, b: int64x1x4_t) {
unsafe extern "unadjusted" {
#[cfg_attr(
any(target_arch = "aarch64", target_arch = "arm64ec"),
link_name = "llvm.aarch64.neon.st4.v1i64.p0"
)]
fn _vst4_s64(a: int64x1_t, b: int64x1_t, c: int64x1_t, d: int64x1_t, ptr: *mut i8);
}
_vst4_s64(b.0, b.1, b.2, b.3, a as _)
core::ptr::write_unaligned(a.cast(), b)
}
#[doc = "Store multiple 4-element structures from four registers"]
#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4_u64)"]
@@ -187,6 +187,17 @@ macro_rules! simd_masked_store {
};
}
/// The first N indices `[0, 1, 2, ...]`.
pub(crate) const fn identity<const N: usize>() -> [u32; N] {
let mut out = [0u32; N];
let mut i = 0usize;
while i < N {
out[i] = i as u32;
i += 1;
}
out
}
/// The first N even indices `[0, 2, 4, ...]`.
pub(crate) const fn even<const N: usize>() -> [u32; N] {
let mut out = [0u32; N];
@@ -277,3 +288,59 @@ macro_rules! deinterleaving_load {
#[allow(unused)]
pub(crate) use deinterleaving_load;
pub(crate) const fn interleave_mask<const LANES: usize, const N: usize, const K: usize>()
-> [u32; LANES] {
let mut out = [0u32; LANES];
let mut j = 0usize;
while j < LANES {
out[j] = ((j % K) * N + j / K) as u32;
j += 1;
}
out
}
#[allow(unused)]
macro_rules! interleaving_store {
($elem:ty, $lanes:literal, 2, $ptr:expr, $v:expr) => {{
use $crate::core_arch::macros::interleave_mask;
use $crate::core_arch::simd::Simd;
type W = Simd<$elem, { $lanes * 2 }>;
let w: W = simd_shuffle!($v.0, $v.1, interleave_mask::<{ $lanes * 2 }, $lanes, 2>());
$crate::ptr::write_unaligned($ptr as *mut W, w);
}};
// N = 3
($elem:ty, $lanes:literal, 3, $ptr:expr, $v:expr) => {{
use $crate::core_arch::macros::{identity, interleave_mask};
use $crate::core_arch::simd::Simd;
let v0v1: Simd<$elem, { $lanes * 2 }> =
simd_shuffle!($v.0, $v.1, identity::<{ $lanes * 2 }>());
let v2v2: Simd<$elem, { $lanes * 2 }> =
simd_shuffle!($v.2, $v.2, identity::<{ $lanes * 2 }>());
type W = Simd<$elem, { $lanes * 3 }>;
let w: W = simd_shuffle!(v0v1, v2v2, interleave_mask::<{ $lanes * 3 }, $lanes, 3>());
$crate::ptr::write_unaligned($ptr as *mut W, w);
}};
// N = 4
($elem:ty, $lanes:literal, 4, $ptr:expr, $v:expr) => {{
use $crate::core_arch::macros::{identity, interleave_mask};
use $crate::core_arch::simd::Simd;
let v0v1: Simd<$elem, { $lanes * 2 }> =
simd_shuffle!($v.0, $v.1, identity::<{ $lanes * 2 }>());
let v2v3: Simd<$elem, { $lanes * 2 }> =
simd_shuffle!($v.2, $v.3, identity::<{ $lanes * 2 }>());
type W = Simd<$elem, { $lanes * 4 }>;
let w: W = simd_shuffle!(v0v1, v2v3, interleave_mask::<{ $lanes * 4 }, $lanes, 4>());
$crate::ptr::write_unaligned($ptr as *mut W, w);
}};
}
#[allow(unused)]
pub(crate) use interleaving_store;
@@ -4567,20 +4567,11 @@ intrinsics:
unsafe: [neon]
attr:
- *neon-stable
assert_instr: [st1]
assert_instr: [stp]
types:
- ['f64', float64x1x2_t, float64x1_t]
- ['f64', float64x1x2_t]
compose:
- LLVMLink:
name: 'st2.{neon_type[1]}'
arguments:
- 'a: {type[2]}'
- 'b: {type[2]}'
- 'ptr: *mut i8'
links:
- link: 'llvm.aarch64.neon.st2.v{neon_type[1].lane}{type[0]}.p0'
arch: aarch64,arm64ec
- FnCall: ['_vst2{neon_type[1].nox}', ['b.0', 'b.1', 'a as _']]
- FnCall: [core::ptr::write_unaligned, ['a.cast()', b]]
- name: "vst2{neon_type[1].nox}"
doc: "Store multiple 2-element structures from two registers"
@@ -4591,19 +4582,10 @@ intrinsics:
- *neon-stable
assert_instr: [st2]
types:
- [i64, int64x2x2_t, int64x2_t]
- [f64, float64x2x2_t, float64x2_t]
- [i64, int64x2x2_t, "2"]
- [f64, float64x2x2_t, "2"]
compose:
- LLVMLink:
name: 'st2.{neon_type[1]}'
arguments:
- 'a: {type[2]}'
- 'b: {type[2]}'
- 'ptr: *mut i8'
links:
- link: 'llvm.aarch64.neon.st2.v{neon_type[1].lane}{type[0]}.p0'
arch: aarch64,arm64ec
- FnCall: ['_vst2{neon_type[1].nox}', ['b.0', 'b.1', 'a as _']]
- FnCall: ["crate::core_arch::macros::interleaving_store!", [{ Type: "{type[0]}" }, "{type[2]}", "2", a, b], [], true]
- name: "vst2{neon_type[1].lane_nox}"
doc: "Store multiple 2-element structures from two registers"
@@ -4781,19 +4763,9 @@ intrinsics:
safety:
unsafe: [neon]
types:
- [f64, float64x1x3_t, float64x1_t]
- [f64, float64x1x3_t]
compose:
- LLVMLink:
name: 'st3.{neon_type[1].nox}'
arguments:
- 'a: {type[2]}'
- 'b: {type[2]}'
- 'c: {type[2]}'
- 'ptr: *mut i8'
links:
- link: 'llvm.aarch64.neon.st3.v{neon_type[1].lane}{type[0]}.p0'
arch: aarch64,arm64ec
- FnCall: ['_vst3{neon_type[1].nox}', ['b.0', 'b.1', 'b.2', 'a as _']]
- FnCall: [core::ptr::write_unaligned, ['a.cast()', b]]
- name: "vst3{neon_type[1].lane_nox}"
doc: "Store multiple 3-element structures from three registers"
@@ -4860,20 +4832,10 @@ intrinsics:
safety:
unsafe: [neon]
types:
- [i64, int64x2x3_t, int64x2_t]
- [f64, float64x2x3_t, float64x2_t]
- [i64, int64x2x3_t, "2"]
- [f64, float64x2x3_t, "2"]
compose:
- LLVMLink:
name: 'st3.{neon_type[1].nox}'
arguments:
- 'a: {type[2]}'
- 'b: {type[2]}'
- 'c: {type[2]}'
- 'ptr: *mut i8'
links:
- link: 'llvm.aarch64.neon.st3.v{neon_type[1].lane}{type[0]}.p0'
arch: aarch64,arm64ec
- FnCall: ['_vst3{neon_type[1].nox}', ['b.0', 'b.1', 'b.2', 'a as _']]
- FnCall: ["crate::core_arch::macros::interleaving_store!", [{ Type: "{type[0]}" }, "{type[2]}", "3", a, b], [], true]
- name: "vst3{neon_type[1].nox}"
doc: "Store multiple 3-element structures from three registers"
@@ -4995,20 +4957,9 @@ intrinsics:
safety:
unsafe: [neon]
types:
- [f64, float64x1x4_t, float64x1_t]
- [f64, float64x1x4_t]
compose:
- LLVMLink:
name: 'st4.{neon_type[1].nox}'
arguments:
- 'a: {type[2]}'
- 'b: {type[2]}'
- 'c: {type[2]}'
- 'd: {type[2]}'
- 'ptr: *mut i8'
links:
- link: 'llvm.aarch64.neon.st4.v{neon_type[1].lane}{type[0]}.p0'
arch: aarch64,arm64ec
- FnCall: ['_vst4{neon_type[1].nox}', ['b.0', 'b.1', 'b.2', 'b.3', 'a as _']]
- FnCall: [core::ptr::write_unaligned, ['a.cast()', b]]
- name: "vst4{neon_type[1].lane_nox}"
doc: "Store multiple 4-element structures from four registers"
@@ -5075,21 +5026,10 @@ intrinsics:
safety:
unsafe: [neon]
types:
- [i64, int64x2x4_t, int64x2_t]
- [f64, float64x2x4_t, float64x2_t]
- [i64, int64x2x4_t, "2"]
- [f64, float64x2x4_t, "2"]
compose:
- LLVMLink:
name: 'st4.{neon_type[1].nox}'
arguments:
- 'a: {type[2]}'
- 'b: {type[2]}'
- 'c: {type[2]}'
- 'd: {type[2]}'
- 'ptr: *mut i8'
links:
- link: 'llvm.aarch64.neon.st4.v{neon_type[1].lane}{type[0]}.p0'
arch: aarch64,arm64ec
- FnCall: ['_vst4{neon_type[1].nox}', ['b.0', 'b.1', 'b.2', 'b.3', 'a as _']]
- FnCall: ["crate::core_arch::macros::interleaving_store!", [{ Type: "{type[0]}" }, "{type[2]}", "4", a, b], [], true]
- name: "vst4{neon_type[1].nox}"
doc: "Store multiple 4-element structures from four registers"
@@ -5049,17 +5049,7 @@ intrinsics:
types:
- [i64, int64x1x2_t, int64x1_t]
compose:
- LLVMLink:
name: 'vst2.{neon_type[1]}'
arguments:
- 'ptr: *mut i8'
- 'a: {type[2]}'
- 'b: {type[2]}'
- 'size: i32'
links:
- link: 'llvm.arm.neon.vst2.v{neon_type[1].lane}{type[0]}.p0'
arch: arm
- FnCall: ['_vst2{neon_type[1].nox}', ['a as _', 'b.0', 'b.1', '8']]
- FnCall: [core::ptr::write_unaligned, ['a.cast()', b]]
- name: "vst2{neon_type[1].nox}"
doc: "Store multiple 2-element structures from two registers"
@@ -5092,16 +5082,7 @@ intrinsics:
types:
- [i64, int64x1x2_t, int64x1_t]
compose:
- LLVMLink:
name: 'st2.{neon_type[1]}'
arguments:
- 'a: {type[2]}'
- 'b: {type[2]}'
- 'ptr: *mut i8'
links:
- link: 'llvm.aarch64.neon.st2.v{neon_type[1].lane}{type[0]}.p0'
arch: aarch64,arm64ec
- FnCall: ['_vst2{neon_type[1].nox}', ['b.0', 'b.1', 'a as _']]
- FnCall: [core::ptr::write_unaligned, ['a.cast()', b]]
- name: "vst2{neon_type[1].nox}"
doc: "Store multiple 2-element structures from two registers"
@@ -5113,26 +5094,16 @@ intrinsics:
safety:
unsafe: [neon]
types:
- [i8, int8x8x2_t, int8x8_t]
- [i16, int16x4x2_t, int16x4_t]
- [i32, int32x2x2_t, int32x2_t]
- [i8, int8x16x2_t, int8x16_t]
- [i16, int16x8x2_t, int16x8_t]
- [i32, int32x4x2_t, int32x4_t]
- [f32, float32x2x2_t, float32x2_t]
- [f32, float32x4x2_t, float32x4_t]
- [i8, int8x8x2_t, "8"]
- [i16, int16x4x2_t, "4"]
- [i32, int32x2x2_t, "2"]
- [i8, int8x16x2_t, "16"]
- [i16, int16x8x2_t, "8"]
- [i32, int32x4x2_t, "4"]
- [f32, float32x2x2_t, "2"]
- [f32, float32x4x2_t, "4"]
compose:
- LLVMLink:
name: 'st2.{neon_type[1]}'
arguments:
- 'a: {type[2]}'
- 'b: {type[2]}'
- 'ptr: *mut i8'
links:
- link: 'llvm.aarch64.neon.st2.v{neon_type[1].lane}{type[0]}.p0'
arch: aarch64,arm64ec
- FnCall: ['_vst2{neon_type[1].nox}', ['b.0', 'b.1', 'a as _']]
- FnCall: ["crate::core_arch::macros::interleaving_store!", [{ Type: "{type[0]}" }, "{type[2]}", "2", a, b], [], true]
- name: "vst2{neon_type[1].nox}"
doc: "Store multiple 2-element structures from two registers"
@@ -5426,17 +5397,7 @@ intrinsics:
types:
- [i64, int64x1x3_t, int64x1_t]
compose:
- LLVMLink:
name: 'st3.{neon_type[1].nox}'
arguments:
- 'a: {type[2]}'
- 'b: {type[2]}'
- 'c: {type[2]}'
- 'ptr: *mut i8'
links:
- link: 'llvm.aarch64.neon.st3.v{neon_type[1].lane}{type[0]}.p0'
arch: aarch64,arm64ec
- FnCall: ['_vst3{neon_type[1].nox}', ['b.0', 'b.1', 'b.2', 'a as _']]
- FnCall: [core::ptr::write_unaligned, ['a.cast()', b]]
- name: "vst3{neon_type[1].nox}"
doc: "Store multiple 3-element structures from three registers"
@@ -5471,18 +5432,7 @@ intrinsics:
types:
- [i64, int64x1x3_t, int64x1_t]
compose:
- LLVMLink:
name: 'vst3.{neon_type[1]}'
arguments:
- 'ptr: *mut i8'
- 'a: {type[2]}'
- 'b: {type[2]}'
- 'c: {type[2]}'
- 'size: i32'
links:
- link: 'llvm.arm.neon.vst3.p0.v{neon_type[1].lane}{type[0]}'
arch: arm
- FnCall: ['_vst3{neon_type[1].nox}', ['a as _', 'b.0', 'b.1', 'b.2', '8']]
- FnCall: [core::ptr::write_unaligned, ['a.cast()', b]]
- name: "vst3{neon_type[1].nox}"
doc: "Store multiple 3-element structures from three registers"
@@ -5571,27 +5521,16 @@ intrinsics:
safety:
unsafe: [neon]
types:
- [i8, int8x8x3_t, int8x8_t, '1']
- [i16, int16x4x3_t, int16x4_t, '2']
- [i32, int32x2x3_t, int32x2_t, '4']
- [i8, int8x16x3_t, int8x16_t, '1']
- [i16, int16x8x3_t, int16x8_t, '2']
- [i32, int32x4x3_t, int32x4_t, '4']
- [f32, float32x2x3_t, float32x2_t, '4']
- [f32, float32x4x3_t, float32x4_t, '4']
- [i8, int8x8x3_t, '8']
- [i16, int16x4x3_t, '4']
- [i32, int32x2x3_t, '2']
- [i8, int8x16x3_t, '16']
- [i16, int16x8x3_t, '8']
- [i32, int32x4x3_t, '4']
- [f32, float32x2x3_t, '2']
- [f32, float32x4x3_t, '4']
compose:
- LLVMLink:
name: 'vst3.{neon_type[1]}'
arguments:
- 'ptr: *mut i8'
- 'a: {type[2]}'
- 'b: {type[2]}'
- 'c: {type[2]}'
- 'size: i32'
links:
- link: 'llvm.arm.neon.vst3.p0.v{neon_type[1].lane}{type[0]}'
arch: arm
- FnCall: ['_vst3{neon_type[1].nox}', ['a as _', 'b.0', 'b.1', 'b.2', "{type[3]}"]]
- FnCall: ["crate::core_arch::macros::interleaving_store!", [{ Type: "{type[0]}" }, "{type[2]}", "3", a, b], [], true]
- name: "vst3{neon_type[1].nox}"
@@ -5853,19 +5792,7 @@ intrinsics:
types:
- [i64, int64x1x4_t, int64x1_t]
compose:
- LLVMLink:
name: 'vst4.{neon_type[1]}'
arguments:
- 'ptr: *mut i8'
- 'a: {type[2]}'
- 'b: {type[2]}'
- 'c: {type[2]}'
- 'd: {type[2]}'
- 'size: i32'
links:
- link: 'llvm.arm.neon.vst4.p0.v{neon_type[1].lane}{type[0]}'
arch: arm
- FnCall: ['_vst4{neon_type[1].nox}', ['a as _', 'b.0', 'b.1', 'b.2', 'b.3', '8']]
- FnCall: [core::ptr::write_unaligned, ['a.cast()', b]]
- name: "vst4{neon_type[1].nox}"
doc: "Store multiple 4-element structures from four registers"
@@ -5879,18 +5806,7 @@ intrinsics:
types:
- [i64, int64x1x4_t, int64x1_t]
compose:
- LLVMLink:
name: 'vst4.{neon_type[1]}'
arguments:
- 'a: {type[2]}'
- 'b: {type[2]}'
- 'c: {type[2]}'
- 'd: {type[2]}'
- 'ptr: *mut i8'
links:
- link: 'llvm.aarch64.neon.st4.{neon_type[2]}.p0'
arch: aarch64,arm64ec
- FnCall: ['_vst4{neon_type[1].nox}', ['b.0', 'b.1', 'b.2', 'b.3', 'a as _']]
- FnCall: [core::ptr::write_unaligned, ['a.cast()', b]]
- name: "vst4{neon_type[1].nox}"
doc: "Store multiple 4-element structures from four registers"
@@ -6114,27 +6030,16 @@ intrinsics:
safety:
unsafe: [neon]
types:
- [i8, int8x8x4_t, int8x8_t]
- [i16, int16x4x4_t, int16x4_t]
- [i32, int32x2x4_t, int32x2_t]
- [i8, int8x16x4_t, int8x16_t]
- [i16, int16x8x4_t, int16x8_t]
- [i32, int32x4x4_t, int32x4_t]
- [f32, float32x2x4_t, float32x2_t]
- [f32, float32x4x4_t, float32x4_t]
- [i8, int8x8x4_t, "8"]
- [i16, int16x4x4_t, "4"]
- [i32, int32x2x4_t, "2"]
- [i8, int8x16x4_t, "16"]
- [i16, int16x8x4_t, "8"]
- [i32, int32x4x4_t, "4"]
- [f32, float32x2x4_t, "2"]
- [f32, float32x4x4_t, "4"]
compose:
- LLVMLink:
name: 'vst4.{neon_type[1]}'
arguments:
- 'a: {type[2]}'
- 'b: {type[2]}'
- 'c: {type[2]}'
- 'd: {type[2]}'
- 'ptr: *mut i8'
links:
- link: 'llvm.aarch64.neon.st4.v{neon_type[1].lane}{type[0]}.p0'
arch: aarch64,arm64ec
- FnCall: ['_vst4{neon_type[1].nox}', ['b.0', 'b.1', 'b.2', 'b.3', 'a as _']]
- FnCall: ["crate::core_arch::macros::interleaving_store!", [{ Type: "{type[0]}" }, "{type[2]}", "4", a, b], [], true]
- name: "vst4{neon_type[1].nox}"