From cb0d495daed94c1b68aca01081be183271ec061c Mon Sep 17 00:00:00 2001 From: Folkert de Vries Date: Thu, 19 Mar 2026 15:38:10 +0100 Subject: [PATCH] support roundtrip of `vst3q` --- .../core_arch/src/aarch64/neon/generated.rs | 18 +---- .../src/arm_shared/neon/generated.rs | 72 +++---------------- .../spec/neon/aarch64.spec.yml | 13 +--- .../spec/neon/arm_shared.spec.yml | 29 +++----- 4 files changed, 22 insertions(+), 110 deletions(-) diff --git a/library/stdarch/crates/core_arch/src/aarch64/neon/generated.rs b/library/stdarch/crates/core_arch/src/aarch64/neon/generated.rs index 74af50016690..34303b706c52 100644 --- a/library/stdarch/crates/core_arch/src/aarch64/neon/generated.rs +++ b/library/stdarch/crates/core_arch/src/aarch64/neon/generated.rs @@ -12127,14 +12127,7 @@ pub unsafe fn vld3q_dup_u64(a: *const u64) -> uint64x2x3_t { #[stable(feature = "neon_intrinsics", since = "1.59.0")] #[cfg_attr(test, assert_instr(ld3))] pub unsafe fn vld3q_f64(a: *const f64) -> float64x2x3_t { - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.ld3.v2f64.p0" - )] - fn _vld3q_f64(ptr: *const float64x2_t) -> float64x2x3_t; - } - _vld3q_f64(a as _) + crate::core_arch::macros::deinterleaving_load!(f64, 2, 3, a) } #[doc = "Load multiple 3-element structures to three registers"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_s64)"] @@ -12145,14 +12138,7 @@ pub unsafe fn vld3q_f64(a: *const f64) -> float64x2x3_t { #[stable(feature = "neon_intrinsics", since = "1.59.0")] #[cfg_attr(test, assert_instr(ld3))] pub unsafe fn vld3q_s64(a: *const i64) -> int64x2x3_t { - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.ld3.v2i64.p0" - )] - fn _vld3q_s64(ptr: *const int64x2_t) -> int64x2x3_t; - } - _vld3q_s64(a as _) + crate::core_arch::macros::deinterleaving_load!(i64, 2, 3, a) } #[doc = "Load multiple 3-element structures to three registers"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_lane_f64)"] diff --git a/library/stdarch/crates/core_arch/src/arm_shared/neon/generated.rs b/library/stdarch/crates/core_arch/src/arm_shared/neon/generated.rs index 4a846e287746..cf4d10162ec9 100644 --- a/library/stdarch/crates/core_arch/src/arm_shared/neon/generated.rs +++ b/library/stdarch/crates/core_arch/src/arm_shared/neon/generated.rs @@ -67158,14 +67158,7 @@ pub unsafe fn vst3q_s32(a: *mut i32, b: int32x4x3_t) { #[stable(feature = "neon_intrinsics", since = "1.59.0")] #[cfg_attr(test, assert_instr(st3))] pub unsafe fn vst3_f32(a: *mut f32, b: float32x2x3_t) { - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.st3.v2f32.p0" - )] - fn _vst3_f32(a: float32x2_t, b: float32x2_t, c: float32x2_t, ptr: *mut i8); - } - _vst3_f32(b.0, b.1, b.2, a as _) + crate::core_arch::macros::interleaving_store!(f32, 2, 3, a, b) } #[doc = "Store multiple 3-element structures from three registers"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3q_f32)"] @@ -67177,14 +67170,7 @@ pub unsafe fn vst3_f32(a: *mut f32, b: float32x2x3_t) { #[stable(feature = "neon_intrinsics", since = "1.59.0")] #[cfg_attr(test, assert_instr(st3))] pub unsafe fn vst3q_f32(a: *mut f32, b: float32x4x3_t) { - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.st3.v4f32.p0" - )] - fn _vst3q_f32(a: float32x4_t, b: float32x4_t, c: float32x4_t, ptr: *mut i8); - } - _vst3q_f32(b.0, b.1, b.2, a as _) + crate::core_arch::macros::interleaving_store!(f32, 4, 3, a, b) } #[doc = "Store multiple 3-element structures from three registers"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3_s8)"] @@ -67196,14 +67182,7 @@ pub unsafe fn vst3q_f32(a: *mut f32, b: float32x4x3_t) { #[stable(feature = "neon_intrinsics", since = "1.59.0")] #[cfg_attr(test, assert_instr(st3))] pub unsafe fn vst3_s8(a: *mut i8, b: int8x8x3_t) { - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.st3.v8i8.p0" - )] - fn _vst3_s8(a: int8x8_t, b: int8x8_t, c: int8x8_t, ptr: *mut i8); - } - _vst3_s8(b.0, b.1, b.2, a as _) + crate::core_arch::macros::interleaving_store!(i8, 8, 3, a, b) } #[doc = "Store multiple 3-element structures from three registers"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3q_s8)"] @@ -67215,14 +67194,7 @@ pub unsafe fn vst3_s8(a: *mut i8, b: int8x8x3_t) { #[stable(feature = "neon_intrinsics", since = "1.59.0")] #[cfg_attr(test, assert_instr(st3))] pub unsafe fn vst3q_s8(a: *mut i8, b: int8x16x3_t) { - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.st3.v16i8.p0" - )] - fn _vst3q_s8(a: int8x16_t, b: int8x16_t, c: int8x16_t, ptr: *mut i8); - } - _vst3q_s8(b.0, b.1, b.2, a as _) + crate::core_arch::macros::interleaving_store!(i8, 16, 3, a, b) } #[doc = "Store multiple 3-element structures from three registers"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3_s16)"] @@ -67234,14 +67206,7 @@ pub unsafe fn vst3q_s8(a: *mut i8, b: int8x16x3_t) { #[stable(feature = "neon_intrinsics", since = "1.59.0")] #[cfg_attr(test, assert_instr(st3))] pub unsafe fn vst3_s16(a: *mut i16, b: int16x4x3_t) { - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.st3.v4i16.p0" - )] - fn _vst3_s16(a: int16x4_t, b: int16x4_t, c: int16x4_t, ptr: *mut i8); - } - _vst3_s16(b.0, b.1, b.2, a as _) + crate::core_arch::macros::interleaving_store!(i16, 4, 3, a, b) } #[doc = "Store multiple 3-element structures from three registers"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3q_s16)"] @@ -67253,14 +67218,7 @@ pub unsafe fn vst3_s16(a: *mut i16, b: int16x4x3_t) { #[stable(feature = "neon_intrinsics", since = "1.59.0")] #[cfg_attr(test, assert_instr(st3))] pub unsafe fn vst3q_s16(a: *mut i16, b: int16x8x3_t) { - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.st3.v8i16.p0" - )] - fn _vst3q_s16(a: int16x8_t, b: int16x8_t, c: int16x8_t, ptr: *mut i8); - } - _vst3q_s16(b.0, b.1, b.2, a as _) + crate::core_arch::macros::interleaving_store!(i16, 8, 3, a, b) } #[doc = "Store multiple 3-element structures from three registers"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3_s32)"] @@ -67272,14 +67230,7 @@ pub unsafe fn vst3q_s16(a: *mut i16, b: int16x8x3_t) { #[stable(feature = "neon_intrinsics", since = "1.59.0")] #[cfg_attr(test, assert_instr(st3))] pub unsafe fn vst3_s32(a: *mut i32, b: int32x2x3_t) { - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.st3.v2i32.p0" - )] - fn _vst3_s32(a: int32x2_t, b: int32x2_t, c: int32x2_t, ptr: *mut i8); - } - _vst3_s32(b.0, b.1, b.2, a as _) + crate::core_arch::macros::interleaving_store!(i32, 2, 3, a, b) } #[doc = "Store multiple 3-element structures from three registers"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3q_s32)"] @@ -67291,14 +67242,7 @@ pub unsafe fn vst3_s32(a: *mut i32, b: int32x2x3_t) { #[stable(feature = "neon_intrinsics", since = "1.59.0")] #[cfg_attr(test, assert_instr(st3))] pub unsafe fn vst3q_s32(a: *mut i32, b: int32x4x3_t) { - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.st3.v4i32.p0" - )] - fn _vst3q_s32(a: int32x4_t, b: int32x4_t, c: int32x4_t, ptr: *mut i8); - } - _vst3q_s32(b.0, b.1, b.2, a as _) + crate::core_arch::macros::interleaving_store!(i32, 4, 3, a, b) } #[doc = "Store multiple 3-element structures from three registers"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3_lane_f16)"] diff --git a/library/stdarch/crates/stdarch-gen-arm/spec/neon/aarch64.spec.yml b/library/stdarch/crates/stdarch-gen-arm/spec/neon/aarch64.spec.yml index e88860717b6d..dfcdfb59a5c1 100644 --- a/library/stdarch/crates/stdarch-gen-arm/spec/neon/aarch64.spec.yml +++ b/library/stdarch/crates/stdarch-gen-arm/spec/neon/aarch64.spec.yml @@ -4031,17 +4031,10 @@ intrinsics: unsafe: [neon] assert_instr: [ld3] types: - - ['*const i64', int64x2x3_t, '*const int64x2_t', i64] - - ['*const f64', float64x2x3_t, '*const float64x2_t', f64] + - ['*const i64', int64x2x3_t, i64, "2"] + - ['*const f64', float64x2x3_t, f64, "2"] compose: - - LLVMLink: - name: 'vld3{neon_type[1].nox}' - arguments: - - 'ptr: {type[2]}' - links: - - link: 'llvm.aarch64.neon.ld3.v{neon_type[1].lane}{type[3]}.p0' - arch: aarch64,arm64ec - - FnCall: ['_vld3{neon_type[1].nox}', ['a as _']] + - FnCall: ["crate::core_arch::macros::deinterleaving_load!", [{ Type: "{type[2]}" }, "{type[3]}", "3", a], [], true] - name: "vld3{neon_type[1].nox}" doc: Load multiple 3-element structures to three registers diff --git a/library/stdarch/crates/stdarch-gen-arm/spec/neon/arm_shared.spec.yml b/library/stdarch/crates/stdarch-gen-arm/spec/neon/arm_shared.spec.yml index 56b2252c9ef0..f6ef7f17d73b 100644 --- a/library/stdarch/crates/stdarch-gen-arm/spec/neon/arm_shared.spec.yml +++ b/library/stdarch/crates/stdarch-gen-arm/spec/neon/arm_shared.spec.yml @@ -5642,27 +5642,16 @@ intrinsics: safety: unsafe: [neon] types: - - [i8, int8x8x3_t, int8x8_t] - - [i16, int16x4x3_t, int16x4_t] - - [i32, int32x2x3_t, int32x2_t] - - [i8, int8x16x3_t, int8x16_t] - - [i16, int16x8x3_t, int16x8_t] - - [i32, int32x4x3_t, int32x4_t] - - [f32, float32x2x3_t, float32x2_t] - - [f32, float32x4x3_t, float32x4_t] + - [i8, int8x8x3_t, "8"] + - [i16, int16x4x3_t, "4"] + - [i32, int32x2x3_t, "2"] + - [i8, int8x16x3_t, "16"] + - [i16, int16x8x3_t, "8"] + - [i32, int32x4x3_t, "4"] + - [f32, float32x2x3_t, "2"] + - [f32, float32x4x3_t, "4"] compose: - - LLVMLink: - name: 'vst3.{neon_type[1]}' - arguments: - - 'a: {type[2]}' - - 'b: {type[2]}' - - 'c: {type[2]}' - - 'ptr: *mut i8' - links: - - link: 'llvm.aarch64.neon.st3.v{neon_type[1].lane}{type[0]}.p0' - arch: aarch64,arm64ec - - FnCall: ['_vst3{neon_type[1].nox}', ['b.0', 'b.1', 'b.2', 'a as _']] - + - FnCall: ["crate::core_arch::macros::interleaving_store!", [{ Type: "{type[0]}" }, "{type[2]}", "3", a, b], [], true] - name: "vst3{neon_type[1].nox}" doc: "Store multiple 3-element structures from three registers"