From d4bb7f4fdd9d4db982cd1a1d79dd357300433d4b Mon Sep 17 00:00:00 2001 From: sayantn Date: Sat, 9 May 2026 05:49:07 +0530 Subject: [PATCH] Change implementation of `v{q}{r}shr{u}n_high_n` --- .../core_arch/src/aarch64/neon/generated.rs | 156 +++++------------- .../spec/neon/aarch64.spec.yml | 111 ++++++------- 2 files changed, 94 insertions(+), 173 deletions(-) diff --git a/library/stdarch/crates/core_arch/src/aarch64/neon/generated.rs b/library/stdarch/crates/core_arch/src/aarch64/neon/generated.rs index 112c84036fbb..1cbac3e284b5 100644 --- a/library/stdarch/crates/core_arch/src/aarch64/neon/generated.rs +++ b/library/stdarch/crates/core_arch/src/aarch64/neon/generated.rs @@ -15902,13 +15902,7 @@ pub fn vqrshld_u64(a: u64, b: i64) -> u64 { #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub fn vqrshrn_high_n_s16(a: int8x8_t, b: int16x8_t) -> int8x16_t { static_assert!(N >= 1 && N <= 8); - unsafe { - simd_shuffle!( - a, - vqrshrn_n_s16::(b), - [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15] - ) - } + vcombine_s8(a, vqrshrn_n_s16::(b)) } #[doc = "Signed saturating rounded shift right narrow"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshrn_high_n_s32)"] @@ -15919,7 +15913,7 @@ pub fn vqrshrn_high_n_s16(a: int8x8_t, b: int16x8_t) -> int8x16_t #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub fn vqrshrn_high_n_s32(a: int16x4_t, b: int32x4_t) -> int16x8_t { static_assert!(N >= 1 && N <= 16); - unsafe { simd_shuffle!(a, vqrshrn_n_s32::(b), [0, 1, 2, 3, 4, 5, 6, 7]) } + vcombine_s16(a, vqrshrn_n_s32::(b)) } #[doc = "Signed saturating rounded shift right narrow"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshrn_high_n_s64)"] @@ -15930,7 +15924,7 @@ pub fn vqrshrn_high_n_s32(a: int16x4_t, b: int32x4_t) -> int16x8_t #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub fn vqrshrn_high_n_s64(a: int32x2_t, b: int64x2_t) -> int32x4_t { static_assert!(N >= 1 && N <= 32); - unsafe { simd_shuffle!(a, vqrshrn_n_s64::(b), [0, 1, 2, 3]) } + vcombine_s32(a, vqrshrn_n_s64::(b)) } #[doc = "Unsigned saturating rounded shift right narrow"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshrn_high_n_u16)"] @@ -15941,13 +15935,7 @@ pub fn vqrshrn_high_n_s64(a: int32x2_t, b: int64x2_t) -> int32x4_t #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub fn vqrshrn_high_n_u16(a: uint8x8_t, b: uint16x8_t) -> uint8x16_t { static_assert!(N >= 1 && N <= 8); - unsafe { - simd_shuffle!( - a, - vqrshrn_n_u16::(b), - [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15] - ) - } + vcombine_u8(a, vqrshrn_n_u16::(b)) } #[doc = "Unsigned saturating rounded shift right narrow"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshrn_high_n_u32)"] @@ -15958,7 +15946,7 @@ pub fn vqrshrn_high_n_u16(a: uint8x8_t, b: uint16x8_t) -> uint8x16 #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub fn vqrshrn_high_n_u32(a: uint16x4_t, b: uint32x4_t) -> uint16x8_t { static_assert!(N >= 1 && N <= 16); - unsafe { simd_shuffle!(a, vqrshrn_n_u32::(b), [0, 1, 2, 3, 4, 5, 6, 7]) } + vcombine_u16(a, vqrshrn_n_u32::(b)) } #[doc = "Unsigned saturating rounded shift right narrow"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshrn_high_n_u64)"] @@ -15969,7 +15957,7 @@ pub fn vqrshrn_high_n_u32(a: uint16x4_t, b: uint32x4_t) -> uint16x #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub fn vqrshrn_high_n_u64(a: uint32x2_t, b: uint64x2_t) -> uint32x4_t { static_assert!(N >= 1 && N <= 32); - unsafe { simd_shuffle!(a, vqrshrn_n_u64::(b), [0, 1, 2, 3]) } + vcombine_u32(a, vqrshrn_n_u64::(b)) } #[doc = "Unsigned saturating rounded shift right narrow"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshrnd_n_u64)"] @@ -16052,13 +16040,7 @@ pub fn vqrshrnd_n_s64(a: i64) -> i32 { #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub fn vqrshrun_high_n_s16(a: uint8x8_t, b: int16x8_t) -> uint8x16_t { static_assert!(N >= 1 && N <= 8); - unsafe { - simd_shuffle!( - a, - vqrshrun_n_s16::(b), - [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15] - ) - } + vcombine_u8(a, vqrshrun_n_s16::(b)) } #[doc = "Signed saturating rounded shift right unsigned narrow"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshrun_high_n_s32)"] @@ -16069,7 +16051,7 @@ pub fn vqrshrun_high_n_s16(a: uint8x8_t, b: int16x8_t) -> uint8x16 #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub fn vqrshrun_high_n_s32(a: uint16x4_t, b: int32x4_t) -> uint16x8_t { static_assert!(N >= 1 && N <= 16); - unsafe { simd_shuffle!(a, vqrshrun_n_s32::(b), [0, 1, 2, 3, 4, 5, 6, 7]) } + vcombine_u16(a, vqrshrun_n_s32::(b)) } #[doc = "Signed saturating rounded shift right unsigned narrow"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshrun_high_n_s64)"] @@ -16080,7 +16062,7 @@ pub fn vqrshrun_high_n_s32(a: uint16x4_t, b: int32x4_t) -> uint16x #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub fn vqrshrun_high_n_s64(a: uint32x2_t, b: int64x2_t) -> uint32x4_t { static_assert!(N >= 1 && N <= 32); - unsafe { simd_shuffle!(a, vqrshrun_n_s64::(b), [0, 1, 2, 3]) } + vcombine_u32(a, vqrshrun_n_s64::(b)) } #[doc = "Signed saturating rounded shift right unsigned narrow"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshrund_n_s64)"] @@ -16351,13 +16333,7 @@ pub fn vqshlus_n_s32(a: i32) -> u32 { #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub fn vqshrn_high_n_s16(a: int8x8_t, b: int16x8_t) -> int8x16_t { static_assert!(N >= 1 && N <= 8); - unsafe { - simd_shuffle!( - a, - vqshrn_n_s16::(b), - [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15] - ) - } + vcombine_s8(a, vqshrn_n_s16::(b)) } #[doc = "Signed saturating shift right narrow"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshrn_high_n_s32)"] @@ -16368,7 +16344,7 @@ pub fn vqshrn_high_n_s16(a: int8x8_t, b: int16x8_t) -> int8x16_t { #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub fn vqshrn_high_n_s32(a: int16x4_t, b: int32x4_t) -> int16x8_t { static_assert!(N >= 1 && N <= 16); - unsafe { simd_shuffle!(a, vqshrn_n_s32::(b), [0, 1, 2, 3, 4, 5, 6, 7]) } + vcombine_s16(a, vqshrn_n_s32::(b)) } #[doc = "Signed saturating shift right narrow"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshrn_high_n_s64)"] @@ -16379,7 +16355,7 @@ pub fn vqshrn_high_n_s32(a: int16x4_t, b: int32x4_t) -> int16x8_t #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub fn vqshrn_high_n_s64(a: int32x2_t, b: int64x2_t) -> int32x4_t { static_assert!(N >= 1 && N <= 32); - unsafe { simd_shuffle!(a, vqshrn_n_s64::(b), [0, 1, 2, 3]) } + vcombine_s32(a, vqshrn_n_s64::(b)) } #[doc = "Unsigned saturating shift right narrow"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshrn_high_n_u16)"] @@ -16390,13 +16366,7 @@ pub fn vqshrn_high_n_s64(a: int32x2_t, b: int64x2_t) -> int32x4_t #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub fn vqshrn_high_n_u16(a: uint8x8_t, b: uint16x8_t) -> uint8x16_t { static_assert!(N >= 1 && N <= 8); - unsafe { - simd_shuffle!( - a, - vqshrn_n_u16::(b), - [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15] - ) - } + vcombine_u8(a, vqshrn_n_u16::(b)) } #[doc = "Unsigned saturating shift right narrow"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshrn_high_n_u32)"] @@ -16407,7 +16377,7 @@ pub fn vqshrn_high_n_u16(a: uint8x8_t, b: uint16x8_t) -> uint8x16_ #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub fn vqshrn_high_n_u32(a: uint16x4_t, b: uint32x4_t) -> uint16x8_t { static_assert!(N >= 1 && N <= 16); - unsafe { simd_shuffle!(a, vqshrn_n_u32::(b), [0, 1, 2, 3, 4, 5, 6, 7]) } + vcombine_u16(a, vqshrn_n_u32::(b)) } #[doc = "Unsigned saturating shift right narrow"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshrn_high_n_u64)"] @@ -16418,7 +16388,7 @@ pub fn vqshrn_high_n_u32(a: uint16x4_t, b: uint32x4_t) -> uint16x8 #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub fn vqshrn_high_n_u64(a: uint32x2_t, b: uint64x2_t) -> uint32x4_t { static_assert!(N >= 1 && N <= 32); - unsafe { simd_shuffle!(a, vqshrn_n_u64::(b), [0, 1, 2, 3]) } + vcombine_u32(a, vqshrn_n_u64::(b)) } #[doc = "Signed saturating shift right narrow"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshrnd_n_s64)"] @@ -16509,13 +16479,7 @@ pub fn vqshrns_n_u32(a: u32) -> u16 { #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub fn vqshrun_high_n_s16(a: uint8x8_t, b: int16x8_t) -> uint8x16_t { static_assert!(N >= 1 && N <= 8); - unsafe { - simd_shuffle!( - a, - vqshrun_n_s16::(b), - [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15] - ) - } + vcombine_u8(a, vqshrun_n_s16::(b)) } #[doc = "Signed saturating shift right unsigned narrow"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshrun_high_n_s32)"] @@ -16526,7 +16490,7 @@ pub fn vqshrun_high_n_s16(a: uint8x8_t, b: int16x8_t) -> uint8x16_ #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub fn vqshrun_high_n_s32(a: uint16x4_t, b: int32x4_t) -> uint16x8_t { static_assert!(N >= 1 && N <= 16); - unsafe { simd_shuffle!(a, vqshrun_n_s32::(b), [0, 1, 2, 3, 4, 5, 6, 7]) } + vcombine_u16(a, vqshrun_n_s32::(b)) } #[doc = "Signed saturating shift right unsigned narrow"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshrun_high_n_s64)"] @@ -16537,7 +16501,7 @@ pub fn vqshrun_high_n_s32(a: uint16x4_t, b: int32x4_t) -> uint16x8 #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub fn vqshrun_high_n_s64(a: uint32x2_t, b: int64x2_t) -> uint32x4_t { static_assert!(N >= 1 && N <= 32); - unsafe { simd_shuffle!(a, vqshrun_n_s64::(b), [0, 1, 2, 3]) } + vcombine_u32(a, vqshrun_n_s64::(b)) } #[doc = "Signed saturating shift right unsigned narrow"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshrund_n_s64)"] @@ -20128,13 +20092,7 @@ pub fn vrshrd_n_u64(a: u64) -> u64 { #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub fn vrshrn_high_n_s16(a: int8x8_t, b: int16x8_t) -> int8x16_t { static_assert!(N >= 1 && N <= 8); - unsafe { - simd_shuffle!( - a, - vrshrn_n_s16::(b), - [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15] - ) - } + vcombine_s8(a, vrshrn_n_s16::(b)) } #[doc = "Rounding shift right narrow"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrshrn_high_n_s32)"] @@ -20145,7 +20103,7 @@ pub fn vrshrn_high_n_s16(a: int8x8_t, b: int16x8_t) -> int8x16_t { #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub fn vrshrn_high_n_s32(a: int16x4_t, b: int32x4_t) -> int16x8_t { static_assert!(N >= 1 && N <= 16); - unsafe { simd_shuffle!(a, vrshrn_n_s32::(b), [0, 1, 2, 3, 4, 5, 6, 7]) } + vcombine_s16(a, vrshrn_n_s32::(b)) } #[doc = "Rounding shift right narrow"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrshrn_high_n_s64)"] @@ -20156,7 +20114,7 @@ pub fn vrshrn_high_n_s32(a: int16x4_t, b: int32x4_t) -> int16x8_t #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub fn vrshrn_high_n_s64(a: int32x2_t, b: int64x2_t) -> int32x4_t { static_assert!(N >= 1 && N <= 32); - unsafe { simd_shuffle!(a, vrshrn_n_s64::(b), [0, 1, 2, 3]) } + vcombine_s32(a, vrshrn_n_s64::(b)) } #[doc = "Rounding shift right narrow"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrshrn_high_n_u16)"] @@ -20167,13 +20125,7 @@ pub fn vrshrn_high_n_s64(a: int32x2_t, b: int64x2_t) -> int32x4_t #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub fn vrshrn_high_n_u16(a: uint8x8_t, b: uint16x8_t) -> uint8x16_t { static_assert!(N >= 1 && N <= 8); - unsafe { - simd_shuffle!( - a, - vrshrn_n_u16::(b), - [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15] - ) - } + vcombine_u8(a, vrshrn_n_u16::(b)) } #[doc = "Rounding shift right narrow"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrshrn_high_n_u32)"] @@ -20184,7 +20136,7 @@ pub fn vrshrn_high_n_u16(a: uint8x8_t, b: uint16x8_t) -> uint8x16_ #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub fn vrshrn_high_n_u32(a: uint16x4_t, b: uint32x4_t) -> uint16x8_t { static_assert!(N >= 1 && N <= 16); - unsafe { simd_shuffle!(a, vrshrn_n_u32::(b), [0, 1, 2, 3, 4, 5, 6, 7]) } + vcombine_u16(a, vrshrn_n_u32::(b)) } #[doc = "Rounding shift right narrow"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrshrn_high_n_u64)"] @@ -20195,7 +20147,7 @@ pub fn vrshrn_high_n_u32(a: uint16x4_t, b: uint32x4_t) -> uint16x8 #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub fn vrshrn_high_n_u64(a: uint32x2_t, b: uint64x2_t) -> uint32x4_t { static_assert!(N >= 1 && N <= 32); - unsafe { simd_shuffle!(a, vrshrn_n_u64::(b), [0, 1, 2, 3]) } + vcombine_u32(a, vrshrn_n_u64::(b)) } #[doc = "Reciprocal square-root estimate."] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsqrte_f64)"] @@ -20708,10 +20660,8 @@ pub fn vshld_u64(a: u64, b: i64) -> u64 { #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub fn vshll_high_n_s8(a: int8x16_t) -> int16x8_t { static_assert!(N >= 0 && N <= 8); - unsafe { - let b: int8x8_t = simd_shuffle!(a, a, [8, 9, 10, 11, 12, 13, 14, 15]); - vshll_n_s8::(b) - } + let b = vget_high_s8(a); + vshll_n_s8::(b) } #[doc = "Signed shift left long"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshll_high_n_s16)"] @@ -20722,10 +20672,8 @@ pub fn vshll_high_n_s8(a: int8x16_t) -> int16x8_t { #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub fn vshll_high_n_s16(a: int16x8_t) -> int32x4_t { static_assert!(N >= 0 && N <= 16); - unsafe { - let b: int16x4_t = simd_shuffle!(a, a, [4, 5, 6, 7]); - vshll_n_s16::(b) - } + let b = vget_high_s16(a); + vshll_n_s16::(b) } #[doc = "Signed shift left long"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshll_high_n_s32)"] @@ -20736,10 +20684,8 @@ pub fn vshll_high_n_s16(a: int16x8_t) -> int32x4_t { #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub fn vshll_high_n_s32(a: int32x4_t) -> int64x2_t { static_assert!(N >= 0 && N <= 32); - unsafe { - let b: int32x2_t = simd_shuffle!(a, a, [2, 3]); - vshll_n_s32::(b) - } + let b = vget_high_s32(a); + vshll_n_s32::(b) } #[doc = "Signed shift left long"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshll_high_n_u8)"] @@ -20750,10 +20696,8 @@ pub fn vshll_high_n_s32(a: int32x4_t) -> int64x2_t { #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub fn vshll_high_n_u8(a: uint8x16_t) -> uint16x8_t { static_assert!(N >= 0 && N <= 8); - unsafe { - let b: uint8x8_t = simd_shuffle!(a, a, [8, 9, 10, 11, 12, 13, 14, 15]); - vshll_n_u8::(b) - } + let b: uint8x8_t = vget_high_u8(a); + vshll_n_u8::(b) } #[doc = "Signed shift left long"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshll_high_n_u16)"] @@ -20764,10 +20708,8 @@ pub fn vshll_high_n_u8(a: uint8x16_t) -> uint16x8_t { #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub fn vshll_high_n_u16(a: uint16x8_t) -> uint32x4_t { static_assert!(N >= 0 && N <= 16); - unsafe { - let b: uint16x4_t = simd_shuffle!(a, a, [4, 5, 6, 7]); - vshll_n_u16::(b) - } + let b: uint16x4_t = vget_high_u16(a); + vshll_n_u16::(b) } #[doc = "Signed shift left long"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshll_high_n_u32)"] @@ -20778,10 +20720,8 @@ pub fn vshll_high_n_u16(a: uint16x8_t) -> uint32x4_t { #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub fn vshll_high_n_u32(a: uint32x4_t) -> uint64x2_t { static_assert!(N >= 0 && N <= 32); - unsafe { - let b: uint32x2_t = simd_shuffle!(a, a, [2, 3]); - vshll_n_u32::(b) - } + let b: uint32x2_t = vget_high_u32(a); + vshll_n_u32::(b) } #[doc = "Shift right narrow"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshrn_high_n_s16)"] @@ -20792,13 +20732,7 @@ pub fn vshll_high_n_u32(a: uint32x4_t) -> uint64x2_t { #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub fn vshrn_high_n_s16(a: int8x8_t, b: int16x8_t) -> int8x16_t { static_assert!(N >= 1 && N <= 8); - unsafe { - simd_shuffle!( - a, - vshrn_n_s16::(b), - [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15] - ) - } + vcombine_s8(a, vshrn_n_s16::(b)) } #[doc = "Shift right narrow"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshrn_high_n_s32)"] @@ -20809,7 +20743,7 @@ pub fn vshrn_high_n_s16(a: int8x8_t, b: int16x8_t) -> int8x16_t { #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub fn vshrn_high_n_s32(a: int16x4_t, b: int32x4_t) -> int16x8_t { static_assert!(N >= 1 && N <= 16); - unsafe { simd_shuffle!(a, vshrn_n_s32::(b), [0, 1, 2, 3, 4, 5, 6, 7]) } + vcombine_s16(a, vshrn_n_s32::(b)) } #[doc = "Shift right narrow"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshrn_high_n_s64)"] @@ -20820,7 +20754,7 @@ pub fn vshrn_high_n_s32(a: int16x4_t, b: int32x4_t) -> int16x8_t { #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub fn vshrn_high_n_s64(a: int32x2_t, b: int64x2_t) -> int32x4_t { static_assert!(N >= 1 && N <= 32); - unsafe { simd_shuffle!(a, vshrn_n_s64::(b), [0, 1, 2, 3]) } + vcombine_s32(a, vshrn_n_s64::(b)) } #[doc = "Shift right narrow"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshrn_high_n_u16)"] @@ -20831,13 +20765,7 @@ pub fn vshrn_high_n_s64(a: int32x2_t, b: int64x2_t) -> int32x4_t { #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub fn vshrn_high_n_u16(a: uint8x8_t, b: uint16x8_t) -> uint8x16_t { static_assert!(N >= 1 && N <= 8); - unsafe { - simd_shuffle!( - a, - vshrn_n_u16::(b), - [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15] - ) - } + vcombine_u8(a, vshrn_n_u16::(b)) } #[doc = "Shift right narrow"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshrn_high_n_u32)"] @@ -20848,7 +20776,7 @@ pub fn vshrn_high_n_u16(a: uint8x8_t, b: uint16x8_t) -> uint8x16_t #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub fn vshrn_high_n_u32(a: uint16x4_t, b: uint32x4_t) -> uint16x8_t { static_assert!(N >= 1 && N <= 16); - unsafe { simd_shuffle!(a, vshrn_n_u32::(b), [0, 1, 2, 3, 4, 5, 6, 7]) } + vcombine_u16(a, vshrn_n_u32::(b)) } #[doc = "Shift right narrow"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshrn_high_n_u64)"] @@ -20859,7 +20787,7 @@ pub fn vshrn_high_n_u32(a: uint16x4_t, b: uint32x4_t) -> uint16x8_ #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub fn vshrn_high_n_u64(a: uint32x2_t, b: uint64x2_t) -> uint32x4_t { static_assert!(N >= 1 && N <= 32); - unsafe { simd_shuffle!(a, vshrn_n_u64::(b), [0, 1, 2, 3]) } + vcombine_u32(a, vshrn_n_u64::(b)) } #[doc = "Shift Left and Insert (immediate)"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsli_n_s8)"] diff --git a/library/stdarch/crates/stdarch-gen-arm/spec/neon/aarch64.spec.yml b/library/stdarch/crates/stdarch-gen-arm/spec/neon/aarch64.spec.yml index a805fbd2058a..f6705033f923 100644 --- a/library/stdarch/crates/stdarch-gen-arm/spec/neon/aarch64.spec.yml +++ b/library/stdarch/crates/stdarch-gen-arm/spec/neon/aarch64.spec.yml @@ -7828,12 +7828,12 @@ intrinsics: static_defs: ['const N: i32'] safety: safe types: - - [int8x8_t, int16x8_t, int8x16_t, '_high_n_s16', '[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]', 'N >= 1 && N <= 8'] - - [int16x4_t, int32x4_t, int16x8_t, '_high_n_s32', '[0, 1, 2, 3, 4, 5, 6, 7]', 'N >= 1 && N <= 16'] - - [int32x2_t, int64x2_t, int32x4_t, '_high_n_s64', '[0, 1, 2, 3]', 'N >= 1 && N <= 32'] + - [int8x8_t, int16x8_t, int8x16_t, '_high_n_s16', 'N >= 1 && N <= 8'] + - [int16x4_t, int32x4_t, int16x8_t, '_high_n_s32', 'N >= 1 && N <= 16'] + - [int32x2_t, int64x2_t, int32x4_t, '_high_n_s64', 'N >= 1 && N <= 32'] compose: - - FnCall: [static_assert!, ["{type[5]}"]] - - FnCall: [simd_shuffle!, [a, {FnCall: ["vqrshrn_n{neon_type[1].noq}::", [b]]}, "{type[4]}"]] + - FnCall: [static_assert!, ["{type[4]}"]] + - FnCall: ['vcombine_{neon_type[0]}', [a, {FnCall: ["vqrshrn_n{neon_type[1].noq}::", [b]]}]] - name: "vqrshrn{type[0]}" doc: "Unsigned saturating rounded shift right narrow" @@ -7865,18 +7865,17 @@ intrinsics: static_defs: ['const N: i32'] safety: safe types: - - [uint8x8_t, uint16x8_t, uint8x16_t, 'N >= 1 && N <= 8', '[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]'] - - [uint16x4_t, uint32x4_t, uint16x8_t, 'N >= 1 && N <= 16', '[0, 1, 2, 3, 4, 5, 6, 7]'] - - [uint32x2_t, uint64x2_t, uint32x4_t, 'N >= 1 && N <= 32', '[0, 1, 2, 3]'] + - [uint8x8_t, uint16x8_t, uint8x16_t, 'N >= 1 && N <= 8'] + - [uint16x4_t, uint32x4_t, uint16x8_t, 'N >= 1 && N <= 16'] + - [uint32x2_t, uint64x2_t, uint32x4_t, 'N >= 1 && N <= 32'] compose: - FnCall: [static_assert!, ['{type[3]}']] - FnCall: - - simd_shuffle! + - 'vcombine_{neon_type[0]}' - - a - FnCall: - "vqrshrn_n{neon_type[1].noq}::" - - b - - "{type[4]}" - name: "vqrshrun{type[0]}" doc: "Signed saturating rounded shift right unsigned narrow" @@ -7916,18 +7915,17 @@ intrinsics: static_defs: ['const N: i32'] safety: safe types: - - [uint8x8_t, int16x8_t, uint8x16_t, 'N >= 1 && N <= 8', s16, '[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]'] - - [uint16x4_t, int32x4_t, uint16x8_t, 'N >= 1 && N <= 16', s32, '[0, 1, 2, 3, 4, 5, 6, 7]'] - - [uint32x2_t, int64x2_t, uint32x4_t, 'N >= 1 && N <= 32', s64, '[0, 1, 2, 3]'] + - [uint8x8_t, int16x8_t, uint8x16_t, 'N >= 1 && N <= 8'] + - [uint16x4_t, int32x4_t, uint16x8_t, 'N >= 1 && N <= 16'] + - [uint32x2_t, int64x2_t, uint32x4_t, 'N >= 1 && N <= 32'] compose: - FnCall: [static_assert!, ["{type[3]}"]] - FnCall: - - simd_shuffle! + - 'vcombine_{neon_type[0]}' - - a - FnCall: - - "vqrshrun_n_{type[4]}::" + - "vqrshrun_n_{neon_type[1]}::" - - b - - "{type[5]}" - name: "vqshld_{type}" doc: "Signed saturating shift left" @@ -8110,16 +8108,15 @@ intrinsics: static_defs: ['const N: i32'] safety: safe types: - - [_high_n_s16, int8x8_t, int16x8_t, int8x16_t, 'N >= 1 && N <= 8', '[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]', s16] - - [_high_n_s32, int16x4_t, int32x4_t, int16x8_t, 'N >= 1 && N <= 16', '[0, 1, 2, 3, 4, 5, 6, 7]', s32] - - [_high_n_s64, int32x2_t, int64x2_t, int32x4_t, 'N >= 1 && N <= 32', '[0, 1, 2, 3]', s64] + - [_high_n_s16, int8x8_t, int16x8_t, int8x16_t, 'N >= 1 && N <= 8'] + - [_high_n_s32, int16x4_t, int32x4_t, int16x8_t, 'N >= 1 && N <= 16'] + - [_high_n_s64, int32x2_t, int64x2_t, int32x4_t, 'N >= 1 && N <= 32'] compose: - FnCall: [static_assert!, ["{type[4]}"]] - FnCall: - - simd_shuffle! + - 'vcombine_{neon_type[1]}' - - a - - FnCall: ["vqshrn_n_{type[6]}::", [b]] - - "{type[5]}" + - FnCall: ["vqshrn_n_{neon_type[2]}::", [b]] - name: "vqshrnd_n_u64" doc: "Unsigned saturating shift right narrow" @@ -8178,16 +8175,15 @@ intrinsics: static_defs: ['const N: i32'] safety: safe types: - - [_high_n_u16, uint8x8_t, uint16x8_t, uint8x16_t, 'N >= 1 && N <= 8', '[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]'] - - [_high_n_u32, uint16x4_t, uint32x4_t, uint16x8_t, 'N >= 1 && N <= 16', '[0, 1, 2, 3, 4, 5, 6, 7]'] - - [_high_n_u64, uint32x2_t, uint64x2_t, uint32x4_t, 'N >= 1 && N <= 32', '[0, 1, 2, 3]'] + - [_high_n_u16, uint8x8_t, uint16x8_t, uint8x16_t, 'N >= 1 && N <= 8'] + - [_high_n_u32, uint16x4_t, uint32x4_t, uint16x8_t, 'N >= 1 && N <= 16'] + - [_high_n_u64, uint32x2_t, uint64x2_t, uint32x4_t, 'N >= 1 && N <= 32'] compose: - FnCall: [static_assert!, ["{type[4]}"]] - FnCall: - - simd_shuffle! + - 'vcombine_{neon_type[1]}' - - a - FnCall: ["vqshrn_n_{neon_type[2]}::", [b]] - - "{type[5]}" - name: "vqshrun{type[0]}" doc: "Signed saturating shift right unsigned narrow" @@ -8223,16 +8219,15 @@ intrinsics: static_defs: ['const N: i32'] safety: safe types: - - [uint8x8_t, int16x8_t, uint8x16_t, 'N >= 1 && N <= 8', '[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]'] - - [uint16x4_t, int32x4_t, uint16x8_t, 'N >= 1 && N <= 16', '[0, 1, 2, 3, 4, 5, 6, 7]'] - - [uint32x2_t, int64x2_t, uint32x4_t, 'N >= 1 && N <= 32', '[0, 1, 2, 3]'] + - [uint8x8_t, int16x8_t, uint8x16_t, 'N >= 1 && N <= 8'] + - [uint16x4_t, int32x4_t, uint16x8_t, 'N >= 1 && N <= 16'] + - [uint32x2_t, int64x2_t, uint32x4_t, 'N >= 1 && N <= 32'] compose: - FnCall: [static_assert!, ["{type[3]}"]] - FnCall: - - simd_shuffle! + - 'vcombine_{neon_type[0]}' - - a - FnCall: ["vqshrun_n_{neon_type[1]}::", [b]] - - "{type[4]}" - name: "vsqadd{type[0]}" doc: "Unsigned saturating accumulate of signed value" @@ -8699,19 +8694,18 @@ intrinsics: static_defs: ['const N: i32'] safety: safe types: - - [int8x8_t, int16x8_t, int8x16_t, 'N >= 1 && N <= 8', '[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]'] - - [int16x4_t, int32x4_t, int16x8_t, 'N >= 1 && N <= 16', '[0, 1, 2, 3, 4, 5, 6, 7]'] - - [int32x2_t, int64x2_t, int32x4_t, 'N >= 1 && N <= 32', '[0, 1, 2, 3]'] - - [uint8x8_t, uint16x8_t, uint8x16_t, 'N >= 1 && N <= 8', '[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]'] - - [uint16x4_t, uint32x4_t, uint16x8_t, 'N >= 1 && N <= 16', '[0, 1, 2, 3, 4, 5, 6, 7]'] - - [uint32x2_t, uint64x2_t, uint32x4_t, 'N >= 1 && N <= 32', '[0, 1, 2, 3]'] + - [int8x8_t, int16x8_t, int8x16_t, 'N >= 1 && N <= 8'] + - [int16x4_t, int32x4_t, int16x8_t, 'N >= 1 && N <= 16'] + - [int32x2_t, int64x2_t, int32x4_t, 'N >= 1 && N <= 32'] + - [uint8x8_t, uint16x8_t, uint8x16_t, 'N >= 1 && N <= 8'] + - [uint16x4_t, uint32x4_t, uint16x8_t, 'N >= 1 && N <= 16'] + - [uint32x2_t, uint64x2_t, uint32x4_t, 'N >= 1 && N <= 32'] compose: - FnCall: [static_assert!, ["{type[3]}"]] - FnCall: - - simd_shuffle! + - 'vcombine_{neon_type[0]}' - - a - FnCall: ["vrshrn_n_{neon_type[1]}::", [b]] - - "{type[4]}" - name: "vrsubhn_high_{neon_type[1]}" doc: "Rounding subtract returning high narrow" @@ -9033,13 +9027,13 @@ intrinsics: static_defs: ['const N: i32'] safety: safe types: - - [int8x16_t, int16x8_t, int8x8_t, 'N >= 0 && N <= 8', '[8, 9, 10, 11, 12, 13, 14, 15]'] - - [int16x8_t, int32x4_t, int16x4_t, 'N >= 0 && N <= 16', '[4, 5, 6, 7]'] - - [int32x4_t, int64x2_t, int32x2_t, 'N >= 0 && N <= 32', '[2, 3]'] + - [int8x16_t, int16x8_t, int8x8_t, 'N >= 0 && N <= 8'] + - [int16x8_t, int32x4_t, int16x4_t, 'N >= 0 && N <= 16'] + - [int32x4_t, int64x2_t, int32x2_t, 'N >= 0 && N <= 32'] compose: - FnCall: [static_assert!, ["{type[3]}"]] - - Let: [b, "{neon_type[2]}", {FnCall: [simd_shuffle!, [a, a, "{type[4]}"]]}] - - FnCall: ["vshll_n_{neon_type[2]}::", [b]] + - Let: [b, {FnCall: ['vget_high_{neon_type[0]}', [a]]}] + - FnCall: ["vshll_n_{neon_type[2]}", [b], [N]] - name: "vshll_high_n_{neon_type[0]}" doc: "Signed shift left long" @@ -9052,13 +9046,13 @@ intrinsics: static_defs: ['const N: i32'] safety: safe types: - - [uint8x16_t, uint16x8_t, uint8x8_t, 'N >= 0 && N <= 8', '[8, 9, 10, 11, 12, 13, 14, 15]'] - - [uint16x8_t, uint32x4_t, uint16x4_t, 'N >= 0 && N <= 16', '[4, 5, 6, 7]'] - - [uint32x4_t, uint64x2_t, uint32x2_t, 'N >= 0 && N <= 32', '[2, 3]'] + - [uint8x16_t, uint16x8_t, uint8x8_t, 'N >= 0 && N <= 8'] + - [uint16x8_t, uint32x4_t, uint16x4_t, 'N >= 0 && N <= 16'] + - [uint32x4_t, uint64x2_t, uint32x2_t, 'N >= 0 && N <= 32'] compose: - FnCall: [static_assert!, ["{type[3]}"]] - - Let: [b, "{neon_type[2]}", {FnCall: [simd_shuffle!, [a, a, "{type[4]}"]]}] - - FnCall: ["vshll_n_{neon_type[2]}::", [b]] + - Let: [b, "{neon_type[2]}", {FnCall: ['vget_high_{neon_type[0]}', [a]]}] + - FnCall: ["vshll_n_{neon_type[2]}", [b], [N]] - name: "vshrn_high_n_{neon_type[1]}" doc: "Shift right narrow" @@ -9071,19 +9065,18 @@ intrinsics: static_defs: ['const N: i32'] safety: safe types: - - [int8x8_t, int16x8_t, int8x16_t, 'N >= 1 && N <= 8', '[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]'] - - [int16x4_t, int32x4_t, int16x8_t, 'N >= 1 && N <= 16', '[0, 1, 2, 3, 4, 5, 6, 7]'] - - [int32x2_t, int64x2_t, int32x4_t, 'N >= 1 && N <= 32', '[0, 1, 2, 3]'] - - [uint8x8_t, uint16x8_t, uint8x16_t, 'N >= 1 && N <= 8', '[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]'] - - [uint16x4_t, uint32x4_t, uint16x8_t, 'N >= 1 && N <= 16', '[0, 1, 2, 3, 4, 5, 6, 7]'] - - [uint32x2_t, uint64x2_t, uint32x4_t, 'N >= 1 && N <= 32', '[0, 1, 2, 3]'] + - [int8x8_t, int16x8_t, int8x16_t, 'N >= 1 && N <= 8'] + - [int16x4_t, int32x4_t, int16x8_t, 'N >= 1 && N <= 16'] + - [int32x2_t, int64x2_t, int32x4_t, 'N >= 1 && N <= 32'] + - [uint8x8_t, uint16x8_t, uint8x16_t, 'N >= 1 && N <= 8'] + - [uint16x4_t, uint32x4_t, uint16x8_t, 'N >= 1 && N <= 16'] + - [uint32x2_t, uint64x2_t, uint32x4_t, 'N >= 1 && N <= 32'] compose: - FnCall: [static_assert!, ["{type[3]}"]] - FnCall: - - simd_shuffle! + - 'vcombine_{neon_type[0]}' - - a - - FnCall: ["vshrn_n_{neon_type[1]}::", [b]] - - "{type[4]}" + - FnCall: ["vshrn_n_{neon_type[1]}", [b], [N]] - name: "vsm3partw1{neon_type.no}" doc: "SM3PARTW1"