From a023ebf71fbd470b30e25596a532f71c2c761ffa Mon Sep 17 00:00:00 2001 From: sayantn Date: Fri, 1 May 2026 01:11:01 +0530 Subject: [PATCH] gen-arm: fix `vmlal`, `vmlsl` and `vmull` --- .../core_arch/src/aarch64/neon/generated.rs | 48 +++++++-------- .../spec/neon/aarch64.spec.yml | 60 +++++++++---------- 2 files changed, 54 insertions(+), 54 deletions(-) diff --git a/library/stdarch/crates/core_arch/src/aarch64/neon/generated.rs b/library/stdarch/crates/core_arch/src/aarch64/neon/generated.rs index 4bf10926e4ef..c67c277c7f2f 100644 --- a/library/stdarch/crates/core_arch/src/aarch64/neon/generated.rs +++ b/library/stdarch/crates/core_arch/src/aarch64/neon/generated.rs @@ -14520,7 +14520,7 @@ pub fn vmlaq_f64(a: float64x2_t, b: float64x2_t, c: float64x2_t) -> float64x2_t #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub fn vmlal_high_lane_s16<const LANE: i32>(a: int32x4_t, b: int16x8_t, c: int16x4_t) -> int32x4_t { static_assert_uimm_bits!(LANE, 2); - unsafe { vmlal_high_s16(a, b, simd_shuffle!(c, c, [LANE as u32; 8])) } + vmlal_high_s16(a, b, vdupq_lane_s16::<LANE>(c)) } #[doc = "Multiply-add long"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlal_high_laneq_s16)"] @@ -14535,7 +14535,7 @@ pub fn vmlal_high_laneq_s16<const LANE: i32>( c: int16x8_t, ) -> int32x4_t { static_assert_uimm_bits!(LANE, 3); - unsafe { vmlal_high_s16(a, b, simd_shuffle!(c, c, [LANE as u32; 8])) } + vmlal_high_s16(a, b, vdupq_laneq_s16::<LANE>(c)) } #[doc = "Multiply-add long"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlal_high_lane_s32)"] @@ -14546,7 +14546,7 @@ pub fn vmlal_high_laneq_s16<const LANE: i32>( #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub fn vmlal_high_lane_s32<const LANE: i32>(a: int64x2_t, b: int32x4_t, c: int32x2_t) -> int64x2_t { static_assert_uimm_bits!(LANE, 1); - unsafe { vmlal_high_s32(a, b, simd_shuffle!(c, c, [LANE as u32; 4])) } + vmlal_high_s32(a, b, vdupq_lane_s32::<LANE>(c)) } #[doc = "Multiply-add long"] #[doc
= "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlal_high_laneq_s32)"] @@ -14561,7 +14561,7 @@ pub fn vmlal_high_laneq_s32<const LANE: i32>( c: int32x4_t, ) -> int64x2_t { static_assert_uimm_bits!(LANE, 2); - unsafe { vmlal_high_s32(a, b, simd_shuffle!(c, c, [LANE as u32; 4])) } + vmlal_high_s32(a, b, vdupq_laneq_s32::<LANE>(c)) } #[doc = "Multiply-add long"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlal_high_lane_u16)"] @@ -14576,7 +14576,7 @@ pub fn vmlal_high_lane_u16<const LANE: i32>( c: uint16x4_t, ) -> uint32x4_t { static_assert_uimm_bits!(LANE, 2); - unsafe { vmlal_high_u16(a, b, simd_shuffle!(c, c, [LANE as u32; 8])) } + vmlal_high_u16(a, b, vdupq_lane_u16::<LANE>(c)) } #[doc = "Multiply-add long"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlal_high_laneq_u16)"] @@ -14591,7 +14591,7 @@ pub fn vmlal_high_laneq_u16<const LANE: i32>( c: uint16x8_t, ) -> uint32x4_t { static_assert_uimm_bits!(LANE, 3); - unsafe { vmlal_high_u16(a, b, simd_shuffle!(c, c, [LANE as u32; 8])) } + vmlal_high_u16(a, b, vdupq_laneq_u16::<LANE>(c)) } #[doc = "Multiply-add long"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlal_high_lane_u32)"] @@ -14606,7 +14606,7 @@ pub fn vmlal_high_lane_u32<const LANE: i32>( c: uint32x2_t, ) -> uint64x2_t { static_assert_uimm_bits!(LANE, 1); - unsafe { vmlal_high_u32(a, b, simd_shuffle!(c, c, [LANE as u32; 4])) } + vmlal_high_u32(a, b, vdupq_lane_u32::<LANE>(c)) } #[doc = "Multiply-add long"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlal_high_laneq_u32)"] @@ -14621,7 +14621,7 @@ pub fn vmlal_high_laneq_u32<const LANE: i32>( c: uint32x4_t, ) -> uint64x2_t { static_assert_uimm_bits!(LANE, 2); - unsafe { vmlal_high_u32(a, b, simd_shuffle!(c, c, [LANE as u32; 4])) } + vmlal_high_u32(a, b, vdupq_laneq_u32::<LANE>(c)) } #[doc = "Multiply-add long"] #[doc = "[Arm's
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlal_high_n_s16)"] @@ -14764,7 +14764,7 @@ pub fn vmlsq_f64(a: float64x2_t, b: float64x2_t, c: float64x2_t) -> float64x2_t #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub fn vmlsl_high_lane_s16<const LANE: i32>(a: int32x4_t, b: int16x8_t, c: int16x4_t) -> int32x4_t { static_assert_uimm_bits!(LANE, 2); - unsafe { vmlsl_high_s16(a, b, simd_shuffle!(c, c, [LANE as u32; 8])) } + vmlsl_high_s16(a, b, vdupq_lane_s16::<LANE>(c)) } #[doc = "Multiply-subtract long"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsl_high_laneq_s16)"] @@ -14779,7 +14779,7 @@ pub fn vmlsl_high_laneq_s16<const LANE: i32>( c: int16x8_t, ) -> int32x4_t { static_assert_uimm_bits!(LANE, 3); - unsafe { vmlsl_high_s16(a, b, simd_shuffle!(c, c, [LANE as u32; 8])) } + vmlsl_high_s16(a, b, vdupq_laneq_s16::<LANE>(c)) } #[doc = "Multiply-subtract long"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsl_high_lane_s32)"] @@ -14790,7 +14790,7 @@ pub fn vmlsl_high_laneq_s16<const LANE: i32>( #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub fn vmlsl_high_lane_s32<const LANE: i32>(a: int64x2_t, b: int32x4_t, c: int32x2_t) -> int64x2_t { static_assert_uimm_bits!(LANE, 1); - unsafe { vmlsl_high_s32(a, b, simd_shuffle!(c, c, [LANE as u32; 4])) } + vmlsl_high_s32(a, b, vdupq_lane_s32::<LANE>(c)) } #[doc = "Multiply-subtract long"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsl_high_laneq_s32)"] @@ -14805,7 +14805,7 @@ pub fn vmlsl_high_laneq_s32<const LANE: i32>( c: int32x4_t, ) -> int64x2_t { static_assert_uimm_bits!(LANE, 2); - unsafe { vmlsl_high_s32(a, b, simd_shuffle!(c, c, [LANE as u32; 4])) } + vmlsl_high_s32(a, b, vdupq_laneq_s32::<LANE>(c)) } #[doc = "Multiply-subtract long"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsl_high_lane_u16)"] @@ -14820,7 +14820,7 @@ pub fn
vmlsl_high_lane_u16<const LANE: i32>( c: uint16x4_t, ) -> uint32x4_t { static_assert_uimm_bits!(LANE, 2); - unsafe { vmlsl_high_u16(a, b, simd_shuffle!(c, c, [LANE as u32; 8])) } + vmlsl_high_u16(a, b, vdupq_lane_u16::<LANE>(c)) } #[doc = "Multiply-subtract long"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsl_high_laneq_u16)"] @@ -14835,7 +14835,7 @@ pub fn vmlsl_high_laneq_u16<const LANE: i32>( c: uint16x8_t, ) -> uint32x4_t { static_assert_uimm_bits!(LANE, 3); - unsafe { vmlsl_high_u16(a, b, simd_shuffle!(c, c, [LANE as u32; 8])) } + vmlsl_high_u16(a, b, vdupq_laneq_u16::<LANE>(c)) } #[doc = "Multiply-subtract long"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsl_high_lane_u32)"] @@ -14850,7 +14850,7 @@ pub fn vmlsl_high_lane_u32<const LANE: i32>( c: uint32x2_t, ) -> uint64x2_t { static_assert_uimm_bits!(LANE, 1); - unsafe { vmlsl_high_u32(a, b, simd_shuffle!(c, c, [LANE as u32; 4])) } + vmlsl_high_u32(a, b, vdupq_lane_u32::<LANE>(c)) } #[doc = "Multiply-subtract long"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsl_high_laneq_u32)"] @@ -14865,7 +14865,7 @@ pub fn vmlsl_high_laneq_u32<const LANE: i32>( c: uint32x4_t, ) -> uint64x2_t { static_assert_uimm_bits!(LANE, 2); - unsafe { vmlsl_high_u32(a, b, simd_shuffle!(c, c, [LANE as u32; 4])) } + vmlsl_high_u32(a, b, vdupq_laneq_u32::<LANE>(c)) } #[doc = "Multiply-subtract long"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsl_high_n_s16)"] @@ -15270,7 +15270,7 @@ pub fn vmulh_laneq_f16(a: f16, b: float16x8_t) -> f16 { #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub fn vmull_high_lane_s16<const LANE: i32>(a: int16x8_t, b: int16x4_t) -> int32x4_t { static_assert_uimm_bits!(LANE, 2); - unsafe { vmull_high_s16(a, simd_shuffle!(b, b, [LANE as u32; 8])) } + vmull_high_s16(a, vdupq_lane_s16::<LANE>(b)) } #[doc = "Multiply long"] #[doc = "[Arm's
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmull_high_laneq_s16)"] @@ -15281,7 +15281,7 @@ pub fn vmull_high_lane_s16<const LANE: i32>(a: int16x8_t, b: int16x4_t) -> int32 #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub fn vmull_high_laneq_s16<const LANE: i32>(a: int16x8_t, b: int16x8_t) -> int32x4_t { static_assert_uimm_bits!(LANE, 3); - unsafe { vmull_high_s16(a, simd_shuffle!(b, b, [LANE as u32; 8])) } + vmull_high_s16(a, vdupq_laneq_s16::<LANE>(b)) } #[doc = "Multiply long"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmull_high_lane_s32)"] @@ -15292,7 +15292,7 @@ pub fn vmull_high_laneq_s16<const LANE: i32>(a: int16x8_t, b: int16x8_t) -> int3 #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub fn vmull_high_lane_s32<const LANE: i32>(a: int32x4_t, b: int32x2_t) -> int64x2_t { static_assert_uimm_bits!(LANE, 1); - unsafe { vmull_high_s32(a, simd_shuffle!(b, b, [LANE as u32; 4])) } + vmull_high_s32(a, vdupq_lane_s32::<LANE>(b)) } #[doc = "Multiply long"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmull_high_laneq_s32)"] @@ -15303,7 +15303,7 @@ pub fn vmull_high_lane_s32<const LANE: i32>(a: int32x4_t, b: int32x2_t) -> int64 #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub fn vmull_high_laneq_s32<const LANE: i32>(a: int32x4_t, b: int32x4_t) -> int64x2_t { static_assert_uimm_bits!(LANE, 2); - unsafe { vmull_high_s32(a, simd_shuffle!(b, b, [LANE as u32; 4])) } + vmull_high_s32(a, vdupq_laneq_s32::<LANE>(b)) } #[doc = "Multiply long"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmull_high_lane_u16)"] @@ -15314,7 +15314,7 @@ pub fn vmull_high_laneq_s32<const LANE: i32>(a: int32x4_t, b: int32x4_t) -> int6 #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub fn vmull_high_lane_u16<const LANE: i32>(a: uint16x8_t, b: uint16x4_t) -> uint32x4_t { static_assert_uimm_bits!(LANE, 2); - unsafe { vmull_high_u16(a, simd_shuffle!(b, b, [LANE as u32; 8])) } + vmull_high_u16(a,
vdupq_lane_u16::<LANE>(b)) } #[doc = "Multiply long"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmull_high_laneq_u16)"] @@ -15325,7 +15325,7 @@ pub fn vmull_high_lane_u16<const LANE: i32>(a: uint16x8_t, b: uint16x4_t) -> uin #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub fn vmull_high_laneq_u16<const LANE: i32>(a: uint16x8_t, b: uint16x8_t) -> uint32x4_t { static_assert_uimm_bits!(LANE, 3); - unsafe { vmull_high_u16(a, simd_shuffle!(b, b, [LANE as u32; 8])) } + vmull_high_u16(a, vdupq_laneq_u16::<LANE>(b)) } #[doc = "Multiply long"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmull_high_lane_u32)"] @@ -15336,7 +15336,7 @@ pub fn vmull_high_laneq_u16<const LANE: i32>(a: uint16x8_t, b: uint16x8_t) -> ui #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub fn vmull_high_lane_u32<const LANE: i32>(a: uint32x4_t, b: uint32x2_t) -> uint64x2_t { static_assert_uimm_bits!(LANE, 1); - unsafe { vmull_high_u32(a, simd_shuffle!(b, b, [LANE as u32; 4])) } + vmull_high_u32(a, vdupq_lane_u32::<LANE>(b)) } #[doc = "Multiply long"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmull_high_laneq_u32)"] @@ -15347,7 +15347,7 @@ pub fn vmull_high_lane_u32<const LANE: i32>(a: uint32x4_t, b: uint32x2_t) -> uin #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub fn vmull_high_laneq_u32<const LANE: i32>(a: uint32x4_t, b: uint32x4_t) -> uint64x2_t { static_assert_uimm_bits!(LANE, 2); - unsafe { vmull_high_u32(a, simd_shuffle!(b, b, [LANE as u32; 4])) } + vmull_high_u32(a, vdupq_laneq_u32::<LANE>(b)) } #[doc = "Multiply long"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmull_high_n_s16)"] diff --git a/library/stdarch/crates/stdarch-gen-arm/spec/neon/aarch64.spec.yml b/library/stdarch/crates/stdarch-gen-arm/spec/neon/aarch64.spec.yml index 9c59af133871..64408a4d92ae 100644 --- a/library/stdarch/crates/stdarch-gen-arm/spec/neon/aarch64.spec.yml +++
b/library/stdarch/crates/stdarch-gen-arm/spec/neon/aarch64.spec.yml @@ -11419,17 +11419,17 @@ intrinsics: static_defs: ['const LANE: i32'] safety: safe types: - - [int32x4_t, int16x8_t, int16x4_t, '2', '[LANE as u32; 8]'] - - [int32x4_t, int16x8_t, int16x8_t, '3', '[LANE as u32; 8]'] - - [int64x2_t, int32x4_t, int32x2_t, '1', '[LANE as u32; 4]'] - - [int64x2_t, int32x4_t, int32x4_t, '2', '[LANE as u32; 4]'] + - [int32x4_t, int16x8_t, int16x4_t, '2'] + - [int32x4_t, int16x8_t, int16x8_t, '3'] + - [int64x2_t, int32x4_t, int32x2_t, '1'] + - [int64x2_t, int32x4_t, int32x4_t, '2'] compose: - FnCall: [static_assert_uimm_bits!, [LANE, '{type[3]}']] - FnCall: - "vmlsl_high_{neon_type[1]}" - - a - b - - FnCall: [simd_shuffle!, [c, c, "{type[4]}"]] + - FnCall: ['vdupq_lane{neon_type[2].no}', [c], [LANE]] - name: "vmlsl_high_lane{neon_type[2].no}" doc: "Multiply-subtract long" @@ -11442,17 +11442,17 @@ intrinsics: static_defs: ['const LANE: i32'] safety: safe types: - - [uint32x4_t, uint16x8_t, uint16x4_t, '2', '[LANE as u32; 8]'] - - [uint32x4_t, uint16x8_t, uint16x8_t, '3', '[LANE as u32; 8]'] - - [uint64x2_t, uint32x4_t, uint32x2_t, '1', '[LANE as u32; 4]'] - - [uint64x2_t, uint32x4_t, uint32x4_t, '2', '[LANE as u32; 4]'] + - [uint32x4_t, uint16x8_t, uint16x4_t, '2'] + - [uint32x4_t, uint16x8_t, uint16x8_t, '3'] + - [uint64x2_t, uint32x4_t, uint32x2_t, '1'] + - [uint64x2_t, uint32x4_t, uint32x4_t, '2'] compose: - FnCall: [static_assert_uimm_bits!, [LANE, '{type[3]}']] - FnCall: - "vmlsl_high_{neon_type[1]}" - - a - b - - FnCall: [simd_shuffle!, [c, c, "{type[4]}"]] + - FnCall: ['vdupq_lane{neon_type[2].no}', [c], [LANE]] - name: "vclt{neon_type[0].no}" doc: "Floating-point compare less than" @@ -11830,16 +11830,16 @@ intrinsics: static_defs: ['const LANE: i32'] safety: safe types: - - [int16x8_t, int16x4_t, int32x4_t, '2', '[LANE as u32; 8]'] - - [int16x8_t, int16x8_t, int32x4_t, '3', '[LANE as u32; 8]'] - - [int32x4_t, int32x2_t, int64x2_t, '1', '[LANE as u32; 4]'] - - 
[int32x4_t, int32x4_t, int64x2_t, '2', '[LANE as u32; 4]'] + - [int16x8_t, int16x4_t, int32x4_t, '2'] + - [int16x8_t, int16x8_t, int32x4_t, '3'] + - [int32x4_t, int32x2_t, int64x2_t, '1'] + - [int32x4_t, int32x4_t, int64x2_t, '2'] compose: - FnCall: [static_assert_uimm_bits!, [LANE, "{type[3]}"]] - FnCall: - "vmull_high_{neon_type[0]}" - - a - - FnCall: [simd_shuffle!, [b, b, '{type[4]}']] + - FnCall: ['vdupq_lane{neon_type[1].no}', [b], [LANE]] - name: "vmull_high_lane{neon_type[1].no}" doc: "Multiply long" @@ -11852,16 +11852,16 @@ intrinsics: static_defs: ['const LANE: i32'] safety: safe types: - - [uint16x8_t, uint16x4_t, uint32x4_t, '2', '[LANE as u32; 8]'] - - [uint16x8_t, uint16x8_t, uint32x4_t, '3', '[LANE as u32; 8]'] - - [uint32x4_t, uint32x2_t, uint64x2_t, '1', '[LANE as u32; 4]'] - - [uint32x4_t, uint32x4_t, uint64x2_t, '2', '[LANE as u32; 4]'] + - [uint16x8_t, uint16x4_t, uint32x4_t, '2'] + - [uint16x8_t, uint16x8_t, uint32x4_t, '3'] + - [uint32x4_t, uint32x2_t, uint64x2_t, '1'] + - [uint32x4_t, uint32x4_t, uint64x2_t, '2'] compose: - FnCall: [static_assert_uimm_bits!, [LANE, "{type[3]}"]] - FnCall: - "vmull_high_{neon_type[0]}" - - a - - FnCall: [simd_shuffle!, [b, b, '{type[4]}']] + - FnCall: ['vdupq_lane{neon_type[1].no}', [b], [LANE]] - name: "vrsqrte{neon_type.no}" doc: "Reciprocal square-root estimate." 
@@ -12143,13 +12143,13 @@ intrinsics: static_defs: ['const LANE: i32'] safety: safe types: - - [int32x4_t, int16x8_t, int16x4_t, '2', '[LANE as u32; 8]'] - - [int32x4_t, int16x8_t, int16x8_t, '3', '[LANE as u32; 8]'] - - [int64x2_t, int32x4_t, int32x2_t, '1', '[LANE as u32; 4]'] - - [int64x2_t, int32x4_t, int32x4_t, '2', '[LANE as u32; 4]'] + - [int32x4_t, int16x8_t, int16x4_t, '2'] + - [int32x4_t, int16x8_t, int16x8_t, '3'] + - [int64x2_t, int32x4_t, int32x2_t, '1'] + - [int64x2_t, int32x4_t, int32x4_t, '2'] compose: - FnCall: [static_assert_uimm_bits!, [LANE, '{type[3]}']] - - FnCall: ['vmlal_high_{neon_type[2]}', [a, b, {FnCall: [simd_shuffle!, [c, c, '{type[4]}']]}]] + - FnCall: ['vmlal_high_{neon_type[2]}', [a, b, {FnCall: ['vdupq_lane{neon_type[2].no}', [c], [LANE]]}]] - name: "vmlal_high_lane{neon_type[2].no}" doc: "Multiply-add long" @@ -12162,13 +12162,13 @@ intrinsics: static_defs: ['const LANE: i32'] safety: safe types: - - [uint32x4_t, uint16x8_t, uint16x4_t, '2', '[LANE as u32; 8]'] - - [uint32x4_t, uint16x8_t, uint16x8_t, '3', '[LANE as u32; 8]'] - - [uint64x2_t, uint32x4_t, uint32x2_t, '1', '[LANE as u32; 4]'] - - [uint64x2_t, uint32x4_t, uint32x4_t, '2', '[LANE as u32; 4]'] + - [uint32x4_t, uint16x8_t, uint16x4_t, '2'] + - [uint32x4_t, uint16x8_t, uint16x8_t, '3'] + - [uint64x2_t, uint32x4_t, uint32x2_t, '1'] + - [uint64x2_t, uint32x4_t, uint32x4_t, '2'] compose: - FnCall: [static_assert_uimm_bits!, [LANE, '{type[3]}']] - - FnCall: ['vmlal_high_{neon_type[2]}', [a, b, {FnCall: [simd_shuffle!, [c, c, '{type[4]}']]}]] + - FnCall: ['vmlal_high_{neon_type[2]}', [a, b, {FnCall: ['vdupq_lane{neon_type[2].no}', [c], [LANE]]}]] - name: "vrsrad_n_u64" doc: "Unsigned rounding shift right and accumulate."