Change implementation of vqrdml{a,s}h_lane

This commit is contained in:
sayantn
2026-05-09 05:15:21 +05:30
parent 077f63f91e
commit 933aa5c3b5
2 changed files with 50 additions and 82 deletions
@@ -15276,10 +15276,8 @@ pub fn vqnegd_s64(a: i64) -> i64 {
#[stable(feature = "rdm_intrinsics", since = "1.62.0")]
pub fn vqrdmlah_lane_s16<const LANE: i32>(a: int16x4_t, b: int16x4_t, c: int16x4_t) -> int16x4_t {
// LANE is checked at compile time as a 2-bit unsigned immediate (`c` has 4 lanes).
static_assert_uimm_bits!(LANE, 2);
// NOTE(review): this view is a commit diff with its +/- markers stripped. The `unsafe` block
// looks like the removed implementation: every shuffle index is LANE, so lane LANE of `c` is
// copied into all 4 lanes before the call to `vqrdmlah_s16`. Confirm against the raw diff.
unsafe {
let c: int16x4_t = simd_shuffle!(c, c, [LANE as u32; 4]);
vqrdmlah_s16(a, b, c)
}
// Presumably the added implementation: `vdup_lane_s16::<LANE>` is expected to yield the same
// lane broadcast (no `unsafe` block at this call site); the result goes to `vqrdmlah_s16`.
let c = vdup_lane_s16::<LANE>(c);
vqrdmlah_s16(a, b, c)
}
#[doc = "Signed saturating rounding doubling multiply accumulate returning high half"]
#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrdmlah_lane_s32)"]
@@ -15290,10 +15288,8 @@ pub fn vqrdmlah_lane_s16<const LANE: i32>(a: int16x4_t, b: int16x4_t, c: int16x4
#[stable(feature = "rdm_intrinsics", since = "1.62.0")]
pub fn vqrdmlah_lane_s32<const LANE: i32>(a: int32x2_t, b: int32x2_t, c: int32x2_t) -> int32x2_t {
// LANE is checked at compile time as a 1-bit unsigned immediate (`c` has 2 lanes).
static_assert_uimm_bits!(LANE, 1);
// NOTE(review): this view is a commit diff with its +/- markers stripped. The `unsafe` block
// looks like the removed implementation: both shuffle indices are LANE, so lane LANE of `c`
// fills the 2-lane vector passed to `vqrdmlah_s32`. Confirm against the raw diff.
unsafe {
let c: int32x2_t = simd_shuffle!(c, c, [LANE as u32; 2]);
vqrdmlah_s32(a, b, c)
}
// Presumably the added implementation: `vdup_lane_s32::<LANE>` is expected to yield the same
// lane broadcast (no `unsafe` block at this call site); the result goes to `vqrdmlah_s32`.
let c = vdup_lane_s32::<LANE>(c);
vqrdmlah_s32(a, b, c)
}
#[doc = "Signed saturating rounding doubling multiply accumulate returning high half"]
#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrdmlah_laneq_s16)"]
@@ -15304,10 +15300,8 @@ pub fn vqrdmlah_lane_s32<const LANE: i32>(a: int32x2_t, b: int32x2_t, c: int32x2
#[stable(feature = "rdm_intrinsics", since = "1.62.0")]
pub fn vqrdmlah_laneq_s16<const LANE: i32>(a: int16x4_t, b: int16x4_t, c: int16x8_t) -> int16x4_t {
// LANE is checked at compile time as a 3-bit unsigned immediate (`c` has 8 lanes).
static_assert_uimm_bits!(LANE, 3);
// NOTE(review): this view is a commit diff with its +/- markers stripped. The `unsafe` block
// looks like the removed implementation: the shuffle picks lane LANE of the 8-lane `c` four
// times, narrowing it to the 4-lane operand of `vqrdmlah_s16`. Confirm against the raw diff.
unsafe {
let c: int16x4_t = simd_shuffle!(c, c, [LANE as u32; 4]);
vqrdmlah_s16(a, b, c)
}
// Presumably the added implementation: `vdup_laneq_s16::<LANE>` is expected to yield the same
// 4-lane broadcast from the 8-lane input; the result goes to `vqrdmlah_s16`.
let c = vdup_laneq_s16::<LANE>(c);
vqrdmlah_s16(a, b, c)
}
#[doc = "Signed saturating rounding doubling multiply accumulate returning high half"]
#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrdmlah_laneq_s32)"]
@@ -15318,10 +15312,8 @@ pub fn vqrdmlah_laneq_s16<const LANE: i32>(a: int16x4_t, b: int16x4_t, c: int16x
#[stable(feature = "rdm_intrinsics", since = "1.62.0")]
pub fn vqrdmlah_laneq_s32<const LANE: i32>(a: int32x2_t, b: int32x2_t, c: int32x4_t) -> int32x2_t {
// LANE is checked at compile time as a 2-bit unsigned immediate (`c` has 4 lanes).
static_assert_uimm_bits!(LANE, 2);
// NOTE(review): this view is a commit diff with its +/- markers stripped. The `unsafe` block
// looks like the removed implementation: the shuffle picks lane LANE of the 4-lane `c` twice,
// narrowing it to the 2-lane operand of `vqrdmlah_s32`. Confirm against the raw diff.
unsafe {
let c: int32x2_t = simd_shuffle!(c, c, [LANE as u32; 2]);
vqrdmlah_s32(a, b, c)
}
// Presumably the added implementation: `vdup_laneq_s32::<LANE>` is expected to yield the same
// 2-lane broadcast from the 4-lane input; the result goes to `vqrdmlah_s32`.
let c = vdup_laneq_s32::<LANE>(c);
vqrdmlah_s32(a, b, c)
}
#[doc = "Signed saturating rounding doubling multiply accumulate returning high half"]
#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrdmlahq_lane_s16)"]
@@ -15332,10 +15324,8 @@ pub fn vqrdmlah_laneq_s32<const LANE: i32>(a: int32x2_t, b: int32x2_t, c: int32x
#[stable(feature = "rdm_intrinsics", since = "1.62.0")]
pub fn vqrdmlahq_lane_s16<const LANE: i32>(a: int16x8_t, b: int16x8_t, c: int16x4_t) -> int16x8_t {
// LANE is checked at compile time as a 2-bit unsigned immediate (`c` has 4 lanes).
static_assert_uimm_bits!(LANE, 2);
// NOTE(review): this view is a commit diff with its +/- markers stripped. The `unsafe` block
// looks like the removed implementation: the shuffle picks lane LANE of the 4-lane `c` eight
// times, widening it to the 8-lane operand of `vqrdmlahq_s16`. Confirm against the raw diff.
unsafe {
let c: int16x8_t = simd_shuffle!(c, c, [LANE as u32; 8]);
vqrdmlahq_s16(a, b, c)
}
// Presumably the added implementation: `vdupq_lane_s16::<LANE>` is expected to yield the same
// 8-lane broadcast from the 4-lane input; the result goes to `vqrdmlahq_s16`.
let c = vdupq_lane_s16::<LANE>(c);
vqrdmlahq_s16(a, b, c)
}
#[doc = "Signed saturating rounding doubling multiply accumulate returning high half"]
#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrdmlahq_lane_s32)"]
@@ -15346,10 +15336,8 @@ pub fn vqrdmlahq_lane_s16<const LANE: i32>(a: int16x8_t, b: int16x8_t, c: int16x
#[stable(feature = "rdm_intrinsics", since = "1.62.0")]
pub fn vqrdmlahq_lane_s32<const LANE: i32>(a: int32x4_t, b: int32x4_t, c: int32x2_t) -> int32x4_t {
// LANE is checked at compile time as a 1-bit unsigned immediate (`c` has 2 lanes).
static_assert_uimm_bits!(LANE, 1);
// NOTE(review): this view is a commit diff with its +/- markers stripped. The `unsafe` block
// looks like the removed implementation: the shuffle picks lane LANE of the 2-lane `c` four
// times, widening it to the 4-lane operand of `vqrdmlahq_s32`. Confirm against the raw diff.
unsafe {
let c: int32x4_t = simd_shuffle!(c, c, [LANE as u32; 4]);
vqrdmlahq_s32(a, b, c)
}
// Presumably the added implementation: `vdupq_lane_s32::<LANE>` is expected to yield the same
// 4-lane broadcast from the 2-lane input; the result goes to `vqrdmlahq_s32`.
let c = vdupq_lane_s32::<LANE>(c);
vqrdmlahq_s32(a, b, c)
}
#[doc = "Signed saturating rounding doubling multiply accumulate returning high half"]
#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrdmlahq_laneq_s16)"]
@@ -15360,10 +15348,8 @@ pub fn vqrdmlahq_lane_s32<const LANE: i32>(a: int32x4_t, b: int32x4_t, c: int32x
#[stable(feature = "rdm_intrinsics", since = "1.62.0")]
pub fn vqrdmlahq_laneq_s16<const LANE: i32>(a: int16x8_t, b: int16x8_t, c: int16x8_t) -> int16x8_t {
// LANE is checked at compile time as a 3-bit unsigned immediate (`c` has 8 lanes).
static_assert_uimm_bits!(LANE, 3);
// NOTE(review): this view is a commit diff with its +/- markers stripped. The `unsafe` block
// looks like the removed implementation: every shuffle index is LANE, so lane LANE of `c` is
// copied into all 8 lanes before the call to `vqrdmlahq_s16`. Confirm against the raw diff.
unsafe {
let c: int16x8_t = simd_shuffle!(c, c, [LANE as u32; 8]);
vqrdmlahq_s16(a, b, c)
}
// Presumably the added implementation: `vdupq_laneq_s16::<LANE>` is expected to yield the same
// lane broadcast (no `unsafe` block at this call site); the result goes to `vqrdmlahq_s16`.
let c = vdupq_laneq_s16::<LANE>(c);
vqrdmlahq_s16(a, b, c)
}
#[doc = "Signed saturating rounding doubling multiply accumulate returning high half"]
#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrdmlahq_laneq_s32)"]
@@ -15374,10 +15360,8 @@ pub fn vqrdmlahq_laneq_s16<const LANE: i32>(a: int16x8_t, b: int16x8_t, c: int16
#[stable(feature = "rdm_intrinsics", since = "1.62.0")]
pub fn vqrdmlahq_laneq_s32<const LANE: i32>(a: int32x4_t, b: int32x4_t, c: int32x4_t) -> int32x4_t {
// LANE is checked at compile time as a 2-bit unsigned immediate (`c` has 4 lanes).
static_assert_uimm_bits!(LANE, 2);
// NOTE(review): this view is a commit diff with its +/- markers stripped. The `unsafe` block
// looks like the removed implementation: every shuffle index is LANE, so lane LANE of `c` is
// copied into all 4 lanes before the call to `vqrdmlahq_s32`. Confirm against the raw diff.
unsafe {
let c: int32x4_t = simd_shuffle!(c, c, [LANE as u32; 4]);
vqrdmlahq_s32(a, b, c)
}
// Presumably the added implementation: `vdupq_laneq_s32::<LANE>` is expected to yield the same
// lane broadcast (no `unsafe` block at this call site); the result goes to `vqrdmlahq_s32`.
let c = vdupq_laneq_s32::<LANE>(c);
vqrdmlahq_s32(a, b, c)
}
#[doc = "Signed saturating rounding doubling multiply accumulate returning high half"]
#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrdmlah_s16)"]
@@ -15520,10 +15504,8 @@ pub fn vqrdmlahs_s32(a: i32, b: i32, c: i32) -> i32 {
#[stable(feature = "rdm_intrinsics", since = "1.62.0")]
pub fn vqrdmlsh_lane_s16<const LANE: i32>(a: int16x4_t, b: int16x4_t, c: int16x4_t) -> int16x4_t {
// LANE is checked at compile time as a 2-bit unsigned immediate (`c` has 4 lanes).
static_assert_uimm_bits!(LANE, 2);
// NOTE(review): this view is a commit diff with its +/- markers stripped. The `unsafe` block
// looks like the removed implementation: every shuffle index is LANE, so lane LANE of `c` is
// copied into all 4 lanes before the call to `vqrdmlsh_s16`. Confirm against the raw diff.
unsafe {
let c: int16x4_t = simd_shuffle!(c, c, [LANE as u32; 4]);
vqrdmlsh_s16(a, b, c)
}
// Presumably the added implementation: `vdup_lane_s16::<LANE>` is expected to yield the same
// lane broadcast (no `unsafe` block at this call site); the result goes to `vqrdmlsh_s16`.
let c = vdup_lane_s16::<LANE>(c);
vqrdmlsh_s16(a, b, c)
}
#[doc = "Signed saturating rounding doubling multiply subtract returning high half"]
#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrdmlsh_lane_s32)"]
@@ -15534,10 +15516,8 @@ pub fn vqrdmlsh_lane_s16<const LANE: i32>(a: int16x4_t, b: int16x4_t, c: int16x4
#[stable(feature = "rdm_intrinsics", since = "1.62.0")]
pub fn vqrdmlsh_lane_s32<const LANE: i32>(a: int32x2_t, b: int32x2_t, c: int32x2_t) -> int32x2_t {
// LANE is checked at compile time as a 1-bit unsigned immediate (`c` has 2 lanes).
static_assert_uimm_bits!(LANE, 1);
// NOTE(review): this view is a commit diff with its +/- markers stripped. The `unsafe` block
// looks like the removed implementation: both shuffle indices are LANE, so lane LANE of `c`
// fills the 2-lane vector passed to `vqrdmlsh_s32`. Confirm against the raw diff.
unsafe {
let c: int32x2_t = simd_shuffle!(c, c, [LANE as u32; 2]);
vqrdmlsh_s32(a, b, c)
}
// Presumably the added implementation: `vdup_lane_s32::<LANE>` is expected to yield the same
// lane broadcast (no `unsafe` block at this call site); the result goes to `vqrdmlsh_s32`.
let c = vdup_lane_s32::<LANE>(c);
vqrdmlsh_s32(a, b, c)
}
#[doc = "Signed saturating rounding doubling multiply subtract returning high half"]
#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrdmlsh_laneq_s16)"]
@@ -15548,10 +15528,8 @@ pub fn vqrdmlsh_lane_s32<const LANE: i32>(a: int32x2_t, b: int32x2_t, c: int32x2
#[stable(feature = "rdm_intrinsics", since = "1.62.0")]
pub fn vqrdmlsh_laneq_s16<const LANE: i32>(a: int16x4_t, b: int16x4_t, c: int16x8_t) -> int16x4_t {
// LANE is checked at compile time as a 3-bit unsigned immediate (`c` has 8 lanes).
static_assert_uimm_bits!(LANE, 3);
// NOTE(review): this view is a commit diff with its +/- markers stripped. The `unsafe` block
// looks like the removed implementation: the shuffle picks lane LANE of the 8-lane `c` four
// times, narrowing it to the 4-lane operand of `vqrdmlsh_s16`. Confirm against the raw diff.
unsafe {
let c: int16x4_t = simd_shuffle!(c, c, [LANE as u32; 4]);
vqrdmlsh_s16(a, b, c)
}
// Presumably the added implementation: `vdup_laneq_s16::<LANE>` is expected to yield the same
// 4-lane broadcast from the 8-lane input; the result goes to `vqrdmlsh_s16`.
let c = vdup_laneq_s16::<LANE>(c);
vqrdmlsh_s16(a, b, c)
}
#[doc = "Signed saturating rounding doubling multiply subtract returning high half"]
#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrdmlsh_laneq_s32)"]
@@ -15562,10 +15540,8 @@ pub fn vqrdmlsh_laneq_s16<const LANE: i32>(a: int16x4_t, b: int16x4_t, c: int16x
#[stable(feature = "rdm_intrinsics", since = "1.62.0")]
pub fn vqrdmlsh_laneq_s32<const LANE: i32>(a: int32x2_t, b: int32x2_t, c: int32x4_t) -> int32x2_t {
// LANE is checked at compile time as a 2-bit unsigned immediate (`c` has 4 lanes).
static_assert_uimm_bits!(LANE, 2);
// NOTE(review): this view is a commit diff with its +/- markers stripped. The `unsafe` block
// looks like the removed implementation: the shuffle picks lane LANE of the 4-lane `c` twice,
// narrowing it to the 2-lane operand of `vqrdmlsh_s32`. Confirm against the raw diff.
unsafe {
let c: int32x2_t = simd_shuffle!(c, c, [LANE as u32; 2]);
vqrdmlsh_s32(a, b, c)
}
// Presumably the added implementation: `vdup_laneq_s32::<LANE>` is expected to yield the same
// 2-lane broadcast from the 4-lane input; the result goes to `vqrdmlsh_s32`.
let c = vdup_laneq_s32::<LANE>(c);
vqrdmlsh_s32(a, b, c)
}
#[doc = "Signed saturating rounding doubling multiply subtract returning high half"]
#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrdmlshq_lane_s16)"]
@@ -15576,10 +15552,8 @@ pub fn vqrdmlsh_laneq_s32<const LANE: i32>(a: int32x2_t, b: int32x2_t, c: int32x
#[stable(feature = "rdm_intrinsics", since = "1.62.0")]
pub fn vqrdmlshq_lane_s16<const LANE: i32>(a: int16x8_t, b: int16x8_t, c: int16x4_t) -> int16x8_t {
// LANE is checked at compile time as a 2-bit unsigned immediate (`c` has 4 lanes).
static_assert_uimm_bits!(LANE, 2);
// NOTE(review): this view is a commit diff with its +/- markers stripped. The `unsafe` block
// looks like the removed implementation: the shuffle picks lane LANE of the 4-lane `c` eight
// times, widening it to the 8-lane operand of `vqrdmlshq_s16`. Confirm against the raw diff.
unsafe {
let c: int16x8_t = simd_shuffle!(c, c, [LANE as u32; 8]);
vqrdmlshq_s16(a, b, c)
}
// Presumably the added implementation: `vdupq_lane_s16::<LANE>` is expected to yield the same
// 8-lane broadcast from the 4-lane input; the result goes to `vqrdmlshq_s16`.
let c = vdupq_lane_s16::<LANE>(c);
vqrdmlshq_s16(a, b, c)
}
#[doc = "Signed saturating rounding doubling multiply subtract returning high half"]
#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrdmlshq_lane_s32)"]
@@ -15590,10 +15564,8 @@ pub fn vqrdmlshq_lane_s16<const LANE: i32>(a: int16x8_t, b: int16x8_t, c: int16x
#[stable(feature = "rdm_intrinsics", since = "1.62.0")]
pub fn vqrdmlshq_lane_s32<const LANE: i32>(a: int32x4_t, b: int32x4_t, c: int32x2_t) -> int32x4_t {
// LANE is checked at compile time as a 1-bit unsigned immediate (`c` has 2 lanes).
static_assert_uimm_bits!(LANE, 1);
// NOTE(review): this view is a commit diff with its +/- markers stripped. The `unsafe` block
// looks like the removed implementation: the shuffle picks lane LANE of the 2-lane `c` four
// times, widening it to the 4-lane operand of `vqrdmlshq_s32`. Confirm against the raw diff.
unsafe {
let c: int32x4_t = simd_shuffle!(c, c, [LANE as u32; 4]);
vqrdmlshq_s32(a, b, c)
}
// Presumably the added implementation: `vdupq_lane_s32::<LANE>` is expected to yield the same
// 4-lane broadcast from the 2-lane input; the result goes to `vqrdmlshq_s32`.
let c = vdupq_lane_s32::<LANE>(c);
vqrdmlshq_s32(a, b, c)
}
#[doc = "Signed saturating rounding doubling multiply subtract returning high half"]
#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrdmlshq_laneq_s16)"]
@@ -15604,10 +15576,8 @@ pub fn vqrdmlshq_lane_s32<const LANE: i32>(a: int32x4_t, b: int32x4_t, c: int32x
#[stable(feature = "rdm_intrinsics", since = "1.62.0")]
pub fn vqrdmlshq_laneq_s16<const LANE: i32>(a: int16x8_t, b: int16x8_t, c: int16x8_t) -> int16x8_t {
// LANE is checked at compile time as a 3-bit unsigned immediate (`c` has 8 lanes).
static_assert_uimm_bits!(LANE, 3);
// NOTE(review): this view is a commit diff with its +/- markers stripped. The `unsafe` block
// looks like the removed implementation: every shuffle index is LANE, so lane LANE of `c` is
// copied into all 8 lanes before the call to `vqrdmlshq_s16`. Confirm against the raw diff.
unsafe {
let c: int16x8_t = simd_shuffle!(c, c, [LANE as u32; 8]);
vqrdmlshq_s16(a, b, c)
}
// Presumably the added implementation: `vdupq_laneq_s16::<LANE>` is expected to yield the same
// lane broadcast (no `unsafe` block at this call site); the result goes to `vqrdmlshq_s16`.
let c = vdupq_laneq_s16::<LANE>(c);
vqrdmlshq_s16(a, b, c)
}
#[doc = "Signed saturating rounding doubling multiply subtract returning high half"]
#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrdmlshq_laneq_s32)"]
@@ -15618,10 +15588,8 @@ pub fn vqrdmlshq_laneq_s16<const LANE: i32>(a: int16x8_t, b: int16x8_t, c: int16
#[stable(feature = "rdm_intrinsics", since = "1.62.0")]
pub fn vqrdmlshq_laneq_s32<const LANE: i32>(a: int32x4_t, b: int32x4_t, c: int32x4_t) -> int32x4_t {
// LANE is checked at compile time as a 2-bit unsigned immediate (`c` has 4 lanes).
static_assert_uimm_bits!(LANE, 2);
// NOTE(review): this view is a commit diff with its +/- markers stripped. The `unsafe` block
// looks like the removed implementation: every shuffle index is LANE, so lane LANE of `c` is
// copied into all 4 lanes before the call to `vqrdmlshq_s32`. Confirm against the raw diff.
unsafe {
let c: int32x4_t = simd_shuffle!(c, c, [LANE as u32; 4]);
vqrdmlshq_s32(a, b, c)
}
// Presumably the added implementation: `vdupq_laneq_s32::<LANE>` is expected to yield the same
// lane broadcast (no `unsafe` block at this call site); the result goes to `vqrdmlshq_s32`.
let c = vdupq_laneq_s32::<LANE>(c);
vqrdmlshq_s32(a, b, c)
}
#[doc = "Signed saturating rounding doubling multiply subtract returning high half"]
#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrdmlsh_s16)"]
@@ -7613,17 +7613,17 @@ intrinsics:
static_defs: ['const LANE: i32']
safety: safe
types:
- [_lane_s16, int16x4_t, int16x4_t, int16x4_t, '2', '[LANE as u32; 4]']
- [_laneq_s16, int16x4_t, int16x4_t, int16x8_t, '3', '[LANE as u32; 4]']
- [q_lane_s16, int16x8_t, int16x8_t, int16x4_t, '2', '[LANE as u32; 8]']
- [q_laneq_s16, int16x8_t, int16x8_t, int16x8_t, '3', '[LANE as u32; 8]']
- [_lane_s32, int32x2_t, int32x2_t, int32x2_t, '1', '[LANE as u32; 2]']
- [_laneq_s32, int32x2_t, int32x2_t, int32x4_t, '2', '[LANE as u32; 2]']
- [q_lane_s32, int32x4_t, int32x4_t, int32x2_t, '1', '[LANE as u32; 4]']
- [q_laneq_s32, int32x4_t, int32x4_t, int32x4_t, '2', '[LANE as u32; 4]']
- [_lane_s16, int16x4_t, int16x4_t, int16x4_t, '2']
- [_laneq_s16, int16x4_t, int16x4_t, int16x8_t, '3']
- [q_lane_s16, int16x8_t, int16x8_t, int16x4_t, '2']
- [q_laneq_s16, int16x8_t, int16x8_t, int16x8_t, '3']
- [_lane_s32, int32x2_t, int32x2_t, int32x2_t, '1']
- [_laneq_s32, int32x2_t, int32x2_t, int32x4_t, '2']
- [q_lane_s32, int32x4_t, int32x4_t, int32x2_t, '1']
- [q_laneq_s32, int32x4_t, int32x4_t, int32x4_t, '2']
compose:
- FnCall: [static_assert_uimm_bits!, [LANE, '{type[4]}']]
- Let: [c, "{type[1]}", {FnCall: [simd_shuffle!, [c, c, "{type[5]}"]]}]
- Let: [c, {FnCall: ['vdup{type[0]}', [c], [LANE]]}]
- FnCall: ["vqrdmlah{neon_type[2].no}", [a, b, c]]
- name: "vqrdmlah{type[4]}"
@@ -7697,17 +7697,17 @@ intrinsics:
static_defs: ['const LANE: i32']
safety: safe
types:
- [_lane_s16, int16x4_t, int16x4_t, int16x4_t, '2', '[LANE as u32; 4]']
- [_laneq_s16, int16x4_t, int16x4_t, int16x8_t, '3', '[LANE as u32; 4]']
- [q_lane_s16, int16x8_t, int16x8_t, int16x4_t, '2', '[LANE as u32; 8]']
- [q_laneq_s16, int16x8_t, int16x8_t, int16x8_t, '3', '[LANE as u32; 8]']
- [_lane_s32, int32x2_t, int32x2_t, int32x2_t, '1', '[LANE as u32; 2]']
- [_laneq_s32, int32x2_t, int32x2_t, int32x4_t, '2', '[LANE as u32; 2]']
- [q_lane_s32, int32x4_t, int32x4_t, int32x2_t, '1', '[LANE as u32; 4]']
- [q_laneq_s32, int32x4_t, int32x4_t, int32x4_t, '2', '[LANE as u32; 4]']
- [_lane_s16, int16x4_t, int16x4_t, int16x4_t, '2']
- [_laneq_s16, int16x4_t, int16x4_t, int16x8_t, '3']
- [q_lane_s16, int16x8_t, int16x8_t, int16x4_t, '2']
- [q_laneq_s16, int16x8_t, int16x8_t, int16x8_t, '3']
- [_lane_s32, int32x2_t, int32x2_t, int32x2_t, '1']
- [_laneq_s32, int32x2_t, int32x2_t, int32x4_t, '2']
- [q_lane_s32, int32x4_t, int32x4_t, int32x2_t, '1']
- [q_laneq_s32, int32x4_t, int32x4_t, int32x4_t, '2']
compose:
- FnCall: [static_assert_uimm_bits!, [LANE, '{type[4]}']]
- Let: [c, "{type[1]}", {FnCall: [simd_shuffle!, [c, c, "{type[5]}"]]}]
- Let: [c, {FnCall: ['vdup{type[0]}', [c], [LANE]]}]
- FnCall: ["vqrdmlsh{neon_type[2].no}", [a, b, c]]
- name: "vqrdmlsh{type[3]}"