gen-arm: fix vmlal, vmlsl and vmull

This commit is contained in:
sayantn
2026-05-01 01:11:01 +05:30
parent 1964b04a31
commit a023ebf71f
2 changed files with 54 additions and 54 deletions
@@ -14520,7 +14520,7 @@ pub fn vmlaq_f64(a: float64x2_t, b: float64x2_t, c: float64x2_t) -> float64x2_t
#[stable(feature = "neon_intrinsics", since = "1.59.0")]
pub fn vmlal_high_lane_s16<const LANE: i32>(a: int32x4_t, b: int16x8_t, c: int16x4_t) -> int32x4_t {
static_assert_uimm_bits!(LANE, 2);
unsafe { vmlal_high_s16(a, b, simd_shuffle!(c, c, [LANE as u32; 8])) }
vmlal_high_s16(a, b, vdupq_lane_s16::<LANE>(c))
}
#[doc = "Multiply-add long"]
#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlal_high_laneq_s16)"]
@@ -14535,7 +14535,7 @@ pub fn vmlal_high_laneq_s16<const LANE: i32>(
c: int16x8_t,
) -> int32x4_t {
static_assert_uimm_bits!(LANE, 3);
unsafe { vmlal_high_s16(a, b, simd_shuffle!(c, c, [LANE as u32; 8])) }
vmlal_high_s16(a, b, vdupq_laneq_s16::<LANE>(c))
}
#[doc = "Multiply-add long"]
#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlal_high_lane_s32)"]
@@ -14546,7 +14546,7 @@ pub fn vmlal_high_laneq_s16<const LANE: i32>(
#[stable(feature = "neon_intrinsics", since = "1.59.0")]
pub fn vmlal_high_lane_s32<const LANE: i32>(a: int64x2_t, b: int32x4_t, c: int32x2_t) -> int64x2_t {
static_assert_uimm_bits!(LANE, 1);
unsafe { vmlal_high_s32(a, b, simd_shuffle!(c, c, [LANE as u32; 4])) }
vmlal_high_s32(a, b, vdupq_lane_s32::<LANE>(c))
}
#[doc = "Multiply-add long"]
#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlal_high_laneq_s32)"]
@@ -14561,7 +14561,7 @@ pub fn vmlal_high_laneq_s32<const LANE: i32>(
c: int32x4_t,
) -> int64x2_t {
static_assert_uimm_bits!(LANE, 2);
unsafe { vmlal_high_s32(a, b, simd_shuffle!(c, c, [LANE as u32; 4])) }
vmlal_high_s32(a, b, vdupq_laneq_s32::<LANE>(c))
}
#[doc = "Multiply-add long"]
#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlal_high_lane_u16)"]
@@ -14576,7 +14576,7 @@ pub fn vmlal_high_lane_u16<const LANE: i32>(
c: uint16x4_t,
) -> uint32x4_t {
static_assert_uimm_bits!(LANE, 2);
unsafe { vmlal_high_u16(a, b, simd_shuffle!(c, c, [LANE as u32; 8])) }
vmlal_high_u16(a, b, vdupq_lane_u16::<LANE>(c))
}
#[doc = "Multiply-add long"]
#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlal_high_laneq_u16)"]
@@ -14591,7 +14591,7 @@ pub fn vmlal_high_laneq_u16<const LANE: i32>(
c: uint16x8_t,
) -> uint32x4_t {
static_assert_uimm_bits!(LANE, 3);
unsafe { vmlal_high_u16(a, b, simd_shuffle!(c, c, [LANE as u32; 8])) }
vmlal_high_u16(a, b, vdupq_laneq_u16::<LANE>(c))
}
#[doc = "Multiply-add long"]
#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlal_high_lane_u32)"]
@@ -14606,7 +14606,7 @@ pub fn vmlal_high_lane_u32<const LANE: i32>(
c: uint32x2_t,
) -> uint64x2_t {
static_assert_uimm_bits!(LANE, 1);
unsafe { vmlal_high_u32(a, b, simd_shuffle!(c, c, [LANE as u32; 4])) }
vmlal_high_u32(a, b, vdupq_lane_u32::<LANE>(c))
}
#[doc = "Multiply-add long"]
#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlal_high_laneq_u32)"]
@@ -14621,7 +14621,7 @@ pub fn vmlal_high_laneq_u32<const LANE: i32>(
c: uint32x4_t,
) -> uint64x2_t {
static_assert_uimm_bits!(LANE, 2);
unsafe { vmlal_high_u32(a, b, simd_shuffle!(c, c, [LANE as u32; 4])) }
vmlal_high_u32(a, b, vdupq_laneq_u32::<LANE>(c))
}
#[doc = "Multiply-add long"]
#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlal_high_n_s16)"]
@@ -14764,7 +14764,7 @@ pub fn vmlsq_f64(a: float64x2_t, b: float64x2_t, c: float64x2_t) -> float64x2_t
#[stable(feature = "neon_intrinsics", since = "1.59.0")]
pub fn vmlsl_high_lane_s16<const LANE: i32>(a: int32x4_t, b: int16x8_t, c: int16x4_t) -> int32x4_t {
static_assert_uimm_bits!(LANE, 2);
unsafe { vmlsl_high_s16(a, b, simd_shuffle!(c, c, [LANE as u32; 8])) }
vmlsl_high_s16(a, b, vdupq_lane_s16::<LANE>(c))
}
#[doc = "Multiply-subtract long"]
#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsl_high_laneq_s16)"]
@@ -14779,7 +14779,7 @@ pub fn vmlsl_high_laneq_s16<const LANE: i32>(
c: int16x8_t,
) -> int32x4_t {
static_assert_uimm_bits!(LANE, 3);
unsafe { vmlsl_high_s16(a, b, simd_shuffle!(c, c, [LANE as u32; 8])) }
vmlsl_high_s16(a, b, vdupq_laneq_s16::<LANE>(c))
}
#[doc = "Multiply-subtract long"]
#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsl_high_lane_s32)"]
@@ -14790,7 +14790,7 @@ pub fn vmlsl_high_laneq_s16<const LANE: i32>(
#[stable(feature = "neon_intrinsics", since = "1.59.0")]
pub fn vmlsl_high_lane_s32<const LANE: i32>(a: int64x2_t, b: int32x4_t, c: int32x2_t) -> int64x2_t {
static_assert_uimm_bits!(LANE, 1);
unsafe { vmlsl_high_s32(a, b, simd_shuffle!(c, c, [LANE as u32; 4])) }
vmlsl_high_s32(a, b, vdupq_lane_s32::<LANE>(c))
}
#[doc = "Multiply-subtract long"]
#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsl_high_laneq_s32)"]
@@ -14805,7 +14805,7 @@ pub fn vmlsl_high_laneq_s32<const LANE: i32>(
c: int32x4_t,
) -> int64x2_t {
static_assert_uimm_bits!(LANE, 2);
unsafe { vmlsl_high_s32(a, b, simd_shuffle!(c, c, [LANE as u32; 4])) }
vmlsl_high_s32(a, b, vdupq_laneq_s32::<LANE>(c))
}
#[doc = "Multiply-subtract long"]
#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsl_high_lane_u16)"]
@@ -14820,7 +14820,7 @@ pub fn vmlsl_high_lane_u16<const LANE: i32>(
c: uint16x4_t,
) -> uint32x4_t {
static_assert_uimm_bits!(LANE, 2);
unsafe { vmlsl_high_u16(a, b, simd_shuffle!(c, c, [LANE as u32; 8])) }
vmlsl_high_u16(a, b, vdupq_lane_u16::<LANE>(c))
}
#[doc = "Multiply-subtract long"]
#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsl_high_laneq_u16)"]
@@ -14835,7 +14835,7 @@ pub fn vmlsl_high_laneq_u16<const LANE: i32>(
c: uint16x8_t,
) -> uint32x4_t {
static_assert_uimm_bits!(LANE, 3);
unsafe { vmlsl_high_u16(a, b, simd_shuffle!(c, c, [LANE as u32; 8])) }
vmlsl_high_u16(a, b, vdupq_laneq_u16::<LANE>(c))
}
#[doc = "Multiply-subtract long"]
#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsl_high_lane_u32)"]
@@ -14850,7 +14850,7 @@ pub fn vmlsl_high_lane_u32<const LANE: i32>(
c: uint32x2_t,
) -> uint64x2_t {
static_assert_uimm_bits!(LANE, 1);
unsafe { vmlsl_high_u32(a, b, simd_shuffle!(c, c, [LANE as u32; 4])) }
vmlsl_high_u32(a, b, vdupq_lane_u32::<LANE>(c))
}
#[doc = "Multiply-subtract long"]
#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsl_high_laneq_u32)"]
@@ -14865,7 +14865,7 @@ pub fn vmlsl_high_laneq_u32<const LANE: i32>(
c: uint32x4_t,
) -> uint64x2_t {
static_assert_uimm_bits!(LANE, 2);
unsafe { vmlsl_high_u32(a, b, simd_shuffle!(c, c, [LANE as u32; 4])) }
vmlsl_high_u32(a, b, vdupq_laneq_u32::<LANE>(c))
}
#[doc = "Multiply-subtract long"]
#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsl_high_n_s16)"]
@@ -15270,7 +15270,7 @@ pub fn vmulh_laneq_f16<const LANE: i32>(a: f16, b: float16x8_t) -> f16 {
#[stable(feature = "neon_intrinsics", since = "1.59.0")]
pub fn vmull_high_lane_s16<const LANE: i32>(a: int16x8_t, b: int16x4_t) -> int32x4_t {
static_assert_uimm_bits!(LANE, 2);
unsafe { vmull_high_s16(a, simd_shuffle!(b, b, [LANE as u32; 8])) }
vmull_high_s16(a, vdupq_lane_s16::<LANE>(b))
}
#[doc = "Multiply long"]
#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmull_high_laneq_s16)"]
@@ -15281,7 +15281,7 @@ pub fn vmull_high_lane_s16<const LANE: i32>(a: int16x8_t, b: int16x4_t) -> int32
#[stable(feature = "neon_intrinsics", since = "1.59.0")]
pub fn vmull_high_laneq_s16<const LANE: i32>(a: int16x8_t, b: int16x8_t) -> int32x4_t {
static_assert_uimm_bits!(LANE, 3);
unsafe { vmull_high_s16(a, simd_shuffle!(b, b, [LANE as u32; 8])) }
vmull_high_s16(a, vdupq_laneq_s16::<LANE>(b))
}
#[doc = "Multiply long"]
#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmull_high_lane_s32)"]
@@ -15292,7 +15292,7 @@ pub fn vmull_high_laneq_s16<const LANE: i32>(a: int16x8_t, b: int16x8_t) -> int3
#[stable(feature = "neon_intrinsics", since = "1.59.0")]
pub fn vmull_high_lane_s32<const LANE: i32>(a: int32x4_t, b: int32x2_t) -> int64x2_t {
static_assert_uimm_bits!(LANE, 1);
unsafe { vmull_high_s32(a, simd_shuffle!(b, b, [LANE as u32; 4])) }
vmull_high_s32(a, vdupq_lane_s32::<LANE>(b))
}
#[doc = "Multiply long"]
#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmull_high_laneq_s32)"]
@@ -15303,7 +15303,7 @@ pub fn vmull_high_lane_s32<const LANE: i32>(a: int32x4_t, b: int32x2_t) -> int64
#[stable(feature = "neon_intrinsics", since = "1.59.0")]
pub fn vmull_high_laneq_s32<const LANE: i32>(a: int32x4_t, b: int32x4_t) -> int64x2_t {
static_assert_uimm_bits!(LANE, 2);
unsafe { vmull_high_s32(a, simd_shuffle!(b, b, [LANE as u32; 4])) }
vmull_high_s32(a, vdupq_laneq_s32::<LANE>(b))
}
#[doc = "Multiply long"]
#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmull_high_lane_u16)"]
@@ -15314,7 +15314,7 @@ pub fn vmull_high_laneq_s32<const LANE: i32>(a: int32x4_t, b: int32x4_t) -> int6
#[stable(feature = "neon_intrinsics", since = "1.59.0")]
pub fn vmull_high_lane_u16<const LANE: i32>(a: uint16x8_t, b: uint16x4_t) -> uint32x4_t {
static_assert_uimm_bits!(LANE, 2);
unsafe { vmull_high_u16(a, simd_shuffle!(b, b, [LANE as u32; 8])) }
vmull_high_u16(a, vdupq_lane_u16::<LANE>(b))
}
#[doc = "Multiply long"]
#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmull_high_laneq_u16)"]
@@ -15325,7 +15325,7 @@ pub fn vmull_high_lane_u16<const LANE: i32>(a: uint16x8_t, b: uint16x4_t) -> uin
#[stable(feature = "neon_intrinsics", since = "1.59.0")]
pub fn vmull_high_laneq_u16<const LANE: i32>(a: uint16x8_t, b: uint16x8_t) -> uint32x4_t {
static_assert_uimm_bits!(LANE, 3);
unsafe { vmull_high_u16(a, simd_shuffle!(b, b, [LANE as u32; 8])) }
vmull_high_u16(a, vdupq_laneq_u16::<LANE>(b))
}
#[doc = "Multiply long"]
#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmull_high_lane_u32)"]
@@ -15336,7 +15336,7 @@ pub fn vmull_high_laneq_u16<const LANE: i32>(a: uint16x8_t, b: uint16x8_t) -> ui
#[stable(feature = "neon_intrinsics", since = "1.59.0")]
pub fn vmull_high_lane_u32<const LANE: i32>(a: uint32x4_t, b: uint32x2_t) -> uint64x2_t {
static_assert_uimm_bits!(LANE, 1);
unsafe { vmull_high_u32(a, simd_shuffle!(b, b, [LANE as u32; 4])) }
vmull_high_u32(a, vdupq_lane_u32::<LANE>(b))
}
#[doc = "Multiply long"]
#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmull_high_laneq_u32)"]
@@ -15347,7 +15347,7 @@ pub fn vmull_high_lane_u32<const LANE: i32>(a: uint32x4_t, b: uint32x2_t) -> uin
#[stable(feature = "neon_intrinsics", since = "1.59.0")]
pub fn vmull_high_laneq_u32<const LANE: i32>(a: uint32x4_t, b: uint32x4_t) -> uint64x2_t {
static_assert_uimm_bits!(LANE, 2);
unsafe { vmull_high_u32(a, simd_shuffle!(b, b, [LANE as u32; 4])) }
vmull_high_u32(a, vdupq_laneq_u32::<LANE>(b))
}
#[doc = "Multiply long"]
#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmull_high_n_s16)"]
@@ -11419,17 +11419,17 @@ intrinsics:
static_defs: ['const LANE: i32']
safety: safe
types:
- [int32x4_t, int16x8_t, int16x4_t, '2', '[LANE as u32; 8]']
- [int32x4_t, int16x8_t, int16x8_t, '3', '[LANE as u32; 8]']
- [int64x2_t, int32x4_t, int32x2_t, '1', '[LANE as u32; 4]']
- [int64x2_t, int32x4_t, int32x4_t, '2', '[LANE as u32; 4]']
- [int32x4_t, int16x8_t, int16x4_t, '2']
- [int32x4_t, int16x8_t, int16x8_t, '3']
- [int64x2_t, int32x4_t, int32x2_t, '1']
- [int64x2_t, int32x4_t, int32x4_t, '2']
compose:
- FnCall: [static_assert_uimm_bits!, [LANE, '{type[3]}']]
- FnCall:
- "vmlsl_high_{neon_type[1]}"
- - a
- b
- FnCall: [simd_shuffle!, [c, c, "{type[4]}"]]
- FnCall: ['vdupq_lane{neon_type[2].no}', [c], [LANE]]
- name: "vmlsl_high_lane{neon_type[2].no}"
doc: "Multiply-subtract long"
@@ -11442,17 +11442,17 @@ intrinsics:
static_defs: ['const LANE: i32']
safety: safe
types:
- [uint32x4_t, uint16x8_t, uint16x4_t, '2', '[LANE as u32; 8]']
- [uint32x4_t, uint16x8_t, uint16x8_t, '3', '[LANE as u32; 8]']
- [uint64x2_t, uint32x4_t, uint32x2_t, '1', '[LANE as u32; 4]']
- [uint64x2_t, uint32x4_t, uint32x4_t, '2', '[LANE as u32; 4]']
- [uint32x4_t, uint16x8_t, uint16x4_t, '2']
- [uint32x4_t, uint16x8_t, uint16x8_t, '3']
- [uint64x2_t, uint32x4_t, uint32x2_t, '1']
- [uint64x2_t, uint32x4_t, uint32x4_t, '2']
compose:
- FnCall: [static_assert_uimm_bits!, [LANE, '{type[3]}']]
- FnCall:
- "vmlsl_high_{neon_type[1]}"
- - a
- b
- FnCall: [simd_shuffle!, [c, c, "{type[4]}"]]
- FnCall: ['vdupq_lane{neon_type[2].no}', [c], [LANE]]
- name: "vclt{neon_type[0].no}"
doc: "Floating-point compare less than"
@@ -11830,16 +11830,16 @@ intrinsics:
static_defs: ['const LANE: i32']
safety: safe
types:
- [int16x8_t, int16x4_t, int32x4_t, '2', '[LANE as u32; 8]']
- [int16x8_t, int16x8_t, int32x4_t, '3', '[LANE as u32; 8]']
- [int32x4_t, int32x2_t, int64x2_t, '1', '[LANE as u32; 4]']
- [int32x4_t, int32x4_t, int64x2_t, '2', '[LANE as u32; 4]']
- [int16x8_t, int16x4_t, int32x4_t, '2']
- [int16x8_t, int16x8_t, int32x4_t, '3']
- [int32x4_t, int32x2_t, int64x2_t, '1']
- [int32x4_t, int32x4_t, int64x2_t, '2']
compose:
- FnCall: [static_assert_uimm_bits!, [LANE, "{type[3]}"]]
- FnCall:
- "vmull_high_{neon_type[0]}"
- - a
- FnCall: [simd_shuffle!, [b, b, '{type[4]}']]
- FnCall: ['vdupq_lane{neon_type[1].no}', [b], [LANE]]
- name: "vmull_high_lane{neon_type[1].no}"
doc: "Multiply long"
@@ -11852,16 +11852,16 @@ intrinsics:
static_defs: ['const LANE: i32']
safety: safe
types:
- [uint16x8_t, uint16x4_t, uint32x4_t, '2', '[LANE as u32; 8]']
- [uint16x8_t, uint16x8_t, uint32x4_t, '3', '[LANE as u32; 8]']
- [uint32x4_t, uint32x2_t, uint64x2_t, '1', '[LANE as u32; 4]']
- [uint32x4_t, uint32x4_t, uint64x2_t, '2', '[LANE as u32; 4]']
- [uint16x8_t, uint16x4_t, uint32x4_t, '2']
- [uint16x8_t, uint16x8_t, uint32x4_t, '3']
- [uint32x4_t, uint32x2_t, uint64x2_t, '1']
- [uint32x4_t, uint32x4_t, uint64x2_t, '2']
compose:
- FnCall: [static_assert_uimm_bits!, [LANE, "{type[3]}"]]
- FnCall:
- "vmull_high_{neon_type[0]}"
- - a
- FnCall: [simd_shuffle!, [b, b, '{type[4]}']]
- FnCall: ['vdupq_lane{neon_type[1].no}', [b], [LANE]]
- name: "vrsqrte{neon_type.no}"
doc: "Reciprocal square-root estimate."
@@ -12143,13 +12143,13 @@ intrinsics:
static_defs: ['const LANE: i32']
safety: safe
types:
- [int32x4_t, int16x8_t, int16x4_t, '2', '[LANE as u32; 8]']
- [int32x4_t, int16x8_t, int16x8_t, '3', '[LANE as u32; 8]']
- [int64x2_t, int32x4_t, int32x2_t, '1', '[LANE as u32; 4]']
- [int64x2_t, int32x4_t, int32x4_t, '2', '[LANE as u32; 4]']
- [int32x4_t, int16x8_t, int16x4_t, '2']
- [int32x4_t, int16x8_t, int16x8_t, '3']
- [int64x2_t, int32x4_t, int32x2_t, '1']
- [int64x2_t, int32x4_t, int32x4_t, '2']
compose:
- FnCall: [static_assert_uimm_bits!, [LANE, '{type[3]}']]
- FnCall: ['vmlal_high_{neon_type[2]}', [a, b, {FnCall: [simd_shuffle!, [c, c, '{type[4]}']]}]]
- FnCall: ['vmlal_high_{neon_type[2]}', [a, b, {FnCall: ['vdupq_lane{neon_type[2].no}', [c], [LANE]]}]]
- name: "vmlal_high_lane{neon_type[2].no}"
doc: "Multiply-add long"
@@ -12162,13 +12162,13 @@ intrinsics:
static_defs: ['const LANE: i32']
safety: safe
types:
- [uint32x4_t, uint16x8_t, uint16x4_t, '2', '[LANE as u32; 8]']
- [uint32x4_t, uint16x8_t, uint16x8_t, '3', '[LANE as u32; 8]']
- [uint64x2_t, uint32x4_t, uint32x2_t, '1', '[LANE as u32; 4]']
- [uint64x2_t, uint32x4_t, uint32x4_t, '2', '[LANE as u32; 4]']
- [uint32x4_t, uint16x8_t, uint16x4_t, '2']
- [uint32x4_t, uint16x8_t, uint16x8_t, '3']
- [uint64x2_t, uint32x4_t, uint32x2_t, '1']
- [uint64x2_t, uint32x4_t, uint32x4_t, '2']
compose:
- FnCall: [static_assert_uimm_bits!, [LANE, '{type[3]}']]
- FnCall: ['vmlal_high_{neon_type[2]}', [a, b, {FnCall: [simd_shuffle!, [c, c, '{type[4]}']]}]]
- FnCall: ['vmlal_high_{neon_type[2]}', [a, b, {FnCall: ['vdupq_lane{neon_type[2].no}', [c], [LANE]]}]]
- name: "vrsrad_n_u64"
doc: "Unsigned rounding shift right and accumulate."