Merge pull request #2103 from heiher/vadda

loongarch: Use `intrinsics::simd` for vadda
This commit is contained in:
Sayantan Chakraborty
2026-05-10 05:33:12 +00:00
committed by GitHub
8 changed files with 37 additions and 72 deletions
@@ -91,14 +91,6 @@
fn __lasx_xvsat_wu(a: __v8u32, b: u32) -> __v8u32;
#[link_name = "llvm.loongarch.lasx.xvsat.du"]
fn __lasx_xvsat_du(a: __v4u64, b: u32) -> __v4u64;
// Bindings to the LLVM `llvm.loongarch.lasx.xvadda.*` intrinsics, one per
// signed element type (`__v32i8`, `__v16i16`, `__v8i32`, `__v4i64`). Each takes
// two vectors of that type and returns a vector of the same type.
#[link_name = "llvm.loongarch.lasx.xvadda.b"]
fn __lasx_xvadda_b(a: __v32i8, b: __v32i8) -> __v32i8;
#[link_name = "llvm.loongarch.lasx.xvadda.h"]
fn __lasx_xvadda_h(a: __v16i16, b: __v16i16) -> __v16i16;
#[link_name = "llvm.loongarch.lasx.xvadda.w"]
fn __lasx_xvadda_w(a: __v8i32, b: __v8i32) -> __v8i32;
#[link_name = "llvm.loongarch.lasx.xvadda.d"]
fn __lasx_xvadda_d(a: __v4i64, b: __v4i64) -> __v4i64;
#[link_name = "llvm.loongarch.lasx.xvavg.b"]
fn __lasx_xvavg_b(a: __v32i8, b: __v32i8) -> __v32i8;
#[link_name = "llvm.loongarch.lasx.xvavg.h"]
@@ -1445,34 +1437,6 @@ pub fn lasx_xvsat_du<const IMM6: u32>(a: m256i) -> m256i {
unsafe { transmute(__lasx_xvsat_du(transmute(a), IMM6)) }
}
/// Calls the `llvm.loongarch.lasx.xvadda.b` intrinsic on `a` and `b`,
/// reinterpreting both operands as `__v32i8` vectors and the result as `m256i`.
///
/// NOTE(review): `xvadda` presumably adds the absolute values of corresponding
/// lanes — confirm against the LoongArch vector extension manual.
#[inline]
#[target_feature(enable = "lasx")]
#[unstable(feature = "stdarch_loongarch", issue = "117427")]
pub fn lasx_xvadda_b(a: m256i, b: m256i) -> m256i {
    // SAFETY: `transmute` only compiles when source and destination have the
    // same size, and this function enables the `lasx` target feature itself.
    unsafe {
        let lhs = transmute(a);
        let rhs = transmute(b);
        let sum = __lasx_xvadda_b(lhs, rhs);
        transmute(sum)
    }
}
/// Calls the `llvm.loongarch.lasx.xvadda.h` intrinsic on `a` and `b`,
/// reinterpreting both operands as `__v16i16` vectors and the result as `m256i`.
///
/// NOTE(review): `xvadda` presumably adds the absolute values of corresponding
/// lanes — confirm against the LoongArch vector extension manual.
#[inline]
#[target_feature(enable = "lasx")]
#[unstable(feature = "stdarch_loongarch", issue = "117427")]
pub fn lasx_xvadda_h(a: m256i, b: m256i) -> m256i {
    // SAFETY: `transmute` only compiles when source and destination have the
    // same size, and this function enables the `lasx` target feature itself.
    unsafe {
        let lhs = transmute(a);
        let rhs = transmute(b);
        let sum = __lasx_xvadda_h(lhs, rhs);
        transmute(sum)
    }
}
/// Calls the `llvm.loongarch.lasx.xvadda.w` intrinsic on `a` and `b`,
/// reinterpreting both operands as `__v8i32` vectors and the result as `m256i`.
///
/// NOTE(review): `xvadda` presumably adds the absolute values of corresponding
/// lanes — confirm against the LoongArch vector extension manual.
#[inline]
#[target_feature(enable = "lasx")]
#[unstable(feature = "stdarch_loongarch", issue = "117427")]
pub fn lasx_xvadda_w(a: m256i, b: m256i) -> m256i {
    // SAFETY: `transmute` only compiles when source and destination have the
    // same size, and this function enables the `lasx` target feature itself.
    unsafe {
        let lhs = transmute(a);
        let rhs = transmute(b);
        let sum = __lasx_xvadda_w(lhs, rhs);
        transmute(sum)
    }
}
/// Calls the `llvm.loongarch.lasx.xvadda.d` intrinsic on `a` and `b`,
/// reinterpreting both operands as `__v4i64` vectors and the result as `m256i`.
///
/// NOTE(review): `xvadda` presumably adds the absolute values of corresponding
/// lanes — confirm against the LoongArch vector extension manual.
#[inline]
#[target_feature(enable = "lasx")]
#[unstable(feature = "stdarch_loongarch", issue = "117427")]
pub fn lasx_xvadda_d(a: m256i, b: m256i) -> m256i {
    // SAFETY: `transmute` only compiles when source and destination have the
    // same size, and this function enables the `lasx` target feature itself.
    unsafe {
        let lhs = transmute(a);
        let rhs = transmute(b);
        let sum = __lasx_xvadda_d(lhs, rhs);
        transmute(sum)
    }
}
#[inline]
#[target_feature(enable = "lasx")]
#[unstable(feature = "stdarch_loongarch", issue = "117427")]
@@ -148,6 +148,10 @@
impl_vvv!("lasx", lasx_xvssub_hu, is::simd_saturating_sub, m256i, u16x16);
impl_vvv!("lasx", lasx_xvssub_wu, is::simd_saturating_sub, m256i, u32x8);
impl_vvv!("lasx", lasx_xvssub_du, is::simd_saturating_sub, m256i, u64x4);
// `xvadda` family: generated from the `ls::simd_adda` helper, once per signed
// element layout of the 256-bit vector (i8x32, i16x16, i32x8, i64x4).
impl_vvv!("lasx", lasx_xvadda_b, ls::simd_adda, m256i, i8x32);
impl_vvv!("lasx", lasx_xvadda_h, ls::simd_adda, m256i, i16x16);
impl_vvv!("lasx", lasx_xvadda_w, ls::simd_adda, m256i, i32x8);
impl_vvv!("lasx", lasx_xvadda_d, ls::simd_adda, m256i, i64x4);
impl_vuv!("lasx", lasx_xvslli_b, is::simd_shl, m256i, i8x32);
impl_vuv!("lasx", lasx_xvslli_h, is::simd_shl, m256i, i16x16);
@@ -91,14 +91,6 @@
fn __lsx_vsat_wu(a: __v4u32, b: u32) -> __v4u32;
#[link_name = "llvm.loongarch.lsx.vsat.du"]
fn __lsx_vsat_du(a: __v2u64, b: u32) -> __v2u64;
// Bindings to the LLVM `llvm.loongarch.lsx.vadda.*` intrinsics, one per
// signed element type (`__v16i8`, `__v8i16`, `__v4i32`, `__v2i64`). Each takes
// two vectors of that type and returns a vector of the same type.
#[link_name = "llvm.loongarch.lsx.vadda.b"]
fn __lsx_vadda_b(a: __v16i8, b: __v16i8) -> __v16i8;
#[link_name = "llvm.loongarch.lsx.vadda.h"]
fn __lsx_vadda_h(a: __v8i16, b: __v8i16) -> __v8i16;
#[link_name = "llvm.loongarch.lsx.vadda.w"]
fn __lsx_vadda_w(a: __v4i32, b: __v4i32) -> __v4i32;
#[link_name = "llvm.loongarch.lsx.vadda.d"]
fn __lsx_vadda_d(a: __v2i64, b: __v2i64) -> __v2i64;
#[link_name = "llvm.loongarch.lsx.vavg.b"]
fn __lsx_vavg_b(a: __v16i8, b: __v16i8) -> __v16i8;
#[link_name = "llvm.loongarch.lsx.vavg.h"]
@@ -1357,34 +1349,6 @@ pub fn lsx_vsat_du<const IMM6: u32>(a: m128i) -> m128i {
unsafe { transmute(__lsx_vsat_du(transmute(a), IMM6)) }
}
/// Calls the `llvm.loongarch.lsx.vadda.b` intrinsic on `a` and `b`,
/// reinterpreting both operands as `__v16i8` vectors and the result as `m128i`.
///
/// NOTE(review): `vadda` presumably adds the absolute values of corresponding
/// lanes — confirm against the LoongArch vector extension manual.
#[inline]
#[target_feature(enable = "lsx")]
#[unstable(feature = "stdarch_loongarch", issue = "117427")]
pub fn lsx_vadda_b(a: m128i, b: m128i) -> m128i {
    // SAFETY: `transmute` only compiles when source and destination have the
    // same size, and this function enables the `lsx` target feature itself.
    unsafe {
        let lhs = transmute(a);
        let rhs = transmute(b);
        let sum = __lsx_vadda_b(lhs, rhs);
        transmute(sum)
    }
}
/// Calls the `llvm.loongarch.lsx.vadda.h` intrinsic on `a` and `b`,
/// reinterpreting both operands as `__v8i16` vectors and the result as `m128i`.
///
/// NOTE(review): `vadda` presumably adds the absolute values of corresponding
/// lanes — confirm against the LoongArch vector extension manual.
#[inline]
#[target_feature(enable = "lsx")]
#[unstable(feature = "stdarch_loongarch", issue = "117427")]
pub fn lsx_vadda_h(a: m128i, b: m128i) -> m128i {
    // SAFETY: `transmute` only compiles when source and destination have the
    // same size, and this function enables the `lsx` target feature itself.
    unsafe {
        let lhs = transmute(a);
        let rhs = transmute(b);
        let sum = __lsx_vadda_h(lhs, rhs);
        transmute(sum)
    }
}
/// Calls the `llvm.loongarch.lsx.vadda.w` intrinsic on `a` and `b`,
/// reinterpreting both operands as `__v4i32` vectors and the result as `m128i`.
///
/// NOTE(review): `vadda` presumably adds the absolute values of corresponding
/// lanes — confirm against the LoongArch vector extension manual.
#[inline]
#[target_feature(enable = "lsx")]
#[unstable(feature = "stdarch_loongarch", issue = "117427")]
pub fn lsx_vadda_w(a: m128i, b: m128i) -> m128i {
    // SAFETY: `transmute` only compiles when source and destination have the
    // same size, and this function enables the `lsx` target feature itself.
    unsafe {
        let lhs = transmute(a);
        let rhs = transmute(b);
        let sum = __lsx_vadda_w(lhs, rhs);
        transmute(sum)
    }
}
/// Calls the `llvm.loongarch.lsx.vadda.d` intrinsic on `a` and `b`,
/// reinterpreting both operands as `__v2i64` vectors and the result as `m128i`.
///
/// NOTE(review): `vadda` presumably adds the absolute values of corresponding
/// lanes — confirm against the LoongArch vector extension manual.
#[inline]
#[target_feature(enable = "lsx")]
#[unstable(feature = "stdarch_loongarch", issue = "117427")]
pub fn lsx_vadda_d(a: m128i, b: m128i) -> m128i {
    // SAFETY: `transmute` only compiles when source and destination have the
    // same size, and this function enables the `lsx` target feature itself.
    unsafe {
        let lhs = transmute(a);
        let rhs = transmute(b);
        let sum = __lsx_vadda_d(lhs, rhs);
        transmute(sum)
    }
}
#[inline]
#[target_feature(enable = "lsx")]
#[unstable(feature = "stdarch_loongarch", issue = "117427")]
@@ -148,6 +148,10 @@
impl_vvv!("lsx", lsx_vssub_hu, is::simd_saturating_sub, m128i, u16x8);
impl_vvv!("lsx", lsx_vssub_wu, is::simd_saturating_sub, m128i, u32x4);
impl_vvv!("lsx", lsx_vssub_du, is::simd_saturating_sub, m128i, u64x2);
// `vadda` family: generated from the `ls::simd_adda` helper, once per signed
// element layout of the 128-bit vector (i8x16, i16x8, i32x4, i64x2).
impl_vvv!("lsx", lsx_vadda_b, ls::simd_adda, m128i, i8x16);
impl_vvv!("lsx", lsx_vadda_h, ls::simd_adda, m128i, i16x8);
impl_vvv!("lsx", lsx_vadda_w, ls::simd_adda, m128i, i32x4);
impl_vvv!("lsx", lsx_vadda_d, ls::simd_adda, m128i, i64x2);
impl_vuv!("lsx", lsx_vslli_b, is::simd_shl, m128i, i8x16);
impl_vuv!("lsx", lsx_vslli_h, is::simd_shl, m128i, i16x8);
@@ -45,6 +45,19 @@ unsafe fn splat(v: i64) -> Self {
impl_simd_ext!(u64x2, u64);
impl_simd_ext!(u64x4, u64);
/// Computes the absolute value of `a`: wherever `a` compares less than a
/// splatted zero, the negated value is selected; elsewhere `a` is kept as is.
///
/// NOTE(review): the most negative value presumably negates to itself
/// (wrapping) — confirm against the semantics of `is::simd_neg`.
///
/// # Safety
///
/// Forwards directly to the `is::simd_*` operations, so callers must satisfy
/// whatever those require of `T` (presumably a signed integer vector type —
/// TODO confirm).
#[inline(always)]
#[rustc_const_unstable(feature = "stdarch_const_helpers", issue = "none")]
pub(crate) const unsafe fn simd_abs<T: Copy + const SimdExt>(a: T) -> T {
    let zero: T = ls::simd_splat(0);
    // Mask marking the positions where `a` is below zero.
    let is_negative: T = is::simd_lt(a, zero);
    let negated: T = is::simd_neg(a);
    is::simd_select(is_negative, negated, a)
}
/// Adds the absolute values of `a` and `b`: `simd_abs(a) + simd_abs(b)`,
/// combined with `is::simd_add`.
///
/// # Safety
///
/// Same requirements as `ls::simd_abs` and `is::simd_add`, which this helper
/// calls without further checks.
#[inline(always)]
#[rustc_const_unstable(feature = "stdarch_const_helpers", issue = "none")]
pub(crate) const unsafe fn simd_adda<T: Copy + const SimdExt>(a: T, b: T) -> T {
    let abs_a: T = ls::simd_abs(a);
    let abs_b: T = ls::simd_abs(b);
    is::simd_add(abs_a, abs_b)
}
#[inline(always)]
#[rustc_const_unstable(feature = "stdarch_const_helpers", issue = "none")]
pub(super) const unsafe fn simd_andn<T: Copy + const SimdExt>(a: T, b: T) -> T {
@@ -924,21 +924,25 @@ asm-fmts = xd, xj, ui6
data-types = UV4DI, UV4DI, UQI
/// lasx_xvadda_b
impl = portable
name = lasx_xvadda_b
asm-fmts = xd, xj, xk
data-types = V32QI, V32QI, V32QI
/// lasx_xvadda_h
impl = portable
name = lasx_xvadda_h
asm-fmts = xd, xj, xk
data-types = V16HI, V16HI, V16HI
/// lasx_xvadda_w
impl = portable
name = lasx_xvadda_w
asm-fmts = xd, xj, xk
data-types = V8SI, V8SI, V8SI
/// lasx_xvadda_d
impl = portable
name = lasx_xvadda_d
asm-fmts = xd, xj, xk
data-types = V4DI, V4DI, V4DI
@@ -924,21 +924,25 @@ asm-fmts = vd, vj, ui6
data-types = UV2DI, UV2DI, UQI
/// lsx_vadda_b
impl = portable
name = lsx_vadda_b
asm-fmts = vd, vj, vk
data-types = V16QI, V16QI, V16QI
/// lsx_vadda_h
impl = portable
name = lsx_vadda_h
asm-fmts = vd, vj, vk
data-types = V8HI, V8HI, V8HI
/// lsx_vadda_w
impl = portable
name = lsx_vadda_w
asm-fmts = vd, vj, vk
data-types = V4SI, V4SI, V4SI
/// lsx_vadda_d
impl = portable
name = lsx_vadda_d
asm-fmts = vd, vj, vk
data-types = V2DI, V2DI, V2DI
@@ -219,6 +219,10 @@ lsx_vssub_bu
lsx_vssub_hu
lsx_vssub_wu
lsx_vssub_du
lsx_vadda_b
lsx_vadda_h
lsx_vadda_w
lsx_vadda_d
# LASX intrinsics
lasx_xvsll_b
@@ -435,3 +439,7 @@ lasx_xvssub_bu
lasx_xvssub_hu
lasx_xvssub_wu
lasx_xvssub_du
lasx_xvadda_b
lasx_xvadda_h
lasx_xvadda_w
lasx_xvadda_d