mirror of
https://github.com/rust-lang/rust.git
synced 2026-05-31 05:26:23 +03:00
Rollup merge of #156709 - sayantn:stdarch-sync-2026-05-18, r=sayantn
stdarch subtree update Subtree update of `stdarch` to https://github.com/rust-lang/stdarch/commit/bb24cbdaa541dd1ca7723d2e172a6c43582055f3. Created using https://github.com/rust-lang/josh-sync. r? @ghost
This commit is contained in:
-1
@@ -279,7 +279,6 @@ jobs:
|
||||
- aarch64-unknown-linux-gnu
|
||||
- aarch64_be-unknown-linux-gnu
|
||||
- armv7-unknown-linux-gnueabihf
|
||||
- arm-unknown-linux-gnueabihf
|
||||
- x86_64-unknown-linux-gnu
|
||||
profile: [dev, release]
|
||||
include:
|
||||
|
||||
@@ -1,17 +1,21 @@
|
||||
FROM ubuntu:25.10
|
||||
RUN apt-get update && apt-get install -y --no-install-recommends \
|
||||
gcc \
|
||||
g++ \
|
||||
ca-certificates \
|
||||
libc6-dev \
|
||||
gcc-aarch64-linux-gnu \
|
||||
g++-aarch64-linux-gnu \
|
||||
libc6-dev-arm64-cross \
|
||||
qemu-user \
|
||||
make \
|
||||
file \
|
||||
clang \
|
||||
lld
|
||||
xz-utils \
|
||||
wget
|
||||
|
||||
RUN wget https://mirrors.edge.kernel.org/pub/tools/llvm/files/llvm-22.1.4-x86_64.tar.gz -O llvm.tar.xz
|
||||
RUN mkdir llvm
|
||||
RUN tar -xvf llvm.tar.xz --strip-components=1 -C llvm
|
||||
|
||||
ENV PATH="/llvm/bin:$PATH"
|
||||
|
||||
ENV CARGO_TARGET_AARCH64_UNKNOWN_LINUX_GNU_LINKER=aarch64-linux-gnu-gcc \
|
||||
CARGO_TARGET_AARCH64_UNKNOWN_LINUX_GNU_RUNNER="qemu-aarch64 -cpu max -L /usr/aarch64-linux-gnu" \
|
||||
|
||||
@@ -2,17 +2,15 @@ FROM ubuntu:25.10
|
||||
|
||||
RUN apt-get update && apt-get install -y --no-install-recommends \
|
||||
gcc \
|
||||
g++ \
|
||||
ca-certificates \
|
||||
libc6-dev \
|
||||
libc6-dev-arm64-cross \
|
||||
qemu-user \
|
||||
make \
|
||||
file \
|
||||
clang \
|
||||
curl \
|
||||
xz-utils \
|
||||
lld
|
||||
wget
|
||||
|
||||
ENV TOOLCHAIN="arm-gnu-toolchain-14.3.rel1-x86_64-aarch64_be-none-linux-gnu"
|
||||
|
||||
@@ -21,6 +19,12 @@ RUN curl -L "https://developer.arm.com/-/media/Files/downloads/gnu/14.3.rel1/bin
|
||||
RUN tar -xvf "${TOOLCHAIN}.tar.xz"
|
||||
RUN mkdir /toolchains && mv "./${TOOLCHAIN}" /toolchains
|
||||
|
||||
RUN wget https://mirrors.edge.kernel.org/pub/tools/llvm/files/llvm-22.1.4-x86_64.tar.gz -O llvm.tar.xz
|
||||
RUN mkdir llvm
|
||||
RUN tar -xvf llvm.tar.xz --strip-components=1 -C llvm
|
||||
|
||||
ENV PATH="/llvm/bin:$PATH"
|
||||
|
||||
ENV AARCH64_BE_TOOLCHAIN="/toolchains/${TOOLCHAIN}"
|
||||
ENV AARCH64_BE_LIBC="${AARCH64_BE_TOOLCHAIN}/aarch64_be-none-linux-gnu/libc"
|
||||
|
||||
|
||||
@@ -7,7 +7,9 @@ RUN apt-get update && apt-get install -y --no-install-recommends \
|
||||
libc6-dev-armhf-cross \
|
||||
qemu-user \
|
||||
make \
|
||||
file
|
||||
file \
|
||||
clang \
|
||||
lld
|
||||
ENV CARGO_TARGET_ARM_UNKNOWN_LINUX_GNUEABIHF_LINKER=arm-linux-gnueabihf-gcc \
|
||||
CARGO_TARGET_ARM_UNKNOWN_LINUX_GNUEABIHF_RUNNER="qemu-arm -cpu max -L /usr/arm-linux-gnueabihf" \
|
||||
OBJDUMP=arm-linux-gnueabihf-objdump
|
||||
|
||||
@@ -1,17 +1,21 @@
|
||||
FROM ubuntu:24.04
|
||||
RUN apt-get update && apt-get install -y --no-install-recommends \
|
||||
gcc \
|
||||
g++ \
|
||||
ca-certificates \
|
||||
libc6-dev \
|
||||
gcc-arm-linux-gnueabihf \
|
||||
g++-arm-linux-gnueabihf \
|
||||
libc6-dev-armhf-cross \
|
||||
qemu-user \
|
||||
make \
|
||||
file \
|
||||
clang \
|
||||
lld
|
||||
wget
|
||||
|
||||
RUN wget https://mirrors.edge.kernel.org/pub/tools/llvm/files/llvm-22.1.4-x86_64.tar.gz -O llvm.tar.xz
|
||||
RUN mkdir llvm
|
||||
RUN tar -xvf llvm.tar.xz --strip-components=1 -C llvm
|
||||
|
||||
ENV PATH="/llvm/bin:$PATH"
|
||||
|
||||
ENV CARGO_TARGET_ARMV7_UNKNOWN_LINUX_GNUEABIHF_LINKER=arm-linux-gnueabihf-gcc \
|
||||
CARGO_TARGET_ARMV7_UNKNOWN_LINUX_GNUEABIHF_RUNNER="qemu-arm -cpu max -L /usr/arm-linux-gnueabihf" \
|
||||
OBJDUMP=arm-linux-gnueabihf-objdump
|
||||
|
||||
@@ -6,15 +6,18 @@ RUN apt-get update && apt-get install -y --no-install-recommends \
|
||||
make \
|
||||
ca-certificates \
|
||||
wget \
|
||||
xz-utils \
|
||||
clang \
|
||||
libstdc++-14-dev \
|
||||
build-essential \
|
||||
lld
|
||||
xz-utils
|
||||
|
||||
RUN wget http://ci-mirrors.rust-lang.org/sde-external-10.8.0-2026-03-15-lin.tar.xz -O sde.tar.xz
|
||||
RUN mkdir intel-sde
|
||||
RUN tar -xJf sde.tar.xz --strip-components=1 -C intel-sde
|
||||
|
||||
RUN wget https://mirrors.edge.kernel.org/pub/tools/llvm/files/llvm-22.1.4-x86_64.tar.gz -O llvm.tar.xz
|
||||
RUN mkdir llvm
|
||||
RUN tar -xvf llvm.tar.xz --strip-components=1 -C llvm
|
||||
|
||||
ENV PATH="/llvm/bin:$PATH"
|
||||
|
||||
ENV CARGO_TARGET_X86_64_UNKNOWN_LINUX_GNU_RUNNER="/intel-sde/sde64 \
|
||||
-cpuid-in /checkout/ci/docker/x86_64-unknown-linux-gnu/cpuid.def \
|
||||
-rtm-mode full -tsx --"
|
||||
|
||||
@@ -48,7 +48,7 @@ run() {
|
||||
--workdir /checkout \
|
||||
--privileged \
|
||||
stdarch \
|
||||
sh -c "HOME=/tmp PATH=\$PATH:/rust/bin exec ci/intrinsic-test.sh ${1}"
|
||||
sh -c "HOME=/tmp PATH=\$PATH:/rust/bin exec ci/intrinsic-test.sh"
|
||||
}
|
||||
|
||||
if [ -z "$1" ]; then
|
||||
|
||||
@@ -5,127 +5,56 @@ set -ex
|
||||
: "${TARGET?The TARGET environment variable must be set.}"
|
||||
|
||||
export RUSTFLAGS="${RUSTFLAGS} -D warnings -Z merge-functions=disabled -Z verify-llvm-ir"
|
||||
export HOST_RUSTFLAGS="${RUSTFLAGS}"
|
||||
export PROFILE="${PROFILE:="release"}"
|
||||
|
||||
case ${TARGET} in
|
||||
# On 32-bit use a static relocation model which avoids some extra
|
||||
# instructions when dealing with static data, notably allowing some
|
||||
# instruction assertion checks to pass below the 20 instruction limit. If
|
||||
# this is the default, dynamic, then too many instructions are generated
|
||||
# when we assert the instruction for a function and it causes tests to fail.
|
||||
i686-* | i586-*)
|
||||
export RUSTFLAGS="${RUSTFLAGS} -C relocation-model=static"
|
||||
;;
|
||||
# Some x86_64 targets enable by default more features beyond SSE2,
|
||||
# which cause some instruction assertion checks to fail.
|
||||
x86_64-*)
|
||||
export RUSTFLAGS="${RUSTFLAGS} -C target-feature=-sse3"
|
||||
;;
|
||||
#Unoptimized build uses fast-isel which breaks with msa
|
||||
mips-* | mipsel-*)
|
||||
export RUSTFLAGS="${RUSTFLAGS} -C llvm-args=-fast-isel=false"
|
||||
;;
|
||||
armv7-*eabihf | thumbv7-*eabihf)
|
||||
export RUSTFLAGS="${RUSTFLAGS} -Ctarget-feature=+neon"
|
||||
;;
|
||||
# Some of our test dependencies use the deprecated `gcc` crates which
|
||||
# doesn't detect RISC-V compilers automatically, so do it manually here.
|
||||
riscv*)
|
||||
export RUSTFLAGS="${RUSTFLAGS} -Ctarget-feature=+zk,+zks,+zbb,+zbc"
|
||||
;;
|
||||
esac
|
||||
|
||||
echo "RUSTFLAGS=${RUSTFLAGS}"
|
||||
echo "OBJDUMP=${OBJDUMP}"
|
||||
echo "PROFILE=${PROFILE}"
|
||||
|
||||
INTRINSIC_TEST="--manifest-path=crates/intrinsic-test/Cargo.toml"
|
||||
|
||||
# Test targets compiled with extra features.
|
||||
export CC="clang"
|
||||
|
||||
case ${TARGET} in
|
||||
# Setup aarch64 & armv7 specific variables, the runner, along with some
|
||||
# tests to skip
|
||||
aarch64-unknown-linux-gnu*)
|
||||
TEST_CPPFLAGS="-fuse-ld=lld -I/usr/aarch64-linux-gnu/include/ -I/usr/aarch64-linux-gnu/include/c++/9/aarch64-linux-gnu/"
|
||||
TEST_SKIP_INTRINSICS=crates/intrinsic-test/missing_aarch64.txt
|
||||
TEST_CXX_COMPILER="clang++"
|
||||
TEST_RUNNER="${CARGO_TARGET_AARCH64_UNKNOWN_LINUX_GNU_RUNNER}"
|
||||
: "${TEST_SAMPLE_INTRINSICS_PERCENTAGE:=100}"
|
||||
;;
|
||||
|
||||
aarch64_be-unknown-linux-gnu*)
|
||||
TEST_CPPFLAGS="-fuse-ld=lld"
|
||||
aarch64_be*)
|
||||
export CFLAGS="-I${AARCH64_BE_TOOLCHAIN}/aarch64_be-none-linux-gnu/libc/usr/include --sysroot={AARCH64_BE_TOOLCHAIN}/aarch64_be-none-linux-gnu/libc -Wno-nonportable-vector-initialization"
|
||||
TEST_SKIP_INTRINSICS=crates/intrinsic-test/missing_aarch64_be.txt
|
||||
TEST_CXX_COMPILER="clang++"
|
||||
TEST_RUNNER="${CARGO_TARGET_AARCH64_BE_UNKNOWN_LINUX_GNU_RUNNER}"
|
||||
: "${TEST_SAMPLE_INTRINSICS_PERCENTAGE:=100}"
|
||||
;;
|
||||
|
||||
armv7-unknown-linux-gnueabihf*)
|
||||
TEST_CPPFLAGS="-fuse-ld=lld -I/usr/arm-linux-gnueabihf/include/ -I/usr/arm-linux-gnueabihf/include/c++/9/arm-linux-gnueabihf/"
|
||||
aarch64*)
|
||||
export CFLAGS="-I/usr/aarch64-linux-gnu/include/"
|
||||
TEST_SKIP_INTRINSICS=crates/intrinsic-test/missing_aarch64.txt
|
||||
;;
|
||||
|
||||
armv7*)
|
||||
export CFLAGS="-I/usr/arm-linux-gnueabihf/include/"
|
||||
TEST_SKIP_INTRINSICS=crates/intrinsic-test/missing_arm.txt
|
||||
TEST_CXX_COMPILER="clang++"
|
||||
TEST_RUNNER="${CARGO_TARGET_ARMV7_UNKNOWN_LINUX_GNUEABIHF_RUNNER}"
|
||||
: "${TEST_SAMPLE_INTRINSICS_PERCENTAGE:=100}"
|
||||
;;
|
||||
|
||||
x86_64-unknown-linux-gnu*)
|
||||
TEST_CPPFLAGS="-fuse-ld=lld -I/usr/include/x86_64-linux-gnu/"
|
||||
TEST_CXX_COMPILER="clang++"
|
||||
TEST_RUNNER="${CARGO_TARGET_X86_64_UNKNOWN_LINUX_GNU_RUNNER}"
|
||||
x86_64*)
|
||||
export CFLAGS="-I/usr/include/x86_64-linux-gnu/"
|
||||
TEST_SKIP_INTRINSICS=crates/intrinsic-test/missing_x86.txt
|
||||
: "${TEST_SAMPLE_INTRINSICS_PERCENTAGE:=20}"
|
||||
;;
|
||||
*)
|
||||
;;
|
||||
|
||||
esac
|
||||
|
||||
# Arm specific
|
||||
case "${TARGET}" in
|
||||
aarch64-unknown-linux-gnu*|armv7-unknown-linux-gnueabihf*)
|
||||
CPPFLAGS="${TEST_CPPFLAGS}" RUSTFLAGS="${HOST_RUSTFLAGS}" RUST_LOG=warn \
|
||||
cargo run "${INTRINSIC_TEST}" --release \
|
||||
--bin intrinsic-test -- intrinsics_data/arm_intrinsics.json \
|
||||
--runner "${TEST_RUNNER}" \
|
||||
--cppcompiler "${TEST_CXX_COMPILER}" \
|
||||
--skip "${TEST_SKIP_INTRINSICS}" \
|
||||
--target "${TARGET}" \
|
||||
--profile "${PROFILE}" \
|
||||
--sample-percentage "${TEST_SAMPLE_INTRINSICS_PERCENTAGE}"
|
||||
;;
|
||||
|
||||
aarch64_be-unknown-linux-gnu*)
|
||||
CPPFLAGS="${TEST_CPPFLAGS}" RUSTFLAGS="${HOST_RUSTFLAGS}" RUST_LOG=warn \
|
||||
cargo run "${INTRINSIC_TEST}" --release \
|
||||
--bin intrinsic-test -- intrinsics_data/arm_intrinsics.json \
|
||||
--runner "${TEST_RUNNER}" \
|
||||
--cppcompiler "${TEST_CXX_COMPILER}" \
|
||||
--skip "${TEST_SKIP_INTRINSICS}" \
|
||||
--target "${TARGET}" \
|
||||
--profile "${PROFILE}" \
|
||||
--linker "${CARGO_TARGET_AARCH64_BE_UNKNOWN_LINUX_GNU_LINKER}" \
|
||||
--cxx-toolchain-dir "${AARCH64_BE_TOOLCHAIN}" \
|
||||
--sample-percentage "${TEST_SAMPLE_INTRINSICS_PERCENTAGE}"
|
||||
;;
|
||||
|
||||
x86_64-unknown-linux-gnu*)
|
||||
# `CARGO_TARGET_X86_64_UNKNOWN_LINUX_GNU_RUNNER` is not necessary for `intrinsic-test`
|
||||
# because the binary needs to run directly on the host.
|
||||
# Hence the use of `env -u`.
|
||||
env -u CARGO_TARGET_X86_64_UNKNOWN_LINUX_GNU_RUNNER \
|
||||
CPPFLAGS="${TEST_CPPFLAGS}" RUSTFLAGS="${HOST_RUSTFLAGS}" \
|
||||
RUST_LOG=warn RUST_BACKTRACE=1 \
|
||||
cargo run "${INTRINSIC_TEST}" --release \
|
||||
--bin intrinsic-test -- intrinsics_data/x86-intel.xml \
|
||||
--runner "${TEST_RUNNER}" \
|
||||
--skip "${TEST_SKIP_INTRINSICS}" \
|
||||
--cppcompiler "${TEST_CXX_COMPILER}" \
|
||||
--target "${TARGET}" \
|
||||
--profile "${PROFILE}" \
|
||||
--sample-percentage "${TEST_SAMPLE_INTRINSICS_PERCENTAGE}"
|
||||
--target "${TARGET}"
|
||||
|
||||
echo "${CARGO_TARGET_X86_64_UNKNOWN_LINUX_GNU_RUNNER}"
|
||||
;;
|
||||
*)
|
||||
*)
|
||||
cargo run "${INTRINSIC_TEST}" --release \
|
||||
--bin intrinsic-test -- intrinsics_data/arm_intrinsics.json \
|
||||
--skip "${TEST_SKIP_INTRINSICS}" \
|
||||
--target "${TARGET}"
|
||||
;;
|
||||
esac
|
||||
|
||||
cargo test --manifest-path=rust_programs/Cargo.toml --target "${TARGET}" --profile "${PROFILE}"
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@@ -12,7 +12,6 @@
|
||||
|
||||
use crate::{
|
||||
core_arch::{arm_shared::*, simd::*},
|
||||
hint::unreachable_unchecked,
|
||||
intrinsics::{simd::*, *},
|
||||
mem::transmute,
|
||||
};
|
||||
@@ -94,117 +93,6 @@ macro_rules! shift_right_and_insert {
|
||||
|
||||
pub(crate) use shift_right_and_insert;
|
||||
|
||||
/// Duplicate vector element to vector or scalar
|
||||
#[inline]
|
||||
#[target_feature(enable = "neon")]
|
||||
#[cfg_attr(test, assert_instr(nop, N1 = 0, N2 = 0))]
|
||||
#[rustc_legacy_const_generics(1, 3)]
|
||||
#[stable(feature = "neon_intrinsics", since = "1.59.0")]
|
||||
pub fn vcopy_lane_s64<const N1: i32, const N2: i32>(_a: int64x1_t, b: int64x1_t) -> int64x1_t {
|
||||
static_assert!(N1 == 0);
|
||||
static_assert!(N2 == 0);
|
||||
b
|
||||
}
|
||||
|
||||
/// Duplicate vector element to vector or scalar
|
||||
#[inline]
|
||||
#[target_feature(enable = "neon")]
|
||||
#[cfg_attr(test, assert_instr(nop, N1 = 0, N2 = 0))]
|
||||
#[rustc_legacy_const_generics(1, 3)]
|
||||
#[stable(feature = "neon_intrinsics", since = "1.59.0")]
|
||||
pub fn vcopy_lane_u64<const N1: i32, const N2: i32>(_a: uint64x1_t, b: uint64x1_t) -> uint64x1_t {
|
||||
static_assert!(N1 == 0);
|
||||
static_assert!(N2 == 0);
|
||||
b
|
||||
}
|
||||
|
||||
/// Duplicate vector element to vector or scalar
|
||||
#[inline]
|
||||
#[target_feature(enable = "neon")]
|
||||
#[cfg_attr(test, assert_instr(nop, N1 = 0, N2 = 0))]
|
||||
#[rustc_legacy_const_generics(1, 3)]
|
||||
#[stable(feature = "neon_intrinsics", since = "1.59.0")]
|
||||
pub fn vcopy_lane_p64<const N1: i32, const N2: i32>(_a: poly64x1_t, b: poly64x1_t) -> poly64x1_t {
|
||||
static_assert!(N1 == 0);
|
||||
static_assert!(N2 == 0);
|
||||
b
|
||||
}
|
||||
|
||||
/// Duplicate vector element to vector or scalar
|
||||
#[inline]
|
||||
#[target_feature(enable = "neon")]
|
||||
#[cfg_attr(test, assert_instr(nop, N1 = 0, N2 = 0))]
|
||||
#[rustc_legacy_const_generics(1, 3)]
|
||||
#[stable(feature = "neon_intrinsics", since = "1.59.0")]
|
||||
pub fn vcopy_lane_f64<const N1: i32, const N2: i32>(
|
||||
_a: float64x1_t,
|
||||
b: float64x1_t,
|
||||
) -> float64x1_t {
|
||||
static_assert!(N1 == 0);
|
||||
static_assert!(N2 == 0);
|
||||
b
|
||||
}
|
||||
|
||||
/// Duplicate vector element to vector or scalar
|
||||
#[inline]
|
||||
#[target_feature(enable = "neon")]
|
||||
#[cfg_attr(test, assert_instr(nop, LANE1 = 0, LANE2 = 1))]
|
||||
#[rustc_legacy_const_generics(1, 3)]
|
||||
#[stable(feature = "neon_intrinsics", since = "1.59.0")]
|
||||
pub fn vcopy_laneq_s64<const LANE1: i32, const LANE2: i32>(
|
||||
_a: int64x1_t,
|
||||
b: int64x2_t,
|
||||
) -> int64x1_t {
|
||||
static_assert!(LANE1 == 0);
|
||||
static_assert_uimm_bits!(LANE2, 1);
|
||||
unsafe { transmute::<i64, _>(simd_extract!(b, LANE2 as u32)) }
|
||||
}
|
||||
|
||||
/// Duplicate vector element to vector or scalar
|
||||
#[inline]
|
||||
#[target_feature(enable = "neon")]
|
||||
#[cfg_attr(test, assert_instr(nop, LANE1 = 0, LANE2 = 1))]
|
||||
#[rustc_legacy_const_generics(1, 3)]
|
||||
#[stable(feature = "neon_intrinsics", since = "1.59.0")]
|
||||
pub fn vcopy_laneq_u64<const LANE1: i32, const LANE2: i32>(
|
||||
_a: uint64x1_t,
|
||||
b: uint64x2_t,
|
||||
) -> uint64x1_t {
|
||||
static_assert!(LANE1 == 0);
|
||||
static_assert_uimm_bits!(LANE2, 1);
|
||||
unsafe { transmute::<u64, _>(simd_extract!(b, LANE2 as u32)) }
|
||||
}
|
||||
|
||||
/// Duplicate vector element to vector or scalar
|
||||
#[inline]
|
||||
#[target_feature(enable = "neon")]
|
||||
#[cfg_attr(test, assert_instr(nop, LANE1 = 0, LANE2 = 1))]
|
||||
#[rustc_legacy_const_generics(1, 3)]
|
||||
#[stable(feature = "neon_intrinsics", since = "1.59.0")]
|
||||
pub fn vcopy_laneq_p64<const LANE1: i32, const LANE2: i32>(
|
||||
_a: poly64x1_t,
|
||||
b: poly64x2_t,
|
||||
) -> poly64x1_t {
|
||||
static_assert!(LANE1 == 0);
|
||||
static_assert_uimm_bits!(LANE2, 1);
|
||||
unsafe { transmute::<u64, _>(simd_extract!(b, LANE2 as u32)) }
|
||||
}
|
||||
|
||||
/// Duplicate vector element to vector or scalar
|
||||
#[inline]
|
||||
#[target_feature(enable = "neon")]
|
||||
#[cfg_attr(test, assert_instr(nop, LANE1 = 0, LANE2 = 1))]
|
||||
#[rustc_legacy_const_generics(1, 3)]
|
||||
#[stable(feature = "neon_intrinsics", since = "1.59.0")]
|
||||
pub fn vcopy_laneq_f64<const LANE1: i32, const LANE2: i32>(
|
||||
_a: float64x1_t,
|
||||
b: float64x2_t,
|
||||
) -> float64x1_t {
|
||||
static_assert!(LANE1 == 0);
|
||||
static_assert_uimm_bits!(LANE2, 1);
|
||||
unsafe { transmute::<f64, _>(simd_extract!(b, LANE2 as u32)) }
|
||||
}
|
||||
|
||||
/// Load multiple single-element structures to one, two, three, or four registers
|
||||
#[inline]
|
||||
#[target_feature(enable = "neon")]
|
||||
@@ -443,42 +331,6 @@ pub fn vmovq_n_f64(value: f64) -> float64x2_t {
|
||||
vdupq_n_f64(value)
|
||||
}
|
||||
|
||||
/// Duplicate vector element to vector or scalar
|
||||
#[inline]
|
||||
#[target_feature(enable = "neon")]
|
||||
#[cfg_attr(test, assert_instr(nop))]
|
||||
#[stable(feature = "neon_intrinsics", since = "1.59.0")]
|
||||
pub fn vget_high_f64(a: float64x2_t) -> float64x1_t {
|
||||
unsafe { float64x1_t([simd_extract!(a, 1)]) }
|
||||
}
|
||||
|
||||
/// Duplicate vector element to vector or scalar
|
||||
#[inline]
|
||||
#[target_feature(enable = "neon")]
|
||||
#[cfg_attr(test, assert_instr(ext))]
|
||||
#[stable(feature = "neon_intrinsics", since = "1.59.0")]
|
||||
pub fn vget_high_p64(a: poly64x2_t) -> poly64x1_t {
|
||||
unsafe { transmute(u64x1::new(simd_extract!(a, 1))) }
|
||||
}
|
||||
|
||||
/// Duplicate vector element to vector or scalar
|
||||
#[inline]
|
||||
#[target_feature(enable = "neon")]
|
||||
#[cfg_attr(test, assert_instr(nop))]
|
||||
#[stable(feature = "neon_intrinsics", since = "1.59.0")]
|
||||
pub fn vget_low_f64(a: float64x2_t) -> float64x1_t {
|
||||
unsafe { float64x1_t([simd_extract!(a, 0)]) }
|
||||
}
|
||||
|
||||
/// Duplicate vector element to vector or scalar
|
||||
#[inline]
|
||||
#[target_feature(enable = "neon")]
|
||||
#[cfg_attr(test, assert_instr(nop))]
|
||||
#[stable(feature = "neon_intrinsics", since = "1.59.0")]
|
||||
pub fn vget_low_p64(a: poly64x2_t) -> poly64x1_t {
|
||||
unsafe { transmute(u64x1::new(simd_extract!(a, 0))) }
|
||||
}
|
||||
|
||||
/// Duplicate vector element to vector or scalar
|
||||
#[inline]
|
||||
#[target_feature(enable = "neon")]
|
||||
@@ -493,29 +345,6 @@ pub fn vget_lane_f64<const IMM5: i32>(v: float64x1_t) -> f64 {
|
||||
unsafe { simd_extract!(v, IMM5 as u32) }
|
||||
}
|
||||
|
||||
/// Duplicate vector element to vector or scalar
|
||||
#[inline]
|
||||
#[target_feature(enable = "neon")]
|
||||
#[rustc_legacy_const_generics(1)]
|
||||
#[stable(feature = "neon_intrinsics", since = "1.59.0")]
|
||||
#[cfg_attr(
|
||||
all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
|
||||
assert_instr(nop, IMM5 = 0)
|
||||
)]
|
||||
pub fn vgetq_lane_f64<const IMM5: i32>(v: float64x2_t) -> f64 {
|
||||
static_assert_uimm_bits!(IMM5, 1);
|
||||
unsafe { simd_extract!(v, IMM5 as u32) }
|
||||
}
|
||||
|
||||
/// Vector combine
|
||||
#[inline]
|
||||
#[target_feature(enable = "neon")]
|
||||
#[cfg_attr(test, assert_instr(mov))]
|
||||
#[stable(feature = "neon_intrinsics", since = "1.59.0")]
|
||||
pub fn vcombine_f64(low: float64x1_t, high: float64x1_t) -> float64x2_t {
|
||||
unsafe { simd_shuffle!(low, high, [0, 1]) }
|
||||
}
|
||||
|
||||
/// Shift left
|
||||
#[inline]
|
||||
#[target_feature(enable = "neon")]
|
||||
@@ -781,38 +610,6 @@ fn test_vmovq_n_f64() {
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "neon")]
|
||||
fn test_vget_high_f64() {
|
||||
let a = f64x2::new(1.0, 2.0);
|
||||
let e = f64x1::new(2.0);
|
||||
let r = f64x1::from(vget_high_f64(a.into()));
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "neon")]
|
||||
fn test_vget_high_p64() {
|
||||
let a = u64x2::new(1, 2);
|
||||
let e = u64x1::new(2);
|
||||
let r = u64x1::from(vget_high_p64(a.into()));
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "neon")]
|
||||
fn test_vget_low_f64() {
|
||||
let a = f64x2::new(1.0, 2.0);
|
||||
let e = f64x1::new(1.0);
|
||||
let r = f64x1::from(vget_low_f64(a.into()));
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "neon")]
|
||||
fn test_vget_low_p64() {
|
||||
let a = u64x2::new(1, 2);
|
||||
let e = u64x1::new(1);
|
||||
let r = u64x1::from(vget_low_p64(a.into()));
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "neon")]
|
||||
fn test_vget_lane_f64() {
|
||||
let v = f64x1::new(1.0);
|
||||
@@ -820,15 +617,6 @@ fn test_vget_lane_f64() {
|
||||
assert_eq!(r, 1.0);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "neon")]
|
||||
fn test_vgetq_lane_f64() {
|
||||
let v = f64x2::new(0.0, 1.0);
|
||||
let r = vgetq_lane_f64::<1>(v.into());
|
||||
assert_eq!(r, 1.0);
|
||||
let r = vgetq_lane_f64::<0>(v.into());
|
||||
assert_eq!(r, 0.0);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "neon")]
|
||||
fn test_vcopy_lane_s64() {
|
||||
let a = i64x1::new(1);
|
||||
@@ -865,42 +653,6 @@ fn test_vcopy_lane_f64() {
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "neon")]
|
||||
fn test_vcopy_laneq_s64() {
|
||||
let a = i64x1::new(1);
|
||||
let b = i64x2::new(0, 0x7F_FF_FF_FF_FF_FF_FF_FF);
|
||||
let e = i64x1::new(0x7F_FF_FF_FF_FF_FF_FF_FF);
|
||||
let r = i64x1::from(vcopy_laneq_s64::<0, 1>(a.into(), b.into()));
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "neon")]
|
||||
fn test_vcopy_laneq_u64() {
|
||||
let a = u64x1::new(1);
|
||||
let b = u64x2::new(0, 0xFF_FF_FF_FF_FF_FF_FF_FF);
|
||||
let e = u64x1::new(0xFF_FF_FF_FF_FF_FF_FF_FF);
|
||||
let r = u64x1::from(vcopy_laneq_u64::<0, 1>(a.into(), b.into()));
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "neon")]
|
||||
fn test_vcopy_laneq_p64() {
|
||||
let a = u64x1::new(1);
|
||||
let b = u64x2::new(0, 0x7F_FF_FF_FF_FF_FF_FF_FF);
|
||||
let e = u64x1::new(0x7F_FF_FF_FF_FF_FF_FF_FF);
|
||||
let r = u64x1::from(vcopy_laneq_p64::<0, 1>(a.into(), b.into()));
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "neon")]
|
||||
fn test_vcopy_laneq_f64() {
|
||||
let a = f64x1::from_array([1.]);
|
||||
let b = f64x2::from_array([0., 0.5]);
|
||||
let e = f64x1::from_array([0.5]);
|
||||
let r = f64x1::from(vcopy_laneq_f64::<0, 1>(a.into(), b.into()));
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "neon")]
|
||||
fn test_vbsl_f64() {
|
||||
let a = u64x1::new(0x8000000000000000);
|
||||
@@ -1037,7 +789,7 @@ macro_rules! wide_store_load_roundtrip {
|
||||
macro_rules! wide_store_load_roundtrip_fp16 {
|
||||
($( $name:ident $args:tt);* $(;)?) => {
|
||||
$(
|
||||
#[cfg_attr(miri, ignore)]
|
||||
#[cfg_attr(miri, ignore)] // uses unsupported vendor intrinsics
|
||||
#[simd_test(enable = "neon,fp16")]
|
||||
#[cfg(not(target_arch = "arm64ec"))]
|
||||
unsafe fn $name() {
|
||||
@@ -1308,7 +1060,7 @@ macro_rules! lane_wide_store_load_roundtrip {
|
||||
macro_rules! lane_wide_store_load_roundtrip_neon {
|
||||
($( $name:ident $args:tt);* $(;)?) => {
|
||||
$(
|
||||
#[cfg_attr(miri, ignore)]
|
||||
#[cfg_attr(miri, ignore)] // uses unsupported vendor intrinsics
|
||||
#[simd_test(enable = "neon")]
|
||||
unsafe fn $name() {
|
||||
lane_wide_store_load_roundtrip! $args;
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@@ -7,7 +7,7 @@
|
||||
#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))]
|
||||
pub use self::generated::*;
|
||||
|
||||
use crate::{core_arch::simd::*, hint::unreachable_unchecked, intrinsics::simd::*, mem::transmute};
|
||||
use crate::{core_arch::simd::*, intrinsics::simd::*, mem::transmute};
|
||||
#[cfg(test)]
|
||||
use stdarch_test::assert_instr;
|
||||
|
||||
@@ -1663,235 +1663,6 @@ fn test_vld1q_dup_f32() {
|
||||
assert_eq!(r, e)
|
||||
}
|
||||
|
||||
#[simd_test(enable = "neon")]
|
||||
fn test_vget_lane_u8() {
|
||||
let v = u8x8::new(1, 2, 3, 4, 5, 6, 7, 8);
|
||||
let r = vget_lane_u8::<1>(v.into());
|
||||
assert_eq!(r, 2);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "neon")]
|
||||
fn test_vgetq_lane_u32() {
|
||||
let v = u32x4::new(1, 2, 3, 4);
|
||||
let r = vgetq_lane_u32::<1>(v.into());
|
||||
assert_eq!(r, 2);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "neon")]
|
||||
fn test_vgetq_lane_s32() {
|
||||
let v = i32x4::new(1, 2, 3, 4);
|
||||
let r = vgetq_lane_s32::<1>(v.into());
|
||||
assert_eq!(r, 2);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "neon")]
|
||||
fn test_vget_lane_u64() {
|
||||
let v = u64x1::new(1);
|
||||
let r = vget_lane_u64::<0>(v.into());
|
||||
assert_eq!(r, 1);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "neon")]
|
||||
fn test_vgetq_lane_u16() {
|
||||
let v = u16x8::new(1, 2, 3, 4, 5, 6, 7, 8);
|
||||
let r = vgetq_lane_u16::<1>(v.into());
|
||||
assert_eq!(r, 2);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "neon")]
|
||||
fn test_vget_lane_s8() {
|
||||
let v = i8x8::new(0, 1, 2, 3, 4, 5, 6, 7);
|
||||
let r = vget_lane_s8::<2>(v.into());
|
||||
assert_eq!(r, 2);
|
||||
let r = vget_lane_s8::<4>(v.into());
|
||||
assert_eq!(r, 4);
|
||||
let r = vget_lane_s8::<5>(v.into());
|
||||
assert_eq!(r, 5);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "neon")]
|
||||
fn test_vget_lane_p8() {
|
||||
let v = u8x8::new(0, 1, 2, 3, 4, 5, 6, 7);
|
||||
let r = vget_lane_p8::<2>(v.into());
|
||||
assert_eq!(r, 2);
|
||||
let r = vget_lane_p8::<3>(v.into());
|
||||
assert_eq!(r, 3);
|
||||
let r = vget_lane_p8::<5>(v.into());
|
||||
assert_eq!(r, 5);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "neon")]
|
||||
fn test_vget_lane_p16() {
|
||||
let v = u16x4::new(0, 1, 2, 3);
|
||||
let r = vget_lane_p16::<2>(v.into());
|
||||
assert_eq!(r, 2);
|
||||
let r = vget_lane_p16::<3>(v.into());
|
||||
assert_eq!(r, 3);
|
||||
let r = vget_lane_p16::<0>(v.into());
|
||||
assert_eq!(r, 0);
|
||||
let r = vget_lane_p16::<1>(v.into());
|
||||
assert_eq!(r, 1);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "neon")]
|
||||
fn test_vget_lane_s16() {
|
||||
let v = i16x4::new(0, 1, 2, 3);
|
||||
let r = vget_lane_s16::<2>(v.into());
|
||||
assert_eq!(r, 2);
|
||||
let r = vget_lane_s16::<3>(v.into());
|
||||
assert_eq!(r, 3);
|
||||
let r = vget_lane_s16::<0>(v.into());
|
||||
assert_eq!(r, 0);
|
||||
let r = vget_lane_s16::<1>(v.into());
|
||||
assert_eq!(r, 1);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "neon")]
|
||||
fn test_vget_lane_u16() {
|
||||
let v = u16x4::new(0, 1, 2, 3);
|
||||
let r = vget_lane_u16::<2>(v.into());
|
||||
assert_eq!(r, 2);
|
||||
let r = vget_lane_u16::<3>(v.into());
|
||||
assert_eq!(r, 3);
|
||||
let r = vget_lane_u16::<0>(v.into());
|
||||
assert_eq!(r, 0);
|
||||
let r = vget_lane_u16::<1>(v.into());
|
||||
assert_eq!(r, 1);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "neon")]
|
||||
fn test_vget_lane_f32() {
|
||||
let v = f32x2::new(0.0, 1.0);
|
||||
let r = vget_lane_f32::<1>(v.into());
|
||||
assert_eq!(r, 1.0);
|
||||
let r = vget_lane_f32::<0>(v.into());
|
||||
assert_eq!(r, 0.0);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "neon")]
|
||||
fn test_vget_lane_s32() {
|
||||
let v = i32x2::new(0, 1);
|
||||
let r = vget_lane_s32::<1>(v.into());
|
||||
assert_eq!(r, 1);
|
||||
let r = vget_lane_s32::<0>(v.into());
|
||||
assert_eq!(r, 0);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "neon")]
|
||||
fn test_vget_lane_u32() {
|
||||
let v = u32x2::new(0, 1);
|
||||
let r = vget_lane_u32::<1>(v.into());
|
||||
assert_eq!(r, 1);
|
||||
let r = vget_lane_u32::<0>(v.into());
|
||||
assert_eq!(r, 0);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "neon")]
|
||||
fn test_vget_lane_s64() {
|
||||
let v = i64x1::new(1);
|
||||
let r = vget_lane_s64::<0>(v.into());
|
||||
assert_eq!(r, 1);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "neon")]
|
||||
fn test_vget_lane_p64() {
|
||||
let v = u64x1::new(1);
|
||||
let r = vget_lane_p64::<0>(v.into());
|
||||
assert_eq!(r, 1);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "neon")]
|
||||
fn test_vgetq_lane_s8() {
|
||||
let v = i8x16::new(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
|
||||
let r = vgetq_lane_s8::<7>(v.into());
|
||||
assert_eq!(r, 7);
|
||||
let r = vgetq_lane_s8::<13>(v.into());
|
||||
assert_eq!(r, 13);
|
||||
let r = vgetq_lane_s8::<3>(v.into());
|
||||
assert_eq!(r, 3);
|
||||
let r = vgetq_lane_s8::<0>(v.into());
|
||||
assert_eq!(r, 0);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "neon")]
|
||||
fn test_vgetq_lane_p8() {
|
||||
let v = u8x16::new(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
|
||||
let r = vgetq_lane_p8::<7>(v.into());
|
||||
assert_eq!(r, 7);
|
||||
let r = vgetq_lane_p8::<13>(v.into());
|
||||
assert_eq!(r, 13);
|
||||
let r = vgetq_lane_p8::<3>(v.into());
|
||||
assert_eq!(r, 3);
|
||||
let r = vgetq_lane_p8::<0>(v.into());
|
||||
assert_eq!(r, 0);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "neon")]
|
||||
fn test_vgetq_lane_u8() {
|
||||
let v = u8x16::new(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
|
||||
let r = vgetq_lane_u8::<7>(v.into());
|
||||
assert_eq!(r, 7);
|
||||
let r = vgetq_lane_u8::<13>(v.into());
|
||||
assert_eq!(r, 13);
|
||||
let r = vgetq_lane_u8::<3>(v.into());
|
||||
assert_eq!(r, 3);
|
||||
let r = vgetq_lane_u8::<0>(v.into());
|
||||
assert_eq!(r, 0);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "neon")]
|
||||
fn test_vgetq_lane_s16() {
|
||||
let v = i16x8::new(0, 1, 2, 3, 4, 5, 6, 7);
|
||||
let r = vgetq_lane_s16::<3>(v.into());
|
||||
assert_eq!(r, 3);
|
||||
let r = vgetq_lane_s16::<6>(v.into());
|
||||
assert_eq!(r, 6);
|
||||
let r = vgetq_lane_s16::<0>(v.into());
|
||||
assert_eq!(r, 0);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "neon")]
|
||||
fn test_vgetq_lane_p16() {
|
||||
let v = u16x8::new(0, 1, 2, 3, 4, 5, 6, 7);
|
||||
let r = vgetq_lane_p16::<3>(v.into());
|
||||
assert_eq!(r, 3);
|
||||
let r = vgetq_lane_p16::<7>(v.into());
|
||||
assert_eq!(r, 7);
|
||||
let r = vgetq_lane_p16::<1>(v.into());
|
||||
assert_eq!(r, 1);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "neon")]
|
||||
fn test_vgetq_lane_f32() {
|
||||
let v = f32x4::new(0.0, 1.0, 2.0, 3.0);
|
||||
let r = vgetq_lane_f32::<3>(v.into());
|
||||
assert_eq!(r, 3.0);
|
||||
let r = vgetq_lane_f32::<0>(v.into());
|
||||
assert_eq!(r, 0.0);
|
||||
let r = vgetq_lane_f32::<2>(v.into());
|
||||
assert_eq!(r, 2.0);
|
||||
let r = vgetq_lane_f32::<1>(v.into());
|
||||
assert_eq!(r, 1.0);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "neon")]
|
||||
fn test_vgetq_lane_s64() {
|
||||
let v = i64x2::new(0, 1);
|
||||
let r = vgetq_lane_s64::<1>(v.into());
|
||||
assert_eq!(r, 1);
|
||||
let r = vgetq_lane_s64::<0>(v.into());
|
||||
assert_eq!(r, 0);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "neon")]
|
||||
fn test_vgetq_lane_p64() {
|
||||
let v = u64x2::new(0, 1);
|
||||
let r = vgetq_lane_p64::<1>(v.into());
|
||||
assert_eq!(r, 1);
|
||||
let r = vgetq_lane_p64::<0>(v.into());
|
||||
assert_eq!(r, 0);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "neon")]
|
||||
fn test_vext_s64() {
|
||||
let a: i64x1 = i64x1::new(0);
|
||||
@@ -1910,182 +1681,6 @@ fn test_vext_u64() {
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "neon")]
|
||||
fn test_vget_high_s8() {
|
||||
let a = i8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
|
||||
let e = i8x8::new(9, 10, 11, 12, 13, 14, 15, 16);
|
||||
let r = i8x8::from(vget_high_s8(a.into()));
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "neon")]
|
||||
fn test_vget_high_s16() {
|
||||
let a = i16x8::new(1, 2, 3, 4, 5, 6, 7, 8);
|
||||
let e = i16x4::new(5, 6, 7, 8);
|
||||
let r = i16x4::from(vget_high_s16(a.into()));
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "neon")]
|
||||
fn test_vget_high_s32() {
|
||||
let a = i32x4::new(1, 2, 3, 4);
|
||||
let e = i32x2::new(3, 4);
|
||||
let r = i32x2::from(vget_high_s32(a.into()));
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "neon")]
|
||||
fn test_vget_high_s64() {
|
||||
let a = i64x2::new(1, 2);
|
||||
let e = i64x1::new(2);
|
||||
let r = i64x1::from(vget_high_s64(a.into()));
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "neon")]
|
||||
fn test_vget_high_u8() {
|
||||
let a = u8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
|
||||
let e = u8x8::new(9, 10, 11, 12, 13, 14, 15, 16);
|
||||
let r = u8x8::from(vget_high_u8(a.into()));
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "neon")]
|
||||
fn test_vget_high_u16() {
|
||||
let a = u16x8::new(1, 2, 3, 4, 5, 6, 7, 8);
|
||||
let e = u16x4::new(5, 6, 7, 8);
|
||||
let r = u16x4::from(vget_high_u16(a.into()));
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "neon")]
|
||||
fn test_vget_high_u32() {
|
||||
let a = u32x4::new(1, 2, 3, 4);
|
||||
let e = u32x2::new(3, 4);
|
||||
let r = u32x2::from(vget_high_u32(a.into()));
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "neon")]
|
||||
fn test_vget_high_u64() {
|
||||
let a = u64x2::new(1, 2);
|
||||
let e = u64x1::new(2);
|
||||
let r = u64x1::from(vget_high_u64(a.into()));
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "neon")]
|
||||
fn test_vget_high_p8() {
|
||||
let a = u8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
|
||||
let e = u8x8::new(9, 10, 11, 12, 13, 14, 15, 16);
|
||||
let r = u8x8::from(vget_high_p8(a.into()));
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "neon")]
|
||||
fn test_vget_high_p16() {
|
||||
let a = u16x8::new(1, 2, 3, 4, 5, 6, 7, 8);
|
||||
let e = u16x4::new(5, 6, 7, 8);
|
||||
let r = u16x4::from(vget_high_p16(a.into()));
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "neon")]
|
||||
fn test_vget_high_f32() {
|
||||
let a = f32x4::new(1.0, 2.0, 3.0, 4.0);
|
||||
let e = f32x2::new(3.0, 4.0);
|
||||
let r = f32x2::from(vget_high_f32(a.into()));
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "neon")]
|
||||
fn test_vget_low_s8() {
|
||||
let a = i8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
|
||||
let e = i8x8::new(1, 2, 3, 4, 5, 6, 7, 8);
|
||||
let r = i8x8::from(vget_low_s8(a.into()));
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "neon")]
|
||||
fn test_vget_low_s16() {
|
||||
let a = i16x8::new(1, 2, 3, 4, 5, 6, 7, 8);
|
||||
let e = i16x4::new(1, 2, 3, 4);
|
||||
let r = i16x4::from(vget_low_s16(a.into()));
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "neon")]
|
||||
fn test_vget_low_s32() {
|
||||
let a = i32x4::new(1, 2, 3, 4);
|
||||
let e = i32x2::new(1, 2);
|
||||
let r = i32x2::from(vget_low_s32(a.into()));
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "neon")]
|
||||
fn test_vget_low_s64() {
|
||||
let a = i64x2::new(1, 2);
|
||||
let e = i64x1::new(1);
|
||||
let r = i64x1::from(vget_low_s64(a.into()));
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "neon")]
|
||||
fn test_vget_low_u8() {
|
||||
let a = u8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
|
||||
let e = u8x8::new(1, 2, 3, 4, 5, 6, 7, 8);
|
||||
let r = u8x8::from(vget_low_u8(a.into()));
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "neon")]
|
||||
fn test_vget_low_u16() {
|
||||
let a = u16x8::new(1, 2, 3, 4, 5, 6, 7, 8);
|
||||
let e = u16x4::new(1, 2, 3, 4);
|
||||
let r = u16x4::from(vget_low_u16(a.into()));
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "neon")]
|
||||
fn test_vget_low_u32() {
|
||||
let a = u32x4::new(1, 2, 3, 4);
|
||||
let e = u32x2::new(1, 2);
|
||||
let r = u32x2::from(vget_low_u32(a.into()));
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "neon")]
|
||||
fn test_vget_low_u64() {
|
||||
let a = u64x2::new(1, 2);
|
||||
let e = u64x1::new(1);
|
||||
let r = u64x1::from(vget_low_u64(a.into()));
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "neon")]
|
||||
fn test_vget_low_p8() {
|
||||
let a = u8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
|
||||
let e = u8x8::new(1, 2, 3, 4, 5, 6, 7, 8);
|
||||
let r = u8x8::from(vget_low_p8(a.into()));
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "neon")]
|
||||
fn test_vget_low_p16() {
|
||||
let a = u16x8::new(1, 2, 3, 4, 5, 6, 7, 8);
|
||||
let e = u16x4::new(1, 2, 3, 4);
|
||||
let r = u16x4::from(vget_low_p16(a.into()));
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "neon")]
|
||||
fn test_vget_low_f32() {
|
||||
let a = f32x4::new(1.0, 2.0, 3.0, 4.0);
|
||||
let e = f32x2::new(1.0, 2.0);
|
||||
let r = f32x2::from(vget_low_f32(a.into()));
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "neon")]
|
||||
fn test_vdupq_n_s8() {
|
||||
let v: i8 = 42;
|
||||
@@ -2469,13 +2064,6 @@ fn test_vmovq_n_f32() {
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "neon")]
|
||||
fn test_vgetq_lane_u64() {
|
||||
let v = u64x2::new(1, 2);
|
||||
let r = vgetq_lane_u64::<1>(v.into());
|
||||
assert_eq!(r, 2);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "neon")]
|
||||
fn test_vadd_s8() {
|
||||
test_ari_s8(
|
||||
@@ -2638,72 +2226,6 @@ fn test_vaddl_u32() {
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "neon")]
|
||||
fn test_vaddl_high_s8() {
|
||||
let a = i8x16::new(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
|
||||
let x = i8::MAX;
|
||||
let b = i8x16::new(x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x);
|
||||
let x = x as i16;
|
||||
let e = i16x8::new(x + 8, x + 9, x + 10, x + 11, x + 12, x + 13, x + 14, x + 15);
|
||||
let r = i16x8::from(vaddl_high_s8(a.into(), b.into()));
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "neon")]
|
||||
fn test_vaddl_high_s16() {
|
||||
let a = i16x8::new(0, 1, 2, 3, 4, 5, 6, 7);
|
||||
let x = i16::MAX;
|
||||
let b = i16x8::new(x, x, x, x, x, x, x, x);
|
||||
let x = x as i32;
|
||||
let e = i32x4::new(x + 4, x + 5, x + 6, x + 7);
|
||||
let r = i32x4::from(vaddl_high_s16(a.into(), b.into()));
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "neon")]
|
||||
fn test_vaddl_high_s32() {
|
||||
let a = i32x4::new(0, 1, 2, 3);
|
||||
let x = i32::MAX;
|
||||
let b = i32x4::new(x, x, x, x);
|
||||
let x = x as i64;
|
||||
let e = i64x2::new(x + 2, x + 3);
|
||||
let r = i64x2::from(vaddl_high_s32(a.into(), b.into()));
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "neon")]
|
||||
fn test_vaddl_high_u8() {
|
||||
let a = u8x16::new(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
|
||||
let x = u8::MAX;
|
||||
let b = u8x16::new(x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x);
|
||||
let x = x as u16;
|
||||
let e = u16x8::new(x + 8, x + 9, x + 10, x + 11, x + 12, x + 13, x + 14, x + 15);
|
||||
let r = u16x8::from(vaddl_high_u8(a.into(), b.into()));
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "neon")]
|
||||
fn test_vaddl_high_u16() {
|
||||
let a = u16x8::new(0, 1, 2, 3, 4, 5, 6, 7);
|
||||
let x = u16::MAX;
|
||||
let b = u16x8::new(x, x, x, x, x, x, x, x);
|
||||
let x = x as u32;
|
||||
let e = u32x4::new(x + 4, x + 5, x + 6, x + 7);
|
||||
let r = u32x4::from(vaddl_high_u16(a.into(), b.into()));
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "neon")]
|
||||
fn test_vaddl_high_u32() {
|
||||
let a = u32x4::new(0, 1, 2, 3);
|
||||
let x = u32::MAX;
|
||||
let b = u32x4::new(x, x, x, x);
|
||||
let x = x as u64;
|
||||
let e = u64x2::new(x + 2, x + 3);
|
||||
let r = u64x2::from(vaddl_high_u32(a.into(), b.into()));
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "neon")]
|
||||
fn test_vaddw_s8() {
|
||||
let x = i16::MAX;
|
||||
@@ -2794,96 +2316,6 @@ fn test_vaddw_u32() {
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "neon")]
|
||||
fn test_vaddw_high_s8() {
|
||||
let x = i16::MAX;
|
||||
let a = i16x8::new(x, 1, 2, 3, 4, 5, 6, 7);
|
||||
let y = i8::MAX;
|
||||
let b = i8x16::new(0, 0, 0, 0, 0, 0, 0, 0, y, y, y, y, y, y, y, y);
|
||||
let y = y as i16;
|
||||
let e = i16x8::new(
|
||||
x.wrapping_add(y),
|
||||
1 + y,
|
||||
2 + y,
|
||||
3 + y,
|
||||
4 + y,
|
||||
5 + y,
|
||||
6 + y,
|
||||
7 + y,
|
||||
);
|
||||
let r = i16x8::from(vaddw_high_s8(a.into(), b.into()));
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "neon")]
|
||||
fn test_vaddw_high_s16() {
|
||||
let x = i32::MAX;
|
||||
let a = i32x4::new(x, 1, 2, 3);
|
||||
let y = i16::MAX;
|
||||
let b = i16x8::new(0, 0, 0, 0, y, y, y, y);
|
||||
let y = y as i32;
|
||||
let e = i32x4::new(x.wrapping_add(y), 1 + y, 2 + y, 3 + y);
|
||||
let r = i32x4::from(vaddw_high_s16(a.into(), b.into()));
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "neon")]
|
||||
fn test_vaddw_high_s32() {
|
||||
let x = i64::MAX;
|
||||
let a = i64x2::new(x, 1);
|
||||
let y = i32::MAX;
|
||||
let b = i32x4::new(0, 0, y, y);
|
||||
let y = y as i64;
|
||||
let e = i64x2::new(x.wrapping_add(y), 1 + y);
|
||||
let r = i64x2::from(vaddw_high_s32(a.into(), b.into()));
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "neon")]
|
||||
fn test_vaddw_high_u8() {
|
||||
let x = u16::MAX;
|
||||
let a = u16x8::new(x, 1, 2, 3, 4, 5, 6, 7);
|
||||
let y = u8::MAX;
|
||||
let b = u8x16::new(0, 0, 0, 0, 0, 0, 0, 0, y, y, y, y, y, y, y, y);
|
||||
let y = y as u16;
|
||||
let e = u16x8::new(
|
||||
x.wrapping_add(y),
|
||||
1 + y,
|
||||
2 + y,
|
||||
3 + y,
|
||||
4 + y,
|
||||
5 + y,
|
||||
6 + y,
|
||||
7 + y,
|
||||
);
|
||||
let r = u16x8::from(vaddw_high_u8(a.into(), b.into()));
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "neon")]
|
||||
fn test_vaddw_high_u16() {
|
||||
let x = u32::MAX;
|
||||
let a = u32x4::new(x, 1, 2, 3);
|
||||
let y = u16::MAX;
|
||||
let b = u16x8::new(0, 0, 0, 0, y, y, y, y);
|
||||
let y = y as u32;
|
||||
let e = u32x4::new(x.wrapping_add(y), 1 + y, 2 + y, 3 + y);
|
||||
let r = u32x4::from(vaddw_high_u16(a.into(), b.into()));
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "neon")]
|
||||
fn test_vaddw_high_u32() {
|
||||
let x = u64::MAX;
|
||||
let a = u64x2::new(x, 1);
|
||||
let y = u32::MAX;
|
||||
let b = u32x4::new(0, 0, y, y);
|
||||
let y = y as u64;
|
||||
let e = u64x2::new(x.wrapping_add(y), 1 + y);
|
||||
let r = u64x2::from(vaddw_high_u32(a.into(), b.into()));
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "neon")]
|
||||
fn test_vmvn_s8() {
|
||||
let a = i8x8::new(0, 1, 2, 3, 4, 5, 6, 7);
|
||||
@@ -5766,42 +5198,9 @@ fn test_vrev64q_p16() {
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
macro_rules! test_vcombine {
|
||||
($test_id:ident => $fn_id:ident ([$($a:expr),*], [$($b:expr),*])) => {
|
||||
#[allow(unused_assignments)]
|
||||
#[simd_test(enable = "neon")]
|
||||
fn $test_id() {
|
||||
let a = Simd::from_array([$($a),*]);
|
||||
let b = Simd::from_array([$($b),*]);
|
||||
let e = Simd::from_array([$($a),* $(, $b)*]);
|
||||
let c = $fn_id(a.into(), b.into());
|
||||
let mut d = e;
|
||||
d = c.into();
|
||||
assert_eq!(d, e);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
test_vcombine!(test_vcombine_s8 => vcombine_s8([3_i8, -4, 5, -6, 7, 8, 9, 10], [13_i8, -14, 15, -16, 17, 18, 19, 110]));
|
||||
test_vcombine!(test_vcombine_u8 => vcombine_u8([3_u8, 4, 5, 6, 7, 8, 9, 10], [13_u8, 14, 15, 16, 17, 18, 19, 110]));
|
||||
test_vcombine!(test_vcombine_p8 => vcombine_p8([3_u8, 4, 5, 6, 7, 8, 9, 10], [13_u8, 14, 15, 16, 17, 18, 19, 110]));
|
||||
|
||||
test_vcombine!(test_vcombine_s16 => vcombine_s16([3_i16, -4, 5, -6], [13_i16, -14, 15, -16]));
|
||||
test_vcombine!(test_vcombine_u16 => vcombine_u16([3_u16, 4, 5, 6], [13_u16, 14, 15, 16]));
|
||||
test_vcombine!(test_vcombine_p16 => vcombine_p16([3_u16, 4, 5, 6], [13_u16, 14, 15, 16]));
|
||||
|
||||
#[cfg(not(target_arch = "arm64ec"))]
|
||||
mod fp16 {
|
||||
use super::*;
|
||||
#[simd_test(enable = "neon,fp16")]
|
||||
fn test_vcombine_f16() {
|
||||
let a = f16x4::from_array([3_f16, 4., 5., 6.]);
|
||||
let b = f16x4::from_array([13_f16, 14., 15., 16.]);
|
||||
let e = f16x8::from_array([3_f16, 4., 5., 6., 13_f16, 14., 15., 16.]);
|
||||
let c = f16x8::from(vcombine_f16(a.into(), b.into()));
|
||||
assert_eq!(c, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "neon,fp16")]
|
||||
fn test_vld1_lane_f16() {
|
||||
let a = f16x4::new(0., 1., 2., 3.);
|
||||
@@ -5837,17 +5236,6 @@ fn test_vld1q_dup_f16() {
|
||||
}
|
||||
}
|
||||
|
||||
test_vcombine!(test_vcombine_s32 => vcombine_s32([3_i32, -4], [13_i32, -14]));
|
||||
test_vcombine!(test_vcombine_u32 => vcombine_u32([3_u32, 4], [13_u32, 14]));
|
||||
// note: poly32x4 does not exist, and neither does vcombine_p32
|
||||
test_vcombine!(test_vcombine_f32 => vcombine_f32([3_f32, -4.], [13_f32, -14.]));
|
||||
|
||||
test_vcombine!(test_vcombine_s64 => vcombine_s64([-3_i64], [13_i64]));
|
||||
test_vcombine!(test_vcombine_u64 => vcombine_u64([3_u64], [13_u64]));
|
||||
test_vcombine!(test_vcombine_p64 => vcombine_p64([3_u64], [13_u64]));
|
||||
#[cfg(any(target_arch = "aarch64", target_arch = "arm64ec"))]
|
||||
test_vcombine!(test_vcombine_f64 => vcombine_f64([-3_f64], [13_f64]));
|
||||
|
||||
macro_rules! lane_wide_store_load_roundtrip {
|
||||
($elem_ty:ty, $len:expr, $idx:expr, $vec_ty:ty, $store:ident, $load:ident) => {
|
||||
let vals: [$elem_ty; $len] = crate::array::from_fn(|i| i as $elem_ty);
|
||||
@@ -5860,11 +5248,10 @@ macro_rules! lane_wide_store_load_roundtrip {
|
||||
};
|
||||
}
|
||||
|
||||
// Most of these are implemented with builtins, which miri can't handle
|
||||
macro_rules! lane_wide_store_load_roundtrip_neon {
|
||||
($( $name:ident $args:tt);* $(;)?) => {
|
||||
$(
|
||||
#[cfg_attr(miri, ignore)]
|
||||
#[cfg_attr(miri, ignore)] // uses unsupported vendor intrinsics
|
||||
#[simd_test(enable = "neon")]
|
||||
unsafe fn $name() {
|
||||
lane_wide_store_load_roundtrip! $args;
|
||||
@@ -5876,7 +5263,7 @@ unsafe fn $name() {
|
||||
macro_rules! lane_wide_store_load_roundtrip_fp16 {
|
||||
($( $name:ident $args:tt);* $(;)?) => {
|
||||
$(
|
||||
#[cfg_attr(miri, ignore)]
|
||||
#[cfg_attr(miri, ignore)] // uses unsupported vendor intrinsics
|
||||
#[simd_test(enable = "neon,fp16")]
|
||||
#[cfg(not(target_arch = "arm64ec"))]
|
||||
unsafe fn $name() {
|
||||
|
||||
@@ -43,14 +43,6 @@
|
||||
fn __lasx_xvsrlri_w(a: __v8i32, b: u32) -> __v8i32;
|
||||
#[link_name = "llvm.loongarch.lasx.xvsrlri.d"]
|
||||
fn __lasx_xvsrlri_d(a: __v4i64, b: u32) -> __v4i64;
|
||||
#[link_name = "llvm.loongarch.lasx.xvbitclr.b"]
|
||||
fn __lasx_xvbitclr_b(a: __v32u8, b: __v32u8) -> __v32u8;
|
||||
#[link_name = "llvm.loongarch.lasx.xvbitclr.h"]
|
||||
fn __lasx_xvbitclr_h(a: __v16u16, b: __v16u16) -> __v16u16;
|
||||
#[link_name = "llvm.loongarch.lasx.xvbitclr.w"]
|
||||
fn __lasx_xvbitclr_w(a: __v8u32, b: __v8u32) -> __v8u32;
|
||||
#[link_name = "llvm.loongarch.lasx.xvbitclr.d"]
|
||||
fn __lasx_xvbitclr_d(a: __v4u64, b: __v4u64) -> __v4u64;
|
||||
#[link_name = "llvm.loongarch.lasx.xvbitclri.b"]
|
||||
fn __lasx_xvbitclri_b(a: __v32u8, b: u32) -> __v32u8;
|
||||
#[link_name = "llvm.loongarch.lasx.xvbitclri.h"]
|
||||
@@ -59,14 +51,6 @@
|
||||
fn __lasx_xvbitclri_w(a: __v8u32, b: u32) -> __v8u32;
|
||||
#[link_name = "llvm.loongarch.lasx.xvbitclri.d"]
|
||||
fn __lasx_xvbitclri_d(a: __v4u64, b: u32) -> __v4u64;
|
||||
#[link_name = "llvm.loongarch.lasx.xvbitset.b"]
|
||||
fn __lasx_xvbitset_b(a: __v32u8, b: __v32u8) -> __v32u8;
|
||||
#[link_name = "llvm.loongarch.lasx.xvbitset.h"]
|
||||
fn __lasx_xvbitset_h(a: __v16u16, b: __v16u16) -> __v16u16;
|
||||
#[link_name = "llvm.loongarch.lasx.xvbitset.w"]
|
||||
fn __lasx_xvbitset_w(a: __v8u32, b: __v8u32) -> __v8u32;
|
||||
#[link_name = "llvm.loongarch.lasx.xvbitset.d"]
|
||||
fn __lasx_xvbitset_d(a: __v4u64, b: __v4u64) -> __v4u64;
|
||||
#[link_name = "llvm.loongarch.lasx.xvbitseti.b"]
|
||||
fn __lasx_xvbitseti_b(a: __v32u8, b: u32) -> __v32u8;
|
||||
#[link_name = "llvm.loongarch.lasx.xvbitseti.h"]
|
||||
@@ -75,14 +59,6 @@
|
||||
fn __lasx_xvbitseti_w(a: __v8u32, b: u32) -> __v8u32;
|
||||
#[link_name = "llvm.loongarch.lasx.xvbitseti.d"]
|
||||
fn __lasx_xvbitseti_d(a: __v4u64, b: u32) -> __v4u64;
|
||||
#[link_name = "llvm.loongarch.lasx.xvbitrev.b"]
|
||||
fn __lasx_xvbitrev_b(a: __v32u8, b: __v32u8) -> __v32u8;
|
||||
#[link_name = "llvm.loongarch.lasx.xvbitrev.h"]
|
||||
fn __lasx_xvbitrev_h(a: __v16u16, b: __v16u16) -> __v16u16;
|
||||
#[link_name = "llvm.loongarch.lasx.xvbitrev.w"]
|
||||
fn __lasx_xvbitrev_w(a: __v8u32, b: __v8u32) -> __v8u32;
|
||||
#[link_name = "llvm.loongarch.lasx.xvbitrev.d"]
|
||||
fn __lasx_xvbitrev_d(a: __v4u64, b: __v4u64) -> __v4u64;
|
||||
#[link_name = "llvm.loongarch.lasx.xvbitrevi.b"]
|
||||
fn __lasx_xvbitrevi_b(a: __v32u8, b: u32) -> __v32u8;
|
||||
#[link_name = "llvm.loongarch.lasx.xvbitrevi.h"]
|
||||
@@ -115,30 +91,6 @@
|
||||
fn __lasx_xvsat_wu(a: __v8u32, b: u32) -> __v8u32;
|
||||
#[link_name = "llvm.loongarch.lasx.xvsat.du"]
|
||||
fn __lasx_xvsat_du(a: __v4u64, b: u32) -> __v4u64;
|
||||
#[link_name = "llvm.loongarch.lasx.xvadda.b"]
|
||||
fn __lasx_xvadda_b(a: __v32i8, b: __v32i8) -> __v32i8;
|
||||
#[link_name = "llvm.loongarch.lasx.xvadda.h"]
|
||||
fn __lasx_xvadda_h(a: __v16i16, b: __v16i16) -> __v16i16;
|
||||
#[link_name = "llvm.loongarch.lasx.xvadda.w"]
|
||||
fn __lasx_xvadda_w(a: __v8i32, b: __v8i32) -> __v8i32;
|
||||
#[link_name = "llvm.loongarch.lasx.xvadda.d"]
|
||||
fn __lasx_xvadda_d(a: __v4i64, b: __v4i64) -> __v4i64;
|
||||
#[link_name = "llvm.loongarch.lasx.xvsadd.b"]
|
||||
fn __lasx_xvsadd_b(a: __v32i8, b: __v32i8) -> __v32i8;
|
||||
#[link_name = "llvm.loongarch.lasx.xvsadd.h"]
|
||||
fn __lasx_xvsadd_h(a: __v16i16, b: __v16i16) -> __v16i16;
|
||||
#[link_name = "llvm.loongarch.lasx.xvsadd.w"]
|
||||
fn __lasx_xvsadd_w(a: __v8i32, b: __v8i32) -> __v8i32;
|
||||
#[link_name = "llvm.loongarch.lasx.xvsadd.d"]
|
||||
fn __lasx_xvsadd_d(a: __v4i64, b: __v4i64) -> __v4i64;
|
||||
#[link_name = "llvm.loongarch.lasx.xvsadd.bu"]
|
||||
fn __lasx_xvsadd_bu(a: __v32u8, b: __v32u8) -> __v32u8;
|
||||
#[link_name = "llvm.loongarch.lasx.xvsadd.hu"]
|
||||
fn __lasx_xvsadd_hu(a: __v16u16, b: __v16u16) -> __v16u16;
|
||||
#[link_name = "llvm.loongarch.lasx.xvsadd.wu"]
|
||||
fn __lasx_xvsadd_wu(a: __v8u32, b: __v8u32) -> __v8u32;
|
||||
#[link_name = "llvm.loongarch.lasx.xvsadd.du"]
|
||||
fn __lasx_xvsadd_du(a: __v4u64, b: __v4u64) -> __v4u64;
|
||||
#[link_name = "llvm.loongarch.lasx.xvavg.b"]
|
||||
fn __lasx_xvavg_b(a: __v32i8, b: __v32i8) -> __v32i8;
|
||||
#[link_name = "llvm.loongarch.lasx.xvavg.h"]
|
||||
@@ -171,38 +123,6 @@
|
||||
fn __lasx_xvavgr_wu(a: __v8u32, b: __v8u32) -> __v8u32;
|
||||
#[link_name = "llvm.loongarch.lasx.xvavgr.du"]
|
||||
fn __lasx_xvavgr_du(a: __v4u64, b: __v4u64) -> __v4u64;
|
||||
#[link_name = "llvm.loongarch.lasx.xvssub.b"]
|
||||
fn __lasx_xvssub_b(a: __v32i8, b: __v32i8) -> __v32i8;
|
||||
#[link_name = "llvm.loongarch.lasx.xvssub.h"]
|
||||
fn __lasx_xvssub_h(a: __v16i16, b: __v16i16) -> __v16i16;
|
||||
#[link_name = "llvm.loongarch.lasx.xvssub.w"]
|
||||
fn __lasx_xvssub_w(a: __v8i32, b: __v8i32) -> __v8i32;
|
||||
#[link_name = "llvm.loongarch.lasx.xvssub.d"]
|
||||
fn __lasx_xvssub_d(a: __v4i64, b: __v4i64) -> __v4i64;
|
||||
#[link_name = "llvm.loongarch.lasx.xvssub.bu"]
|
||||
fn __lasx_xvssub_bu(a: __v32u8, b: __v32u8) -> __v32u8;
|
||||
#[link_name = "llvm.loongarch.lasx.xvssub.hu"]
|
||||
fn __lasx_xvssub_hu(a: __v16u16, b: __v16u16) -> __v16u16;
|
||||
#[link_name = "llvm.loongarch.lasx.xvssub.wu"]
|
||||
fn __lasx_xvssub_wu(a: __v8u32, b: __v8u32) -> __v8u32;
|
||||
#[link_name = "llvm.loongarch.lasx.xvssub.du"]
|
||||
fn __lasx_xvssub_du(a: __v4u64, b: __v4u64) -> __v4u64;
|
||||
#[link_name = "llvm.loongarch.lasx.xvabsd.b"]
|
||||
fn __lasx_xvabsd_b(a: __v32i8, b: __v32i8) -> __v32i8;
|
||||
#[link_name = "llvm.loongarch.lasx.xvabsd.h"]
|
||||
fn __lasx_xvabsd_h(a: __v16i16, b: __v16i16) -> __v16i16;
|
||||
#[link_name = "llvm.loongarch.lasx.xvabsd.w"]
|
||||
fn __lasx_xvabsd_w(a: __v8i32, b: __v8i32) -> __v8i32;
|
||||
#[link_name = "llvm.loongarch.lasx.xvabsd.d"]
|
||||
fn __lasx_xvabsd_d(a: __v4i64, b: __v4i64) -> __v4i64;
|
||||
#[link_name = "llvm.loongarch.lasx.xvabsd.bu"]
|
||||
fn __lasx_xvabsd_bu(a: __v32u8, b: __v32u8) -> __v32u8;
|
||||
#[link_name = "llvm.loongarch.lasx.xvabsd.hu"]
|
||||
fn __lasx_xvabsd_hu(a: __v16u16, b: __v16u16) -> __v16u16;
|
||||
#[link_name = "llvm.loongarch.lasx.xvabsd.wu"]
|
||||
fn __lasx_xvabsd_wu(a: __v8u32, b: __v8u32) -> __v8u32;
|
||||
#[link_name = "llvm.loongarch.lasx.xvabsd.du"]
|
||||
fn __lasx_xvabsd_du(a: __v4u64, b: __v4u64) -> __v4u64;
|
||||
#[link_name = "llvm.loongarch.lasx.xvhaddw.h.b"]
|
||||
fn __lasx_xvhaddw_h_b(a: __v32i8, b: __v32i8) -> __v16i16;
|
||||
#[link_name = "llvm.loongarch.lasx.xvhaddw.w.h"]
|
||||
@@ -235,22 +155,6 @@
|
||||
fn __lasx_xvrepl128vei_w(a: __v8i32, b: u32) -> __v8i32;
|
||||
#[link_name = "llvm.loongarch.lasx.xvrepl128vei.d"]
|
||||
fn __lasx_xvrepl128vei_d(a: __v4i64, b: u32) -> __v4i64;
|
||||
#[link_name = "llvm.loongarch.lasx.xvpickev.b"]
|
||||
fn __lasx_xvpickev_b(a: __v32i8, b: __v32i8) -> __v32i8;
|
||||
#[link_name = "llvm.loongarch.lasx.xvpickev.h"]
|
||||
fn __lasx_xvpickev_h(a: __v16i16, b: __v16i16) -> __v16i16;
|
||||
#[link_name = "llvm.loongarch.lasx.xvpickev.w"]
|
||||
fn __lasx_xvpickev_w(a: __v8i32, b: __v8i32) -> __v8i32;
|
||||
#[link_name = "llvm.loongarch.lasx.xvpickev.d"]
|
||||
fn __lasx_xvpickev_d(a: __v4i64, b: __v4i64) -> __v4i64;
|
||||
#[link_name = "llvm.loongarch.lasx.xvpickod.b"]
|
||||
fn __lasx_xvpickod_b(a: __v32i8, b: __v32i8) -> __v32i8;
|
||||
#[link_name = "llvm.loongarch.lasx.xvpickod.h"]
|
||||
fn __lasx_xvpickod_h(a: __v16i16, b: __v16i16) -> __v16i16;
|
||||
#[link_name = "llvm.loongarch.lasx.xvpickod.w"]
|
||||
fn __lasx_xvpickod_w(a: __v8i32, b: __v8i32) -> __v8i32;
|
||||
#[link_name = "llvm.loongarch.lasx.xvpickod.d"]
|
||||
fn __lasx_xvpickod_d(a: __v4i64, b: __v4i64) -> __v4i64;
|
||||
#[link_name = "llvm.loongarch.lasx.xvilvh.b"]
|
||||
fn __lasx_xvilvh_b(a: __v32i8, b: __v32i8) -> __v32i8;
|
||||
#[link_name = "llvm.loongarch.lasx.xvilvh.h"]
|
||||
@@ -1285,34 +1189,6 @@ pub fn lasx_xvsrlri_d<const IMM6: u32>(a: m256i) -> m256i {
|
||||
unsafe { transmute(__lasx_xvsrlri_d(transmute(a), IMM6)) }
|
||||
}
|
||||
|
||||
#[inline]
|
||||
#[target_feature(enable = "lasx")]
|
||||
#[unstable(feature = "stdarch_loongarch", issue = "117427")]
|
||||
pub fn lasx_xvbitclr_b(a: m256i, b: m256i) -> m256i {
|
||||
unsafe { transmute(__lasx_xvbitclr_b(transmute(a), transmute(b))) }
|
||||
}
|
||||
|
||||
#[inline]
|
||||
#[target_feature(enable = "lasx")]
|
||||
#[unstable(feature = "stdarch_loongarch", issue = "117427")]
|
||||
pub fn lasx_xvbitclr_h(a: m256i, b: m256i) -> m256i {
|
||||
unsafe { transmute(__lasx_xvbitclr_h(transmute(a), transmute(b))) }
|
||||
}
|
||||
|
||||
#[inline]
|
||||
#[target_feature(enable = "lasx")]
|
||||
#[unstable(feature = "stdarch_loongarch", issue = "117427")]
|
||||
pub fn lasx_xvbitclr_w(a: m256i, b: m256i) -> m256i {
|
||||
unsafe { transmute(__lasx_xvbitclr_w(transmute(a), transmute(b))) }
|
||||
}
|
||||
|
||||
#[inline]
|
||||
#[target_feature(enable = "lasx")]
|
||||
#[unstable(feature = "stdarch_loongarch", issue = "117427")]
|
||||
pub fn lasx_xvbitclr_d(a: m256i, b: m256i) -> m256i {
|
||||
unsafe { transmute(__lasx_xvbitclr_d(transmute(a), transmute(b))) }
|
||||
}
|
||||
|
||||
#[inline]
|
||||
#[target_feature(enable = "lasx")]
|
||||
#[rustc_legacy_const_generics(1)]
|
||||
@@ -1349,34 +1225,6 @@ pub fn lasx_xvbitclri_d<const IMM6: u32>(a: m256i) -> m256i {
|
||||
unsafe { transmute(__lasx_xvbitclri_d(transmute(a), IMM6)) }
|
||||
}
|
||||
|
||||
#[inline]
|
||||
#[target_feature(enable = "lasx")]
|
||||
#[unstable(feature = "stdarch_loongarch", issue = "117427")]
|
||||
pub fn lasx_xvbitset_b(a: m256i, b: m256i) -> m256i {
|
||||
unsafe { transmute(__lasx_xvbitset_b(transmute(a), transmute(b))) }
|
||||
}
|
||||
|
||||
#[inline]
|
||||
#[target_feature(enable = "lasx")]
|
||||
#[unstable(feature = "stdarch_loongarch", issue = "117427")]
|
||||
pub fn lasx_xvbitset_h(a: m256i, b: m256i) -> m256i {
|
||||
unsafe { transmute(__lasx_xvbitset_h(transmute(a), transmute(b))) }
|
||||
}
|
||||
|
||||
#[inline]
|
||||
#[target_feature(enable = "lasx")]
|
||||
#[unstable(feature = "stdarch_loongarch", issue = "117427")]
|
||||
pub fn lasx_xvbitset_w(a: m256i, b: m256i) -> m256i {
|
||||
unsafe { transmute(__lasx_xvbitset_w(transmute(a), transmute(b))) }
|
||||
}
|
||||
|
||||
#[inline]
|
||||
#[target_feature(enable = "lasx")]
|
||||
#[unstable(feature = "stdarch_loongarch", issue = "117427")]
|
||||
pub fn lasx_xvbitset_d(a: m256i, b: m256i) -> m256i {
|
||||
unsafe { transmute(__lasx_xvbitset_d(transmute(a), transmute(b))) }
|
||||
}
|
||||
|
||||
#[inline]
|
||||
#[target_feature(enable = "lasx")]
|
||||
#[rustc_legacy_const_generics(1)]
|
||||
@@ -1413,34 +1261,6 @@ pub fn lasx_xvbitseti_d<const IMM6: u32>(a: m256i) -> m256i {
|
||||
unsafe { transmute(__lasx_xvbitseti_d(transmute(a), IMM6)) }
|
||||
}
|
||||
|
||||
#[inline]
|
||||
#[target_feature(enable = "lasx")]
|
||||
#[unstable(feature = "stdarch_loongarch", issue = "117427")]
|
||||
pub fn lasx_xvbitrev_b(a: m256i, b: m256i) -> m256i {
|
||||
unsafe { transmute(__lasx_xvbitrev_b(transmute(a), transmute(b))) }
|
||||
}
|
||||
|
||||
#[inline]
|
||||
#[target_feature(enable = "lasx")]
|
||||
#[unstable(feature = "stdarch_loongarch", issue = "117427")]
|
||||
pub fn lasx_xvbitrev_h(a: m256i, b: m256i) -> m256i {
|
||||
unsafe { transmute(__lasx_xvbitrev_h(transmute(a), transmute(b))) }
|
||||
}
|
||||
|
||||
#[inline]
|
||||
#[target_feature(enable = "lasx")]
|
||||
#[unstable(feature = "stdarch_loongarch", issue = "117427")]
|
||||
pub fn lasx_xvbitrev_w(a: m256i, b: m256i) -> m256i {
|
||||
unsafe { transmute(__lasx_xvbitrev_w(transmute(a), transmute(b))) }
|
||||
}
|
||||
|
||||
#[inline]
|
||||
#[target_feature(enable = "lasx")]
|
||||
#[unstable(feature = "stdarch_loongarch", issue = "117427")]
|
||||
pub fn lasx_xvbitrev_d(a: m256i, b: m256i) -> m256i {
|
||||
unsafe { transmute(__lasx_xvbitrev_d(transmute(a), transmute(b))) }
|
||||
}
|
||||
|
||||
#[inline]
|
||||
#[target_feature(enable = "lasx")]
|
||||
#[rustc_legacy_const_generics(1)]
|
||||
@@ -1585,90 +1405,6 @@ pub fn lasx_xvsat_du<const IMM6: u32>(a: m256i) -> m256i {
|
||||
unsafe { transmute(__lasx_xvsat_du(transmute(a), IMM6)) }
|
||||
}
|
||||
|
||||
#[inline]
|
||||
#[target_feature(enable = "lasx")]
|
||||
#[unstable(feature = "stdarch_loongarch", issue = "117427")]
|
||||
pub fn lasx_xvadda_b(a: m256i, b: m256i) -> m256i {
|
||||
unsafe { transmute(__lasx_xvadda_b(transmute(a), transmute(b))) }
|
||||
}
|
||||
|
||||
#[inline]
|
||||
#[target_feature(enable = "lasx")]
|
||||
#[unstable(feature = "stdarch_loongarch", issue = "117427")]
|
||||
pub fn lasx_xvadda_h(a: m256i, b: m256i) -> m256i {
|
||||
unsafe { transmute(__lasx_xvadda_h(transmute(a), transmute(b))) }
|
||||
}
|
||||
|
||||
#[inline]
|
||||
#[target_feature(enable = "lasx")]
|
||||
#[unstable(feature = "stdarch_loongarch", issue = "117427")]
|
||||
pub fn lasx_xvadda_w(a: m256i, b: m256i) -> m256i {
|
||||
unsafe { transmute(__lasx_xvadda_w(transmute(a), transmute(b))) }
|
||||
}
|
||||
|
||||
#[inline]
|
||||
#[target_feature(enable = "lasx")]
|
||||
#[unstable(feature = "stdarch_loongarch", issue = "117427")]
|
||||
pub fn lasx_xvadda_d(a: m256i, b: m256i) -> m256i {
|
||||
unsafe { transmute(__lasx_xvadda_d(transmute(a), transmute(b))) }
|
||||
}
|
||||
|
||||
#[inline]
|
||||
#[target_feature(enable = "lasx")]
|
||||
#[unstable(feature = "stdarch_loongarch", issue = "117427")]
|
||||
pub fn lasx_xvsadd_b(a: m256i, b: m256i) -> m256i {
|
||||
unsafe { transmute(__lasx_xvsadd_b(transmute(a), transmute(b))) }
|
||||
}
|
||||
|
||||
#[inline]
|
||||
#[target_feature(enable = "lasx")]
|
||||
#[unstable(feature = "stdarch_loongarch", issue = "117427")]
|
||||
pub fn lasx_xvsadd_h(a: m256i, b: m256i) -> m256i {
|
||||
unsafe { transmute(__lasx_xvsadd_h(transmute(a), transmute(b))) }
|
||||
}
|
||||
|
||||
#[inline]
|
||||
#[target_feature(enable = "lasx")]
|
||||
#[unstable(feature = "stdarch_loongarch", issue = "117427")]
|
||||
pub fn lasx_xvsadd_w(a: m256i, b: m256i) -> m256i {
|
||||
unsafe { transmute(__lasx_xvsadd_w(transmute(a), transmute(b))) }
|
||||
}
|
||||
|
||||
#[inline]
|
||||
#[target_feature(enable = "lasx")]
|
||||
#[unstable(feature = "stdarch_loongarch", issue = "117427")]
|
||||
pub fn lasx_xvsadd_d(a: m256i, b: m256i) -> m256i {
|
||||
unsafe { transmute(__lasx_xvsadd_d(transmute(a), transmute(b))) }
|
||||
}
|
||||
|
||||
#[inline]
|
||||
#[target_feature(enable = "lasx")]
|
||||
#[unstable(feature = "stdarch_loongarch", issue = "117427")]
|
||||
pub fn lasx_xvsadd_bu(a: m256i, b: m256i) -> m256i {
|
||||
unsafe { transmute(__lasx_xvsadd_bu(transmute(a), transmute(b))) }
|
||||
}
|
||||
|
||||
#[inline]
|
||||
#[target_feature(enable = "lasx")]
|
||||
#[unstable(feature = "stdarch_loongarch", issue = "117427")]
|
||||
pub fn lasx_xvsadd_hu(a: m256i, b: m256i) -> m256i {
|
||||
unsafe { transmute(__lasx_xvsadd_hu(transmute(a), transmute(b))) }
|
||||
}
|
||||
|
||||
#[inline]
|
||||
#[target_feature(enable = "lasx")]
|
||||
#[unstable(feature = "stdarch_loongarch", issue = "117427")]
|
||||
pub fn lasx_xvsadd_wu(a: m256i, b: m256i) -> m256i {
|
||||
unsafe { transmute(__lasx_xvsadd_wu(transmute(a), transmute(b))) }
|
||||
}
|
||||
|
||||
#[inline]
|
||||
#[target_feature(enable = "lasx")]
|
||||
#[unstable(feature = "stdarch_loongarch", issue = "117427")]
|
||||
pub fn lasx_xvsadd_du(a: m256i, b: m256i) -> m256i {
|
||||
unsafe { transmute(__lasx_xvsadd_du(transmute(a), transmute(b))) }
|
||||
}
|
||||
|
||||
#[inline]
|
||||
#[target_feature(enable = "lasx")]
|
||||
#[unstable(feature = "stdarch_loongarch", issue = "117427")]
|
||||
@@ -1781,118 +1517,6 @@ pub fn lasx_xvavgr_du(a: m256i, b: m256i) -> m256i {
|
||||
unsafe { transmute(__lasx_xvavgr_du(transmute(a), transmute(b))) }
|
||||
}
|
||||
|
||||
#[inline]
|
||||
#[target_feature(enable = "lasx")]
|
||||
#[unstable(feature = "stdarch_loongarch", issue = "117427")]
|
||||
pub fn lasx_xvssub_b(a: m256i, b: m256i) -> m256i {
|
||||
unsafe { transmute(__lasx_xvssub_b(transmute(a), transmute(b))) }
|
||||
}
|
||||
|
||||
#[inline]
|
||||
#[target_feature(enable = "lasx")]
|
||||
#[unstable(feature = "stdarch_loongarch", issue = "117427")]
|
||||
pub fn lasx_xvssub_h(a: m256i, b: m256i) -> m256i {
|
||||
unsafe { transmute(__lasx_xvssub_h(transmute(a), transmute(b))) }
|
||||
}
|
||||
|
||||
#[inline]
|
||||
#[target_feature(enable = "lasx")]
|
||||
#[unstable(feature = "stdarch_loongarch", issue = "117427")]
|
||||
pub fn lasx_xvssub_w(a: m256i, b: m256i) -> m256i {
|
||||
unsafe { transmute(__lasx_xvssub_w(transmute(a), transmute(b))) }
|
||||
}
|
||||
|
||||
#[inline]
|
||||
#[target_feature(enable = "lasx")]
|
||||
#[unstable(feature = "stdarch_loongarch", issue = "117427")]
|
||||
pub fn lasx_xvssub_d(a: m256i, b: m256i) -> m256i {
|
||||
unsafe { transmute(__lasx_xvssub_d(transmute(a), transmute(b))) }
|
||||
}
|
||||
|
||||
#[inline]
|
||||
#[target_feature(enable = "lasx")]
|
||||
#[unstable(feature = "stdarch_loongarch", issue = "117427")]
|
||||
pub fn lasx_xvssub_bu(a: m256i, b: m256i) -> m256i {
|
||||
unsafe { transmute(__lasx_xvssub_bu(transmute(a), transmute(b))) }
|
||||
}
|
||||
|
||||
#[inline]
|
||||
#[target_feature(enable = "lasx")]
|
||||
#[unstable(feature = "stdarch_loongarch", issue = "117427")]
|
||||
pub fn lasx_xvssub_hu(a: m256i, b: m256i) -> m256i {
|
||||
unsafe { transmute(__lasx_xvssub_hu(transmute(a), transmute(b))) }
|
||||
}
|
||||
|
||||
#[inline]
|
||||
#[target_feature(enable = "lasx")]
|
||||
#[unstable(feature = "stdarch_loongarch", issue = "117427")]
|
||||
pub fn lasx_xvssub_wu(a: m256i, b: m256i) -> m256i {
|
||||
unsafe { transmute(__lasx_xvssub_wu(transmute(a), transmute(b))) }
|
||||
}
|
||||
|
||||
#[inline]
|
||||
#[target_feature(enable = "lasx")]
|
||||
#[unstable(feature = "stdarch_loongarch", issue = "117427")]
|
||||
pub fn lasx_xvssub_du(a: m256i, b: m256i) -> m256i {
|
||||
unsafe { transmute(__lasx_xvssub_du(transmute(a), transmute(b))) }
|
||||
}
|
||||
|
||||
#[inline]
|
||||
#[target_feature(enable = "lasx")]
|
||||
#[unstable(feature = "stdarch_loongarch", issue = "117427")]
|
||||
pub fn lasx_xvabsd_b(a: m256i, b: m256i) -> m256i {
|
||||
unsafe { transmute(__lasx_xvabsd_b(transmute(a), transmute(b))) }
|
||||
}
|
||||
|
||||
#[inline]
|
||||
#[target_feature(enable = "lasx")]
|
||||
#[unstable(feature = "stdarch_loongarch", issue = "117427")]
|
||||
pub fn lasx_xvabsd_h(a: m256i, b: m256i) -> m256i {
|
||||
unsafe { transmute(__lasx_xvabsd_h(transmute(a), transmute(b))) }
|
||||
}
|
||||
|
||||
#[inline]
|
||||
#[target_feature(enable = "lasx")]
|
||||
#[unstable(feature = "stdarch_loongarch", issue = "117427")]
|
||||
pub fn lasx_xvabsd_w(a: m256i, b: m256i) -> m256i {
|
||||
unsafe { transmute(__lasx_xvabsd_w(transmute(a), transmute(b))) }
|
||||
}
|
||||
|
||||
#[inline]
|
||||
#[target_feature(enable = "lasx")]
|
||||
#[unstable(feature = "stdarch_loongarch", issue = "117427")]
|
||||
pub fn lasx_xvabsd_d(a: m256i, b: m256i) -> m256i {
|
||||
unsafe { transmute(__lasx_xvabsd_d(transmute(a), transmute(b))) }
|
||||
}
|
||||
|
||||
#[inline]
|
||||
#[target_feature(enable = "lasx")]
|
||||
#[unstable(feature = "stdarch_loongarch", issue = "117427")]
|
||||
pub fn lasx_xvabsd_bu(a: m256i, b: m256i) -> m256i {
|
||||
unsafe { transmute(__lasx_xvabsd_bu(transmute(a), transmute(b))) }
|
||||
}
|
||||
|
||||
#[inline]
|
||||
#[target_feature(enable = "lasx")]
|
||||
#[unstable(feature = "stdarch_loongarch", issue = "117427")]
|
||||
pub fn lasx_xvabsd_hu(a: m256i, b: m256i) -> m256i {
|
||||
unsafe { transmute(__lasx_xvabsd_hu(transmute(a), transmute(b))) }
|
||||
}
|
||||
|
||||
#[inline]
|
||||
#[target_feature(enable = "lasx")]
|
||||
#[unstable(feature = "stdarch_loongarch", issue = "117427")]
|
||||
pub fn lasx_xvabsd_wu(a: m256i, b: m256i) -> m256i {
|
||||
unsafe { transmute(__lasx_xvabsd_wu(transmute(a), transmute(b))) }
|
||||
}
|
||||
|
||||
#[inline]
|
||||
#[target_feature(enable = "lasx")]
|
||||
#[unstable(feature = "stdarch_loongarch", issue = "117427")]
|
||||
pub fn lasx_xvabsd_du(a: m256i, b: m256i) -> m256i {
|
||||
unsafe { transmute(__lasx_xvabsd_du(transmute(a), transmute(b))) }
|
||||
}
|
||||
|
||||
#[inline]
|
||||
#[target_feature(enable = "lasx")]
|
||||
#[unstable(feature = "stdarch_loongarch", issue = "117427")]
|
||||
@@ -2013,62 +1637,6 @@ pub fn lasx_xvrepl128vei_d<const IMM1: u32>(a: m256i) -> m256i {
|
||||
unsafe { transmute(__lasx_xvrepl128vei_d(transmute(a), IMM1)) }
|
||||
}
|
||||
|
||||
#[inline]
|
||||
#[target_feature(enable = "lasx")]
|
||||
#[unstable(feature = "stdarch_loongarch", issue = "117427")]
|
||||
pub fn lasx_xvpickev_b(a: m256i, b: m256i) -> m256i {
|
||||
unsafe { transmute(__lasx_xvpickev_b(transmute(a), transmute(b))) }
|
||||
}
|
||||
|
||||
#[inline]
|
||||
#[target_feature(enable = "lasx")]
|
||||
#[unstable(feature = "stdarch_loongarch", issue = "117427")]
|
||||
pub fn lasx_xvpickev_h(a: m256i, b: m256i) -> m256i {
|
||||
unsafe { transmute(__lasx_xvpickev_h(transmute(a), transmute(b))) }
|
||||
}
|
||||
|
||||
#[inline]
|
||||
#[target_feature(enable = "lasx")]
|
||||
#[unstable(feature = "stdarch_loongarch", issue = "117427")]
|
||||
pub fn lasx_xvpickev_w(a: m256i, b: m256i) -> m256i {
|
||||
unsafe { transmute(__lasx_xvpickev_w(transmute(a), transmute(b))) }
|
||||
}
|
||||
|
||||
#[inline]
|
||||
#[target_feature(enable = "lasx")]
|
||||
#[unstable(feature = "stdarch_loongarch", issue = "117427")]
|
||||
pub fn lasx_xvpickev_d(a: m256i, b: m256i) -> m256i {
|
||||
unsafe { transmute(__lasx_xvpickev_d(transmute(a), transmute(b))) }
|
||||
}
|
||||
|
||||
#[inline]
|
||||
#[target_feature(enable = "lasx")]
|
||||
#[unstable(feature = "stdarch_loongarch", issue = "117427")]
|
||||
pub fn lasx_xvpickod_b(a: m256i, b: m256i) -> m256i {
|
||||
unsafe { transmute(__lasx_xvpickod_b(transmute(a), transmute(b))) }
|
||||
}
|
||||
|
||||
#[inline]
|
||||
#[target_feature(enable = "lasx")]
|
||||
#[unstable(feature = "stdarch_loongarch", issue = "117427")]
|
||||
pub fn lasx_xvpickod_h(a: m256i, b: m256i) -> m256i {
|
||||
unsafe { transmute(__lasx_xvpickod_h(transmute(a), transmute(b))) }
|
||||
}
|
||||
|
||||
#[inline]
|
||||
#[target_feature(enable = "lasx")]
|
||||
#[unstable(feature = "stdarch_loongarch", issue = "117427")]
|
||||
pub fn lasx_xvpickod_w(a: m256i, b: m256i) -> m256i {
|
||||
unsafe { transmute(__lasx_xvpickod_w(transmute(a), transmute(b))) }
|
||||
}
|
||||
|
||||
#[inline]
|
||||
#[target_feature(enable = "lasx")]
|
||||
#[unstable(feature = "stdarch_loongarch", issue = "117427")]
|
||||
pub fn lasx_xvpickod_d(a: m256i, b: m256i) -> m256i {
|
||||
unsafe { transmute(__lasx_xvpickod_d(transmute(a), transmute(b))) }
|
||||
}
|
||||
|
||||
#[inline]
|
||||
#[target_feature(enable = "lasx")]
|
||||
#[unstable(feature = "stdarch_loongarch", issue = "117427")]
|
||||
|
||||
@@ -5,6 +5,68 @@
|
||||
use crate::intrinsics::simd as is;
|
||||
use crate::mem::transmute;
|
||||
|
||||
/// Shuffle helper passed to `impl_vvv!` for `lasx_xvpickev_b`: selects
/// even-numbered lanes from `b` and `a` via `simd_shuffle!(b, a, ..)`.
///
/// Note the operand order: `b` is the first shuffle input and `a` the second.
/// Assuming a 32-lane `T` (presumably `i8x32`, as in the `impl_vvv!` use —
/// confirm), indices `0..=31` name lanes of `b` and `32..=63` lanes of `a`,
/// so the result is: even lanes of `b[0..16]`, even lanes of `a[0..16]`,
/// even lanes of `b[16..32]`, even lanes of `a[16..32]`.
///
/// `unsafe` and bounded only by `T: Copy`: nothing here checks that `T` is a
/// vector type whose lane count matches the 32-entry index list.
#[inline(always)]
|
||||
#[rustc_const_unstable(feature = "stdarch_const_helpers", issue = "none")]
|
||||
const unsafe fn simd_pickev_b<T: Copy>(a: T, b: T) -> T {
|
||||
simd_shuffle!(
|
||||
b,
|
||||
a,
|
||||
[
|
||||
0, 2, 4, 6, 8, 10, 12, 14, 32, 34, 36, 38, 40, 42, 44, 46,
|
||||
16, 18, 20, 22, 24, 26, 28, 30, 48, 50, 52, 54, 56, 58, 60, 62
|
||||
]
|
||||
)
|
||||
}
|
||||
|
||||
/// Shuffle helper passed to `impl_vvv!` for `lasx_xvpickev_d`: selects
/// even-numbered lanes from `b` and `a` via `simd_shuffle!(b, a, ..)`.
///
/// `b` is the first shuffle input and `a` the second. Assuming a 4-lane `T`
/// (presumably `i64x4` — confirm), indices `0..=3` name lanes of `b` and
/// `4..=7` lanes of `a`, so the result is `[b[0], a[0], b[2], a[2]]`.
///
/// `unsafe` and bounded only by `T: Copy`: nothing here checks that `T` is a
/// vector type whose lane count matches the 4-entry index list.
#[inline(always)]
|
||||
#[rustc_const_unstable(feature = "stdarch_const_helpers", issue = "none")]
|
||||
const unsafe fn simd_pickev_d<T: Copy>(a: T, b: T) -> T {
|
||||
simd_shuffle!(b, a, [0, 4, 2, 6])
|
||||
}
|
||||
|
||||
/// Shuffle helper passed to `impl_vvv!` for `lasx_xvpickev_w`: selects
/// even-numbered lanes from `b` and `a` via `simd_shuffle!(b, a, ..)`.
///
/// `b` is the first shuffle input and `a` the second. Assuming an 8-lane `T`
/// (presumably `i32x8` — confirm), indices `0..=7` name lanes of `b` and
/// `8..=15` lanes of `a`, so the result is
/// `[b[0], b[2], a[0], a[2], b[4], b[6], a[4], a[6]]`.
///
/// `unsafe` and bounded only by `T: Copy`: nothing here checks that `T` is a
/// vector type whose lane count matches the 8-entry index list.
#[inline(always)]
|
||||
#[rustc_const_unstable(feature = "stdarch_const_helpers", issue = "none")]
|
||||
const unsafe fn simd_pickev_w<T: Copy>(a: T, b: T) -> T {
|
||||
simd_shuffle!(b, a, [0, 2, 8, 10, 4, 6, 12, 14])
|
||||
}
|
||||
|
||||
/// Shuffle helper passed to `impl_vvv!` for `lasx_xvpickev_h`: selects
/// even-numbered lanes from `b` and `a` via `simd_shuffle!(b, a, ..)`.
///
/// `b` is the first shuffle input and `a` the second. Assuming a 16-lane `T`
/// (presumably `i16x16` — confirm), indices `0..=15` name lanes of `b` and
/// `16..=31` lanes of `a`, so the result is: even lanes of `b[0..8]`, even
/// lanes of `a[0..8]`, even lanes of `b[8..16]`, even lanes of `a[8..16]`.
///
/// `unsafe` and bounded only by `T: Copy`: nothing here checks that `T` is a
/// vector type whose lane count matches the 16-entry index list.
#[inline(always)]
|
||||
#[rustc_const_unstable(feature = "stdarch_const_helpers", issue = "none")]
|
||||
const unsafe fn simd_pickev_h<T: Copy>(a: T, b: T) -> T {
|
||||
simd_shuffle!(b, a, [0, 2, 4, 6, 16, 18, 20, 22, 8, 10, 12, 14, 24, 26, 28, 30])
|
||||
}
|
||||
|
||||
/// Shuffle helper passed to `impl_vvv!` for `lasx_xvpickod_b`: selects
/// odd-numbered lanes from `b` and `a` via `simd_shuffle!(b, a, ..)`.
///
/// Note the operand order: `b` is the first shuffle input and `a` the second.
/// Assuming a 32-lane `T` (presumably `i8x32`, as in the `impl_vvv!` use —
/// confirm), indices `0..=31` name lanes of `b` and `32..=63` lanes of `a`,
/// so the result is: odd lanes of `b[0..16]`, odd lanes of `a[0..16]`,
/// odd lanes of `b[16..32]`, odd lanes of `a[16..32]`.
///
/// `unsafe` and bounded only by `T: Copy`: nothing here checks that `T` is a
/// vector type whose lane count matches the 32-entry index list.
#[inline(always)]
|
||||
#[rustc_const_unstable(feature = "stdarch_const_helpers", issue = "none")]
|
||||
const unsafe fn simd_pickod_b<T: Copy>(a: T, b: T) -> T {
|
||||
simd_shuffle!(
|
||||
b,
|
||||
a,
|
||||
[
|
||||
1, 3, 5, 7, 9, 11, 13, 15, 33, 35, 37, 39, 41, 43, 45, 47,
|
||||
17, 19, 21, 23, 25, 27, 29, 31, 49, 51, 53, 55, 57, 59, 61, 63
|
||||
]
|
||||
)
|
||||
}
|
||||
|
||||
/// Shuffle helper passed to `impl_vvv!` for `lasx_xvpickod_d`: selects
/// odd-numbered lanes from `b` and `a` via `simd_shuffle!(b, a, ..)`.
///
/// `b` is the first shuffle input and `a` the second. Assuming a 4-lane `T`
/// (presumably `i64x4` — confirm), indices `0..=3` name lanes of `b` and
/// `4..=7` lanes of `a`, so the result is `[b[1], a[1], b[3], a[3]]`.
///
/// `unsafe` and bounded only by `T: Copy`: nothing here checks that `T` is a
/// vector type whose lane count matches the 4-entry index list.
#[inline(always)]
|
||||
#[rustc_const_unstable(feature = "stdarch_const_helpers", issue = "none")]
|
||||
const unsafe fn simd_pickod_d<T: Copy>(a: T, b: T) -> T {
|
||||
simd_shuffle!(b, a, [1, 5, 3, 7])
|
||||
}
|
||||
|
||||
/// Shuffle helper passed to `impl_vvv!` for `lasx_xvpickod_w`: selects
/// odd-numbered lanes from `b` and `a` via `simd_shuffle!(b, a, ..)`.
///
/// `b` is the first shuffle input and `a` the second. Assuming an 8-lane `T`
/// (presumably `i32x8` — confirm), indices `0..=7` name lanes of `b` and
/// `8..=15` lanes of `a`, so the result is
/// `[b[1], b[3], a[1], a[3], b[5], b[7], a[5], a[7]]`.
///
/// `unsafe` and bounded only by `T: Copy`: nothing here checks that `T` is a
/// vector type whose lane count matches the 8-entry index list.
#[inline(always)]
|
||||
#[rustc_const_unstable(feature = "stdarch_const_helpers", issue = "none")]
|
||||
const unsafe fn simd_pickod_w<T: Copy>(a: T, b: T) -> T {
|
||||
simd_shuffle!(b, a, [1, 3, 9, 11, 5, 7, 13, 15])
|
||||
}
|
||||
|
||||
/// Shuffle helper passed to `impl_vvv!` for `lasx_xvpickod_h`: selects
/// odd-numbered lanes from `b` and `a` via `simd_shuffle!(b, a, ..)`.
///
/// `b` is the first shuffle input and `a` the second. Assuming a 16-lane `T`
/// (presumably `i16x16` — confirm), indices `0..=15` name lanes of `b` and
/// `16..=31` lanes of `a`, so the result is: odd lanes of `b[0..8]`, odd
/// lanes of `a[0..8]`, odd lanes of `b[8..16]`, odd lanes of `a[8..16]`.
///
/// `unsafe` and bounded only by `T: Copy`: nothing here checks that `T` is a
/// vector type whose lane count matches the 16-entry index list.
#[inline(always)]
|
||||
#[rustc_const_unstable(feature = "stdarch_const_helpers", issue = "none")]
|
||||
const unsafe fn simd_pickod_h<T: Copy>(a: T, b: T) -> T {
|
||||
simd_shuffle!(b, a, [1, 3, 5, 7, 17, 19, 21, 23, 9, 11, 13, 15, 25, 27, 29, 31])
|
||||
}
|
||||
|
||||
impl_vv!("lasx", lasx_xvpcnt_b, is::simd_ctpop, m256i, i8x32);
|
||||
impl_vv!("lasx", lasx_xvpcnt_h, is::simd_ctpop, m256i, i16x16);
|
||||
impl_vv!("lasx", lasx_xvpcnt_w, is::simd_ctpop, m256i, i32x8);
|
||||
@@ -120,6 +182,54 @@
|
||||
impl_vvv!("lasx", lasx_xvsrl_h, ls::simd_shr, m256i, u16x16);
|
||||
impl_vvv!("lasx", lasx_xvsrl_w, ls::simd_shr, m256i, u32x8);
|
||||
impl_vvv!("lasx", lasx_xvsrl_d, ls::simd_shr, m256i, u64x4);
|
||||
// `lasx_xvbitclr_*`, `lasx_xvbitset_*` and `lasx_xvbitrev_*`: each name is
// handed to `impl_vvv!` together with the matching `ls::simd_bit*` helper,
// one invocation per unsigned lane type (u8x32, u16x16, u32x8, u64x4).
impl_vvv!("lasx", lasx_xvbitclr_b, ls::simd_bitclr, m256i, u8x32);
|
||||
impl_vvv!("lasx", lasx_xvbitclr_h, ls::simd_bitclr, m256i, u16x16);
|
||||
impl_vvv!("lasx", lasx_xvbitclr_w, ls::simd_bitclr, m256i, u32x8);
|
||||
impl_vvv!("lasx", lasx_xvbitclr_d, ls::simd_bitclr, m256i, u64x4);
|
||||
impl_vvv!("lasx", lasx_xvbitset_b, ls::simd_bitset, m256i, u8x32);
|
||||
impl_vvv!("lasx", lasx_xvbitset_h, ls::simd_bitset, m256i, u16x16);
|
||||
impl_vvv!("lasx", lasx_xvbitset_w, ls::simd_bitset, m256i, u32x8);
|
||||
impl_vvv!("lasx", lasx_xvbitset_d, ls::simd_bitset, m256i, u64x4);
|
||||
impl_vvv!("lasx", lasx_xvbitrev_b, ls::simd_bitrev, m256i, u8x32);
|
||||
impl_vvv!("lasx", lasx_xvbitrev_h, ls::simd_bitrev, m256i, u16x16);
|
||||
impl_vvv!("lasx", lasx_xvbitrev_w, ls::simd_bitrev, m256i, u32x8);
|
||||
impl_vvv!("lasx", lasx_xvbitrev_d, ls::simd_bitrev, m256i, u64x4);
|
||||
// `lasx_xvsadd_*` / `lasx_xvssub_*`: routed through `is::simd_saturating_add`
// and `is::simd_saturating_sub` (`is` = `crate::intrinsics::simd`). The
// b/h/w/d suffixes use signed lane types (i8x32 .. i64x4); the bu/hu/wu/du
// suffixes use the unsigned ones (u8x32 .. u64x4).
impl_vvv!("lasx", lasx_xvsadd_b, is::simd_saturating_add, m256i, i8x32);
|
||||
impl_vvv!("lasx", lasx_xvsadd_h, is::simd_saturating_add, m256i, i16x16);
|
||||
impl_vvv!("lasx", lasx_xvsadd_w, is::simd_saturating_add, m256i, i32x8);
|
||||
impl_vvv!("lasx", lasx_xvsadd_d, is::simd_saturating_add, m256i, i64x4);
|
||||
impl_vvv!("lasx", lasx_xvsadd_bu, is::simd_saturating_add, m256i, u8x32);
|
||||
impl_vvv!("lasx", lasx_xvsadd_hu, is::simd_saturating_add, m256i, u16x16);
|
||||
impl_vvv!("lasx", lasx_xvsadd_wu, is::simd_saturating_add, m256i, u32x8);
|
||||
impl_vvv!("lasx", lasx_xvsadd_du, is::simd_saturating_add, m256i, u64x4);
|
||||
impl_vvv!("lasx", lasx_xvssub_b, is::simd_saturating_sub, m256i, i8x32);
|
||||
impl_vvv!("lasx", lasx_xvssub_h, is::simd_saturating_sub, m256i, i16x16);
|
||||
impl_vvv!("lasx", lasx_xvssub_w, is::simd_saturating_sub, m256i, i32x8);
|
||||
impl_vvv!("lasx", lasx_xvssub_d, is::simd_saturating_sub, m256i, i64x4);
|
||||
impl_vvv!("lasx", lasx_xvssub_bu, is::simd_saturating_sub, m256i, u8x32);
|
||||
impl_vvv!("lasx", lasx_xvssub_hu, is::simd_saturating_sub, m256i, u16x16);
|
||||
impl_vvv!("lasx", lasx_xvssub_wu, is::simd_saturating_sub, m256i, u32x8);
|
||||
impl_vvv!("lasx", lasx_xvssub_du, is::simd_saturating_sub, m256i, u64x4);
|
||||
// `lasx_xvadda_*` use `ls::simd_adda` and exist for signed lane types only;
// `lasx_xvabsd_*` use `ls::simd_absd` with both signed (b/h/w/d) and
// unsigned (bu/hu/wu/du) lane types.
impl_vvv!("lasx", lasx_xvadda_b, ls::simd_adda, m256i, i8x32);
|
||||
impl_vvv!("lasx", lasx_xvadda_h, ls::simd_adda, m256i, i16x16);
|
||||
impl_vvv!("lasx", lasx_xvadda_w, ls::simd_adda, m256i, i32x8);
|
||||
impl_vvv!("lasx", lasx_xvadda_d, ls::simd_adda, m256i, i64x4);
|
||||
impl_vvv!("lasx", lasx_xvabsd_b, ls::simd_absd, m256i, i8x32);
|
||||
impl_vvv!("lasx", lasx_xvabsd_h, ls::simd_absd, m256i, i16x16);
|
||||
impl_vvv!("lasx", lasx_xvabsd_w, ls::simd_absd, m256i, i32x8);
|
||||
impl_vvv!("lasx", lasx_xvabsd_d, ls::simd_absd, m256i, i64x4);
|
||||
impl_vvv!("lasx", lasx_xvabsd_bu, ls::simd_absd, m256i, u8x32);
|
||||
impl_vvv!("lasx", lasx_xvabsd_hu, ls::simd_absd, m256i, u16x16);
|
||||
impl_vvv!("lasx", lasx_xvabsd_wu, ls::simd_absd, m256i, u32x8);
|
||||
impl_vvv!("lasx", lasx_xvabsd_du, ls::simd_absd, m256i, u64x4);
|
||||
// `lasx_xvpickev_*` / `lasx_xvpickod_*`: unlike the entries that name an
// `is::`/`ls::` function, these pass the `simd_pickev_*` / `simd_pickod_*`
// shuffle helpers defined earlier in this file, one per lane width.
impl_vvv!("lasx", lasx_xvpickev_b, simd_pickev_b, m256i, i8x32);
|
||||
impl_vvv!("lasx", lasx_xvpickev_h, simd_pickev_h, m256i, i16x16);
|
||||
impl_vvv!("lasx", lasx_xvpickev_w, simd_pickev_w, m256i, i32x8);
|
||||
impl_vvv!("lasx", lasx_xvpickev_d, simd_pickev_d, m256i, i64x4);
|
||||
impl_vvv!("lasx", lasx_xvpickod_b, simd_pickod_b, m256i, i8x32);
|
||||
impl_vvv!("lasx", lasx_xvpickod_h, simd_pickod_h, m256i, i16x16);
|
||||
impl_vvv!("lasx", lasx_xvpickod_w, simd_pickod_w, m256i, i32x8);
|
||||
impl_vvv!("lasx", lasx_xvpickod_d, simd_pickod_d, m256i, i64x4);
|
||||
|
||||
impl_vuv!("lasx", lasx_xvslli_b, is::simd_shl, m256i, i8x32);
|
||||
impl_vuv!("lasx", lasx_xvslli_h, is::simd_shl, m256i, i16x16);
|
||||
|
||||
@@ -43,14 +43,6 @@
|
||||
fn __lsx_vsrlri_w(a: __v4i32, b: u32) -> __v4i32;
|
||||
#[link_name = "llvm.loongarch.lsx.vsrlri.d"]
|
||||
fn __lsx_vsrlri_d(a: __v2i64, b: u32) -> __v2i64;
|
||||
#[link_name = "llvm.loongarch.lsx.vbitclr.b"]
|
||||
fn __lsx_vbitclr_b(a: __v16u8, b: __v16u8) -> __v16u8;
|
||||
#[link_name = "llvm.loongarch.lsx.vbitclr.h"]
|
||||
fn __lsx_vbitclr_h(a: __v8u16, b: __v8u16) -> __v8u16;
|
||||
#[link_name = "llvm.loongarch.lsx.vbitclr.w"]
|
||||
fn __lsx_vbitclr_w(a: __v4u32, b: __v4u32) -> __v4u32;
|
||||
#[link_name = "llvm.loongarch.lsx.vbitclr.d"]
|
||||
fn __lsx_vbitclr_d(a: __v2u64, b: __v2u64) -> __v2u64;
|
||||
#[link_name = "llvm.loongarch.lsx.vbitclri.b"]
|
||||
fn __lsx_vbitclri_b(a: __v16u8, b: u32) -> __v16u8;
|
||||
#[link_name = "llvm.loongarch.lsx.vbitclri.h"]
|
||||
@@ -59,14 +51,6 @@
|
||||
fn __lsx_vbitclri_w(a: __v4u32, b: u32) -> __v4u32;
|
||||
#[link_name = "llvm.loongarch.lsx.vbitclri.d"]
|
||||
fn __lsx_vbitclri_d(a: __v2u64, b: u32) -> __v2u64;
|
||||
#[link_name = "llvm.loongarch.lsx.vbitset.b"]
|
||||
fn __lsx_vbitset_b(a: __v16u8, b: __v16u8) -> __v16u8;
|
||||
#[link_name = "llvm.loongarch.lsx.vbitset.h"]
|
||||
fn __lsx_vbitset_h(a: __v8u16, b: __v8u16) -> __v8u16;
|
||||
#[link_name = "llvm.loongarch.lsx.vbitset.w"]
|
||||
fn __lsx_vbitset_w(a: __v4u32, b: __v4u32) -> __v4u32;
|
||||
#[link_name = "llvm.loongarch.lsx.vbitset.d"]
|
||||
fn __lsx_vbitset_d(a: __v2u64, b: __v2u64) -> __v2u64;
|
||||
#[link_name = "llvm.loongarch.lsx.vbitseti.b"]
|
||||
fn __lsx_vbitseti_b(a: __v16u8, b: u32) -> __v16u8;
|
||||
#[link_name = "llvm.loongarch.lsx.vbitseti.h"]
|
||||
@@ -75,14 +59,6 @@
|
||||
fn __lsx_vbitseti_w(a: __v4u32, b: u32) -> __v4u32;
|
||||
#[link_name = "llvm.loongarch.lsx.vbitseti.d"]
|
||||
fn __lsx_vbitseti_d(a: __v2u64, b: u32) -> __v2u64;
|
||||
#[link_name = "llvm.loongarch.lsx.vbitrev.b"]
|
||||
fn __lsx_vbitrev_b(a: __v16u8, b: __v16u8) -> __v16u8;
|
||||
#[link_name = "llvm.loongarch.lsx.vbitrev.h"]
|
||||
fn __lsx_vbitrev_h(a: __v8u16, b: __v8u16) -> __v8u16;
|
||||
#[link_name = "llvm.loongarch.lsx.vbitrev.w"]
|
||||
fn __lsx_vbitrev_w(a: __v4u32, b: __v4u32) -> __v4u32;
|
||||
#[link_name = "llvm.loongarch.lsx.vbitrev.d"]
|
||||
fn __lsx_vbitrev_d(a: __v2u64, b: __v2u64) -> __v2u64;
|
||||
#[link_name = "llvm.loongarch.lsx.vbitrevi.b"]
|
||||
fn __lsx_vbitrevi_b(a: __v16u8, b: u32) -> __v16u8;
|
||||
#[link_name = "llvm.loongarch.lsx.vbitrevi.h"]
|
||||
@@ -115,30 +91,6 @@
|
||||
fn __lsx_vsat_wu(a: __v4u32, b: u32) -> __v4u32;
|
||||
#[link_name = "llvm.loongarch.lsx.vsat.du"]
|
||||
fn __lsx_vsat_du(a: __v2u64, b: u32) -> __v2u64;
|
||||
#[link_name = "llvm.loongarch.lsx.vadda.b"]
|
||||
fn __lsx_vadda_b(a: __v16i8, b: __v16i8) -> __v16i8;
|
||||
#[link_name = "llvm.loongarch.lsx.vadda.h"]
|
||||
fn __lsx_vadda_h(a: __v8i16, b: __v8i16) -> __v8i16;
|
||||
#[link_name = "llvm.loongarch.lsx.vadda.w"]
|
||||
fn __lsx_vadda_w(a: __v4i32, b: __v4i32) -> __v4i32;
|
||||
#[link_name = "llvm.loongarch.lsx.vadda.d"]
|
||||
fn __lsx_vadda_d(a: __v2i64, b: __v2i64) -> __v2i64;
|
||||
#[link_name = "llvm.loongarch.lsx.vsadd.b"]
|
||||
fn __lsx_vsadd_b(a: __v16i8, b: __v16i8) -> __v16i8;
|
||||
#[link_name = "llvm.loongarch.lsx.vsadd.h"]
|
||||
fn __lsx_vsadd_h(a: __v8i16, b: __v8i16) -> __v8i16;
|
||||
#[link_name = "llvm.loongarch.lsx.vsadd.w"]
|
||||
fn __lsx_vsadd_w(a: __v4i32, b: __v4i32) -> __v4i32;
|
||||
#[link_name = "llvm.loongarch.lsx.vsadd.d"]
|
||||
fn __lsx_vsadd_d(a: __v2i64, b: __v2i64) -> __v2i64;
|
||||
#[link_name = "llvm.loongarch.lsx.vsadd.bu"]
|
||||
fn __lsx_vsadd_bu(a: __v16u8, b: __v16u8) -> __v16u8;
|
||||
#[link_name = "llvm.loongarch.lsx.vsadd.hu"]
|
||||
fn __lsx_vsadd_hu(a: __v8u16, b: __v8u16) -> __v8u16;
|
||||
#[link_name = "llvm.loongarch.lsx.vsadd.wu"]
|
||||
fn __lsx_vsadd_wu(a: __v4u32, b: __v4u32) -> __v4u32;
|
||||
#[link_name = "llvm.loongarch.lsx.vsadd.du"]
|
||||
fn __lsx_vsadd_du(a: __v2u64, b: __v2u64) -> __v2u64;
|
||||
#[link_name = "llvm.loongarch.lsx.vavg.b"]
|
||||
fn __lsx_vavg_b(a: __v16i8, b: __v16i8) -> __v16i8;
|
||||
#[link_name = "llvm.loongarch.lsx.vavg.h"]
|
||||
@@ -171,38 +123,6 @@
|
||||
fn __lsx_vavgr_wu(a: __v4u32, b: __v4u32) -> __v4u32;
|
||||
#[link_name = "llvm.loongarch.lsx.vavgr.du"]
|
||||
fn __lsx_vavgr_du(a: __v2u64, b: __v2u64) -> __v2u64;
|
||||
#[link_name = "llvm.loongarch.lsx.vssub.b"]
|
||||
fn __lsx_vssub_b(a: __v16i8, b: __v16i8) -> __v16i8;
|
||||
#[link_name = "llvm.loongarch.lsx.vssub.h"]
|
||||
fn __lsx_vssub_h(a: __v8i16, b: __v8i16) -> __v8i16;
|
||||
#[link_name = "llvm.loongarch.lsx.vssub.w"]
|
||||
fn __lsx_vssub_w(a: __v4i32, b: __v4i32) -> __v4i32;
|
||||
#[link_name = "llvm.loongarch.lsx.vssub.d"]
|
||||
fn __lsx_vssub_d(a: __v2i64, b: __v2i64) -> __v2i64;
|
||||
#[link_name = "llvm.loongarch.lsx.vssub.bu"]
|
||||
fn __lsx_vssub_bu(a: __v16u8, b: __v16u8) -> __v16u8;
|
||||
#[link_name = "llvm.loongarch.lsx.vssub.hu"]
|
||||
fn __lsx_vssub_hu(a: __v8u16, b: __v8u16) -> __v8u16;
|
||||
#[link_name = "llvm.loongarch.lsx.vssub.wu"]
|
||||
fn __lsx_vssub_wu(a: __v4u32, b: __v4u32) -> __v4u32;
|
||||
#[link_name = "llvm.loongarch.lsx.vssub.du"]
|
||||
fn __lsx_vssub_du(a: __v2u64, b: __v2u64) -> __v2u64;
|
||||
#[link_name = "llvm.loongarch.lsx.vabsd.b"]
|
||||
fn __lsx_vabsd_b(a: __v16i8, b: __v16i8) -> __v16i8;
|
||||
#[link_name = "llvm.loongarch.lsx.vabsd.h"]
|
||||
fn __lsx_vabsd_h(a: __v8i16, b: __v8i16) -> __v8i16;
|
||||
#[link_name = "llvm.loongarch.lsx.vabsd.w"]
|
||||
fn __lsx_vabsd_w(a: __v4i32, b: __v4i32) -> __v4i32;
|
||||
#[link_name = "llvm.loongarch.lsx.vabsd.d"]
|
||||
fn __lsx_vabsd_d(a: __v2i64, b: __v2i64) -> __v2i64;
|
||||
#[link_name = "llvm.loongarch.lsx.vabsd.bu"]
|
||||
fn __lsx_vabsd_bu(a: __v16u8, b: __v16u8) -> __v16u8;
|
||||
#[link_name = "llvm.loongarch.lsx.vabsd.hu"]
|
||||
fn __lsx_vabsd_hu(a: __v8u16, b: __v8u16) -> __v8u16;
|
||||
#[link_name = "llvm.loongarch.lsx.vabsd.wu"]
|
||||
fn __lsx_vabsd_wu(a: __v4u32, b: __v4u32) -> __v4u32;
|
||||
#[link_name = "llvm.loongarch.lsx.vabsd.du"]
|
||||
fn __lsx_vabsd_du(a: __v2u64, b: __v2u64) -> __v2u64;
|
||||
#[link_name = "llvm.loongarch.lsx.vhaddw.h.b"]
|
||||
fn __lsx_vhaddw_h_b(a: __v16i8, b: __v16i8) -> __v8i16;
|
||||
#[link_name = "llvm.loongarch.lsx.vhaddw.w.h"]
|
||||
@@ -243,22 +163,6 @@
|
||||
fn __lsx_vreplvei_w(a: __v4i32, b: u32) -> __v4i32;
|
||||
#[link_name = "llvm.loongarch.lsx.vreplvei.d"]
|
||||
fn __lsx_vreplvei_d(a: __v2i64, b: u32) -> __v2i64;
|
||||
#[link_name = "llvm.loongarch.lsx.vpickev.b"]
|
||||
fn __lsx_vpickev_b(a: __v16i8, b: __v16i8) -> __v16i8;
|
||||
#[link_name = "llvm.loongarch.lsx.vpickev.h"]
|
||||
fn __lsx_vpickev_h(a: __v8i16, b: __v8i16) -> __v8i16;
|
||||
#[link_name = "llvm.loongarch.lsx.vpickev.w"]
|
||||
fn __lsx_vpickev_w(a: __v4i32, b: __v4i32) -> __v4i32;
|
||||
#[link_name = "llvm.loongarch.lsx.vpickev.d"]
|
||||
fn __lsx_vpickev_d(a: __v2i64, b: __v2i64) -> __v2i64;
|
||||
#[link_name = "llvm.loongarch.lsx.vpickod.b"]
|
||||
fn __lsx_vpickod_b(a: __v16i8, b: __v16i8) -> __v16i8;
|
||||
#[link_name = "llvm.loongarch.lsx.vpickod.h"]
|
||||
fn __lsx_vpickod_h(a: __v8i16, b: __v8i16) -> __v8i16;
|
||||
#[link_name = "llvm.loongarch.lsx.vpickod.w"]
|
||||
fn __lsx_vpickod_w(a: __v4i32, b: __v4i32) -> __v4i32;
|
||||
#[link_name = "llvm.loongarch.lsx.vpickod.d"]
|
||||
fn __lsx_vpickod_d(a: __v2i64, b: __v2i64) -> __v2i64;
|
||||
#[link_name = "llvm.loongarch.lsx.vilvh.b"]
|
||||
fn __lsx_vilvh_b(a: __v16i8, b: __v16i8) -> __v16i8;
|
||||
#[link_name = "llvm.loongarch.lsx.vilvh.h"]
|
||||
@@ -1197,34 +1101,6 @@ pub fn lsx_vsrlri_d<const IMM6: u32>(a: m128i) -> m128i {
|
||||
unsafe { transmute(__lsx_vsrlri_d(transmute(a), IMM6)) }
|
||||
}
|
||||
|
||||
#[inline]
|
||||
#[target_feature(enable = "lsx")]
|
||||
#[unstable(feature = "stdarch_loongarch", issue = "117427")]
|
||||
pub fn lsx_vbitclr_b(a: m128i, b: m128i) -> m128i {
|
||||
unsafe { transmute(__lsx_vbitclr_b(transmute(a), transmute(b))) }
|
||||
}
|
||||
|
||||
#[inline]
|
||||
#[target_feature(enable = "lsx")]
|
||||
#[unstable(feature = "stdarch_loongarch", issue = "117427")]
|
||||
pub fn lsx_vbitclr_h(a: m128i, b: m128i) -> m128i {
|
||||
unsafe { transmute(__lsx_vbitclr_h(transmute(a), transmute(b))) }
|
||||
}
|
||||
|
||||
#[inline]
|
||||
#[target_feature(enable = "lsx")]
|
||||
#[unstable(feature = "stdarch_loongarch", issue = "117427")]
|
||||
pub fn lsx_vbitclr_w(a: m128i, b: m128i) -> m128i {
|
||||
unsafe { transmute(__lsx_vbitclr_w(transmute(a), transmute(b))) }
|
||||
}
|
||||
|
||||
#[inline]
|
||||
#[target_feature(enable = "lsx")]
|
||||
#[unstable(feature = "stdarch_loongarch", issue = "117427")]
|
||||
pub fn lsx_vbitclr_d(a: m128i, b: m128i) -> m128i {
|
||||
unsafe { transmute(__lsx_vbitclr_d(transmute(a), transmute(b))) }
|
||||
}
|
||||
|
||||
#[inline]
|
||||
#[target_feature(enable = "lsx")]
|
||||
#[rustc_legacy_const_generics(1)]
|
||||
@@ -1261,34 +1137,6 @@ pub fn lsx_vbitclri_d<const IMM6: u32>(a: m128i) -> m128i {
|
||||
unsafe { transmute(__lsx_vbitclri_d(transmute(a), IMM6)) }
|
||||
}
|
||||
|
||||
#[inline]
|
||||
#[target_feature(enable = "lsx")]
|
||||
#[unstable(feature = "stdarch_loongarch", issue = "117427")]
|
||||
pub fn lsx_vbitset_b(a: m128i, b: m128i) -> m128i {
|
||||
unsafe { transmute(__lsx_vbitset_b(transmute(a), transmute(b))) }
|
||||
}
|
||||
|
||||
#[inline]
|
||||
#[target_feature(enable = "lsx")]
|
||||
#[unstable(feature = "stdarch_loongarch", issue = "117427")]
|
||||
pub fn lsx_vbitset_h(a: m128i, b: m128i) -> m128i {
|
||||
unsafe { transmute(__lsx_vbitset_h(transmute(a), transmute(b))) }
|
||||
}
|
||||
|
||||
#[inline]
|
||||
#[target_feature(enable = "lsx")]
|
||||
#[unstable(feature = "stdarch_loongarch", issue = "117427")]
|
||||
pub fn lsx_vbitset_w(a: m128i, b: m128i) -> m128i {
|
||||
unsafe { transmute(__lsx_vbitset_w(transmute(a), transmute(b))) }
|
||||
}
|
||||
|
||||
#[inline]
|
||||
#[target_feature(enable = "lsx")]
|
||||
#[unstable(feature = "stdarch_loongarch", issue = "117427")]
|
||||
pub fn lsx_vbitset_d(a: m128i, b: m128i) -> m128i {
|
||||
unsafe { transmute(__lsx_vbitset_d(transmute(a), transmute(b))) }
|
||||
}
|
||||
|
||||
#[inline]
|
||||
#[target_feature(enable = "lsx")]
|
||||
#[rustc_legacy_const_generics(1)]
|
||||
@@ -1325,34 +1173,6 @@ pub fn lsx_vbitseti_d<const IMM6: u32>(a: m128i) -> m128i {
|
||||
unsafe { transmute(__lsx_vbitseti_d(transmute(a), IMM6)) }
|
||||
}
|
||||
|
||||
#[inline]
|
||||
#[target_feature(enable = "lsx")]
|
||||
#[unstable(feature = "stdarch_loongarch", issue = "117427")]
|
||||
pub fn lsx_vbitrev_b(a: m128i, b: m128i) -> m128i {
|
||||
unsafe { transmute(__lsx_vbitrev_b(transmute(a), transmute(b))) }
|
||||
}
|
||||
|
||||
#[inline]
|
||||
#[target_feature(enable = "lsx")]
|
||||
#[unstable(feature = "stdarch_loongarch", issue = "117427")]
|
||||
pub fn lsx_vbitrev_h(a: m128i, b: m128i) -> m128i {
|
||||
unsafe { transmute(__lsx_vbitrev_h(transmute(a), transmute(b))) }
|
||||
}
|
||||
|
||||
#[inline]
|
||||
#[target_feature(enable = "lsx")]
|
||||
#[unstable(feature = "stdarch_loongarch", issue = "117427")]
|
||||
pub fn lsx_vbitrev_w(a: m128i, b: m128i) -> m128i {
|
||||
unsafe { transmute(__lsx_vbitrev_w(transmute(a), transmute(b))) }
|
||||
}
|
||||
|
||||
#[inline]
|
||||
#[target_feature(enable = "lsx")]
|
||||
#[unstable(feature = "stdarch_loongarch", issue = "117427")]
|
||||
pub fn lsx_vbitrev_d(a: m128i, b: m128i) -> m128i {
|
||||
unsafe { transmute(__lsx_vbitrev_d(transmute(a), transmute(b))) }
|
||||
}
|
||||
|
||||
#[inline]
|
||||
#[target_feature(enable = "lsx")]
|
||||
#[rustc_legacy_const_generics(1)]
|
||||
@@ -1497,90 +1317,6 @@ pub fn lsx_vsat_du<const IMM6: u32>(a: m128i) -> m128i {
|
||||
unsafe { transmute(__lsx_vsat_du(transmute(a), IMM6)) }
|
||||
}
|
||||
|
||||
#[inline]
|
||||
#[target_feature(enable = "lsx")]
|
||||
#[unstable(feature = "stdarch_loongarch", issue = "117427")]
|
||||
pub fn lsx_vadda_b(a: m128i, b: m128i) -> m128i {
|
||||
unsafe { transmute(__lsx_vadda_b(transmute(a), transmute(b))) }
|
||||
}
|
||||
|
||||
#[inline]
|
||||
#[target_feature(enable = "lsx")]
|
||||
#[unstable(feature = "stdarch_loongarch", issue = "117427")]
|
||||
pub fn lsx_vadda_h(a: m128i, b: m128i) -> m128i {
|
||||
unsafe { transmute(__lsx_vadda_h(transmute(a), transmute(b))) }
|
||||
}
|
||||
|
||||
#[inline]
|
||||
#[target_feature(enable = "lsx")]
|
||||
#[unstable(feature = "stdarch_loongarch", issue = "117427")]
|
||||
pub fn lsx_vadda_w(a: m128i, b: m128i) -> m128i {
|
||||
unsafe { transmute(__lsx_vadda_w(transmute(a), transmute(b))) }
|
||||
}
|
||||
|
||||
#[inline]
|
||||
#[target_feature(enable = "lsx")]
|
||||
#[unstable(feature = "stdarch_loongarch", issue = "117427")]
|
||||
pub fn lsx_vadda_d(a: m128i, b: m128i) -> m128i {
|
||||
unsafe { transmute(__lsx_vadda_d(transmute(a), transmute(b))) }
|
||||
}
|
||||
|
||||
#[inline]
|
||||
#[target_feature(enable = "lsx")]
|
||||
#[unstable(feature = "stdarch_loongarch", issue = "117427")]
|
||||
pub fn lsx_vsadd_b(a: m128i, b: m128i) -> m128i {
|
||||
unsafe { transmute(__lsx_vsadd_b(transmute(a), transmute(b))) }
|
||||
}
|
||||
|
||||
#[inline]
|
||||
#[target_feature(enable = "lsx")]
|
||||
#[unstable(feature = "stdarch_loongarch", issue = "117427")]
|
||||
pub fn lsx_vsadd_h(a: m128i, b: m128i) -> m128i {
|
||||
unsafe { transmute(__lsx_vsadd_h(transmute(a), transmute(b))) }
|
||||
}
|
||||
|
||||
#[inline]
|
||||
#[target_feature(enable = "lsx")]
|
||||
#[unstable(feature = "stdarch_loongarch", issue = "117427")]
|
||||
pub fn lsx_vsadd_w(a: m128i, b: m128i) -> m128i {
|
||||
unsafe { transmute(__lsx_vsadd_w(transmute(a), transmute(b))) }
|
||||
}
|
||||
|
||||
#[inline]
|
||||
#[target_feature(enable = "lsx")]
|
||||
#[unstable(feature = "stdarch_loongarch", issue = "117427")]
|
||||
pub fn lsx_vsadd_d(a: m128i, b: m128i) -> m128i {
|
||||
unsafe { transmute(__lsx_vsadd_d(transmute(a), transmute(b))) }
|
||||
}
|
||||
|
||||
#[inline]
|
||||
#[target_feature(enable = "lsx")]
|
||||
#[unstable(feature = "stdarch_loongarch", issue = "117427")]
|
||||
pub fn lsx_vsadd_bu(a: m128i, b: m128i) -> m128i {
|
||||
unsafe { transmute(__lsx_vsadd_bu(transmute(a), transmute(b))) }
|
||||
}
|
||||
|
||||
#[inline]
|
||||
#[target_feature(enable = "lsx")]
|
||||
#[unstable(feature = "stdarch_loongarch", issue = "117427")]
|
||||
pub fn lsx_vsadd_hu(a: m128i, b: m128i) -> m128i {
|
||||
unsafe { transmute(__lsx_vsadd_hu(transmute(a), transmute(b))) }
|
||||
}
|
||||
|
||||
#[inline]
|
||||
#[target_feature(enable = "lsx")]
|
||||
#[unstable(feature = "stdarch_loongarch", issue = "117427")]
|
||||
pub fn lsx_vsadd_wu(a: m128i, b: m128i) -> m128i {
|
||||
unsafe { transmute(__lsx_vsadd_wu(transmute(a), transmute(b))) }
|
||||
}
|
||||
|
||||
#[inline]
|
||||
#[target_feature(enable = "lsx")]
|
||||
#[unstable(feature = "stdarch_loongarch", issue = "117427")]
|
||||
pub fn lsx_vsadd_du(a: m128i, b: m128i) -> m128i {
|
||||
unsafe { transmute(__lsx_vsadd_du(transmute(a), transmute(b))) }
|
||||
}
|
||||
|
||||
#[inline]
|
||||
#[target_feature(enable = "lsx")]
|
||||
#[unstable(feature = "stdarch_loongarch", issue = "117427")]
|
||||
@@ -1693,118 +1429,6 @@ pub fn lsx_vavgr_du(a: m128i, b: m128i) -> m128i {
|
||||
unsafe { transmute(__lsx_vavgr_du(transmute(a), transmute(b))) }
|
||||
}
|
||||
|
||||
#[inline]
|
||||
#[target_feature(enable = "lsx")]
|
||||
#[unstable(feature = "stdarch_loongarch", issue = "117427")]
|
||||
pub fn lsx_vssub_b(a: m128i, b: m128i) -> m128i {
|
||||
unsafe { transmute(__lsx_vssub_b(transmute(a), transmute(b))) }
|
||||
}
|
||||
|
||||
#[inline]
|
||||
#[target_feature(enable = "lsx")]
|
||||
#[unstable(feature = "stdarch_loongarch", issue = "117427")]
|
||||
pub fn lsx_vssub_h(a: m128i, b: m128i) -> m128i {
|
||||
unsafe { transmute(__lsx_vssub_h(transmute(a), transmute(b))) }
|
||||
}
|
||||
|
||||
#[inline]
|
||||
#[target_feature(enable = "lsx")]
|
||||
#[unstable(feature = "stdarch_loongarch", issue = "117427")]
|
||||
pub fn lsx_vssub_w(a: m128i, b: m128i) -> m128i {
|
||||
unsafe { transmute(__lsx_vssub_w(transmute(a), transmute(b))) }
|
||||
}
|
||||
|
||||
#[inline]
|
||||
#[target_feature(enable = "lsx")]
|
||||
#[unstable(feature = "stdarch_loongarch", issue = "117427")]
|
||||
pub fn lsx_vssub_d(a: m128i, b: m128i) -> m128i {
|
||||
unsafe { transmute(__lsx_vssub_d(transmute(a), transmute(b))) }
|
||||
}
|
||||
|
||||
#[inline]
|
||||
#[target_feature(enable = "lsx")]
|
||||
#[unstable(feature = "stdarch_loongarch", issue = "117427")]
|
||||
pub fn lsx_vssub_bu(a: m128i, b: m128i) -> m128i {
|
||||
unsafe { transmute(__lsx_vssub_bu(transmute(a), transmute(b))) }
|
||||
}
|
||||
|
||||
#[inline]
|
||||
#[target_feature(enable = "lsx")]
|
||||
#[unstable(feature = "stdarch_loongarch", issue = "117427")]
|
||||
pub fn lsx_vssub_hu(a: m128i, b: m128i) -> m128i {
|
||||
unsafe { transmute(__lsx_vssub_hu(transmute(a), transmute(b))) }
|
||||
}
|
||||
|
||||
#[inline]
|
||||
#[target_feature(enable = "lsx")]
|
||||
#[unstable(feature = "stdarch_loongarch", issue = "117427")]
|
||||
pub fn lsx_vssub_wu(a: m128i, b: m128i) -> m128i {
|
||||
unsafe { transmute(__lsx_vssub_wu(transmute(a), transmute(b))) }
|
||||
}
|
||||
|
||||
#[inline]
|
||||
#[target_feature(enable = "lsx")]
|
||||
#[unstable(feature = "stdarch_loongarch", issue = "117427")]
|
||||
pub fn lsx_vssub_du(a: m128i, b: m128i) -> m128i {
|
||||
unsafe { transmute(__lsx_vssub_du(transmute(a), transmute(b))) }
|
||||
}
|
||||
|
||||
#[inline]
|
||||
#[target_feature(enable = "lsx")]
|
||||
#[unstable(feature = "stdarch_loongarch", issue = "117427")]
|
||||
pub fn lsx_vabsd_b(a: m128i, b: m128i) -> m128i {
|
||||
unsafe { transmute(__lsx_vabsd_b(transmute(a), transmute(b))) }
|
||||
}
|
||||
|
||||
#[inline]
|
||||
#[target_feature(enable = "lsx")]
|
||||
#[unstable(feature = "stdarch_loongarch", issue = "117427")]
|
||||
pub fn lsx_vabsd_h(a: m128i, b: m128i) -> m128i {
|
||||
unsafe { transmute(__lsx_vabsd_h(transmute(a), transmute(b))) }
|
||||
}
|
||||
|
||||
#[inline]
|
||||
#[target_feature(enable = "lsx")]
|
||||
#[unstable(feature = "stdarch_loongarch", issue = "117427")]
|
||||
pub fn lsx_vabsd_w(a: m128i, b: m128i) -> m128i {
|
||||
unsafe { transmute(__lsx_vabsd_w(transmute(a), transmute(b))) }
|
||||
}
|
||||
|
||||
#[inline]
|
||||
#[target_feature(enable = "lsx")]
|
||||
#[unstable(feature = "stdarch_loongarch", issue = "117427")]
|
||||
pub fn lsx_vabsd_d(a: m128i, b: m128i) -> m128i {
|
||||
unsafe { transmute(__lsx_vabsd_d(transmute(a), transmute(b))) }
|
||||
}
|
||||
|
||||
#[inline]
|
||||
#[target_feature(enable = "lsx")]
|
||||
#[unstable(feature = "stdarch_loongarch", issue = "117427")]
|
||||
pub fn lsx_vabsd_bu(a: m128i, b: m128i) -> m128i {
|
||||
unsafe { transmute(__lsx_vabsd_bu(transmute(a), transmute(b))) }
|
||||
}
|
||||
|
||||
#[inline]
|
||||
#[target_feature(enable = "lsx")]
|
||||
#[unstable(feature = "stdarch_loongarch", issue = "117427")]
|
||||
pub fn lsx_vabsd_hu(a: m128i, b: m128i) -> m128i {
|
||||
unsafe { transmute(__lsx_vabsd_hu(transmute(a), transmute(b))) }
|
||||
}
|
||||
|
||||
#[inline]
|
||||
#[target_feature(enable = "lsx")]
|
||||
#[unstable(feature = "stdarch_loongarch", issue = "117427")]
|
||||
pub fn lsx_vabsd_wu(a: m128i, b: m128i) -> m128i {
|
||||
unsafe { transmute(__lsx_vabsd_wu(transmute(a), transmute(b))) }
|
||||
}
|
||||
|
||||
#[inline]
|
||||
#[target_feature(enable = "lsx")]
|
||||
#[unstable(feature = "stdarch_loongarch", issue = "117427")]
|
||||
pub fn lsx_vabsd_du(a: m128i, b: m128i) -> m128i {
|
||||
unsafe { transmute(__lsx_vabsd_du(transmute(a), transmute(b))) }
|
||||
}
|
||||
|
||||
#[inline]
|
||||
#[target_feature(enable = "lsx")]
|
||||
#[unstable(feature = "stdarch_loongarch", issue = "117427")]
|
||||
@@ -1953,62 +1577,6 @@ pub fn lsx_vreplvei_d<const IMM1: u32>(a: m128i) -> m128i {
|
||||
unsafe { transmute(__lsx_vreplvei_d(transmute(a), IMM1)) }
|
||||
}
|
||||
|
||||
#[inline]
|
||||
#[target_feature(enable = "lsx")]
|
||||
#[unstable(feature = "stdarch_loongarch", issue = "117427")]
|
||||
pub fn lsx_vpickev_b(a: m128i, b: m128i) -> m128i {
|
||||
unsafe { transmute(__lsx_vpickev_b(transmute(a), transmute(b))) }
|
||||
}
|
||||
|
||||
#[inline]
|
||||
#[target_feature(enable = "lsx")]
|
||||
#[unstable(feature = "stdarch_loongarch", issue = "117427")]
|
||||
pub fn lsx_vpickev_h(a: m128i, b: m128i) -> m128i {
|
||||
unsafe { transmute(__lsx_vpickev_h(transmute(a), transmute(b))) }
|
||||
}
|
||||
|
||||
#[inline]
|
||||
#[target_feature(enable = "lsx")]
|
||||
#[unstable(feature = "stdarch_loongarch", issue = "117427")]
|
||||
pub fn lsx_vpickev_w(a: m128i, b: m128i) -> m128i {
|
||||
unsafe { transmute(__lsx_vpickev_w(transmute(a), transmute(b))) }
|
||||
}
|
||||
|
||||
#[inline]
|
||||
#[target_feature(enable = "lsx")]
|
||||
#[unstable(feature = "stdarch_loongarch", issue = "117427")]
|
||||
pub fn lsx_vpickev_d(a: m128i, b: m128i) -> m128i {
|
||||
unsafe { transmute(__lsx_vpickev_d(transmute(a), transmute(b))) }
|
||||
}
|
||||
|
||||
#[inline]
|
||||
#[target_feature(enable = "lsx")]
|
||||
#[unstable(feature = "stdarch_loongarch", issue = "117427")]
|
||||
pub fn lsx_vpickod_b(a: m128i, b: m128i) -> m128i {
|
||||
unsafe { transmute(__lsx_vpickod_b(transmute(a), transmute(b))) }
|
||||
}
|
||||
|
||||
#[inline]
|
||||
#[target_feature(enable = "lsx")]
|
||||
#[unstable(feature = "stdarch_loongarch", issue = "117427")]
|
||||
pub fn lsx_vpickod_h(a: m128i, b: m128i) -> m128i {
|
||||
unsafe { transmute(__lsx_vpickod_h(transmute(a), transmute(b))) }
|
||||
}
|
||||
|
||||
#[inline]
|
||||
#[target_feature(enable = "lsx")]
|
||||
#[unstable(feature = "stdarch_loongarch", issue = "117427")]
|
||||
pub fn lsx_vpickod_w(a: m128i, b: m128i) -> m128i {
|
||||
unsafe { transmute(__lsx_vpickod_w(transmute(a), transmute(b))) }
|
||||
}
|
||||
|
||||
#[inline]
|
||||
#[target_feature(enable = "lsx")]
|
||||
#[unstable(feature = "stdarch_loongarch", issue = "117427")]
|
||||
pub fn lsx_vpickod_d(a: m128i, b: m128i) -> m128i {
|
||||
unsafe { transmute(__lsx_vpickod_d(transmute(a), transmute(b))) }
|
||||
}
|
||||
|
||||
#[inline]
|
||||
#[target_feature(enable = "lsx")]
|
||||
#[unstable(feature = "stdarch_loongarch", issue = "117427")]
|
||||
|
||||
@@ -5,6 +5,54 @@
|
||||
use crate::intrinsics::simd as is;
|
||||
use crate::mem::transmute;
|
||||
|
||||
#[inline(always)]
|
||||
#[rustc_const_unstable(feature = "stdarch_const_helpers", issue = "none")]
|
||||
const unsafe fn simd_pickev_b<T: Copy>(a: T, b: T) -> T {
|
||||
simd_shuffle!(b, a, [0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30])
|
||||
}
|
||||
|
||||
#[inline(always)]
|
||||
#[rustc_const_unstable(feature = "stdarch_const_helpers", issue = "none")]
|
||||
const unsafe fn simd_pickev_h<T: Copy>(a: T, b: T) -> T {
|
||||
simd_shuffle!(b, a, [0, 2, 4, 6, 8, 10, 12, 14])
|
||||
}
|
||||
|
||||
#[inline(always)]
|
||||
#[rustc_const_unstable(feature = "stdarch_const_helpers", issue = "none")]
|
||||
const unsafe fn simd_pickev_w<T: Copy>(a: T, b: T) -> T {
|
||||
simd_shuffle!(b, a, [0, 2, 4, 6])
|
||||
}
|
||||
|
||||
#[inline(always)]
|
||||
#[rustc_const_unstable(feature = "stdarch_const_helpers", issue = "none")]
|
||||
const unsafe fn simd_pickev_d<T: Copy>(a: T, b: T) -> T {
|
||||
simd_shuffle!(b, a, [0, 2])
|
||||
}
|
||||
|
||||
#[inline(always)]
|
||||
#[rustc_const_unstable(feature = "stdarch_const_helpers", issue = "none")]
|
||||
const unsafe fn simd_pickod_b<T: Copy>(a: T, b: T) -> T {
|
||||
simd_shuffle!(b, a, [1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31])
|
||||
}
|
||||
|
||||
#[inline(always)]
|
||||
#[rustc_const_unstable(feature = "stdarch_const_helpers", issue = "none")]
|
||||
const unsafe fn simd_pickod_h<T: Copy>(a: T, b: T) -> T {
|
||||
simd_shuffle!(b, a, [1, 3, 5, 7, 9, 11, 13, 15])
|
||||
}
|
||||
|
||||
#[inline(always)]
|
||||
#[rustc_const_unstable(feature = "stdarch_const_helpers", issue = "none")]
|
||||
const unsafe fn simd_pickod_w<T: Copy>(a: T, b: T) -> T {
|
||||
simd_shuffle!(b, a, [1, 3, 5, 7])
|
||||
}
|
||||
|
||||
#[inline(always)]
|
||||
#[rustc_const_unstable(feature = "stdarch_const_helpers", issue = "none")]
|
||||
const unsafe fn simd_pickod_d<T: Copy>(a: T, b: T) -> T {
|
||||
simd_shuffle!(b, a, [1, 3])
|
||||
}
|
||||
|
||||
impl_vv!("lsx", lsx_vpcnt_b, is::simd_ctpop, m128i, i8x16);
|
||||
impl_vv!("lsx", lsx_vpcnt_h, is::simd_ctpop, m128i, i16x8);
|
||||
impl_vv!("lsx", lsx_vpcnt_w, is::simd_ctpop, m128i, i32x4);
|
||||
@@ -120,6 +168,54 @@
|
||||
impl_vvv!("lsx", lsx_vsrl_h, ls::simd_shr, m128i, u16x8);
|
||||
impl_vvv!("lsx", lsx_vsrl_w, ls::simd_shr, m128i, u32x4);
|
||||
impl_vvv!("lsx", lsx_vsrl_d, ls::simd_shr, m128i, u64x2);
|
||||
impl_vvv!("lsx", lsx_vbitclr_b, ls::simd_bitclr, m128i, u8x16);
|
||||
impl_vvv!("lsx", lsx_vbitclr_h, ls::simd_bitclr, m128i, u16x8);
|
||||
impl_vvv!("lsx", lsx_vbitclr_w, ls::simd_bitclr, m128i, u32x4);
|
||||
impl_vvv!("lsx", lsx_vbitclr_d, ls::simd_bitclr, m128i, u64x2);
|
||||
impl_vvv!("lsx", lsx_vbitset_b, ls::simd_bitset, m128i, u8x16);
|
||||
impl_vvv!("lsx", lsx_vbitset_h, ls::simd_bitset, m128i, u16x8);
|
||||
impl_vvv!("lsx", lsx_vbitset_w, ls::simd_bitset, m128i, u32x4);
|
||||
impl_vvv!("lsx", lsx_vbitset_d, ls::simd_bitset, m128i, u64x2);
|
||||
impl_vvv!("lsx", lsx_vbitrev_b, ls::simd_bitrev, m128i, u8x16);
|
||||
impl_vvv!("lsx", lsx_vbitrev_h, ls::simd_bitrev, m128i, u16x8);
|
||||
impl_vvv!("lsx", lsx_vbitrev_w, ls::simd_bitrev, m128i, u32x4);
|
||||
impl_vvv!("lsx", lsx_vbitrev_d, ls::simd_bitrev, m128i, u64x2);
|
||||
impl_vvv!("lsx", lsx_vsadd_b, is::simd_saturating_add, m128i, i8x16);
|
||||
impl_vvv!("lsx", lsx_vsadd_h, is::simd_saturating_add, m128i, i16x8);
|
||||
impl_vvv!("lsx", lsx_vsadd_w, is::simd_saturating_add, m128i, i32x4);
|
||||
impl_vvv!("lsx", lsx_vsadd_d, is::simd_saturating_add, m128i, i64x2);
|
||||
impl_vvv!("lsx", lsx_vsadd_bu, is::simd_saturating_add, m128i, u8x16);
|
||||
impl_vvv!("lsx", lsx_vsadd_hu, is::simd_saturating_add, m128i, u16x8);
|
||||
impl_vvv!("lsx", lsx_vsadd_wu, is::simd_saturating_add, m128i, u32x4);
|
||||
impl_vvv!("lsx", lsx_vsadd_du, is::simd_saturating_add, m128i, u64x2);
|
||||
impl_vvv!("lsx", lsx_vssub_b, is::simd_saturating_sub, m128i, i8x16);
|
||||
impl_vvv!("lsx", lsx_vssub_h, is::simd_saturating_sub, m128i, i16x8);
|
||||
impl_vvv!("lsx", lsx_vssub_w, is::simd_saturating_sub, m128i, i32x4);
|
||||
impl_vvv!("lsx", lsx_vssub_d, is::simd_saturating_sub, m128i, i64x2);
|
||||
impl_vvv!("lsx", lsx_vssub_bu, is::simd_saturating_sub, m128i, u8x16);
|
||||
impl_vvv!("lsx", lsx_vssub_hu, is::simd_saturating_sub, m128i, u16x8);
|
||||
impl_vvv!("lsx", lsx_vssub_wu, is::simd_saturating_sub, m128i, u32x4);
|
||||
impl_vvv!("lsx", lsx_vssub_du, is::simd_saturating_sub, m128i, u64x2);
|
||||
impl_vvv!("lsx", lsx_vadda_b, ls::simd_adda, m128i, i8x16);
|
||||
impl_vvv!("lsx", lsx_vadda_h, ls::simd_adda, m128i, i16x8);
|
||||
impl_vvv!("lsx", lsx_vadda_w, ls::simd_adda, m128i, i32x4);
|
||||
impl_vvv!("lsx", lsx_vadda_d, ls::simd_adda, m128i, i64x2);
|
||||
impl_vvv!("lsx", lsx_vabsd_b, ls::simd_absd, m128i, i8x16);
|
||||
impl_vvv!("lsx", lsx_vabsd_h, ls::simd_absd, m128i, i16x8);
|
||||
impl_vvv!("lsx", lsx_vabsd_w, ls::simd_absd, m128i, i32x4);
|
||||
impl_vvv!("lsx", lsx_vabsd_d, ls::simd_absd, m128i, i64x2);
|
||||
impl_vvv!("lsx", lsx_vabsd_bu, ls::simd_absd, m128i, u8x16);
|
||||
impl_vvv!("lsx", lsx_vabsd_hu, ls::simd_absd, m128i, u16x8);
|
||||
impl_vvv!("lsx", lsx_vabsd_wu, ls::simd_absd, m128i, u32x4);
|
||||
impl_vvv!("lsx", lsx_vabsd_du, ls::simd_absd, m128i, u64x2);
|
||||
impl_vvv!("lsx", lsx_vpickev_b, simd_pickev_b, m128i, i8x16);
|
||||
impl_vvv!("lsx", lsx_vpickev_h, simd_pickev_h, m128i, i16x8);
|
||||
impl_vvv!("lsx", lsx_vpickev_w, simd_pickev_w, m128i, i32x4);
|
||||
impl_vvv!("lsx", lsx_vpickev_d, simd_pickev_d, m128i, i64x2);
|
||||
impl_vvv!("lsx", lsx_vpickod_b, simd_pickod_b, m128i, i8x16);
|
||||
impl_vvv!("lsx", lsx_vpickod_h, simd_pickod_h, m128i, i16x8);
|
||||
impl_vvv!("lsx", lsx_vpickod_w, simd_pickod_w, m128i, i32x4);
|
||||
impl_vvv!("lsx", lsx_vpickod_d, simd_pickod_d, m128i, i64x2);
|
||||
|
||||
impl_vuv!("lsx", lsx_vslli_b, is::simd_shl, m128i, i8x16);
|
||||
impl_vuv!("lsx", lsx_vslli_h, is::simd_shl, m128i, i16x8);
|
||||
|
||||
@@ -45,12 +45,50 @@ unsafe fn splat(v: i64) -> Self {
|
||||
impl_simd_ext!(u64x2, u64);
|
||||
impl_simd_ext!(u64x4, u64);
|
||||
|
||||
#[inline(always)]
|
||||
#[rustc_const_unstable(feature = "stdarch_const_helpers", issue = "none")]
|
||||
pub(crate) const unsafe fn simd_abs<T: Copy + const SimdExt>(a: T) -> T {
|
||||
let m: T = is::simd_lt(a, ls::simd_splat(0));
|
||||
is::simd_select(m, is::simd_neg(a), a)
|
||||
}
|
||||
|
||||
#[inline(always)]
|
||||
#[rustc_const_unstable(feature = "stdarch_const_helpers", issue = "none")]
|
||||
pub(crate) const unsafe fn simd_absd<T: Copy>(a: T, b: T) -> T {
|
||||
let m: T = is::simd_gt(a, b);
|
||||
is::simd_select(m, is::simd_sub(a, b), is::simd_sub(b, a))
|
||||
}
|
||||
|
||||
#[inline(always)]
|
||||
#[rustc_const_unstable(feature = "stdarch_const_helpers", issue = "none")]
|
||||
pub(crate) const unsafe fn simd_adda<T: Copy + const SimdExt>(a: T, b: T) -> T {
|
||||
is::simd_add(ls::simd_abs(a), ls::simd_abs(b))
|
||||
}
|
||||
|
||||
#[inline(always)]
|
||||
#[rustc_const_unstable(feature = "stdarch_const_helpers", issue = "none")]
|
||||
pub(super) const unsafe fn simd_andn<T: Copy + const SimdExt>(a: T, b: T) -> T {
|
||||
is::simd_and(ls::simd_not(a), b)
|
||||
}
|
||||
|
||||
#[inline(always)]
|
||||
#[rustc_const_unstable(feature = "stdarch_const_helpers", issue = "none")]
|
||||
pub(super) const unsafe fn simd_bitclr<T: Copy + const SimdExt>(a: T, b: T) -> T {
|
||||
ls::simd_andn(ls::simd_shl(ls::simd_splat(1), b), a)
|
||||
}
|
||||
|
||||
#[inline(always)]
|
||||
#[rustc_const_unstable(feature = "stdarch_const_helpers", issue = "none")]
|
||||
pub(super) const unsafe fn simd_bitrev<T: Copy + const SimdExt>(a: T, b: T) -> T {
|
||||
is::simd_xor(ls::simd_shl(ls::simd_splat(1), b), a)
|
||||
}
|
||||
|
||||
#[inline(always)]
|
||||
#[rustc_const_unstable(feature = "stdarch_const_helpers", issue = "none")]
|
||||
pub(super) const unsafe fn simd_bitset<T: Copy + const SimdExt>(a: T, b: T) -> T {
|
||||
is::simd_or(ls::simd_shl(ls::simd_splat(1), b), a)
|
||||
}
|
||||
|
||||
#[inline(always)]
|
||||
#[rustc_const_unstable(feature = "stdarch_const_helpers", issue = "none")]
|
||||
pub(super) const unsafe fn simd_fmsub<T: Copy>(a: T, b: T, c: T) -> T {
|
||||
|
||||
@@ -1097,6 +1097,9 @@ pub const fn _mm256_cvtsi256_si32(a: __m256i) -> i32 {
|
||||
|
||||
/// Zeroes the contents of all XMM or YMM registers.
|
||||
///
|
||||
/// This operation is purely a performance hint for the CPU and has no effect on the Abstract
|
||||
/// Machine state.
|
||||
///
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_zeroall)
|
||||
#[inline]
|
||||
#[target_feature(enable = "avx")]
|
||||
@@ -1109,6 +1112,9 @@ pub fn _mm256_zeroall() {
|
||||
/// Zeroes the upper 128 bits of all YMM registers;
|
||||
/// the lower 128-bits of the registers are unmodified.
|
||||
///
|
||||
/// This operation is purely a performance hint for the CPU and has no effect on the Abstract
|
||||
/// Machine state.
|
||||
///
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_zeroupper)
|
||||
#[inline]
|
||||
#[target_feature(enable = "avx")]
|
||||
@@ -4007,13 +4013,11 @@ const fn test_mm256_cvtsi256_si32() {
|
||||
}
|
||||
|
||||
#[simd_test(enable = "avx")]
|
||||
#[cfg_attr(miri, ignore)] // Register-level operation not supported by Miri
|
||||
fn test_mm256_zeroall() {
|
||||
_mm256_zeroall();
|
||||
}
|
||||
|
||||
#[simd_test(enable = "avx")]
|
||||
#[cfg_attr(miri, ignore)] // Register-level operation not supported by Miri
|
||||
fn test_mm256_zeroupper() {
|
||||
_mm256_zeroupper();
|
||||
}
|
||||
@@ -4484,7 +4488,7 @@ fn test_mm256_lddqu_si256() {
|
||||
}
|
||||
|
||||
#[simd_test(enable = "avx")]
|
||||
#[cfg_attr(miri, ignore)] // Non-temporal store, which is not supported by Miri
|
||||
#[cfg_attr(miri, ignore)] // Inline asm (for non-temporal store), which is not supported by Miri
|
||||
fn test_mm256_stream_si256() {
|
||||
let a = _mm256_setr_epi64x(1, 2, 3, 4);
|
||||
let mut r = _mm256_undefined_si256();
|
||||
@@ -4496,7 +4500,7 @@ fn test_mm256_stream_si256() {
|
||||
}
|
||||
|
||||
#[simd_test(enable = "avx")]
|
||||
#[cfg_attr(miri, ignore)] // Non-temporal store, which is not supported by Miri
|
||||
#[cfg_attr(miri, ignore)] // Inline asm (for non-temporal store), which is not supported by Miri
|
||||
fn test_mm256_stream_pd() {
|
||||
#[repr(align(32))]
|
||||
struct Memory {
|
||||
@@ -4515,7 +4519,7 @@ struct Memory {
|
||||
}
|
||||
|
||||
#[simd_test(enable = "avx")]
|
||||
#[cfg_attr(miri, ignore)] // Non-temporal store, which is not supported by Miri
|
||||
#[cfg_attr(miri, ignore)] // Inline asm (for non-temporal store), which is not supported by Miri
|
||||
fn test_mm256_stream_ps() {
|
||||
#[repr(align(32))]
|
||||
struct Memory {
|
||||
|
||||
@@ -58246,7 +58246,7 @@ const fn test_mm_mask_testn_epi32_mask() {
|
||||
}
|
||||
|
||||
#[simd_test(enable = "avx512f")]
|
||||
#[cfg_attr(miri, ignore)]
|
||||
#[cfg_attr(miri, ignore)] // Inline asm (for non-temporal store), which is not supported by Miri
|
||||
fn test_mm512_stream_ps() {
|
||||
#[repr(align(64))]
|
||||
struct Memory {
|
||||
@@ -58265,7 +58265,7 @@ struct Memory {
|
||||
}
|
||||
|
||||
#[simd_test(enable = "avx512f")]
|
||||
#[cfg_attr(miri, ignore)]
|
||||
#[cfg_attr(miri, ignore)] // Inline asm (for non-temporal store), which is not supported by Miri
|
||||
fn test_mm512_stream_pd() {
|
||||
#[repr(align(64))]
|
||||
struct Memory {
|
||||
@@ -58284,7 +58284,7 @@ struct Memory {
|
||||
}
|
||||
|
||||
#[simd_test(enable = "avx512f")]
|
||||
#[cfg_attr(miri, ignore)]
|
||||
#[cfg_attr(miri, ignore)] // Inline asm (for non-temporal store), which is not supported by Miri
|
||||
fn test_mm512_stream_si512() {
|
||||
#[repr(align(64))]
|
||||
struct Memory {
|
||||
|
||||
@@ -45,23 +45,35 @@ _mm_set1_pch
|
||||
_tpause
|
||||
_umwait
|
||||
|
||||
# IMM8 must be an even number in the range `0..=62`
|
||||
_mm_sm3rnds2_epi32
|
||||
|
||||
# SDE ERROR: Cannot execute XGETBV with ECX != 0
|
||||
_xgetbv
|
||||
|
||||
# top bits are undefined, unclear how to test these
|
||||
_mm256_castph128_ph256
|
||||
_mm256_castps128_ps256
|
||||
_mm256_castpd128_pd256
|
||||
_mm256_castsi128_si256
|
||||
|
||||
_mm512_castph128_ph512
|
||||
_mm512_castps128_ps512
|
||||
_mm512_castpd128_pd512
|
||||
_mm512_castsi128_si512
|
||||
|
||||
_mm512_castph256_ph512
|
||||
_mm512_castps256_ps512
|
||||
_mm512_castpd256_pd512
|
||||
_mm512_castsi256_si512
|
||||
|
||||
# Clang bug
|
||||
_mm256_extract_epi16
|
||||
_mm256_extract_epi8
|
||||
_mm512_mask_reduce_max_pd
|
||||
_mm512_mask_reduce_max_ps
|
||||
_mm512_mask_reduce_min_pd
|
||||
_mm512_mask_reduce_min_ps
|
||||
_mm_extract_epi16
|
||||
_mm_extract_epi8
|
||||
|
||||
# Rounding errors in release mode
|
||||
_mm_maskz_fmadd_sd
|
||||
_mm_maskz_fmadd_ss
|
||||
_mm_maskz_fmsub_sd
|
||||
_mm_maskz_fmsub_ss
|
||||
_mm_maskz_fnmadd_sd
|
||||
_mm_maskz_fnmadd_ss
|
||||
|
||||
@@ -1,51 +0,0 @@
|
||||
use crate::common::cli::ProcessedCli;
|
||||
use crate::common::compile_c::{CompilationCommandBuilder, CppCompilation};
|
||||
|
||||
pub fn build_cpp_compilation(config: &ProcessedCli) -> Option<CppCompilation> {
|
||||
let cpp_compiler = config.cpp_compiler.as_ref()?;
|
||||
|
||||
// -ffp-contract=off emulates Rust's approach of not fusing separate mul-add operations
|
||||
let mut command = CompilationCommandBuilder::new()
|
||||
.add_arch_flags(["armv8.6-a", "crypto", "crc", "dotprod", "fp16"])
|
||||
.set_compiler(cpp_compiler)
|
||||
.set_target(&config.target)
|
||||
.set_opt_level("2")
|
||||
.set_cxx_toolchain_dir(config.cxx_toolchain_dir.as_deref())
|
||||
.set_project_root("c_programs")
|
||||
.add_extra_flags(["-ffp-contract=off", "-Wno-narrowing"]);
|
||||
|
||||
if !config.target.contains("v7") {
|
||||
command = command.add_arch_flags(["faminmax", "lut", "sha3", "fp8"]);
|
||||
}
|
||||
|
||||
if !cpp_compiler.contains("clang") {
|
||||
command = command.add_extra_flag("-flax-vector-conversions");
|
||||
}
|
||||
|
||||
let mut cpp_compiler = command.into_cpp_compilation();
|
||||
|
||||
if config.target.contains("aarch64_be") {
|
||||
let Some(ref cxx_toolchain_dir) = config.cxx_toolchain_dir else {
|
||||
panic!(
|
||||
"target `{}` must specify `cxx_toolchain_dir`",
|
||||
config.target
|
||||
)
|
||||
};
|
||||
|
||||
cpp_compiler.command_mut().args([
|
||||
&format!("--sysroot={cxx_toolchain_dir}/aarch64_be-none-linux-gnu/libc"),
|
||||
"--include-directory",
|
||||
&format!("{cxx_toolchain_dir}/aarch64_be-none-linux-gnu/include/c++/14.3.1"),
|
||||
"--include-directory",
|
||||
&format!("{cxx_toolchain_dir}/aarch64_be-none-linux-gnu/include/c++/14.3.1/aarch64_be-none-linux-gnu"),
|
||||
"-L",
|
||||
&format!("{cxx_toolchain_dir}/lib/gcc/aarch64_be-none-linux-gnu/14.3.1"),
|
||||
"-L",
|
||||
&format!("{cxx_toolchain_dir}/aarch64_be-none-linux-gnu/libc/usr/lib"),
|
||||
"-B",
|
||||
&format!("{cxx_toolchain_dir}/lib/gcc/aarch64_be-none-linux-gnu/14.3.1"),
|
||||
]);
|
||||
}
|
||||
|
||||
Some(cpp_compiler)
|
||||
}
|
||||
@@ -3,51 +3,6 @@
|
||||
// test are derived from a JSON specification, published under the same license as the
|
||||
// `intrinsic-test` crate.\n";
|
||||
|
||||
pub const PLATFORM_C_FORWARD_DECLARATIONS: &str = r#"
|
||||
#ifdef __aarch64__
|
||||
std::ostream& operator<<(std::ostream& os, poly128_t value);
|
||||
#endif
|
||||
|
||||
std::ostream& operator<<(std::ostream& os, float16_t value);
|
||||
std::ostream& operator<<(std::ostream& os, uint8_t value);
|
||||
|
||||
// T1 is the `To` type, T2 is the `From` type
|
||||
template<typename T1, typename T2> T1 cast(T2 x) {
|
||||
static_assert(sizeof(T1) == sizeof(T2), "sizeof T1 and T2 must be the same");
|
||||
T1 ret{};
|
||||
memcpy(&ret, &x, sizeof(T1));
|
||||
return ret;
|
||||
}
|
||||
"#;
|
||||
|
||||
pub const PLATFORM_C_DEFINITIONS: &str = r#"
|
||||
#ifdef __aarch64__
|
||||
std::ostream& operator<<(std::ostream& os, poly128_t value) {
|
||||
std::stringstream temp;
|
||||
do {
|
||||
int n = value % 10;
|
||||
value /= 10;
|
||||
temp << n;
|
||||
} while (value != 0);
|
||||
std::string tempstr(temp.str());
|
||||
std::string res(tempstr.rbegin(), tempstr.rend());
|
||||
os << res;
|
||||
return os;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
std::ostream& operator<<(std::ostream& os, float16_t value) {
|
||||
os << static_cast<float>(value);
|
||||
return os;
|
||||
}
|
||||
|
||||
std::ostream& operator<<(std::ostream& os, uint8_t value) {
|
||||
os << (unsigned int) value;
|
||||
return os;
|
||||
}
|
||||
"#;
|
||||
|
||||
pub const PLATFORM_RUST_DEFINITIONS: &str = "";
|
||||
|
||||
pub const PLATFORM_RUST_CFGS: &str = r#"
|
||||
@@ -61,7 +16,6 @@
|
||||
#![cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), feature(stdarch_neon_feat_lut))]
|
||||
#![cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), feature(stdarch_neon_fp8))]
|
||||
#![cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), feature(faminmax))]
|
||||
#![feature(fmt_helpers_for_derive)]
|
||||
#![feature(stdarch_neon_f16)]
|
||||
|
||||
#[cfg(any(target_arch = "aarch64", target_arch = "arm64ec"))]
|
||||
|
||||
@@ -1,5 +1,4 @@
|
||||
mod argument;
|
||||
mod compile;
|
||||
mod config;
|
||||
mod intrinsic;
|
||||
mod json_parser;
|
||||
@@ -7,7 +6,6 @@
|
||||
|
||||
use crate::common::SupportedArchitectureTest;
|
||||
use crate::common::cli::ProcessedCli;
|
||||
use crate::common::compile_c::CppCompilation;
|
||||
use crate::common::intrinsic::Intrinsic;
|
||||
use crate::common::intrinsic_helpers::TypeKind;
|
||||
use intrinsic::ArmIntrinsicType;
|
||||
@@ -15,16 +13,11 @@
|
||||
|
||||
pub struct ArmArchitectureTest {
|
||||
intrinsics: Vec<Intrinsic<ArmIntrinsicType>>,
|
||||
cli_options: ProcessedCli,
|
||||
}
|
||||
|
||||
impl SupportedArchitectureTest for ArmArchitectureTest {
|
||||
type IntrinsicImpl = ArmIntrinsicType;
|
||||
|
||||
fn cli_options(&self) -> &ProcessedCli {
|
||||
&self.cli_options
|
||||
}
|
||||
|
||||
fn intrinsics(&self) -> &[Intrinsic<ArmIntrinsicType>] {
|
||||
&self.intrinsics
|
||||
}
|
||||
@@ -32,18 +25,16 @@ fn intrinsics(&self) -> &[Intrinsic<ArmIntrinsicType>] {
|
||||
const NOTICE: &str = config::NOTICE;
|
||||
|
||||
const PLATFORM_C_HEADERS: &[&str] = &["arm_neon.h", "arm_acle.h", "arm_fp16.h"];
|
||||
const PLATFORM_C_DEFINITIONS: &str = config::PLATFORM_C_DEFINITIONS;
|
||||
const PLATFORM_C_FORWARD_DECLARATIONS: &str = config::PLATFORM_C_FORWARD_DECLARATIONS;
|
||||
|
||||
const PLATFORM_RUST_DEFINITIONS: &str = config::PLATFORM_RUST_DEFINITIONS;
|
||||
const PLATFORM_RUST_CFGS: &str = config::PLATFORM_RUST_CFGS;
|
||||
|
||||
fn cpp_compilation(&self) -> Option<CppCompilation> {
|
||||
compile::build_cpp_compilation(&self.cli_options)
|
||||
fn arch_flags(&self) -> Vec<&str> {
|
||||
vec!["-march=armv8.6a+crypto+crc+dotprod+fp16"]
|
||||
}
|
||||
|
||||
fn create(cli_options: ProcessedCli) -> Self {
|
||||
let a32 = cli_options.target.contains("v7");
|
||||
let a32 = cli_options.target.starts_with("armv7");
|
||||
let mut intrinsics = get_neon_intrinsics(&cli_options.filename, &cli_options.target)
|
||||
.expect("Error parsing input file");
|
||||
|
||||
@@ -68,9 +59,6 @@ fn create(cli_options: ProcessedCli) -> Self {
|
||||
.take(sample_size)
|
||||
.collect::<Vec<_>>();
|
||||
|
||||
Self {
|
||||
intrinsics,
|
||||
cli_options,
|
||||
}
|
||||
Self { intrinsics }
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1,6 +1,4 @@
|
||||
use super::intrinsic::ArmIntrinsicType;
|
||||
use crate::common::cli::Language;
|
||||
use crate::common::indentation::Indentation;
|
||||
use crate::common::intrinsic_helpers::{IntrinsicType, IntrinsicTypeDefinition, Sign, TypeKind};
|
||||
|
||||
impl IntrinsicTypeDefinition for ArmIntrinsicType {
|
||||
@@ -8,8 +6,8 @@ impl IntrinsicTypeDefinition for ArmIntrinsicType {
|
||||
fn c_type(&self) -> String {
|
||||
let prefix = self.kind.c_prefix();
|
||||
|
||||
if let (Some(bit_len), simd_len, vec_len) = (self.bit_len, self.simd_len, self.vec_len) {
|
||||
match (simd_len, vec_len) {
|
||||
if let Some(bit_len) = self.bit_len {
|
||||
match (self.simd_len, self.vec_len) {
|
||||
(None, None) => format!("{prefix}{bit_len}_t"),
|
||||
(Some(simd), None) => format!("{prefix}{bit_len}x{simd}_t"),
|
||||
(Some(simd), Some(vec)) => format!("{prefix}{bit_len}x{simd}x{vec}_t"),
|
||||
@@ -20,19 +18,24 @@ fn c_type(&self) -> String {
|
||||
}
|
||||
}
|
||||
|
||||
fn c_single_vector_type(&self) -> String {
|
||||
if let (Some(bit_len), Some(simd_len)) = (self.bit_len, self.simd_len) {
|
||||
format!(
|
||||
"{prefix}{bit_len}x{simd_len}_t",
|
||||
prefix = self.kind.c_prefix()
|
||||
)
|
||||
fn rust_type(&self) -> String {
|
||||
let rust_prefix = self.kind.rust_prefix();
|
||||
let c_prefix = self.kind.c_prefix();
|
||||
|
||||
if let Some(bit_len) = self.bit_len {
|
||||
match (self.simd_len, self.vec_len) {
|
||||
(None, None) => format!("{rust_prefix}{bit_len}"),
|
||||
(Some(simd), None) => format!("{c_prefix}{bit_len}x{simd}_t"),
|
||||
(Some(simd), Some(vec)) => format!("{c_prefix}{bit_len}x{simd}x{vec}_t"),
|
||||
(None, Some(_)) => todo!("{self:#?}"), // Likely an invalid case
|
||||
}
|
||||
} else {
|
||||
unreachable!("Shouldn't be called on this type")
|
||||
todo!("{self:#?}")
|
||||
}
|
||||
}
|
||||
|
||||
/// Determines the load function for this type.
|
||||
fn get_load_function(&self, language: Language) -> String {
|
||||
fn get_load_function(&self) -> String {
|
||||
if let IntrinsicType {
|
||||
kind: k,
|
||||
bit_len: Some(bl),
|
||||
@@ -47,43 +50,8 @@ fn get_load_function(&self, language: Language) -> String {
|
||||
""
|
||||
};
|
||||
|
||||
let choose_workaround = language == Language::C && self.target.contains("v7");
|
||||
format!(
|
||||
"vld{len}{quad}_{type}{size}",
|
||||
type = match k {
|
||||
TypeKind::Int(Sign::Unsigned) => "u",
|
||||
TypeKind::Int(Sign::Signed) => "s",
|
||||
TypeKind::Float => "f",
|
||||
// The ACLE doesn't support 64-bit polynomial loads on Armv7
|
||||
// if armv7 and bl == 64, use "s", else "p"
|
||||
TypeKind::Poly => if choose_workaround && *bl == 64 {"s"} else {"p"},
|
||||
x => todo!("get_load_function TypeKind: {x:#?}"),
|
||||
},
|
||||
size = bl,
|
||||
quad = quad,
|
||||
len = vec_len.unwrap_or(1),
|
||||
)
|
||||
} else {
|
||||
todo!("get_load_function IntrinsicType: {self:#?}")
|
||||
}
|
||||
}
|
||||
|
||||
/// Determines the get lane function for this type.
|
||||
fn get_lane_function(&self) -> String {
|
||||
if let IntrinsicType {
|
||||
kind: k,
|
||||
bit_len: Some(bl),
|
||||
simd_len,
|
||||
..
|
||||
} = &self.data
|
||||
{
|
||||
let quad = if (simd_len.unwrap_or(1) * bl) > 64 {
|
||||
"q"
|
||||
} else {
|
||||
""
|
||||
};
|
||||
format!(
|
||||
"vget{quad}_lane_{type}{size}",
|
||||
type = match k {
|
||||
TypeKind::Int(Sign::Unsigned) => "u",
|
||||
TypeKind::Int(Sign::Signed) => "s",
|
||||
@@ -93,71 +61,12 @@ fn get_lane_function(&self) -> String {
|
||||
},
|
||||
size = bl,
|
||||
quad = quad,
|
||||
len = vec_len.unwrap_or(1),
|
||||
)
|
||||
} else {
|
||||
todo!("get_lane_function IntrinsicType: {self:#?}")
|
||||
todo!("get_load_function IntrinsicType: {self:#?}")
|
||||
}
|
||||
}
|
||||
|
||||
/// Generates a std::cout for the intrinsics results that will match the
|
||||
/// rust debug output format for the return type. The generated line assumes
|
||||
/// there is an int i in scope which is the current pass number.
|
||||
fn print_result_c(&self, indentation: Indentation, additional: &str) -> String {
|
||||
let lanes = if self.num_vectors() > 1 {
|
||||
(0..self.num_vectors())
|
||||
.map(|vector| {
|
||||
format!(
|
||||
r#""{ty}(" << {lanes} << ")""#,
|
||||
ty = self.c_single_vector_type(),
|
||||
lanes = (0..self.num_lanes())
|
||||
.map(move |idx| -> std::string::String {
|
||||
let lane_fn = self.get_lane_function();
|
||||
let final_cast = self.generate_final_type_cast();
|
||||
format!(
|
||||
"{final_cast}{lane_fn}(__return_value.val[{vector}], {idx})"
|
||||
)
|
||||
})
|
||||
.collect::<Vec<_>>()
|
||||
.join(r#" << ", " << "#)
|
||||
)
|
||||
})
|
||||
.collect::<Vec<_>>()
|
||||
.join(r#" << ", " << "#)
|
||||
} else if self.num_lanes() > 1 {
|
||||
(0..self.num_lanes())
|
||||
.map(|idx| -> std::string::String {
|
||||
let lane_fn = self.get_lane_function();
|
||||
let final_cast = self.generate_final_type_cast();
|
||||
format!("{final_cast}{lane_fn}(__return_value, {idx})")
|
||||
})
|
||||
.collect::<Vec<_>>()
|
||||
.join(r#" << ", " << "#)
|
||||
} else {
|
||||
format!(
|
||||
"{promote}cast<{cast}>(__return_value)",
|
||||
cast = match self.kind() {
|
||||
TypeKind::Float if self.inner_size() == 16 => "float16_t".to_string(),
|
||||
TypeKind::Float if self.inner_size() == 32 => "float".to_string(),
|
||||
TypeKind::Float if self.inner_size() == 64 => "double".to_string(),
|
||||
TypeKind::Int(Sign::Signed) => format!("int{}_t", self.inner_size()),
|
||||
TypeKind::Int(Sign::Unsigned) => format!("uint{}_t", self.inner_size()),
|
||||
TypeKind::Poly => format!("poly{}_t", self.inner_size()),
|
||||
ty => todo!("print_result_c - Unknown type: {ty:#?}"),
|
||||
},
|
||||
promote = self.generate_final_type_cast(),
|
||||
)
|
||||
};
|
||||
|
||||
format!(
|
||||
r#"{indentation}std::cout << "Result {additional}-" << i+1 << ": {ty}" << std::fixed << std::setprecision(150) << {lanes} << "{close}" << std::endl;"#,
|
||||
ty = if self.is_simd() {
|
||||
format!("{}(", self.c_type())
|
||||
} else {
|
||||
String::from("")
|
||||
},
|
||||
close = if self.is_simd() { ")" } else { "" },
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
impl ArmIntrinsicType {
|
||||
|
||||
@@ -1,5 +1,9 @@
|
||||
use super::cli::Language;
|
||||
use itertools::Itertools;
|
||||
|
||||
use crate::common::intrinsic_helpers::TypeKind;
|
||||
|
||||
use super::constraint::Constraint;
|
||||
use super::gen_rust::PASSES;
|
||||
use super::indentation::Indentation;
|
||||
use super::intrinsic_helpers::IntrinsicTypeDefinition;
|
||||
|
||||
@@ -49,32 +53,19 @@ pub fn has_constraint(&self) -> bool {
|
||||
self.constraint.is_some()
|
||||
}
|
||||
|
||||
/// The binding keyword (e.g. "const" or "let") for the array of possible test inputs.
|
||||
fn rust_vals_array_binding(&self) -> impl std::fmt::Display {
|
||||
if self.ty.is_rust_vals_array_const() {
|
||||
"const"
|
||||
} else {
|
||||
"let"
|
||||
}
|
||||
}
|
||||
|
||||
/// The name (e.g. "A_VALS" or "a_vals") for the array of possible test inputs.
|
||||
pub(crate) fn rust_vals_array_name(&self) -> impl std::fmt::Display {
|
||||
if self.ty.is_rust_vals_array_const() {
|
||||
let loads = crate::common::gen_rust::PASSES;
|
||||
format!(
|
||||
"{}_{ty}_{load_size}",
|
||||
self.name.to_uppercase(),
|
||||
ty = self.ty.rust_scalar_type(),
|
||||
load_size = self.ty.num_lanes() * self.ty.num_vectors() + loads - 1,
|
||||
)
|
||||
} else {
|
||||
format!("{}_vals", self.name.to_lowercase())
|
||||
}
|
||||
let loads = crate::common::gen_rust::PASSES;
|
||||
format!(
|
||||
"{ty}_{load_size}",
|
||||
ty = self.ty.rust_scalar_type().to_uppercase(),
|
||||
load_size = self.ty.num_lanes() * self.ty.num_vectors() + loads - 1,
|
||||
)
|
||||
}
|
||||
|
||||
fn as_call_param_c(&self) -> String {
|
||||
self.ty.as_call_param_c(&self.generate_name())
|
||||
pub(crate) fn pass_by_ref(&self) -> bool {
|
||||
// pass SIMD types and `f16` by reference
|
||||
self.is_simd() || (self.ty.kind() == TypeKind::Float && self.ty.inner_size() == 16)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -87,13 +78,50 @@ impl<T> ArgumentList<T>
|
||||
where
|
||||
T: IntrinsicTypeDefinition,
|
||||
{
|
||||
/// Converts the argument list into the call parameters for a C function call.
|
||||
/// e.g. this would generate something like `a, &b, c`
|
||||
pub fn as_call_param_c(&self) -> String {
|
||||
pub fn as_non_imm_arglist_c(&self) -> String {
|
||||
self.iter()
|
||||
.map(|arg| arg.as_call_param_c())
|
||||
.collect::<Vec<String>>()
|
||||
.join(", ")
|
||||
.filter(|arg| !arg.has_constraint())
|
||||
.format_with("", |arg, fmt| {
|
||||
if arg.pass_by_ref() {
|
||||
fmt(&format_args!(", const {}* {}", arg.to_c_type(), arg.name))
|
||||
} else {
|
||||
fmt(&format_args!(", {} {}", arg.to_c_type(), arg.name))
|
||||
}
|
||||
})
|
||||
.to_string()
|
||||
}
|
||||
|
||||
pub fn as_non_imm_arglist_rust(&self) -> String {
|
||||
self.iter()
|
||||
.filter(|arg| !arg.has_constraint())
|
||||
.format_with("", |arg, fmt| {
|
||||
if arg.pass_by_ref() {
|
||||
fmt(&format_args!(
|
||||
", {}: *const {}",
|
||||
arg.name,
|
||||
arg.ty.rust_type()
|
||||
))
|
||||
} else {
|
||||
fmt(&format_args!(", {}: {}", arg.name, arg.ty.rust_type()))
|
||||
}
|
||||
})
|
||||
.to_string()
|
||||
}
|
||||
|
||||
pub fn as_call_params_c(&self, imm_args: &[i64]) -> String {
|
||||
let mut imm_args = imm_args.iter();
|
||||
self.iter()
|
||||
.format_with(", ", |arg, fmt| {
|
||||
if arg.has_constraint() {
|
||||
fmt(&imm_args.next().unwrap())
|
||||
} else {
|
||||
if arg.pass_by_ref() {
|
||||
fmt(&"*")?;
|
||||
}
|
||||
fmt(&arg.name)
|
||||
}
|
||||
})
|
||||
.to_string()
|
||||
}
|
||||
|
||||
/// Converts the argument list into the call parameters for a Rust function.
|
||||
@@ -101,53 +129,21 @@ pub fn as_call_param_c(&self) -> String {
|
||||
pub fn as_call_param_rust(&self) -> String {
|
||||
self.iter()
|
||||
.filter(|a| !a.has_constraint())
|
||||
.map(|arg| arg.generate_name() + " as _")
|
||||
.collect::<Vec<String>>()
|
||||
.map(|arg| arg.generate_name())
|
||||
.join(", ")
|
||||
}
|
||||
|
||||
/// Creates a line for each argument that initializes an array for C from which `loads` argument
|
||||
/// values can be loaded as a sliding window.
|
||||
/// e.g `const int32x2_t a_vals = {0x3effffff, 0x3effffff, 0x3f7fffff}`, if loads=2.
|
||||
pub fn gen_arglists_c(
|
||||
&self,
|
||||
w: &mut impl std::io::Write,
|
||||
indentation: Indentation,
|
||||
loads: u32,
|
||||
) -> std::io::Result<()> {
|
||||
for arg in self.iter().filter(|&arg| !arg.has_constraint()) {
|
||||
// Setting the variables on an aligned boundary to make it easier to pick
|
||||
// functions (of a specific architecture) that would help load the values.
|
||||
writeln!(
|
||||
w,
|
||||
"{indentation}alignas(64) const {ty} {name}_vals[] = {values};",
|
||||
ty = arg.ty.c_scalar_type(),
|
||||
name = arg.generate_name(),
|
||||
values = arg.ty.populate_random(indentation, loads, &Language::C)
|
||||
)?
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Creates a line for each argument that initializes an array for Rust from which `loads` argument
|
||||
/// values can be loaded as a sliding window, e.g `const A_VALS: [u32; 20] = [...];`
|
||||
pub fn gen_arglists_rust(
|
||||
&self,
|
||||
w: &mut impl std::io::Write,
|
||||
indentation: Indentation,
|
||||
loads: u32,
|
||||
) -> std::io::Result<()> {
|
||||
for arg in self.iter().filter(|&arg| !arg.has_constraint()) {
|
||||
// Constants are defined globally.
|
||||
if arg.ty.is_rust_vals_array_const() {
|
||||
continue;
|
||||
}
|
||||
|
||||
Self::gen_arg_rust(arg, w, indentation, loads)?;
|
||||
}
|
||||
|
||||
Ok(())
|
||||
pub fn as_c_call_param_rust(&self) -> String {
|
||||
self.iter()
|
||||
.filter(|a| !a.has_constraint())
|
||||
.map(|arg| {
|
||||
if arg.pass_by_ref() {
|
||||
format!(", &raw const {}", arg.generate_name())
|
||||
} else {
|
||||
format!(", {}", arg.generate_name())
|
||||
}
|
||||
})
|
||||
.join("")
|
||||
}
|
||||
|
||||
pub fn gen_arg_rust(
|
||||
@@ -158,39 +154,14 @@ pub fn gen_arg_rust(
|
||||
) -> std::io::Result<()> {
|
||||
writeln!(
|
||||
w,
|
||||
"{indentation}{bind} {name}: [{ty}; {load_size}] = {values};\n",
|
||||
bind = arg.rust_vals_array_binding(),
|
||||
"{indentation}static {name}: [{ty}; {load_size}] = {values};\n",
|
||||
name = arg.rust_vals_array_name(),
|
||||
ty = arg.ty.rust_scalar_type(),
|
||||
load_size = arg.ty.num_lanes() * arg.ty.num_vectors() + loads - 1,
|
||||
values = arg.ty.populate_random(indentation, loads, &Language::Rust)
|
||||
values = arg.ty.populate_random(indentation, loads)
|
||||
)
|
||||
}
|
||||
|
||||
/// Creates a line for each argument that initializes the argument from an array `[arg]_vals` at
|
||||
/// an offset `i` using a load intrinsic, in C.
|
||||
/// e.g `uint8x8_t a = vld1_u8(&a_vals[i]);`
|
||||
///
|
||||
/// ARM-specific
|
||||
pub fn load_values_c(&self, indentation: Indentation) -> String {
|
||||
self.iter()
|
||||
.filter(|&arg| !arg.has_constraint())
|
||||
.enumerate()
|
||||
.map(|(idx, arg)| {
|
||||
format!(
|
||||
"{indentation}{ty} {name} = cast<{ty}>({load}(&{name}_vals[i+{idx}]));\n",
|
||||
ty = arg.to_c_type(),
|
||||
name = arg.generate_name(),
|
||||
load = if arg.is_simd() {
|
||||
arg.ty.get_load_function(Language::C)
|
||||
} else {
|
||||
"*".to_string()
|
||||
}
|
||||
)
|
||||
})
|
||||
.collect()
|
||||
}
|
||||
|
||||
/// Creates a line for each argument that initializes the argument from array `[ARG]_VALS` at
|
||||
/// an offset `i` using a load intrinsic, in Rust.
|
||||
/// e.g `let a = vld1_u8(A_VALS.as_ptr().offset(i));`
|
||||
@@ -199,17 +170,20 @@ pub fn load_values_rust(&self, indentation: Indentation) -> String {
|
||||
.filter(|&arg| !arg.has_constraint())
|
||||
.enumerate()
|
||||
.map(|(idx, arg)| {
|
||||
let load = if arg.is_simd() {
|
||||
arg.ty.get_load_function(Language::Rust)
|
||||
if arg.is_simd() {
|
||||
format!(
|
||||
"{indentation}let {name} = {load}({vals_name}.as_ptr().add((i+{idx}) % {PASSES}) as _);\n",
|
||||
name = arg.generate_name(),
|
||||
vals_name = arg.rust_vals_array_name(),
|
||||
load = arg.ty.get_load_function(),
|
||||
)
|
||||
} else {
|
||||
"*".to_string()
|
||||
};
|
||||
let typecast = if load.len() > 2 { "as _" } else { "" };
|
||||
format!(
|
||||
"{indentation}let {name} = {load}({vals_name}.as_ptr().offset(i+{idx}){typecast});\n",
|
||||
name = arg.generate_name(),
|
||||
vals_name = arg.rust_vals_array_name(),
|
||||
)
|
||||
format!(
|
||||
"{indentation}let {name} = {vals_name}[(i+{idx}) % {PASSES}];\n",
|
||||
name = arg.generate_name(),
|
||||
vals_name = arg.rust_vals_array_name(),
|
||||
)
|
||||
}
|
||||
})
|
||||
.collect()
|
||||
}
|
||||
|
||||
@@ -1,12 +1,6 @@
|
||||
use itertools::Itertools;
|
||||
use std::path::PathBuf;
|
||||
|
||||
/// The language a piece of generated test code is written in.
#[derive(Debug, PartialEq)]
pub enum Language {
    /// Generated Rust code.
    Rust,
    /// Generated C code.
    C,
}
|
||||
|
||||
/// Intrinsic test tool
|
||||
#[derive(clap::Parser)]
|
||||
#[command(
|
||||
@@ -17,55 +11,21 @@ pub struct Cli {
|
||||
/// The input file containing the intrinsics
|
||||
pub input: PathBuf,
|
||||
|
||||
/// The rust toolchain to use for building the rust code
|
||||
#[arg(long)]
|
||||
pub toolchain: Option<String>,
|
||||
|
||||
/// The C++ compiler to use for compiling the c++ code
|
||||
#[arg(long, default_value_t = String::from("clang++"))]
|
||||
pub cppcompiler: String,
|
||||
|
||||
/// Run the C programs under emulation with this command
|
||||
#[arg(long)]
|
||||
pub runner: Option<String>,
|
||||
|
||||
/// Filename for a list of intrinsics to skip (one per line)
|
||||
#[arg(long)]
|
||||
pub skip: Option<PathBuf>,
|
||||
|
||||
/// Regenerate test programs, but don't build or run them
|
||||
#[arg(long)]
|
||||
pub generate_only: bool,
|
||||
|
||||
/// Pass a target the test suite
|
||||
#[arg(long, default_value_t = String::from("armv7-unknown-linux-gnueabihf"))]
|
||||
#[arg(long)]
|
||||
pub target: String,
|
||||
|
||||
/// Pass a profile (release, dev)
|
||||
#[arg(long, default_value_t = String::from("release"))]
|
||||
pub profile: String,
|
||||
|
||||
/// Set the linker
|
||||
#[arg(long)]
|
||||
pub linker: Option<String>,
|
||||
|
||||
/// Set the sysroot for the C++ compiler
|
||||
#[arg(long)]
|
||||
pub cxx_toolchain_dir: Option<String>,
|
||||
|
||||
#[arg(long, default_value_t = 100u8)]
|
||||
pub sample_percentage: u8,
|
||||
}
|
||||
|
||||
pub struct ProcessedCli {
|
||||
pub filename: PathBuf,
|
||||
pub toolchain: Option<String>,
|
||||
pub cpp_compiler: Option<String>,
|
||||
pub runner: String,
|
||||
pub target: String,
|
||||
pub profile: String,
|
||||
pub linker: Option<String>,
|
||||
pub cxx_toolchain_dir: Option<String>,
|
||||
pub skip: Vec<String>,
|
||||
pub sample_percentage: u8,
|
||||
}
|
||||
@@ -73,11 +33,7 @@ pub struct ProcessedCli {
|
||||
impl ProcessedCli {
|
||||
pub fn new(cli_options: Cli) -> Self {
|
||||
let filename = cli_options.input;
|
||||
let runner = cli_options.runner.unwrap_or_default();
|
||||
let target = cli_options.target;
|
||||
let profile = cli_options.profile;
|
||||
let linker = cli_options.linker;
|
||||
let cxx_toolchain_dir = cli_options.cxx_toolchain_dir;
|
||||
let sample_percentage = cli_options.sample_percentage;
|
||||
|
||||
let skip = if let Some(filename) = cli_options.skip {
|
||||
@@ -91,27 +47,8 @@ pub fn new(cli_options: Cli) -> Self {
|
||||
Default::default()
|
||||
};
|
||||
|
||||
let (toolchain, cpp_compiler) = if cli_options.generate_only {
|
||||
(None, None)
|
||||
} else {
|
||||
(
|
||||
Some(
|
||||
cli_options
|
||||
.toolchain
|
||||
.map_or_else(String::new, |t| format!("+{t}")),
|
||||
),
|
||||
Some(cli_options.cppcompiler),
|
||||
)
|
||||
};
|
||||
|
||||
Self {
|
||||
toolchain,
|
||||
cpp_compiler,
|
||||
runner,
|
||||
target,
|
||||
profile,
|
||||
linker,
|
||||
cxx_toolchain_dir,
|
||||
skip,
|
||||
filename,
|
||||
sample_percentage,
|
||||
|
||||
@@ -1,144 +0,0 @@
|
||||
use itertools::Itertools;
|
||||
use rayon::prelude::*;
|
||||
use std::{collections::HashMap, process::Command};
|
||||
|
||||
pub const INTRINSIC_DELIMITER: &str = "############";
|
||||
/// Builds a `Command` from a whitespace-separated runner string: the first
/// word becomes the program, the remaining words its arguments.
///
/// Panics if `runner` contains no word at all.
fn runner_command(runner: &str) -> Command {
    let words: Vec<&str> = runner.split_whitespace().collect();
    let (program, args) = words.split_first().unwrap();

    let mut cmd = Command::new(program);
    cmd.args(args);
    cmd
}
|
||||
|
||||
pub fn compare_outputs(
|
||||
intrinsic_name_list: &Vec<String>,
|
||||
runner: &str,
|
||||
target: &str,
|
||||
profile: &str,
|
||||
) -> bool {
|
||||
let profile_dir = match profile {
|
||||
"dev" => "debug",
|
||||
_ => "release",
|
||||
};
|
||||
|
||||
let (c, rust) = rayon::join(
|
||||
|| {
|
||||
runner_command(runner)
|
||||
.arg("./intrinsic-test-programs")
|
||||
.current_dir("c_programs")
|
||||
.output()
|
||||
},
|
||||
|| {
|
||||
runner_command(runner)
|
||||
.arg(format!(
|
||||
"./target/{target}/{profile_dir}/intrinsic-test-programs"
|
||||
))
|
||||
.current_dir("rust_programs")
|
||||
.output()
|
||||
},
|
||||
);
|
||||
let (c, rust) = match (c, rust) {
|
||||
(Ok(c), Ok(rust)) => (c, rust),
|
||||
failure => panic!("Failed to run: {failure:#?}"),
|
||||
};
|
||||
|
||||
if !c.status.success() {
|
||||
error!(
|
||||
"Failed to run C program.\nstdout: {stdout}\nstderr: {stderr}",
|
||||
stdout = std::str::from_utf8(&c.stdout).unwrap_or(""),
|
||||
stderr = std::str::from_utf8(&c.stderr).unwrap_or(""),
|
||||
);
|
||||
}
|
||||
|
||||
if !rust.status.success() {
|
||||
error!(
|
||||
"Failed to run Rust program.\nstdout: {stdout}\nstderr: {stderr}",
|
||||
stdout = std::str::from_utf8(&rust.stdout).unwrap_or(""),
|
||||
stderr = std::str::from_utf8(&rust.stderr).unwrap_or(""),
|
||||
);
|
||||
}
|
||||
|
||||
info!("Completed running C++ and Rust test binaries");
|
||||
let c = std::str::from_utf8(&c.stdout)
|
||||
.unwrap()
|
||||
.to_lowercase()
|
||||
.replace("-nan", "nan");
|
||||
let rust = std::str::from_utf8(&rust.stdout)
|
||||
.unwrap()
|
||||
.to_lowercase()
|
||||
.replace("-nan", "nan");
|
||||
|
||||
let c_output_map = c
|
||||
.split(INTRINSIC_DELIMITER)
|
||||
.filter_map(|output| output.trim().split_once("\n"))
|
||||
.collect::<HashMap<&str, &str>>();
|
||||
let rust_output_map = rust
|
||||
.split(INTRINSIC_DELIMITER)
|
||||
.filter_map(|output| output.trim().split_once("\n"))
|
||||
.collect::<HashMap<&str, &str>>();
|
||||
|
||||
assert!(!c_output_map.is_empty(), "No C intrinsic output found!");
|
||||
|
||||
let intrinsics = c_output_map
|
||||
.keys()
|
||||
.chain(rust_output_map.keys())
|
||||
.unique()
|
||||
.collect_vec();
|
||||
|
||||
info!("Comparing outputs");
|
||||
let intrinsics_diff_count = intrinsics
|
||||
.par_iter()
|
||||
.filter_map(|&&intrinsic| {
|
||||
let c_output = c_output_map.get(intrinsic).unwrap();
|
||||
let rust_output = rust_output_map.get(intrinsic).unwrap();
|
||||
if rust_output.eq(c_output) {
|
||||
None
|
||||
} else {
|
||||
let diff = diff::lines(c_output, rust_output);
|
||||
let diffs = diff
|
||||
.into_iter()
|
||||
.filter_map(|diff| match diff {
|
||||
diff::Result::Left(_) | diff::Result::Right(_) => Some(diff),
|
||||
diff::Result::Both(_, _) => None,
|
||||
})
|
||||
.collect_vec();
|
||||
if diffs.len() > 0 {
|
||||
Some((intrinsic, diffs))
|
||||
} else {
|
||||
None
|
||||
}
|
||||
}
|
||||
})
|
||||
.inspect(|(intrinsic, diffs)| {
|
||||
use std::io::Write;
|
||||
|
||||
let stdout = std::io::stdout();
|
||||
let mut out = stdout.lock();
|
||||
|
||||
writeln!(out, "Difference for intrinsic: {intrinsic}").unwrap();
|
||||
diffs.into_iter().for_each(|diff| match diff {
|
||||
diff::Result::Left(c) => {
|
||||
writeln!(out, "C: {c}").unwrap();
|
||||
}
|
||||
diff::Result::Right(rust) => {
|
||||
writeln!(out, "Rust: {rust}").unwrap();
|
||||
}
|
||||
_ => (),
|
||||
});
|
||||
writeln!(
|
||||
out,
|
||||
"****************************************************************"
|
||||
)
|
||||
.unwrap();
|
||||
})
|
||||
.count();
|
||||
|
||||
println!(
|
||||
"{} differences found (tested {} intrinsics)",
|
||||
intrinsics_diff_count,
|
||||
intrinsic_name_list.len()
|
||||
);
|
||||
|
||||
intrinsics_diff_count == 0
|
||||
}
|
||||
@@ -1,136 +0,0 @@
|
||||
/// Collects the settings for invoking the C++ compiler and turns them into a
/// reusable [`CppCompilation`].
#[derive(Clone)]
pub struct CompilationCommandBuilder {
    /// Program name or path of the compiler.
    compiler: String,
    /// Value for `--target=`, if any.
    target: Option<String>,
    // NOTE(review): stored by `set_cxx_toolchain_dir` but not read anywhere in
    // this block; confirm whether it is still needed.
    cxx_toolchain_dir: Option<String>,
    /// Pieces that are joined with `+` to form the `-march=` value.
    arch_flags: Vec<String>,
    /// Suffix of the `-O` flag.
    optimization: String,
    /// Working directory of the compiler process.
    project_root: Option<String>,
    /// Additional flags passed through verbatim.
    extra_flags: Vec<String>,
}

impl Default for CompilationCommandBuilder {
    fn default() -> Self {
        Self::new()
    }
}

impl CompilationCommandBuilder {
    /// Creates a builder with an empty compiler name, optimization level `2`
    /// and nothing else set.
    pub fn new() -> Self {
        Self {
            compiler: String::new(),
            target: None,
            cxx_toolchain_dir: None,
            arch_flags: Vec::new(),
            optimization: "2".to_string(),
            project_root: None,
            extra_flags: Vec::new(),
        }
    }

    /// Sets the compiler program.
    pub fn set_compiler(mut self, compiler: &str) -> Self {
        self.compiler = compiler.to_string();
        self
    }

    /// Sets the target passed as `--target=<target>`.
    pub fn set_target(mut self, target: &str) -> Self {
        self.target = Some(target.to_string());
        self
    }

    /// Records the C++ toolchain directory.
    pub fn set_cxx_toolchain_dir(mut self, path: Option<&str>) -> Self {
        self.cxx_toolchain_dir = path.map(|p| p.to_string());
        self
    }

    /// Appends pieces of the `-march=` value; they are joined with `+`.
    pub fn add_arch_flags<'a>(mut self, flags: impl IntoIterator<Item = &'a str>) -> Self {
        self.arch_flags
            .extend(flags.into_iter().map(|s| s.to_owned()));

        self
    }

    /// Sets the optimization level, i.e. the part after `-O`.
    pub fn set_opt_level(mut self, optimization: &str) -> Self {
        self.optimization = optimization.to_string();
        self
    }

    /// Sets the root path of all the generated test files.
    pub fn set_project_root(mut self, path: &str) -> Self {
        self.project_root = Some(path.to_string());
        self
    }

    /// Appends flags that are passed to the compiler unchanged.
    pub fn add_extra_flags<'a>(mut self, flags: impl IntoIterator<Item = &'a str>) -> Self {
        self.extra_flags
            .extend(flags.into_iter().map(|s| s.to_owned()));

        self
    }

    /// Appends a single flag that is passed to the compiler unchanged.
    pub fn add_extra_flag(self, flag: &str) -> Self {
        self.add_extra_flags([flag])
    }
}

impl CompilationCommandBuilder {
    /// Builds the base compiler command.
    ///
    /// Argument order: the whitespace-separated words of the `CPPFLAGS`
    /// environment variable, `-march=...` (only when arch flags were added),
    /// `-O<level>`, the extra flags, and finally `--target=...` when a target
    /// is set.
    pub fn into_cpp_compilation(self) -> CppCompilation {
        let mut cpp_compiler = std::process::Command::new(self.compiler);

        if let Some(project_root) = self.project_root {
            cpp_compiler.current_dir(project_root);
        }

        let flags = std::env::var("CPPFLAGS").unwrap_or_default();
        cpp_compiler.args(flags.split_whitespace());

        // Without any arch flag the joined value is empty; passing a bare
        // `-march=` would hand the compiler an argument without a value.
        if !self.arch_flags.is_empty() {
            cpp_compiler.arg(format!("-march={}", self.arch_flags.join("+")));
        }

        cpp_compiler.arg(format!("-O{}", self.optimization));

        cpp_compiler.args(self.extra_flags);

        if let Some(target) = &self.target {
            cpp_compiler.arg(format!("--target={target}"));
        }

        CppCompilation(cpp_compiler)
    }
}

/// A prepared compiler invocation that is copied for every compile/link step.
pub struct CppCompilation(std::process::Command);

/// Copies program, working directory, arguments and explicit environment
/// changes of `command` into a fresh `Command`.
fn clone_command(command: &std::process::Command) -> std::process::Command {
    let mut cmd = std::process::Command::new(command.get_program());
    if let Some(current_dir) = command.get_current_dir() {
        cmd.current_dir(current_dir);
    }
    cmd.args(command.get_args());

    for (key, val) in command.get_envs() {
        match val {
            Some(val) => cmd.env(key, val),
            // `None` marks a variable that was explicitly removed; keep it
            // removed instead of defining it with an empty value.
            None => cmd.env_remove(key),
        };
    }

    cmd
}

impl CppCompilation {
    /// Gives mutable access to the underlying base command.
    pub fn command_mut(&mut self) -> &mut std::process::Command {
        &mut self.0
    }

    /// Runs the compiler on `input` with `-v -c -o output` appended to the
    /// base command and returns the captured process output.
    pub fn compile_object_file(
        &self,
        input: &str,
        output: &str,
    ) -> std::io::Result<std::process::Output> {
        let mut cmd = clone_command(&self.0);
        cmd.args([input, "-v", "-c", "-o", output]);
        cmd.output()
    }

    /// Runs the base command with `inputs` followed by `-o output` appended
    /// and returns the captured process output.
    pub fn link_executable(
        &self,
        inputs: impl Iterator<Item = String>,
        output: &str,
    ) -> std::io::Result<std::process::Output> {
        let mut cmd = clone_command(&self.0);
        cmd.args(inputs);
        cmd.args(["-o", output]);
        cmd.output()
    }
}
|
||||
@@ -1,173 +1,42 @@
|
||||
use itertools::Itertools;
|
||||
|
||||
use crate::common::intrinsic::Intrinsic;
|
||||
|
||||
use super::argument::Argument;
|
||||
use super::compare::INTRINSIC_DELIMITER;
|
||||
use super::indentation::Indentation;
|
||||
use super::intrinsic_helpers::IntrinsicTypeDefinition;
|
||||
|
||||
// The number of times each intrinsic will be called.
|
||||
const PASSES: u32 = 20;
|
||||
const COMMON_HEADERS: [&str; 7] = [
|
||||
"iostream",
|
||||
"string",
|
||||
"cstring",
|
||||
"iomanip",
|
||||
"sstream",
|
||||
"type_traits",
|
||||
"cassert",
|
||||
];
|
||||
|
||||
pub fn generate_c_test_loop<T: IntrinsicTypeDefinition + Sized>(
|
||||
w: &mut impl std::io::Write,
|
||||
intrinsic: &Intrinsic<T>,
|
||||
indentation: Indentation,
|
||||
additional: &str,
|
||||
passes: u32,
|
||||
) -> std::io::Result<()> {
|
||||
let body_indentation = indentation.nested();
|
||||
// Successive arguments are offset increasingly from their value array start
|
||||
let passes = passes + 1
|
||||
- intrinsic
|
||||
.arguments
|
||||
.iter()
|
||||
.filter(|&arg| !arg.has_constraint())
|
||||
.count() as u32;
|
||||
writeln!(
|
||||
w,
|
||||
"{indentation}for (int i=0; i<{passes}; i++) {{\n\
|
||||
{loaded_args}\
|
||||
{body_indentation}auto __return_value = {intrinsic_call}({args});\n\
|
||||
{print_result}\n\
|
||||
{indentation}}}",
|
||||
loaded_args = intrinsic.arguments.load_values_c(body_indentation),
|
||||
intrinsic_call = intrinsic.name,
|
||||
args = intrinsic.arguments.as_call_param_c(),
|
||||
print_result = intrinsic
|
||||
.results
|
||||
.print_result_c(body_indentation, additional)
|
||||
)
|
||||
}
|
||||
|
||||
pub fn generate_c_constraint_blocks<'a, T: IntrinsicTypeDefinition + 'a>(
|
||||
w: &mut impl std::io::Write,
|
||||
intrinsic: &Intrinsic<T>,
|
||||
indentation: Indentation,
|
||||
constraints: &mut (impl Iterator<Item = &'a Argument<T>> + Clone),
|
||||
name: String,
|
||||
) -> std::io::Result<()> {
|
||||
let Some(current) = constraints.next() else {
|
||||
return generate_c_test_loop(w, intrinsic, indentation, &name, PASSES);
|
||||
};
|
||||
|
||||
let body_indentation = indentation.nested();
|
||||
for i in current.constraint.iter().flat_map(|c| c.iter()) {
|
||||
let ty = current.ty.c_type();
|
||||
|
||||
writeln!(w, "{indentation}{{")?;
|
||||
|
||||
// TODO: Move to actually specifying the enum value
|
||||
// instead of typecasting integers, for better clarity
|
||||
// of generated code.
|
||||
writeln!(
|
||||
w,
|
||||
"{body_indentation}const {ty} {} = ({ty}){i};",
|
||||
current.generate_name()
|
||||
)?;
|
||||
|
||||
generate_c_constraint_blocks(
|
||||
w,
|
||||
intrinsic,
|
||||
body_indentation,
|
||||
&mut constraints.clone(),
|
||||
format!("{name}-{i}"),
|
||||
)?;
|
||||
|
||||
writeln!(w, "{indentation}}}")?;
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
// Compiles C test programs using specified compiler
|
||||
pub fn create_c_test_function<T: IntrinsicTypeDefinition>(
|
||||
w: &mut impl std::io::Write,
|
||||
intrinsic: &Intrinsic<T>,
|
||||
) -> std::io::Result<()> {
|
||||
let indentation = Indentation::default();
|
||||
|
||||
writeln!(w, "int run_{}() {{", intrinsic.name)?;
|
||||
|
||||
// Define the arrays of arguments.
|
||||
let arguments = &intrinsic.arguments;
|
||||
arguments.gen_arglists_c(w, indentation.nested(), PASSES)?;
|
||||
|
||||
generate_c_constraint_blocks(
|
||||
w,
|
||||
intrinsic,
|
||||
indentation.nested(),
|
||||
&mut arguments.iter().rev().filter(|&i| i.has_constraint()),
|
||||
Default::default(),
|
||||
)?;
|
||||
|
||||
writeln!(w, " return 0;")?;
|
||||
writeln!(w, "}}")?;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub fn write_mod_cpp<T: IntrinsicTypeDefinition>(
|
||||
pub fn write_wrapper_c<T: IntrinsicTypeDefinition>(
|
||||
w: &mut impl std::io::Write,
|
||||
notice: &str,
|
||||
platform_headers: &[&str],
|
||||
forward_declarations: &str,
|
||||
intrinsics: &[Intrinsic<T>],
|
||||
) -> std::io::Result<()> {
|
||||
write!(w, "{notice}")?;
|
||||
|
||||
for header in COMMON_HEADERS.iter().chain(platform_headers.iter()) {
|
||||
writeln!(w, "#include <stdint.h>")?;
|
||||
writeln!(w, "#include <stddef.h>")?;
|
||||
|
||||
for header in platform_headers {
|
||||
writeln!(w, "#include <{header}>")?;
|
||||
}
|
||||
|
||||
writeln!(w, "{}", forward_declarations)?;
|
||||
|
||||
for intrinsic in intrinsics {
|
||||
create_c_test_function(w, intrinsic)?;
|
||||
intrinsic.iter_specializations(|imm_values| {
|
||||
writeln!(
|
||||
w,
|
||||
"
|
||||
void {name}_wrapper{imm_arglist}({return_ty}* __dst{arglist}) {{
|
||||
*__dst = {name}({params});
|
||||
}}",
|
||||
return_ty = intrinsic.results.c_type(),
|
||||
name = intrinsic.name,
|
||||
imm_arglist = imm_values
|
||||
.iter()
|
||||
.format_with("", |i, fmt| fmt(&format_args!("_{i}"))),
|
||||
arglist = intrinsic.arguments.as_non_imm_arglist_c(),
|
||||
params = intrinsic.arguments.as_call_params_c(&imm_values)
|
||||
)
|
||||
})?;
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub fn write_main_cpp<'a>(
|
||||
w: &mut impl std::io::Write,
|
||||
arch_specific_definitions: &str,
|
||||
arch_specific_headers: &[&str],
|
||||
intrinsics: impl Iterator<Item = &'a str> + Clone,
|
||||
) -> std::io::Result<()> {
|
||||
for header in COMMON_HEADERS.iter().chain(arch_specific_headers.iter()) {
|
||||
writeln!(w, "#include <{header}>")?;
|
||||
}
|
||||
|
||||
// NOTE: It's assumed that this value contains the required `ifdef`s.
|
||||
writeln!(w, "{arch_specific_definitions }")?;
|
||||
|
||||
for intrinsic in intrinsics.clone() {
|
||||
writeln!(w, "extern int run_{intrinsic}(void);")?;
|
||||
}
|
||||
|
||||
writeln!(w, "int main(int argc, char **argv) {{")?;
|
||||
|
||||
for intrinsic in intrinsics {
|
||||
writeln!(
|
||||
w,
|
||||
" std::cout << \"{INTRINSIC_DELIMITER}\" << std::endl;"
|
||||
)?;
|
||||
writeln!(w, " std::cout << \"{intrinsic}\" << std::endl;")?;
|
||||
writeln!(w, " run_{intrinsic}();\n")?;
|
||||
}
|
||||
|
||||
writeln!(w, " return 0;")?;
|
||||
|
||||
writeln!(w, "}}")?;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
@@ -1,23 +1,54 @@
|
||||
use itertools::Itertools;
|
||||
use std::process::Command;
|
||||
|
||||
use super::compare::INTRINSIC_DELIMITER;
|
||||
use super::indentation::Indentation;
|
||||
use super::intrinsic_helpers::IntrinsicTypeDefinition;
|
||||
use crate::common::argument::ArgumentList;
|
||||
use crate::common::intrinsic::Intrinsic;
|
||||
use crate::common::intrinsic_helpers::TypeKind;
|
||||
|
||||
// The number of times each intrinsic will be called.
|
||||
pub(crate) const PASSES: u32 = 20;
|
||||
|
||||
// we need a reflexive equality relation, so treat NaNs as equal
|
||||
const COMMON_RUST_DEFINITIONS: &str = r#"
|
||||
macro_rules! wrap_partialeq {
|
||||
($($wrapper:ident ($inner:ty)),*) => {$(
|
||||
#[derive(Debug, Copy, Clone)]
|
||||
#[repr(transparent)]
|
||||
pub struct $wrapper($inner);
|
||||
|
||||
impl PartialEq for $wrapper {
|
||||
fn eq(&self, other: &Self) -> bool {
|
||||
self.0 == other.0 || (self.0.is_nan() && other.0.is_nan())
|
||||
}
|
||||
}
|
||||
|
||||
impl Eq for $wrapper {}
|
||||
)*}
|
||||
}
|
||||
|
||||
wrap_partialeq!(NanEqF16(f16), NanEqF32(f32), NanEqF64(f64));
|
||||
"#;
|
||||
|
||||
/// Concatenates the given literals at compile time, appending a newline to
/// each one. A trailing comma is accepted.
macro_rules! concatln {
    ($($line:expr),* $(,)?) => {
        concat!($($line, "\n"),*)
    };
}
|
||||
|
||||
fn write_cargo_toml_header(w: &mut impl std::io::Write, name: &str) -> std::io::Result<()> {
|
||||
writeln!(
|
||||
/// Writes a workspace manifest whose members are `mod_0` up to
/// `mod_{module_count - 1}`.
pub fn write_bin_cargo_toml(
    w: &mut impl std::io::Write,
    module_count: usize,
) -> std::io::Result<()> {
    w.write_all(b"[workspace]\nmembers = [\n")?;
    (0..module_count).try_for_each(|index| writeln!(w, " \"mod_{index}\","))?;
    w.write_all(b"]\n")
}
|
||||
|
||||
pub fn write_lib_cargo_toml(w: &mut impl std::io::Write, name: &str) -> std::io::Result<()> {
|
||||
write!(
|
||||
w,
|
||||
concatln!(
|
||||
"[package]",
|
||||
@@ -26,6 +57,12 @@ fn write_cargo_toml_header(w: &mut impl std::io::Write, name: &str) -> std::io::
|
||||
"authors = [{authors}]",
|
||||
"license = \"{license}\"",
|
||||
"edition = \"2018\"",
|
||||
"",
|
||||
"[dependencies]",
|
||||
"core_arch = {{ path = \"../../crates/core_arch\" }}",
|
||||
"",
|
||||
"[build-dependencies]",
|
||||
"cc = \"1\""
|
||||
),
|
||||
name = name,
|
||||
version = env!("CARGO_PKG_VERSION"),
|
||||
@@ -36,72 +73,12 @@ fn write_cargo_toml_header(w: &mut impl std::io::Write, name: &str) -> std::io::
|
||||
)
|
||||
}
|
||||
|
||||
pub fn write_bin_cargo_toml(
|
||||
w: &mut impl std::io::Write,
|
||||
module_count: usize,
|
||||
) -> std::io::Result<()> {
|
||||
write_cargo_toml_header(w, "intrinsic-test-programs")?;
|
||||
|
||||
writeln!(w, "[dependencies]")?;
|
||||
writeln!(w, "core_arch = {{ path = \"../crates/core_arch\" }}")?;
|
||||
|
||||
for i in 0..module_count {
|
||||
writeln!(w, "mod_{i} = {{ path = \"mod_{i}/\" }}")?;
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub fn write_lib_cargo_toml(w: &mut impl std::io::Write, name: &str) -> std::io::Result<()> {
|
||||
write_cargo_toml_header(w, name)?;
|
||||
|
||||
writeln!(w, "[dependencies]")?;
|
||||
writeln!(w, "core_arch = {{ path = \"../../crates/core_arch\" }}")?;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub fn write_main_rs<'a>(
|
||||
w: &mut impl std::io::Write,
|
||||
chunk_count: usize,
|
||||
cfg: &str,
|
||||
definitions: &str,
|
||||
intrinsics: impl Iterator<Item = &'a str> + Clone,
|
||||
) -> std::io::Result<()> {
|
||||
writeln!(w, "#![feature(simd_ffi)]")?;
|
||||
writeln!(w, "#![feature(f16)]")?;
|
||||
writeln!(w, "#![allow(unused)]")?;
|
||||
|
||||
// Cargo will spam the logs if these warnings are not silenced.
|
||||
writeln!(w, "#![allow(non_upper_case_globals)]")?;
|
||||
writeln!(w, "#![allow(non_camel_case_types)]")?;
|
||||
writeln!(w, "#![allow(non_snake_case)]")?;
|
||||
|
||||
writeln!(w, "{cfg}")?;
|
||||
writeln!(w, "{definitions}")?;
|
||||
|
||||
for module in 0..chunk_count {
|
||||
writeln!(w, "use mod_{module}::*;")?;
|
||||
}
|
||||
|
||||
writeln!(w, "fn main() {{")?;
|
||||
|
||||
for binary in intrinsics {
|
||||
writeln!(w, " println!(\"{INTRINSIC_DELIMITER}\");")?;
|
||||
writeln!(w, " println!(\"{binary}\");")?;
|
||||
writeln!(w, " run_{binary}();\n")?;
|
||||
}
|
||||
|
||||
writeln!(w, "}}")?;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub fn write_lib_rs<T: IntrinsicTypeDefinition>(
|
||||
w: &mut impl std::io::Write,
|
||||
notice: &str,
|
||||
cfg: &str,
|
||||
definitions: &str,
|
||||
i: usize,
|
||||
intrinsics: &[Intrinsic<T>],
|
||||
) -> std::io::Result<()> {
|
||||
write!(w, "{notice}")?;
|
||||
@@ -117,13 +94,15 @@ pub fn write_lib_rs<T: IntrinsicTypeDefinition>(
|
||||
|
||||
writeln!(w, "{cfg}")?;
|
||||
|
||||
writeln!(w, "{}", COMMON_RUST_DEFINITIONS)?;
|
||||
|
||||
writeln!(w, "{definitions}")?;
|
||||
|
||||
let mut seen = std::collections::HashSet::new();
|
||||
|
||||
for intrinsic in intrinsics {
|
||||
for arg in &intrinsic.arguments.args {
|
||||
if !arg.has_constraint() && arg.ty.is_rust_vals_array_const() {
|
||||
if !arg.has_constraint() {
|
||||
let name = arg.rust_vals_array_name().to_string();
|
||||
|
||||
if seen.insert(name) {
|
||||
@@ -133,196 +112,189 @@ pub fn write_lib_rs<T: IntrinsicTypeDefinition>(
|
||||
}
|
||||
}
|
||||
|
||||
write_bindings_rust(w, i, intrinsics)?;
|
||||
|
||||
for intrinsic in intrinsics {
|
||||
crate::common::gen_rust::create_rust_test_module(w, intrinsic)?;
|
||||
create_rust_test(w, intrinsic)?;
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub fn compile_rust_programs(
|
||||
toolchain: Option<&str>,
|
||||
target: &str,
|
||||
profile: &str,
|
||||
linker: Option<&str>,
|
||||
) -> bool {
|
||||
/* If there has been a linker explicitly set from the command line then
|
||||
* we want to set it via setting it in the RUSTFLAGS*/
|
||||
|
||||
// This is done because `toolchain` is None when
|
||||
// the --generate-only flag is passed
|
||||
if toolchain.is_none() {
|
||||
return true;
|
||||
}
|
||||
|
||||
trace!("Building cargo command");
|
||||
|
||||
let mut cargo_command = Command::new("cargo");
|
||||
cargo_command.current_dir("rust_programs");
|
||||
|
||||
// Do not use the target directory of the workspace please.
|
||||
cargo_command.env("CARGO_TARGET_DIR", "target");
|
||||
|
||||
if toolchain.is_some_and(|val| !val.is_empty()) {
|
||||
cargo_command.arg(toolchain.unwrap());
|
||||
}
|
||||
cargo_command.args(["build", "--target", target, "--profile", profile]);
|
||||
|
||||
let mut rust_flags = "-Cdebuginfo=0".to_string();
|
||||
if let Some(linker) = linker {
|
||||
rust_flags.push_str(" -C linker=");
|
||||
rust_flags.push_str(linker);
|
||||
rust_flags.push_str(" -C link-args=-static");
|
||||
|
||||
cargo_command.env("CPPFLAGS", "-fuse-ld=lld");
|
||||
}
|
||||
|
||||
cargo_command.env("RUSTFLAGS", rust_flags);
|
||||
|
||||
trace!("running cargo");
|
||||
|
||||
if log::log_enabled!(log::Level::Trace) {
|
||||
cargo_command.stdout(std::process::Stdio::inherit());
|
||||
cargo_command.stderr(std::process::Stdio::inherit());
|
||||
}
|
||||
|
||||
let output = cargo_command.output();
|
||||
trace!("cargo is done");
|
||||
|
||||
if let Ok(output) = output {
|
||||
if output.status.success() {
|
||||
true
|
||||
} else {
|
||||
error!(
|
||||
"Failed to compile code for rust intrinsics\n\nstdout:\n{}\n\nstderr:\n{}",
|
||||
std::str::from_utf8(&output.stdout).unwrap_or(""),
|
||||
std::str::from_utf8(&output.stderr).unwrap_or("")
|
||||
);
|
||||
false
|
||||
}
|
||||
} else {
|
||||
error!("Command failed: {output:#?}");
|
||||
false
|
||||
}
|
||||
}
|
||||
|
||||
pub fn generate_rust_test_loop<T: IntrinsicTypeDefinition>(
|
||||
fn generate_rust_test_loop<T: IntrinsicTypeDefinition>(
|
||||
w: &mut impl std::io::Write,
|
||||
intrinsic: &Intrinsic<T>,
|
||||
indentation: Indentation,
|
||||
specializations: &[Vec<i32>],
|
||||
passes: u32,
|
||||
) -> std::io::Result<()> {
|
||||
let intrinsic_name = &intrinsic.name;
|
||||
let passes = passes + 1
|
||||
- intrinsic
|
||||
.arguments
|
||||
.iter()
|
||||
.filter(|&arg| !arg.has_constraint())
|
||||
.count() as u32;
|
||||
|
||||
// Each function (and each specialization) has its own type. Erase that type with a cast.
|
||||
let mut coerce = String::from("unsafe fn(");
|
||||
let mut coerce = String::from("fn(");
|
||||
let mut c_coerce = String::from("fn(_, ");
|
||||
for _ in intrinsic.arguments.iter().filter(|a| !a.has_constraint()) {
|
||||
coerce += "_, ";
|
||||
c_coerce += "_, ";
|
||||
}
|
||||
coerce += ") -> _";
|
||||
c_coerce += ")";
|
||||
|
||||
match specializations {
|
||||
[] => {
|
||||
writeln!(w, " let specializations = [(\"\", {intrinsic_name})];")?;
|
||||
}
|
||||
[const_args] if const_args.is_empty() => {
|
||||
writeln!(w, " let specializations = [(\"\", {intrinsic_name})];")?;
|
||||
}
|
||||
_ => {
|
||||
writeln!(w, " let specializations = [")?;
|
||||
if intrinsic
|
||||
.arguments
|
||||
.iter()
|
||||
.filter(|arg| arg.has_constraint())
|
||||
.count()
|
||||
== 0
|
||||
{
|
||||
writeln!(
|
||||
w,
|
||||
" let specializations = [(\"\", {intrinsic_name}, {intrinsic_name}_wrapper)];"
|
||||
)?;
|
||||
} else {
|
||||
writeln!(w, " let specializations = [")?;
|
||||
|
||||
for specialization in specializations {
|
||||
let mut specialization: Vec<_> =
|
||||
specialization.iter().map(|d| d.to_string()).collect();
|
||||
intrinsic.iter_specializations(|imm_values| {
|
||||
writeln!(
|
||||
w,
|
||||
" (\"{const_args}\", {intrinsic_name}::<{const_args}> as unsafe {coerce}, {intrinsic_name}_wrapper_{c_const_args} as unsafe extern \"C\" {c_coerce}),",
|
||||
const_args = imm_values.iter().join(","),
|
||||
c_const_args = imm_values.iter().join("_"),
|
||||
)
|
||||
})?;
|
||||
|
||||
let const_args = specialization.join(",");
|
||||
|
||||
// The identifier is reversed.
|
||||
specialization.reverse();
|
||||
let id = specialization.join("-");
|
||||
|
||||
writeln!(
|
||||
w,
|
||||
" (\"-{id}\", {intrinsic_name}::<{const_args}> as {coerce}),"
|
||||
)?;
|
||||
}
|
||||
|
||||
writeln!(w, " ];")?;
|
||||
}
|
||||
writeln!(w, " ];")?;
|
||||
}
|
||||
|
||||
let (cast_prefix, cast_suffix) = if intrinsic.results.is_simd() {
|
||||
(
|
||||
format!(
|
||||
"std::mem::transmute::<_, [{}; {}]>(",
|
||||
intrinsic.results.rust_scalar_type().replace("f", "NanEqF"),
|
||||
intrinsic.results.num_lanes() * intrinsic.results.num_vectors()
|
||||
),
|
||||
")",
|
||||
)
|
||||
} else if intrinsic.results.kind == TypeKind::Float {
|
||||
(
|
||||
match intrinsic.results.inner_size() {
|
||||
16 => format!("NanEqF16("),
|
||||
32 => format!("NanEqF32("),
|
||||
64 => format!("NanEqF64("),
|
||||
_ => unimplemented!(),
|
||||
},
|
||||
")",
|
||||
)
|
||||
} else {
|
||||
("".to_string(), "")
|
||||
};
|
||||
|
||||
write!(
|
||||
w,
|
||||
concatln!(
|
||||
" for (id, f) in specializations {{",
|
||||
" for (id, rust, c) in specializations {{",
|
||||
" for i in 0..{passes} {{",
|
||||
" unsafe {{",
|
||||
"{loaded_args}",
|
||||
" let __return_value = f({args});",
|
||||
" println!(\"Result {{id}}-{{}}: {{:?}}\", i + 1, {return_value});",
|
||||
" let __rust_return_value = rust({rust_args});",
|
||||
"",
|
||||
" let mut __c_return_value = std::mem::MaybeUninit::uninit();",
|
||||
" c(__c_return_value.as_mut_ptr(){c_args});",
|
||||
" let __c_return_value = __c_return_value.assume_init();",
|
||||
"",
|
||||
" assert_eq!({cast_prefix}__rust_return_value{cast_suffix}, {cast_prefix}__c_return_value{cast_suffix}, \"{{id}}\");",
|
||||
" }}",
|
||||
" }}",
|
||||
" }}",
|
||||
),
|
||||
loaded_args = intrinsic.arguments.load_values_rust(indentation.nest_by(4)),
|
||||
args = intrinsic.arguments.as_call_param_rust(),
|
||||
return_value = intrinsic.results.print_result_rust(),
|
||||
loaded_args = intrinsic
|
||||
.arguments
|
||||
.load_values_rust(Indentation::default().nest_by(4)),
|
||||
rust_args = intrinsic.arguments.as_call_param_rust(),
|
||||
c_args = intrinsic.arguments.as_c_call_param_rust(),
|
||||
passes = passes,
|
||||
cast_prefix = cast_prefix,
|
||||
cast_suffix = cast_suffix,
|
||||
)
|
||||
}
|
||||
|
||||
/// Builds every specialization (sequence of const-generic argument values) for an intrinsic.
///
/// `constraints` yields one iterator of permitted values per constrained argument. The
/// result is the cartesian product of those value sets: each inner vector holds one value
/// per constraint, in constraint order. Values of a later constraint vary slowest, i.e. all
/// existing prefixes are extended with the first value before any is extended with the
/// second. With no constraints the result is a single empty specialization.
///
/// Panics if a constraint value does not fit in an `i32`.
fn generate_rust_specializations(
    constraints: &mut impl Iterator<Item = impl Iterator<Item = i64>>,
) -> Vec<Vec<i32>> {
    // Start from the single empty sequence so the first constraint has a prefix to extend.
    let mut sequences: Vec<Vec<i32>> = vec![Vec::new()];

    for values in constraints {
        let mut extended = Vec::new();

        for value in values {
            let value = i32::try_from(value).unwrap();

            // Append this value to a copy of every sequence built so far.
            extended.extend(sequences.iter().map(|prefix| {
                let mut sequence = prefix.clone();
                sequence.push(value);
                sequence
            }));
        }

        sequences = extended;
    }

    sequences
}
|
||||
|
||||
// Top-level function to create complete test program
|
||||
pub fn create_rust_test_module<T: IntrinsicTypeDefinition>(
|
||||
fn create_rust_test<T: IntrinsicTypeDefinition>(
|
||||
w: &mut impl std::io::Write,
|
||||
intrinsic: &Intrinsic<T>,
|
||||
) -> std::io::Result<()> {
|
||||
trace!("generating `{}`", intrinsic.name);
|
||||
let indentation = Indentation::default();
|
||||
|
||||
writeln!(w, "pub fn run_{}() {{", intrinsic.name)?;
|
||||
write!(
|
||||
w,
|
||||
concatln!("#[test]", "fn test_{intrinsic_name}() {{"),
|
||||
intrinsic_name = intrinsic.name,
|
||||
)?;
|
||||
|
||||
// Define the arrays of arguments.
|
||||
let arguments = &intrinsic.arguments;
|
||||
arguments.gen_arglists_rust(w, indentation.nested(), PASSES)?;
|
||||
|
||||
// Define any const generics as `const` items, then generate the actual test loop.
|
||||
let specializations = generate_rust_specializations(
|
||||
&mut arguments
|
||||
.iter()
|
||||
.filter_map(|i| i.constraint.as_ref().map(|v| v.iter())),
|
||||
);
|
||||
|
||||
generate_rust_test_loop(w, intrinsic, indentation, &specializations, PASSES)?;
|
||||
generate_rust_test_loop(w, intrinsic, PASSES)?;
|
||||
|
||||
writeln!(w, "}}")?;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub fn write_bindings_rust<T: IntrinsicTypeDefinition>(
|
||||
w: &mut impl std::io::Write,
|
||||
i: usize,
|
||||
intrinsics: &[Intrinsic<T>],
|
||||
) -> std::io::Result<()> {
|
||||
write!(
|
||||
w,
|
||||
concatln!(
|
||||
"#[allow(improper_ctypes)]",
|
||||
"#[link(name = \"wrapper_{i}\")]",
|
||||
"unsafe extern \"C\" {{"
|
||||
),
|
||||
i = i
|
||||
)?;
|
||||
|
||||
for intrinsic in intrinsics {
|
||||
intrinsic.iter_specializations(|imm_values| {
|
||||
writeln!(
|
||||
w,
|
||||
" fn {name}_wrapper{imm_arglist}(__dst: *mut {return_ty}{arglist});",
|
||||
return_ty = intrinsic.results.rust_type(),
|
||||
name = intrinsic.name,
|
||||
imm_arglist = imm_values
|
||||
.iter()
|
||||
.format_with("", |i, fmt| fmt(&format_args!("_{i}"))),
|
||||
arglist = intrinsic.arguments.as_non_imm_arglist_rust(),
|
||||
)
|
||||
})?;
|
||||
}
|
||||
|
||||
writeln!(w, "}}")
|
||||
}
|
||||
|
||||
pub fn write_build_rs(
|
||||
w: &mut impl std::io::Write,
|
||||
i: usize,
|
||||
arch_flags: &[&str],
|
||||
) -> std::io::Result<()> {
|
||||
const COMMON_FLAGS: &[&str] = &["-ffp-contract=off", "-ffp-model=strict", "-Wno-narrowing"];
|
||||
|
||||
write!(
|
||||
w,
|
||||
concatln!(
|
||||
"fn main() {{",
|
||||
" cc::Build::new()",
|
||||
" .file(\"../../c_programs/wrapper_{i}.c\")",
|
||||
" .opt_level(2)",
|
||||
" .flags(&[",
|
||||
),
|
||||
i = i
|
||||
)?;
|
||||
|
||||
let indentation = Indentation::default().nest_by(2);
|
||||
for flag in COMMON_FLAGS.iter().chain(arch_flags) {
|
||||
writeln!(w, "{indentation}\"{flag}\",")?;
|
||||
}
|
||||
|
||||
write!(
|
||||
w,
|
||||
concatln!(" ])", " .compile(\"wrapper_{i}\");", "}}"),
|
||||
i = i
|
||||
)
|
||||
}
|
||||
|
||||
@@ -1,3 +1,5 @@
|
||||
use crate::common::constraint::Constraint;
|
||||
|
||||
use super::argument::ArgumentList;
|
||||
use super::intrinsic_helpers::IntrinsicTypeDefinition;
|
||||
|
||||
@@ -16,3 +18,36 @@ pub struct Intrinsic<T: IntrinsicTypeDefinition> {
|
||||
/// Any architecture-specific tags.
|
||||
pub arch_tags: Vec<String>,
|
||||
}
|
||||
|
||||
fn recurse_specializations<'a, E>(
|
||||
constraints: &mut (impl Iterator<Item = &'a Constraint> + Clone),
|
||||
imm_values: &mut Vec<i64>,
|
||||
f: &mut impl FnMut(&[i64]) -> Result<(), E>,
|
||||
) -> Result<(), E> {
|
||||
if let Some(current) = constraints.next() {
|
||||
for i in current.iter() {
|
||||
imm_values.push(i);
|
||||
recurse_specializations(&mut constraints.clone(), imm_values, f)?;
|
||||
imm_values.pop();
|
||||
}
|
||||
Ok(())
|
||||
} else {
|
||||
f(&imm_values)
|
||||
}
|
||||
}
|
||||
|
||||
impl<T: IntrinsicTypeDefinition> Intrinsic<T> {
|
||||
pub fn iter_specializations<E>(
|
||||
&self,
|
||||
mut f: impl FnMut(&[i64]) -> Result<(), E>,
|
||||
) -> Result<(), E> {
|
||||
recurse_specializations(
|
||||
&mut self
|
||||
.arguments
|
||||
.iter()
|
||||
.filter_map(|arg| arg.constraint.as_ref()),
|
||||
&mut Vec::new(),
|
||||
&mut f,
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -5,7 +5,6 @@
|
||||
|
||||
use itertools::Itertools as _;
|
||||
|
||||
use super::cli::Language;
|
||||
use super::indentation::Indentation;
|
||||
use super::values::value_for_array;
|
||||
|
||||
@@ -94,6 +93,7 @@ pub fn rust_prefix(&self) -> &str {
|
||||
Self::Poly => "u",
|
||||
Self::Char(Sign::Unsigned) => "u",
|
||||
Self::Char(Sign::Signed) => "i",
|
||||
Self::Mask => "u",
|
||||
_ => unreachable!("Unused type kind: {self:#?}"),
|
||||
}
|
||||
}
|
||||
@@ -154,67 +154,7 @@ pub fn is_ptr(&self) -> bool {
|
||||
self.ptr
|
||||
}
|
||||
|
||||
pub fn c_scalar_type(&self) -> String {
|
||||
match self.kind() {
|
||||
TypeKind::Char(_) => String::from("char"),
|
||||
TypeKind::Vector => String::from("int32_t"),
|
||||
_ => format!(
|
||||
"{prefix}{bits}_t",
|
||||
prefix = self.kind().c_prefix(),
|
||||
bits = self.inner_size()
|
||||
),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn c_promotion(&self) -> &str {
|
||||
match *self {
|
||||
IntrinsicType {
|
||||
kind,
|
||||
bit_len: Some(8),
|
||||
..
|
||||
} => match kind {
|
||||
TypeKind::Int(Sign::Signed) => "int",
|
||||
TypeKind::Int(Sign::Unsigned) => "unsigned int",
|
||||
TypeKind::Poly => "uint8_t",
|
||||
_ => "",
|
||||
},
|
||||
IntrinsicType {
|
||||
kind: TypeKind::Poly,
|
||||
bit_len: Some(bit_len),
|
||||
..
|
||||
} => match bit_len {
|
||||
8 => unreachable!("handled above"),
|
||||
16 => "uint16_t",
|
||||
32 => "uint32_t",
|
||||
64 => "uint64_t",
|
||||
128 => "",
|
||||
_ => panic!("invalid bit_len"),
|
||||
},
|
||||
IntrinsicType {
|
||||
kind: TypeKind::Float,
|
||||
bit_len: Some(bit_len),
|
||||
..
|
||||
} => match bit_len {
|
||||
16 => "float16_t",
|
||||
32 => "float",
|
||||
64 => "double",
|
||||
128 => "",
|
||||
_ => panic!("invalid bit_len"),
|
||||
},
|
||||
IntrinsicType {
|
||||
kind: TypeKind::Char(_),
|
||||
..
|
||||
} => "char",
|
||||
_ => "",
|
||||
}
|
||||
}
|
||||
|
||||
pub fn populate_random(
|
||||
&self,
|
||||
indentation: Indentation,
|
||||
loads: u32,
|
||||
language: &Language,
|
||||
) -> String {
|
||||
pub fn populate_random(&self, indentation: Indentation, loads: u32) -> String {
|
||||
match self {
|
||||
IntrinsicType {
|
||||
bit_len: Some(bit_len @ (1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 16 | 32 | 64)),
|
||||
@@ -224,13 +164,9 @@ pub fn populate_random(
|
||||
vec_len,
|
||||
..
|
||||
} => {
|
||||
let (prefix, suffix) = match language {
|
||||
Language::Rust => ('[', ']'),
|
||||
Language::C => ('{', '}'),
|
||||
};
|
||||
let body_indentation = indentation.nested();
|
||||
format!(
|
||||
"{prefix}\n{body}\n{indentation}{suffix}",
|
||||
"[\n{body}\n{indentation}]",
|
||||
body = (0..(simd_len.unwrap_or(1) * vec_len.unwrap_or(1) + loads - 1))
|
||||
.format_with(",\n", |i, fmt| {
|
||||
let src = value_for_array(*bit_len, i);
|
||||
@@ -241,13 +177,7 @@ pub fn populate_random(
|
||||
let mask = !0u64 >> (64 - *bit_len);
|
||||
let ones_compl = src ^ mask;
|
||||
let twos_compl = ones_compl + 1;
|
||||
if (twos_compl == src) && (language == &Language::C) {
|
||||
// `src` is INT*_MIN. C requires `-0x7fffffff - 1` to avoid
|
||||
// undefined literal overflow behaviour.
|
||||
fmt(&format_args!("{body_indentation}-{ones_compl:#x} - 1"))
|
||||
} else {
|
||||
fmt(&format_args!("{body_indentation}-{twos_compl:#x}"))
|
||||
}
|
||||
fmt(&format_args!("{body_indentation}-{twos_compl:#x}"))
|
||||
} else {
|
||||
fmt(&format_args!("{body_indentation}{src:#x}"))
|
||||
}
|
||||
@@ -261,20 +191,11 @@ pub fn populate_random(
|
||||
vec_len,
|
||||
..
|
||||
} => {
|
||||
let (prefix, cast_prefix, cast_suffix, suffix) = match (language, bit_len) {
|
||||
(&Language::Rust, 16) => ('[', "f16::from_bits(", ")", ']'),
|
||||
(&Language::Rust, 32) => ('[', "f32::from_bits(", ")", ']'),
|
||||
(&Language::Rust, 64) => ('[', "f64::from_bits(", ")", ']'),
|
||||
(&Language::C, 16) => ('{', "cast<float16_t, uint16_t>(", ")", '}'),
|
||||
(&Language::C, 32) => ('{', "cast<float, uint32_t>(", ")", '}'),
|
||||
(&Language::C, 64) => ('{', "cast<double, uint64_t>(", ")", '}'),
|
||||
_ => unreachable!(),
|
||||
};
|
||||
format!(
|
||||
"{prefix}\n{body}\n{indentation}{suffix}",
|
||||
"[\n{body}\n{indentation}]",
|
||||
body = (0..(simd_len.unwrap_or(1) * vec_len.unwrap_or(1) + loads - 1))
|
||||
.format_with(",\n", |i, fmt| fmt(&format_args!(
|
||||
"{indentation}{cast_prefix}{src:#x}{cast_suffix}",
|
||||
"{indentation}f{bit_len}::from_bits({src:#x})",
|
||||
indentation = indentation.nested(),
|
||||
src = value_for_array(*bit_len, i)
|
||||
)))
|
||||
@@ -287,14 +208,10 @@ pub fn populate_random(
|
||||
vec_len,
|
||||
..
|
||||
} => {
|
||||
let (prefix, suffix) = match language {
|
||||
Language::Rust => ('[', ']'),
|
||||
Language::C => ('{', '}'),
|
||||
};
|
||||
let body_indentation = indentation.nested();
|
||||
let effective_bit_len = 32;
|
||||
format!(
|
||||
"{prefix}\n{body}\n{indentation}{suffix}",
|
||||
"[\n{body}\n{indentation}]",
|
||||
body = (0..(vec_len.unwrap_or(1) * simd_len.unwrap_or(1) + loads - 1))
|
||||
.format_with(",\n", |i, fmt| {
|
||||
let src = value_for_array(effective_bit_len, i);
|
||||
@@ -304,13 +221,7 @@ pub fn populate_random(
|
||||
let mask = !0u64 >> (64 - effective_bit_len);
|
||||
let ones_compl = src ^ mask;
|
||||
let twos_compl = ones_compl + 1;
|
||||
if (twos_compl == src) && (language == &Language::C) {
|
||||
// `src` is INT*_MIN. C requires `-0x7fffffff - 1` to avoid
|
||||
// undefined literal overflow behaviour.
|
||||
fmt(&format_args!("{body_indentation}-{ones_compl:#x} - 1"))
|
||||
} else {
|
||||
fmt(&format_args!("{body_indentation}-{twos_compl:#x}"))
|
||||
}
|
||||
fmt(&format_args!("{body_indentation}-{twos_compl:#x}"))
|
||||
} else {
|
||||
fmt(&format_args!("{body_indentation}{src:#x}"))
|
||||
}
|
||||
@@ -320,73 +231,31 @@ pub fn populate_random(
|
||||
_ => unimplemented!("populate random: {self:#?}"),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn is_rust_vals_array_const(&self) -> bool {
|
||||
match self {
|
||||
// Floats have to be loaded at runtime for stable NaN conversion.
|
||||
IntrinsicType {
|
||||
kind: TypeKind::Float,
|
||||
..
|
||||
} => false,
|
||||
IntrinsicType {
|
||||
kind: TypeKind::Int(_) | TypeKind::Poly,
|
||||
..
|
||||
} => true,
|
||||
_ => true,
|
||||
}
|
||||
}
|
||||
|
||||
pub fn as_call_param_c(&self, name: &String) -> String {
|
||||
if self.ptr {
|
||||
format!("&{name}")
|
||||
} else {
|
||||
name.clone()
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub trait IntrinsicTypeDefinition: Deref<Target = IntrinsicType> {
|
||||
/// Determines the load function for this type.
|
||||
/// can be implemented in an `impl` block
|
||||
fn get_load_function(&self, _language: Language) -> String;
|
||||
|
||||
/// can be implemented in an `impl` block
|
||||
fn get_lane_function(&self) -> String;
|
||||
fn get_load_function(&self) -> String;
|
||||
|
||||
/// Gets a string containing the typename for this type in C format.
|
||||
/// can be directly defined in `impl` blocks
|
||||
fn c_type(&self) -> String;
|
||||
|
||||
/// Gets a string containing the typename for this type in Rust format.
|
||||
/// can be directly defined in `impl` blocks
|
||||
fn c_single_vector_type(&self) -> String;
|
||||
|
||||
/// Generates a std::cout for the intrinsics results that will match the
|
||||
/// rust debug output format for the return type. The generated line assumes
|
||||
/// there is an int i in scope which is the current pass number.
|
||||
fn print_result_c(&self, indentation: Indentation, additional: &str) -> String;
|
||||
|
||||
/// Generates a std::cout for the intrinsics results that will match the
|
||||
/// rust debug output format for the return type. The generated line assumes
|
||||
/// there is an int i in scope which is the current pass number.
|
||||
fn print_result_rust(&self) -> String {
|
||||
String::from("format_args!(\"{__return_value:.150?}\")")
|
||||
}
|
||||
fn rust_type(&self) -> String;
|
||||
|
||||
/// To enable architecture-specific logic
|
||||
fn rust_scalar_type(&self) -> String {
|
||||
format!(
|
||||
"{prefix}{bits}",
|
||||
prefix = self.kind().rust_prefix(),
|
||||
bits = self.inner_size()
|
||||
)
|
||||
}
|
||||
|
||||
fn generate_final_type_cast(&self) -> String {
|
||||
let type_data = self.c_promotion();
|
||||
if type_data.len() > 2 {
|
||||
format!("({type_data})")
|
||||
if self.is_simd() {
|
||||
format!(
|
||||
"{prefix}{bits}",
|
||||
prefix = self.kind().rust_prefix(),
|
||||
bits = self.inner_size()
|
||||
)
|
||||
} else {
|
||||
String::new()
|
||||
self.rust_type()
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1,38 +1,32 @@
|
||||
use std::fs::File;
|
||||
use std::{fs::File, io};
|
||||
|
||||
use rayon::prelude::*;
|
||||
|
||||
use cli::ProcessedCli;
|
||||
|
||||
use crate::common::{
|
||||
compile_c::CppCompilation,
|
||||
gen_c::{write_main_cpp, write_mod_cpp},
|
||||
gen_rust::{
|
||||
compile_rust_programs, write_bin_cargo_toml, write_lib_cargo_toml, write_lib_rs,
|
||||
write_main_rs,
|
||||
},
|
||||
gen_c::write_wrapper_c,
|
||||
gen_rust::{write_bin_cargo_toml, write_build_rs, write_lib_cargo_toml, write_lib_rs},
|
||||
intrinsic::Intrinsic,
|
||||
intrinsic_helpers::IntrinsicTypeDefinition,
|
||||
};
|
||||
|
||||
pub mod argument;
|
||||
pub mod cli;
|
||||
pub mod compare;
|
||||
pub mod compile_c;
|
||||
pub mod constraint;
|
||||
pub mod gen_c;
|
||||
pub mod gen_rust;
|
||||
pub mod indentation;
|
||||
pub mod intrinsic;
|
||||
pub mod intrinsic_helpers;
|
||||
pub mod values;
|
||||
|
||||
mod gen_c;
|
||||
mod gen_rust;
|
||||
mod indentation;
|
||||
mod values;
|
||||
|
||||
/// Architectures must support this trait
|
||||
/// to be successfully tested.
|
||||
pub trait SupportedArchitectureTest {
|
||||
type IntrinsicImpl: IntrinsicTypeDefinition + Sync;
|
||||
|
||||
fn cli_options(&self) -> &ProcessedCli;
|
||||
fn intrinsics(&self) -> &[Intrinsic<Self::IntrinsicImpl>];
|
||||
|
||||
fn create(cli_options: ProcessedCli) -> Self;
|
||||
@@ -40,118 +34,40 @@ pub trait SupportedArchitectureTest {
|
||||
const NOTICE: &str;
|
||||
|
||||
const PLATFORM_C_HEADERS: &[&str];
|
||||
const PLATFORM_C_DEFINITIONS: &str;
|
||||
const PLATFORM_C_FORWARD_DECLARATIONS: &str;
|
||||
|
||||
const PLATFORM_RUST_CFGS: &str;
|
||||
const PLATFORM_RUST_DEFINITIONS: &str;
|
||||
|
||||
fn cpp_compilation(&self) -> Option<CppCompilation>;
|
||||
fn arch_flags(&self) -> Vec<&str>;
|
||||
|
||||
fn build_c_file(&self) -> bool {
|
||||
let (chunk_size, chunk_count) = manual_chunk(self.intrinsics().len(), 400);
|
||||
|
||||
let cpp_compiler_wrapped = self.cpp_compilation();
|
||||
fn generate_c_file(&self) {
|
||||
let (chunk_size, _chunk_count) = manual_chunk(self.intrinsics().len());
|
||||
|
||||
std::fs::create_dir_all("c_programs").unwrap();
|
||||
self.intrinsics()
|
||||
.par_chunks(chunk_size)
|
||||
.enumerate()
|
||||
.map(|(i, chunk)| {
|
||||
let c_filename = format!("c_programs/mod_{i}.cpp");
|
||||
let c_filename = format!("c_programs/wrapper_{i}.c");
|
||||
let mut file = File::create(&c_filename).unwrap();
|
||||
let mod_file_write_result = write_mod_cpp(
|
||||
&mut file,
|
||||
Self::NOTICE,
|
||||
Self::PLATFORM_C_HEADERS,
|
||||
Self::PLATFORM_C_FORWARD_DECLARATIONS,
|
||||
chunk,
|
||||
);
|
||||
|
||||
if let Err(error) = mod_file_write_result {
|
||||
return Err(format!("Error writing to mod_{i}.cpp: {error:?}"));
|
||||
}
|
||||
|
||||
// compile this cpp file into a .o file.
|
||||
//
|
||||
// This is done because `cpp_compiler_wrapped` is None when
|
||||
// the --generate-only flag is passed
|
||||
trace!("compiling mod_{i}.cpp");
|
||||
if let Some(cpp_compiler) = cpp_compiler_wrapped.as_ref() {
|
||||
let compile_output = cpp_compiler
|
||||
.compile_object_file(&format!("mod_{i}.cpp"), &format!("mod_{i}.o"))
|
||||
.map_err(|e| format!("Error compiling mod_{i}.cpp: {e:?}"))?;
|
||||
|
||||
assert!(
|
||||
compile_output.status.success(),
|
||||
"{}",
|
||||
String::from_utf8_lossy(&compile_output.stderr)
|
||||
);
|
||||
|
||||
trace!("finished compiling mod_{i}.cpp");
|
||||
}
|
||||
Ok(())
|
||||
write_wrapper_c(&mut file, Self::NOTICE, Self::PLATFORM_C_HEADERS, chunk)
|
||||
})
|
||||
.collect::<Result<(), String>>()
|
||||
.collect::<io::Result<()>>()
|
||||
.unwrap();
|
||||
|
||||
let mut file = File::create("c_programs/main.cpp").unwrap();
|
||||
write_main_cpp(
|
||||
&mut file,
|
||||
Self::PLATFORM_C_DEFINITIONS,
|
||||
Self::PLATFORM_C_HEADERS,
|
||||
self.intrinsics().iter().map(|i| i.name.as_str()),
|
||||
)
|
||||
.unwrap();
|
||||
|
||||
// This is done because `cpp_compiler_wrapped` is None when
|
||||
// the --generate-only flag is passed
|
||||
if let Some(cpp_compiler) = cpp_compiler_wrapped.as_ref() {
|
||||
// compile this cpp file into a .o file
|
||||
trace!("compiling main.cpp");
|
||||
let output = cpp_compiler
|
||||
.compile_object_file("main.cpp", "intrinsic-test-programs.o")
|
||||
.unwrap();
|
||||
assert!(output.status.success(), "{output:?}");
|
||||
|
||||
let object_files = (0..chunk_count)
|
||||
.map(|i| format!("mod_{i}.o"))
|
||||
.chain(["intrinsic-test-programs.o".to_owned()]);
|
||||
|
||||
let output = cpp_compiler
|
||||
.link_executable(object_files, "intrinsic-test-programs")
|
||||
.unwrap();
|
||||
assert!(output.status.success(), "{output:?}");
|
||||
}
|
||||
|
||||
true
|
||||
}
|
||||
|
||||
fn build_rust_file(&self) -> bool {
|
||||
std::fs::create_dir_all("rust_programs/src").unwrap();
|
||||
fn generate_rust_file(&self) {
|
||||
let arch_flags = self.arch_flags();
|
||||
|
||||
let (chunk_size, chunk_count) = manual_chunk(self.intrinsics().len(), 400);
|
||||
std::fs::create_dir_all("rust_programs").unwrap();
|
||||
|
||||
let (chunk_size, chunk_count) = manual_chunk(self.intrinsics().len());
|
||||
|
||||
let mut cargo = File::create("rust_programs/Cargo.toml").unwrap();
|
||||
write_bin_cargo_toml(&mut cargo, chunk_count).unwrap();
|
||||
|
||||
let mut main_rs = File::create("rust_programs/src/main.rs").unwrap();
|
||||
write_main_rs(
|
||||
&mut main_rs,
|
||||
chunk_count,
|
||||
Self::PLATFORM_RUST_CFGS,
|
||||
"",
|
||||
self.intrinsics().iter().map(|i| i.name.as_str()),
|
||||
)
|
||||
.unwrap();
|
||||
|
||||
let target = &self.cli_options().target;
|
||||
let profile = &self.cli_options().profile;
|
||||
let toolchain = self.cli_options().toolchain.as_deref();
|
||||
let linker = self.cli_options().linker.as_deref();
|
||||
|
||||
self.intrinsics()
|
||||
.par_chunks(chunk_size)
|
||||
.chunks(chunk_size)
|
||||
.enumerate()
|
||||
.map(|(i, chunk)| {
|
||||
std::fs::create_dir_all(format!("rust_programs/mod_{i}/src"))?;
|
||||
@@ -165,6 +81,7 @@ fn build_rust_file(&self) -> bool {
|
||||
Self::NOTICE,
|
||||
Self::PLATFORM_RUST_CFGS,
|
||||
Self::PLATFORM_RUST_DEFINITIONS,
|
||||
i,
|
||||
chunk,
|
||||
)?;
|
||||
|
||||
@@ -174,41 +91,20 @@ fn build_rust_file(&self) -> bool {
|
||||
|
||||
write_lib_cargo_toml(&mut file, &format!("mod_{i}"))?;
|
||||
|
||||
let build_rs_filename = format!("rust_programs/mod_{i}/build.rs");
|
||||
trace!("generating `{build_rs_filename}`");
|
||||
let mut file = File::create(build_rs_filename).unwrap();
|
||||
|
||||
write_build_rs(&mut file, i, &arch_flags).unwrap();
|
||||
|
||||
Ok(())
|
||||
})
|
||||
.collect::<Result<(), std::io::Error>>()
|
||||
.unwrap();
|
||||
|
||||
compile_rust_programs(toolchain, target, profile, linker)
|
||||
}
|
||||
|
||||
fn compare_outputs(&self) -> bool {
|
||||
if self.cli_options().toolchain.is_some() {
|
||||
let intrinsics_name_list = self
|
||||
.intrinsics()
|
||||
.iter()
|
||||
.map(|i| i.name.clone())
|
||||
.collect::<Vec<_>>();
|
||||
|
||||
compare::compare_outputs(
|
||||
&intrinsics_name_list,
|
||||
&self.cli_options().runner,
|
||||
&self.cli_options().target,
|
||||
&self.cli_options().profile,
|
||||
)
|
||||
} else {
|
||||
true
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// pub fn chunk_info(intrinsic_count: usize) -> (usize, usize) {
|
||||
// let available_parallelism = std::thread::available_parallelism().unwrap().get();
|
||||
// let chunk_size = intrinsic_count.div_ceil(Ord::min(available_parallelism, intrinsic_count));
|
||||
|
||||
// (chunk_size, intrinsic_count.div_ceil(chunk_size))
|
||||
// }
|
||||
|
||||
pub fn manual_chunk(intrinsic_count: usize, chunk_size: usize) -> (usize, usize) {
|
||||
(chunk_size, intrinsic_count.div_ceil(chunk_size))
|
||||
/// Splits `intrinsic_count` items into at most one chunk per available core.
///
/// Returns `(chunk_size, chunk_count)` where:
/// * `chunk_size` is the number of items per chunk and is always at least 1, so it can be
///   passed to `slice::chunks` / `par_chunks` even when there are no items;
/// * `chunk_count` is exactly the number of chunks that chunking `intrinsic_count` items
///   by `chunk_size` produces. Because the size is rounded up, this can be lower than the
///   number of cores, so the core count itself must not be used as the chunk count.
///
/// When the available parallelism cannot be determined, a single core is assumed.
pub fn manual_chunk(intrinsic_count: usize) -> (usize, usize) {
    let ncores = std::thread::available_parallelism().map_or(1, |n| n.get());

    // `max(1)` covers `intrinsic_count == 0`, where the division would yield a zero size.
    let chunk_size = intrinsic_count.div_ceil(ncores).max(1);

    (chunk_size, intrinsic_count.div_ceil(chunk_size))
}
|
||||
|
||||
@@ -15,27 +15,21 @@ fn main() {
|
||||
let args: Cli = clap::Parser::parse();
|
||||
let processed_cli_options = ProcessedCli::new(args);
|
||||
|
||||
match processed_cli_options.target.as_str() {
|
||||
"aarch64-unknown-linux-gnu"
|
||||
| "armv7-unknown-linux-gnueabihf"
|
||||
| "aarch64_be-unknown-linux-gnu" => run(ArmArchitectureTest::create(processed_cli_options)),
|
||||
|
||||
"x86_64-unknown-linux-gnu" => run(X86ArchitectureTest::create(processed_cli_options)),
|
||||
_ => std::process::exit(0),
|
||||
if processed_cli_options.target.starts_with("arm")
|
||||
| processed_cli_options.target.starts_with("aarch64")
|
||||
{
|
||||
run(ArmArchitectureTest::create(processed_cli_options))
|
||||
} else if processed_cli_options.target.starts_with("x86") {
|
||||
run(X86ArchitectureTest::create(processed_cli_options))
|
||||
} else {
|
||||
unimplemented!("Unsupported target {}", processed_cli_options.target)
|
||||
}
|
||||
}
|
||||
|
||||
fn run(test_environment: impl SupportedArchitectureTest) {
|
||||
info!("building C binaries");
|
||||
if !test_environment.build_c_file() {
|
||||
std::process::exit(2);
|
||||
}
|
||||
test_environment.generate_c_file();
|
||||
|
||||
info!("building Rust binaries");
|
||||
if !test_environment.build_rust_file() {
|
||||
std::process::exit(3);
|
||||
}
|
||||
info!("Running binaries");
|
||||
if !test_environment.compare_outputs() {
|
||||
std::process::exit(1);
|
||||
}
|
||||
test_environment.generate_rust_file();
|
||||
}
|
||||
|
||||
@@ -1,59 +0,0 @@
|
||||
use crate::common::cli::ProcessedCli;
|
||||
use crate::common::compile_c::{CompilationCommandBuilder, CppCompilation};
|
||||
|
||||
pub fn build_cpp_compilation(config: &ProcessedCli) -> Option<CppCompilation> {
|
||||
let cpp_compiler = config.cpp_compiler.as_ref()?;
|
||||
|
||||
// -ffp-contract=off emulates Rust's approach of not fusing separate mul-add operations
|
||||
let mut command = CompilationCommandBuilder::new()
|
||||
.add_arch_flags(["icelake-client"])
|
||||
.set_compiler(cpp_compiler)
|
||||
.set_target(&config.target)
|
||||
.set_opt_level("2")
|
||||
.set_cxx_toolchain_dir(config.cxx_toolchain_dir.as_deref())
|
||||
.set_project_root("c_programs")
|
||||
.add_extra_flags(vec![
|
||||
"-ffp-contract=off",
|
||||
"-Wno-narrowing",
|
||||
"-mavx",
|
||||
"-mavx2",
|
||||
"-mavx512f",
|
||||
"-msse2",
|
||||
"-mavx512vl",
|
||||
"-mavx512bw",
|
||||
"-mavx512dq",
|
||||
"-mavx512cd",
|
||||
"-mavx512fp16",
|
||||
"-msha512",
|
||||
"-msm3",
|
||||
"-msm4",
|
||||
"-mavxvnni",
|
||||
"-mavxvnniint8",
|
||||
"-mavxneconvert",
|
||||
"-mavxifma",
|
||||
"-mavxvnniint16",
|
||||
"-mavx512bf16",
|
||||
"-mavx512bitalg",
|
||||
"-mavx512ifma",
|
||||
"-mavx512vbmi",
|
||||
"-mavx512vbmi2",
|
||||
"-mavx512vnni",
|
||||
"-mavx512vpopcntdq",
|
||||
"-mavx512vp2intersect",
|
||||
"-mbmi",
|
||||
"-mbmi2",
|
||||
"-mgfni",
|
||||
"-mvaes",
|
||||
"-mvpclmulqdq",
|
||||
"-ferror-limit=1000",
|
||||
"-std=c++23",
|
||||
]);
|
||||
|
||||
if !cpp_compiler.contains("clang") {
|
||||
command = command.add_extra_flag("-flax-vector-conversions");
|
||||
}
|
||||
|
||||
let cpp_compiler = command.into_cpp_compilation();
|
||||
|
||||
Some(cpp_compiler)
|
||||
}
|
||||
@@ -3,7 +3,6 @@
|
||||
// test are derived from an XML specification, published under the same license as the
|
||||
// `intrinsic-test` crate.\n";
|
||||
|
||||
// Format f16 values (and vectors containing them) in a way that is consistent with C.
|
||||
pub const PLATFORM_RUST_DEFINITIONS: &str = r#"
|
||||
use core_arch::arch::x86_64::*;
|
||||
|
||||
@@ -129,206 +128,11 @@ unsafe fn _mm512_loadu_epi64_to___m512(mem_addr: *const i64) -> __m512 {
|
||||
_mm512_castsi512_ps(_mm512_loadu_epi64(mem_addr))
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn debug_simd_finish<T: core::fmt::Debug, const N: usize>(
|
||||
formatter: &mut core::fmt::Formatter<'_>,
|
||||
type_name: &str,
|
||||
array: &[T; N],
|
||||
) -> core::fmt::Result {
|
||||
core::fmt::Formatter::debug_tuple_fields_finish(
|
||||
formatter,
|
||||
type_name,
|
||||
&core::array::from_fn::<&dyn core::fmt::Debug, N, _>(|i| &array[i]),
|
||||
)
|
||||
}
|
||||
|
||||
trait DebugAs<T> {
|
||||
fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result;
|
||||
}
|
||||
|
||||
impl<T: core::fmt::Display> DebugAs<T> for T {
|
||||
fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
|
||||
write!(f, "{self}")
|
||||
}
|
||||
}
|
||||
|
||||
macro_rules! impl_debug_as {
|
||||
($simd:ty, $name:expr, $bits:expr, [$($type:ty),+]) => {
|
||||
$(
|
||||
impl DebugAs<$type> for $simd {
|
||||
fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
|
||||
const ELEMENT_BITS: usize = core::mem::size_of::<$type>() * 8;
|
||||
const NUM_ELEMENTS: usize = $bits / ELEMENT_BITS;
|
||||
let array = unsafe { core::mem::transmute::<_, [$type; NUM_ELEMENTS]>(*self) };
|
||||
debug_simd_finish(f, $name, &array)
|
||||
}
|
||||
}
|
||||
)+
|
||||
};
|
||||
}
|
||||
|
||||
impl_debug_as!(__m128i, "__m128i", 128, [u8, i8, u16, i16, u32, i32, u64, i64, f16]);
|
||||
impl_debug_as!(__m256i, "__m256i", 256, [u8, i8, u16, i16, u32, i32, u64, i64]);
|
||||
impl_debug_as!(__m512i, "__m512i", 512, [u8, i8, u16, i16, u32, i32, u64, i64]);
|
||||
impl_debug_as!(__m128h, "__m128h", 128, [f32]);
|
||||
impl_debug_as!(__m256h, "__m256h", 256, [f32]);
|
||||
impl_debug_as!(__m512h, "__m512h", 512, [f32]);
|
||||
|
||||
fn debug_as<V, T>(x: V) -> impl core::fmt::Debug
|
||||
where V: DebugAs<T>
|
||||
{
|
||||
struct DebugWrapper<V, T>(V, core::marker::PhantomData<T>);
|
||||
impl<V: DebugAs<T>, T> core::fmt::Debug for DebugWrapper<V, T> {
|
||||
fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
|
||||
self.0.fmt(f)
|
||||
}
|
||||
}
|
||||
DebugWrapper(x, core::marker::PhantomData)
|
||||
}
|
||||
|
||||
"#;
|
||||
|
||||
pub const PLATFORM_C_FORWARD_DECLARATIONS: &str = r#"
|
||||
#ifndef X86_DECLARATIONS
|
||||
#define X86_DECLARATIONS
|
||||
typedef _Float16 float16_t;
|
||||
typedef float float32_t;
|
||||
typedef double float64_t;
|
||||
|
||||
#define __int64 long long
|
||||
#define __int32 int
|
||||
|
||||
std::ostream& operator<<(std::ostream& os, _Float16 value);
|
||||
std::ostream& operator<<(std::ostream& os, __m128i value);
|
||||
std::ostream& operator<<(std::ostream& os, __m256i value);
|
||||
std::ostream& operator<<(std::ostream& os, __m512i value);
|
||||
std::ostream& operator<<(std::ostream& os, __mmask8 value);
|
||||
|
||||
#define _mm512_extract_intrinsic_test_epi8(m, lane) \
|
||||
_mm_extract_epi8(_mm512_extracti64x2_epi64((m), (lane) / 16), (lane) % 16)
|
||||
|
||||
#define _mm512_extract_intrinsic_test_epi16(m, lane) \
|
||||
_mm_extract_epi16(_mm512_extracti64x2_epi64((m), (lane) / 8), (lane) % 8)
|
||||
|
||||
#define _mm512_extract_intrinsic_test_epi32(m, lane) \
|
||||
_mm_extract_epi32(_mm512_extracti64x2_epi64((m), (lane) / 4), (lane) % 4)
|
||||
|
||||
#define _mm512_extract_intrinsic_test_epi64(m, lane) \
|
||||
_mm_extract_epi64(_mm512_extracti64x2_epi64((m), (lane) / 2), (lane) % 2)
|
||||
|
||||
// Load f16 (__m128h) and cast to integer (__m128i)
|
||||
#define _mm_loadu_ph_to___m128i(mem_addr) _mm_castph_si128(_mm_loadu_ph(mem_addr))
|
||||
#define _mm256_loadu_ph_to___m256i(mem_addr) _mm256_castph_si256(_mm256_loadu_ph(mem_addr))
|
||||
#define _mm512_loadu_ph_to___m512i(mem_addr) _mm512_castph_si512(_mm512_loadu_ph(mem_addr))
|
||||
|
||||
// Load f32 (__m128) and cast to f16 (__m128h)
|
||||
#define _mm_loadu_ps_to___m128h(mem_addr) _mm_castps_ph(_mm_loadu_ps(mem_addr))
|
||||
#define _mm256_loadu_ps_to___m256h(mem_addr) _mm256_castps_ph(_mm256_loadu_ps(mem_addr))
|
||||
#define _mm512_loadu_ps_to___m512h(mem_addr) _mm512_castps_ph(_mm512_loadu_ps(mem_addr))
|
||||
|
||||
// Load integer types and cast to double (__m128d, __m256d, __m512d)
|
||||
#define _mm_loadu_epi16_to___m128d(mem_addr) _mm_castsi128_pd(_mm_loadu_si128((__m128i const*)(mem_addr)))
|
||||
#define _mm256_loadu_epi16_to___m256d(mem_addr) _mm256_castsi256_pd(_mm256_loadu_si256((__m256i const*)(mem_addr)))
|
||||
#define _mm512_loadu_epi16_to___m512d(mem_addr) _mm512_castsi512_pd(_mm512_loadu_si512((__m512i const*)(mem_addr)))
|
||||
|
||||
#define _mm_loadu_epi32_to___m128d(mem_addr) _mm_castsi128_pd(_mm_loadu_si128((__m128i const*)(mem_addr)))
|
||||
#define _mm256_loadu_epi32_to___m256d(mem_addr) _mm256_castsi256_pd(_mm256_loadu_si256((__m256i const*)(mem_addr)))
|
||||
#define _mm512_loadu_epi32_to___m512d(mem_addr) _mm512_castsi512_pd(_mm512_loadu_si512((__m512i const*)(mem_addr)))
|
||||
|
||||
#define _mm_loadu_epi64_to___m128d(mem_addr) _mm_castsi128_pd(_mm_loadu_si128((__m128i const*)(mem_addr)))
|
||||
#define _mm256_loadu_epi64_to___m256d(mem_addr) _mm256_castsi256_pd(_mm256_loadu_si256((__m256i const*)(mem_addr)))
|
||||
#define _mm512_loadu_epi64_to___m512d(mem_addr) _mm512_castsi512_pd(_mm512_loadu_si512((__m512i const*)(mem_addr)))
|
||||
|
||||
// Load integer types and cast to float (__m128, __m256, __m512)
|
||||
#define _mm_loadu_epi16_to___m128(mem_addr) _mm_castsi128_ps(_mm_loadu_si128((__m128i const*)(mem_addr)))
|
||||
#define _mm256_loadu_epi16_to___m256(mem_addr) _mm256_castsi256_ps(_mm256_loadu_si256((__m256i const*)(mem_addr)))
|
||||
#define _mm512_loadu_epi16_to___m512(mem_addr) _mm512_castsi512_ps(_mm512_loadu_si512((__m512i const*)(mem_addr)))
|
||||
|
||||
#define _mm_loadu_epi32_to___m128(mem_addr) _mm_castsi128_ps(_mm_loadu_si128((__m128i const*)(mem_addr)))
|
||||
#define _mm256_loadu_epi32_to___m256(mem_addr) _mm256_castsi256_ps(_mm256_loadu_si256((__m256i const*)(mem_addr)))
|
||||
#define _mm512_loadu_epi32_to___m512(mem_addr) _mm512_castsi512_ps(_mm512_loadu_si512((__m512i const*)(mem_addr)))
|
||||
|
||||
#define _mm_loadu_epi64_to___m128(mem_addr) _mm_castsi128_ps(_mm_loadu_si128((__m128i const*)(mem_addr)))
|
||||
#define _mm256_loadu_epi64_to___m256(mem_addr) _mm256_castsi256_ps(_mm256_loadu_si256((__m256i const*)(mem_addr)))
|
||||
#define _mm512_loadu_epi64_to___m512(mem_addr) _mm512_castsi512_ps(_mm512_loadu_si512((__m512i const*)(mem_addr)))
|
||||
|
||||
// T1 is the `To` type, T2 is the `From` type
|
||||
// Converts `x` (type T2) to T1, picking the strategy at compile time:
//  * integral -> integral, or floating-point -> floating-point: ordinary
//    implicit conversion;
//  * otherwise, when T1 is no larger than T2: the first sizeof(T1) bytes of `x`
//    are copied into a value-initialized T1;
//  * otherwise T2 must be convertible to T1 (enforced by the static_assert) and
//    the converted value is returned.
template<typename T1, typename T2> T1 cast(T2 x) {
  if constexpr ((std::is_integral_v<T1> && std::is_integral_v<T2>) || (std::is_floating_point_v<T1> && std::is_floating_point_v<T2>)) {
    return x;
  } else if constexpr (sizeof(T1) <= sizeof(T2)) {
    T1 ret{};
    std::memcpy(&ret, &x, sizeof(T1));
    return ret;
  } else {
    static_assert(sizeof(T1) == sizeof(T2) || std::is_convertible_v<T2, T1>,
      "T2 must either be convertible to T1, or have the same size as T1!");
    // Return the converted value; this branch used to return `T1{}`, which
    // silently discarded `x` even though the conversion had just been checked.
    return static_cast<T1>(x);
  }
}
|
||||
#endif
|
||||
"#;
|
||||
pub const PLATFORM_C_DEFINITIONS: &str = r#"
|
||||
|
||||
std::ostream& operator<<(std::ostream& os, _Float16 value) {
|
||||
os << static_cast<float>(value);
|
||||
return os;
|
||||
}
|
||||
|
||||
// Writes the 16 bytes of a 128-bit integer vector to `os` as a single
// "0x"-prefixed string, two lowercase hex digits per byte, in memory order
// (lowest address first). Returns `os` so calls can be chained.
std::ostream& operator<<(std::ostream& os, __m128i value) {
  // Stack scratch space: the previous malloc'd buffer was never freed.
  unsigned char bytes[sizeof(__m128i)];
  _mm_storeu_si128(reinterpret_cast<__m128i*>(bytes), value);

  // Format into a private stream so the hex/fill settings never touch `os`.
  std::stringstream ss;
  ss << "0x" << std::hex << std::setfill('0');
  for (unsigned char byte : bytes) {
    // Widen to an integer first: streaming a char emits the raw character and
    // ignores std::hex.
    ss << std::setw(2) << static_cast<unsigned>(byte);
  }
  os << ss.str();
  return os;
}
|
||||
|
||||
std::ostream& operator<<(std::ostream& os, __m256i value) {
|
||||
void* temp = malloc(sizeof(__m256i));
|
||||
_mm256_storeu_si256((__m256i*)temp, value);
|
||||
std::stringstream ss;
|
||||
|
||||
ss << "0x";
|
||||
for(int i = 0; i < 32; i++) {
|
||||
ss << std::setfill('0') << std::setw(2) << std::hex << ((char*)temp)[i];
|
||||
}
|
||||
os << ss.str();
|
||||
return os;
|
||||
}
|
||||
|
||||
std::ostream& operator<<(std::ostream& os, __m512i value) {
|
||||
void* temp = malloc(sizeof(__m512i));
|
||||
_mm512_storeu_si512((__m512i*)temp, value);
|
||||
std::stringstream ss;
|
||||
|
||||
ss << "0x";
|
||||
for(int i = 0; i < 64; i++) {
|
||||
ss << std::setfill('0') << std::setw(2) << std::hex << ((char*)temp)[i];
|
||||
}
|
||||
os << ss.str();
|
||||
return os;
|
||||
}
|
||||
|
||||
std::ostream& operator<<(std::ostream& os, __mmask8 value) {
|
||||
os << static_cast<int>(value);
|
||||
return os;
|
||||
}
|
||||
"#;
|
||||
|
||||
pub const PLATFORM_RUST_CFGS: &str = r#"
|
||||
#![cfg_attr(target_arch = "x86", feature(avx))]
|
||||
#![cfg_attr(target_arch = "x86", feature(sse))]
|
||||
#![cfg_attr(target_arch = "x86", feature(sse2))]
|
||||
#![cfg_attr(target_arch = "x86", feature(stdarch_x86_avx512_bf16))]
|
||||
#![cfg_attr(target_arch = "x86", feature(stdarch_x86_avx512_f16))]
|
||||
#![cfg_attr(target_arch = "x86", feature(stdarch_x86_rtm))]
|
||||
#![cfg_attr(target_arch = "x86", feature(stdarch_x86_rtm))]
|
||||
#![cfg_attr(target_arch = "x86_64", feature(x86_amx_intrinsics))]
|
||||
#![cfg_attr(target_arch = "x86_64", feature(stdarch_x86_avx512_f16))]
|
||||
#![feature(fmt_helpers_for_derive)]
|
||||
#![feature(stdarch_x86_avx512_bf16)]
|
||||
#![feature(stdarch_x86_avx512_f16)]
|
||||
#![feature(stdarch_x86_rtm)]
|
||||
#![feature(x86_amx_intrinsics)]
|
||||
"#;
|
||||
|
||||
@@ -1,7 +1,10 @@
|
||||
use crate::common::constraint::Constraint;
|
||||
|
||||
pub fn map_constraints(imm_type: &String, imm_width: u32) -> Option<Constraint> {
|
||||
pub fn map_constraints(fn_name: &str, imm_type: &String, imm_width: u32) -> Option<Constraint> {
|
||||
if imm_width > 0 {
|
||||
if fn_name == "_mm_sm3rnds2_epi32" {
|
||||
return Some(Constraint::Set((0..64).step_by(2).collect()));
|
||||
}
|
||||
let max: i64 = 2i64.pow(imm_width);
|
||||
return Some(Constraint::Range(0..max));
|
||||
}
|
||||
|
||||
@@ -1,4 +1,3 @@
|
||||
mod compile;
|
||||
mod config;
|
||||
mod constraint;
|
||||
mod intrinsic;
|
||||
@@ -7,7 +6,6 @@
|
||||
|
||||
use crate::common::SupportedArchitectureTest;
|
||||
use crate::common::cli::ProcessedCli;
|
||||
use crate::common::compile_c::CppCompilation;
|
||||
use crate::common::intrinsic::Intrinsic;
|
||||
use crate::common::intrinsic_helpers::TypeKind;
|
||||
use intrinsic::X86IntrinsicType;
|
||||
@@ -15,33 +13,59 @@
|
||||
|
||||
pub struct X86ArchitectureTest {
|
||||
intrinsics: Vec<Intrinsic<X86IntrinsicType>>,
|
||||
cli_options: ProcessedCli,
|
||||
}
|
||||
|
||||
impl SupportedArchitectureTest for X86ArchitectureTest {
|
||||
type IntrinsicImpl = X86IntrinsicType;
|
||||
|
||||
fn cli_options(&self) -> &ProcessedCli {
|
||||
&self.cli_options
|
||||
}
|
||||
|
||||
fn intrinsics(&self) -> &[Intrinsic<X86IntrinsicType>] {
|
||||
&self.intrinsics
|
||||
}
|
||||
|
||||
fn cpp_compilation(&self) -> Option<CppCompilation> {
|
||||
compile::build_cpp_compilation(&self.cli_options)
|
||||
}
|
||||
|
||||
const NOTICE: &str = config::NOTICE;
|
||||
|
||||
const PLATFORM_C_HEADERS: &[&str] = &["immintrin.h", "cstddef", "cstdint"];
|
||||
const PLATFORM_C_DEFINITIONS: &str = config::PLATFORM_C_DEFINITIONS;
|
||||
const PLATFORM_C_FORWARD_DECLARATIONS: &str = config::PLATFORM_C_FORWARD_DECLARATIONS;
|
||||
const PLATFORM_C_HEADERS: &[&str] = &["immintrin.h"];
|
||||
|
||||
const PLATFORM_RUST_DEFINITIONS: &str = config::PLATFORM_RUST_DEFINITIONS;
|
||||
const PLATFORM_RUST_CFGS: &str = config::PLATFORM_RUST_CFGS;
|
||||
|
||||
fn arch_flags(&self) -> Vec<&str> {
|
||||
vec![
|
||||
"-mavx",
|
||||
"-mavx2",
|
||||
"-mavx512f",
|
||||
"-msse2",
|
||||
"-mavx512vl",
|
||||
"-mavx512bw",
|
||||
"-mavx512dq",
|
||||
"-mavx512cd",
|
||||
"-mavx512fp16",
|
||||
"-msha",
|
||||
"-msha512",
|
||||
"-msm3",
|
||||
"-msm4",
|
||||
"-mavxvnni",
|
||||
"-mavxvnniint8",
|
||||
"-mavxneconvert",
|
||||
"-mavxifma",
|
||||
"-mavxvnniint16",
|
||||
"-mavx512bf16",
|
||||
"-mavx512bitalg",
|
||||
"-mavx512ifma",
|
||||
"-mavx512vbmi",
|
||||
"-mavx512vbmi2",
|
||||
"-mavx512vnni",
|
||||
"-mavx512vpopcntdq",
|
||||
"-mavx512vp2intersect",
|
||||
"-mbmi",
|
||||
"-mbmi2",
|
||||
"-mgfni",
|
||||
"-mvaes",
|
||||
"-mvpclmulqdq",
|
||||
"-mlzcnt",
|
||||
]
|
||||
}
|
||||
|
||||
fn create(cli_options: ProcessedCli) -> Self {
|
||||
let mut intrinsics =
|
||||
get_xml_intrinsics(&cli_options.filename).expect("Error parsing input file");
|
||||
@@ -67,9 +91,6 @@ fn create(cli_options: ProcessedCli) -> Self {
|
||||
.take(sample_size)
|
||||
.collect::<Vec<_>>();
|
||||
|
||||
Self {
|
||||
intrinsics: intrinsics,
|
||||
cli_options: cli_options,
|
||||
}
|
||||
Self { intrinsics }
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1,11 +1,8 @@
|
||||
use std::str::FromStr;
|
||||
|
||||
use itertools::Itertools;
|
||||
use regex::Regex;
|
||||
|
||||
use super::intrinsic::X86IntrinsicType;
|
||||
use crate::common::cli::Language;
|
||||
use crate::common::indentation::Indentation;
|
||||
use crate::common::intrinsic_helpers::{IntrinsicType, IntrinsicTypeDefinition, Sign, TypeKind};
|
||||
use crate::x86::xml_parser::Parameter;
|
||||
|
||||
@@ -26,82 +23,32 @@ fn c_type(&self) -> String {
|
||||
.replace("const ", "")
|
||||
}
|
||||
|
||||
fn c_single_vector_type(&self) -> String {
|
||||
// matches __m128, __m256 and similar types
|
||||
let re = Regex::new(r"__m\d+").unwrap();
|
||||
if re.is_match(self.param.type_data.as_str()) {
|
||||
self.param.type_data.clone()
|
||||
} else {
|
||||
unreachable!("Shouldn't be called on this type")
|
||||
fn rust_type(&self) -> String {
|
||||
let type_data = &*self.param.type_data;
|
||||
if type_data.starts_with("__m") {
|
||||
return type_data.to_owned();
|
||||
}
|
||||
match &*type_data.replace("const ", "") {
|
||||
"_Float16" => "f16",
|
||||
"__bfloat16" => "bf16",
|
||||
"float" => "f32",
|
||||
"double" => "f64",
|
||||
"__int8" | "char" => "i8",
|
||||
"unsigned char" => "u8",
|
||||
"__int16" | "short" => "i16",
|
||||
"unsigned short" => "u16",
|
||||
"__int32" | "int" => "i32",
|
||||
"unsigned __int32" | "unsigned int" | "unsigned long" => "u32",
|
||||
"__int64" | "long long" => "i64",
|
||||
"unsigned __int64" => "u64",
|
||||
"size_t" => "usize",
|
||||
_ => todo!("unknown type {type_data}"),
|
||||
}
|
||||
.to_string()
|
||||
}
|
||||
|
||||
// fn rust_type(&self) -> String {
|
||||
// // handling edge cases first
|
||||
// // the general handling is implemented below
|
||||
// if let Some(val) = self.metadata.get("type") {
|
||||
// match val.as_str() {
|
||||
// "__m128 const *" => {
|
||||
// return "&__m128".to_string();
|
||||
// }
|
||||
// "__m128d const *" => {
|
||||
// return "&__m128d".to_string();
|
||||
// }
|
||||
// "const void*" => {
|
||||
// return "&__m128d".to_string();
|
||||
// }
|
||||
// _ => {}
|
||||
// }
|
||||
// }
|
||||
|
||||
// if self.kind() == TypeKind::Void && self.ptr {
|
||||
// // this has been handled by default settings in
|
||||
// // the from_param function of X86IntrinsicType
|
||||
// unreachable!()
|
||||
// }
|
||||
|
||||
// // general handling cases
|
||||
// let core_part = if self.kind() == TypeKind::Mask {
|
||||
// // all types of __mmask<int> are handled here
|
||||
// format!("__mask{}", self.bit_len.unwrap())
|
||||
// } else if self.simd_len.is_some() {
|
||||
// // all types of __m<int> vector types are handled here
|
||||
// let re = Regex::new(r"\__m\d+[a-z]*").unwrap();
|
||||
// let rust_type = self
|
||||
// .metadata
|
||||
// .get("type")
|
||||
// .map(|val| re.find(val).unwrap().as_str());
|
||||
// rust_type.unwrap().to_string()
|
||||
// } else {
|
||||
// format!(
|
||||
// "{}{}",
|
||||
// self.kind.rust_prefix().to_string(),
|
||||
// self.bit_len.unwrap()
|
||||
// )
|
||||
// };
|
||||
|
||||
// // extracting "memsize" so that even vector types can be involved
|
||||
// let memwidth = self
|
||||
// .metadata
|
||||
// .get("memwidth")
|
||||
// .map(|n| str::parse::<u32>(n).unwrap());
|
||||
// let prefix_part = if self.ptr && self.constant && self.bit_len.eq(&memwidth) {
|
||||
// "&"
|
||||
// } else if self.ptr && self.bit_len.eq(&memwidth) {
|
||||
// "&mut "
|
||||
// } else if self.ptr && self.constant {
|
||||
// "*const "
|
||||
// } else if self.ptr {
|
||||
// "*mut "
|
||||
// } else {
|
||||
// ""
|
||||
// };
|
||||
|
||||
// return prefix_part.to_string() + core_part.as_str();
|
||||
// }
|
||||
|
||||
/// Determines the load function for this type.
|
||||
fn get_load_function(&self, _language: Language) -> String {
|
||||
fn get_load_function(&self) -> String {
|
||||
let type_value = self.param.type_data.clone();
|
||||
if type_value.len() == 0 {
|
||||
unimplemented!("the value for key 'type' is not present!");
|
||||
@@ -168,112 +115,16 @@ fn get_load_function(&self, _language: Language) -> String {
|
||||
}
|
||||
}
|
||||
|
||||
/// Generates a std::cout for the intrinsics results that will match the
|
||||
/// rust debug output format for the return type. The generated line assumes
|
||||
/// there is an int i in scope which is the current pass number.
|
||||
fn print_result_c(&self, indentation: Indentation, additional: &str) -> String {
|
||||
let lanes = if self.num_lanes() > 1 {
|
||||
(0..self.num_lanes())
|
||||
.map(|idx| -> std::string::String {
|
||||
let cast_type = self.c_promotion();
|
||||
let lane_fn = self.get_lane_function();
|
||||
if cast_type.len() > 2 {
|
||||
format!("cast<{cast_type}>({lane_fn}(__return_value, {idx}))")
|
||||
} else {
|
||||
format!("{lane_fn}(__return_value, {idx})")
|
||||
}
|
||||
})
|
||||
.collect::<Vec<_>>()
|
||||
.join(r#" << ", " << "#)
|
||||
} else {
|
||||
format!(
|
||||
"{promote}cast<{cast}>(__return_value)",
|
||||
cast = match self.kind() {
|
||||
TypeKind::Void => "void".to_string(),
|
||||
TypeKind::Float if self.inner_size() == 64 => "double".to_string(),
|
||||
TypeKind::Float if self.inner_size() == 32 => "float".to_string(),
|
||||
TypeKind::Mask => format!(
|
||||
"__mmask{}",
|
||||
self.bit_len.expect(format!("self: {self:#?}").as_str())
|
||||
),
|
||||
TypeKind::Vector => format!(
|
||||
"__m{}i",
|
||||
self.bit_len.expect(format!("self: {self:#?}").as_str())
|
||||
),
|
||||
_ => self.c_scalar_type(),
|
||||
},
|
||||
promote = self.generate_final_type_cast(),
|
||||
)
|
||||
};
|
||||
|
||||
format!(
|
||||
r#"{indentation}std::cout << "Result {additional}-" << i+1 << ": {ty}" << std::fixed << std::setprecision(150) << {lanes} << "{close}" << std::endl;"#,
|
||||
ty = if self.is_simd() {
|
||||
format!("{}(", self.c_type())
|
||||
} else {
|
||||
String::from("")
|
||||
},
|
||||
close = if self.is_simd() { ")" } else { "" },
|
||||
)
|
||||
}
|
||||
|
||||
/// Determines the get lane function for this type.
|
||||
fn get_lane_function(&self) -> String {
|
||||
let total_vector_bits: Option<u32> = self
|
||||
.simd_len
|
||||
.zip(self.bit_len)
|
||||
.and_then(|(simd_len, bit_len)| Some(simd_len * bit_len));
|
||||
|
||||
match (self.bit_len, total_vector_bits) {
|
||||
(Some(8), Some(128)) => String::from("(uint8_t)_mm_extract_epi8"),
|
||||
(Some(16), Some(128)) => String::from("(uint16_t)_mm_extract_epi16"),
|
||||
(Some(32), Some(128)) => String::from("(uint32_t)_mm_extract_epi32"),
|
||||
(Some(64), Some(128)) => String::from("(uint64_t)_mm_extract_epi64"),
|
||||
(Some(8), Some(256)) => String::from("(uint8_t)_mm256_extract_epi8"),
|
||||
(Some(16), Some(256)) => String::from("(uint16_t)_mm256_extract_epi16"),
|
||||
(Some(32), Some(256)) => String::from("(uint32_t)_mm256_extract_epi32"),
|
||||
(Some(64), Some(256)) => String::from("(uint64_t)_mm256_extract_epi64"),
|
||||
(Some(8), Some(512)) => String::from("(uint8_t)_mm512_extract_intrinsic_test_epi8"),
|
||||
(Some(16), Some(512)) => String::from("(uint16_t)_mm512_extract_intrinsic_test_epi16"),
|
||||
(Some(32), Some(512)) => String::from("(uint32_t)_mm512_extract_intrinsic_test_epi32"),
|
||||
(Some(64), Some(512)) => String::from("(uint64_t)_mm512_extract_intrinsic_test_epi64"),
|
||||
_ => unreachable!(
|
||||
"invalid length for vector argument: {:?}, {:?}",
|
||||
self.bit_len, self.simd_len
|
||||
),
|
||||
}
|
||||
}
|
||||
|
||||
fn rust_scalar_type(&self) -> String {
|
||||
let prefix = match self.data.kind {
|
||||
TypeKind::Mask => String::from("__mmask"),
|
||||
TypeKind::Vector => String::from("i"),
|
||||
_ => self.kind().rust_prefix().to_string(),
|
||||
};
|
||||
|
||||
let bits = if self.inner_size() >= 128 {
|
||||
32
|
||||
if self.is_simd() {
|
||||
format!(
|
||||
"{prefix}{bits}",
|
||||
prefix = self.kind().rust_prefix(),
|
||||
bits = self.inner_size()
|
||||
)
|
||||
} else {
|
||||
self.inner_size()
|
||||
};
|
||||
format!("{prefix}{bits}")
|
||||
}
|
||||
|
||||
fn print_result_rust(&self) -> String {
|
||||
let return_value = match self.kind() {
|
||||
// `_mm{256}_cvtps_ph` has return type __m128i but contains f16 values
|
||||
TypeKind::Float if self.param.type_data == "__m128i" => {
|
||||
"format_args!(\"{:.150?}\", debug_as::<_, f16>(__return_value))".to_string()
|
||||
}
|
||||
TypeKind::Int(_)
|
||||
if ["__m128i", "__m256i", "__m512i"].contains(&self.param.type_data.as_str()) =>
|
||||
{
|
||||
format!("debug_as::<_, u{}>(__return_value)", self.inner_size())
|
||||
}
|
||||
_ => "format_args!(\"{__return_value:.150?}\")".to_string(),
|
||||
};
|
||||
|
||||
return_value
|
||||
self.rust_type().replace("__mmask", "u")
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -99,7 +99,7 @@ fn xml_to_intrinsic(
|
||||
} else {
|
||||
param.imm_width
|
||||
};
|
||||
let constraint = map_constraints(¶m.imm_type, effective_imm_width);
|
||||
let constraint = map_constraints(&name, ¶m.imm_type, effective_imm_width);
|
||||
let arg = Argument::<X86IntrinsicType>::new(
|
||||
i,
|
||||
param.var_name.clone(),
|
||||
|
||||
@@ -78,14 +78,22 @@ cfg-target-has-atomic-64: &cfg-target-has-atomic-64
|
||||
neon-unstable-fp8: &neon-unstable-fp8
|
||||
FnCall: [unstable, ['feature = "stdarch_neon_fp8"', 'issue = "none"']]
|
||||
|
||||
# all(test, target_endian = "little")
|
||||
all-test-little-endian: &all-test-little-endian
|
||||
FnCall: [all, [test, 'target_endian = "little"']]
|
||||
|
||||
# #[cfg(target_endian = "little")]
|
||||
little-endian: &little-endian
|
||||
cfg-little-endian: &cfg-little-endian
|
||||
FnCall: [cfg, ['target_endian = "little"']]
|
||||
|
||||
# #[cfg(target_endian = "big")]
|
||||
big-endian: &big-endian
|
||||
cfg-big-endian: &cfg-big-endian
|
||||
FnCall: [cfg, ['target_endian = "big"']]
|
||||
|
||||
# all(test, not(target_env = "msvc"), target_endian = "little")
|
||||
cfg-test-not-msvc-little-endian: &cfg-test-not-msvc-little-endian
|
||||
FnCall: [all, [test, {FnCall: [not, ['target_env = "msvc"']]}, 'target_endian = "little"']]
|
||||
|
||||
intrinsics:
|
||||
- name: "vaddd_{type}"
|
||||
doc: Add
|
||||
@@ -174,12 +182,12 @@ intrinsics:
|
||||
- ['d_f64', 'f64']
|
||||
compose:
|
||||
- FnCall:
|
||||
- simd_extract!
|
||||
- 'vget_lane_{type[1]}'
|
||||
- - FnCall:
|
||||
- "vabd_{type[1]}"
|
||||
- - FnCall: ["vdup_n_{type[1]}", [a]]
|
||||
- FnCall: ["vdup_n_{type[1]}", [b]]
|
||||
- 0
|
||||
- - 0
|
||||
|
||||
- name: "vabd{type[0]}"
|
||||
doc: "Floating-point absolute difference"
|
||||
@@ -195,125 +203,28 @@ intrinsics:
|
||||
- ['h_f16', 'f16']
|
||||
compose:
|
||||
- FnCall:
|
||||
- simd_extract!
|
||||
- 'vget_lane_{type[1]}'
|
||||
- - FnCall:
|
||||
- "vabd_{type[1]}"
|
||||
- - FnCall: ["vdup_n_{type[1]}", [a]]
|
||||
- FnCall: ["vdup_n_{type[1]}", [b]]
|
||||
- 0
|
||||
- - 0
|
||||
|
||||
- name: "vabdl_high{neon_type[0].noq}"
|
||||
doc: Signed Absolute difference Long
|
||||
arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}"]
|
||||
return_type: "{neon_type[1]}"
|
||||
attr: [*neon-stable]
|
||||
assert_instr: [sabdl2]
|
||||
attr:
|
||||
- *neon-stable
|
||||
- FnCall: [cfg_attr, [*all-test-little-endian, {FnCall: [assert_instr, [sabdl2]]}]]
|
||||
safety: safe
|
||||
types:
|
||||
- [int8x16_t, int16x8_t, int8x8_t, uint8x8_t]
|
||||
compose:
|
||||
- Let:
|
||||
- c
|
||||
- "{neon_type[2]}"
|
||||
- FnCall:
|
||||
- simd_shuffle!
|
||||
- - a
|
||||
- a
|
||||
- [8, 9, 10, 11, 12, 13, 14, 15]
|
||||
- Let:
|
||||
- d
|
||||
- "{neon_type[2]}"
|
||||
- FnCall:
|
||||
- simd_shuffle!
|
||||
- - b
|
||||
- b
|
||||
- [8, 9, 10, 11, 12, 13, 14, 15]
|
||||
- Let:
|
||||
- e
|
||||
- "{neon_type[3]}"
|
||||
- FnCall:
|
||||
- simd_cast
|
||||
- - FnCall:
|
||||
- "vabd_{neon_type[0]}"
|
||||
- - c
|
||||
- d
|
||||
- FnCall:
|
||||
- simd_cast
|
||||
- - e
|
||||
|
||||
- name: "vabdl_high{neon_type[0].noq}"
|
||||
doc: Signed Absolute difference Long
|
||||
arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}"]
|
||||
return_type: "{neon_type[1]}"
|
||||
attr:
|
||||
- FnCall:
|
||||
- stable
|
||||
- - 'feature = "neon_intrinsics"'
|
||||
- 'since = "1.59.0"'
|
||||
assert_instr: [sabdl2]
|
||||
safety: safe
|
||||
types:
|
||||
- [int16x8_t, int32x4_t, int16x4_t, uint16x4_t]
|
||||
compose:
|
||||
- Let:
|
||||
- c
|
||||
- "{neon_type[2]}"
|
||||
- FnCall:
|
||||
- simd_shuffle!
|
||||
- - a
|
||||
- a
|
||||
- [4, 5, 6, 7]
|
||||
- Let:
|
||||
- d
|
||||
- "{neon_type[2]}"
|
||||
- FnCall:
|
||||
- simd_shuffle!
|
||||
- - b
|
||||
- b
|
||||
- [4, 5, 6, 7]
|
||||
- Let:
|
||||
- e
|
||||
- "{neon_type[3]}"
|
||||
- FnCall:
|
||||
- simd_cast
|
||||
- - FnCall:
|
||||
- "vabd_{neon_type[0]}"
|
||||
- - c
|
||||
- d
|
||||
- FnCall:
|
||||
- simd_cast
|
||||
- - e
|
||||
|
||||
- name: "vabdl_high{neon_type[0].noq}"
|
||||
doc: Signed Absolute difference Long
|
||||
arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}"]
|
||||
return_type: "{neon_type[1]}"
|
||||
attr:
|
||||
- FnCall:
|
||||
- stable
|
||||
- - 'feature = "neon_intrinsics"'
|
||||
- 'since = "1.59.0"'
|
||||
assert_instr: [sabdl2]
|
||||
safety: safe
|
||||
types:
|
||||
- [int32x4_t, int64x2_t, int32x2_t, uint32x2_t]
|
||||
compose:
|
||||
- Let:
|
||||
- c
|
||||
- "{neon_type[2]}"
|
||||
- FnCall:
|
||||
- simd_shuffle!
|
||||
- - a
|
||||
- a
|
||||
- [2, 3]
|
||||
- Let:
|
||||
- d
|
||||
- "{neon_type[2]}"
|
||||
- FnCall:
|
||||
- simd_shuffle!
|
||||
- - b
|
||||
- b
|
||||
- [2, 3]
|
||||
- Let: [c, FnCall: ['vget_high_{neon_type[0]}', [a]]]
|
||||
- Let: [d, FnCall: ['vget_high_{neon_type[0]}', [b]]]
|
||||
- Let:
|
||||
- e
|
||||
- "{neon_type[3]}"
|
||||
@@ -333,7 +244,7 @@ intrinsics:
|
||||
return_type: "{neon_type[1]}"
|
||||
attr:
|
||||
- FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [cmeq]]}]]
|
||||
- FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
|
||||
- *neon-stable
|
||||
safety: safe
|
||||
types:
|
||||
- [uint64x1_t, uint64x1_t]
|
||||
@@ -351,7 +262,7 @@ intrinsics:
|
||||
return_type: "{neon_type[1]}"
|
||||
attr:
|
||||
- FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcmeq]]}]]
|
||||
- FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
|
||||
- *neon-stable
|
||||
safety: safe
|
||||
types:
|
||||
- [float64x1_t, uint64x1_t]
|
||||
@@ -365,19 +276,19 @@ intrinsics:
|
||||
return_type: "{type[2]}"
|
||||
attr:
|
||||
- FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcmp]]}]]
|
||||
- FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
|
||||
- *neon-stable
|
||||
safety: safe
|
||||
types:
|
||||
- ["s_f32", "f32", "u32"]
|
||||
- ["d_f64", "f64", "u64"]
|
||||
compose:
|
||||
- FnCall:
|
||||
- simd_extract!
|
||||
- 'vget_lane_{type[2]}'
|
||||
- - FnCall:
|
||||
- "vceq_{type[1]}"
|
||||
- - FnCall: ["vdup_n_{type[1]}", [a]]
|
||||
- FnCall: ["vdup_n_{type[1]}", [b]]
|
||||
- '0'
|
||||
- - 0
|
||||
|
||||
|
||||
- name: "vceq{type[0]}"
|
||||
@@ -394,12 +305,12 @@ intrinsics:
|
||||
- ["h_f16", "f16", "u16"]
|
||||
compose:
|
||||
- FnCall:
|
||||
- simd_extract!
|
||||
- 'vget_lane_{type[2]}'
|
||||
- - FnCall:
|
||||
- "vceq_{type[1]}"
|
||||
- - FnCall: ["vdup_n_{type[1]}", [a]]
|
||||
- FnCall: ["vdup_n_{type[1]}", [b]]
|
||||
- '0'
|
||||
- - 0
|
||||
|
||||
- name: "vceqd_{type[0]}"
|
||||
doc: "Compare bitwise equal"
|
||||
@@ -407,7 +318,7 @@ intrinsics:
|
||||
return_type: "{type[1]}"
|
||||
attr:
|
||||
- FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [cmp]]}]]
|
||||
- FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
|
||||
- *neon-stable
|
||||
safety: safe
|
||||
types:
|
||||
- ["i64", "u64", "s64"]
|
||||
@@ -426,7 +337,7 @@ intrinsics:
|
||||
return_type: "{neon_type[1]}"
|
||||
attr:
|
||||
- FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [cmtst]]}]]
|
||||
- FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
|
||||
- *neon-stable
|
||||
safety: safe
|
||||
types:
|
||||
- [int64x1_t, uint64x1_t, 'i64x1', 'i64x1::new(0)']
|
||||
@@ -444,7 +355,7 @@ intrinsics:
|
||||
return_type: "{type[1]}"
|
||||
attr:
|
||||
- FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [tst]]}]]
|
||||
- FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
|
||||
- *neon-stable
|
||||
safety: safe
|
||||
types:
|
||||
- ["i64", "u64", "s64"]
|
||||
@@ -463,7 +374,7 @@ intrinsics:
|
||||
return_type: "{type[1]}"
|
||||
attr:
|
||||
- FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [suqadd]]}]]
|
||||
- FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
|
||||
- *neon-stable
|
||||
safety: safe
|
||||
types:
|
||||
- ["s_s32", "i32", "u32"]
|
||||
@@ -481,19 +392,19 @@ intrinsics:
|
||||
return_type: "{type[1]}"
|
||||
attr:
|
||||
- FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [suqadd]]}]]
|
||||
- FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
|
||||
- *neon-stable
|
||||
safety: safe
|
||||
types:
|
||||
- ["b_s8", "i8", "u8", "s8"]
|
||||
- ["h_s16", "i16", "u16", "s16"]
|
||||
compose:
|
||||
- FnCall:
|
||||
- simd_extract!
|
||||
- 'vget_lane_{type[3]}'
|
||||
- - FnCall:
|
||||
- "vuqadd_{type[3]}"
|
||||
- - FnCall: ["vdup_n_{type[3]}", [a]]
|
||||
- FnCall: ["vdup_n_{type[2]}", [b]]
|
||||
- '0'
|
||||
- - '0'
|
||||
|
||||
- name: "vabs{neon_type.no}"
|
||||
doc: "Floating-point absolute value"
|
||||
@@ -501,7 +412,7 @@ intrinsics:
|
||||
return_type: "{neon_type}"
|
||||
attr:
|
||||
- FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fabs]]}]]
|
||||
- FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
|
||||
- *neon-stable
|
||||
safety: safe
|
||||
types:
|
||||
- float64x1_t
|
||||
@@ -515,7 +426,7 @@ intrinsics:
|
||||
return_type: "{neon_type[1]}"
|
||||
attr:
|
||||
- FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [cmgt]]}]]
|
||||
- FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
|
||||
- *neon-stable
|
||||
safety: safe
|
||||
types:
|
||||
- [int64x1_t, uint64x1_t]
|
||||
@@ -529,7 +440,7 @@ intrinsics:
|
||||
return_type: "{neon_type}"
|
||||
attr:
|
||||
- FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [cmhi]]}]]
|
||||
- FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
|
||||
- *neon-stable
|
||||
safety: safe
|
||||
types:
|
||||
- uint64x1_t
|
||||
@@ -543,7 +454,7 @@ intrinsics:
|
||||
return_type: "{neon_type[1]}"
|
||||
attr:
|
||||
- FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcmgt]]}]]
|
||||
- FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
|
||||
- *neon-stable
|
||||
safety: safe
|
||||
types:
|
||||
- [float64x1_t, uint64x1_t]
|
||||
@@ -557,19 +468,19 @@ intrinsics:
|
||||
return_type: "{type[2]}"
|
||||
attr:
|
||||
- FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcmp]]}]]
|
||||
- FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
|
||||
- *neon-stable
|
||||
safety: safe
|
||||
types:
|
||||
- ["s_f32", "f32", "u32"]
|
||||
- ["d_f64", "f64", "u64"]
|
||||
compose:
|
||||
- FnCall:
|
||||
- 'simd_extract!'
|
||||
- 'vget_lane_{type[2]}'
|
||||
- - FnCall:
|
||||
- "vcgt_{type[1]}"
|
||||
- - FnCall: ["vdup_n_{type[1]}", [a]]
|
||||
- FnCall: ["vdup_n_{type[1]}", [b]]
|
||||
- '0'
|
||||
- - '0'
|
||||
|
||||
|
||||
- name: "vcgt{type[0]}"
|
||||
@@ -586,12 +497,12 @@ intrinsics:
|
||||
- ["h_f16", "f16", "u16"]
|
||||
compose:
|
||||
- FnCall:
|
||||
- 'simd_extract!'
|
||||
- 'vget_lane_{type[2]}'
|
||||
- - FnCall:
|
||||
- "vcgt_{type[1]}"
|
||||
- - FnCall: ["vdup_n_{type[1]}", [a]]
|
||||
- FnCall: ["vdup_n_{type[1]}", [b]]
|
||||
- '0'
|
||||
- - '0'
|
||||
|
||||
- name: "vclt{neon_type[0].no}"
|
||||
doc: "Compare signed less than"
|
||||
@@ -599,7 +510,7 @@ intrinsics:
|
||||
return_type: "{neon_type[1]}"
|
||||
attr:
|
||||
- FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [cmgt]]}]]
|
||||
- FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
|
||||
- *neon-stable
|
||||
safety: safe
|
||||
types:
|
||||
- [int64x1_t, uint64x1_t]
|
||||
@@ -613,7 +524,7 @@ intrinsics:
|
||||
return_type: "{neon_type[1]}"
|
||||
attr:
|
||||
- FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [cmge]]}]]
|
||||
- FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
|
||||
- *neon-stable
|
||||
safety: safe
|
||||
types:
|
||||
- [int64x1_t, uint64x1_t]
|
||||
@@ -627,7 +538,7 @@ intrinsics:
|
||||
return_type: "{neon_type[1]}"
|
||||
attr:
|
||||
- FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcmge]]}]]
|
||||
- FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
|
||||
- *neon-stable
|
||||
safety: safe
|
||||
types:
|
||||
- [float64x1_t, uint64x1_t]
|
||||
@@ -641,19 +552,19 @@ intrinsics:
|
||||
return_type: "{type[2]}"
|
||||
attr:
|
||||
- FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcmp]]}]]
|
||||
- FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
|
||||
- *neon-stable
|
||||
safety: safe
|
||||
types:
|
||||
- ["s_f32", "f32", "u32"]
|
||||
- ["d_f64", "f64", "u64"]
|
||||
compose:
|
||||
- FnCall:
|
||||
- simd_extract!
|
||||
- 'vget_lane_{type[2]}'
|
||||
- - FnCall:
|
||||
- "vcle_{type[1]}"
|
||||
- - FnCall: ["vdup_n_{type[1]}", [a]]
|
||||
- FnCall: ["vdup_n_{type[1]}", [b]]
|
||||
- '0'
|
||||
- - '0'
|
||||
|
||||
|
||||
- name: "vcle{type[0]}"
|
||||
@@ -670,12 +581,12 @@ intrinsics:
|
||||
- ["h_f16", "f16", "u16"]
|
||||
compose:
|
||||
- FnCall:
|
||||
- simd_extract!
|
||||
- 'vget_lane_{type[2]}'
|
||||
- - FnCall:
|
||||
- "vcle_{type[1]}"
|
||||
- - FnCall: ["vdup_n_{type[1]}", [a]]
|
||||
- FnCall: ["vdup_n_{type[1]}", [b]]
|
||||
- '0'
|
||||
- - '0'
|
||||
|
||||
- name: "vcge{neon_type[0].no}"
|
||||
doc: "Compare signed greater than or equal"
|
||||
@@ -683,7 +594,7 @@ intrinsics:
|
||||
return_type: "{neon_type[1]}"
|
||||
attr:
|
||||
- FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [cmge]]}]]
|
||||
- FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
|
||||
- *neon-stable
|
||||
safety: safe
|
||||
types:
|
||||
- [int64x1_t, uint64x1_t]
|
||||
@@ -697,7 +608,7 @@ intrinsics:
|
||||
return_type: "{neon_type[1]}"
|
||||
attr:
|
||||
- FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [cmge]]}]]
|
||||
- FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
|
||||
- *neon-stable
|
||||
safety: safe
|
||||
types:
|
||||
- [int8x8_t, uint8x8_t, i8x8, 'i8x8::new(0, 0, 0, 0, 0, 0, 0, 0)']
|
||||
@@ -718,7 +629,7 @@ intrinsics:
|
||||
return_type: "{type[1]}"
|
||||
attr:
|
||||
- FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [nop]]}]]
|
||||
- FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
|
||||
- *neon-stable
|
||||
safety: safe
|
||||
types:
|
||||
- ["i64", "u64"]
|
||||
@@ -735,7 +646,7 @@ intrinsics:
|
||||
return_type: "{neon_type[1]}"
|
||||
attr:
|
||||
- FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [cmle]]}]]
|
||||
- FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
|
||||
- *neon-stable
|
||||
safety: safe
|
||||
types:
|
||||
- [int8x8_t, uint8x8_t, i8x8, 'i8x8::new(0, 0, 0, 0, 0, 0, 0, 0)']
|
||||
@@ -759,7 +670,7 @@ intrinsics:
|
||||
return_type: "{neon_type[1]}"
|
||||
attr:
|
||||
- FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcmle]]}]]
|
||||
- FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
|
||||
- *neon-stable
|
||||
safety: safe
|
||||
types:
|
||||
- [float32x2_t, uint32x2_t, f32x2, 'f32x2::new(0.0, 0.0)']
|
||||
@@ -779,18 +690,18 @@ intrinsics:
|
||||
return_type: "{type[2]}"
|
||||
attr:
|
||||
- FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcmp]]}]]
|
||||
- FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
|
||||
- *neon-stable
|
||||
safety: safe
|
||||
types:
|
||||
- ["s_f32", "f32", "u32"]
|
||||
- ["d_f64", "f64", "u64"]
|
||||
compose:
|
||||
- FnCall:
|
||||
- simd_extract!
|
||||
- 'vget_lane_{type[2]}'
|
||||
- - FnCall:
|
||||
- "vclez_{type[1]}"
|
||||
- - FnCall: ["vdup_n_{type[1]}", [a]]
|
||||
- '0'
|
||||
- - '0'
|
||||
|
||||
- name: "vclez{type[0]}"
|
||||
doc: "Floating-point compare less than or equal to zero"
|
||||
@@ -806,11 +717,11 @@ intrinsics:
|
||||
- ["h_f16", "f16", "u16"]
|
||||
compose:
|
||||
- FnCall:
|
||||
- simd_extract!
|
||||
- 'vget_lane_{type[2]}'
|
||||
- - FnCall:
|
||||
- "vclez_{type[1]}"
|
||||
- - FnCall: ["vdup_n_{type[1]}", [a]]
|
||||
- '0'
|
||||
- - '0'
|
||||
|
||||
- name: "vcltz{neon_type[0].no}"
|
||||
doc: "Compare signed less than zero"
|
||||
@@ -818,7 +729,7 @@ intrinsics:
|
||||
return_type: "{neon_type[1]}"
|
||||
attr:
|
||||
- FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [cmlt]]}]]
|
||||
- FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
|
||||
- *neon-stable
|
||||
safety: safe
|
||||
types:
|
||||
- [int8x8_t, uint8x8_t, i8x8, 'i8x8::new(0, 0, 0, 0, 0, 0, 0, 0)']
|
||||
@@ -842,7 +753,7 @@ intrinsics:
|
||||
return_type: "{neon_type[1]}"
|
||||
attr:
|
||||
- FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcmlt]]}]]
|
||||
- FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
|
||||
- *neon-stable
|
||||
safety: safe
|
||||
types:
|
||||
- [float32x2_t, uint32x2_t, f32x2, 'f32x2::new(0.0, 0.0)']
|
||||
@@ -862,18 +773,18 @@ intrinsics:
|
||||
return_type: "{type[2]}"
|
||||
attr:
|
||||
- FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcmp]]}]]
|
||||
- FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
|
||||
- *neon-stable
|
||||
safety: safe
|
||||
types:
|
||||
- ["s_f32", "f32", "u32"]
|
||||
- ["d_f64", "f64", "u64"]
|
||||
compose:
|
||||
- FnCall:
|
||||
- simd_extract!
|
||||
- 'vget_lane_{type[2]}'
|
||||
- - FnCall:
|
||||
- "vcltz_{type[1]}"
|
||||
- - FnCall: ["vdup_n_{type[1]}", [a]]
|
||||
- '0'
|
||||
- - '0'
|
||||
|
||||
- name: "vcltz{type[0]}"
|
||||
doc: "Floating-point compare less than zero"
|
||||
@@ -889,11 +800,11 @@ intrinsics:
|
||||
- ["h_f16", "f16", "u16"]
|
||||
compose:
|
||||
- FnCall:
|
||||
- simd_extract!
|
||||
- 'vget_lane_{type[2]}'
|
||||
- - FnCall:
|
||||
- "vcltz_{type[1]}"
|
||||
- - FnCall: ["vdup_n_{type[1]}", [a]]
|
||||
- '0'
|
||||
- - '0'
|
||||
|
||||
- name: "vcltzd_s64"
|
||||
doc: "Compare less than zero"
|
||||
@@ -901,7 +812,7 @@ intrinsics:
|
||||
return_type: "{type[1]}"
|
||||
attr:
|
||||
- FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [asr]]}]]
|
||||
- FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
|
||||
- *neon-stable
|
||||
safety: safe
|
||||
types:
|
||||
- ["i64", "u64"]
|
||||
@@ -918,7 +829,7 @@ intrinsics:
|
||||
return_type: "{neon_type[1]}"
|
||||
attr:
|
||||
- FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [facgt]]}]]
|
||||
- FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
|
||||
- *neon-stable
|
||||
safety: safe
|
||||
types:
|
||||
- [float64x1_t, uint64x1_t]
|
||||
@@ -936,7 +847,7 @@ intrinsics:
|
||||
return_type: "{type[2]}"
|
||||
attr:
|
||||
- FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [facgt]]}]]
|
||||
- FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
|
||||
- *neon-stable
|
||||
safety: safe
|
||||
types:
|
||||
- ["s_f32", "f32", "u32", i32]
|
||||
@@ -975,7 +886,7 @@ intrinsics:
|
||||
return_type: "{neon_type[1]}"
|
||||
attr:
|
||||
- FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [facge]]}]]
|
||||
- FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
|
||||
- *neon-stable
|
||||
safety: safe
|
||||
types:
|
||||
- [float64x1_t, uint64x1_t]
|
||||
@@ -993,7 +904,7 @@ intrinsics:
|
||||
return_type: "{type[2]}"
|
||||
attr:
|
||||
- FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [facge]]}]]
|
||||
- FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
|
||||
- *neon-stable
|
||||
safety: safe
|
||||
types:
|
||||
- ["s_f32", "f32", "u32", i32]
|
||||
@@ -1033,7 +944,7 @@ intrinsics:
|
||||
return_type: "{neon_type[1]}"
|
||||
attr:
|
||||
- FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [facgt]]}]]
|
||||
- FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
|
||||
- *neon-stable
|
||||
safety: safe
|
||||
types:
|
||||
- [float64x1_t, uint64x1_t]
|
||||
@@ -1047,7 +958,7 @@ intrinsics:
|
||||
return_type: "{type[2]}"
|
||||
attr:
|
||||
- FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [facgt]]}]]
|
||||
- FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
|
||||
- *neon-stable
|
||||
safety: safe
|
||||
types:
|
||||
- ["s_f32", "f32", "u32"]
|
||||
@@ -1076,7 +987,7 @@ intrinsics:
|
||||
return_type: "{neon_type[1]}"
|
||||
attr:
|
||||
- FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [facge]]}]]
|
||||
- FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
|
||||
- *neon-stable
|
||||
safety: safe
|
||||
types:
|
||||
- [float64x1_t, uint64x1_t]
|
||||
@@ -1090,7 +1001,7 @@ intrinsics:
|
||||
return_type: "{type[2]}"
|
||||
attr:
|
||||
- FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [facge]]}]]
|
||||
- FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
|
||||
- *neon-stable
|
||||
safety: safe
|
||||
types:
|
||||
- ["s_f32", "f32", "u32"]
|
||||
@@ -1119,7 +1030,7 @@ intrinsics:
|
||||
return_type: "{neon_type[1]}"
|
||||
attr:
|
||||
- FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [scvtf]]}]]
|
||||
- FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
|
||||
- *neon-stable
|
||||
safety: safe
|
||||
types:
|
||||
- [int64x1_t, float64x1_t]
|
||||
@@ -1133,7 +1044,7 @@ intrinsics:
|
||||
return_type: "{type[2]}"
|
||||
attr:
|
||||
- FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [scvtf]]}]]
|
||||
- FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
|
||||
- *neon-stable
|
||||
safety: safe
|
||||
types:
|
||||
- ["s_f32", "i32", "f32", s32]
|
||||
@@ -1147,7 +1058,7 @@ intrinsics:
|
||||
return_type: "{neon_type[1]}"
|
||||
attr:
|
||||
- FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [ucvtf]]}]]
|
||||
- FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
|
||||
- *neon-stable
|
||||
safety: safe
|
||||
types:
|
||||
- [uint64x1_t, float64x1_t]
|
||||
@@ -1161,7 +1072,7 @@ intrinsics:
|
||||
return_type: "{type[1]}"
|
||||
attr:
|
||||
- FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [ucvtf]]}]]
|
||||
- FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
|
||||
- *neon-stable
|
||||
safety: safe
|
||||
types:
|
||||
- ["u32", "f32", "s_f32"]
|
||||
@@ -1176,7 +1087,7 @@ intrinsics:
|
||||
attr:
|
||||
- FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [scvtf, 'N = 2']]}]]
|
||||
- FnCall: [rustc_legacy_const_generics, ['1']]
|
||||
- FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
|
||||
- *neon-stable
|
||||
static_defs: ['const N: i32']
|
||||
safety: safe
|
||||
types:
|
||||
@@ -1314,7 +1225,7 @@ intrinsics:
|
||||
attr:
|
||||
- FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [scvtf, 'N = 2']]}]]
|
||||
- FnCall: [rustc_legacy_const_generics, ['1']]
|
||||
- FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
|
||||
- *neon-stable
|
||||
static_defs: ['const N: i32']
|
||||
safety: safe
|
||||
types:
|
||||
@@ -1340,7 +1251,7 @@ intrinsics:
|
||||
attr:
|
||||
- FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [ucvtf, 'N = 2']]}]]
|
||||
- FnCall: [rustc_legacy_const_generics, ['1']]
|
||||
- FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
|
||||
- *neon-stable
|
||||
static_defs: ['const N: i32']
|
||||
safety: safe
|
||||
types:
|
||||
@@ -1365,7 +1276,7 @@ intrinsics:
|
||||
attr:
|
||||
- FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [ucvtf, 'N = 2']]}]]
|
||||
- FnCall: [rustc_legacy_const_generics, ['1']]
|
||||
- FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
|
||||
- *neon-stable
|
||||
static_defs: ['const N: i32']
|
||||
safety: safe
|
||||
types:
|
||||
@@ -1389,7 +1300,7 @@ intrinsics:
|
||||
return_type: "{type[1]}"
|
||||
attr:
|
||||
- FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcvtzs]]}]]
|
||||
- FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
|
||||
- *neon-stable
|
||||
safety: safe
|
||||
types:
|
||||
- ["f32", "i32", "s_s32_f32", "32"]
|
||||
@@ -1403,7 +1314,7 @@ intrinsics:
|
||||
return_type: "{type[1]}"
|
||||
attr:
|
||||
- FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcvtzu]]}]]
|
||||
- FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
|
||||
- *neon-stable
|
||||
safety: safe
|
||||
types:
|
||||
- ["f32", "u32", "s_u32_f32"]
|
||||
@@ -1488,7 +1399,7 @@ intrinsics:
|
||||
return_type: "{neon_type[1]}"
|
||||
attr:
|
||||
- FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcvtl]]}]]
|
||||
- FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
|
||||
- *neon-stable
|
||||
safety: safe
|
||||
types:
|
||||
- [float32x2_t, float64x2_t]
|
||||
@@ -1500,28 +1411,20 @@ intrinsics:
|
||||
arguments: ["a: {neon_type[0]}"]
|
||||
return_type: "{neon_type[1]}"
|
||||
attr:
|
||||
- FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcvtl2]]}]]
|
||||
- FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
|
||||
- FnCall: [cfg_attr, [*all-test-little-endian, {FnCall: [assert_instr, [fcvtl2]]}]]
|
||||
- *neon-stable
|
||||
safety: safe
|
||||
types:
|
||||
- [float32x4_t, float64x2_t]
|
||||
compose:
|
||||
- Let:
|
||||
- b
|
||||
- float32x2_t
|
||||
- FnCall:
|
||||
- simd_shuffle!
|
||||
- - a
|
||||
- a
|
||||
- '[2, 3]'
|
||||
- FnCall: [simd_cast, [b]]
|
||||
- FnCall: [simd_cast, [{FnCall: ['vget_high_{neon_type[0]}', [a]]}]]
|
||||
|
||||
- name: "vcvt_high_f16_f32"
|
||||
doc: "Floating-point convert to lower precision"
|
||||
arguments: ["a: {neon_type[1]}", "b: {neon_type[2]}"]
|
||||
return_type: "{neon_type[0]}"
|
||||
attr:
|
||||
- FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcvtn2]]}]]
|
||||
- FnCall: [cfg_attr, [*all-test-little-endian, {FnCall: [assert_instr, [fcvtn2]]}]]
|
||||
- *neon-stable-fp16
|
||||
- *target-not-arm64ec
|
||||
safety: safe
|
||||
@@ -1538,7 +1441,7 @@ intrinsics:
|
||||
arguments: ["a: {neon_type[1]}"]
|
||||
return_type: "{neon_type[0]}"
|
||||
attr:
|
||||
- FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcvtl2]]}]]
|
||||
- FnCall: [cfg_attr, [*all-test-little-endian, {FnCall: [assert_instr, [fcvtl2]]}]]
|
||||
- *neon-stable-fp16
|
||||
- *target-not-arm64ec
|
||||
safety: safe
|
||||
@@ -1555,8 +1458,8 @@ intrinsics:
|
||||
arguments: ["a: {neon_type[0]}"]
|
||||
return_type: "{neon_type[1]}"
|
||||
attr:
|
||||
- FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcvtn]]}]]
|
||||
- FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
|
||||
- FnCall: [cfg_attr, [*all-test-little-endian, {FnCall: [assert_instr, [fcvtn]]}]]
|
||||
- *neon-stable
|
||||
safety: safe
|
||||
types:
|
||||
- [float64x2_t, float32x2_t]
|
||||
@@ -1568,25 +1471,24 @@ intrinsics:
|
||||
arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}"]
|
||||
return_type: "{neon_type[2]}"
|
||||
attr:
|
||||
- FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcvtn2]]}]]
|
||||
- FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
|
||||
- FnCall: [cfg_attr, [*all-test-little-endian, {FnCall: [assert_instr, [fcvtn2]]}]]
|
||||
- *neon-stable
|
||||
safety: safe
|
||||
types:
|
||||
- [float32x2_t, float64x2_t, float32x4_t]
|
||||
compose:
|
||||
- FnCall:
|
||||
- simd_shuffle!
|
||||
- vcombine_f32
|
||||
- - a
|
||||
- FnCall: [simd_cast, [b]]
|
||||
- '[0, 1, 2, 3]'
|
||||
- FnCall: [vcvt_f32_f64, [b]]
|
||||
|
||||
- name: "vcvtx_f32_f64"
|
||||
doc: "Floating-point convert to lower precision narrow, rounding to odd"
|
||||
arguments: ["a: {neon_type[0]}"]
|
||||
return_type: "{neon_type[1]}"
|
||||
attr:
|
||||
- FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcvtxn]]}]]
|
||||
- FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
|
||||
- FnCall: [cfg_attr, [*all-test-little-endian, {FnCall: [assert_instr, [fcvtxn]]}]]
|
||||
- *neon-stable
|
||||
safety: safe
|
||||
types:
|
||||
- [float64x2_t, float32x2_t]
|
||||
@@ -1603,34 +1505,33 @@ intrinsics:
|
||||
return_type: "{type[1]}"
|
||||
attr:
|
||||
- FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcvtxn]]}]]
|
||||
- FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
|
||||
- *neon-stable
|
||||
safety: safe
|
||||
types:
|
||||
- ["f64", "f32"]
|
||||
compose:
|
||||
- FnCall:
|
||||
- simd_extract!
|
||||
- 'vget_lane_{type[1]}'
|
||||
- - FnCall:
|
||||
- vcvtx_f32_f64
|
||||
- - FnCall: [vdupq_n_f64, [a]]
|
||||
- '0'
|
||||
- - '0'
|
||||
|
||||
- name: "vcvtx_high_f32_f64"
|
||||
doc: "Floating-point convert to lower precision narrow, rounding to odd"
|
||||
arguments: ["a: {type[0]}", "b: {neon_type[1]}"]
|
||||
return_type: "{type[2]}"
|
||||
attr:
|
||||
- FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcvtxn2]]}]]
|
||||
- FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
|
||||
- FnCall: [cfg_attr, [*all-test-little-endian, {FnCall: [assert_instr, [fcvtxn2]]}]]
|
||||
- *neon-stable
|
||||
safety: safe
|
||||
types:
|
||||
- [float32x2_t, float64x2_t, float32x4_t]
|
||||
compose:
|
||||
- FnCall:
|
||||
- simd_shuffle!
|
||||
- vcombine_f32
|
||||
- - a
|
||||
- FnCall: [vcvtx_f32_f64, [b]]
|
||||
- '[0, 1, 2, 3]'
|
||||
|
||||
- name: "vcvt{type[2]}"
|
||||
doc: "Floating-point convert to fixed-point, rounding toward zero"
|
||||
@@ -1639,7 +1540,7 @@ intrinsics:
|
||||
attr:
|
||||
- FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcvtzs, 'N = 2']]}]]
|
||||
- FnCall: [rustc_legacy_const_generics, ['1']]
|
||||
- FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
|
||||
- *neon-stable
|
||||
static_defs: ['const N: i32']
|
||||
safety: safe
|
||||
types:
|
||||
@@ -1662,7 +1563,7 @@ intrinsics:
|
||||
attr:
|
||||
- FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcvtzs, 'N = 2']]}]]
|
||||
- FnCall: [rustc_legacy_const_generics, ['1']]
|
||||
- FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
|
||||
- *neon-stable
|
||||
static_defs: ['const N: i32']
|
||||
safety: safe
|
||||
types:
|
||||
@@ -1756,7 +1657,7 @@ intrinsics:
|
||||
attr:
|
||||
- FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcvtzu, 'N = 2']]}]]
|
||||
- FnCall: [rustc_legacy_const_generics, ['1']]
|
||||
- FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
|
||||
- *neon-stable
|
||||
static_defs: ['const N: i32']
|
||||
safety: safe
|
||||
types:
|
||||
@@ -1779,7 +1680,7 @@ intrinsics:
|
||||
attr:
|
||||
- FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcvtzu, 'N = 2']]}]]
|
||||
- FnCall: [rustc_legacy_const_generics, ['1']]
|
||||
- FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
|
||||
- *neon-stable
|
||||
static_defs: ['const N: i32']
|
||||
safety: safe
|
||||
types:
|
||||
@@ -1801,7 +1702,7 @@ intrinsics:
|
||||
return_type: "{neon_type[1]}"
|
||||
attr:
|
||||
- FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcvtas]]}]]
|
||||
- FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
|
||||
- *neon-stable
|
||||
safety: safe
|
||||
types:
|
||||
- [float32x2_t, int32x2_t, _s32_f32]
|
||||
@@ -1842,7 +1743,7 @@ intrinsics:
|
||||
return_type: "{type[1]}"
|
||||
attr:
|
||||
- FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcvtas]]}]]
|
||||
- FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
|
||||
- *neon-stable
|
||||
safety: safe
|
||||
types:
|
||||
- ["f32", "i32", 's_s32_f32']
|
||||
@@ -1866,9 +1767,9 @@ intrinsics:
|
||||
- *target-not-arm64ec
|
||||
safety: safe
|
||||
types:
|
||||
- ["f16", "u16", 'h_u16_f16']
|
||||
- ["f16", "u32", 'h_u32_f16']
|
||||
- ["f16", "u64", 'h_u64_f16']
|
||||
|
||||
compose:
|
||||
- LLVMLink:
|
||||
name: "vcvta{type[2]}"
|
||||
@@ -1888,6 +1789,7 @@ intrinsics:
|
||||
- *target-not-arm64ec
|
||||
safety: safe
|
||||
types:
|
||||
- ["f16", "i16", 'h_s16_f16']
|
||||
- ["f16", "i32", 'h_s32_f16']
|
||||
- ["f16", "i64", 'h_s64_f16']
|
||||
compose:
|
||||
@@ -1898,44 +1800,13 @@ intrinsics:
|
||||
- link: "llvm.aarch64.neon.fcvtas.{type[1]}.{type[0]}"
|
||||
arch: aarch64,arm64ec
|
||||
|
||||
|
||||
- name: "vcvta{type[2]}"
|
||||
doc: "Floating-point convert to integer, rounding to nearest with ties to away"
|
||||
arguments: ["a: {type[0]}"]
|
||||
return_type: "{type[1]}"
|
||||
attr:
|
||||
- FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcvtas]]}]]
|
||||
- *neon-fp16
|
||||
- *neon-unstable-f16
|
||||
- *target-not-arm64ec
|
||||
safety: safe
|
||||
types:
|
||||
- ["f16", "i16", 'h_s16_f16', 's32']
|
||||
compose:
|
||||
- 'vcvtah_{type[3]}_f16(a) as i16'
|
||||
|
||||
- name: "vcvta{type[2]}"
|
||||
doc: "Floating-point convert to integer, rounding to nearest with ties to away"
|
||||
arguments: ["a: {type[0]}"]
|
||||
return_type: "{type[1]}"
|
||||
attr:
|
||||
- FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcvtau]]}]]
|
||||
- *neon-fp16
|
||||
- *neon-unstable-f16
|
||||
- *target-not-arm64ec
|
||||
safety: safe
|
||||
types:
|
||||
- ["f16", "u16", 'h_u16_f16', 'u32']
|
||||
compose:
|
||||
- 'vcvtah_{type[3]}_f16(a) as u16'
|
||||
|
||||
- name: "vcvta{type[2]}"
|
||||
doc: "Floating-point convert to integer, rounding to nearest with ties to away"
|
||||
arguments: ["a: {type[0]}"]
|
||||
return_type: "{type[1]}"
|
||||
attr:
|
||||
- FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcvtau]]}]]
|
||||
- FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
|
||||
- *neon-stable
|
||||
safety: safe
|
||||
types:
|
||||
- ["f32", "u32", 's_u32_f32']
|
||||
@@ -1953,7 +1824,7 @@ intrinsics:
|
||||
return_type: "{neon_type[1]}"
|
||||
attr:
|
||||
- FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcvtns]]}]]
|
||||
- FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
|
||||
- *neon-stable
|
||||
safety: safe
|
||||
types:
|
||||
- [float32x2_t, int32x2_t]
|
||||
@@ -1973,7 +1844,7 @@ intrinsics:
|
||||
return_type: "{type[1]}"
|
||||
attr:
|
||||
- FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcvtns]]}]]
|
||||
- FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
|
||||
- *neon-stable
|
||||
safety: safe
|
||||
types:
|
||||
- ["f32", "i32", 's_s32_f32']
|
||||
@@ -2038,6 +1909,7 @@ intrinsics:
|
||||
- *target-not-arm64ec
|
||||
safety: safe
|
||||
types:
|
||||
- ["f16", "i16", 'h']
|
||||
- ["f16", "i32", 'h']
|
||||
- ["f16", "i64", 'h']
|
||||
compose:
|
||||
@@ -2048,22 +1920,6 @@ intrinsics:
|
||||
- link: "llvm.aarch64.neon.fcvtns.{type[1]}.{type[0]}"
|
||||
arch: aarch64,arm64ec
|
||||
|
||||
- name: "vcvtn{type[2]}_{type[1]}_{type[0]}"
|
||||
doc: "Floating-point convert to integer, rounding to nearest with ties to even"
|
||||
arguments: ["a: {type[0]}"]
|
||||
return_type: "{type[1]}"
|
||||
attr:
|
||||
- FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcvtns]]}]]
|
||||
- *neon-fp16
|
||||
- *neon-unstable-f16
|
||||
- *target-not-arm64ec
|
||||
safety: safe
|
||||
types:
|
||||
- ["f16", "i16", 'h', 'i32']
|
||||
compose:
|
||||
- 'vcvtnh_{type[3]}_f16(a) as i16'
|
||||
|
||||
|
||||
- name: "vcvtn{type[2]}_{type[1]}_{type[0]}"
|
||||
doc: "Floating-point convert to unsigned integer, rounding to nearest with ties to even"
|
||||
arguments: ["a: {type[0]}"]
|
||||
@@ -2075,6 +1931,7 @@ intrinsics:
|
||||
- *target-not-arm64ec
|
||||
safety: safe
|
||||
types:
|
||||
- ["f16", "u16", 'h']
|
||||
- ["f16", "u32", 'h']
|
||||
- ["f16", "u64", 'h']
|
||||
compose:
|
||||
@@ -2085,28 +1942,13 @@ intrinsics:
|
||||
- link: "llvm.aarch64.neon.fcvtnu.{type[1]}.{type[0]}"
|
||||
arch: aarch64,arm64ec
|
||||
|
||||
- name: "vcvtn{type[2]}_{type[1]}_{type[0]}"
|
||||
doc: "Floating-point convert to unsigned integer, rounding to nearest with ties to even"
|
||||
arguments: ["a: {type[0]}"]
|
||||
return_type: "{type[1]}"
|
||||
attr:
|
||||
- FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcvtnu]]}]]
|
||||
- *neon-fp16
|
||||
- *neon-unstable-f16
|
||||
- *target-not-arm64ec
|
||||
safety: safe
|
||||
types:
|
||||
- ["f16", "u16", 'h', 'u32']
|
||||
compose:
|
||||
- 'vcvtnh_{type[3]}_f16(a) as u16'
|
||||
|
||||
- name: "vcvtm{neon_type[1].no}_{neon_type[0]}"
|
||||
doc: "Floating-point convert to signed integer, rounding toward minus infinity"
|
||||
arguments: ["a: {neon_type[0]}"]
|
||||
return_type: "{neon_type[1]}"
|
||||
attr:
|
||||
- FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcvtms]]}]]
|
||||
- FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
|
||||
- *neon-stable
|
||||
safety: safe
|
||||
types:
|
||||
- [float32x2_t, int32x2_t]
|
||||
@@ -2169,7 +2011,7 @@ intrinsics:
|
||||
return_type: "{type[1]}"
|
||||
attr:
|
||||
- FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcvtms]]}]]
|
||||
- FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
|
||||
- *neon-stable
|
||||
safety: safe
|
||||
types:
|
||||
- ["f32", "i32", 's_s32_f32']
|
||||
@@ -2187,7 +2029,7 @@ intrinsics:
|
||||
return_type: "{neon_type[1]}"
|
||||
attr:
|
||||
- FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcvtps]]}]]
|
||||
- FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
|
||||
- *neon-stable
|
||||
safety: safe
|
||||
types:
|
||||
- [float32x2_t, int32x2_t]
|
||||
@@ -2207,7 +2049,7 @@ intrinsics:
|
||||
return_type: "{type[1]}"
|
||||
attr:
|
||||
- FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcvtps]]}]]
|
||||
- FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
|
||||
- *neon-stable
|
||||
safety: safe
|
||||
types:
|
||||
- ["f32", "i32", 's_s32_f32']
|
||||
@@ -2225,7 +2067,7 @@ intrinsics:
|
||||
return_type: "{neon_type[1]}"
|
||||
attr:
|
||||
- FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcvtnu]]}]]
|
||||
- FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
|
||||
- *neon-stable
|
||||
safety: safe
|
||||
types:
|
||||
- [float32x2_t, uint32x2_t]
|
||||
@@ -2245,7 +2087,7 @@ intrinsics:
|
||||
return_type: "{type[1]}"
|
||||
attr:
|
||||
- FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcvtnu]]}]]
|
||||
- FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
|
||||
- *neon-stable
|
||||
safety: safe
|
||||
types:
|
||||
- ["f32", "u32", 's_u32_f32']
|
||||
@@ -2263,7 +2105,7 @@ intrinsics:
|
||||
return_type: "{neon_type[1]}"
|
||||
attr:
|
||||
- FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcvtmu]]}]]
|
||||
- FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
|
||||
- *neon-stable
|
||||
safety: safe
|
||||
types:
|
||||
- [float32x2_t, uint32x2_t]
|
||||
@@ -2283,7 +2125,7 @@ intrinsics:
|
||||
return_type: "{type[1]}"
|
||||
attr:
|
||||
- FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcvtmu]]}]]
|
||||
- FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
|
||||
- *neon-stable
|
||||
safety: safe
|
||||
types:
|
||||
- ["f32", "u32", s_u32_f32]
|
||||
@@ -2301,7 +2143,7 @@ intrinsics:
|
||||
return_type: "{neon_type[1]}"
|
||||
attr:
|
||||
- FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcvtpu]]}]]
|
||||
- FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
|
||||
- *neon-stable
|
||||
safety: safe
|
||||
types:
|
||||
- [float32x2_t, uint32x2_t]
|
||||
@@ -2321,7 +2163,7 @@ intrinsics:
|
||||
return_type: "{type[1]}"
|
||||
attr:
|
||||
- FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcvtpu]]}]]
|
||||
- FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
|
||||
- *neon-stable
|
||||
safety: safe
|
||||
types:
|
||||
- ["f32", "u32", s_u32_f32, 'i32']
|
||||
@@ -2390,6 +2232,7 @@ intrinsics:
|
||||
- *target-not-arm64ec
|
||||
safety: safe
|
||||
types:
|
||||
- ["f16", "i16", 'h']
|
||||
- ["f16", "i32", 'h']
|
||||
- ["f16", "i64", 'h']
|
||||
compose:
|
||||
@@ -2400,21 +2243,6 @@ intrinsics:
|
||||
- link: "llvm.aarch64.neon.fcvtps.{type[1]}.{type[0]}"
|
||||
arch: aarch64,arm64ec
|
||||
|
||||
- name: "vcvtp{type[2]}_{type[1]}_{type[0]}"
|
||||
doc: "Floating-point convert to integer, rounding to plus infinity"
|
||||
arguments: ["a: {type[0]}"]
|
||||
return_type: "{type[1]}"
|
||||
attr:
|
||||
- FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcvtps]]}]]
|
||||
- *neon-fp16
|
||||
- *neon-unstable-f16
|
||||
- *target-not-arm64ec
|
||||
safety: safe
|
||||
types:
|
||||
- ["f16", "i16", 'h', 'i32']
|
||||
compose:
|
||||
- 'vcvtph_{type[3]}_f16(a) as i16'
|
||||
|
||||
- name: "vcvtp{type[2]}_{type[1]}_{type[0]}"
|
||||
doc: "Floating-point convert to unsigned integer, rounding to plus infinity"
|
||||
arguments: ["a: {type[0]}"]
|
||||
@@ -2426,6 +2254,7 @@ intrinsics:
|
||||
- *target-not-arm64ec
|
||||
safety: safe
|
||||
types:
|
||||
- ["f16", "u16", 'h']
|
||||
- ["f16", "u32", 'h']
|
||||
- ["f16", "u64", 'h']
|
||||
compose:
|
||||
@@ -2436,21 +2265,6 @@ intrinsics:
|
||||
- link: "llvm.aarch64.neon.fcvtpu.{type[1]}.{type[0]}"
|
||||
arch: aarch64,arm64ec
|
||||
|
||||
- name: "vcvtp{type[2]}_{type[1]}_{type[0]}"
|
||||
doc: "Floating-point convert to unsigned integer, rounding to plus infinity"
|
||||
arguments: ["a: {type[0]}"]
|
||||
return_type: "{type[1]}"
|
||||
attr:
|
||||
- FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcvtpu]]}]]
|
||||
- *neon-fp16
|
||||
- *neon-unstable-f16
|
||||
- *target-not-arm64ec
|
||||
safety: safe
|
||||
types:
|
||||
- ["f16", "u16", 'h', 'u32']
|
||||
compose:
|
||||
- 'vcvtph_{type[3]}_f16(a) as u16'
|
||||
|
||||
- name: "vdup{neon_type.laneq_nox}"
|
||||
doc: "Set all vector lanes to the same value"
|
||||
arguments: ["a: {neon_type}"]
|
||||
@@ -2458,8 +2272,9 @@ intrinsics:
|
||||
attr:
|
||||
- FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [dup, 'N = 1']]}]]
|
||||
- FnCall: [rustc_legacy_const_generics, ['1']]
|
||||
- FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
|
||||
- *neon-stable
|
||||
static_defs: ['const N: i32']
|
||||
big_endian_inverse: true
|
||||
safety: safe
|
||||
types:
|
||||
- poly64x2_t
|
||||
@@ -2475,8 +2290,9 @@ intrinsics:
|
||||
attr:
|
||||
- FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [dup, 'N = 0']]}]]
|
||||
- FnCall: [rustc_legacy_const_generics, ['1']]
|
||||
- FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
|
||||
- *neon-stable
|
||||
static_defs: ['const N: i32']
|
||||
big_endian_inverse: true
|
||||
safety: safe
|
||||
types:
|
||||
- [poly64x1_t, poly64x2_t]
|
||||
@@ -2492,7 +2308,7 @@ intrinsics:
|
||||
attr:
|
||||
- FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [nop, 'N = 0']]}]]
|
||||
- FnCall: [rustc_legacy_const_generics, ['1']]
|
||||
- FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
|
||||
- *neon-stable
|
||||
static_defs: ['const N: i32']
|
||||
safety: safe
|
||||
types:
|
||||
@@ -2509,7 +2325,7 @@ intrinsics:
|
||||
attr:
|
||||
- FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [nop, 'N = 0']]}]]
|
||||
- FnCall: [rustc_legacy_const_generics, ['1']]
|
||||
- FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
|
||||
- *neon-stable
|
||||
static_defs: ['const N: i32']
|
||||
safety: safe
|
||||
types:
|
||||
@@ -2518,7 +2334,7 @@ intrinsics:
|
||||
- [float64x1_t, "f64"]
|
||||
compose:
|
||||
- FnCall: [static_assert!, ['N == 0']]
|
||||
- FnCall: [simd_extract!, [a, 'N as u32']]
|
||||
- FnCall: ['vget{neon_type[0].lane_nox}', [a], [N]]
|
||||
|
||||
- name: "vdup_laneq_{neon_type[0]}"
|
||||
doc: "Set all vector lanes to the same value"
|
||||
@@ -2527,7 +2343,7 @@ intrinsics:
|
||||
attr:
|
||||
- FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [nop, 'N = 1']]}]]
|
||||
- FnCall: [rustc_legacy_const_generics, ['1']]
|
||||
- FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
|
||||
- *neon-stable
|
||||
static_defs: ['const N: i32']
|
||||
safety: safe
|
||||
types:
|
||||
@@ -2536,8 +2352,8 @@ intrinsics:
|
||||
compose:
|
||||
- FnCall: [static_assert_uimm_bits!, [N, 1]]
|
||||
- FnCall:
|
||||
- "transmute::<{type[2]}, _>"
|
||||
- - FnCall: [simd_extract!, [a, 'N as u32']]
|
||||
- transmute
|
||||
- - FnCall: ['vget{neon_type[0].lane_nox}', [a], [N]]
|
||||
|
||||
- name: "vdup{type[2]}"
|
||||
doc: "Set all vector lanes to the same value"
|
||||
@@ -2546,7 +2362,7 @@ intrinsics:
|
||||
attr:
|
||||
- FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [nop, 'N = 1']]}]]
|
||||
- FnCall: [rustc_legacy_const_generics, ['1']]
|
||||
- FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
|
||||
- *neon-stable
|
||||
static_defs: ['const N: i32']
|
||||
safety: safe
|
||||
types:
|
||||
@@ -2558,7 +2374,7 @@ intrinsics:
|
||||
- [float64x2_t, "f64", d_laneq_f64]
|
||||
compose:
|
||||
- FnCall: [static_assert_uimm_bits!, [N, 1]]
|
||||
- FnCall: [simd_extract!, [a, 'N as u32']]
|
||||
- FnCall: ['vget{neon_type[0].lane_nox}', [a], [N]]
|
||||
|
||||
- name: "vdup{type[2]}"
|
||||
doc: "Set all vector lanes to the same value"
|
||||
@@ -2567,7 +2383,7 @@ intrinsics:
|
||||
attr:
|
||||
- FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [nop, 'N = 4']]}]]
|
||||
- FnCall: [rustc_legacy_const_generics, ['1']]
|
||||
- FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
|
||||
- *neon-stable
|
||||
static_defs: ['const N: i32']
|
||||
safety: safe
|
||||
types:
|
||||
@@ -2579,7 +2395,7 @@ intrinsics:
|
||||
- [poly16x8_t, "p16", h_laneq_p16]
|
||||
compose:
|
||||
- FnCall: [static_assert_uimm_bits!, [N, 3]]
|
||||
- FnCall: [simd_extract!, [a, 'N as u32']]
|
||||
- FnCall: ['vget{neon_type[0].lane_nox}', [a], [N]]
|
||||
|
||||
|
||||
- name: "vdup{type[2]}"
|
||||
@@ -2598,7 +2414,7 @@ intrinsics:
|
||||
- [float16x4_t, "f16", h_lane_f16]
|
||||
compose:
|
||||
- FnCall: [static_assert_uimm_bits!, [N, 2]]
|
||||
- FnCall: [simd_extract!, [a, 'N as u32']]
|
||||
- FnCall: ['vget{neon_type[0].lane_nox}', [a], [N]]
|
||||
|
||||
|
||||
- name: "vdup{type[2]}"
|
||||
@@ -2617,7 +2433,7 @@ intrinsics:
|
||||
- [float16x8_t, "f16", h_laneq_f16]
|
||||
compose:
|
||||
- FnCall: [static_assert_uimm_bits!, [N, 4]]
|
||||
- FnCall: [simd_extract!, [a, 'N as u32']]
|
||||
- FnCall: ['vget{neon_type[0].lane_nox}', [a], [N]]
|
||||
|
||||
|
||||
- name: "vdup{type[2]}"
|
||||
@@ -2627,7 +2443,7 @@ intrinsics:
|
||||
attr:
|
||||
- FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [nop, 'N = 8']]}]]
|
||||
- FnCall: [rustc_legacy_const_generics, ['1']]
|
||||
- FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
|
||||
- *neon-stable
|
||||
static_defs: ['const N: i32']
|
||||
safety: safe
|
||||
types:
|
||||
@@ -2636,7 +2452,7 @@ intrinsics:
|
||||
- [poly8x16_t, "p8", b_laneq_p8]
|
||||
compose:
|
||||
- FnCall: [static_assert_uimm_bits!, [N, 4]]
|
||||
- FnCall: [simd_extract!, [a, 'N as u32']]
|
||||
- FnCall: ['vget{neon_type[0].lane_nox}', [a], [N]]
|
||||
|
||||
- name: "vdup{type[2]}"
|
||||
doc: "Set all vector lanes to the same value"
|
||||
@@ -2645,7 +2461,7 @@ intrinsics:
|
||||
attr:
|
||||
- FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [nop, 'N = 2']]}]]
|
||||
- FnCall: [rustc_legacy_const_generics, ['1']]
|
||||
- FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
|
||||
- *neon-stable
|
||||
static_defs: ['const N: i32']
|
||||
safety: safe
|
||||
types:
|
||||
@@ -2657,24 +2473,25 @@ intrinsics:
|
||||
- [float32x4_t, "f32", s_laneq_f32]
|
||||
compose:
|
||||
- FnCall: [static_assert_uimm_bits!, [N, 2]]
|
||||
- FnCall: [simd_extract!, [a, 'N as u32']]
|
||||
- FnCall: ['vget{neon_type[0].lane_nox}', [a], [N]]
|
||||
|
||||
- name: "vext{neon_type[0].no}"
|
||||
- name: "vext{neon_type.no}"
|
||||
doc: "Extract vector from pair of vectors"
|
||||
arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}"]
|
||||
return_type: "{neon_type[0]}"
|
||||
arguments: ["a: {neon_type}", "b: {neon_type}"]
|
||||
return_type: "{neon_type}"
|
||||
attr:
|
||||
- FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [ext, 'N = 1']]}]]
|
||||
- FnCall: [rustc_legacy_const_generics, ['2']]
|
||||
- FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
|
||||
- *neon-stable
|
||||
static_defs: ['const N: i32']
|
||||
big_endian_inverse: true
|
||||
safety: safe
|
||||
types:
|
||||
- [poly64x2_t, 'match N & 0b1 { 0 => simd_shuffle!(a, b, [0, 1]), 1 => simd_shuffle!(a, b, [1, 2]), _ => unreachable_unchecked(), }']
|
||||
- [float64x2_t, 'match N & 0b1 { 0 => simd_shuffle!(a, b, [0, 1]), 1 => simd_shuffle!(a, b, [1, 2]), _ => unreachable_unchecked(), }']
|
||||
- poly64x2_t
|
||||
- float64x2_t
|
||||
compose:
|
||||
- FnCall: [static_assert_uimm_bits!, [N, 1]]
|
||||
- Identifier: ["{type[1]}", UnsafeSymbol]
|
||||
- FnCall: [simd_shuffle!, [a, b, '[N as u32, N as u32 + 1]']]
|
||||
|
||||
- name: "vmla{neon_type.no}"
|
||||
doc: "Floating-point multiply-add to accumulator"
|
||||
@@ -2682,7 +2499,7 @@ intrinsics:
|
||||
return_type: "{neon_type}"
|
||||
attr:
|
||||
- FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fmul]]}]]
|
||||
- FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
|
||||
- *neon-stable
|
||||
safety: safe
|
||||
types:
|
||||
- float64x1_t
|
||||
@@ -2695,16 +2512,16 @@ intrinsics:
|
||||
arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}", "c: {neon_type[1]}"]
|
||||
return_type: "{neon_type[0]}"
|
||||
attr:
|
||||
- FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [smlal2]]}]]
|
||||
- FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
|
||||
- FnCall: [cfg_attr, [*all-test-little-endian, {FnCall: [assert_instr, [smlal2]]}]]
|
||||
- *neon-stable
|
||||
safety: safe
|
||||
types:
|
||||
- [int16x8_t, int8x16_t, int8x8_t, '[8, 9, 10, 11, 12, 13, 14, 15]', '[8, 9, 10, 11, 12, 13, 14, 15]']
|
||||
- [int32x4_t, int16x8_t, int16x4_t, '[4, 5, 6, 7]', '[4, 5, 6, 7]']
|
||||
- [int64x2_t, int32x4_t, int32x2_t, '[2, 3]', '[2, 3]']
|
||||
- [int16x8_t, int8x16_t, int8x8_t]
|
||||
- [int32x4_t, int16x8_t, int16x4_t]
|
||||
- [int64x2_t, int32x4_t, int32x2_t]
|
||||
compose:
|
||||
- Let: [b, "{neon_type[2]}", {FnCall: [simd_shuffle!, [b, b, "{type[3]}"]]}]
|
||||
- Let: [c, "{neon_type[2]}", {FnCall: [simd_shuffle!, [c, c, "{type[4]}"]]}]
|
||||
- Let: [b, {FnCall: ['vget_high_{neon_type[1]}', [b]]}]
|
||||
- Let: [c, {FnCall: ['vget_high_{neon_type[1]}', [c]]}]
|
||||
- FnCall: ["vmlal_{neon_type[2]}", [a, b, c]]
|
||||
|
||||
- name: "vmlal_high_{neon_type[1]}"
|
||||
@@ -2712,22 +2529,16 @@ intrinsics:
|
||||
arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}", "c: {neon_type[1]}"]
|
||||
return_type: "{neon_type[0]}"
|
||||
attr:
|
||||
- FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [umlal2]]}]]
|
||||
- FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
|
||||
- FnCall: [cfg_attr, [*all-test-little-endian, {FnCall: [assert_instr, [umlal2]]}]]
|
||||
- *neon-stable
|
||||
safety: safe
|
||||
types:
|
||||
- [uint16x8_t, uint8x16_t, uint8x8_t, '[8, 9, 10, 11, 12, 13, 14, 15]']
|
||||
- [uint32x4_t, uint16x8_t, uint16x4_t, '[4, 5, 6, 7]']
|
||||
- [uint64x2_t, uint32x4_t, uint32x2_t, '[2, 3]']
|
||||
- [uint16x8_t, uint8x16_t, uint8x8_t]
|
||||
- [uint32x4_t, uint16x8_t, uint16x4_t]
|
||||
- [uint64x2_t, uint32x4_t, uint32x2_t]
|
||||
compose:
|
||||
- Let:
|
||||
- b
|
||||
- "{neon_type[2]}"
|
||||
- FnCall: [simd_shuffle!, [b, b, "{type[3]}"]]
|
||||
- Let:
|
||||
- c
|
||||
- "{neon_type[2]}"
|
||||
- FnCall: [simd_shuffle!, [c, c, "{type[3]}"]]
|
||||
- Let: [b, {FnCall: ['vget_high_{neon_type[1]}', [b]]}]
|
||||
- Let: [c, {FnCall: ['vget_high_{neon_type[1]}', [c]]}]
|
||||
- FnCall: ["vmlal_{neon_type[1]}", [a, b, c]]
|
||||
|
||||
- name: "vmlsl_high_{neon_type[1]}"
|
||||
@@ -2735,22 +2546,16 @@ intrinsics:
|
||||
arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}", "c: {neon_type[1]}"]
|
||||
return_type: "{neon_type[0]}"
|
||||
attr:
|
||||
- FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [smlsl2]]}]]
|
||||
- FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
|
||||
- FnCall: [cfg_attr, [*all-test-little-endian, {FnCall: [assert_instr, [smlsl2]]}]]
|
||||
- *neon-stable
|
||||
safety: safe
|
||||
types:
|
||||
- [int16x8_t, int8x16_t, int8x8_t, '[8, 9, 10, 11, 12, 13, 14, 15]']
|
||||
- [int32x4_t, int16x8_t, int16x4_t, '[4, 5, 6, 7]']
|
||||
- [int64x2_t, int32x4_t, int32x2_t, '[2, 3]']
|
||||
- [int16x8_t, int8x16_t, int8x8_t]
|
||||
- [int32x4_t, int16x8_t, int16x4_t]
|
||||
- [int64x2_t, int32x4_t, int32x2_t]
|
||||
compose:
|
||||
- Let:
|
||||
- b
|
||||
- "{neon_type[2]}"
|
||||
- FnCall: [simd_shuffle!, [b, b, "{type[3]}"]]
|
||||
- Let:
|
||||
- c
|
||||
- "{neon_type[2]}"
|
||||
- FnCall: [simd_shuffle!, [c, c, "{type[3]}"]]
|
||||
- Let: [b, {FnCall: ['vget_high_{neon_type[1]}', [b]]}]
|
||||
- Let: [c, {FnCall: ['vget_high_{neon_type[1]}', [c]]}]
|
||||
- FnCall: ["vmlsl_{neon_type[1]}", [a, b, c]]
|
||||
|
||||
- name: "vmlsl_high_{neon_type[1]}"
|
||||
@@ -2758,44 +2563,38 @@ intrinsics:
|
||||
arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}", "c: {neon_type[1]}"]
|
||||
return_type: "{neon_type[0]}"
|
||||
attr:
|
||||
- FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [umlsl2]]}]]
|
||||
- FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
|
||||
- FnCall: [cfg_attr, [*all-test-little-endian, {FnCall: [assert_instr, [umlsl2]]}]]
|
||||
- *neon-stable
|
||||
safety: safe
|
||||
types:
|
||||
- [uint16x8_t, uint8x16_t, uint8x8_t, '[8, 9, 10, 11, 12, 13, 14, 15]']
|
||||
- [uint32x4_t, uint16x8_t, uint16x4_t, '[4, 5, 6, 7]']
|
||||
- [uint64x2_t, uint32x4_t, uint32x2_t, '[2, 3]']
|
||||
- [uint16x8_t, uint8x16_t, uint8x8_t]
|
||||
- [uint32x4_t, uint16x8_t, uint16x4_t]
|
||||
- [uint64x2_t, uint32x4_t, uint32x2_t]
|
||||
compose:
|
||||
- Let: [b, "{neon_type[2]}", {FnCall: [simd_shuffle!, [b, b, "{type[3]}"]]}]
|
||||
- Let: [c, "{neon_type[2]}", {FnCall: [simd_shuffle!, [c, c, "{type[3]}"]]}]
|
||||
- Let: [b, {FnCall: ['vget_high_{neon_type[1]}', [b]]}]
|
||||
- Let: [c, {FnCall: ['vget_high_{neon_type[1]}', [c]]}]
|
||||
- FnCall: ["vmlsl_{neon_type[1]}", [a, b, c]]
|
||||
|
||||
- name: "vmovn_high{neon_type[1].noq}"
|
||||
doc: Extract narrow
|
||||
arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}"]
|
||||
return_type: "{neon_type[2]}"
|
||||
attr: [*neon-stable]
|
||||
assert_instr: [xtn2]
|
||||
attr:
|
||||
- *neon-stable
|
||||
- FnCall: [cfg_attr, [*all-test-little-endian, {FnCall: [assert_instr, [xtn2]]}]]
|
||||
safety: safe
|
||||
types:
|
||||
- [int8x8_t, int16x8_t, int8x16_t, '[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]']
|
||||
- [int16x4_t, int32x4_t, int16x8_t, '[0, 1, 2, 3, 4, 5, 6, 7]']
|
||||
- [int32x2_t, int64x2_t, int32x4_t, '[0, 1, 2, 3]']
|
||||
- [uint8x8_t, uint16x8_t, uint8x16_t, '[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]']
|
||||
- [uint16x4_t, uint32x4_t, uint16x8_t, '[0, 1, 2, 3, 4, 5, 6, 7]']
|
||||
- [uint32x2_t, uint64x2_t, uint32x4_t, '[0, 1, 2, 3]']
|
||||
- [int8x8_t, int16x8_t, int8x16_t]
|
||||
- [int16x4_t, int32x4_t, int16x8_t]
|
||||
- [int32x2_t, int64x2_t, int32x4_t]
|
||||
- [uint8x8_t, uint16x8_t, uint8x16_t]
|
||||
- [uint16x4_t, uint32x4_t, uint16x8_t]
|
||||
- [uint32x2_t, uint64x2_t, uint32x4_t]
|
||||
compose:
|
||||
- Let:
|
||||
- c
|
||||
- "{neon_type[0]}"
|
||||
- FnCall:
|
||||
- simd_cast
|
||||
- - b
|
||||
- FnCall:
|
||||
- simd_shuffle!
|
||||
- 'vcombine_{neon_type[0]}'
|
||||
- - a
|
||||
- c
|
||||
- "{type[3]}"
|
||||
- FnCall: ['simd_cast', [b]]
|
||||
|
||||
- name: "vneg{neon_type.no}"
|
||||
doc: Negate
|
||||
@@ -2873,11 +2672,11 @@ intrinsics:
|
||||
- [i64, 'd_s64', 's64']
|
||||
compose:
|
||||
- FnCall:
|
||||
- 'simd_extract!'
|
||||
- 'vget_lane_{type[2]}'
|
||||
- - FnCall:
|
||||
- 'vqneg_{type[2]}'
|
||||
- - FnCall: ['vdup_n_{type[2]}', [a]]
|
||||
- 0
|
||||
- - 0
|
||||
|
||||
- name: "vqneg{neon_type[0].no}"
|
||||
doc: Signed saturating negate
|
||||
@@ -2954,12 +2753,12 @@ intrinsics:
|
||||
- "vdup_n_{type[2]}"
|
||||
- - b
|
||||
- FnCall:
|
||||
- 'simd_extract!'
|
||||
- 'vget_lane_{type[2]}'
|
||||
- - FnCall:
|
||||
- "vqsub_{type[2]}"
|
||||
- - a
|
||||
- b
|
||||
- "0"
|
||||
- - "0"
|
||||
|
||||
- name: "vqsub{type[3]}"
|
||||
doc: Saturating subtract
|
||||
@@ -2985,12 +2784,12 @@ intrinsics:
|
||||
- "vdup_n_{type[2]}"
|
||||
- - b
|
||||
- FnCall:
|
||||
- 'simd_extract!'
|
||||
- 'vget_lane_{type[2]}'
|
||||
- - FnCall:
|
||||
- "vqsub_{type[2]}"
|
||||
- - a
|
||||
- b
|
||||
- "0"
|
||||
- - "0"
|
||||
|
||||
- name: "vrbit{neon_type.no}"
|
||||
doc: Reverse bit order
|
||||
@@ -3439,12 +3238,12 @@ intrinsics:
|
||||
- "vdup_n_{type[0]}"
|
||||
- - b
|
||||
- FnCall:
|
||||
- simd_extract!
|
||||
- 'vget_lane_{type[0]}'
|
||||
- - FnCall:
|
||||
- "vqadd_{type[0]}"
|
||||
- - a
|
||||
- b
|
||||
- "0"
|
||||
- - "0"
|
||||
|
||||
- name: "vqadd{type[2]}"
|
||||
doc: Saturating add
|
||||
@@ -3470,12 +3269,12 @@ intrinsics:
|
||||
- "vdup_n_{type[0]}"
|
||||
- - b
|
||||
- FnCall:
|
||||
- simd_extract!
|
||||
- 'vget_lane_{type[0]}'
|
||||
- - FnCall:
|
||||
- "vqadd_{type[0]}"
|
||||
- - a
|
||||
- b
|
||||
- "0"
|
||||
- - "0"
|
||||
|
||||
- name: "vld1{neon_type[1].no}"
|
||||
doc: "Load multiple single-element structures to one, two, three, or four registers"
|
||||
@@ -3712,7 +3511,6 @@ intrinsics:
|
||||
return_type: "{neon_type[1]}"
|
||||
attr: [*neon-stable]
|
||||
assert_instr: [ld2]
|
||||
big_endian_inverse: false
|
||||
safety:
|
||||
unsafe: [neon]
|
||||
types:
|
||||
@@ -4059,7 +3857,6 @@ intrinsics:
|
||||
arguments: ["a: {type[0]}"]
|
||||
return_type: "{neon_type[1]}"
|
||||
attr: [*neon-stable]
|
||||
big_endian_inverse: false
|
||||
safety:
|
||||
unsafe: [neon]
|
||||
assert_instr: [ld3]
|
||||
@@ -4198,7 +3995,6 @@ intrinsics:
|
||||
return_type: "{neon_type[1]}"
|
||||
attr: [*neon-stable]
|
||||
assert_instr: [ld4]
|
||||
big_endian_inverse: false
|
||||
safety:
|
||||
unsafe: [neon]
|
||||
types:
|
||||
@@ -4306,7 +4102,6 @@ intrinsics:
|
||||
- *neon-stable
|
||||
static_defs:
|
||||
- "const LANE: i32"
|
||||
big_endian_inverse: false
|
||||
safety:
|
||||
unsafe: [neon]
|
||||
types:
|
||||
@@ -4356,7 +4151,6 @@ intrinsics:
|
||||
- *neon-stable
|
||||
static_defs:
|
||||
- "const LANE: i32"
|
||||
big_endian_inverse: false
|
||||
safety:
|
||||
unsafe: [neon]
|
||||
types:
|
||||
@@ -4480,7 +4274,7 @@ intrinsics:
|
||||
- Let:
|
||||
- "lane"
|
||||
- i64
|
||||
- FnCall: [simd_extract!, [val, 'LANE as u32']]
|
||||
- FnCall: ['vget{neon_type[1].lane_nox}', [val], [LANE]]
|
||||
- MethodCall:
|
||||
- "(*atomic_dst)"
|
||||
- store
|
||||
@@ -5152,45 +4946,35 @@ intrinsics:
|
||||
- name: "vmull_high{neon_type[0].noq}"
|
||||
doc: Signed multiply long
|
||||
arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}"]
|
||||
return_type: "{neon_type[3]}"
|
||||
attr: [*neon-stable]
|
||||
assert_instr: [smull2]
|
||||
return_type: "{neon_type[1]}"
|
||||
attr:
|
||||
- *neon-stable
|
||||
- FnCall: [cfg_attr, [*all-test-little-endian, {FnCall: [assert_instr, [smull2]]}]]
|
||||
safety: safe
|
||||
types:
|
||||
- [int8x16_t, int8x8_t, '[8, 9, 10, 11, 12, 13, 14, 15]', int16x8_t]
|
||||
- [int16x8_t, int16x4_t, '[4, 5, 6, 7]', int32x4_t]
|
||||
- [int32x4_t, int32x2_t, '[2, 3]', int64x2_t]
|
||||
- [int8x16_t, int16x8_t]
|
||||
- [int16x8_t, int32x4_t]
|
||||
- [int32x4_t, int64x2_t]
|
||||
compose:
|
||||
- Let:
|
||||
- a
|
||||
- "{neon_type[1]}"
|
||||
- FnCall: [simd_shuffle!, [a, a, "{type[2]}"]]
|
||||
- Let:
|
||||
- b
|
||||
- "{neon_type[1]}"
|
||||
- FnCall: [simd_shuffle!, [b, b, "{type[2]}"]]
|
||||
- Let: [a, {FnCall: ['vget_high_{neon_type[0]}', [a]]}]
|
||||
- Let: [b, {FnCall: ['vget_high_{neon_type[0]}', [b]]}]
|
||||
- FnCall: ["vmull_{neon_type[0]}", [a, b]]
|
||||
|
||||
- name: "vmull_high{neon_type[0].noq}"
|
||||
doc: "Unsigned multiply long"
|
||||
arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}"]
|
||||
return_type: "{neon_type[3]}"
|
||||
attr: [*neon-stable]
|
||||
assert_instr: [umull2]
|
||||
return_type: "{neon_type[1]}"
|
||||
attr:
|
||||
- *neon-stable
|
||||
- FnCall: [cfg_attr, [*all-test-little-endian, {FnCall: [assert_instr, [umull2]]}]]
|
||||
safety: safe
|
||||
types:
|
||||
- [uint8x16_t, uint8x8_t, '[8, 9, 10, 11, 12, 13, 14, 15]', uint16x8_t]
|
||||
- [uint16x8_t, uint16x4_t, '[4, 5, 6, 7]', uint32x4_t]
|
||||
- [uint32x4_t, uint32x2_t, '[2, 3]', uint64x2_t]
|
||||
- [uint8x16_t, uint16x8_t]
|
||||
- [uint16x8_t, uint32x4_t]
|
||||
- [uint32x4_t, uint64x2_t]
|
||||
compose:
|
||||
- Let:
|
||||
- a
|
||||
- "{neon_type[1]}"
|
||||
- FnCall: [simd_shuffle!, [a, a, "{type[2]}"]]
|
||||
- Let:
|
||||
- b
|
||||
- "{neon_type[1]}"
|
||||
- FnCall: [simd_shuffle!, [b, b, "{type[2]}"]]
|
||||
- Let: [a, {FnCall: ['vget_high_{neon_type[0]}', [a]]}]
|
||||
- Let: [b, {FnCall: ['vget_high_{neon_type[0]}', [b]]}]
|
||||
- FnCall: ["vmull_{neon_type[0]}", [a, b]]
|
||||
|
||||
- name: "vmull_p64"
|
||||
@@ -5216,22 +5000,16 @@ intrinsics:
|
||||
- name: "vmull_high{neon_type[0].noq}"
|
||||
doc: "Polynomial multiply long"
|
||||
arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}"]
|
||||
return_type: "{neon_type[3]}"
|
||||
return_type: "{neon_type[1]}"
|
||||
attr:
|
||||
- *neon-stable
|
||||
- FnCall: [cfg_attr, [*all-test-little-endian, {FnCall: [assert_instr, [pmull2]]}]]
|
||||
safety: safe
|
||||
assert_instr: [pmull2]
|
||||
types:
|
||||
- [poly8x16_t, poly8x8_t, '[8, 9, 10, 11, 12, 13, 14, 15]', poly16x8_t]
|
||||
- [poly8x16_t, poly16x8_t]
|
||||
compose:
|
||||
- Let:
|
||||
- a
|
||||
- "{neon_type[1]}"
|
||||
- FnCall: [simd_shuffle!, [a, a, "{type[2]}"]]
|
||||
- Let:
|
||||
- b
|
||||
- "{neon_type[1]}"
|
||||
- FnCall: [simd_shuffle!, [b, b, "{type[2]}"]]
|
||||
- Let: [a, {FnCall: ['vget_high_{neon_type[0]}', [a]]}]
|
||||
- Let: [b, {FnCall: ['vget_high_{neon_type[0]}', [b]]}]
|
||||
- FnCall: ["vmull_{neon_type[0]}", [a, b]]
|
||||
|
||||
- name: "vmull_high{neon_type[0].noq}"
|
||||
@@ -5241,15 +5019,15 @@ intrinsics:
|
||||
attr:
|
||||
- *neon-aes
|
||||
- *neon-stable
|
||||
- FnCall: [cfg_attr, [*all-test-little-endian, {FnCall: [assert_instr, [pmull2]]}]]
|
||||
safety: safe
|
||||
assert_instr: [pmull2]
|
||||
types:
|
||||
- [poly64x2_t, "p128"]
|
||||
compose:
|
||||
- FnCall:
|
||||
- "vmull_{neon_type[0]}"
|
||||
- - FnCall: [simd_extract!, [a, '1']]
|
||||
- FnCall: [simd_extract!, [b, '1']]
|
||||
- - FnCall: ['vget{neon_type[0].lane_nox}', [a], [1]]
|
||||
- FnCall: ['vget{neon_type[0].lane_nox}', [b], [1]]
|
||||
|
||||
- name: "vmulx{neon_type.no}"
|
||||
doc: Floating-point multiply extended
|
||||
@@ -5348,11 +5126,8 @@ intrinsics:
|
||||
- vmulx_f64
|
||||
- - a
|
||||
- FnCall:
|
||||
- 'transmute::<f64, _>'
|
||||
- - FnCall:
|
||||
- "simd_extract!"
|
||||
- - b
|
||||
- 'LANE as u32'
|
||||
- 'transmute'
|
||||
- - FnCall: ['vget{neon_type.lane_nox}', [b], [LANE]]
|
||||
|
||||
- name: "vmulx{type[0]}"
|
||||
doc: Floating-point multiply extended
|
||||
@@ -5371,11 +5146,7 @@ intrinsics:
|
||||
- FnCall:
|
||||
- "vmulx{type[3]}"
|
||||
- - a
|
||||
- FnCall:
|
||||
- "simd_shuffle!"
|
||||
- - b
|
||||
- b
|
||||
- "{type[4]}"
|
||||
- FnCall: ['vdup{type[0]}', [b], [LANE]]
|
||||
|
||||
- name: "vmulx{type[0]}"
|
||||
doc: Floating-point multiply extended
|
||||
@@ -5388,16 +5159,13 @@ intrinsics:
|
||||
static_defs: ["const LANE: i32"]
|
||||
safety: safe
|
||||
types:
|
||||
- ["d_lane_f64", "f64", float64x1_t, "d_f64", 'LANE as u32']
|
||||
- ["d_lane_f64", "f64", float64x1_t, "d_f64"]
|
||||
compose:
|
||||
- FnCall: [static_assert!, ['LANE == 0']]
|
||||
- FnCall:
|
||||
- "vmulx{type[3]}"
|
||||
- - a
|
||||
- FnCall:
|
||||
- "simd_extract!"
|
||||
- - b
|
||||
- "{type[4]}"
|
||||
- FnCall: ['vget{neon_type[2].lane_nox}', [b], [LANE]]
|
||||
|
||||
- name: "vmulx_laneq_f64"
|
||||
doc: Floating-point multiply extended
|
||||
@@ -5417,11 +5185,8 @@ intrinsics:
|
||||
- vmulx_f64
|
||||
- - a
|
||||
- FnCall:
|
||||
- 'transmute::<f64, _>'
|
||||
- - FnCall:
|
||||
- "simd_extract!"
|
||||
- - b
|
||||
- 'LANE as u32'
|
||||
- 'transmute'
|
||||
- - FnCall: ['vget{neon_type[1].lane_nox}', [b], [LANE]]
|
||||
|
||||
- name: "vmulx{type[0]}"
|
||||
doc: Floating-point multiply extended
|
||||
@@ -5434,21 +5199,17 @@ intrinsics:
|
||||
static_defs: ["const LANE: i32"]
|
||||
safety: safe
|
||||
types:
|
||||
- ['_lane_f32', float32x2_t, float32x2_t, '1', '_f32', '[LANE as u32; 2]']
|
||||
- ['_laneq_f32', float32x2_t, float32x4_t, '2', '_f32', '[LANE as u32; 2]']
|
||||
- ['q_lane_f32', float32x4_t, float32x2_t, '1', 'q_f32', '[LANE as u32; 4]']
|
||||
- ['q_laneq_f32', float32x4_t, float32x4_t, '2', 'q_f32', '[LANE as u32; 4]']
|
||||
- ['q_laneq_f64', float64x2_t, float64x2_t, '1', 'q_f64', '[LANE as u32; 2]']
|
||||
- ['_lane_f32', float32x2_t, float32x2_t, '1', '_f32']
|
||||
- ['_laneq_f32', float32x2_t, float32x4_t, '2', '_f32']
|
||||
- ['q_lane_f32', float32x4_t, float32x2_t, '1', 'q_f32']
|
||||
- ['q_laneq_f32', float32x4_t, float32x4_t, '2', 'q_f32']
|
||||
- ['q_laneq_f64', float64x2_t, float64x2_t, '1', 'q_f64']
|
||||
compose:
|
||||
- FnCall: [static_assert_uimm_bits!, ['LANE', "{type[3]}"]]
|
||||
- FnCall:
|
||||
- "vmulx{type[4]}"
|
||||
- - a
|
||||
- FnCall:
|
||||
- "simd_shuffle!"
|
||||
- - b
|
||||
- b
|
||||
- "{type[5]}"
|
||||
- FnCall: ['vdup{type[0]}', [b], [LANE]]
|
||||
|
||||
|
||||
- name: "vmulx{type[0]}"
|
||||
@@ -5464,20 +5225,16 @@ intrinsics:
|
||||
static_defs: ["const LANE: i32"]
|
||||
safety: safe
|
||||
types:
|
||||
- ['_lane_f16', float16x4_t, float16x4_t, '2', '_f16', '[LANE as u32; 4]']
|
||||
- ['_laneq_f16', float16x4_t, float16x8_t, '3', '_f16', '[LANE as u32; 4]']
|
||||
- ['q_lane_f16', float16x8_t, float16x4_t, '2', 'q_f16', '[LANE as u32; 8]']
|
||||
- ['q_laneq_f16', float16x8_t, float16x8_t, '3', 'q_f16', '[LANE as u32; 8]']
|
||||
- ['_lane_f16', float16x4_t, float16x4_t, '2', '_f16']
|
||||
- ['_laneq_f16', float16x4_t, float16x8_t, '3', '_f16']
|
||||
- ['q_lane_f16', float16x8_t, float16x4_t, '2', 'q_f16']
|
||||
- ['q_laneq_f16', float16x8_t, float16x8_t, '3', 'q_f16']
|
||||
compose:
|
||||
- FnCall: [static_assert_uimm_bits!, ['LANE', "{type[3]}"]]
|
||||
- FnCall:
|
||||
- "vmulx{type[4]}"
|
||||
- - a
|
||||
- FnCall:
|
||||
- "simd_shuffle!"
|
||||
- - b
|
||||
- b
|
||||
- "{type[5]}"
|
||||
- FnCall: ['vdup{type[0]}', [b], [LANE]]
|
||||
|
||||
|
||||
- name: "vmulx{type[0]}"
|
||||
@@ -5491,18 +5248,15 @@ intrinsics:
|
||||
static_defs: ["const LANE: i32"]
|
||||
safety: safe
|
||||
types:
|
||||
- ['s_lane_f32', f32, float32x2_t, '1', 's_f32', 'LANE as u32']
|
||||
- ['s_laneq_f32', f32, float32x4_t, '2', 's_f32', 'LANE as u32']
|
||||
- ['d_laneq_f64', f64, float64x2_t, '1', 'd_f64', 'LANE as u32']
|
||||
- ['s_lane_f32', f32, float32x2_t, '1', 's_f32']
|
||||
- ['s_laneq_f32', f32, float32x4_t, '2', 's_f32']
|
||||
- ['d_laneq_f64', f64, float64x2_t, '1', 'd_f64']
|
||||
compose:
|
||||
- FnCall: [static_assert_uimm_bits!, ['LANE', "{type[3]}"]]
|
||||
- FnCall:
|
||||
- "vmulx{type[4]}"
|
||||
- - a
|
||||
- FnCall:
|
||||
- "simd_extract!"
|
||||
- - b
|
||||
- "{type[5]}"
|
||||
- FnCall: ['vget{neon_type[2].lane_nox}', [b], [LANE]]
|
||||
|
||||
|
||||
- name: "vmulx{type[0]}"
|
||||
@@ -5525,10 +5279,7 @@ intrinsics:
|
||||
- FnCall:
|
||||
- "vmulx{type[4]}"
|
||||
- - a
|
||||
- FnCall:
|
||||
- "simd_extract!"
|
||||
- - b
|
||||
- "{type[5]}"
|
||||
- FnCall: ['vget{neon_type[2].lane_nox}', [b], [LANE]]
|
||||
|
||||
|
||||
- name: "vmulx{neon_type[0].N}"
|
||||
@@ -5822,18 +5573,18 @@ intrinsics:
|
||||
doc: Signed Subtract Wide
|
||||
arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}"]
|
||||
return_type: "{neon_type[0]}"
|
||||
attr: [*neon-stable]
|
||||
assert_instr: [ssubw2]
|
||||
attr:
|
||||
- *neon-stable
|
||||
- FnCall: [cfg_attr, [*all-test-little-endian, {FnCall: [assert_instr, [ssubw2]]}]]
|
||||
safety: safe
|
||||
types:
|
||||
- [int16x8_t, int8x16_t, int8x8_t, '[8, 9, 10, 11, 12, 13, 14, 15]']
|
||||
- [int32x4_t, int16x8_t, int16x4_t, '[4, 5, 6, 7]']
|
||||
- [int64x2_t, int32x4_t, int32x2_t, '[2, 3]']
|
||||
- [int16x8_t, int8x16_t]
|
||||
- [int32x4_t, int16x8_t]
|
||||
- [int64x2_t, int32x4_t]
|
||||
compose:
|
||||
- Let:
|
||||
- c
|
||||
- "{neon_type[2]}"
|
||||
- FnCall: [simd_shuffle!, [b, b, "{type[3]}"]]
|
||||
- FnCall: ['vget_high_{neon_type[1]}', [b]]
|
||||
- FnCall:
|
||||
- simd_sub
|
||||
- - a
|
||||
@@ -5843,18 +5594,18 @@ intrinsics:
|
||||
doc: Unsigned Subtract Wide
|
||||
arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}"]
|
||||
return_type: "{neon_type[0]}"
|
||||
attr: [*neon-stable]
|
||||
assert_instr: [usubw2]
|
||||
attr:
|
||||
- *neon-stable
|
||||
- FnCall: [cfg_attr, [*all-test-little-endian, {FnCall: [assert_instr, [usubw2]]}]]
|
||||
safety: safe
|
||||
types:
|
||||
- [uint16x8_t, uint8x16_t, uint8x8_t, '[8, 9, 10, 11, 12, 13, 14, 15]']
|
||||
- [uint32x4_t, uint16x8_t, uint16x4_t, '[4, 5, 6, 7]']
|
||||
- [uint64x2_t, uint32x4_t, uint32x2_t, '[2, 3]']
|
||||
- [uint16x8_t, uint8x16_t]
|
||||
- [uint32x4_t, uint16x8_t]
|
||||
- [uint64x2_t, uint32x4_t]
|
||||
compose:
|
||||
- Let:
|
||||
- c
|
||||
- "{neon_type[2]}"
|
||||
- FnCall: [simd_shuffle!, [b, b, "{type[3]}"]]
|
||||
- FnCall: ['vget_high_{neon_type[1]}', [b]]
|
||||
- FnCall:
|
||||
- simd_sub
|
||||
- - a
|
||||
@@ -5864,61 +5615,47 @@ intrinsics:
|
||||
doc: "Signed Subtract Long"
|
||||
arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}"]
|
||||
return_type: "{neon_type[1]}"
|
||||
attr: [*neon-stable]
|
||||
assert_instr: [ssubl2]
|
||||
attr:
|
||||
- *neon-stable
|
||||
- FnCall: [cfg_attr, [*all-test-little-endian, {FnCall: [assert_instr, [ssubl2]]}]]
|
||||
safety: safe
|
||||
types:
|
||||
- [int8x16_t, int16x8_t, '[8, 9, 10, 11, 12, 13, 14, 15]', int8x8_t]
|
||||
- [int16x8_t, int32x4_t, '[4, 5, 6, 7]', int16x4_t]
|
||||
- [int32x4_t, int64x2_t, '[2, 3]', int32x2_t]
|
||||
- [int8x16_t, int16x8_t]
|
||||
- [int32x4_t, int64x2_t]
|
||||
- [int16x8_t, int32x4_t]
|
||||
compose:
|
||||
- Let:
|
||||
- c
|
||||
- "{neon_type[3]}"
|
||||
- FnCall: [simd_shuffle!, [a, a, "{type[2]}"]]
|
||||
- "{neon_type[1]}"
|
||||
- FnCall: [simd_cast, [{FnCall: ['vget_high_{neon_type[0]}', [a]]}]]
|
||||
- Let:
|
||||
- d
|
||||
- "{neon_type[1]}"
|
||||
- FnCall: [simd_cast, [c]]
|
||||
- Let:
|
||||
- e
|
||||
- "{neon_type[3]}"
|
||||
- FnCall: [simd_shuffle!, [b, b, "{type[2]}"]]
|
||||
- Let:
|
||||
- f
|
||||
- "{neon_type[1]}"
|
||||
- FnCall: [simd_cast, [e]]
|
||||
- FnCall: [simd_sub, [d, f]]
|
||||
- FnCall: [simd_cast, [{FnCall: ['vget_high_{neon_type[0]}', [b]]}]]
|
||||
- FnCall: [simd_sub, [c, d]]
|
||||
|
||||
- name: "vsubl_high{neon_type[0].noq}"
|
||||
doc: "Unsigned Subtract Long"
|
||||
arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}"]
|
||||
return_type: "{neon_type[1]}"
|
||||
attr: [*neon-stable]
|
||||
assert_instr: [usubl2]
|
||||
attr:
|
||||
- *neon-stable
|
||||
- FnCall: [cfg_attr, [*all-test-little-endian, {FnCall: [assert_instr, [usubl2]]}]]
|
||||
safety: safe
|
||||
types:
|
||||
- [uint8x16_t, uint16x8_t, '[8, 9, 10, 11, 12, 13, 14, 15]', uint8x8_t]
|
||||
- [uint16x8_t, uint32x4_t, '[4, 5, 6, 7]', uint16x4_t]
|
||||
- [uint32x4_t, uint64x2_t, '[2, 3]', uint32x2_t]
|
||||
- [uint8x16_t, uint16x8_t]
|
||||
- [uint16x8_t, uint32x4_t]
|
||||
- [uint32x4_t, uint64x2_t]
|
||||
compose:
|
||||
- Let:
|
||||
- c
|
||||
- "{neon_type[3]}"
|
||||
- FnCall: [simd_shuffle!, [a, a, "{type[2]}"]]
|
||||
- "{neon_type[1]}"
|
||||
- FnCall: [simd_cast, [{FnCall: ['vget_high_{neon_type[0]}', [a]]}]]
|
||||
- Let:
|
||||
- d
|
||||
- "{neon_type[1]}"
|
||||
- FnCall: [simd_cast, [c]]
|
||||
- Let:
|
||||
- e
|
||||
- "{neon_type[3]}"
|
||||
- FnCall: [simd_shuffle!, [b, b, "{type[2]}"]]
|
||||
- Let:
|
||||
- f
|
||||
- "{neon_type[1]}"
|
||||
- FnCall: [simd_cast, [e]]
|
||||
- FnCall: [simd_sub, [d, f]]
|
||||
- FnCall: [simd_cast, [{FnCall: ['vget_high_{neon_type[0]}', [b]]}]]
|
||||
- FnCall: [simd_sub, [c, d]]
|
||||
|
||||
- name: "vbcax{neon_type.no}"
|
||||
doc: Bit clear and exclusive OR
|
||||
@@ -5971,6 +5708,7 @@ intrinsics:
|
||||
- *neon-unstable-fcma
|
||||
assert_instr: [fcadd]
|
||||
safety: safe
|
||||
big_endian_inverse: true
|
||||
types:
|
||||
- float32x2_t
|
||||
- float32x4_t
|
||||
@@ -5991,6 +5729,7 @@ intrinsics:
|
||||
- *neon-unstable-fcma
|
||||
assert_instr: [fcadd]
|
||||
safety: safe
|
||||
big_endian_inverse: true
|
||||
types:
|
||||
- float32x2_t
|
||||
- float32x4_t
|
||||
@@ -6013,6 +5752,7 @@ intrinsics:
|
||||
- *target-not-arm64ec
|
||||
assert_instr: [fcadd]
|
||||
safety: safe
|
||||
big_endian_inverse: true
|
||||
types:
|
||||
- float16x4_t
|
||||
- float16x8_t
|
||||
@@ -6034,6 +5774,7 @@ intrinsics:
|
||||
- *target-not-arm64ec
|
||||
assert_instr: [fcadd]
|
||||
safety: safe
|
||||
big_endian_inverse: true
|
||||
types:
|
||||
- float16x4_t
|
||||
- float16x8_t
|
||||
@@ -6053,6 +5794,7 @@ intrinsics:
|
||||
- *neon-unstable-fcma
|
||||
assert_instr: [fcmla]
|
||||
safety: safe
|
||||
big_endian_inverse: true
|
||||
types:
|
||||
- float32x2_t
|
||||
- float32x4_t
|
||||
@@ -6075,6 +5817,7 @@ intrinsics:
|
||||
- *target-not-arm64ec
|
||||
assert_instr: [fcmla]
|
||||
safety: safe
|
||||
big_endian_inverse: true
|
||||
types:
|
||||
- float16x4_t
|
||||
- float16x8_t
|
||||
@@ -6094,6 +5837,7 @@ intrinsics:
|
||||
- *neon-unstable-fcma
|
||||
assert_instr: [fcmla]
|
||||
safety: safe
|
||||
big_endian_inverse: true
|
||||
types:
|
||||
- float32x2_t
|
||||
- float32x4_t
|
||||
@@ -6116,6 +5860,7 @@ intrinsics:
|
||||
- *target-not-arm64ec
|
||||
assert_instr: [fcmla]
|
||||
safety: safe
|
||||
big_endian_inverse: true
|
||||
types:
|
||||
- float16x4_t
|
||||
- float16x8_t
|
||||
@@ -6135,6 +5880,7 @@ intrinsics:
|
||||
- *neon-unstable-fcma
|
||||
assert_instr: [fcmla]
|
||||
safety: safe
|
||||
big_endian_inverse: true
|
||||
types:
|
||||
- float32x2_t
|
||||
- float32x4_t
|
||||
@@ -6158,6 +5904,7 @@ intrinsics:
|
||||
- *target-not-arm64ec
|
||||
assert_instr: [fcmla]
|
||||
safety: safe
|
||||
big_endian_inverse: true
|
||||
types:
|
||||
- float16x4_t
|
||||
- float16x8_t
|
||||
@@ -6180,14 +5927,13 @@ intrinsics:
|
||||
static_defs: ["const LANE: i32"]
|
||||
safety: safe
|
||||
types:
|
||||
- [float32x2_t, float32x4_t, '[2 * LANE as u32, 2 * LANE as u32 + 1]']
|
||||
- [float32x4_t, float32x4_t, '[2 * LANE as u32, 2 * LANE as u32 + 1, 2 * LANE as u32, 2 * LANE as u32 + 1]']
|
||||
- [float32x2_t, float32x4_t, '']
|
||||
- [float32x4_t, float32x4_t, 'q']
|
||||
compose:
|
||||
- FnCall: [static_assert_uimm_bits!, [LANE, 1]]
|
||||
- Let:
|
||||
- c
|
||||
- "{neon_type[0]}"
|
||||
- FnCall: [simd_shuffle!, [c, c, "{type[2]}"]]
|
||||
- Let: [c, {FnCall: [vreinterpretq_u64_f32, [c]]}]
|
||||
- Let: [c, {FnCall: ['vdup{type[2]}_laneq_u64', [c], [LANE]]}]
|
||||
- Let: [c, {FnCall: ['vreinterpret{type[2]}_f32_u64', [c]]}]
|
||||
- FnCall: ["vcmla{neon_type[0].no}", [a, b, c]]
|
||||
|
||||
- name: "vcmla{neon_type[0].laneq_nox}"
|
||||
@@ -6204,14 +5950,13 @@ intrinsics:
|
||||
static_defs: ["const LANE: i32"]
|
||||
safety: safe
|
||||
types:
|
||||
- [float16x4_t, float16x8_t, '[2 * LANE as u32, 2 * LANE as u32 + 1, 2 * LANE as u32, 2 * LANE as u32 + 1]']
|
||||
- [float16x8_t, float16x8_t, '[2 * LANE as u32, 2 * LANE as u32 + 1, 2 * LANE as u32, 2 * LANE as u32 + 1, 2 * LANE as u32, 2 * LANE as u32 + 1, 2 * LANE as u32, 2 * LANE as u32 + 1]']
|
||||
- [float16x4_t, float16x8_t, '']
|
||||
- [float16x8_t, float16x8_t, 'q']
|
||||
compose:
|
||||
- FnCall: [static_assert_uimm_bits!, [LANE, 2]]
|
||||
- Let:
|
||||
- c
|
||||
- "{neon_type[0]}"
|
||||
- FnCall: [simd_shuffle!, [c, c, "{type[2]}"]]
|
||||
- Let: [c, {FnCall: [vreinterpretq_u32_f16, [c]]}]
|
||||
- Let: [c, {FnCall: ['vdup{type[2]}_laneq_u32', [c], [LANE]]}]
|
||||
- Let: [c, {FnCall: ['vreinterpret{type[2]}_f16_u32', [c]]}]
|
||||
- FnCall: ["vcmla{neon_type[0].no}", [a, b, c]]
|
||||
|
||||
- name: "vcmla{neon_type[0].rot90_laneq}"
|
||||
@@ -6226,14 +5971,13 @@ intrinsics:
|
||||
static_defs: ["const LANE: i32"]
|
||||
safety: safe
|
||||
types:
|
||||
- [float32x2_t, float32x4_t, '[2 * LANE as u32, 2 * LANE as u32 + 1]']
|
||||
- [float32x4_t, float32x4_t, '[2 * LANE as u32, 2 * LANE as u32 + 1, 2 * LANE as u32, 2 * LANE as u32 + 1]']
|
||||
- [float32x2_t, float32x4_t, '']
|
||||
- [float32x4_t, float32x4_t, 'q']
|
||||
compose:
|
||||
- FnCall: [static_assert_uimm_bits!, [LANE, 1]]
|
||||
- Let:
|
||||
- c
|
||||
- "{neon_type[0]}"
|
||||
- FnCall: [simd_shuffle!, [c, c, "{type[2]}"]]
|
||||
- Let: [c, {FnCall: [vreinterpretq_u64_f32, [c]]}]
|
||||
- Let: [c, {FnCall: ['vdup{type[2]}_laneq_u64', [c], [LANE]]}]
|
||||
- Let: [c, {FnCall: ['vreinterpret{type[2]}_f32_u64', [c]]}]
|
||||
- FnCall: ["vcmla{neon_type[0].rot90}", [a, b, c]]
|
||||
|
||||
- name: "vcmla{neon_type[0].rot90_laneq}"
|
||||
@@ -6250,14 +5994,13 @@ intrinsics:
|
||||
static_defs: ["const LANE: i32"]
|
||||
safety: safe
|
||||
types:
|
||||
- [float16x4_t, float16x8_t, '[2 * LANE as u32, 2 * LANE as u32 + 1, 2 * LANE as u32, 2 * LANE as u32 + 1]']
|
||||
- [float16x8_t, float16x8_t, '[2 * LANE as u32, 2 * LANE as u32 + 1, 2 * LANE as u32, 2 * LANE as u32 + 1, 2 * LANE as u32, 2 * LANE as u32 + 1, 2 * LANE as u32, 2 * LANE as u32 + 1]']
|
||||
- [float16x4_t, float16x8_t, '']
|
||||
- [float16x8_t, float16x8_t, 'q']
|
||||
compose:
|
||||
- FnCall: [static_assert_uimm_bits!, [LANE, 2]]
|
||||
- Let:
|
||||
- c
|
||||
- "{neon_type[0]}"
|
||||
- FnCall: [simd_shuffle!, [c, c, "{type[2]}"]]
|
||||
- Let: [c, {FnCall: [vreinterpretq_u32_f16, [c]]}]
|
||||
- Let: [c, {FnCall: ['vdup{type[2]}_laneq_u32', [c], [LANE]]}]
|
||||
- Let: [c, {FnCall: ['vreinterpret{type[2]}_f16_u32', [c]]}]
|
||||
- FnCall: ["vcmla{neon_type[0].rot90}", [a, b, c]]
|
||||
|
||||
- name: "vcmla{neon_type[0].rot90_lane}"
|
||||
@@ -6272,14 +6015,13 @@ intrinsics:
|
||||
static_defs: ["const LANE: i32"]
|
||||
safety: safe
|
||||
types:
|
||||
- [float32x2_t, float32x2_t, '[2 * LANE as u32, 2 * LANE as u32 + 1]']
|
||||
- [float32x4_t, float32x2_t, '[2 * LANE as u32, 2 * LANE as u32 + 1, 2 * LANE as u32, 2 * LANE as u32 + 1]']
|
||||
- [float32x2_t, float32x2_t, '']
|
||||
- [float32x4_t, float32x2_t, 'q']
|
||||
compose:
|
||||
- FnCall: [static_assert!, ['LANE == 0']]
|
||||
- Let:
|
||||
- c
|
||||
- "{neon_type[0]}"
|
||||
- FnCall: [simd_shuffle!, [c, c, "{type[2]}"]]
|
||||
- Let: [c, {FnCall: [vreinterpret_u64_f32, [c]]}]
|
||||
- Let: [c, {FnCall: ['vdup{type[2]}_lane_u64', [c], [LANE]]}]
|
||||
- Let: [c, {FnCall: ['vreinterpret{type[2]}_f32_u64', [c]]}]
|
||||
- FnCall: ["vcmla{neon_type[0].rot90}", [a, b, c]]
|
||||
|
||||
- name: "vcmla{neon_type[0].rot90_lane}"
|
||||
@@ -6296,14 +6038,13 @@ intrinsics:
|
||||
static_defs: ["const LANE: i32"]
|
||||
safety: safe
|
||||
types:
|
||||
- [float16x4_t, float16x4_t, '[2 * LANE as u32, 2 * LANE as u32 + 1, 2 * LANE as u32, 2 * LANE as u32 + 1]']
|
||||
- [float16x8_t, float16x4_t, '[2 * LANE as u32, 2 * LANE as u32 + 1, 2 * LANE as u32, 2 * LANE as u32 + 1, 2 * LANE as u32, 2 * LANE as u32 + 1, 2 * LANE as u32, 2 * LANE as u32 + 1]']
|
||||
- [float16x4_t, float16x4_t, '']
|
||||
- [float16x8_t, float16x4_t, 'q']
|
||||
compose:
|
||||
- FnCall: [static_assert_uimm_bits!, [LANE, 1]]
|
||||
- Let:
|
||||
- c
|
||||
- "{neon_type[0]}"
|
||||
- FnCall: [simd_shuffle!, [c, c, "{type[2]}"]]
|
||||
- Let: [c, {FnCall: [vreinterpret_u32_f16, [c]]}]
|
||||
- Let: [c, {FnCall: ['vdup{type[2]}_lane_u32', [c], [LANE]]}]
|
||||
- Let: [c, {FnCall: ['vreinterpret{type[2]}_f16_u32', [c]]}]
|
||||
- FnCall: ["vcmla{neon_type[0].rot90}", [a, b, c]]
|
||||
|
||||
- name: "vcmla{neon_type.rot180}"
|
||||
@@ -6315,6 +6056,7 @@ intrinsics:
|
||||
- *neon-unstable-fcma
|
||||
assert_instr: [fcmla]
|
||||
safety: safe
|
||||
big_endian_inverse: true
|
||||
types:
|
||||
- float32x2_t
|
||||
- float32x4_t
|
||||
@@ -6338,6 +6080,7 @@ intrinsics:
|
||||
- *target-not-arm64ec
|
||||
assert_instr: [fcmla]
|
||||
safety: safe
|
||||
big_endian_inverse: true
|
||||
types:
|
||||
- float16x4_t
|
||||
- float16x8_t
|
||||
@@ -6361,14 +6104,13 @@ intrinsics:
|
||||
static_defs: ["const LANE: i32"]
|
||||
safety: safe
|
||||
types:
|
||||
- [float32x2_t, float32x4_t, '[2 * LANE as u32, 2 * LANE as u32 + 1]']
|
||||
- [float32x4_t, float32x4_t, '[2 * LANE as u32, 2 * LANE as u32 + 1, 2 * LANE as u32, 2 * LANE as u32 + 1]']
|
||||
- [float32x2_t, float32x4_t, '']
|
||||
- [float32x4_t, float32x4_t, 'q']
|
||||
compose:
|
||||
- FnCall: [static_assert_uimm_bits!, [LANE, 1]]
|
||||
- Let:
|
||||
- c
|
||||
- "{neon_type[0]}"
|
||||
- FnCall: [simd_shuffle!, [c, c, "{type[2]}"]]
|
||||
- Let: [c, {FnCall: [vreinterpretq_u64_f32, [c]]}]
|
||||
- Let: [c, {FnCall: ['vdup{type[2]}_laneq_u64', [c], [LANE]]}]
|
||||
- Let: [c, {FnCall: ['vreinterpret{type[2]}_f32_u64', [c]]}]
|
||||
- FnCall: ["vcmla{neon_type[0].rot180}", [a, b, c]]
|
||||
|
||||
- name: "vcmla{neon_type[0].rot180_laneq}"
|
||||
@@ -6385,19 +6127,16 @@ intrinsics:
|
||||
static_defs: ["const LANE: i32"]
|
||||
safety: safe
|
||||
types:
|
||||
- [float16x4_t, float16x8_t, '[2 * LANE as u32, 2 * LANE as u32 + 1, 2 * LANE as u32, 2 * LANE as u32 + 1]']
|
||||
- [float16x8_t, float16x8_t,
|
||||
'[2 * LANE as u32, 2 * LANE as u32 + 1, 2 * LANE as u32, 2 * LANE as u32 + 1, 2 * LANE as u32, 2 * LANE as u32 + 1, 2 * LANE as u32, 2 * LANE as u32 + 1]'
|
||||
]
|
||||
- [float16x4_t, float16x8_t, '']
|
||||
- [float16x8_t, float16x8_t, 'q']
|
||||
compose:
|
||||
- FnCall: [static_assert_uimm_bits!, [LANE, 2]]
|
||||
- Let:
|
||||
- c
|
||||
- "{neon_type[0]}"
|
||||
- FnCall: [simd_shuffle!, [c, c, "{type[2]}"]]
|
||||
- Let: [c, {FnCall: [vreinterpretq_u32_f16, [c]]}]
|
||||
- Let: [c, {FnCall: ['vdup{type[2]}_laneq_u32', [c], [LANE]]}]
|
||||
- Let: [c, {FnCall: ['vreinterpret{type[2]}_f16_u32', [c]]}]
|
||||
- FnCall: ["vcmla{neon_type[0].rot180}", [a, b, c]]
|
||||
|
||||
- name: "vcmla{type[3]}"
|
||||
- name: "vcmla{neon_type[0].rot180_lane}"
|
||||
doc: Floating-point complex multiply accumulate
|
||||
arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}", "c: {neon_type[1]}"]
|
||||
return_type: "{neon_type[0]}"
|
||||
@@ -6409,17 +6148,16 @@ intrinsics:
|
||||
static_defs: ["const LANE: i32"]
|
||||
safety: safe
|
||||
types:
|
||||
- [float32x2_t, float32x2_t, '[2 * LANE as u32, 2 * LANE as u32 + 1]', '_rot180_lane_f32']
|
||||
- [float32x4_t, float32x2_t, '[2 * LANE as u32, 2 * LANE as u32 + 1, 2 * LANE as u32, 2 * LANE as u32 + 1]', 'q_rot180_lane_f32']
|
||||
- [float32x2_t, float32x2_t, '']
|
||||
- [float32x4_t, float32x2_t, 'q']
|
||||
compose:
|
||||
- FnCall: [static_assert!, ['LANE == 0']]
|
||||
- Let:
|
||||
- c
|
||||
- "{neon_type[0]}"
|
||||
- FnCall: [simd_shuffle!, [c, c, "{type[2]}"]]
|
||||
- Let: [c, {FnCall: [vreinterpret_u64_f32, [c]]}]
|
||||
- Let: [c, {FnCall: ['vdup{type[2]}_lane_u64', [c], [LANE]]}]
|
||||
- Let: [c, {FnCall: ['vreinterpret{type[2]}_f32_u64', [c]]}]
|
||||
- FnCall: ["vcmla{neon_type[0].rot180}", [a, b, c]]
|
||||
|
||||
- name: "vcmla{type[3]}"
|
||||
- name: "vcmla{neon_type[0].rot180_lane}"
|
||||
doc: Floating-point complex multiply accumulate
|
||||
arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}", "c: {neon_type[1]}"]
|
||||
return_type: "{neon_type[0]}"
|
||||
@@ -6433,16 +6171,13 @@ intrinsics:
|
||||
static_defs: ["const LANE: i32"]
|
||||
safety: safe
|
||||
types:
|
||||
- [float16x4_t, float16x4_t, '[2 * LANE as u32, 2 * LANE as u32 + 1, 2 * LANE as u32, 2 * LANE as u32 + 1]', '_rot180_lane_f16']
|
||||
- [float16x8_t, float16x4_t,
|
||||
'[2 * LANE as u32, 2 * LANE as u32 + 1, 2 * LANE as u32, 2 * LANE as u32 + 1, 2 * LANE as u32, 2 * LANE as u32 + 1, 2 * LANE as u32, 2 * LANE as u32 + 1]', 'q_rot180_lane_f16'
|
||||
]
|
||||
- [float16x4_t, float16x4_t, '']
|
||||
- [float16x8_t, float16x4_t, 'q']
|
||||
compose:
|
||||
- FnCall: [static_assert_uimm_bits!, [LANE, 1]]
|
||||
- Let:
|
||||
- c
|
||||
- "{neon_type[0]}"
|
||||
- FnCall: [simd_shuffle!, [c, c, "{type[2]}"]]
|
||||
- Let: [c, {FnCall: [vreinterpret_u32_f16, [c]]}]
|
||||
- Let: [c, {FnCall: ['vdup{type[2]}_lane_u32', [c], [LANE]]}]
|
||||
- Let: [c, {FnCall: ['vreinterpret{type[2]}_f16_u32', [c]]}]
|
||||
- FnCall: ["vcmla{neon_type[0].rot180}", [a, b, c]]
|
||||
|
||||
- name: "vcmla{neon_type[0].rot270_laneq}"
|
||||
@@ -6457,14 +6192,13 @@ intrinsics:
|
||||
static_defs: ["const LANE: i32"]
|
||||
safety: safe
|
||||
types:
|
||||
- [float32x2_t, float32x4_t, '[2 * LANE as u32, 2 * LANE as u32 + 1]']
|
||||
- [float32x4_t, float32x4_t, '[2 * LANE as u32, 2 * LANE as u32 + 1, 2 * LANE as u32, 2 * LANE as u32 + 1]']
|
||||
- [float32x2_t, float32x4_t, '']
|
||||
- [float32x4_t, float32x4_t, 'q']
|
||||
compose:
|
||||
- FnCall: [static_assert_uimm_bits!, [LANE, 1]]
|
||||
- Let:
|
||||
- c
|
||||
- "{neon_type[0]}"
|
||||
- FnCall: [simd_shuffle!, [c, c, "{type[2]}"]]
|
||||
- Let: [c, {FnCall: [vreinterpretq_u64_f32, [c]]}]
|
||||
- Let: [c, {FnCall: ['vdup{type[2]}_laneq_u64', [c], [LANE]]}]
|
||||
- Let: [c, {FnCall: ['vreinterpret{type[2]}_f32_u64', [c]]}]
|
||||
- FnCall: ["vcmla{neon_type[0].rot270}", [a, b, c]]
|
||||
|
||||
- name: "vcmla{neon_type[0].rot270_laneq}"
|
||||
@@ -6481,14 +6215,13 @@ intrinsics:
|
||||
static_defs: ["const LANE: i32"]
|
||||
safety: safe
|
||||
types:
|
||||
- [float16x4_t, float16x8_t, '[2 * LANE as u32, 2 * LANE as u32 + 1, 2 * LANE as u32, 2 * LANE as u32 + 1]']
|
||||
- [float16x8_t, float16x8_t, '[2 * LANE as u32, 2 * LANE as u32 + 1, 2 * LANE as u32, 2 * LANE as u32 + 1, 2 * LANE as u32, 2 * LANE as u32 + 1, 2 * LANE as u32, 2 * LANE as u32 + 1]']
|
||||
- [float16x4_t, float16x8_t, '']
|
||||
- [float16x8_t, float16x8_t, 'q']
|
||||
compose:
|
||||
- FnCall: [static_assert_uimm_bits!, [LANE, 2]]
|
||||
- Let:
|
||||
- c
|
||||
- "{neon_type[0]}"
|
||||
- FnCall: [simd_shuffle!, [c, c, "{type[2]}"]]
|
||||
- Let: [c, {FnCall: [vreinterpretq_u32_f16, [c]]}]
|
||||
- Let: [c, {FnCall: ['vdup{type[2]}_laneq_u32', [c], [LANE]]}]
|
||||
- Let: [c, {FnCall: ['vreinterpret{type[2]}_f16_u32', [c]]}]
|
||||
- FnCall: ["vcmla{neon_type[0].rot270}", [a, b, c]]
|
||||
|
||||
- name: "vcmla{neon_type[0].lane_nox}"
|
||||
@@ -6503,14 +6236,13 @@ intrinsics:
|
||||
static_defs: ["const LANE: i32"]
|
||||
safety: safe
|
||||
types:
|
||||
- [float32x2_t, float32x2_t, '[2 * LANE as u32, 2 * LANE as u32 + 1]']
|
||||
- [float32x4_t, float32x2_t, '[2 * LANE as u32, 2 * LANE as u32 + 1, 2 * LANE as u32, 2 * LANE as u32 + 1]']
|
||||
- [float32x2_t, float32x2_t, '']
|
||||
- [float32x4_t, float32x2_t, 'q']
|
||||
compose:
|
||||
- FnCall: [static_assert!, ['LANE == 0']]
|
||||
- Let:
|
||||
- c
|
||||
- "{neon_type[0]}"
|
||||
- FnCall: [simd_shuffle!, [c, c, "{type[2]}"]]
|
||||
- Let: [c, {FnCall: [vreinterpret_u64_f32, [c]]}]
|
||||
- Let: [c, {FnCall: ['vdup{type[2]}_lane_u64', [c], [LANE]]}]
|
||||
- Let: [c, {FnCall: ['vreinterpret{type[2]}_f32_u64', [c]]}]
|
||||
- FnCall: ["vcmla{neon_type[0].no}", [a, b, c]]
|
||||
|
||||
|
||||
@@ -6528,14 +6260,13 @@ intrinsics:
|
||||
static_defs: ["const LANE: i32"]
|
||||
safety: safe
|
||||
types:
|
||||
- [float16x4_t, float16x4_t, '[2 * LANE as u32, 2 * LANE as u32 + 1, 2 * LANE as u32, 2 * LANE as u32 + 1]']
|
||||
- [float16x8_t, float16x4_t, '[2 * LANE as u32, 2 * LANE as u32 + 1, 2 * LANE as u32, 2 * LANE as u32 + 1, 2 * LANE as u32, 2 * LANE as u32 + 1, 2 * LANE as u32, 2 * LANE as u32 + 1]']
|
||||
- [float16x4_t, float16x4_t, '']
|
||||
- [float16x8_t, float16x4_t, 'q']
|
||||
compose:
|
||||
- FnCall: [static_assert_uimm_bits!, [LANE, 1]]
|
||||
- Let:
|
||||
- c
|
||||
- "{neon_type[0]}"
|
||||
- FnCall: [simd_shuffle!, [c, c, "{type[2]}"]]
|
||||
- Let: [c, {FnCall: [vreinterpret_u32_f16, [c]]}]
|
||||
- Let: [c, {FnCall: ['vdup{type[2]}_lane_u32', [c], [LANE]]}]
|
||||
- Let: [c, {FnCall: ['vreinterpret{type[2]}_f16_u32', [c]]}]
|
||||
- FnCall: ["vcmla{neon_type[0].no}", [a, b, c]]
|
||||
|
||||
- name: "vcmla{neon_type[0].rot270_lane}"
|
||||
@@ -6550,11 +6281,13 @@ intrinsics:
|
||||
static_defs: ["const LANE: i32"]
|
||||
safety: safe
|
||||
types:
|
||||
- [float32x2_t, float32x2_t, '[2 * LANE as u32, 2 * LANE as u32 + 1]']
|
||||
- [float32x4_t, float32x2_t, '[2 * LANE as u32, 2 * LANE as u32 + 1, 2 * LANE as u32, 2 * LANE as u32 + 1]']
|
||||
- [float32x2_t, float32x2_t, '']
|
||||
- [float32x4_t, float32x2_t, 'q']
|
||||
compose:
|
||||
- FnCall: [static_assert!, ['LANE == 0']]
|
||||
- Let: [c, "{neon_type[0]}", {FnCall: [simd_shuffle!, [c, c, "{type[2]}"]]}]
|
||||
- Let: [c, {FnCall: [vreinterpret_u64_f32, [c]]}]
|
||||
- Let: [c, {FnCall: ['vdup{type[2]}_lane_u64', [c], [LANE]]}]
|
||||
- Let: [c, {FnCall: ['vreinterpret{type[2]}_f32_u64', [c]]}]
|
||||
- FnCall: ["vcmla{neon_type[0].rot270}", [a, b, c]]
|
||||
|
||||
- name: "vcmla{neon_type[0].rot270_lane}"
|
||||
@@ -6571,11 +6304,13 @@ intrinsics:
|
||||
static_defs: ["const LANE: i32"]
|
||||
safety: safe
|
||||
types:
|
||||
- [float16x4_t, float16x4_t, '[2 * LANE as u32, 2 * LANE as u32 + 1, 2 * LANE as u32, 2 * LANE as u32 + 1]']
|
||||
- [float16x8_t, float16x4_t, '[2 * LANE as u32, 2 * LANE as u32 + 1, 2 * LANE as u32, 2 * LANE as u32 + 1, 2 * LANE as u32, 2 * LANE as u32 + 1, 2 * LANE as u32, 2 * LANE as u32 + 1]']
|
||||
- [float16x4_t, float16x4_t, '']
|
||||
- [float16x8_t, float16x4_t, 'q']
|
||||
compose:
|
||||
- FnCall: [static_assert_uimm_bits!, [LANE, 1]]
|
||||
- Let: [c, "{neon_type[0]}", {FnCall: [simd_shuffle!, [c, c, "{type[2]}"]]}]
|
||||
- Let: [c, {FnCall: [vreinterpret_u32_f16, [c]]}]
|
||||
- Let: [c, {FnCall: ['vdup{type[2]}_lane_u32', [c], [LANE]]}]
|
||||
- Let: [c, {FnCall: ['vreinterpret{type[2]}_f16_u32', [c]]}]
|
||||
- FnCall: ["vcmla{neon_type[0].rot270}", [a, b, c]]
|
||||
|
||||
- name: "vmax{neon_type.no}"
|
||||
@@ -6869,7 +6604,7 @@ intrinsics:
|
||||
return_type: "{type[1]}"
|
||||
attr:
|
||||
- FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fminnmp]]}]]
|
||||
- FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
|
||||
- *neon-stable
|
||||
safety: safe
|
||||
types:
|
||||
- [float32x2_t, "f32"]
|
||||
@@ -6887,7 +6622,7 @@ intrinsics:
|
||||
return_type: "{type[1]}"
|
||||
attr:
|
||||
- FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fminnmv]]}]]
|
||||
- FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
|
||||
- *neon-stable
|
||||
safety: safe
|
||||
types:
|
||||
- [float32x4_t, "f32"]
|
||||
@@ -6902,36 +6637,36 @@ intrinsics:
|
||||
doc: Vector move
|
||||
arguments: ["a: {neon_type[0]}"]
|
||||
return_type: "{neon_type[1]}"
|
||||
attr: [*neon-stable]
|
||||
assert_instr: [sxtl2]
|
||||
attr:
|
||||
- *neon-stable
|
||||
- FnCall: [cfg_attr, [*all-test-little-endian, {FnCall: [assert_instr, [sxtl2]]}]]
|
||||
safety: safe
|
||||
types:
|
||||
- [int8x16_t, int16x8_t, int8x8_t, '[8, 9, 10, 11, 12, 13, 14, 15]']
|
||||
- [int16x8_t, int32x4_t, int16x4_t, '[4, 5, 6, 7]']
|
||||
- [int32x4_t, int64x2_t, int32x2_t, '[2, 3]']
|
||||
- [int8x16_t, int16x8_t]
|
||||
- [int16x8_t, int32x4_t]
|
||||
- [int32x4_t, int64x2_t]
|
||||
compose:
|
||||
- Let:
|
||||
- a
|
||||
- "{neon_type[2]}"
|
||||
- FnCall: [simd_shuffle!, [a, a, "{type[3]}"]]
|
||||
- FnCall: ['vget_high_{neon_type[0]}', [a]]
|
||||
- FnCall: ["vmovl{neon_type[0].noq}", [a]]
|
||||
|
||||
- name: "vmovl_high{neon_type[0].noq}"
|
||||
doc: Vector move
|
||||
arguments: ["a: {neon_type[0]}"]
|
||||
return_type: "{neon_type[1]}"
|
||||
attr: [*neon-stable]
|
||||
assert_instr: [uxtl2]
|
||||
attr:
|
||||
- *neon-stable
|
||||
- FnCall: [cfg_attr, [*all-test-little-endian, {FnCall: [assert_instr, [uxtl2]]}]]
|
||||
safety: safe
|
||||
types:
|
||||
- [uint8x16_t, uint16x8_t, uint8x8_t, '[8, 9, 10, 11, 12, 13, 14, 15]']
|
||||
- [uint16x8_t, uint32x4_t, uint16x4_t, '[4, 5, 6, 7]']
|
||||
- [uint32x4_t, uint64x2_t, uint32x2_t, '[2, 3]']
|
||||
- [uint8x16_t, uint16x8_t]
|
||||
- [uint16x8_t, uint32x4_t]
|
||||
- [uint32x4_t, uint64x2_t]
|
||||
compose:
|
||||
- Let:
|
||||
- a
|
||||
- "{neon_type[2]}"
|
||||
- FnCall: [simd_shuffle!, [a, a, "{type[3]}"]]
|
||||
- FnCall: ['vget_high_{neon_type[0]}', [a]]
|
||||
- FnCall: ["vmovl{neon_type[0].noq}", [a]]
|
||||
|
||||
- name: "vpadd{neon_type[0].no}"
|
||||
@@ -6941,6 +6676,7 @@ intrinsics:
|
||||
attr: [*neon-stable]
|
||||
assert_instr: [faddp]
|
||||
safety: safe
|
||||
big_endian_inverse: true
|
||||
types:
|
||||
- [float32x4_t, "4"]
|
||||
- [float64x2_t, "2"]
|
||||
@@ -6963,6 +6699,7 @@ intrinsics:
|
||||
- *target-not-arm64ec
|
||||
assert_instr: [faddp]
|
||||
safety: safe
|
||||
big_endian_inverse: true
|
||||
types:
|
||||
- [float16x8_t, "8"]
|
||||
compose:
|
||||
@@ -6984,6 +6721,7 @@ intrinsics:
|
||||
- *target-not-arm64ec
|
||||
assert_instr: [fmaxp]
|
||||
safety: safe
|
||||
big_endian_inverse: true
|
||||
types:
|
||||
- float16x4_t
|
||||
- float16x8_t
|
||||
@@ -7005,6 +6743,7 @@ intrinsics:
|
||||
- *target-not-arm64ec
|
||||
assert_instr: [fmaxnmp]
|
||||
safety: safe
|
||||
big_endian_inverse: true
|
||||
types:
|
||||
- float16x4_t
|
||||
- float16x8_t
|
||||
@@ -7026,6 +6765,7 @@ intrinsics:
|
||||
- *target-not-arm64ec
|
||||
assert_instr: [fminp]
|
||||
safety: safe
|
||||
big_endian_inverse: true
|
||||
types:
|
||||
- float16x4_t
|
||||
- float16x8_t
|
||||
@@ -7047,6 +6787,7 @@ intrinsics:
|
||||
- *target-not-arm64ec
|
||||
assert_instr: [fminnmp]
|
||||
safety: safe
|
||||
big_endian_inverse: true
|
||||
types:
|
||||
- float16x4_t
|
||||
- float16x8_t
|
||||
@@ -7072,11 +6813,11 @@ intrinsics:
|
||||
- Let:
|
||||
- a1
|
||||
- "{type[2]}"
|
||||
- FnCall: [simd_extract!, [a, '0']]
|
||||
- FnCall: ['vget{neon_type[1].lane_nox}', [a], [0]]
|
||||
- Let:
|
||||
- a2
|
||||
- "{type[2]}"
|
||||
- FnCall: [simd_extract!, [a, '1']]
|
||||
- FnCall: ['vget{neon_type[1].lane_nox}', [a], [1]]
|
||||
- Identifier: ['a1 + a2', Symbol]
|
||||
|
||||
- name: "vpmin{type[0]}"
|
||||
@@ -7086,6 +6827,7 @@ intrinsics:
|
||||
attr: [*neon-stable]
|
||||
assert_instr: [fminp]
|
||||
safety: safe
|
||||
big_endian_inverse: true
|
||||
types:
|
||||
- ["s_f32", float32x2_t, f32]
|
||||
- ["qd_f64", float64x2_t, f64]
|
||||
@@ -7102,14 +6844,14 @@ intrinsics:
|
||||
return_type: "{type[1]}"
|
||||
attr:
|
||||
- FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [sqdmull]]}]]
|
||||
- FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
|
||||
- *neon-stable
|
||||
safety: safe
|
||||
types:
|
||||
- ["i16", "i32"]
|
||||
compose:
|
||||
- Let: [a, int16x4_t, {FnCall: [vdup_n_s16, [a]]}]
|
||||
- Let: [b, int16x4_t, {FnCall: [vdup_n_s16, [b]]}]
|
||||
- FnCall: [simd_extract!, [{FnCall: [vqdmull_s16, [a, b]]}, '0']]
|
||||
- FnCall: ['vgetq_lane_{type[1]}', [{FnCall: [vqdmull_s16, [a, b]]}], ['0']]
|
||||
|
||||
- name: "vqdmulls_s32"
|
||||
doc: "Signed saturating doubling multiply long"
|
||||
@@ -7117,7 +6859,7 @@ intrinsics:
|
||||
return_type: "{type[1]}"
|
||||
attr:
|
||||
- FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [sqdmull]]}]]
|
||||
- FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
|
||||
- *neon-stable
|
||||
safety: safe
|
||||
types:
|
||||
- ["i32", "i64"]
|
||||
@@ -7133,15 +6875,15 @@ intrinsics:
|
||||
arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}"]
|
||||
return_type: "{neon_type[1]}"
|
||||
attr:
|
||||
- FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [sqdmull2]]}]]
|
||||
- FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
|
||||
- FnCall: [cfg_attr, [*all-test-little-endian, {FnCall: [assert_instr, [sqdmull2]]}]]
|
||||
- *neon-stable
|
||||
safety: safe
|
||||
types:
|
||||
- [int16x8_t, int32x4_t, int16x4_t, '[4, 5, 6, 7]']
|
||||
- [int32x4_t, int64x2_t, int32x2_t, '[2, 3]']
|
||||
- [int16x8_t, int32x4_t]
|
||||
- [int32x4_t, int64x2_t]
|
||||
compose:
|
||||
- Let: [a, "{neon_type[2]}", {FnCall: [simd_shuffle!, [a, a, '{type[3]}']]}]
|
||||
- Let: [b, "{neon_type[2]}", {FnCall: [simd_shuffle!, [b, b, '{type[3]}']]}]
|
||||
- Let: [a, {FnCall: ['vget_high_{neon_type[0]}', [a]]}]
|
||||
- Let: [b, {FnCall: ['vget_high_{neon_type[0]}', [b]]}]
|
||||
- FnCall: ["vqdmull{neon_type[0].noq}", [a, b]]
|
||||
|
||||
- name: "vqdmull_high_n_{type[1]}"
|
||||
@@ -7149,15 +6891,15 @@ intrinsics:
|
||||
arguments: ["a: {neon_type[0]}", "b: {type[1]}"]
|
||||
return_type: "{neon_type[2]}"
|
||||
attr:
|
||||
- FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [sqdmull2]]}]]
|
||||
- FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
|
||||
- FnCall: [cfg_attr, [*all-test-little-endian, {FnCall: [assert_instr, [sqdmull2]]}]]
|
||||
- *neon-stable
|
||||
safety: safe
|
||||
types:
|
||||
- [int16x8_t, "i16", int32x4_t, int16x4_t, '[4, 5, 6, 7]']
|
||||
- [int32x4_t, "i32", int64x2_t, int32x2_t, '[2, 3]']
|
||||
- [int16x8_t, "i16", int32x4_t]
|
||||
- [int32x4_t, "i32", int64x2_t]
|
||||
compose:
|
||||
- Let: [a, "{neon_type[3]}", {FnCall: [simd_shuffle!, [a, a, "{type[4]}"]]}]
|
||||
- Let: [b, "{neon_type[3]}", {FnCall: ["vdup_n{neon_type[0].noq}", [b]]}]
|
||||
- Let: [a, {FnCall: ['vget_high_{neon_type[0]}', [a]]}]
|
||||
- Let: [b, {FnCall: ["vdup_n{neon_type[0].noq}", [b]]}]
|
||||
- FnCall: ["vqdmull{neon_type[0].noq}", [a, b]]
|
||||
|
||||
- name: "vqdmull{type[3]}"
|
||||
@@ -7167,7 +6909,7 @@ intrinsics:
|
||||
attr:
|
||||
- FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [sqdmull, 'N = 2']]}]]
|
||||
- FnCall: [rustc_legacy_const_generics, ['2']]
|
||||
- FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
|
||||
- *neon-stable
|
||||
static_defs: ['const N: i32']
|
||||
safety: safe
|
||||
types:
|
||||
@@ -7175,7 +6917,7 @@ intrinsics:
|
||||
- ["i32", int32x4_t, "i64", 's_laneq_s32', 's_s32']
|
||||
compose:
|
||||
- FnCall: [static_assert_uimm_bits!, [N, 2]]
|
||||
- Let: [b, "{type[0]}", {FnCall: [simd_extract!, [b, 'N as u32']]}]
|
||||
- Let: [b, "{type[0]}", {FnCall: ['vget{neon_type[1].lane_nox}', [b], [N]]}]
|
||||
- FnCall: ["vqdmull{type[4]}", [a, b]]
|
||||
|
||||
- name: "vqdmullh_laneq_s16"
|
||||
@@ -7185,14 +6927,14 @@ intrinsics:
|
||||
attr:
|
||||
- FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [sqdmull, N = 4]]}]]
|
||||
- FnCall: [rustc_legacy_const_generics, ['2']]
|
||||
- FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
|
||||
- *neon-stable
|
||||
static_defs: ['const N: i32']
|
||||
safety: safe
|
||||
types:
|
||||
- ["i16", int16x8_t, "i32"]
|
||||
compose:
|
||||
- FnCall: [static_assert_uimm_bits!, [N, 3]]
|
||||
- Let: [b, "{type[0]}", {FnCall: [simd_extract!, [b, 'N as u32']]}]
|
||||
- Let: [b, "{type[0]}", {FnCall: ['vget{neon_type[1].lane_nox}', [b], [N]]}]
|
||||
- FnCall: ["vqdmullh_s16", [a, b]]
|
||||
|
||||
- name: "vqdmulls_lane_s32"
|
||||
@@ -7202,33 +6944,33 @@ intrinsics:
|
||||
attr:
|
||||
- FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [sqdmull, 'N = 1']]}]]
|
||||
- FnCall: [rustc_legacy_const_generics, ['2']]
|
||||
- FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
|
||||
- *neon-stable
|
||||
static_defs: ['const N: i32']
|
||||
safety: safe
|
||||
types:
|
||||
- ["i32", int32x2_t, "i64"]
|
||||
compose:
|
||||
- FnCall: [static_assert_uimm_bits!, [N, 1]]
|
||||
- Let: [b, "{type[0]}", {FnCall: [simd_extract!, [b, 'N as u32']]}]
|
||||
- Let: [b, "{type[0]}", {FnCall: ['vget{neon_type[1].lane_nox}', [b], [N]]}]
|
||||
- FnCall: ["vqdmulls_s32", [a, b]]
|
||||
|
||||
- name: "vqdmull{type[6]}"
|
||||
- name: "vqdmull{type[3]}"
|
||||
doc: "Signed saturating doubling multiply long"
|
||||
arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}"]
|
||||
return_type: "{neon_type[2]}"
|
||||
attr:
|
||||
- FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [sqdmull2, 'N = 2']]}]]
|
||||
- FnCall: [cfg_attr, [*all-test-little-endian, {FnCall: [assert_instr, [sqdmull2, 'N = 2']]}]]
|
||||
- FnCall: [rustc_legacy_const_generics, ['2']]
|
||||
- FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
|
||||
- *neon-stable
|
||||
static_defs: ['const N: i32']
|
||||
safety: safe
|
||||
types:
|
||||
- [int16x8_t, int16x4_t, int32x4_t, int16x4_t, '[4, 5, 6, 7]', '[N as u32, N as u32, N as u32, N as u32]', '_high_lane_s16']
|
||||
- [int32x4_t, int32x4_t, int64x2_t, int32x2_t, '[2, 3]', '[N as u32, N as u32]', '_high_laneq_s32']
|
||||
- [int16x8_t, int16x4_t, int32x4_t, '_high_lane_s16']
|
||||
- [int32x4_t, int32x4_t, int64x2_t, '_high_laneq_s32']
|
||||
compose:
|
||||
- FnCall: [static_assert_uimm_bits!, [N, '2']]
|
||||
- Let: [a, "{neon_type[3]}", {FnCall: [simd_shuffle!, [a, a, "{type[4]}"]]}]
|
||||
- Let: [b, "{neon_type[3]}", {FnCall: [simd_shuffle!, [b, b, "{type[5]}"]]}]
|
||||
- Let: [a, {FnCall: ['vget_high_{neon_type[0]}', [a]]}]
|
||||
- Let: [b, {FnCall: ['vdup_lane{neon_type[1].nox}', [b], [N]]}]
|
||||
- FnCall: ["vqdmull{neon_type[0].noq}", [a, b]]
|
||||
|
||||
- name: "vqdmull_high_lane_s32"
|
||||
@@ -7236,17 +6978,17 @@ intrinsics:
|
||||
arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}"]
|
||||
return_type: "{neon_type[2]}"
|
||||
attr:
|
||||
- FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [sqdmull2, 'N = 1']]}]]
|
||||
- FnCall: [cfg_attr, [*all-test-little-endian, {FnCall: [assert_instr, [sqdmull2, 'N = 1']]}]]
|
||||
- FnCall: [rustc_legacy_const_generics, ['2']]
|
||||
- FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
|
||||
- *neon-stable
|
||||
static_defs: ['const N: i32']
|
||||
safety: safe
|
||||
types:
|
||||
- [int32x4_t, int32x2_t, int64x2_t, int32x2_t, '[2, 3]', '[N as u32, N as u32]']
|
||||
- [int32x4_t, int32x2_t, int64x2_t]
|
||||
compose:
|
||||
- FnCall: [static_assert_uimm_bits!, [N, '1']]
|
||||
- Let: [a, "{neon_type[3]}", {FnCall: [simd_shuffle!, [a, a, "{type[4]}"]]}]
|
||||
- Let: [b, "{neon_type[3]}", {FnCall: [simd_shuffle!, [b, b, "{type[5]}"]]}]
|
||||
- Let: [a, {FnCall: ['vget_high_{neon_type[0]}', [a]]}]
|
||||
- Let: [b, {FnCall: ['vdup_lane{neon_type[1].nox}', [b], [N]]}]
|
||||
- FnCall: ["vqdmull{neon_type[0].noq}", [a, b]]
|
||||
|
||||
- name: "vqdmull_high_laneq_s16"
|
||||
@@ -7254,17 +6996,17 @@ intrinsics:
|
||||
arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}"]
|
||||
return_type: "{neon_type[2]}"
|
||||
attr:
|
||||
- FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [sqdmull2, N = 4]]}]]
|
||||
- FnCall: [cfg_attr, [*all-test-little-endian, {FnCall: [assert_instr, [sqdmull2, N = 4]]}]]
|
||||
- FnCall: [rustc_legacy_const_generics, ['2']]
|
||||
- FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
|
||||
- *neon-stable
|
||||
static_defs: ['const N: i32']
|
||||
safety: safe
|
||||
types:
|
||||
- [int16x8_t, int16x8_t, int32x4_t, int16x4_t, '[4, 5, 6, 7]', '[N as u32, N as u32, N as u32, N as u32]']
|
||||
- [int16x8_t, int16x8_t, int32x4_t]
|
||||
compose:
|
||||
- FnCall: [static_assert_uimm_bits!, [N, '3']]
|
||||
- Let: [a, "{neon_type[3]}", {FnCall: [simd_shuffle!, [a, a, "{type[4]}"]]}]
|
||||
- Let: [b, "{neon_type[3]}", {FnCall: [simd_shuffle!, [b, b, "{type[5]}"]]}]
|
||||
- Let: [a, {FnCall: ['vget_high_{neon_type[0]}', [a]]}]
|
||||
- Let: [b, {FnCall: ['vdup_lane{neon_type[1].nox}', [b], [N]]}]
|
||||
- FnCall: ["vqdmull{neon_type[0].noq}", [a, b]]
|
||||
|
||||
- name: "vqdmull_laneq_s16"
|
||||
@@ -7274,14 +7016,14 @@ intrinsics:
|
||||
attr:
|
||||
- FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [sqdmull, 'N = 4']]}]]
|
||||
- FnCall: [rustc_legacy_const_generics, ['2']]
|
||||
- FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
|
||||
- *neon-stable
|
||||
static_defs: ['const N: i32']
|
||||
safety: safe
|
||||
types:
|
||||
- [int16x4_t, int16x8_t, int32x4_t, '[N as u32, N as u32, N as u32, N as u32]']
|
||||
- [int16x4_t, int16x8_t, int32x4_t]
|
||||
compose:
|
||||
- FnCall: [static_assert_uimm_bits!, [N, '3']]
|
||||
- Let: [b, "{neon_type[0]}", {FnCall: [simd_shuffle!, [b, b, "{type[3]}"]]}]
|
||||
- Let: [b, {FnCall: ['vdup_lane{neon_type[1].nox}', [b], [N]]}]
|
||||
- FnCall: [vqdmull_s16, [a, b]]
|
||||
|
||||
- name: "vqdmull_laneq_s32"
|
||||
@@ -7291,14 +7033,14 @@ intrinsics:
|
||||
attr:
|
||||
- FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [sqdmull, 'N = 2']]}]]
|
||||
- FnCall: [rustc_legacy_const_generics, ['2']]
|
||||
- FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
|
||||
- *neon-stable
|
||||
static_defs: ['const N: i32']
|
||||
safety: safe
|
||||
types:
|
||||
- [int32x2_t, int32x4_t, int64x2_t, '[N as u32, N as u32]']
|
||||
- [int32x2_t, int32x4_t, int64x2_t]
|
||||
compose:
|
||||
- FnCall: [static_assert_uimm_bits!, [N, '2']]
|
||||
- Let: [b, "{neon_type[0]}", {FnCall: [simd_shuffle!, [b, b, "{type[3]}"]]}]
|
||||
- Let: [b, {FnCall: ['vdup_lane{neon_type[1].nox}', [b], [N]]}]
|
||||
- FnCall: [vqdmull_s32, [a, b]]
|
||||
|
||||
- name: "vqdmlal{type[4]}"
|
||||
@@ -7306,8 +7048,8 @@ intrinsics:
|
||||
arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}", "c: {type[2]}"]
|
||||
return_type: "{neon_type[0]}"
|
||||
attr:
|
||||
- FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [sqdmlal2]]}]]
|
||||
- FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
|
||||
- FnCall: [cfg_attr, [*all-test-little-endian, {FnCall: [assert_instr, [sqdmlal2]]}]]
|
||||
- *neon-stable
|
||||
safety: safe
|
||||
types:
|
||||
- [int32x4_t, int16x8_t, int16x8_t, int32x4_t, _high_s16]
|
||||
@@ -7322,9 +7064,9 @@ intrinsics:
|
||||
arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}", "c: {neon_type[2]}"]
|
||||
return_type: "{neon_type[0]}"
|
||||
attr:
|
||||
- FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [sqdmlal2, 'N = 1']]}]]
|
||||
- FnCall: [cfg_attr, [*all-test-little-endian, {FnCall: [assert_instr, [sqdmlal2, 'N = 1']]}]]
|
||||
- FnCall: [rustc_legacy_const_generics, ['3']]
|
||||
- FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
|
||||
- *neon-stable
|
||||
static_defs: ['const N: i32']
|
||||
safety: safe
|
||||
types:
|
||||
@@ -7341,14 +7083,14 @@ intrinsics:
|
||||
arguments: ["a: {type[0]}", "b: {type[1]}", "c: {type[1]}"]
|
||||
return_type: "{type[0]}"
|
||||
attr:
|
||||
- FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [sqdmlal]]}]]
|
||||
- FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
|
||||
- FnCall: [cfg_attr, [*all-test-little-endian, {FnCall: [assert_instr, [sqdmlal]]}]]
|
||||
- *neon-stable
|
||||
safety: safe
|
||||
types:
|
||||
- ["i32", "i16", "s16"]
|
||||
compose:
|
||||
- Let: [x, int32x4_t, {FnCall: [vqdmull_s16, [{FnCall: [vdup_n_s16, [b]]}, {FnCall: [vdup_n_s16, [c]]}]]}]
|
||||
- FnCall: [vqadds_s32, [a, {FnCall: [simd_extract!, [x, 0]]}]]
|
||||
- FnCall: [vqadds_s32, [a, {FnCall: ['vgetq_lane_s32', [x], [0]]}]]
|
||||
|
||||
- name: "vqdmlals_s32"
|
||||
doc: "Signed saturating doubling multiply-add long"
|
||||
@@ -7356,7 +7098,7 @@ intrinsics:
|
||||
return_type: "{type[0]}"
|
||||
attr:
|
||||
- FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [sqdmlal]]}]]
|
||||
- FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
|
||||
- *neon-stable
|
||||
safety: safe
|
||||
types:
|
||||
- ["i64", "i32", "i32", "i64"]
|
||||
@@ -7369,9 +7111,9 @@ intrinsics:
|
||||
arguments: ["a: {type[0]}", "b: {type[1]}", "c: {neon_type[2]}"]
|
||||
return_type: "{type[0]}"
|
||||
attr:
|
||||
- FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [sqdmlal, 'LANE = 0']]}]]
|
||||
- FnCall: [cfg_attr, [*all-test-little-endian, {FnCall: [assert_instr, [sqdmlal, 'LANE = 0']]}]]
|
||||
- FnCall: [rustc_legacy_const_generics, ['3']]
|
||||
- FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
|
||||
- *neon-stable
|
||||
static_defs: ['const LANE: i32']
|
||||
safety: safe
|
||||
types:
|
||||
@@ -7381,16 +7123,16 @@ intrinsics:
|
||||
- ["i64", "i32", int32x4_t, "i64", s_laneq_s32, '2', s_s32]
|
||||
compose:
|
||||
- FnCall: [static_assert_uimm_bits!, [LANE, "{type[5]}"]]
|
||||
- FnCall: ["vqdmlal{type[6]}", [a, b, {FnCall: [simd_extract!, [c, 'LANE as u32']]}]]
|
||||
- FnCall: ["vqdmlal{type[6]}", [a, b, {FnCall: ['vget{neon_type[2].lane_nox}', [c], [LANE]]}]]
|
||||
|
||||
- name: "vqdmlal_laneq_s16"
|
||||
doc: "Vector widening saturating doubling multiply accumulate with scalar"
|
||||
arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}", "c: {neon_type[2]}"]
|
||||
return_type: "{neon_type[0]}"
|
||||
attr:
|
||||
- FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [sqdmlal, 'N = 2']]}]]
|
||||
- FnCall: [cfg_attr, [*all-test-little-endian, {FnCall: [assert_instr, [sqdmlal, 'N = 2']]}]]
|
||||
- FnCall: [rustc_legacy_const_generics, ['3']]
|
||||
- FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
|
||||
- *neon-stable
|
||||
static_defs: ['const N: i32']
|
||||
safety: safe
|
||||
types:
|
||||
@@ -7404,9 +7146,9 @@ intrinsics:
|
||||
arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}", "c: {neon_type[2]}"]
|
||||
return_type: "{neon_type[0]}"
|
||||
attr:
|
||||
- FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [sqdmlal, 'N = 1']]}]]
|
||||
- FnCall: [cfg_attr, [*all-test-little-endian, {FnCall: [assert_instr, [sqdmlal, 'N = 1']]}]]
|
||||
- FnCall: [rustc_legacy_const_generics, ['3']]
|
||||
- FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
|
||||
- *neon-stable
|
||||
static_defs: ['const N: i32']
|
||||
safety: safe
|
||||
types:
|
||||
@@ -7420,8 +7162,8 @@ intrinsics:
|
||||
arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}", "c: {type[2]}"]
|
||||
return_type: "{neon_type[0]}"
|
||||
attr:
|
||||
- FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [sqdmlsl2]]}]]
|
||||
- FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
|
||||
- FnCall: [cfg_attr, [*all-test-little-endian, {FnCall: [assert_instr, [sqdmlsl2]]}]]
|
||||
- *neon-stable
|
||||
safety: safe
|
||||
types:
|
||||
- [int32x4_t, int16x8_t, int16x8_t, int32x4_t, _high_s16]
|
||||
@@ -7436,9 +7178,9 @@ intrinsics:
|
||||
arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}", "c: {neon_type[2]}"]
|
||||
return_type: "{neon_type[0]}"
|
||||
attr:
|
||||
- FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [sqdmlsl2, 'N = 1']]}]]
|
||||
- FnCall: [cfg_attr, [*all-test-little-endian, {FnCall: [assert_instr, [sqdmlsl2, 'N = 1']]}]]
|
||||
- FnCall: [rustc_legacy_const_generics, ['3']]
|
||||
- FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
|
||||
- *neon-stable
|
||||
static_defs: ['const N: i32']
|
||||
safety: safe
|
||||
types:
|
||||
@@ -7455,14 +7197,14 @@ intrinsics:
|
||||
arguments: ["a: {type[0]}", "b: {type[1]}", "c: {type[1]}"]
|
||||
return_type: "{type[0]}"
|
||||
attr:
|
||||
- FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [sqdmlsl]]}]]
|
||||
- FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
|
||||
- FnCall: [cfg_attr, [*all-test-little-endian, {FnCall: [assert_instr, [sqdmlsl]]}]]
|
||||
- *neon-stable
|
||||
safety: safe
|
||||
types:
|
||||
- ["i32", "i16"]
|
||||
compose:
|
||||
- Let: [x, int32x4_t, {FnCall: [vqdmull_s16, [{FnCall: [vdup_n_s16, [b]]}, {FnCall: [vdup_n_s16, [c]]}]]}]
|
||||
- FnCall: [vqsubs_s32, [a, {FnCall: [simd_extract!, [x, '0']]}]]
|
||||
- FnCall: [vqsubs_s32, [a, {FnCall: ['vgetq_lane_s32', [x], [0]]}]]
|
||||
|
||||
- name: "vqdmlsls_s32"
|
||||
doc: "Signed saturating doubling multiply-subtract long"
|
||||
@@ -7470,7 +7212,7 @@ intrinsics:
|
||||
return_type: "{type[0]}"
|
||||
attr:
|
||||
- FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [sqdmlsl]]}]]
|
||||
- FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
|
||||
- *neon-stable
|
||||
safety: safe
|
||||
types:
|
||||
- ["i64", "i32", "i32", "i64"]
|
||||
@@ -7483,9 +7225,9 @@ intrinsics:
|
||||
arguments: ["a: {type[0]}", "b: {type[1]}", "c: {neon_type[2]}"]
|
||||
return_type: "{type[0]}"
|
||||
attr:
|
||||
- FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [sqdmlsl, 'LANE = 0']]}]]
|
||||
- FnCall: [cfg_attr, [*all-test-little-endian, {FnCall: [assert_instr, [sqdmlsl, 'LANE = 0']]}]]
|
||||
- FnCall: [rustc_legacy_const_generics, ['3']]
|
||||
- FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
|
||||
- *neon-stable
|
||||
static_defs: ['const LANE: i32']
|
||||
safety: safe
|
||||
types:
|
||||
@@ -7495,16 +7237,16 @@ intrinsics:
|
||||
- ["i64", "i32", int32x4_t, "i64", 's_laneq_s32', '2', 's_s32']
|
||||
compose:
|
||||
- FnCall: [static_assert_uimm_bits!, [LANE, "{type[5]}"]]
|
||||
- FnCall: ["vqdmlsl{type[6]}", [a, b, {FnCall: [simd_extract!, [c, 'LANE as u32']]}]]
|
||||
- FnCall: ["vqdmlsl{type[6]}", [a, b, {FnCall: ['vget{neon_type[2].lane_nox}', [c], [LANE]]}]]
|
||||
|
||||
- name: "vqdmlsl_laneq_s16"
|
||||
doc: "Vector widening saturating doubling multiply subtract with scalar"
|
||||
arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}", "c: {neon_type[2]}"]
|
||||
return_type: "{neon_type[0]}"
|
||||
attr:
|
||||
- FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [sqdmlsl, 'N = 2']]}]]
|
||||
- FnCall: [cfg_attr, [*all-test-little-endian, {FnCall: [assert_instr, [sqdmlsl, 'N = 2']]}]]
|
||||
- FnCall: [rustc_legacy_const_generics, ['3']]
|
||||
- FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
|
||||
- *neon-stable
|
||||
static_defs: ['const N: i32']
|
||||
safety: safe
|
||||
types:
|
||||
@@ -7518,9 +7260,9 @@ intrinsics:
|
||||
arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}", "c: {neon_type[2]}"]
|
||||
return_type: "{neon_type[0]}"
|
||||
attr:
|
||||
- FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [sqdmlsl, 'N = 1']]}]]
|
||||
- FnCall: [cfg_attr, [*all-test-little-endian, {FnCall: [assert_instr, [sqdmlsl, 'N = 1']]}]]
|
||||
- FnCall: [rustc_legacy_const_generics, ['3']]
|
||||
- FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
|
||||
- *neon-stable
|
||||
static_defs: ['const N: i32']
|
||||
safety: safe
|
||||
types:
|
||||
@@ -7535,7 +7277,7 @@ intrinsics:
|
||||
return_type: "{type[0]}"
|
||||
attr:
|
||||
- FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [sqdmulh]]}]]
|
||||
- FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
|
||||
- *neon-stable
|
||||
safety: safe
|
||||
types:
|
||||
- ["i16", "i16", "i16", int16x4_t, 'h_s16']
|
||||
@@ -7543,7 +7285,7 @@ intrinsics:
|
||||
compose:
|
||||
- Let: [a, "{neon_type[3]}", {FnCall: ["vdup_n{neon_type[3].no}", [a]]}]
|
||||
- Let: [b, "{neon_type[3]}", {FnCall: ["vdup_n{neon_type[3].no}", [b]]}]
|
||||
- FnCall: [simd_extract!, [{FnCall: ["vqdmulh{neon_type[3].no}", [a, b]]}, '0']]
|
||||
- FnCall: ['vget{neon_type[3].lane_nox}', [{FnCall: ["vqdmulh{neon_type[3].no}", [a, b]]}], ['0']]
|
||||
|
||||
- name: "vqdmulhh{type[3]}"
|
||||
doc: "Signed saturating doubling multiply returning high half"
|
||||
@@ -7552,7 +7294,7 @@ intrinsics:
|
||||
attr:
|
||||
- FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [sqdmulh, 'N = 2']]}]]
|
||||
- FnCall: [rustc_legacy_const_generics, ['2']]
|
||||
- FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
|
||||
- *neon-stable
|
||||
static_defs: ['const N: i32']
|
||||
safety: safe
|
||||
types:
|
||||
@@ -7560,7 +7302,7 @@ intrinsics:
|
||||
- ["i16", int16x8_t, "i16", '_laneq_s16', '3']
|
||||
compose:
|
||||
- FnCall: [static_assert_uimm_bits!, [N, "{type[4]}"]]
|
||||
- Let: [b, 'i16', {FnCall: [simd_extract!, [b, 'N as u32']]}]
|
||||
- Let: [b, 'i16', {FnCall: ['vget{neon_type[1].lane_nox}', [b], [N]]}]
|
||||
- FnCall: ['vqdmulhh_s16', [a, b]]
|
||||
|
||||
- name: "vqdmulhs{type[3]}"
|
||||
@@ -7570,7 +7312,7 @@ intrinsics:
|
||||
attr:
|
||||
- FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [sqdmulh, 'N = 1']]}]]
|
||||
- FnCall: [rustc_legacy_const_generics, ['2']]
|
||||
- FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
|
||||
- *neon-stable
|
||||
static_defs: ['const N: i32']
|
||||
safety: safe
|
||||
types:
|
||||
@@ -7578,7 +7320,7 @@ intrinsics:
|
||||
- ["i32", int32x4_t, "i32", "_laneq_s32", '2']
|
||||
compose:
|
||||
- FnCall: [static_assert_uimm_bits!, [N, "{type[4]}"]]
|
||||
- Let: [b, 'i32', {FnCall: [simd_extract!, [b, 'N as u32']]}]
|
||||
- Let: [b, 'i32', {FnCall: ['vget{neon_type[1].lane_nox}', [b], [N]]}]
|
||||
- FnCall: ['vqdmulhs_s32', [a, b]]
|
||||
|
||||
- name: "vqmovn_high{neon_type[1].noq}"
|
||||
@@ -7586,30 +7328,30 @@ intrinsics:
|
||||
arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}"]
|
||||
return_type: "{neon_type[2]}"
|
||||
attr:
|
||||
- FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [sqxtn2]]}]]
|
||||
- FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
|
||||
- FnCall: [cfg_attr, [*all-test-little-endian, {FnCall: [assert_instr, [sqxtn2]]}]]
|
||||
- *neon-stable
|
||||
safety: safe
|
||||
types:
|
||||
- [int8x8_t, int16x8_t, int8x16_t, '[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]']
|
||||
- [int16x4_t, int32x4_t, int16x8_t, '[0, 1, 2, 3, 4, 5, 6, 7]']
|
||||
- [int32x2_t, int64x2_t, int32x4_t, '[0, 1, 2, 3]']
|
||||
- [int8x8_t, int16x8_t, int8x16_t]
|
||||
- [int16x4_t, int32x4_t, int16x8_t]
|
||||
- [int32x2_t, int64x2_t, int32x4_t]
|
||||
compose:
|
||||
- FnCall: [simd_shuffle!, [a, {FnCall: ["vqmovn{neon_type[1].noq}", [b]]}, "{type[3]}"]]
|
||||
- FnCall: ['vcombine_{neon_type[0]}', [a, {FnCall: ["vqmovn{neon_type[1].noq}", [b]]}]]
|
||||
|
||||
- name: "vqmovn_high{neon_type[1].noq}"
|
||||
doc: "Signed saturating extract narrow"
|
||||
arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}"]
|
||||
return_type: "{neon_type[2]}"
|
||||
attr:
|
||||
- FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [uqxtn2]]}]]
|
||||
- FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
|
||||
- FnCall: [cfg_attr, [*all-test-little-endian, {FnCall: [assert_instr, [uqxtn2]]}]]
|
||||
- *neon-stable
|
||||
safety: safe
|
||||
types:
|
||||
- [uint8x8_t, uint16x8_t, uint8x16_t, '[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]']
|
||||
- [uint16x4_t, uint32x4_t, uint16x8_t, '[0, 1, 2, 3, 4, 5, 6, 7]']
|
||||
- [uint32x2_t, uint64x2_t, uint32x4_t, '[0, 1, 2, 3]']
|
||||
- [uint8x8_t, uint16x8_t, uint8x16_t]
|
||||
- [uint16x4_t, uint32x4_t, uint16x8_t]
|
||||
- [uint32x2_t, uint64x2_t, uint32x4_t]
|
||||
compose:
|
||||
- FnCall: [simd_shuffle!, [a, {FnCall: ["vqmovn{neon_type[1].noq}", [b]]}, "{type[3]}"]]
|
||||
- FnCall: ['vcombine_{neon_type[0]}', [a, {FnCall: ["vqmovn{neon_type[1].noq}", [b]]}]]
|
||||
|
||||
- name: "vqmovn{type[2]}"
|
||||
doc: "Saturating extract narrow"
|
||||
@@ -7617,13 +7359,13 @@ intrinsics:
|
||||
return_type: "{type[1]}"
|
||||
attr:
|
||||
- FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [sqxtn]]}]]
|
||||
- FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
|
||||
- *neon-stable
|
||||
safety: safe
|
||||
types:
|
||||
- ["i16", "i8", 'h_s16', s16]
|
||||
- ["i32", "i16", 's_s32', s32]
|
||||
compose:
|
||||
- FnCall: [simd_extract!, [{FnCall: ["vqmovn_{type[3]}", [{FnCall: ["vdupq_n_{type[3]}", [a]]}]]}, '0']]
|
||||
- FnCall: ['vget_lane_{type[1]}', [{FnCall: ["vqmovn_{type[3]}", [{FnCall: ["vdupq_n_{type[3]}", [a]]}]]}], ['0']]
|
||||
|
||||
- name: "vqmovn{type[2]}"
|
||||
doc: "Saturating extract narrow"
|
||||
@@ -7631,13 +7373,13 @@ intrinsics:
|
||||
return_type: "{type[1]}"
|
||||
attr:
|
||||
- FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [uqxtn]]}]]
|
||||
- FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
|
||||
- *neon-stable
|
||||
safety: safe
|
||||
types:
|
||||
- ["u16", "u8", 'h_u16', 'u16']
|
||||
- ["u32", "u16", 's_u32', 'u32']
|
||||
compose:
|
||||
- FnCall: [simd_extract!, [{FnCall: ["vqmovn_{type[3]}", [{FnCall: ["vdupq_n_{type[3]}", [a]]}]]}, '0']]
|
||||
- FnCall: ['vget_lane_{type[1]}', [{FnCall: ["vqmovn_{type[3]}", [{FnCall: ["vdupq_n_{type[3]}", [a]]}]]}], ['0']]
|
||||
|
||||
- name: "vqmovnd_s64"
|
||||
doc: "Saturating extract narrow"
|
||||
@@ -7645,7 +7387,7 @@ intrinsics:
|
||||
return_type: "{type[1]}"
|
||||
attr:
|
||||
- FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [sqxtn]]}]]
|
||||
- FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
|
||||
- *neon-stable
|
||||
safety: safe
|
||||
types:
|
||||
- ["i64", "i32"]
|
||||
@@ -7662,7 +7404,7 @@ intrinsics:
|
||||
return_type: "{type[1]}"
|
||||
attr:
|
||||
- FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [uqxtn]]}]]
|
||||
- FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
|
||||
- *neon-stable
|
||||
safety: safe
|
||||
types:
|
||||
- ["u64", "u32"]
|
||||
@@ -7679,29 +7421,29 @@ intrinsics:
|
||||
return_type: "{type[1]}"
|
||||
attr:
|
||||
- FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [sqxtun]]}]]
|
||||
- FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
|
||||
- *neon-stable
|
||||
safety: safe
|
||||
types:
|
||||
- ["i16", "u8", 'h_s16', s16]
|
||||
- ["i32", "u16", 's_s32', s32]
|
||||
- ["i64", "u32", 'd_s64', s64]
|
||||
compose:
|
||||
- FnCall: [simd_extract!, [{FnCall: ["vqmovun_{type[3]}", [{FnCall: ["vdupq_n_{type[3]}", [a]]}]]}, '0']]
|
||||
- FnCall: ['vget_lane_{type[1]}', [{FnCall: ["vqmovun_{type[3]}", [{FnCall: ["vdupq_n_{type[3]}", [a]]}]]}], ['0']]
|
||||
|
||||
- name: "vqmovun_high_{neon_type[1]}"
|
||||
doc: "Signed saturating extract unsigned narrow"
|
||||
arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}"]
|
||||
return_type: "{neon_type[2]}"
|
||||
attr:
|
||||
- FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [sqxtun2]]}]]
|
||||
- FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
|
||||
- FnCall: [cfg_attr, [*all-test-little-endian, {FnCall: [assert_instr, [sqxtun2]]}]]
|
||||
- *neon-stable
|
||||
safety: safe
|
||||
types:
|
||||
- [uint8x8_t, int16x8_t, uint8x16_t, s16, '[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]']
|
||||
- [uint16x4_t, int32x4_t, uint16x8_t, s32, '[0, 1, 2, 3, 4, 5, 6, 7]']
|
||||
- [uint32x2_t, int64x2_t, uint32x4_t, s64, '[0, 1, 2, 3]']
|
||||
- [uint8x8_t, int16x8_t, uint8x16_t, s16]
|
||||
- [uint16x4_t, int32x4_t, uint16x8_t, s32]
|
||||
- [uint32x2_t, int64x2_t, uint32x4_t, s64]
|
||||
compose:
|
||||
- FnCall: [simd_shuffle!, [a, {FnCall: ["vqmovun_{type[3]}", [b]]}, "{type[4]}"]]
|
||||
- FnCall: ['vcombine_{neon_type[0]}', [a, {FnCall: ["vqmovun_{type[3]}", [b]]}]]
|
||||
|
||||
- name: "vqrdmulh{type[1]}"
|
||||
doc: "Signed saturating rounding doubling multiply returning high half"
|
||||
@@ -7709,13 +7451,13 @@ intrinsics:
|
||||
return_type: "{type[0]}"
|
||||
attr:
|
||||
- FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [sqrdmulh]]}]]
|
||||
- FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
|
||||
- *neon-stable
|
||||
safety: safe
|
||||
types:
|
||||
- ["i16", 'h_s16', 's16']
|
||||
- ["i32", 's_s32', 's32']
|
||||
compose:
|
||||
- FnCall: [simd_extract!, [{FnCall: ["vqrdmulh_{type[2]}", [{FnCall: ["vdup_n_{type[2]}", [a]]}, {FnCall: ["vdup_n_{type[2]}", [b]]}]]}, '0']]
|
||||
- FnCall: ['vget_lane_{type[2]}', [{FnCall: ["vqrdmulh_{type[2]}", [{FnCall: ["vdup_n_{type[2]}", [a]]}, {FnCall: ["vdup_n_{type[2]}", [b]]}]]}], ['0']]
|
||||
|
||||
- name: "vqrdmulh{type[2]}"
|
||||
doc: "Signed saturating rounding doubling multiply returning high half"
|
||||
@@ -7724,7 +7466,7 @@ intrinsics:
|
||||
attr:
|
||||
- FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [sqrdmulh, LANE = 1]]}]]
|
||||
- FnCall: [rustc_legacy_const_generics, ['2']]
|
||||
- FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
|
||||
- *neon-stable
|
||||
static_defs: ['const LANE: i32']
|
||||
safety: safe
|
||||
types:
|
||||
@@ -7734,7 +7476,7 @@ intrinsics:
|
||||
- ["i32", int32x4_t, 's_laneq_s32', 's_s32', '2']
|
||||
compose:
|
||||
- FnCall: [static_assert_uimm_bits!, [LANE, "{type[4]}"]]
|
||||
- FnCall: ["vqrdmulh{type[3]}", [a, {FnCall: [simd_extract!, [b, 'LANE as u32']]}]]
|
||||
- FnCall: ["vqrdmulh{type[3]}", [a, {FnCall: ['vget{neon_type[1].lane_nox}', [b], [LANE]]}]]
|
||||
|
||||
- name: "vqrdmlah{neon_type.no}"
|
||||
doc: "Signed saturating rounding doubling multiply accumulate returning high half"
|
||||
@@ -7773,7 +7515,7 @@ intrinsics:
|
||||
- Let: [a, "{neon_type[1]}", {FnCall: ["vdup_n_{type[2]}", [a]]}]
|
||||
- Let: [b, "{neon_type[1]}", {FnCall: ["vdup_n_{type[2]}", [b]]}]
|
||||
- Let: [c, "{neon_type[1]}", {FnCall: ["vdup_n_{type[2]}", [c]]}]
|
||||
- FnCall: [simd_extract!, [{FnCall: ["vqrdmlah_{type[2]}", [a, b, c]]}, '0']]
|
||||
- FnCall: ['vget_lane_{type[2]}', [{FnCall: ["vqrdmlah_{type[2]}", [a, b, c]]}], ['0']]
|
||||
|
||||
- name: "vqrdmlah{type[0]}"
|
||||
doc: "Signed saturating rounding doubling multiply accumulate returning high half"
|
||||
@@ -7787,17 +7529,17 @@ intrinsics:
|
||||
static_defs: ['const LANE: i32']
|
||||
safety: safe
|
||||
types:
|
||||
- [_lane_s16, int16x4_t, int16x4_t, int16x4_t, '2', '[LANE as u32; 4]']
|
||||
- [_laneq_s16, int16x4_t, int16x4_t, int16x8_t, '3', '[LANE as u32; 4]']
|
||||
- [q_lane_s16, int16x8_t, int16x8_t, int16x4_t, '2', '[LANE as u32; 8]']
|
||||
- [q_laneq_s16, int16x8_t, int16x8_t, int16x8_t, '3', '[LANE as u32; 8]']
|
||||
- [_lane_s32, int32x2_t, int32x2_t, int32x2_t, '1', '[LANE as u32; 2]']
|
||||
- [_laneq_s32, int32x2_t, int32x2_t, int32x4_t, '2', '[LANE as u32; 2]']
|
||||
- [q_lane_s32, int32x4_t, int32x4_t, int32x2_t, '1', '[LANE as u32; 4]']
|
||||
- [q_laneq_s32, int32x4_t, int32x4_t, int32x4_t, '2', '[LANE as u32; 4]']
|
||||
- [_lane_s16, int16x4_t, int16x4_t, int16x4_t, '2']
|
||||
- [_laneq_s16, int16x4_t, int16x4_t, int16x8_t, '3']
|
||||
- [q_lane_s16, int16x8_t, int16x8_t, int16x4_t, '2']
|
||||
- [q_laneq_s16, int16x8_t, int16x8_t, int16x8_t, '3']
|
||||
- [_lane_s32, int32x2_t, int32x2_t, int32x2_t, '1']
|
||||
- [_laneq_s32, int32x2_t, int32x2_t, int32x4_t, '2']
|
||||
- [q_lane_s32, int32x4_t, int32x4_t, int32x2_t, '1']
|
||||
- [q_laneq_s32, int32x4_t, int32x4_t, int32x4_t, '2']
|
||||
compose:
|
||||
- FnCall: [static_assert_uimm_bits!, [LANE, '{type[4]}']]
|
||||
- Let: [c, "{type[1]}", {FnCall: [simd_shuffle!, [c, c, "{type[5]}"]]}]
|
||||
- Let: [c, {FnCall: ['vdup{type[0]}', [c], [LANE]]}]
|
||||
- FnCall: ["vqrdmlah{neon_type[2].no}", [a, b, c]]
|
||||
|
||||
- name: "vqrdmlah{type[4]}"
|
||||
@@ -7818,7 +7560,7 @@ intrinsics:
|
||||
- ["i32", int32x4_t, '2', "s_s32", s_laneq_s32, s_s32]
|
||||
compose:
|
||||
- FnCall: [static_assert_uimm_bits!, [LANE, "{type[2]}"]]
|
||||
- FnCall: ["vqrdmlah{type[5]}", [a, b, {FnCall: [simd_extract!, [c, 'LANE as u32']]}]]
|
||||
- FnCall: ["vqrdmlah{type[5]}", [a, b, {FnCall: ['vget{neon_type[1].lane_nox}', [c], [LANE]]}]]
|
||||
|
||||
- name: "vqrdmlsh{neon_type.no}"
|
||||
doc: "Signed saturating rounding doubling multiply subtract returning high half"
|
||||
@@ -7857,7 +7599,7 @@ intrinsics:
|
||||
- Let: [a, "{neon_type[2]}", {FnCall: ["vdup_n_{type[3]}", [a]]}]
|
||||
- Let: [b, "{neon_type[2]}", {FnCall: ["vdup_n_{type[3]}", [b]]}]
|
||||
- Let: [c, "{neon_type[2]}", {FnCall: ["vdup_n_{type[3]}", [c]]}]
|
||||
- FnCall: [simd_extract!, [{FnCall: ["vqrdmlsh_{type[3]}", [a, b, c]]}, '0']]
|
||||
- FnCall: ['vget{neon_type[2].lane_nox}', [{FnCall: ["vqrdmlsh_{type[3]}", [a, b, c]]}], ['0']]
|
||||
|
||||
- name: "vqrdmlsh{type[0]}"
|
||||
doc: "Signed saturating rounding doubling multiply subtract returning high half"
|
||||
@@ -7871,17 +7613,17 @@ intrinsics:
|
||||
static_defs: ['const LANE: i32']
|
||||
safety: safe
|
||||
types:
|
||||
- [_lane_s16, int16x4_t, int16x4_t, int16x4_t, '2', '[LANE as u32; 4]']
|
||||
- [_laneq_s16, int16x4_t, int16x4_t, int16x8_t, '3', '[LANE as u32; 4]']
|
||||
- [q_lane_s16, int16x8_t, int16x8_t, int16x4_t, '2', '[LANE as u32; 8]']
|
||||
- [q_laneq_s16, int16x8_t, int16x8_t, int16x8_t, '3', '[LANE as u32; 8]']
|
||||
- [_lane_s32, int32x2_t, int32x2_t, int32x2_t, '1', '[LANE as u32; 2]']
|
||||
- [_laneq_s32, int32x2_t, int32x2_t, int32x4_t, '2', '[LANE as u32; 2]']
|
||||
- [q_lane_s32, int32x4_t, int32x4_t, int32x2_t, '1', '[LANE as u32; 4]']
|
||||
- [q_laneq_s32, int32x4_t, int32x4_t, int32x4_t, '2', '[LANE as u32; 4]']
|
||||
- [_lane_s16, int16x4_t, int16x4_t, int16x4_t, '2']
|
||||
- [_laneq_s16, int16x4_t, int16x4_t, int16x8_t, '3']
|
||||
- [q_lane_s16, int16x8_t, int16x8_t, int16x4_t, '2']
|
||||
- [q_laneq_s16, int16x8_t, int16x8_t, int16x8_t, '3']
|
||||
- [_lane_s32, int32x2_t, int32x2_t, int32x2_t, '1']
|
||||
- [_laneq_s32, int32x2_t, int32x2_t, int32x4_t, '2']
|
||||
- [q_lane_s32, int32x4_t, int32x4_t, int32x2_t, '1']
|
||||
- [q_laneq_s32, int32x4_t, int32x4_t, int32x4_t, '2']
|
||||
compose:
|
||||
- FnCall: [static_assert_uimm_bits!, [LANE, '{type[4]}']]
|
||||
- Let: [c, "{type[1]}", {FnCall: [simd_shuffle!, [c, c, "{type[5]}"]]}]
|
||||
- Let: [c, {FnCall: ['vdup{type[0]}', [c], [LANE]]}]
|
||||
- FnCall: ["vqrdmlsh{neon_type[2].no}", [a, b, c]]
|
||||
|
||||
- name: "vqrdmlsh{type[3]}"
|
||||
@@ -7902,7 +7644,7 @@ intrinsics:
|
||||
- ["i32", int32x4_t, '2', s_laneq_s32, s_s32]
|
||||
compose:
|
||||
- FnCall: [static_assert_uimm_bits!, [LANE, "{type[2]}"]]
|
||||
- FnCall: ["vqrdmlsh{type[4]}", [a, b, {FnCall: [simd_extract!, [c, 'LANE as u32']]}]]
|
||||
- FnCall: ["vqrdmlsh{type[4]}", [a, b, {FnCall: ['vget{neon_type[1].lane_nox}', [c], [LANE]]}]]
|
||||
|
||||
- name: "vqrshl{type[0]}"
|
||||
doc: "Signed saturating rounding shift left"
|
||||
@@ -7910,7 +7652,7 @@ intrinsics:
|
||||
return_type: "{type[1]}"
|
||||
attr:
|
||||
- FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [sqrshl]]}]]
|
||||
- FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
|
||||
- *neon-stable
|
||||
safety: safe
|
||||
types:
|
||||
- ['s_s32', "i32"]
|
||||
@@ -7928,7 +7670,7 @@ intrinsics:
|
||||
return_type: "{type[0]}"
|
||||
attr:
|
||||
- FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [sqrshl]]}]]
|
||||
- FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
|
||||
- *neon-stable
|
||||
safety: safe
|
||||
types:
|
||||
- ["i8", 'b_s8', int8x8_t, s8]
|
||||
@@ -7936,7 +7678,7 @@ intrinsics:
|
||||
compose:
|
||||
- Let: [a, "{neon_type[2]}", {FnCall: ["vdup_n_{type[3]}", [a]]}]
|
||||
- Let: [b, "{neon_type[2]}", {FnCall: ["vdup_n_{type[3]}", [b]]}]
|
||||
- FnCall: [simd_extract!, [{FnCall: ["vqrshl_{type[3]}", [a, b]]}, '0']]
|
||||
- FnCall: ['vget{neon_type[2].lane_nox}', [{FnCall: ["vqrshl_{type[3]}", [a, b]]}], ['0']]
|
||||
|
||||
- name: "vqrshl{type[2]}"
|
||||
doc: "Unsigned signed saturating rounding shift left"
|
||||
@@ -7944,7 +7686,7 @@ intrinsics:
|
||||
return_type: "{type[0]}"
|
||||
attr:
|
||||
- FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [uqrshl]]}]]
|
||||
- FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
|
||||
- *neon-stable
|
||||
safety: safe
|
||||
types:
|
||||
- ["u32", "i32", 's_u32']
|
||||
@@ -7962,7 +7704,7 @@ intrinsics:
|
||||
return_type: "{type[0]}"
|
||||
attr:
|
||||
- FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [uqrshl]]}]]
|
||||
- FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
|
||||
- *neon-stable
|
||||
safety: safe
|
||||
types:
|
||||
- ["u8", "i8", "b_u8", uint8x8_t, int8x8_t, s8]
|
||||
@@ -7970,7 +7712,7 @@ intrinsics:
|
||||
compose:
|
||||
- Let: [a, "{neon_type[3]}", {FnCall: ["vdup_n_{type[0]}", [a]]}]
|
||||
- Let: [b, "{neon_type[4]}", {FnCall: ["vdup_n_{type[5]}", [b]]}]
|
||||
- FnCall: [simd_extract!, [{FnCall: ["vqrshl_{type[0]}", [a, b]]}, '0']]
|
||||
- FnCall: ['vget{neon_type[3].lane_nox}', [{FnCall: ["vqrshl_{type[0]}", [a, b]]}], ['0']]
|
||||
|
||||
- name: "vqrshrn{type[2]}"
|
||||
doc: "Signed saturating rounded shift right narrow"
|
||||
@@ -7979,7 +7721,7 @@ intrinsics:
|
||||
attr:
|
||||
- FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [sqrshrn, 'N = 2']]}]]
|
||||
- FnCall: [rustc_legacy_const_generics, ['1']]
|
||||
- FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
|
||||
- *neon-stable
|
||||
static_defs: ['const N: i32']
|
||||
safety: safe
|
||||
types:
|
||||
@@ -7989,25 +7731,25 @@ intrinsics:
|
||||
compose:
|
||||
- FnCall: [static_assert!, ["{type[3]}"]]
|
||||
- Let: [a, "{neon_type[4]}", {FnCall: ["vdup{type[5]}", [a]]}]
|
||||
- FnCall: [simd_extract!, [{FnCall: ["vqrshrn_n{neon_type[4].noq}::<N>", [a]]}, '0']]
|
||||
- FnCall: ['vget_lane_{type[1]}', [{FnCall: ["vqrshrn_n{neon_type[4].noq}::<N>", [a]]}], ['0']]
|
||||
|
||||
- name: "vqrshrn{type[3]}"
|
||||
doc: "Signed saturating rounded shift right narrow"
|
||||
arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}"]
|
||||
return_type: "{neon_type[2]}"
|
||||
attr:
|
||||
- FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [sqrshrn2, 'N = 2']]}]]
|
||||
- FnCall: [cfg_attr, [*all-test-little-endian, {FnCall: [assert_instr, [sqrshrn2, 'N = 2']]}]]
|
||||
- FnCall: [rustc_legacy_const_generics, ['2']]
|
||||
- FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
|
||||
- *neon-stable
|
||||
static_defs: ['const N: i32']
|
||||
safety: safe
|
||||
types:
|
||||
- [int8x8_t, int16x8_t, int8x16_t, '_high_n_s16', '[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]', 'N >= 1 && N <= 8']
|
||||
- [int16x4_t, int32x4_t, int16x8_t, '_high_n_s32', '[0, 1, 2, 3, 4, 5, 6, 7]', 'N >= 1 && N <= 16']
|
||||
- [int32x2_t, int64x2_t, int32x4_t, '_high_n_s64', '[0, 1, 2, 3]', 'N >= 1 && N <= 32']
|
||||
- [int8x8_t, int16x8_t, int8x16_t, '_high_n_s16', 'N >= 1 && N <= 8']
|
||||
- [int16x4_t, int32x4_t, int16x8_t, '_high_n_s32', 'N >= 1 && N <= 16']
|
||||
- [int32x2_t, int64x2_t, int32x4_t, '_high_n_s64', 'N >= 1 && N <= 32']
|
||||
compose:
|
||||
- FnCall: [static_assert!, ["{type[5]}"]]
|
||||
- FnCall: [simd_shuffle!, [a, {FnCall: ["vqrshrn_n{neon_type[1].noq}::<N>", [b]]}, "{type[4]}"]]
|
||||
- FnCall: [static_assert!, ["{type[4]}"]]
|
||||
- FnCall: ['vcombine_{neon_type[0]}', [a, {FnCall: ["vqrshrn_n{neon_type[1].noq}::<N>", [b]]}]]
|
||||
|
||||
- name: "vqrshrn{type[0]}"
|
||||
doc: "Unsigned saturating rounded shift right narrow"
|
||||
@@ -8016,7 +7758,7 @@ intrinsics:
|
||||
attr:
|
||||
- FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [uqrshrn, 'N = 2']]}]]
|
||||
- FnCall: [rustc_legacy_const_generics, ['1']]
|
||||
- FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
|
||||
- *neon-stable
|
||||
static_defs: ['const N: i32']
|
||||
safety: safe
|
||||
types:
|
||||
@@ -8026,31 +7768,30 @@ intrinsics:
|
||||
compose:
|
||||
- FnCall: [static_assert!, ['{type[3]}']]
|
||||
- Let: [a, "{neon_type[4]}", {FnCall: ["vdup{type[5]}", [a]]}]
|
||||
- FnCall: [simd_extract!, [{FnCall: ["vqrshrn{type[6]}::<N>", [a]]}, '0']]
|
||||
- FnCall: ['vget_lane_{type[2]}', [{FnCall: ["vqrshrn{type[6]}::<N>", [a]]}], ['0']]
|
||||
|
||||
- name: "vqrshrn_high_n{neon_type[1].noq}"
|
||||
doc: "Unsigned saturating rounded shift right narrow"
|
||||
arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}"]
|
||||
return_type: "{neon_type[2]}"
|
||||
attr:
|
||||
- FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [uqrshrn2, 'N = 2']]}]]
|
||||
- FnCall: [cfg_attr, [*all-test-little-endian, {FnCall: [assert_instr, [uqrshrn2, 'N = 2']]}]]
|
||||
- FnCall: [rustc_legacy_const_generics, ['2']]
|
||||
- FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
|
||||
- *neon-stable
|
||||
static_defs: ['const N: i32']
|
||||
safety: safe
|
||||
types:
|
||||
- [uint8x8_t, uint16x8_t, uint8x16_t, 'N >= 1 && N <= 8', '[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]']
|
||||
- [uint16x4_t, uint32x4_t, uint16x8_t, 'N >= 1 && N <= 16', '[0, 1, 2, 3, 4, 5, 6, 7]']
|
||||
- [uint32x2_t, uint64x2_t, uint32x4_t, 'N >= 1 && N <= 32', '[0, 1, 2, 3]']
|
||||
- [uint8x8_t, uint16x8_t, uint8x16_t, 'N >= 1 && N <= 8']
|
||||
- [uint16x4_t, uint32x4_t, uint16x8_t, 'N >= 1 && N <= 16']
|
||||
- [uint32x2_t, uint64x2_t, uint32x4_t, 'N >= 1 && N <= 32']
|
||||
compose:
|
||||
- FnCall: [static_assert!, ['{type[3]}']]
|
||||
- FnCall:
|
||||
- simd_shuffle!
|
||||
- 'vcombine_{neon_type[0]}'
|
||||
- - a
|
||||
- FnCall:
|
||||
- "vqrshrn_n{neon_type[1].noq}::<N>"
|
||||
- - b
|
||||
- "{type[4]}"
|
||||
|
||||
- name: "vqrshrun{type[0]}"
|
||||
doc: "Signed saturating rounded shift right unsigned narrow"
|
||||
@@ -8059,7 +7800,7 @@ intrinsics:
|
||||
attr:
|
||||
- FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [sqrshrun, 'N = 2']]}]]
|
||||
- FnCall: [rustc_legacy_const_generics, ['1']]
|
||||
- FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
|
||||
- *neon-stable
|
||||
static_defs: ['const N: i32']
|
||||
safety: safe
|
||||
types:
|
||||
@@ -8073,35 +7814,34 @@ intrinsics:
|
||||
- "{neon_type[4]}"
|
||||
- FnCall: ["vdupq_n_{type[5]}", [a]]
|
||||
- FnCall:
|
||||
- simd_extract!
|
||||
- 'vget_lane_{type[2]}'
|
||||
- - FnCall:
|
||||
- "vqrshrun_n_{type[5]}::<N>"
|
||||
- - a
|
||||
- '0'
|
||||
- - '0'
|
||||
|
||||
- name: "vqrshrun_high_n{neon_type[1].noq}"
|
||||
doc: "Signed saturating rounded shift right unsigned narrow"
|
||||
arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}"]
|
||||
return_type: "{neon_type[2]}"
|
||||
attr:
|
||||
- FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [sqrshrun2, 'N = 2']]}]]
|
||||
- FnCall: [cfg_attr, [*all-test-little-endian, {FnCall: [assert_instr, [sqrshrun2, 'N = 2']]}]]
|
||||
- FnCall: [rustc_legacy_const_generics, ['2']]
|
||||
- FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
|
||||
- *neon-stable
|
||||
static_defs: ['const N: i32']
|
||||
safety: safe
|
||||
types:
|
||||
- [uint8x8_t, int16x8_t, uint8x16_t, 'N >= 1 && N <= 8', s16, '[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]']
|
||||
- [uint16x4_t, int32x4_t, uint16x8_t, 'N >= 1 && N <= 16', s32, '[0, 1, 2, 3, 4, 5, 6, 7]']
|
||||
- [uint32x2_t, int64x2_t, uint32x4_t, 'N >= 1 && N <= 32', s64, '[0, 1, 2, 3]']
|
||||
- [uint8x8_t, int16x8_t, uint8x16_t, 'N >= 1 && N <= 8']
|
||||
- [uint16x4_t, int32x4_t, uint16x8_t, 'N >= 1 && N <= 16']
|
||||
- [uint32x2_t, int64x2_t, uint32x4_t, 'N >= 1 && N <= 32']
|
||||
compose:
|
||||
- FnCall: [static_assert!, ["{type[3]}"]]
|
||||
- FnCall:
|
||||
- simd_shuffle!
|
||||
- 'vcombine_{neon_type[0]}'
|
||||
- - a
|
||||
- FnCall:
|
||||
- "vqrshrun_n_{type[4]}::<N>"
|
||||
- "vqrshrun_n_{neon_type[1]}::<N>"
|
||||
- - b
|
||||
- "{type[5]}"
|
||||
|
||||
- name: "vqshld_{type}"
|
||||
doc: "Signed saturating shift left"
|
||||
@@ -8109,7 +7849,7 @@ intrinsics:
|
||||
return_type: "{type}"
|
||||
attr:
|
||||
- FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [sqshl]]}]]
|
||||
- FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
|
||||
- *neon-stable
|
||||
safety: safe
|
||||
types:
|
||||
- i64
|
||||
@@ -8126,7 +7866,7 @@ intrinsics:
|
||||
return_type: "{type[1]}"
|
||||
attr:
|
||||
- FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [sqshl]]}]]
|
||||
- FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
|
||||
- *neon-stable
|
||||
safety: safe
|
||||
types:
|
||||
- [b_s8, "i8", int8x8_t]
|
||||
@@ -8140,7 +7880,7 @@ intrinsics:
|
||||
- "vqshl{neon_type[2].noq}"
|
||||
- - FnCall: ["vdup_n{neon_type[2].no}", [a]]
|
||||
- FnCall: ["vdup_n{neon_type[2].no}", [b]]
|
||||
- FnCall: [simd_extract!, [c, '0']]
|
||||
- FnCall: ['vget{neon_type[2].lane_nox}', [c], ['0']]
|
||||
|
||||
- name: "vqshl{type[0]}"
|
||||
doc: "Signed saturating shift left"
|
||||
@@ -8149,7 +7889,7 @@ intrinsics:
|
||||
attr:
|
||||
- FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [sqshl, 'N = 2']]}]]
|
||||
- FnCall: [rustc_legacy_const_generics, ['1']]
|
||||
- FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
|
||||
- *neon-stable
|
||||
static_defs: ['const N: i32']
|
||||
safety: safe
|
||||
types:
|
||||
@@ -8160,11 +7900,11 @@ intrinsics:
|
||||
compose:
|
||||
- FnCall: [static_assert_uimm_bits!, [N, "{type[2]}"]]
|
||||
- FnCall:
|
||||
- simd_extract!
|
||||
- 'vget_lane_{type[1]}'
|
||||
- - FnCall:
|
||||
- "vqshl_n_{type[3]}::<N>"
|
||||
- - FnCall: ["vdup_n_{type[3]}", [a]]
|
||||
- '0'
|
||||
- - '0'
|
||||
|
||||
- name: "vqshld_{type[0]}"
|
||||
doc: "Unsigned saturating shift left"
|
||||
@@ -8172,7 +7912,7 @@ intrinsics:
|
||||
return_type: "{type[0]}"
|
||||
attr:
|
||||
- FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [uqshl]]}]]
|
||||
- FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
|
||||
- *neon-stable
|
||||
safety: safe
|
||||
types:
|
||||
- ["u64", "i64"]
|
||||
@@ -8189,7 +7929,7 @@ intrinsics:
|
||||
return_type: "{type[1]}"
|
||||
attr:
|
||||
- FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [uqshl]]}]]
|
||||
- FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
|
||||
- *neon-stable
|
||||
safety: safe
|
||||
types:
|
||||
- [b_u8, "u8", "i8", uint8x8_t, int8x8_t]
|
||||
@@ -8203,7 +7943,7 @@ intrinsics:
|
||||
- "vqshl{neon_type[3].noq}"
|
||||
- - FnCall: ["vdup{neon_type[3].N}", [a]]
|
||||
- FnCall: ["vdup{neon_type[4].N}", [b]]
|
||||
- FnCall: [simd_extract!, [c, '0']]
|
||||
- FnCall: ['vget{neon_type[3].lane_nox}', [c], ['0']]
|
||||
|
||||
- name: "vqshl{type[0]}"
|
||||
doc: "Unsigned saturating shift left"
|
||||
@@ -8212,7 +7952,7 @@ intrinsics:
|
||||
attr:
|
||||
- FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [uqshl, 'N = 2']]}]]
|
||||
- FnCall: [rustc_legacy_const_generics, ['1']]
|
||||
- FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
|
||||
- *neon-stable
|
||||
static_defs: ['const N: i32']
|
||||
safety: safe
|
||||
types:
|
||||
@@ -8223,9 +7963,9 @@ intrinsics:
|
||||
compose:
|
||||
- FnCall: [static_assert_uimm_bits!, [N, "{type[2]}"]]
|
||||
- FnCall:
|
||||
- simd_extract!
|
||||
- 'vget_lane_{type[1]}'
|
||||
- - FnCall: ["vqshl_n_{type[1]}::<N>", [{FnCall: ["vdup_n_{type[1]}", [a]]}]]
|
||||
- '0'
|
||||
- - '0'
|
||||
|
||||
- name: "vqshrnd_n_s64"
|
||||
doc: "Signed saturating shift right narrow"
|
||||
@@ -8234,7 +7974,7 @@ intrinsics:
|
||||
attr:
|
||||
- FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [sqshrn, 'N = 2']]}]]
|
||||
- FnCall: [rustc_legacy_const_generics, ['1']]
|
||||
- FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
|
||||
- *neon-stable
|
||||
static_defs: ['const N: i32']
|
||||
safety: safe
|
||||
types:
|
||||
@@ -8258,7 +7998,7 @@ intrinsics:
|
||||
attr:
|
||||
- FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [sqshrn, 'N = 2']]}]]
|
||||
- FnCall: [rustc_legacy_const_generics, ['1']]
|
||||
- FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
|
||||
- *neon-stable
|
||||
static_defs: ['const N: i32']
|
||||
safety: safe
|
||||
types:
|
||||
@@ -8267,33 +8007,32 @@ intrinsics:
|
||||
compose:
|
||||
- FnCall: [static_assert!, ["{type[3]}"]]
|
||||
- FnCall:
|
||||
- simd_extract!
|
||||
- 'vget_lane_{type[2]}'
|
||||
- - FnCall:
|
||||
- "vqshrn_n_{type[4]}::<N>"
|
||||
- - FnCall: ["vdupq_n_{type[4]}", [a]]
|
||||
- '0'
|
||||
- - '0'
|
||||
|
||||
- name: "vqshrn{type[0]}"
|
||||
doc: "Signed saturating shift right narrow"
|
||||
arguments: ["a: {neon_type[1]}", "b: {neon_type[2]}"]
|
||||
return_type: "{neon_type[3]}"
|
||||
attr:
|
||||
- FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [sqshrn2, 'N = 2']]}]]
|
||||
- FnCall: [cfg_attr, [*all-test-little-endian, {FnCall: [assert_instr, [sqshrn2, 'N = 2']]}]]
|
||||
- FnCall: [rustc_legacy_const_generics, ['2']]
|
||||
- FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
|
||||
- *neon-stable
|
||||
static_defs: ['const N: i32']
|
||||
safety: safe
|
||||
types:
|
||||
- [_high_n_s16, int8x8_t, int16x8_t, int8x16_t, 'N >= 1 && N <= 8', '[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]', s16]
|
||||
- [_high_n_s32, int16x4_t, int32x4_t, int16x8_t, 'N >= 1 && N <= 16', '[0, 1, 2, 3, 4, 5, 6, 7]', s32]
|
||||
- [_high_n_s64, int32x2_t, int64x2_t, int32x4_t, 'N >= 1 && N <= 32', '[0, 1, 2, 3]', s64]
|
||||
- [_high_n_s16, int8x8_t, int16x8_t, int8x16_t, 'N >= 1 && N <= 8']
|
||||
- [_high_n_s32, int16x4_t, int32x4_t, int16x8_t, 'N >= 1 && N <= 16']
|
||||
- [_high_n_s64, int32x2_t, int64x2_t, int32x4_t, 'N >= 1 && N <= 32']
|
||||
compose:
|
||||
- FnCall: [static_assert!, ["{type[4]}"]]
|
||||
- FnCall:
|
||||
- simd_shuffle!
|
||||
- 'vcombine_{neon_type[1]}'
|
||||
- - a
|
||||
- FnCall: ["vqshrn_n_{type[6]}::<N>", [b]]
|
||||
- "{type[5]}"
|
||||
- FnCall: ["vqshrn_n_{neon_type[2]}::<N>", [b]]
|
||||
|
||||
- name: "vqshrnd_n_u64"
|
||||
doc: "Unsigned saturating shift right narrow"
|
||||
@@ -8302,7 +8041,7 @@ intrinsics:
|
||||
attr:
|
||||
- FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [uqshrn, 'N = 2']]}]]
|
||||
- FnCall: [rustc_legacy_const_generics, ['1']]
|
||||
- FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
|
||||
- *neon-stable
|
||||
static_defs: ['const N: i32']
|
||||
safety: safe
|
||||
types:
|
||||
@@ -8326,7 +8065,7 @@ intrinsics:
|
||||
attr:
|
||||
- FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [uqshrn, 'N = 2']]}]]
|
||||
- FnCall: [rustc_legacy_const_generics, ['1']]
|
||||
- FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
|
||||
- *neon-stable
|
||||
static_defs: ['const N: i32']
|
||||
safety: safe
|
||||
types:
|
||||
@@ -8335,33 +8074,32 @@ intrinsics:
|
||||
compose:
|
||||
- FnCall: [static_assert!, ["{type[3]}"]]
|
||||
- FnCall:
|
||||
- "simd_extract!"
|
||||
- 'vget_lane_{type[2]}'
|
||||
- - FnCall:
|
||||
- "vqshrn_n_{type[1]}::<N>"
|
||||
- - FnCall: ["vdupq_n_{type[1]}", [a]]
|
||||
- '0'
|
||||
- - '0'
|
||||
|
||||
- name: "vqshrn{type[0]}"
|
||||
doc: "Unsigned saturating shift right narrow"
|
||||
arguments: ["a: {neon_type[1]}", "b: {neon_type[2]}"]
|
||||
return_type: "{neon_type[3]}"
|
||||
attr:
|
||||
- FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [uqshrn2, 'N = 2']]}]]
|
||||
- FnCall: [cfg_attr, [*all-test-little-endian, {FnCall: [assert_instr, [uqshrn2, 'N = 2']]}]]
|
||||
- FnCall: [rustc_legacy_const_generics, ['2']]
|
||||
- FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
|
||||
- *neon-stable
|
||||
static_defs: ['const N: i32']
|
||||
safety: safe
|
||||
types:
|
||||
- [_high_n_u16, uint8x8_t, uint16x8_t, uint8x16_t, 'N >= 1 && N <= 8', '[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]']
|
||||
- [_high_n_u32, uint16x4_t, uint32x4_t, uint16x8_t, 'N >= 1 && N <= 16', '[0, 1, 2, 3, 4, 5, 6, 7]']
|
||||
- [_high_n_u64, uint32x2_t, uint64x2_t, uint32x4_t, 'N >= 1 && N <= 32', '[0, 1, 2, 3]']
|
||||
- [_high_n_u16, uint8x8_t, uint16x8_t, uint8x16_t, 'N >= 1 && N <= 8']
|
||||
- [_high_n_u32, uint16x4_t, uint32x4_t, uint16x8_t, 'N >= 1 && N <= 16']
|
||||
- [_high_n_u64, uint32x2_t, uint64x2_t, uint32x4_t, 'N >= 1 && N <= 32']
|
||||
compose:
|
||||
- FnCall: [static_assert!, ["{type[4]}"]]
|
||||
- FnCall:
|
||||
- simd_shuffle!
|
||||
- 'vcombine_{neon_type[1]}'
|
||||
- - a
|
||||
- FnCall: ["vqshrn_n_{neon_type[2]}::<N>", [b]]
|
||||
- "{type[5]}"
|
||||
|
||||
- name: "vqshrun{type[0]}"
|
||||
doc: "Signed saturating shift right unsigned narrow"
|
||||
@@ -8370,7 +8108,7 @@ intrinsics:
|
||||
attr:
|
||||
- FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [sqshrun, 'N = 2']]}]]
|
||||
- FnCall: [rustc_legacy_const_generics, ['1']]
|
||||
- FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
|
||||
- *neon-stable
|
||||
static_defs: ['const N: i32']
|
||||
safety: safe
|
||||
types:
|
||||
@@ -8380,33 +8118,32 @@ intrinsics:
|
||||
compose:
|
||||
- FnCall: [static_assert!, ["{type[3]}"]]
|
||||
- FnCall:
|
||||
- simd_extract!
|
||||
- 'vget_lane_{type[2]}'
|
||||
- - FnCall:
|
||||
- "vqshrun_n_{type[4]}::<N>"
|
||||
- - FnCall: ["vdupq_n_{type[4]}", [a]]
|
||||
- '0'
|
||||
- - '0'
|
||||
|
||||
- name: "vqshrun_high_n_{neon_type[1]}"
|
||||
doc: "Signed saturating shift right unsigned narrow"
|
||||
arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}"]
|
||||
return_type: "{neon_type[2]}"
|
||||
attr:
|
||||
- FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [sqshrun2, 'N = 2']]}]]
|
||||
- FnCall: [cfg_attr, [*all-test-little-endian, {FnCall: [assert_instr, [sqshrun2, 'N = 2']]}]]
|
||||
- FnCall: [rustc_legacy_const_generics, ['2']]
|
||||
- FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
|
||||
- *neon-stable
|
||||
static_defs: ['const N: i32']
|
||||
safety: safe
|
||||
types:
|
||||
- [uint8x8_t, int16x8_t, uint8x16_t, 'N >= 1 && N <= 8', '[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]']
|
||||
- [uint16x4_t, int32x4_t, uint16x8_t, 'N >= 1 && N <= 16', '[0, 1, 2, 3, 4, 5, 6, 7]']
|
||||
- [uint32x2_t, int64x2_t, uint32x4_t, 'N >= 1 && N <= 32', '[0, 1, 2, 3]']
|
||||
- [uint8x8_t, int16x8_t, uint8x16_t, 'N >= 1 && N <= 8']
|
||||
- [uint16x4_t, int32x4_t, uint16x8_t, 'N >= 1 && N <= 16']
|
||||
- [uint32x2_t, int64x2_t, uint32x4_t, 'N >= 1 && N <= 32']
|
||||
compose:
|
||||
- FnCall: [static_assert!, ["{type[3]}"]]
|
||||
- FnCall:
|
||||
- simd_shuffle!
|
||||
- 'vcombine_{neon_type[0]}'
|
||||
- - a
|
||||
- FnCall: ["vqshrun_n_{neon_type[1]}::<N>", [b]]
|
||||
- "{type[4]}"
|
||||
|
||||
- name: "vsqadd{type[0]}"
|
||||
doc: "Unsigned saturating accumulate of signed value"
|
||||
@@ -8414,19 +8151,19 @@ intrinsics:
|
||||
return_type: "{type[1]}"
|
||||
attr:
|
||||
- FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [usqadd]]}]]
|
||||
- FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
|
||||
- *neon-stable
|
||||
safety: safe
|
||||
types:
|
||||
- [b_u8, "u8", "i8", s8]
|
||||
- [h_u16, "u16", "i16", s16]
|
||||
compose:
|
||||
- FnCall:
|
||||
- simd_extract!
|
||||
- 'vget_lane_{type[1]}'
|
||||
- - FnCall:
|
||||
- "vsqadd_{type[1]}"
|
||||
- - FnCall: ["vdup_n_{type[1]}", [a]]
|
||||
- FnCall: ["vdup_n_{type[2]}", [b]]
|
||||
- '0'
|
||||
- - '0'
|
||||
|
||||
- name: "vsqadd{type[0]}"
|
||||
doc: "Unsigned saturating accumulate of signed value"
|
||||
@@ -8434,7 +8171,7 @@ intrinsics:
|
||||
return_type: "{type[1]}"
|
||||
attr:
|
||||
- FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [usqadd]]}]]
|
||||
- FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
|
||||
- *neon-stable
|
||||
safety: safe
|
||||
types:
|
||||
- [s_u32, "u32", "i32"]
|
||||
@@ -8452,7 +8189,7 @@ intrinsics:
|
||||
return_type: "{neon_type}"
|
||||
attr:
|
||||
- FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fsqrt]]}]]
|
||||
- FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
|
||||
- *neon-stable
|
||||
safety: safe
|
||||
types:
|
||||
- float32x2_t
|
||||
@@ -8499,7 +8236,7 @@ intrinsics:
|
||||
return_type: "{neon_type[1]}"
|
||||
attr:
|
||||
- FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [frsqrts]]}]]
|
||||
- FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
|
||||
- *neon-stable
|
||||
safety: safe
|
||||
types:
|
||||
- [_f64, float64x1_t, v1f64]
|
||||
@@ -8517,7 +8254,7 @@ intrinsics:
|
||||
return_type: "{type[1]}"
|
||||
attr:
|
||||
- FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [frsqrts]]}]]
|
||||
- FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
|
||||
- *neon-stable
|
||||
safety: safe
|
||||
types:
|
||||
- [s_f32, "f32"]
|
||||
@@ -8556,7 +8293,7 @@ intrinsics:
|
||||
return_type: "{type[1]}"
|
||||
attr:
|
||||
- FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [frecpe]]}]]
|
||||
- FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
|
||||
- *neon-stable
|
||||
safety: safe
|
||||
types:
|
||||
- [_f64, float64x1_t, v1f64]
|
||||
@@ -8574,7 +8311,7 @@ intrinsics:
|
||||
return_type: "{type[1]}"
|
||||
attr:
|
||||
- FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [frecpe]]}]]
|
||||
- FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
|
||||
- *neon-stable
|
||||
safety: safe
|
||||
types:
|
||||
- [s_f32, "f32"]
|
||||
@@ -8613,7 +8350,7 @@ intrinsics:
|
||||
return_type: "{neon_type[1]}"
|
||||
attr:
|
||||
- FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [frecps]]}]]
|
||||
- FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
|
||||
- *neon-stable
|
||||
safety: safe
|
||||
types:
|
||||
- [_f64, float64x1_t, v1f64]
|
||||
@@ -8631,7 +8368,7 @@ intrinsics:
|
||||
return_type: "{type[1]}"
|
||||
attr:
|
||||
- FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [frecps]]}]]
|
||||
- FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
|
||||
- *neon-stable
|
||||
safety: safe
|
||||
types:
|
||||
- [s_f32, "f32"]
|
||||
@@ -8670,7 +8407,7 @@ intrinsics:
|
||||
return_type: "{type[1]}"
|
||||
attr:
|
||||
- FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [frecpx]]}]]
|
||||
- FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
|
||||
- *neon-stable
|
||||
safety: safe
|
||||
types:
|
||||
- [s_f32, "f32"]
|
||||
@@ -8702,7 +8439,6 @@ intrinsics:
|
||||
- link: "llvm.aarch64.neon.frecpx.{type[1]}"
|
||||
arch: aarch64,arm64ec
|
||||
|
||||
|
||||
- name: "vreinterpret{neon_type[1].no}{neon_type[0].noq}"
|
||||
doc: Vector reinterpret cast operation
|
||||
arguments: ["a: {type[0]}"]
|
||||
@@ -8719,54 +8455,65 @@ intrinsics:
|
||||
- [poly64x2_t, uint64x2_t]
|
||||
- [int64x2_t, poly64x2_t]
|
||||
- [uint64x2_t, poly64x2_t]
|
||||
- [float64x1_t, int64x1_t]
|
||||
- [float64x2_t, int64x2_t]
|
||||
- [float64x1_t, uint64x1_t]
|
||||
- [float64x2_t, uint64x2_t]
|
||||
- [float64x1_t, poly64x1_t]
|
||||
- [float64x2_t, poly64x2_t]
|
||||
- [int64x1_t, float64x1_t]
|
||||
- [int64x2_t, float64x2_t]
|
||||
- [uint64x1_t, float64x1_t]
|
||||
- [uint64x2_t, float64x2_t]
|
||||
- [poly64x1_t, float64x1_t]
|
||||
- [poly64x2_t, float64x2_t]
|
||||
compose:
|
||||
- FnCall: [transmute, [a]]
|
||||
|
||||
- name: "vreinterpret{neon_type[1].no}{neon_type[0].noq}"
|
||||
doc: Vector reinterpret cast operation
|
||||
arguments: ["a: {type[0]}"]
|
||||
return_type: "{type[1]}"
|
||||
attr: [*neon-stable]
|
||||
assert_instr: [nop]
|
||||
safety: safe
|
||||
types:
|
||||
- [float64x1_t, int8x8_t]
|
||||
- [float64x1_t, int16x4_t]
|
||||
- [float64x1_t, int32x2_t]
|
||||
- [float64x1_t, int64x1_t]
|
||||
- [float64x2_t, int8x16_t]
|
||||
- [float64x2_t, int16x8_t]
|
||||
- [float64x2_t, int32x4_t]
|
||||
- [float64x2_t, int64x2_t]
|
||||
- [float64x1_t, uint8x8_t]
|
||||
- [float64x1_t, uint16x4_t]
|
||||
- [float64x1_t, uint32x2_t]
|
||||
- [float64x1_t, uint64x1_t]
|
||||
- [float64x2_t, uint8x16_t]
|
||||
- [float64x2_t, uint16x8_t]
|
||||
- [float64x2_t, uint32x4_t]
|
||||
- [float64x2_t, uint64x2_t]
|
||||
- [float64x1_t, poly8x8_t]
|
||||
- [float64x1_t, poly16x4_t]
|
||||
- [float32x2_t, poly64x1_t]
|
||||
- [float64x1_t, poly64x1_t]
|
||||
- [float64x2_t, poly8x16_t]
|
||||
- [float64x2_t, poly16x8_t]
|
||||
- [float32x4_t, poly64x2_t]
|
||||
- [float64x2_t, poly64x2_t]
|
||||
- [float64x2_t, p128]
|
||||
- [int8x8_t, float64x1_t]
|
||||
- [int16x4_t, float64x1_t]
|
||||
- [int32x2_t, float64x1_t]
|
||||
- [int64x1_t, float64x1_t]
|
||||
- [int8x16_t, float64x2_t]
|
||||
- [int16x8_t, float64x2_t]
|
||||
- [int32x4_t, float64x2_t]
|
||||
- [int64x2_t, float64x2_t]
|
||||
- [poly8x8_t, float64x1_t]
|
||||
- [uint16x4_t, float64x1_t]
|
||||
- [uint32x2_t, float64x1_t]
|
||||
- [uint64x1_t, float64x1_t]
|
||||
- [poly8x16_t, float64x2_t]
|
||||
- [uint16x8_t, float64x2_t]
|
||||
- [uint32x4_t, float64x2_t]
|
||||
- [uint64x2_t, float64x2_t]
|
||||
- [uint8x8_t, float64x1_t]
|
||||
- [poly16x4_t, float64x1_t]
|
||||
- [poly64x1_t, float64x1_t]
|
||||
- [poly64x1_t, float32x2_t]
|
||||
- [uint8x16_t, float64x2_t]
|
||||
- [poly16x8_t, float64x2_t]
|
||||
- [poly64x2_t, float64x2_t]
|
||||
- [poly64x2_t, float32x4_t]
|
||||
- [p128, float64x2_t]
|
||||
- [float32x2_t, float64x1_t]
|
||||
@@ -8802,7 +8549,7 @@ intrinsics:
|
||||
return_type: "{type}"
|
||||
attr:
|
||||
- FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [srshl]]}]]
|
||||
- FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
|
||||
- *neon-stable
|
||||
safety: safe
|
||||
types:
|
||||
- "i64"
|
||||
@@ -8819,7 +8566,7 @@ intrinsics:
|
||||
return_type: "{type[0]}"
|
||||
attr:
|
||||
- FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [urshl]]}]]
|
||||
- FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
|
||||
- *neon-stable
|
||||
safety: safe
|
||||
types:
|
||||
- ["u64", "i64"]
|
||||
@@ -8837,7 +8584,7 @@ intrinsics:
|
||||
attr:
|
||||
- FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [srshr, 'N = 2']]}]]
|
||||
- FnCall: [rustc_legacy_const_generics, ['1']]
|
||||
- FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
|
||||
- *neon-stable
|
||||
static_defs: ['const N: i32']
|
||||
safety: safe
|
||||
types:
|
||||
@@ -8853,7 +8600,7 @@ intrinsics:
|
||||
attr:
|
||||
- FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [urshr, 'N = 2']]}]]
|
||||
- FnCall: [rustc_legacy_const_generics, ['1']]
|
||||
- FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
|
||||
- *neon-stable
|
||||
static_defs: ['const N: i32']
|
||||
safety: safe
|
||||
types:
|
||||
@@ -8867,197 +8614,262 @@ intrinsics:
|
||||
arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}"]
|
||||
return_type: "{neon_type[2]}"
|
||||
attr:
|
||||
- FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [rshrn2, 'N = 2']]}]]
|
||||
- FnCall: [cfg_attr, [*all-test-little-endian, {FnCall: [assert_instr, [rshrn2, 'N = 2']]}]]
|
||||
- FnCall: [rustc_legacy_const_generics, ['2']]
|
||||
- FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
|
||||
- *neon-stable
|
||||
static_defs: ['const N: i32']
|
||||
safety: safe
|
||||
types:
|
||||
- [int8x8_t, int16x8_t, int8x16_t, 'N >= 1 && N <= 8', '[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]']
|
||||
- [int16x4_t, int32x4_t, int16x8_t, 'N >= 1 && N <= 16', '[0, 1, 2, 3, 4, 5, 6, 7]']
|
||||
- [int32x2_t, int64x2_t, int32x4_t, 'N >= 1 && N <= 32', '[0, 1, 2, 3]']
|
||||
- [uint8x8_t, uint16x8_t, uint8x16_t, 'N >= 1 && N <= 8', '[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]']
|
||||
- [uint16x4_t, uint32x4_t, uint16x8_t, 'N >= 1 && N <= 16', '[0, 1, 2, 3, 4, 5, 6, 7]']
|
||||
- [uint32x2_t, uint64x2_t, uint32x4_t, 'N >= 1 && N <= 32', '[0, 1, 2, 3]']
|
||||
- [int8x8_t, int16x8_t, int8x16_t, 'N >= 1 && N <= 8']
|
||||
- [int16x4_t, int32x4_t, int16x8_t, 'N >= 1 && N <= 16']
|
||||
- [int32x2_t, int64x2_t, int32x4_t, 'N >= 1 && N <= 32']
|
||||
- [uint8x8_t, uint16x8_t, uint8x16_t, 'N >= 1 && N <= 8']
|
||||
- [uint16x4_t, uint32x4_t, uint16x8_t, 'N >= 1 && N <= 16']
|
||||
- [uint32x2_t, uint64x2_t, uint32x4_t, 'N >= 1 && N <= 32']
|
||||
compose:
|
||||
- FnCall: [static_assert!, ["{type[3]}"]]
|
||||
- FnCall:
|
||||
- simd_shuffle!
|
||||
- 'vcombine_{neon_type[0]}'
|
||||
- - a
|
||||
- FnCall: ["vrshrn_n_{neon_type[1]}::<N>", [b]]
|
||||
- "{type[4]}"
|
||||
|
||||
- name: "vrsubhn_high_{neon_type[1]}"
|
||||
doc: "Rounding subtract returning high narrow"
|
||||
arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}", "c: {neon_type[1]}"]
|
||||
return_type: "{neon_type[3]}"
|
||||
attr:
|
||||
- *little-endian
|
||||
- *cfg-little-endian
|
||||
- FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [rsubhn2]]}]]
|
||||
- FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
|
||||
- *neon-stable
|
||||
safety: safe
|
||||
types:
|
||||
- [int8x8_t, int16x8_t, int16x8_t, int8x16_t, '[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]']
|
||||
- [int16x4_t, int32x4_t, int32x4_t, int16x8_t, '[0, 1, 2, 3, 4, 5, 6, 7]']
|
||||
- [int32x2_t, int64x2_t, int64x2_t, int32x4_t, '[0, 1, 2, 3]']
|
||||
- [uint8x8_t, uint16x8_t, uint16x8_t, uint8x16_t, '[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]']
|
||||
- [uint16x4_t, uint32x4_t, uint32x4_t, uint16x8_t, '[0, 1, 2, 3, 4, 5, 6, 7]']
|
||||
- [uint32x2_t, uint64x2_t, uint64x2_t, uint32x4_t, '[0, 1, 2, 3]']
|
||||
- [int8x8_t, int16x8_t, int16x8_t, int8x16_t]
|
||||
- [int16x4_t, int32x4_t, int32x4_t, int16x8_t]
|
||||
- [int32x2_t, int64x2_t, int64x2_t, int32x4_t]
|
||||
- [uint8x8_t, uint16x8_t, uint16x8_t, uint8x16_t]
|
||||
- [uint16x4_t, uint32x4_t, uint32x4_t, uint16x8_t]
|
||||
- [uint32x2_t, uint64x2_t, uint64x2_t, uint32x4_t]
|
||||
compose:
|
||||
- Let:
|
||||
- x
|
||||
- "{neon_type[0]}"
|
||||
- FnCall: ["vrsubhn_{neon_type[1]}", [b, c]]
|
||||
- FnCall: [simd_shuffle!, [a, x, "{type[4]}"]]
|
||||
- FnCall: ['vcombine_{neon_type[0]}', [a, {FnCall: ["vrsubhn_{neon_type[1]}", [b, c]]}]]
|
||||
|
||||
- name: "vrsubhn_high_{neon_type[1]}"
|
||||
doc: "Rounding subtract returning high narrow"
|
||||
arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}", "c: {neon_type[1]}"]
|
||||
return_type: "{neon_type[3]}"
|
||||
attr:
|
||||
- *big-endian
|
||||
- *cfg-big-endian
|
||||
- FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [rsubhn]]}]]
|
||||
- FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
|
||||
- *neon-stable
|
||||
safety: safe
|
||||
types:
|
||||
- [int8x8_t, int16x8_t, int16x8_t, int8x16_t, '[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]']
|
||||
- [int16x4_t, int32x4_t, int32x4_t, int16x8_t, '[0, 1, 2, 3, 4, 5, 6, 7]']
|
||||
- [int32x2_t, int64x2_t, int64x2_t, int32x4_t, '[0, 1, 2, 3]']
|
||||
- [uint8x8_t, uint16x8_t, uint16x8_t, uint8x16_t, '[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]']
|
||||
- [uint16x4_t, uint32x4_t, uint32x4_t, uint16x8_t, '[0, 1, 2, 3, 4, 5, 6, 7]']
|
||||
- [uint32x2_t, uint64x2_t, uint64x2_t, uint32x4_t, '[0, 1, 2, 3]']
|
||||
- [int8x8_t, int16x8_t, int16x8_t, int8x16_t]
|
||||
- [int16x4_t, int32x4_t, int32x4_t, int16x8_t]
|
||||
- [int32x2_t, int64x2_t, int64x2_t, int32x4_t]
|
||||
- [uint8x8_t, uint16x8_t, uint16x8_t, uint8x16_t]
|
||||
- [uint16x4_t, uint32x4_t, uint32x4_t, uint16x8_t]
|
||||
- [uint32x2_t, uint64x2_t, uint64x2_t, uint32x4_t]
|
||||
compose:
|
||||
- Let:
|
||||
- x
|
||||
- "{neon_type[0]}"
|
||||
- FnCall: ["vrsubhn_{neon_type[1]}", [b, c]]
|
||||
- FnCall: [simd_shuffle!, [a, x, "{type[4]}"]]
|
||||
- FnCall: ['vcombine_{neon_type[0]}', [a, {FnCall: ["vrsubhn_{neon_type[1]}", [b, c]]}]]
|
||||
|
||||
- name: "vcopy{neon_type[0].lane_nox}"
|
||||
doc: "Insert vector element from another vector element"
|
||||
arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}"]
|
||||
return_type: "{neon_type[2]}"
|
||||
attr:
|
||||
- FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [mov, 'LANE1 = 0', 'LANE2 = 0']]}]]
|
||||
- FnCall: [cfg_attr, [*all-test-little-endian, {FnCall: [assert_instr, [mov, 'LANE1 = 0', 'LANE2 = 0']]}]]
|
||||
- FnCall: [rustc_legacy_const_generics, ['1', '3']]
|
||||
- FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
|
||||
- *neon-stable
|
||||
static_defs: ['const LANE1: i32, const LANE2: i32']
|
||||
safety: safe
|
||||
types:
|
||||
- [int8x8_t, int8x8_t, int8x8_t, '3', '3', ' match LANE1 & 0b111 { 0 => simd_shuffle!(a, b, [8 + LANE2 as u32, 1, 2, 3, 4, 5, 6, 7]), 1 => simd_shuffle!(a, b, [0, 8 + LANE2 as u32, 2, 3, 4, 5, 6, 7]), 2 => simd_shuffle!(a, b, [0, 1, 8 + LANE2 as u32, 3, 4, 5, 6, 7]), 3 => simd_shuffle!(a, b, [0, 1, 2, 8 + LANE2 as u32, 4, 5, 6, 7]), 4 => simd_shuffle!(a, b, [0, 1, 2, 3, 8 + LANE2 as u32, 5, 6, 7]), 5 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 8 + LANE2 as u32, 6, 7]), 6 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 8 + LANE2 as u32, 7]), 7 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 8 + LANE2 as u32]), _ => unreachable_unchecked(), }']
|
||||
- [int16x4_t, int16x4_t, int16x4_t, '2', '2', ' match LANE1 & 0b11 { 0 => simd_shuffle!(a, b, [4 + LANE2 as u32, 1, 2, 3]), 1 => simd_shuffle!(a, b, [0, 4 + LANE2 as u32, 2, 3]), 2 => simd_shuffle!(a, b, [0, 1, 4 + LANE2 as u32, 3]), 3 => simd_shuffle!(a, b, [0, 1, 2, 4 + LANE2 as u32]), _ => unreachable_unchecked(), }']
|
||||
- [int32x2_t, int32x2_t, int32x2_t, '1', '1', ' match LANE1 & 0b1 { 0 => simd_shuffle!(a, b, [2 + LANE2 as u32, 1]), 1 => simd_shuffle!(a, b, [0, 2 + LANE2 as u32]), _ => unreachable_unchecked(), }']
|
||||
- [uint8x8_t, uint8x8_t, uint8x8_t, '3', '3', ' match LANE1 & 0b111 { 0 => simd_shuffle!(a, b, [8 + LANE2 as u32, 1, 2, 3, 4, 5, 6, 7]), 1 => simd_shuffle!(a, b, [0, 8 + LANE2 as u32, 2, 3, 4, 5, 6, 7]), 2 => simd_shuffle!(a, b, [0, 1, 8 + LANE2 as u32, 3, 4, 5, 6, 7]), 3 => simd_shuffle!(a, b, [0, 1, 2, 8 + LANE2 as u32, 4, 5, 6, 7]), 4 => simd_shuffle!(a, b, [0, 1, 2, 3, 8 + LANE2 as u32, 5, 6, 7]), 5 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 8 + LANE2 as u32, 6, 7]), 6 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 8 + LANE2 as u32, 7]), 7 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 8 + LANE2 as u32]), _ => unreachable_unchecked(), }']
|
||||
- [uint16x4_t, uint16x4_t, uint16x4_t, '2', '2', ' match LANE1 & 0b11 { 0 => simd_shuffle!(a, b, [4 + LANE2 as u32, 1, 2, 3]), 1 => simd_shuffle!(a, b, [0, 4 + LANE2 as u32, 2, 3]), 2 => simd_shuffle!(a, b, [0, 1, 4 + LANE2 as u32, 3]), 3 => simd_shuffle!(a, b, [0, 1, 2, 4 + LANE2 as u32]), _ => unreachable_unchecked(), }']
|
||||
- [uint32x2_t, uint32x2_t, uint32x2_t, '1', '1', ' match LANE1 & 0b1 { 0 => simd_shuffle!(a, b, [2 + LANE2 as u32, 1]), 1 => simd_shuffle!(a, b, [0, 2 + LANE2 as u32]), _ => unreachable_unchecked(), }']
|
||||
- [poly8x8_t, poly8x8_t, poly8x8_t, '3', '3', ' match LANE1 & 0b111 { 0 => simd_shuffle!(a, b, [8 + LANE2 as u32, 1, 2, 3, 4, 5, 6, 7]), 1 => simd_shuffle!(a, b, [0, 8 + LANE2 as u32, 2, 3, 4, 5, 6, 7]), 2 => simd_shuffle!(a, b, [0, 1, 8 + LANE2 as u32, 3, 4, 5, 6, 7]), 3 => simd_shuffle!(a, b, [0, 1, 2, 8 + LANE2 as u32, 4, 5, 6, 7]), 4 => simd_shuffle!(a, b, [0, 1, 2, 3, 8 + LANE2 as u32, 5, 6, 7]), 5 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 8 + LANE2 as u32, 6, 7]), 6 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 8 + LANE2 as u32, 7]), 7 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 8 + LANE2 as u32]), _ => unreachable_unchecked(), }']
|
||||
- [poly16x4_t, poly16x4_t, poly16x4_t, '2', '2', ' match LANE1 & 0b11 { 0 => simd_shuffle!(a, b, [4 + LANE2 as u32, 1, 2, 3]), 1 => simd_shuffle!(a, b, [0, 4 + LANE2 as u32, 2, 3]), 2 => simd_shuffle!(a, b, [0, 1, 4 + LANE2 as u32, 3]), 3 => simd_shuffle!(a, b, [0, 1, 2, 4 + LANE2 as u32]), _ => unreachable_unchecked(), }']
|
||||
- [float32x2_t, float32x2_t, float32x2_t, '1', '1', ' match LANE1 & 0b1 { 0 => simd_shuffle!(a, b, [2 + LANE2 as u32, 1]), 1 => simd_shuffle!(a, b, [0, 2 + LANE2 as u32]), _ => unreachable_unchecked(), }']
|
||||
- [int8x8_t, int8x8_t, int8x8_t, '3', '3']
|
||||
- [int16x4_t, int16x4_t, int16x4_t, '2', '2']
|
||||
- [int32x2_t, int32x2_t, int32x2_t, '1', '1']
|
||||
- [uint8x8_t, uint8x8_t, uint8x8_t, '3', '3']
|
||||
- [uint16x4_t, uint16x4_t, uint16x4_t, '2', '2']
|
||||
- [uint32x2_t, uint32x2_t, uint32x2_t, '1', '1']
|
||||
- [poly8x8_t, poly8x8_t, poly8x8_t, '3', '3']
|
||||
- [poly16x4_t, poly16x4_t, poly16x4_t, '2', '2']
|
||||
- [float32x2_t, float32x2_t, float32x2_t, '1', '1']
|
||||
compose:
|
||||
- FnCall: [static_assert_uimm_bits!, [LANE1, '{type[3]}']]
|
||||
- FnCall: [static_assert_uimm_bits!, [LANE2, '{type[4]}']]
|
||||
- Identifier: ["{type[5]}", UnsafeSymbol]
|
||||
- FnCall: ['vset{neon_type[0].lane_nox}', [{FnCall: ['vget{neon_type[1].lane_nox}', [b], [LANE2]]}, a], [LANE1]]
|
||||
|
||||
- name: "vcopy{neon_type[0].lane_nox}"
|
||||
doc: "Insert vector element from another vector element"
|
||||
arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}"]
|
||||
return_type: "{neon_type[2]}"
|
||||
attr:
|
||||
- FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [mov, 'LANE1 = 0', 'LANE2 = 0']]}]]
|
||||
- FnCall: [cfg_attr, [*all-test-little-endian, {FnCall: [assert_instr, [mov, 'LANE1 = 0', 'LANE2 = 0']]}]]
|
||||
- FnCall: [rustc_legacy_const_generics, ['1', '3']]
|
||||
- FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
|
||||
- *neon-stable
|
||||
static_defs: ['const LANE1: i32, const LANE2: i32']
|
||||
safety: safe
|
||||
types:
|
||||
- [int8x16_t, int8x8_t, int8x16_t, '4', '3', ' let b: int8x16_t = simd_shuffle!(b, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]);', 'match LANE1 & 0b1111 { 0 => simd_shuffle!(a, b, [16 + LANE2 as u32, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]), 1 => simd_shuffle!(a, b, [0, 16 + LANE2 as u32, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]), 2 => simd_shuffle!(a, b, [0, 1, 16 + LANE2 as u32, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]), 3 => simd_shuffle!(a, b, [0, 1, 2, 16 + LANE2 as u32, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]), 4 => simd_shuffle!(a, b, [0, 1, 2, 3, 16 + LANE2 as u32, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]), 5 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 16 + LANE2 as u32, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]), 6 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 16 + LANE2 as u32, 7, 8, 9, 10, 11, 12, 13, 14, 15]), 7 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 16 + LANE2 as u32, 8, 9, 10, 11, 12, 13, 14, 15]), 8 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 16 + LANE2 as u32, 9, 10, 11, 12, 13, 14, 15]), 9 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 16 + LANE2 as u32, 10, 11, 12, 13, 14, 15]), 10 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 16 + LANE2 as u32, 11, 12, 13, 14, 15]), 11 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 16 + LANE2 as u32, 12, 13, 14, 15]), 12 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 16 + LANE2 as u32, 13, 14, 15]), 13 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 16 + LANE2 as u32, 14, 15]), 14 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 16 + LANE2 as u32, 15]), 15 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 16 + LANE2 as u32]), _ => unreachable_unchecked(), }']
|
||||
- [int16x8_t, int16x4_t, int16x8_t, '3', '2', ' let b: int16x8_t = simd_shuffle!(b, b, [0, 1, 2, 3, 4, 5, 6, 7]);', 'match LANE1 & 0b111 { 0 => simd_shuffle!(a, b, [8 + LANE2 as u32, 1, 2, 3, 4, 5, 6, 7]), 1 => simd_shuffle!(a, b, [0, 8 + LANE2 as u32, 2, 3, 4, 5, 6, 7]), 2 => simd_shuffle!(a, b, [0, 1, 8 + LANE2 as u32, 3, 4, 5, 6, 7]), 3 => simd_shuffle!(a, b, [0, 1, 2, 8 + LANE2 as u32, 4, 5, 6, 7]), 4 => simd_shuffle!(a, b, [0, 1, 2, 3, 8 + LANE2 as u32, 5, 6, 7]), 5 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 8 + LANE2 as u32, 6, 7]), 6 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 8 + LANE2 as u32, 7]), 7 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 8 + LANE2 as u32]), _ => unreachable_unchecked(), }']
|
||||
- [int32x4_t, int32x2_t, int32x4_t, '2', '1', ' let b: int32x4_t = simd_shuffle!(b, b, [0, 1, 2, 3]);', 'match LANE1 & 0b11 { 0 => simd_shuffle!(a, b, [4 + LANE2 as u32, 1, 2, 3]), 1 => simd_shuffle!(a, b, [0, 4 + LANE2 as u32, 2, 3]), 2 => simd_shuffle!(a, b, [0, 1, 4 + LANE2 as u32, 3]), 3 => simd_shuffle!(a, b, [0, 1, 2, 4 + LANE2 as u32]), _ => unreachable_unchecked(), }']
|
||||
- [uint8x16_t, uint8x8_t, uint8x16_t, '4', '3', ' let b: uint8x16_t = simd_shuffle!(b, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]);', 'match LANE1 & 0b1111 { 0 => simd_shuffle!(a, b, [16 + LANE2 as u32, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]), 1 => simd_shuffle!(a, b, [0, 16 + LANE2 as u32, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]), 2 => simd_shuffle!(a, b, [0, 1, 16 + LANE2 as u32, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]), 3 => simd_shuffle!(a, b, [0, 1, 2, 16 + LANE2 as u32, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]), 4 => simd_shuffle!(a, b, [0, 1, 2, 3, 16 + LANE2 as u32, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]), 5 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 16 + LANE2 as u32, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]), 6 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 16 + LANE2 as u32, 7, 8, 9, 10, 11, 12, 13, 14, 15]), 7 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 16 + LANE2 as u32, 8, 9, 10, 11, 12, 13, 14, 15]), 8 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 16 + LANE2 as u32, 9, 10, 11, 12, 13, 14, 15]), 9 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 16 + LANE2 as u32, 10, 11, 12, 13, 14, 15]), 10 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 16 + LANE2 as u32, 11, 12, 13, 14, 15]), 11 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 16 + LANE2 as u32, 12, 13, 14, 15]), 12 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 16 + LANE2 as u32, 13, 14, 15]), 13 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 16 + LANE2 as u32, 14, 15]), 14 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 16 + LANE2 as u32, 15]), 15 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 16 + LANE2 as u32]), _ => unreachable_unchecked(), }']
|
||||
- [uint16x8_t, uint16x4_t, uint16x8_t, '3', '2', ' let b: uint16x8_t = simd_shuffle!(b, b, [0, 1, 2, 3, 4, 5, 6, 7]);', 'match LANE1 & 0b111 { 0 => simd_shuffle!(a, b, [8 + LANE2 as u32, 1, 2, 3, 4, 5, 6, 7]), 1 => simd_shuffle!(a, b, [0, 8 + LANE2 as u32, 2, 3, 4, 5, 6, 7]), 2 => simd_shuffle!(a, b, [0, 1, 8 + LANE2 as u32, 3, 4, 5, 6, 7]), 3 => simd_shuffle!(a, b, [0, 1, 2, 8 + LANE2 as u32, 4, 5, 6, 7]), 4 => simd_shuffle!(a, b, [0, 1, 2, 3, 8 + LANE2 as u32, 5, 6, 7]), 5 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 8 + LANE2 as u32, 6, 7]), 6 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 8 + LANE2 as u32, 7]), 7 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 8 + LANE2 as u32]), _ => unreachable_unchecked(), }']
|
||||
- [uint32x4_t, uint32x2_t, uint32x4_t, '2', '1', ' let b: uint32x4_t = simd_shuffle!(b, b, [0, 1, 2, 3]);', 'match LANE1 & 0b11 { 0 => simd_shuffle!(a, b, [4 + LANE2 as u32, 1, 2, 3]), 1 => simd_shuffle!(a, b, [0, 4 + LANE2 as u32, 2, 3]), 2 => simd_shuffle!(a, b, [0, 1, 4 + LANE2 as u32, 3]), 3 => simd_shuffle!(a, b, [0, 1, 2, 4 + LANE2 as u32]), _ => unreachable_unchecked(), }']
|
||||
- [poly8x16_t, poly8x8_t, poly8x16_t, '4', '3', ' let b: poly8x16_t = simd_shuffle!(b, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]);', 'match LANE1 & 0b1111 { 0 => simd_shuffle!(a, b, [16 + LANE2 as u32, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]), 1 => simd_shuffle!(a, b, [0, 16 + LANE2 as u32, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]), 2 => simd_shuffle!(a, b, [0, 1, 16 + LANE2 as u32, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]), 3 => simd_shuffle!(a, b, [0, 1, 2, 16 + LANE2 as u32, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]), 4 => simd_shuffle!(a, b, [0, 1, 2, 3, 16 + LANE2 as u32, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]), 5 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 16 + LANE2 as u32, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]), 6 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 16 + LANE2 as u32, 7, 8, 9, 10, 11, 12, 13, 14, 15]), 7 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 16 + LANE2 as u32, 8, 9, 10, 11, 12, 13, 14, 15]), 8 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 16 + LANE2 as u32, 9, 10, 11, 12, 13, 14, 15]), 9 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 16 + LANE2 as u32, 10, 11, 12, 13, 14, 15]), 10 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 16 + LANE2 as u32, 11, 12, 13, 14, 15]), 11 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 16 + LANE2 as u32, 12, 13, 14, 15]), 12 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 16 + LANE2 as u32, 13, 14, 15]), 13 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 16 + LANE2 as u32, 14, 15]), 14 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 16 + LANE2 as u32, 15]), 15 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 16 + LANE2 as u32]), _ => unreachable_unchecked(), }']
|
||||
- [poly16x8_t, poly16x4_t, poly16x8_t, '3', '2', ' let b: poly16x8_t = simd_shuffle!(b, b, [0, 1, 2, 3, 4, 5, 6, 7]);', 'match LANE1 & 0b111 { 0 => simd_shuffle!(a, b, [8 + LANE2 as u32, 1, 2, 3, 4, 5, 6, 7]), 1 => simd_shuffle!(a, b, [0, 8 + LANE2 as u32, 2, 3, 4, 5, 6, 7]), 2 => simd_shuffle!(a, b, [0, 1, 8 + LANE2 as u32, 3, 4, 5, 6, 7]), 3 => simd_shuffle!(a, b, [0, 1, 2, 8 + LANE2 as u32, 4, 5, 6, 7]), 4 => simd_shuffle!(a, b, [0, 1, 2, 3, 8 + LANE2 as u32, 5, 6, 7]), 5 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 8 + LANE2 as u32, 6, 7]), 6 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 8 + LANE2 as u32, 7]), 7 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 8 + LANE2 as u32]), _ => unreachable_unchecked(), }']
|
||||
- [int8x16_t, int8x8_t, int8x16_t, '4', '3']
|
||||
- [int16x8_t, int16x4_t, int16x8_t, '3', '2']
|
||||
- [int32x4_t, int32x2_t, int32x4_t, '2', '1']
|
||||
- [uint8x16_t, uint8x8_t, uint8x16_t, '4', '3']
|
||||
- [uint16x8_t, uint16x4_t, uint16x8_t, '3', '2']
|
||||
- [uint32x4_t, uint32x2_t, uint32x4_t, '2', '1']
|
||||
- [poly8x16_t, poly8x8_t, poly8x16_t, '4', '3']
|
||||
- [poly16x8_t, poly16x4_t, poly16x8_t, '3', '2']
|
||||
compose:
|
||||
- FnCall: [static_assert_uimm_bits!, [LANE1, '{type[3]}']]
|
||||
- FnCall: [static_assert_uimm_bits!, [LANE2, '{type[4]}']]
|
||||
- Identifier: ["{type[5]}", UnsafeSymbol]
|
||||
- Identifier: ["{type[6]}", UnsafeSymbol]
|
||||
- Let: [b, '{neon_type[2]}', {FnCall: ['vcombine{neon_type[1].no}', [b, b]]}]
|
||||
- FnCall: ['vset{neon_type[0].lane_nox}', [{FnCall: ['vget{neon_type[2].lane_nox}', [b], [LANE2]]}, a], [LANE1]]
|
||||
|
||||
- name: "vcopy_lane_{neon_type[0]}"
|
||||
doc: "Insert vector element from another vector element"
|
||||
arguments: ["_a: {neon_type[0]}", "b: {neon_type[0]}"]
|
||||
return_type: "{neon_type[0]}"
|
||||
attr:
|
||||
- FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [nop, 'LANE1 = {type[1]}', 'LANE2 = {type[1]}']]}]]
|
||||
- FnCall: [rustc_legacy_const_generics, ['1', '3']]
|
||||
- *neon-stable
|
||||
static_defs: ['const LANE1: i32, const LANE2: i32']
|
||||
safety: safe
|
||||
types:
|
||||
- [float64x1_t, '0', 'b']
|
||||
- [poly64x1_t, '0', 'b']
|
||||
- [uint64x1_t, '0', 'b']
|
||||
- [int64x1_t, '0', 'b']
|
||||
compose:
|
||||
- FnCall: [static_assert!, ['LANE1 == {type[1]}']]
|
||||
- FnCall: [static_assert!, ['LANE2 == {type[1]}']]
|
||||
- Identifier: ["{type[2]}", Symbol]
|
||||
|
||||
- name: "vcopy_laneq_{neon_type[0]}"
|
||||
doc: "Insert vector element from another vector element"
|
||||
arguments: ["_a: {neon_type[1]}", "b: {neon_type[0]}"]
|
||||
return_type: "{neon_type[1]}"
|
||||
attr:
|
||||
- FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [nop, 'LANE1 = {type[2]}', 'LANE2 = {type[3]}']]}]]
|
||||
- FnCall: [rustc_legacy_const_generics, ['1', '3']]
|
||||
- *neon-stable
|
||||
static_defs: ['const LANE1: i32, const LANE2: i32']
|
||||
safety: safe
|
||||
types:
|
||||
- [float64x2_t, float64x1_t, '0', '1']
|
||||
- [poly64x2_t, poly64x1_t, '0', '1']
|
||||
- [uint64x2_t, uint64x1_t, '0', '1']
|
||||
- [int64x2_t, int64x1_t, '0', '1']
|
||||
compose:
|
||||
- FnCall: [static_assert!, ['LANE1 == {type[2]}']]
|
||||
- FnCall: [static_assert_uimm_bits!, [LANE2, '{type[3]}']]
|
||||
- FnCall: [transmute, [{FnCall: ['vget{neon_type[0].lane_nox}', [b], [LANE2]]}]]
|
||||
|
||||
- name: "vcopy{neon_type[0].laneq_nox}"
|
||||
doc: "Insert vector element from another vector element"
|
||||
arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}"]
|
||||
return_type: "{neon_type[2]}"
|
||||
attr:
|
||||
- FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [mov, 'LANE1 = 0', 'LANE2 = 0']]}]]
|
||||
- FnCall: [cfg_attr, [*all-test-little-endian, {FnCall: [assert_instr, [mov, 'LANE1 = 0', 'LANE2 = 0']]}]]
|
||||
- FnCall: [rustc_legacy_const_generics, ['1', '3']]
|
||||
- FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
|
||||
- *neon-stable
|
||||
static_defs: ['const LANE1: i32, const LANE2: i32']
|
||||
safety: safe
|
||||
types:
|
||||
- [int8x16_t, int8x16_t, int8x16_t, '4', '4', ' match LANE1 & 0b1111 { 0 => simd_shuffle!(a, b, [16 + LANE2 as u32, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]), 1 => simd_shuffle!(a, b, [0, 16 + LANE2 as u32, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]), 2 => simd_shuffle!(a, b, [0, 1, 16 + LANE2 as u32, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]), 3 => simd_shuffle!(a, b, [0, 1, 2, 16 + LANE2 as u32, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]), 4 => simd_shuffle!(a, b, [0, 1, 2, 3, 16 + LANE2 as u32, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]), 5 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 16 + LANE2 as u32, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]), 6 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 16 + LANE2 as u32, 7, 8, 9, 10, 11, 12, 13, 14, 15]), 7 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 16 + LANE2 as u32, 8, 9, 10, 11, 12, 13, 14, 15]), 8 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 16 + LANE2 as u32, 9, 10, 11, 12, 13, 14, 15]), 9 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 16 + LANE2 as u32, 10, 11, 12, 13, 14, 15]), 10 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 16 + LANE2 as u32, 11, 12, 13, 14, 15]), 11 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 16 + LANE2 as u32, 12, 13, 14, 15]), 12 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 16 + LANE2 as u32, 13, 14, 15]), 13 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 16 + LANE2 as u32, 14, 15]), 14 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 16 + LANE2 as u32, 15]), 15 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 16 + LANE2 as u32]), _ => unreachable_unchecked(), }']
|
||||
- [int16x8_t, int16x8_t, int16x8_t, '3', '3', ' match LANE1 & 0b111 { 0 => simd_shuffle!(a, b, [8 + LANE2 as u32, 1, 2, 3, 4, 5, 6, 7]), 1 => simd_shuffle!(a, b, [0, 8 + LANE2 as u32, 2, 3, 4, 5, 6, 7]), 2 => simd_shuffle!(a, b, [0, 1, 8 + LANE2 as u32, 3, 4, 5, 6, 7]), 3 => simd_shuffle!(a, b, [0, 1, 2, 8 + LANE2 as u32, 4, 5, 6, 7]), 4 => simd_shuffle!(a, b, [0, 1, 2, 3, 8 + LANE2 as u32, 5, 6, 7]), 5 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 8 + LANE2 as u32, 6, 7]), 6 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 8 + LANE2 as u32, 7]), 7 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 8 + LANE2 as u32]), _ => unreachable_unchecked(), }']
|
||||
- [int32x4_t, int32x4_t, int32x4_t, '2', '2', ' match LANE1 & 0b11 { 0 => simd_shuffle!(a, b, [4 + LANE2 as u32, 1, 2, 3]), 1 => simd_shuffle!(a, b, [0, 4 + LANE2 as u32, 2, 3]), 2 => simd_shuffle!(a, b, [0, 1, 4 + LANE2 as u32, 3]), 3 => simd_shuffle!(a, b, [0, 1, 2, 4 + LANE2 as u32]), _ => unreachable_unchecked(), }']
|
||||
- [int64x2_t, int64x2_t, int64x2_t, '1', '1', ' match LANE1 & 0b1 { 0 => simd_shuffle!(a, b, [2 + LANE2 as u32, 1]), 1 => simd_shuffle!(a, b, [0, 2 + LANE2 as u32]), _ => unreachable_unchecked(), }']
|
||||
- [uint8x16_t, uint8x16_t, uint8x16_t, '4', '4', ' match LANE1 & 0b1111 { 0 => simd_shuffle!(a, b, [16 + LANE2 as u32, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]), 1 => simd_shuffle!(a, b, [0, 16 + LANE2 as u32, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]), 2 => simd_shuffle!(a, b, [0, 1, 16 + LANE2 as u32, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]), 3 => simd_shuffle!(a, b, [0, 1, 2, 16 + LANE2 as u32, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]), 4 => simd_shuffle!(a, b, [0, 1, 2, 3, 16 + LANE2 as u32, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]), 5 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 16 + LANE2 as u32, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]), 6 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 16 + LANE2 as u32, 7, 8, 9, 10, 11, 12, 13, 14, 15]), 7 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 16 + LANE2 as u32, 8, 9, 10, 11, 12, 13, 14, 15]), 8 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 16 + LANE2 as u32, 9, 10, 11, 12, 13, 14, 15]), 9 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 16 + LANE2 as u32, 10, 11, 12, 13, 14, 15]), 10 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 16 + LANE2 as u32, 11, 12, 13, 14, 15]), 11 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 16 + LANE2 as u32, 12, 13, 14, 15]), 12 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 16 + LANE2 as u32, 13, 14, 15]), 13 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 16 + LANE2 as u32, 14, 15]), 14 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 16 + LANE2 as u32, 15]), 15 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 16 + LANE2 as u32]), _ => unreachable_unchecked(), }']
|
||||
- [uint16x8_t, uint16x8_t, uint16x8_t, '3', '3', ' match LANE1 & 0b111 { 0 => simd_shuffle!(a, b, [8 + LANE2 as u32, 1, 2, 3, 4, 5, 6, 7]), 1 => simd_shuffle!(a, b, [0, 8 + LANE2 as u32, 2, 3, 4, 5, 6, 7]), 2 => simd_shuffle!(a, b, [0, 1, 8 + LANE2 as u32, 3, 4, 5, 6, 7]), 3 => simd_shuffle!(a, b, [0, 1, 2, 8 + LANE2 as u32, 4, 5, 6, 7]), 4 => simd_shuffle!(a, b, [0, 1, 2, 3, 8 + LANE2 as u32, 5, 6, 7]), 5 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 8 + LANE2 as u32, 6, 7]), 6 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 8 + LANE2 as u32, 7]), 7 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 8 + LANE2 as u32]), _ => unreachable_unchecked(), }']
|
||||
- [uint32x4_t, uint32x4_t, uint32x4_t, '2', '2', ' match LANE1 & 0b11 { 0 => simd_shuffle!(a, b, [4 + LANE2 as u32, 1, 2, 3]), 1 => simd_shuffle!(a, b, [0, 4 + LANE2 as u32, 2, 3]), 2 => simd_shuffle!(a, b, [0, 1, 4 + LANE2 as u32, 3]), 3 => simd_shuffle!(a, b, [0, 1, 2, 4 + LANE2 as u32]), _ => unreachable_unchecked(), }']
|
||||
- [uint64x2_t, uint64x2_t, uint64x2_t, '1', '1', ' match LANE1 & 0b1 { 0 => simd_shuffle!(a, b, [2 + LANE2 as u32, 1]), 1 => simd_shuffle!(a, b, [0, 2 + LANE2 as u32]), _ => unreachable_unchecked(), }']
|
||||
- [poly8x16_t, poly8x16_t, poly8x16_t, '4', '4', ' match LANE1 & 0b1111 { 0 => simd_shuffle!(a, b, [16 + LANE2 as u32, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]), 1 => simd_shuffle!(a, b, [0, 16 + LANE2 as u32, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]), 2 => simd_shuffle!(a, b, [0, 1, 16 + LANE2 as u32, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]), 3 => simd_shuffle!(a, b, [0, 1, 2, 16 + LANE2 as u32, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]), 4 => simd_shuffle!(a, b, [0, 1, 2, 3, 16 + LANE2 as u32, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]), 5 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 16 + LANE2 as u32, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]), 6 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 16 + LANE2 as u32, 7, 8, 9, 10, 11, 12, 13, 14, 15]), 7 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 16 + LANE2 as u32, 8, 9, 10, 11, 12, 13, 14, 15]), 8 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 16 + LANE2 as u32, 9, 10, 11, 12, 13, 14, 15]), 9 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 16 + LANE2 as u32, 10, 11, 12, 13, 14, 15]), 10 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 16 + LANE2 as u32, 11, 12, 13, 14, 15]), 11 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 16 + LANE2 as u32, 12, 13, 14, 15]), 12 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 16 + LANE2 as u32, 13, 14, 15]), 13 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 16 + LANE2 as u32, 14, 15]), 14 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 16 + LANE2 as u32, 15]), 15 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 16 + LANE2 as u32]), _ => unreachable_unchecked(), }']
|
||||
- [poly16x8_t, poly16x8_t, poly16x8_t, '3', '3', ' match LANE1 & 0b111 { 0 => simd_shuffle!(a, b, [8 + LANE2 as u32, 1, 2, 3, 4, 5, 6, 7]), 1 => simd_shuffle!(a, b, [0, 8 + LANE2 as u32, 2, 3, 4, 5, 6, 7]), 2 => simd_shuffle!(a, b, [0, 1, 8 + LANE2 as u32, 3, 4, 5, 6, 7]), 3 => simd_shuffle!(a, b, [0, 1, 2, 8 + LANE2 as u32, 4, 5, 6, 7]), 4 => simd_shuffle!(a, b, [0, 1, 2, 3, 8 + LANE2 as u32, 5, 6, 7]), 5 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 8 + LANE2 as u32, 6, 7]), 6 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 8 + LANE2 as u32, 7]), 7 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 8 + LANE2 as u32]), _ => unreachable_unchecked(), }']
|
||||
- [poly64x2_t, poly64x2_t, poly64x2_t, '1', '1', ' match LANE1 & 0b1 { 0 => simd_shuffle!(a, b, [2 + LANE2 as u32, 1]), 1 => simd_shuffle!(a, b, [0, 2 + LANE2 as u32]), _ => unreachable_unchecked(), }']
|
||||
- [float32x4_t, float32x4_t, float32x4_t, '2', '2', ' match LANE1 & 0b11 { 0 => simd_shuffle!(a, b, [4 + LANE2 as u32, 1, 2, 3]), 1 => simd_shuffle!(a, b, [0, 4 + LANE2 as u32, 2, 3]), 2 => simd_shuffle!(a, b, [0, 1, 4 + LANE2 as u32, 3]), 3 => simd_shuffle!(a, b, [0, 1, 2, 4 + LANE2 as u32]), _ => unreachable_unchecked(), }']
|
||||
- [float64x2_t, float64x2_t, float64x2_t, '1', '1', ' match LANE1 & 0b1 { 0 => simd_shuffle!(a, b, [2 + LANE2 as u32, 1]), 1 => simd_shuffle!(a, b, [0, 2 + LANE2 as u32]), _ => unreachable_unchecked(), }']
|
||||
- [int8x16_t, int8x16_t, int8x16_t, '4', '4']
|
||||
- [int16x8_t, int16x8_t, int16x8_t, '3', '3']
|
||||
- [int32x4_t, int32x4_t, int32x4_t, '2', '2']
|
||||
- [int64x2_t, int64x2_t, int64x2_t, '1', '1']
|
||||
- [uint8x16_t, uint8x16_t, uint8x16_t, '4', '4']
|
||||
- [uint16x8_t, uint16x8_t, uint16x8_t, '3', '3']
|
||||
- [uint32x4_t, uint32x4_t, uint32x4_t, '2', '2']
|
||||
- [uint64x2_t, uint64x2_t, uint64x2_t, '1', '1']
|
||||
- [poly8x16_t, poly8x16_t, poly8x16_t, '4', '4']
|
||||
- [poly16x8_t, poly16x8_t, poly16x8_t, '3', '3']
|
||||
- [float32x4_t, float32x4_t, float32x4_t, '2', '2']
|
||||
- [float64x2_t, float64x2_t, float64x2_t, '1', '1']
|
||||
compose:
|
||||
- FnCall: [static_assert_uimm_bits!, [LANE1, '{type[3]}']]
|
||||
- FnCall: [static_assert_uimm_bits!, [LANE2, '{type[4]}']]
|
||||
- Identifier: ["{type[5]}", UnsafeSymbol]
|
||||
- FnCall: ['vset{neon_type[0].lane_nox}', [{FnCall: ['vget{neon_type[1].lane_nox}', [b], [LANE2]]}, a], [LANE1]]
|
||||
|
||||
- name: "vcopy{neon_type[0].laneq_nox}"
|
||||
doc: "Insert vector element from another vector element"
|
||||
arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}"]
|
||||
return_type: "{neon_type[2]}"
|
||||
attr:
|
||||
- FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [mov, 'LANE1 = 0', 'LANE2 = 0']]}]]
|
||||
- FnCall: [cfg_attr, [*all-test-little-endian, {FnCall: [assert_instr, [mov, 'LANE1 = 0', 'LANE2 = 0']]}]]
|
||||
- FnCall: [rustc_legacy_const_generics, ['1', '3']]
|
||||
- FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
|
||||
- *neon-stable
|
||||
static_defs: ['const LANE1: i32, const LANE2: i32']
|
||||
safety: safe
|
||||
big_endian_inverse: true
|
||||
types:
|
||||
- [int8x8_t, int8x16_t, int8x8_t, '3', '4', ' let a: int8x16_t = simd_shuffle!(a, a, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]);', 'match LANE1 & 0b111 { 0 => simd_shuffle!(a, b, [16 + LANE2 as u32, 1, 2, 3, 4, 5, 6, 7]), 1 => simd_shuffle!(a, b, [0, 16 + LANE2 as u32, 2, 3, 4, 5, 6, 7]), 2 => simd_shuffle!(a, b, [0, 1, 16 + LANE2 as u32, 3, 4, 5, 6, 7]), 3 => simd_shuffle!(a, b, [0, 1, 2, 16 + LANE2 as u32, 4, 5, 6, 7]), 4 => simd_shuffle!(a, b, [0, 1, 2, 3, 16 + LANE2 as u32, 5, 6, 7]), 5 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 16 + LANE2 as u32, 6, 7]), 6 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 16 + LANE2 as u32, 7]), 7 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 16 + LANE2 as u32]), _ => unreachable_unchecked(), }']
|
||||
- [int16x4_t, int16x8_t, int16x4_t, '2', '3', ' let a: int16x8_t = simd_shuffle!(a, a, [0, 1, 2, 3, 4, 5, 6, 7]);', 'match LANE1 & 0b11 { 0 => simd_shuffle!(a, b, [8 + LANE2 as u32, 1, 2, 3]), 1 => simd_shuffle!(a, b, [0, 8 + LANE2 as u32, 2, 3]), 2 => simd_shuffle!(a, b, [0, 1, 8 + LANE2 as u32, 3]), 3 => simd_shuffle!(a, b, [0, 1, 2, 8 + LANE2 as u32]), _ => unreachable_unchecked(), }']
|
||||
- [int32x2_t, int32x4_t, int32x2_t, '1', '2', ' let a: int32x4_t = simd_shuffle!(a, a, [0, 1, 2, 3]);', 'match LANE1 & 0b1 { 0 => simd_shuffle!(a, b, [4 + LANE2 as u32, 1]), 1 => simd_shuffle!(a, b, [0, 4 + LANE2 as u32]), _ => unreachable_unchecked(), }']
|
||||
- [uint8x8_t, uint8x16_t, uint8x8_t, '3', '4', ' let a: uint8x16_t = simd_shuffle!(a, a, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]);', 'match LANE1 & 0b111 { 0 => simd_shuffle!(a, b, [16 + LANE2 as u32, 1, 2, 3, 4, 5, 6, 7]), 1 => simd_shuffle!(a, b, [0, 16 + LANE2 as u32, 2, 3, 4, 5, 6, 7]), 2 => simd_shuffle!(a, b, [0, 1, 16 + LANE2 as u32, 3, 4, 5, 6, 7]), 3 => simd_shuffle!(a, b, [0, 1, 2, 16 + LANE2 as u32, 4, 5, 6, 7]), 4 => simd_shuffle!(a, b, [0, 1, 2, 3, 16 + LANE2 as u32, 5, 6, 7]), 5 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 16 + LANE2 as u32, 6, 7]), 6 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 16 + LANE2 as u32, 7]), 7 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 16 + LANE2 as u32]), _ => unreachable_unchecked(), }']
|
||||
- [uint16x4_t, uint16x8_t, uint16x4_t, '2', '3', ' let a: uint16x8_t = simd_shuffle!(a, a, [0, 1, 2, 3, 4, 5, 6, 7]);', 'match LANE1 & 0b11 { 0 => simd_shuffle!(a, b, [8 + LANE2 as u32, 1, 2, 3]), 1 => simd_shuffle!(a, b, [0, 8 + LANE2 as u32, 2, 3]), 2 => simd_shuffle!(a, b, [0, 1, 8 + LANE2 as u32, 3]), 3 => simd_shuffle!(a, b, [0, 1, 2, 8 + LANE2 as u32]), _ => unreachable_unchecked(), }']
|
||||
- [uint32x2_t, uint32x4_t, uint32x2_t, '1', '2', 'let a: uint32x4_t = simd_shuffle!(a, a, [0, 1, 2, 3]);', 'match LANE1 & 0b1 { 0 => simd_shuffle!(a, b, [4 + LANE2 as u32, 1]), 1 => simd_shuffle!(a, b, [0, 4 + LANE2 as u32]), _ => unreachable_unchecked(), }']
|
||||
- [poly8x8_t, poly8x16_t, poly8x8_t, '3', '4', ' let a: poly8x16_t = simd_shuffle!(a, a, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]);', 'match LANE1 & 0b111 { 0 => simd_shuffle!(a, b, [16 + LANE2 as u32, 1, 2, 3, 4, 5, 6, 7]), 1 => simd_shuffle!(a, b, [0, 16 + LANE2 as u32, 2, 3, 4, 5, 6, 7]), 2 => simd_shuffle!(a, b, [0, 1, 16 + LANE2 as u32, 3, 4, 5, 6, 7]), 3 => simd_shuffle!(a, b, [0, 1, 2, 16 + LANE2 as u32, 4, 5, 6, 7]), 4 => simd_shuffle!(a, b, [0, 1, 2, 3, 16 + LANE2 as u32, 5, 6, 7]), 5 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 16 + LANE2 as u32, 6, 7]), 6 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 16 + LANE2 as u32, 7]), 7 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 16 + LANE2 as u32]), _ => unreachable_unchecked(), }']
|
||||
- [poly16x4_t, poly16x8_t, poly16x4_t, '2', '3', ' let a: poly16x8_t = simd_shuffle!(a, a, [0, 1, 2, 3, 4, 5, 6, 7]);', 'match LANE1 & 0b11 { 0 => simd_shuffle!(a, b, [8 + LANE2 as u32, 1, 2, 3]), 1 => simd_shuffle!(a, b, [0, 8 + LANE2 as u32, 2, 3]), 2 => simd_shuffle!(a, b, [0, 1, 8 + LANE2 as u32, 3]), 3 => simd_shuffle!(a, b, [0, 1, 2, 8 + LANE2 as u32]), _ => unreachable_unchecked(), }']
|
||||
- [float32x2_t, float32x4_t, float32x2_t, '1', '2', ' let a: float32x4_t = simd_shuffle!(a, a, [0, 1, 2, 3]);', 'match LANE1 & 0b1 { 0 => simd_shuffle!(a, b, [4 + LANE2 as u32, 1]), 1 => simd_shuffle!(a, b, [0, 4 + LANE2 as u32]), _ => unreachable_unchecked(), }']
|
||||
- [poly64x2_t, poly64x2_t, poly64x2_t, '1', '1']
|
||||
compose:
|
||||
- FnCall: [static_assert_uimm_bits!, [LANE1, '{type[3]}']]
|
||||
- FnCall: [static_assert_uimm_bits!, [LANE2, '{type[4]}']]
|
||||
- Identifier: ["{type[5]}", UnsafeSymbol]
|
||||
- Identifier: ["{type[6]}", UnsafeSymbol]
|
||||
- FnCall: [simd_insert!, [a, LANE1 as u32, {FnCall: [simd_extract!, [b, LANE2 as u32, p64]]}]]
|
||||
|
||||
- name: "vcopy{neon_type[0].laneq_nox}"
|
||||
doc: "Insert vector element from another vector element"
|
||||
arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}"]
|
||||
return_type: "{neon_type[2]}"
|
||||
attr:
|
||||
- FnCall: [cfg_attr, [*all-test-little-endian, {FnCall: [assert_instr, [mov, 'LANE1 = 0', 'LANE2 = 0']]}]]
|
||||
- FnCall: [rustc_legacy_const_generics, ['1', '3']]
|
||||
- *neon-stable
|
||||
static_defs: ['const LANE1: i32, const LANE2: i32']
|
||||
safety: safe
|
||||
types:
|
||||
- [int8x8_t, int8x16_t, int8x8_t, '3', '4']
|
||||
- [int16x4_t, int16x8_t, int16x4_t, '2', '3']
|
||||
- [int32x2_t, int32x4_t, int32x2_t, '1', '2']
|
||||
- [uint8x8_t, uint8x16_t, uint8x8_t, '3', '4']
|
||||
- [uint16x4_t, uint16x8_t, uint16x4_t, '2', '3']
|
||||
- [uint32x2_t, uint32x4_t, uint32x2_t, '1', '2']
|
||||
- [poly8x8_t, poly8x16_t, poly8x8_t, '3', '4']
|
||||
- [poly16x4_t, poly16x8_t, poly16x4_t, '2', '3']
|
||||
- [float32x2_t, float32x4_t, float32x2_t, '1', '2']
|
||||
compose:
|
||||
- FnCall: [static_assert_uimm_bits!, [LANE1, '{type[3]}']]
|
||||
- FnCall: [static_assert_uimm_bits!, [LANE2, '{type[4]}']]
|
||||
- FnCall: ['vset{neon_type[0].lane_nox}', [{FnCall: ['vget{neon_type[1].lane_nox}', [b], [LANE2]]}, a], [LANE1]]
|
||||
|
||||
- name: "vcopyq_lane_{neon_type[0]}"
|
||||
doc: "Insert vector element from another vector element"
|
||||
arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}"]
|
||||
return_type: "{neon_type[0]}"
|
||||
attr:
|
||||
- FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [mov, 'LANE1 = 1', 'LANE2 = 0']]}]]
|
||||
- FnCall: [cfg_attr, [*all-test-little-endian, {FnCall: [assert_instr, [mov, 'LANE1 = 1', 'LANE2 = 0']]}]]
|
||||
- FnCall: [rustc_legacy_const_generics, ['1', '3']]
|
||||
- FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
|
||||
- *neon-stable
|
||||
static_defs: ['const LANE1: i32, const LANE2: i32']
|
||||
safety: safe
|
||||
types:
|
||||
- [int64x2_t, int64x1_t, ' let b: int64x2_t = simd_shuffle!(b, b, [0, 1]);', 'match LANE1 & 0b1 { 0 => simd_shuffle!(a, b, [2 + LANE2 as u32, 1]), 1 => simd_shuffle!(a, b, [0, 2 + LANE2 as u32]), _ => unreachable_unchecked(), }']
|
||||
- [uint64x2_t, uint64x1_t, ' let b: uint64x2_t = simd_shuffle!(b, b, [0, 1]);', 'match LANE1 & 0b1 { 0 => simd_shuffle!(a, b, [2 + LANE2 as u32, 1]), 1 => simd_shuffle!(a, b, [0, 2 + LANE2 as u32]), _ => unreachable_unchecked(), }']
|
||||
- [poly64x2_t, poly64x1_t, ' let b: poly64x2_t = simd_shuffle!(b, b, [0, 1]);', 'match LANE1 & 0b1 { 0 => simd_shuffle!(a, b, [2 + LANE2 as u32, 1]), 1 => simd_shuffle!(a, b, [0, 2 + LANE2 as u32]), _ => unreachable_unchecked(), }']
|
||||
- [float64x2_t, float64x1_t, ' let b: float64x2_t = simd_shuffle!(b, b, [0, 1]);', 'match LANE1 & 0b1 { 0 => simd_shuffle!(a, b, [2 + LANE2 as u32, 1]), 1 => simd_shuffle!(a, b, [0, 2 + LANE2 as u32]), _ => unreachable_unchecked(), }']
|
||||
- [int64x2_t, int64x1_t]
|
||||
- [uint64x2_t, uint64x1_t]
|
||||
- [float64x2_t, float64x1_t]
|
||||
compose:
|
||||
- FnCall: [static_assert_uimm_bits!, [LANE1, '1']]
|
||||
- FnCall: [static_assert!, ['LANE2 == 0']]
|
||||
- Identifier: ['{type[2]}', UnsafeSymbol]
|
||||
- Identifier: ['{type[3]}', UnsafeSymbol]
|
||||
- Let: [b, '{neon_type[0]}', {FnCall: ['vcombine{neon_type[1].no}', [b, b]]}]
|
||||
- FnCall: ['vset{neon_type[0].lane_nox}', [{FnCall: ['vget{neon_type[0].lane_nox}', [b], [LANE2]]}, a], [LANE1]]
|
||||
|
||||
- name: "vcopyq_lane_{neon_type[0]}"
|
||||
doc: "Insert vector element from another vector element"
|
||||
arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}"]
|
||||
return_type: "{neon_type[0]}"
|
||||
attr:
|
||||
- FnCall: [cfg_attr, [*all-test-little-endian, {FnCall: [assert_instr, [mov, 'LANE1 = 1', 'LANE2 = 0']]}]]
|
||||
- FnCall: [rustc_legacy_const_generics, ['1', '3']]
|
||||
- *neon-stable
|
||||
static_defs: ['const LANE1: i32, const LANE2: i32']
|
||||
safety: safe
|
||||
big_endian_inverse: true
|
||||
types:
|
||||
- [poly64x2_t, poly64x1_t]
|
||||
compose:
|
||||
- FnCall: [static_assert_uimm_bits!, [LANE1, '1']]
|
||||
- FnCall: [static_assert!, ['LANE2 == 0']]
|
||||
- Let: [b, '{neon_type[0]}', {FnCall: ['vcombine{neon_type[1].no}', [b, b]]}]
|
||||
- FnCall: [simd_insert!, [a, LANE1 as u32, {FnCall: [simd_extract!, [b, LANE2 as u32, p64]]}]]
|
||||
|
||||
- name: "vcopyq_lane_f32"
|
||||
doc: "Insert vector element from another vector element"
|
||||
@@ -9066,16 +8878,15 @@ intrinsics:
|
||||
attr:
|
||||
- FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [mov, 'LANE1 = 1', 'LANE2 = 0']]}]]
|
||||
- FnCall: [rustc_legacy_const_generics, ['1', '3']]
|
||||
- FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
|
||||
- *neon-stable
|
||||
static_defs: ['const LANE1: i32, const LANE2: i32']
|
||||
safety: safe
|
||||
types:
|
||||
- [float32x4_t, float32x2_t, ' let b: float32x4_t = simd_shuffle!(b, b, [0, 1, 2, 3]);', 'match LANE1 & 0b11 { 0 => simd_shuffle!(a, b, [4 + LANE2 as u32, 1, 2, 3]), 1 => simd_shuffle!(a, b, [0, 4 + LANE2 as u32, 2, 3]), 2 => simd_shuffle!(a, b, [0, 1, 4 + LANE2 as u32, 3]), 3 => simd_shuffle!(a, b, [0, 1, 2, 4 + LANE2 as u32]), _ => unreachable_unchecked(), }']
|
||||
- [float32x4_t, float32x2_t]
|
||||
compose:
|
||||
- FnCall: [static_assert_uimm_bits!, [LANE1, 2]]
|
||||
- FnCall: [static_assert_uimm_bits!, [LANE2, 1]]
|
||||
- Identifier: ["{type[2]}", UnsafeSymbol]
|
||||
- Identifier: ["{type[3]}", UnsafeSymbol]
|
||||
- FnCall: ['vset{neon_type[0].lane_nox}', [{FnCall: ['vget{neon_type[1].lane_nox}', [b], [LANE2]]}, a], [LANE1]]
|
||||
|
||||
- name: "vcreate_f64"
|
||||
doc: "Insert vector element from another vector element"
|
||||
@@ -9083,7 +8894,7 @@ intrinsics:
|
||||
return_type: "{neon_type[1]}"
|
||||
attr:
|
||||
- FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [nop]]}]]
|
||||
- FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
|
||||
- *neon-stable
|
||||
safety: safe
|
||||
types:
|
||||
- ["u64", float64x1_t]
|
||||
@@ -9097,9 +8908,10 @@ intrinsics:
|
||||
attr:
|
||||
- FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [nop, 'LANE = 0']]}]]
|
||||
- FnCall: [rustc_legacy_const_generics, ['2']]
|
||||
- FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
|
||||
- *neon-stable
|
||||
static_defs: ['const LANE: i32']
|
||||
safety: safe
|
||||
big_endian_inverse: true
|
||||
types:
|
||||
- ["f64", float64x1_t, float64x1_t]
|
||||
compose:
|
||||
@@ -9113,9 +8925,10 @@ intrinsics:
|
||||
attr:
|
||||
- FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [nop, 'LANE = 0']]}]]
|
||||
- FnCall: [rustc_legacy_const_generics, ['2']]
|
||||
- FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
|
||||
- *neon-stable
|
||||
static_defs: ['const LANE: i32']
|
||||
safety: safe
|
||||
big_endian_inverse: true
|
||||
types:
|
||||
- ["f64", float64x2_t, float64x2_t]
|
||||
compose:
|
||||
@@ -9128,7 +8941,7 @@ intrinsics:
|
||||
return_type: "{type}"
|
||||
attr:
|
||||
- FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [sshl]]}]]
|
||||
- FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
|
||||
- *neon-stable
|
||||
safety: safe
|
||||
types:
|
||||
- "i64"
|
||||
@@ -9146,7 +8959,7 @@ intrinsics:
|
||||
return_type: "{type[0]}"
|
||||
attr:
|
||||
- FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [ushl]]}]]
|
||||
- FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
|
||||
- *neon-stable
|
||||
safety: safe
|
||||
types:
|
||||
- ["u64", "i64"]
|
||||
@@ -9163,63 +8976,62 @@ intrinsics:
|
||||
arguments: ["a: {neon_type[0]}"]
|
||||
return_type: "{neon_type[1]}"
|
||||
attr:
|
||||
- FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [sshll2, 'N = 2']]}]]
|
||||
- FnCall: [cfg_attr, [*all-test-little-endian, {FnCall: [assert_instr, [sshll2, 'N = 2']]}]]
|
||||
- FnCall: [rustc_legacy_const_generics, ['1']]
|
||||
- FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
|
||||
- *neon-stable
|
||||
static_defs: ['const N: i32']
|
||||
safety: safe
|
||||
types:
|
||||
- [int8x16_t, int16x8_t, int8x8_t, 'N >= 0 && N <= 8', '[8, 9, 10, 11, 12, 13, 14, 15]']
|
||||
- [int16x8_t, int32x4_t, int16x4_t, 'N >= 0 && N <= 16', '[4, 5, 6, 7]']
|
||||
- [int32x4_t, int64x2_t, int32x2_t, 'N >= 0 && N <= 32', '[2, 3]']
|
||||
- [int8x16_t, int16x8_t, int8x8_t, 'N >= 0 && N <= 8']
|
||||
- [int16x8_t, int32x4_t, int16x4_t, 'N >= 0 && N <= 16']
|
||||
- [int32x4_t, int64x2_t, int32x2_t, 'N >= 0 && N <= 32']
|
||||
compose:
|
||||
- FnCall: [static_assert!, ["{type[3]}"]]
|
||||
- Let: [b, "{neon_type[2]}", {FnCall: [simd_shuffle!, [a, a, "{type[4]}"]]}]
|
||||
- FnCall: ["vshll_n_{neon_type[2]}::<N>", [b]]
|
||||
- Let: [b, {FnCall: ['vget_high_{neon_type[0]}', [a]]}]
|
||||
- FnCall: ["vshll_n_{neon_type[2]}", [b], [N]]
|
||||
|
||||
- name: "vshll_high_n_{neon_type[0]}"
|
||||
doc: "Signed shift left long"
|
||||
arguments: ["a: {neon_type[0]}"]
|
||||
return_type: "{neon_type[1]}"
|
||||
attr:
|
||||
- FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [ushll2, 'N = 2']]}]]
|
||||
- FnCall: [cfg_attr, [*all-test-little-endian, {FnCall: [assert_instr, [ushll2, 'N = 2']]}]]
|
||||
- FnCall: [rustc_legacy_const_generics, ['1']]
|
||||
- FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
|
||||
- *neon-stable
|
||||
static_defs: ['const N: i32']
|
||||
safety: safe
|
||||
types:
|
||||
- [uint8x16_t, uint16x8_t, uint8x8_t, 'N >= 0 && N <= 8', '[8, 9, 10, 11, 12, 13, 14, 15]']
|
||||
- [uint16x8_t, uint32x4_t, uint16x4_t, 'N >= 0 && N <= 16', '[4, 5, 6, 7]']
|
||||
- [uint32x4_t, uint64x2_t, uint32x2_t, 'N >= 0 && N <= 32', '[2, 3]']
|
||||
- [uint8x16_t, uint16x8_t, uint8x8_t, 'N >= 0 && N <= 8']
|
||||
- [uint16x8_t, uint32x4_t, uint16x4_t, 'N >= 0 && N <= 16']
|
||||
- [uint32x4_t, uint64x2_t, uint32x2_t, 'N >= 0 && N <= 32']
|
||||
compose:
|
||||
- FnCall: [static_assert!, ["{type[3]}"]]
|
||||
- Let: [b, "{neon_type[2]}", {FnCall: [simd_shuffle!, [a, a, "{type[4]}"]]}]
|
||||
- FnCall: ["vshll_n_{neon_type[2]}::<N>", [b]]
|
||||
- Let: [b, "{neon_type[2]}", {FnCall: ['vget_high_{neon_type[0]}', [a]]}]
|
||||
- FnCall: ["vshll_n_{neon_type[2]}", [b], [N]]
|
||||
|
||||
- name: "vshrn_high_n_{neon_type[1]}"
|
||||
doc: "Shift right narrow"
|
||||
arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}"]
|
||||
return_type: "{neon_type[2]}"
|
||||
attr:
|
||||
- FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [shrn2, 'N = 2']]}]]
|
||||
- FnCall: [cfg_attr, [*all-test-little-endian, {FnCall: [assert_instr, [shrn2, 'N = 2']]}]]
|
||||
- FnCall: [rustc_legacy_const_generics, ['2']]
|
||||
- FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
|
||||
- *neon-stable
|
||||
static_defs: ['const N: i32']
|
||||
safety: safe
|
||||
types:
|
||||
- [int8x8_t, int16x8_t, int8x16_t, 'N >= 1 && N <= 8', '[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]']
|
||||
- [int16x4_t, int32x4_t, int16x8_t, 'N >= 1 && N <= 16', '[0, 1, 2, 3, 4, 5, 6, 7]']
|
||||
- [int32x2_t, int64x2_t, int32x4_t, 'N >= 1 && N <= 32', '[0, 1, 2, 3]']
|
||||
- [uint8x8_t, uint16x8_t, uint8x16_t, 'N >= 1 && N <= 8', '[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]']
|
||||
- [uint16x4_t, uint32x4_t, uint16x8_t, 'N >= 1 && N <= 16', '[0, 1, 2, 3, 4, 5, 6, 7]']
|
||||
- [uint32x2_t, uint64x2_t, uint32x4_t, 'N >= 1 && N <= 32', '[0, 1, 2, 3]']
|
||||
- [int8x8_t, int16x8_t, int8x16_t, 'N >= 1 && N <= 8']
|
||||
- [int16x4_t, int32x4_t, int16x8_t, 'N >= 1 && N <= 16']
|
||||
- [int32x2_t, int64x2_t, int32x4_t, 'N >= 1 && N <= 32']
|
||||
- [uint8x8_t, uint16x8_t, uint8x16_t, 'N >= 1 && N <= 8']
|
||||
- [uint16x4_t, uint32x4_t, uint16x8_t, 'N >= 1 && N <= 16']
|
||||
- [uint32x2_t, uint64x2_t, uint32x4_t, 'N >= 1 && N <= 32']
|
||||
compose:
|
||||
- FnCall: [static_assert!, ["{type[3]}"]]
|
||||
- FnCall:
|
||||
- simd_shuffle!
|
||||
- 'vcombine_{neon_type[0]}'
|
||||
- - a
|
||||
- FnCall: ["vshrn_n_{neon_type[1]}::<N>", [b]]
|
||||
- "{type[4]}"
|
||||
- FnCall: ["vshrn_n_{neon_type[1]}", [b], [N]]
|
||||
|
||||
- name: "vsm3partw1{neon_type.no}"
|
||||
doc: "SM3PARTW1"
|
||||
@@ -9230,6 +9042,7 @@ intrinsics:
|
||||
- FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [sm3partw1]]}]]
|
||||
- FnCall: [unstable, ['feature = "stdarch_neon_sm4"', 'issue = "117226"']]
|
||||
safety: safe
|
||||
big_endian_inverse: true
|
||||
types:
|
||||
- uint32x4_t
|
||||
compose:
|
||||
@@ -9248,6 +9061,7 @@ intrinsics:
|
||||
- FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [sm3partw2]]}]]
|
||||
- FnCall: [unstable, ['feature = "stdarch_neon_sm4"', 'issue = "117226"']]
|
||||
safety: safe
|
||||
big_endian_inverse: true
|
||||
types:
|
||||
- uint32x4_t
|
||||
compose:
|
||||
@@ -9266,6 +9080,7 @@ intrinsics:
|
||||
- FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [sm3ss1]]}]]
|
||||
- FnCall: [unstable, ['feature = "stdarch_neon_sm4"', 'issue = "117226"']]
|
||||
safety: safe
|
||||
big_endian_inverse: true
|
||||
types:
|
||||
- uint32x4_t
|
||||
compose:
|
||||
@@ -9284,6 +9099,7 @@ intrinsics:
|
||||
- FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [sm4ekey]]}]]
|
||||
- FnCall: [unstable, ['feature = "stdarch_neon_sm4"', 'issue = "117226"']]
|
||||
safety: safe
|
||||
big_endian_inverse: true
|
||||
types:
|
||||
- uint32x4_t
|
||||
compose:
|
||||
@@ -9302,6 +9118,7 @@ intrinsics:
|
||||
- FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [sm4e]]}]]
|
||||
- FnCall: [unstable, ['feature = "stdarch_neon_sm4"', 'issue = "117226"']]
|
||||
safety: safe
|
||||
big_endian_inverse: true
|
||||
types:
|
||||
- uint32x4_t
|
||||
compose:
|
||||
@@ -9338,6 +9155,7 @@ intrinsics:
|
||||
- FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [sha512h]]}]]
|
||||
- FnCall: [stable, ['feature = "stdarch_neon_sha3"', 'since = "1.79.0"']]
|
||||
safety: safe
|
||||
big_endian_inverse: true
|
||||
types:
|
||||
- uint64x2_t
|
||||
compose:
|
||||
@@ -9356,6 +9174,7 @@ intrinsics:
|
||||
- FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [sha512h2]]}]]
|
||||
- FnCall: [stable, ['feature = "stdarch_neon_sha3"', 'since = "1.79.0"']]
|
||||
safety: safe
|
||||
big_endian_inverse: true
|
||||
types:
|
||||
- uint64x2_t
|
||||
compose:
|
||||
@@ -9374,6 +9193,7 @@ intrinsics:
|
||||
- FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [sha512su0]]}]]
|
||||
- FnCall: [stable, ['feature = "stdarch_neon_sha3"', 'since = "1.79.0"']]
|
||||
safety: safe
|
||||
big_endian_inverse: true
|
||||
types:
|
||||
- uint64x2_t
|
||||
compose:
|
||||
@@ -9392,6 +9212,7 @@ intrinsics:
|
||||
- FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [sha512su1]]}]]
|
||||
- FnCall: [stable, ['feature = "stdarch_neon_sha3"', 'since = "1.79.0"']]
|
||||
safety: safe
|
||||
big_endian_inverse: true
|
||||
types:
|
||||
- uint64x2_t
|
||||
compose:
|
||||
@@ -9412,6 +9233,7 @@ intrinsics:
|
||||
- FnCall: [unstable, ['feature = "stdarch_neon_sm4"', 'issue = "117226"']]
|
||||
static_defs: ["const IMM2: i32"]
|
||||
safety: safe
|
||||
big_endian_inverse: true
|
||||
types:
|
||||
- ['1aq_u32', uint32x4_t, 'sm3tt1a', 'SM3TT1A']
|
||||
- ['1bq_u32', uint32x4_t, 'sm3tt1b', 'SM3TT1B']
|
||||
@@ -9514,7 +9336,7 @@ intrinsics:
|
||||
- transmute
|
||||
- - FnCall:
|
||||
- _vrnd32x_f64
|
||||
- - FnCall: [simd_extract!, [a, 0]]
|
||||
- - FnCall: ['vget{neon_type.lane_nox}', [a], [0]]
|
||||
|
||||
- name: "vrnd32z{neon_type.no}"
|
||||
doc: "Floating-point round to 32-bit integer toward zero"
|
||||
@@ -9558,7 +9380,7 @@ intrinsics:
|
||||
arch: aarch64,arm64ec
|
||||
- FnCall:
|
||||
- transmute
|
||||
- - FnCall: [_vrnd32z_f64, [{FnCall: [simd_extract!, [a, 0]]}]]
|
||||
- - FnCall: [_vrnd32z_f64, [{FnCall: ['vget{neon_type.lane_nox}', [a], [0]]}]]
|
||||
|
||||
- name: "vrnd64x{neon_type.no}"
|
||||
doc: "Floating-point round to 64-bit integer, using current rounding mode"
|
||||
@@ -9602,7 +9424,7 @@ intrinsics:
|
||||
arch: aarch64,arm64ec
|
||||
- FnCall:
|
||||
- transmute
|
||||
- - FnCall: [_vrnd64x_f64, [{FnCall: [simd_extract!, [a, 0]]}]]
|
||||
- - FnCall: [_vrnd64x_f64, [{FnCall: ['vget{neon_type.lane_nox}', [a], [0]]}]]
|
||||
|
||||
- name: "vrnd64z{neon_type.no}"
|
||||
doc: "Floating-point round to 64-bit integer toward zero"
|
||||
@@ -9646,7 +9468,7 @@ intrinsics:
|
||||
arch: aarch64,arm64ec
|
||||
- FnCall:
|
||||
- transmute
|
||||
- - FnCall: [_vrnd64z_f64, [{FnCall: [simd_extract!, [a, 0]]}]]
|
||||
- - FnCall: [_vrnd64z_f64, [{FnCall: ['vget{neon_type.lane_nox}', [a], [0]]}]]
|
||||
|
||||
- name: "vtrn1{neon_type[0].no}"
|
||||
doc: Transpose vectors
|
||||
@@ -9654,8 +9476,9 @@ intrinsics:
|
||||
return_type: "{neon_type[0]}"
|
||||
attr:
|
||||
- *neon-stable
|
||||
- FnCall: [cfg_attr, [{FnCall: [all, [test, {FnCall: [not, ['target_env = "msvc"']]}]]}, {FnCall: [assert_instr, [trn1]]}]]
|
||||
- FnCall: [cfg_attr, [*cfg-test-not-msvc-little-endian, {FnCall: [assert_instr, [trn1]]}]]
|
||||
safety: safe
|
||||
big_endian_inverse: true
|
||||
types:
|
||||
- [int8x8_t, '[0, 8, 2, 10, 4, 12, 6, 14]']
|
||||
- [int8x16_t, '[0, 16, 2, 18, 4, 20, 6, 22, 8, 24, 10, 26, 12, 28, 14, 30]']
|
||||
@@ -9684,8 +9507,9 @@ intrinsics:
|
||||
- *neon-fp16
|
||||
- *neon-stable-fp16
|
||||
- *target-not-arm64ec
|
||||
- FnCall: [cfg_attr, [{FnCall: [all, [test, {FnCall: [not, ['target_env = "msvc"']]}]]}, {FnCall: [assert_instr, [trn1]]}]]
|
||||
- FnCall: [cfg_attr, [*cfg-test-not-msvc-little-endian, {FnCall: [assert_instr, [trn1]]}]]
|
||||
safety: safe
|
||||
big_endian_inverse: true
|
||||
types:
|
||||
- [float16x4_t, '[0, 4, 2, 6]']
|
||||
- [float16x8_t, '[0, 8, 2, 10, 4, 12, 6, 14]']
|
||||
@@ -9698,8 +9522,9 @@ intrinsics:
|
||||
return_type: "{neon_type[0]}"
|
||||
attr:
|
||||
- *neon-stable
|
||||
- FnCall: [cfg_attr, [{FnCall: [all, [test, {FnCall: [not, ['target_env = "msvc"']]}]]}, {FnCall: [assert_instr, [zip1]]}]]
|
||||
- FnCall: [cfg_attr, [*cfg-test-not-msvc-little-endian, {FnCall: [assert_instr, [zip1]]}]]
|
||||
safety: safe
|
||||
big_endian_inverse: true
|
||||
types:
|
||||
- [int32x2_t, '[0, 2]']
|
||||
- [int64x2_t, '[0, 2]']
|
||||
@@ -9717,8 +9542,9 @@ intrinsics:
|
||||
return_type: "{neon_type[0]}"
|
||||
attr:
|
||||
- *neon-stable
|
||||
- FnCall: [cfg_attr, [{FnCall: [all, [test, {FnCall: [not, ['target_env = "msvc"']]}]]}, {FnCall: [assert_instr, [trn2]]}]]
|
||||
- FnCall: [cfg_attr, [*cfg-test-not-msvc-little-endian, {FnCall: [assert_instr, [trn2]]}]]
|
||||
safety: safe
|
||||
big_endian_inverse: true
|
||||
types:
|
||||
- [int8x8_t, '[1, 9, 3, 11, 5, 13, 7, 15]']
|
||||
- [int8x16_t, '[1, 17, 3, 19, 5, 21, 7, 23, 9, 25, 11, 27, 13, 29, 15, 31]']
|
||||
@@ -9746,8 +9572,9 @@ intrinsics:
|
||||
- *neon-fp16
|
||||
- *neon-stable-fp16
|
||||
- *target-not-arm64ec
|
||||
- FnCall: [cfg_attr, [{FnCall: [all, [test, {FnCall: [not, ['target_env = "msvc"']]}]]}, {FnCall: [assert_instr, [trn2]]}]]
|
||||
- FnCall: [cfg_attr, [*cfg-test-not-msvc-little-endian, {FnCall: [assert_instr, [trn2]]}]]
|
||||
safety: safe
|
||||
big_endian_inverse: true
|
||||
types:
|
||||
- [float16x4_t, '[1, 5, 3, 7]']
|
||||
- [float16x8_t, '[1, 9, 3, 11, 5, 13, 7, 15]']
|
||||
@@ -9760,8 +9587,9 @@ intrinsics:
|
||||
return_type: "{neon_type[0]}"
|
||||
attr:
|
||||
- *neon-stable
|
||||
- FnCall: [cfg_attr, [{FnCall: [all, [test, {FnCall: [not, ['target_env = "msvc"']]}]]}, {FnCall: [assert_instr, [zip2]]}]]
|
||||
- FnCall: [cfg_attr, [*cfg-test-not-msvc-little-endian, {FnCall: [assert_instr, [zip2]]}]]
|
||||
safety: safe
|
||||
big_endian_inverse: true
|
||||
types:
|
||||
- [int32x2_t, '[1, 3]']
|
||||
- [int64x2_t, '[1, 3]']
|
||||
@@ -9779,8 +9607,9 @@ intrinsics:
|
||||
return_type: "{neon_type[0]}"
|
||||
attr:
|
||||
- *neon-stable
|
||||
- FnCall: [cfg_attr, [{FnCall: [all, [test, {FnCall: [not, ['target_env = "msvc"']]}]]}, {FnCall: [assert_instr, [zip2]]}]]
|
||||
- FnCall: [cfg_attr, [*cfg-test-not-msvc-little-endian, {FnCall: [assert_instr, [zip2]]}]]
|
||||
safety: safe
|
||||
big_endian_inverse: true
|
||||
types:
|
||||
- [int8x8_t, '[4, 12, 5, 13, 6, 14, 7, 15]']
|
||||
- [int8x16_t, '[8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31]']
|
||||
@@ -9815,8 +9644,9 @@ intrinsics:
|
||||
- *neon-fp16
|
||||
- *neon-stable-fp16
|
||||
- *target-not-arm64ec
|
||||
- FnCall: [cfg_attr, [{FnCall: [all, [test, {FnCall: [not, ['target_env = "msvc"']]}]]}, {FnCall: [assert_instr, [zip2]]}]]
|
||||
- FnCall: [cfg_attr, [*cfg-test-not-msvc-little-endian, {FnCall: [assert_instr, [zip2]]}]]
|
||||
safety: safe
|
||||
big_endian_inverse: true
|
||||
types:
|
||||
- [float16x4_t, '[2, 6, 3, 7]']
|
||||
- [float16x8_t, '[4, 12, 5, 13, 6, 14, 7, 15]']
|
||||
@@ -9829,8 +9659,9 @@ intrinsics:
|
||||
return_type: "{neon_type[0]}"
|
||||
attr:
|
||||
- *neon-stable
|
||||
- FnCall: [cfg_attr, [{FnCall: [all, [test, {FnCall: [not, ['target_env = "msvc"']]}]]}, {FnCall: [assert_instr, [zip1]]}]]
|
||||
- FnCall: [cfg_attr, [*cfg-test-not-msvc-little-endian, {FnCall: [assert_instr, [zip1]]}]]
|
||||
safety: safe
|
||||
big_endian_inverse: true
|
||||
types:
|
||||
- [int8x8_t, '[0, 8, 1, 9, 2, 10, 3, 11]']
|
||||
- [int8x16_t, '[0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23]']
|
||||
@@ -9866,8 +9697,9 @@ intrinsics:
|
||||
- *neon-fp16
|
||||
- *neon-stable-fp16
|
||||
- *target-not-arm64ec
|
||||
- FnCall: [cfg_attr, [{FnCall: [all, [test, {FnCall: [not, ['target_env = "msvc"']]}]]}, {FnCall: [assert_instr, [zip1]]}]]
|
||||
- FnCall: [cfg_attr, [*cfg-test-not-msvc-little-endian, {FnCall: [assert_instr, [zip1]]}]]
|
||||
safety: safe
|
||||
big_endian_inverse: true
|
||||
types:
|
||||
- [float16x4_t, '[0, 4, 1, 5]']
|
||||
- [float16x8_t, '[0, 8, 1, 9, 2, 10, 3, 11]']
|
||||
@@ -9880,8 +9712,9 @@ intrinsics:
|
||||
return_type: "{neon_type[0]}"
|
||||
attr:
|
||||
- *neon-stable
|
||||
- FnCall: [cfg_attr, [{FnCall: [all, [test, {FnCall: [not, ['target_env = "msvc"']]}]]}, {FnCall: [assert_instr, [zip1]]}]]
|
||||
- FnCall: [cfg_attr, [*cfg-test-not-msvc-little-endian, {FnCall: [assert_instr, [zip1]]}]]
|
||||
safety: safe
|
||||
big_endian_inverse: true
|
||||
types:
|
||||
- [int32x2_t, '[0, 2]']
|
||||
- [int64x2_t, '[0, 2]']
|
||||
@@ -9899,8 +9732,9 @@ intrinsics:
|
||||
return_type: "{neon_type[0]}"
|
||||
attr:
|
||||
- *neon-stable
|
||||
- FnCall: [cfg_attr, [{FnCall: [all, [test, {FnCall: [not, ['target_env = "msvc"']]}]]}, {FnCall: [assert_instr, [uzp1]]}]]
|
||||
- FnCall: [cfg_attr, [*cfg-test-not-msvc-little-endian, {FnCall: [assert_instr, [uzp1]]}]]
|
||||
safety: safe
|
||||
big_endian_inverse: true
|
||||
types:
|
||||
- [int8x8_t, '[0, 2, 4, 6, 8, 10, 12, 14]']
|
||||
- [int8x16_t, '[0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30]']
|
||||
@@ -9928,8 +9762,9 @@ intrinsics:
|
||||
- *neon-fp16
|
||||
- *neon-stable-fp16
|
||||
- *target-not-arm64ec
|
||||
- FnCall: [cfg_attr, [{FnCall: [all, [test, {FnCall: [not, ['target_env = "msvc"']]}]]}, {FnCall: [assert_instr, [uzp1]]}]]
|
||||
- FnCall: [cfg_attr, [*cfg-test-not-msvc-little-endian, {FnCall: [assert_instr, [uzp1]]}]]
|
||||
safety: safe
|
||||
big_endian_inverse: true
|
||||
types:
|
||||
- [float16x4_t, '[0, 2, 4, 6]']
|
||||
- [float16x8_t, '[0, 2, 4, 6, 8, 10, 12, 14]']
|
||||
@@ -9942,8 +9777,9 @@ intrinsics:
|
||||
return_type: "{neon_type[0]}"
|
||||
attr:
|
||||
- *neon-stable
|
||||
- FnCall: [cfg_attr, [{FnCall: [all, [test, {FnCall: [not, ['target_env = "msvc"']]}]]}, {FnCall: [assert_instr, [zip2]]}]]
|
||||
- FnCall: [cfg_attr, [*cfg-test-not-msvc-little-endian, {FnCall: [assert_instr, [zip2]]}]]
|
||||
safety: safe
|
||||
big_endian_inverse: true
|
||||
types:
|
||||
- [int32x2_t, '[1, 3]']
|
||||
- [int64x2_t, '[1, 3]']
|
||||
@@ -9961,8 +9797,9 @@ intrinsics:
|
||||
return_type: "{neon_type[0]}"
|
||||
attr:
|
||||
- *neon-stable
|
||||
- FnCall: [cfg_attr, [{FnCall: [all, [test, {FnCall: [not, ['target_env = "msvc"']]}]]}, {FnCall: [assert_instr, [uzp2]]}]]
|
||||
- FnCall: [cfg_attr, [*cfg-test-not-msvc-little-endian, {FnCall: [assert_instr, [uzp2]]}]]
|
||||
safety: safe
|
||||
big_endian_inverse: true
|
||||
types:
|
||||
- [int8x8_t, '[1, 3, 5, 7, 9, 11, 13, 15]']
|
||||
- [int8x16_t, '[1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31]']
|
||||
@@ -9994,8 +9831,9 @@ intrinsics:
|
||||
- *neon-fp16
|
||||
- *neon-stable-fp16
|
||||
- *target-not-arm64ec
|
||||
- FnCall: [cfg_attr, [{FnCall: [all, [test, {FnCall: [not, ['target_env = "msvc"']]}]]}, {FnCall: [assert_instr, [uzp2]]}]]
|
||||
- FnCall: [cfg_attr, [*cfg-test-not-msvc-little-endian, {FnCall: [assert_instr, [uzp2]]}]]
|
||||
safety: safe
|
||||
big_endian_inverse: true
|
||||
types:
|
||||
- [float16x4_t, '[1, 3, 5, 7]']
|
||||
- [float16x8_t, '[1, 3, 5, 7, 9, 11, 13, 15]']
|
||||
@@ -10011,23 +9849,21 @@ intrinsics:
|
||||
arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}", "c: {neon_type[1]}"]
|
||||
return_type: "{neon_type[0]}"
|
||||
attr:
|
||||
- FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
|
||||
- FnCall: [cfg_attr, [{FnCall: [all, [test, {FnCall: [not, ['target_env = "msvc"']]}]]}, {FnCall: [assert_instr, [uabal2]]}]]
|
||||
- *neon-stable
|
||||
- FnCall: [cfg_attr, [*cfg-test-not-msvc-little-endian, {FnCall: [assert_instr, [uabal2]]}]]
|
||||
safety: safe
|
||||
types:
|
||||
- [uint16x8_t, uint8x16_t, uint8x8_t, '[8, 9, 10, 11, 12, 13, 14, 15]', '[8, 9, 10, 11, 12, 13, 14, 15]']
|
||||
- [uint32x4_t, uint16x8_t, uint16x4_t, '[4, 5, 6, 7]', '[4, 5, 6, 7]']
|
||||
- [uint64x2_t, uint32x4_t, uint32x2_t, '[2, 3]', '[2, 3]']
|
||||
- [uint16x8_t, uint8x16_t, uint8x8_t]
|
||||
- [uint32x4_t, uint16x8_t, uint16x4_t]
|
||||
- [uint64x2_t, uint32x4_t, uint32x2_t]
|
||||
compose:
|
||||
- Let:
|
||||
- d
|
||||
- "{neon_type[2]}"
|
||||
- FnCall: [simd_shuffle!, [b, b, "{type[3]}"]]
|
||||
- FnCall: ['vget_high_{neon_type[1]}', [b]]
|
||||
- Let:
|
||||
- e
|
||||
- "{neon_type[2]}"
|
||||
- FnCall: [simd_shuffle!, [c, c, "{type[4]}"]]
|
||||
- Let: [f, "{neon_type[2]}", {FnCall: ["vabd_{neon_type[2]}", [d, e]]}]
|
||||
- FnCall: ['vget_high_{neon_type[1]}', [c]]
|
||||
- Let: [f, {FnCall: ["vabd_{neon_type[2]}", [d, e]]}]
|
||||
- FnCall:
|
||||
- simd_add
|
||||
- - a
|
||||
@@ -10039,39 +9875,28 @@ intrinsics:
|
||||
return_type: "{neon_type[0]}"
|
||||
attr:
|
||||
- *neon-stable
|
||||
- FnCall: [cfg_attr, [{FnCall: [all, [test, {FnCall: [not, ['target_env = "msvc"']]}]]}, {FnCall: [assert_instr, [sabal2]]}]]
|
||||
- FnCall: [cfg_attr, [*cfg-test-not-msvc-little-endian, {FnCall: [assert_instr, [sabal2]]}]]
|
||||
safety: safe
|
||||
types:
|
||||
- [int16x8_t, int8x16_t, int8x16_t, '[8, 9, 10, 11, 12, 13, 14, 15]', int8x8_t, uint8x8_t]
|
||||
- [int32x4_t, int16x8_t, int16x8_t, '[4, 5, 6, 7]', int16x4_t, uint16x4_t]
|
||||
- [int64x2_t, int32x4_t, int32x4_t, '[2, 3]', int32x2_t, uint32x2_t]
|
||||
- [int16x8_t, int8x16_t, int8x16_t, int8x8_t, uint8x8_t]
|
||||
- [int32x4_t, int16x8_t, int16x8_t, int16x4_t, uint16x4_t]
|
||||
- [int64x2_t, int32x4_t, int32x4_t, int32x2_t, uint32x2_t]
|
||||
compose:
|
||||
- Let:
|
||||
- d
|
||||
- "{neon_type[4]}"
|
||||
- FnCall:
|
||||
- simd_shuffle!
|
||||
- - b
|
||||
- b
|
||||
- "{type[3]}"
|
||||
- FnCall: ['vget_high_{neon_type[1]}', [b]]
|
||||
- Let:
|
||||
- e
|
||||
- "{neon_type[4]}"
|
||||
- FnCall:
|
||||
- simd_shuffle!
|
||||
- - c
|
||||
- c
|
||||
- "{type[3]}"
|
||||
- FnCall: ['vget_high_{neon_type[2]}', [c]]
|
||||
- Let:
|
||||
- f
|
||||
- "{neon_type[4]}"
|
||||
- FnCall:
|
||||
- "vabd{neon_type[4].no}"
|
||||
- "vabd{neon_type[3].no}"
|
||||
- - d
|
||||
- e
|
||||
- Let:
|
||||
- f
|
||||
- "{neon_type[5]}"
|
||||
- "{neon_type[4]}"
|
||||
- FnCall:
|
||||
- simd_cast
|
||||
- - f
|
||||
@@ -10164,8 +9989,9 @@ intrinsics:
|
||||
return_type: "{neon_type}"
|
||||
attr:
|
||||
- FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fmaxnmp]]}]]
|
||||
- FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
|
||||
- *neon-stable
|
||||
safety: safe
|
||||
big_endian_inverse: true
|
||||
types:
|
||||
- float32x2_t
|
||||
- float64x2_t
|
||||
@@ -10182,7 +10008,7 @@ intrinsics:
|
||||
arguments: ["a: {type[0]}", "b: {neon_type[1]}"]
|
||||
attr:
|
||||
- FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [st1]]}]]
|
||||
- FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
|
||||
- *neon-stable
|
||||
safety:
|
||||
unsafe: [neon]
|
||||
types:
|
||||
@@ -10205,7 +10031,7 @@ intrinsics:
|
||||
arguments: ["a: {type[0]}", "b: {neon_type[1]}"]
|
||||
attr:
|
||||
- FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [st1]]}]]
|
||||
- FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
|
||||
- *neon-stable
|
||||
safety:
|
||||
unsafe: [neon]
|
||||
types:
|
||||
@@ -10229,7 +10055,7 @@ intrinsics:
|
||||
arguments: ["a: {type[0]}", "b: {neon_type[1]}"]
|
||||
attr:
|
||||
- FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [st1]]}]]
|
||||
- FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
|
||||
- *neon-stable
|
||||
safety:
|
||||
unsafe: [neon]
|
||||
types:
|
||||
@@ -10256,7 +10082,7 @@ intrinsics:
|
||||
attr:
|
||||
- FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fmla, 'LANE = 0']]}]]
|
||||
- FnCall: [rustc_legacy_const_generics, ['3']]
|
||||
- FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
|
||||
- *neon-stable
|
||||
static_defs: ['const LANE: i32']
|
||||
safety: safe
|
||||
types:
|
||||
@@ -10271,7 +10097,7 @@ intrinsics:
|
||||
- "vfma{neon_type[0].no}"
|
||||
- - a
|
||||
- b
|
||||
- FnCall: ["vdup{neon_type[0].N}", [{FnCall: [simd_extract!, [c, 'LANE as u32']]}]]
|
||||
- FnCall: ["vdup{neon_type[0].N}", [{FnCall: ['vget{neon_type[1].lane_nox}', [c], [LANE]]}]]
|
||||
|
||||
|
||||
- name: "vfma{type[3]}"
|
||||
@@ -10297,7 +10123,7 @@ intrinsics:
|
||||
- "vfma{neon_type[0].no}"
|
||||
- - a
|
||||
- b
|
||||
- FnCall: ["vdup{neon_type[0].N}", [{FnCall: [simd_extract!, [c, 'LANE as u32']]}]]
|
||||
- FnCall: ["vdup{neon_type[0].N}", [{FnCall: ['vget{neon_type[1].lane_nox}', [c], [LANE]]}]]
|
||||
|
||||
|
||||
# vfms lane f16
|
||||
@@ -10324,7 +10150,7 @@ intrinsics:
|
||||
- "vfms{neon_type[0].no}"
|
||||
- - a
|
||||
- b
|
||||
- FnCall: ["vdup{neon_type[0].N}", [{FnCall: [simd_extract!, [c, 'LANE as u32']]}]]
|
||||
- FnCall: ["vdup{neon_type[0].N}", [{FnCall: ['vget{neon_type[1].lane_nox}', [c], [LANE]]}]]
|
||||
|
||||
|
||||
- name: "vfms{type[1]}"
|
||||
@@ -10350,7 +10176,7 @@ intrinsics:
|
||||
attr:
|
||||
- FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fmadd, 'LANE = 0']]}]]
|
||||
- FnCall: [rustc_legacy_const_generics, ['3']]
|
||||
- FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
|
||||
- *neon-stable
|
||||
static_defs: ['const LANE: i32']
|
||||
safety: safe
|
||||
types:
|
||||
@@ -10361,16 +10187,16 @@ intrinsics:
|
||||
- "vfma{neon_type.no}"
|
||||
- - a
|
||||
- b
|
||||
- FnCall: ["vdup{neon_type.N}", [{FnCall: [simd_extract!, [c, 'LANE as u32']]}]]
|
||||
- FnCall: ["vdup{neon_type.N}", [{FnCall: ['vget{neon_type.lane_nox}', [c], [LANE]]}]]
|
||||
|
||||
- name: "vfma_laneq_f64"
|
||||
doc: "Floating-point fused multiply-add to accumulator"
|
||||
arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}", "c: {neon_type[1]}"]
|
||||
return_type: "{neon_type[0]}"
|
||||
attr:
|
||||
- FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fmadd, 'LANE = 0']]}]]
|
||||
- FnCall: [cfg_attr, [*all-test-little-endian, {FnCall: [assert_instr, [fmadd, 'LANE = 0']]}]]
|
||||
- FnCall: [rustc_legacy_const_generics, ['3']]
|
||||
- FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
|
||||
- *neon-stable
|
||||
static_defs: ['const LANE: i32']
|
||||
safety: safe
|
||||
types:
|
||||
@@ -10381,7 +10207,7 @@ intrinsics:
|
||||
- "vfma{neon_type[0].no}"
|
||||
- - a
|
||||
- b
|
||||
- FnCall: ["vdup{neon_type[0].N}", [{FnCall: [simd_extract!, [c, 'LANE as u32']]}]]
|
||||
- FnCall: ["vdup{neon_type[0].N}", [{FnCall: ['vget{neon_type[1].lane_nox}', [c], [LANE]]}]]
|
||||
|
||||
- name: "vfmaq_lane_f64"
|
||||
doc: "Floating-point fused multiply-add to accumulator"
|
||||
@@ -10390,7 +10216,7 @@ intrinsics:
|
||||
attr:
|
||||
- FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fmla, 'LANE = 0']]}]]
|
||||
- FnCall: [rustc_legacy_const_generics, ['3']]
|
||||
- FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
|
||||
- *neon-stable
|
||||
static_defs: ['const LANE: i32']
|
||||
safety: safe
|
||||
types:
|
||||
@@ -10401,16 +10227,16 @@ intrinsics:
|
||||
- "vfma{neon_type[0].no}"
|
||||
- - a
|
||||
- b
|
||||
- FnCall: ["vdup{neon_type[0].N}", [{FnCall: [simd_extract!, [c, 'LANE as u32']]}]]
|
||||
- FnCall: ["vdup{neon_type[0].N}", [{FnCall: ['vget{neon_type[1].lane_nox}', [c], [LANE]]}]]
|
||||
|
||||
- name: "vfma{type[2]}"
|
||||
doc: "Floating-point fused multiply-add to accumulator"
|
||||
arguments: ["a: {type[0]}", "b: {type[0]}", "c: {neon_type[1]}"]
|
||||
return_type: "{type[0]}"
|
||||
attr:
|
||||
- FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fmadd, 'LANE = 0']]}]]
|
||||
- FnCall: [cfg_attr, [*all-test-little-endian, {FnCall: [assert_instr, [fmadd, 'LANE = 0']]}]]
|
||||
- FnCall: [rustc_legacy_const_generics, ['3']]
|
||||
- FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
|
||||
- *neon-stable
|
||||
static_defs: ['const LANE: i32']
|
||||
safety: safe
|
||||
types:
|
||||
@@ -10419,7 +10245,7 @@ intrinsics:
|
||||
- ["f64", float64x2_t, "d_laneq_f64", '1']
|
||||
compose:
|
||||
- FnCall: [static_assert_uimm_bits!, ['LANE', "{type[3]}"]]
|
||||
- Let: [c, "{type[0]}", {FnCall: [simd_extract!, [c, 'LANE as u32']]}]
|
||||
- Let: [c, "{type[0]}", {FnCall: ['vget{neon_type[1].lane_nox}', [c], [LANE]]}]
|
||||
- FnCall: ["fma{type[0]}", [b, c, a]]
|
||||
|
||||
- name: "vfmad_lane_f64"
|
||||
@@ -10429,14 +10255,14 @@ intrinsics:
|
||||
attr:
|
||||
- FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fmadd, 'LANE = 0']]}]]
|
||||
- FnCall: [rustc_legacy_const_generics, ['3']]
|
||||
- FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
|
||||
- *neon-stable
|
||||
static_defs: ['const LANE: i32']
|
||||
safety: safe
|
||||
types:
|
||||
- ["f64", float64x1_t]
|
||||
compose:
|
||||
- FnCall: [static_assert!, ['LANE == 0']]
|
||||
- Let: [c, "{type[0]}", {FnCall: [simd_extract!, [c, 'LANE as u32']]}]
|
||||
- Let: [c, "{type[0]}", {FnCall: ['vget{neon_type[1].lane_nox}', [c], [LANE]]}]
|
||||
- FnCall: [fmaf64, [b, c, a]]
|
||||
|
||||
|
||||
@@ -10461,7 +10287,7 @@ intrinsics:
|
||||
arguments: ["a: {type[0]}", "b: {type[0]}", "v: {neon_type[1]}"]
|
||||
return_type: "{type[0]}"
|
||||
attr:
|
||||
- FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fmadd, 'LANE = 0']]}]]
|
||||
- FnCall: [cfg_attr, [*all-test-little-endian, {FnCall: [assert_instr, [fmadd, 'LANE = 0']]}]]
|
||||
- FnCall: [rustc_legacy_const_generics, ['3']]
|
||||
- *neon-fp16
|
||||
- *neon-unstable-f16
|
||||
@@ -10473,7 +10299,7 @@ intrinsics:
|
||||
- ["f16", float16x8_t, 'q_f16', '3']
|
||||
compose:
|
||||
- FnCall: [static_assert_uimm_bits!, [LANE, '{type[3]}']]
|
||||
- Let: [c, "{type[0]}", {FnCall: [simd_extract!, [v, 'LANE as u32']]}]
|
||||
- Let: [c, "{type[0]}", {FnCall: ['vget{neon_type[1].lane_nox}', [v], [LANE]]}]
|
||||
- FnCall: ["vfmah_{type[0]}", [a, b, c]]
|
||||
|
||||
- name: "vfmsh_lane{type[2]}"
|
||||
@@ -10481,7 +10307,7 @@ intrinsics:
|
||||
arguments: ["a: {type[0]}", "b: {type[0]}", "v: {neon_type[1]}"]
|
||||
return_type: "{type[0]}"
|
||||
attr:
|
||||
- FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fmsub, 'LANE = 0']]}]]
|
||||
- FnCall: [cfg_attr, [*all-test-little-endian, {FnCall: [assert_instr, [fmsub, 'LANE = 0']]}]]
|
||||
- FnCall: [rustc_legacy_const_generics, ['3']]
|
||||
- *neon-fp16
|
||||
- *neon-unstable-f16
|
||||
@@ -10493,7 +10319,7 @@ intrinsics:
|
||||
- ["f16", float16x8_t, 'q_f16', '3']
|
||||
compose:
|
||||
- FnCall: [static_assert_uimm_bits!, [LANE, '{type[3]}']]
|
||||
- Let: [c, "{type[0]}", {FnCall: [simd_extract!, [v, 'LANE as u32']]}]
|
||||
- Let: [c, "{type[0]}", {FnCall: ['vget{neon_type[1].lane_nox}', [v], [LANE]]}]
|
||||
- FnCall: ["vfmsh_{type[0]}", [a, b, c]]
|
||||
|
||||
- name: "vfms_f64"
|
||||
@@ -10502,7 +10328,7 @@ intrinsics:
|
||||
return_type: "{neon_type}"
|
||||
attr:
|
||||
- FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fmsub]]}]]
|
||||
- FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
|
||||
- *neon-stable
|
||||
safety: safe
|
||||
types:
|
||||
- float64x1_t
|
||||
@@ -10516,7 +10342,7 @@ intrinsics:
|
||||
return_type: "{neon_type}"
|
||||
attr:
|
||||
- FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fmls]]}]]
|
||||
- FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
|
||||
- *neon-stable
|
||||
safety: safe
|
||||
types:
|
||||
- float64x2_t
|
||||
@@ -10530,7 +10356,7 @@ intrinsics:
|
||||
return_type: "{neon_type}"
|
||||
attr:
|
||||
- FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fmul]]}]]
|
||||
- FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
|
||||
- *neon-stable
|
||||
safety: safe
|
||||
types:
|
||||
- float64x1_t
|
||||
@@ -10545,7 +10371,7 @@ intrinsics:
|
||||
attr:
|
||||
- FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fmls, 'LANE = 0']]}]]
|
||||
- FnCall: [rustc_legacy_const_generics, ['3']]
|
||||
- FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
|
||||
- *neon-stable
|
||||
static_defs: ['const LANE: i32']
|
||||
safety: safe
|
||||
types:
|
||||
@@ -10556,7 +10382,7 @@ intrinsics:
|
||||
- [float64x2_t, float64x2_t, '1', q_laneq_f64]
|
||||
compose:
|
||||
- FnCall: [static_assert_uimm_bits!, [LANE, '{type[2]}']]
|
||||
- FnCall: ["vfms{neon_type[0].no}", [a, b, {FnCall: ["vdup{neon_type[0].N}", [{FnCall: [simd_extract!, [c, 'LANE as u32']]}]]}]]
|
||||
- FnCall: ["vfms{neon_type[0].no}", [a, b, {FnCall: ["vdup{neon_type[0].N}", [{FnCall: ['vget{neon_type[1].lane_nox}', [c], [LANE]]}]]}]]
|
||||
|
||||
- name: "vfms_lane_f64"
|
||||
doc: "Floating-point fused multiply-subtract to accumulator"
|
||||
@@ -10565,30 +10391,30 @@ intrinsics:
|
||||
attr:
|
||||
- FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fmsub, 'LANE = 0']]}]]
|
||||
- FnCall: [rustc_legacy_const_generics, ['3']]
|
||||
- FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
|
||||
- *neon-stable
|
||||
static_defs: ['const LANE: i32']
|
||||
safety: safe
|
||||
types:
|
||||
- float64x1_t
|
||||
compose:
|
||||
- FnCall: [static_assert!, ['LANE == 0']]
|
||||
- FnCall: ["vfms{neon_type.no}", [a, b, {FnCall: ["vdup{neon_type.N}", [{FnCall: [simd_extract!, [c, 'LANE as u32']]}]]}]]
|
||||
- FnCall: ["vfms{neon_type.no}", [a, b, {FnCall: ["vdup{neon_type.N}", [{FnCall: ['vget{neon_type.lane_nox}', [c], [LANE]]}]]}]]
|
||||
|
||||
- name: "vfms_laneq_f64"
|
||||
doc: "Floating-point fused multiply-subtract to accumulator"
|
||||
arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}", "c: {neon_type[1]}"]
|
||||
return_type: "{neon_type[0]}"
|
||||
attr:
|
||||
- FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fmsub, 'LANE = 0']]}]]
|
||||
- FnCall: [cfg_attr, [*all-test-little-endian, {FnCall: [assert_instr, [fmsub, 'LANE = 0']]}]]
|
||||
- FnCall: [rustc_legacy_const_generics, ['3']]
|
||||
- FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
|
||||
- *neon-stable
|
||||
static_defs: ['const LANE: i32']
|
||||
safety: safe
|
||||
types:
|
||||
- [float64x1_t, float64x2_t]
|
||||
compose:
|
||||
- FnCall: [static_assert_uimm_bits!, [LANE, '1']]
|
||||
- FnCall: ["vfms{neon_type[0].no}", [a, b, {FnCall: ["vdup{neon_type[0].N}", [{FnCall: [simd_extract!, [c, 'LANE as u32']]}]]}]]
|
||||
- FnCall: ["vfms{neon_type[0].no}", [a, b, {FnCall: ["vdup{neon_type[0].N}", [{FnCall: ['vget{neon_type[1].lane_nox}', [c], [LANE]]}]]}]]
|
||||
|
||||
- name: "vfmsq_lane_f64"
|
||||
doc: "Floating-point fused multiply-subtract to accumulator"
|
||||
@@ -10597,23 +10423,23 @@ intrinsics:
|
||||
attr:
|
||||
- FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fmls, 'LANE = 0']]}]]
|
||||
- FnCall: [rustc_legacy_const_generics, ['3']]
|
||||
- FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
|
||||
- *neon-stable
|
||||
static_defs: ['const LANE: i32']
|
||||
safety: safe
|
||||
types:
|
||||
- [float64x2_t, float64x1_t]
|
||||
compose:
|
||||
- FnCall: [static_assert!, ['LANE == 0']]
|
||||
- FnCall: ["vfms{neon_type[0].no}", [a, b, {FnCall: ["vdup{neon_type[0].N}", [{FnCall: [simd_extract!, [c, 'LANE as u32']]}]]}]]
|
||||
- FnCall: ["vfms{neon_type[0].no}", [a, b, {FnCall: ["vdup{neon_type[0].N}", [{FnCall: ['vget{neon_type[1].lane_nox}', [c], [LANE]]}]]}]]
|
||||
|
||||
- name: "vfms{type[2]}"
|
||||
doc: "Floating-point fused multiply-subtract to accumulator"
|
||||
arguments: ["a: {type[0]}", "b: {type[0]}", "c: {neon_type[1]}"]
|
||||
return_type: "{type[0]}"
|
||||
attr:
|
||||
- FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fmsub, 'LANE = 0']]}]]
|
||||
- FnCall: [cfg_attr, [*all-test-little-endian, {FnCall: [assert_instr, [fmsub, 'LANE = 0']]}]]
|
||||
- FnCall: [rustc_legacy_const_generics, ['3']]
|
||||
- FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
|
||||
- *neon-stable
|
||||
static_defs: ['const LANE: i32']
|
||||
safety: safe
|
||||
types:
|
||||
@@ -10631,7 +10457,7 @@ intrinsics:
|
||||
return_type: "{neon_type[1]}"
|
||||
attr:
|
||||
- FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcmeq]]}]]
|
||||
- FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
|
||||
- *neon-stable
|
||||
safety: safe
|
||||
types:
|
||||
- [float32x2_t, uint32x2_t, 'f32x2', 'f32x2::new(0.0, 0.0)']
|
||||
@@ -10665,18 +10491,18 @@ intrinsics:
|
||||
return_type: "{type[1]}"
|
||||
attr:
|
||||
- FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcmp]]}]]
|
||||
- FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
|
||||
- *neon-stable
|
||||
safety: safe
|
||||
types:
|
||||
- ["f32", "u32", "s_f32"]
|
||||
- ["f64", "u64", "d_f64"]
|
||||
compose:
|
||||
- FnCall:
|
||||
- simd_extract!
|
||||
- 'vget_lane_{type[1]}'
|
||||
- - FnCall:
|
||||
- "vceqz_{type[0]}"
|
||||
- - FnCall: ["vdup_n_{type[0]}", [a]]
|
||||
- '0'
|
||||
- - '0'
|
||||
|
||||
- name: "vceqz{type[2]}"
|
||||
doc: "Floating-point compare bitwise equal to zero"
|
||||
@@ -10692,11 +10518,11 @@ intrinsics:
|
||||
- ["f16", "u16", "h_f16"]
|
||||
compose:
|
||||
- FnCall:
|
||||
- simd_extract!
|
||||
- 'vget_lane_{type[1]}'
|
||||
- - FnCall:
|
||||
- "vceqz_{type[0]}"
|
||||
- - FnCall: ["vdup_n_{type[0]}", [a]]
|
||||
- '0'
|
||||
- - '0'
|
||||
|
||||
- name: "vceqzd_{type[2]}"
|
||||
doc: "Compare bitwise equal to zero"
|
||||
@@ -10704,7 +10530,7 @@ intrinsics:
|
||||
return_type: "{type[1]}"
|
||||
attr:
|
||||
- FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [cmp]]}]]
|
||||
- FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
|
||||
- *neon-stable
|
||||
safety: safe
|
||||
types:
|
||||
- ["i64", "u64", "s64"]
|
||||
@@ -10722,7 +10548,7 @@ intrinsics:
|
||||
return_type: "{neon_type[1]}"
|
||||
attr:
|
||||
- FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [cmeq]]}]]
|
||||
- FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
|
||||
- *neon-stable
|
||||
safety: safe
|
||||
types:
|
||||
- [int8x8_t, uint8x8_t, i8x8, 'i8x8::new(0, 0, 0, 0, 0, 0, 0, 0)']
|
||||
@@ -10750,7 +10576,7 @@ intrinsics:
|
||||
return_type: "{neon_type[0]}"
|
||||
attr:
|
||||
- FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [cmeq]]}]]
|
||||
- FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
|
||||
- *neon-stable
|
||||
safety: safe
|
||||
types:
|
||||
- [uint8x8_t, uint8x8_t, u8x8, 'u8x8::new(0, 0, 0, 0, 0, 0, 0, 0)']
|
||||
@@ -10774,7 +10600,7 @@ intrinsics:
|
||||
return_type: "{neon_type}"
|
||||
attr:
|
||||
- FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [cmhs]]}]]
|
||||
- FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
|
||||
- *neon-stable
|
||||
safety: safe
|
||||
types:
|
||||
- uint64x1_t
|
||||
@@ -10788,19 +10614,19 @@ intrinsics:
|
||||
return_type: "{type[2]}"
|
||||
attr:
|
||||
- FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcmp]]}]]
|
||||
- FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
|
||||
- *neon-stable
|
||||
safety: safe
|
||||
types:
|
||||
- ["s_f32", "f32", "u32"]
|
||||
- ["d_f64", "f64", "u64"]
|
||||
compose:
|
||||
- FnCall:
|
||||
- simd_extract!
|
||||
- 'vget_lane_{type[2]}'
|
||||
- - FnCall:
|
||||
- "vcge_{type[1]}"
|
||||
- - FnCall: ["vdup_n_{type[1]}", [a]]
|
||||
- FnCall: ["vdup_n_{type[1]}", [b]]
|
||||
- '0'
|
||||
- - '0'
|
||||
|
||||
|
||||
- name: "vcge{type[0]}"
|
||||
@@ -10817,12 +10643,12 @@ intrinsics:
|
||||
- ["h_f16", "f16", "u16"]
|
||||
compose:
|
||||
- FnCall:
|
||||
- simd_extract!
|
||||
- 'vget_lane_{type[2]}'
|
||||
- - FnCall:
|
||||
- "vcge_{type[1]}"
|
||||
- - FnCall: ["vdup_n_{type[1]}", [a]]
|
||||
- FnCall: ["vdup_n_{type[1]}", [b]]
|
||||
- '0'
|
||||
- - '0'
|
||||
|
||||
- name: "vcge{neon_type[0].no}"
|
||||
doc: "Floating-point compare greater than or equal"
|
||||
@@ -10830,7 +10656,7 @@ intrinsics:
|
||||
return_type: "{neon_type[1]}"
|
||||
attr:
|
||||
- FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcmge]]}]]
|
||||
- FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
|
||||
- *neon-stable
|
||||
safety: safe
|
||||
types:
|
||||
- [float64x1_t, uint64x1_t]
|
||||
@@ -10844,7 +10670,7 @@ intrinsics:
|
||||
return_type: "{type[2]}"
|
||||
attr:
|
||||
- FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [cmp]]}]]
|
||||
- FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
|
||||
- *neon-stable
|
||||
safety: safe
|
||||
types:
|
||||
- ["d_s64", "i64", "u64", s64]
|
||||
@@ -10863,7 +10689,7 @@ intrinsics:
|
||||
return_type: "{neon_type}"
|
||||
attr:
|
||||
- FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [cmhi]]}]]
|
||||
- FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
|
||||
- *neon-stable
|
||||
safety: safe
|
||||
types:
|
||||
- uint64x1_t
|
||||
@@ -10877,7 +10703,7 @@ intrinsics:
|
||||
return_type: "{type[2]}"
|
||||
attr:
|
||||
- FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [cmp]]}]]
|
||||
- FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
|
||||
- *neon-stable
|
||||
safety: safe
|
||||
types:
|
||||
- ["s64", "i64", "u64"]
|
||||
@@ -10896,7 +10722,7 @@ intrinsics:
|
||||
return_type: "{neon_type[0]}"
|
||||
attr:
|
||||
- FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [cmtst]]}]]
|
||||
- FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
|
||||
- *neon-stable
|
||||
safety: safe
|
||||
types:
|
||||
- [uint64x1_t, u64x1, 'u64x1::new(0)']
|
||||
@@ -10912,7 +10738,7 @@ intrinsics:
|
||||
return_type: "{neon_type[1]}"
|
||||
attr:
|
||||
- FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcmge]]}]]
|
||||
- FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
|
||||
- *neon-stable
|
||||
safety: safe
|
||||
types:
|
||||
- [float32x2_t, uint32x2_t, f32x2, 'f32x2::new(0.0, 0.0)']
|
||||
@@ -10932,18 +10758,18 @@ intrinsics:
|
||||
return_type: "{type[2]}"
|
||||
attr:
|
||||
- FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcmp]]}]]
|
||||
- FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
|
||||
- *neon-stable
|
||||
safety: safe
|
||||
types:
|
||||
- ["s_f32", "f32", "u32"]
|
||||
- ["d_f64", "f64", "u64"]
|
||||
compose:
|
||||
- FnCall:
|
||||
- simd_extract!
|
||||
- 'vget_lane_{type[2]}'
|
||||
- - FnCall:
|
||||
- "vcgez_{type[1]}"
|
||||
- - FnCall: ["vdup_n_{type[1]}", [a]]
|
||||
- '0'
|
||||
- - '0'
|
||||
|
||||
|
||||
- name: "vcgez{type[0]}"
|
||||
@@ -10960,11 +10786,11 @@ intrinsics:
|
||||
- ["h_f16", "f16", "u16"]
|
||||
compose:
|
||||
- FnCall:
|
||||
- simd_extract!
|
||||
- 'vget_lane_{type[2]}'
|
||||
- - FnCall:
|
||||
- "vcgez_{type[1]}"
|
||||
- - FnCall: ["vdup_n_{type[1]}", [a]]
|
||||
- '0'
|
||||
- - '0'
|
||||
|
||||
- name: "vclezd_s64"
|
||||
doc: "Compare less than or equal to zero"
|
||||
@@ -10972,7 +10798,7 @@ intrinsics:
|
||||
return_type: "{type[1]}"
|
||||
attr:
|
||||
- FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [cmp]]}]]
|
||||
- FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
|
||||
- *neon-stable
|
||||
safety: safe
|
||||
types:
|
||||
- ["i64", "u64"]
|
||||
@@ -10987,7 +10813,7 @@ intrinsics:
|
||||
return_type: "{type[1]}"
|
||||
attr:
|
||||
- FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [cmp]]}]]
|
||||
- FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
|
||||
- *neon-stable
|
||||
safety: safe
|
||||
types:
|
||||
- ["i64", "u64", 's64']
|
||||
@@ -11006,7 +10832,7 @@ intrinsics:
|
||||
return_type: "{neon_type[1]}"
|
||||
attr:
|
||||
- FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [cmgt]]}]]
|
||||
- FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
|
||||
- *neon-stable
|
||||
safety: safe
|
||||
types:
|
||||
- [int8x8_t, uint8x8_t, i8x8, 'i8x8::new(0, 0, 0, 0, 0, 0, 0, 0)']
|
||||
@@ -11030,7 +10856,7 @@ intrinsics:
|
||||
return_type: "{type[1]}"
|
||||
attr:
|
||||
- FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [cmp]]}]]
|
||||
- FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
|
||||
- *neon-stable
|
||||
safety: safe
|
||||
types:
|
||||
- ["i64", "u64"]
|
||||
@@ -11047,7 +10873,7 @@ intrinsics:
|
||||
return_type: "{neon_type[1]}"
|
||||
attr:
|
||||
- FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcmgt]]}]]
|
||||
- FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
|
||||
- *neon-stable
|
||||
safety: safe
|
||||
types:
|
||||
- [float32x2_t, uint32x2_t, f32x2, 'f32x2::new(0.0, 0.0)']
|
||||
@@ -11064,18 +10890,18 @@ intrinsics:
|
||||
return_type: "{type[2]}"
|
||||
attr:
|
||||
- FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcmp]]}]]
|
||||
- FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
|
||||
- *neon-stable
|
||||
safety: safe
|
||||
types:
|
||||
- ["s_f32", "f32", "u32"]
|
||||
- ["d_f64", "f64", "u64"]
|
||||
compose:
|
||||
- FnCall:
|
||||
- "simd_extract!"
|
||||
- 'vget_lane_{type[2]}'
|
||||
- - FnCall:
|
||||
- "vcgtz_{type[1]}"
|
||||
- - FnCall: ["vdup_n_{type[1]}", [a]]
|
||||
- '0'
|
||||
- - '0'
|
||||
|
||||
- name: "vcgtz{type[0]}"
|
||||
doc: "Floating-point compare greater than zero"
|
||||
@@ -11091,11 +10917,11 @@ intrinsics:
|
||||
- ["h_f16", "f16", "u16"]
|
||||
compose:
|
||||
- FnCall:
|
||||
- "simd_extract!"
|
||||
- 'vget_lane_{type[2]}'
|
||||
- - FnCall:
|
||||
- "vcgtz_{type[1]}"
|
||||
- - FnCall: ["vdup_n_{type[1]}", [a]]
|
||||
- '0'
|
||||
- - '0'
|
||||
|
||||
- name: "vcvt{neon_type[1].no}_{neon_type[0]}"
|
||||
doc: "Floating-point convert to unsigned fixed-point, rounding toward zero"
|
||||
@@ -11103,7 +10929,7 @@ intrinsics:
|
||||
return_type: "{neon_type[1]}"
|
||||
attr:
|
||||
- FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcvtzu]]}]]
|
||||
- FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
|
||||
- *neon-stable
|
||||
safety: safe
|
||||
types:
|
||||
- [float64x1_t, uint64x1_t]
|
||||
@@ -11121,7 +10947,7 @@ intrinsics:
|
||||
return_type: "{neon_type[0]}"
|
||||
attr:
|
||||
- FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fmul]]}]]
|
||||
- FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
|
||||
- *neon-stable
|
||||
safety: safe
|
||||
types:
|
||||
- [float64x1_t, "f64"]
|
||||
@@ -11139,7 +10965,7 @@ intrinsics:
|
||||
attr:
|
||||
- FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fmul, 'LANE = 0']]}]]
|
||||
- FnCall: [rustc_legacy_const_generics, ['2']]
|
||||
- FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
|
||||
- *neon-stable
|
||||
static_defs: ['const LANE: i32']
|
||||
safety: safe
|
||||
types:
|
||||
@@ -11151,7 +10977,7 @@ intrinsics:
|
||||
- - a
|
||||
- FnCall:
|
||||
- "transmute::<f64, _>"
|
||||
- - FnCall: [simd_extract!, [b, 'LANE as u32']]
|
||||
- - FnCall: ['vget{neon_type.lane_nox}', [b], [LANE]]
|
||||
|
||||
- name: "vmulq_lane_f64"
|
||||
doc: "Floating-point multiply"
|
||||
@@ -11160,7 +10986,7 @@ intrinsics:
|
||||
attr:
|
||||
- FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fmul, 'LANE = 0']]}]]
|
||||
- FnCall: [rustc_legacy_const_generics, ['2']]
|
||||
- FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
|
||||
- *neon-stable
|
||||
static_defs: ['const LANE: i32']
|
||||
safety: safe
|
||||
types:
|
||||
@@ -11170,7 +10996,7 @@ intrinsics:
|
||||
- FnCall:
|
||||
- simd_mul
|
||||
- - a
|
||||
- FnCall: ["simd_shuffle!", [b, b, '[LANE as u32; 2]']]
|
||||
- FnCall: [vdupq_lane_f64, [b], [LANE]]
|
||||
|
||||
- name: "vmuld_lane_f64"
|
||||
doc: "Floating-point multiply"
|
||||
@@ -11179,14 +11005,14 @@ intrinsics:
|
||||
attr:
|
||||
- FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fmul, 'LANE = 0']]}]]
|
||||
- FnCall: [rustc_legacy_const_generics, ['2']]
|
||||
- FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
|
||||
- *neon-stable
|
||||
static_defs: ['const LANE: i32']
|
||||
safety: safe
|
||||
types:
|
||||
- ["f64", float64x1_t]
|
||||
compose:
|
||||
- FnCall: [static_assert!, ['LANE == 0']]
|
||||
- Let: [b, '{type[0]}', {FnCall: [simd_extract!, [b, 'LANE as u32']]}]
|
||||
- Let: [b, '{type[0]}', {FnCall: ['vget{neon_type[1].lane_nox}', [b], [LANE]]}]
|
||||
- Identifier: ['a * b', Symbol]
|
||||
|
||||
- name: "vmul_laneq_f64"
|
||||
@@ -11196,7 +11022,7 @@ intrinsics:
|
||||
attr:
|
||||
- FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fmul, 'LANE = 0']]}]]
|
||||
- FnCall: [rustc_legacy_const_generics, ['2']]
|
||||
- FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
|
||||
- *neon-stable
|
||||
static_defs: ['const LANE: i32']
|
||||
safety: safe
|
||||
types:
|
||||
@@ -11208,7 +11034,7 @@ intrinsics:
|
||||
- - a
|
||||
- FnCall:
|
||||
- "transmute::<f64, _>"
|
||||
- - FnCall: [simd_extract!, [b, 'LANE as u32']]
|
||||
- - FnCall: ['vget{neon_type[1].lane_nox}', [b], [LANE]]
|
||||
|
||||
- name: "vmulq_laneq_f64"
|
||||
doc: "Floating-point multiply"
|
||||
@@ -11217,7 +11043,7 @@ intrinsics:
|
||||
attr:
|
||||
- FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fmul, 'LANE = 0']]}]]
|
||||
- FnCall: [rustc_legacy_const_generics, ['2']]
|
||||
- FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
|
||||
- *neon-stable
|
||||
static_defs: ['const LANE: i32']
|
||||
safety: safe
|
||||
types:
|
||||
@@ -11227,7 +11053,7 @@ intrinsics:
|
||||
- FnCall:
|
||||
- simd_mul
|
||||
- - a
|
||||
- FnCall: [simd_shuffle!, [b, b, '[LANE as u32; 2]']]
|
||||
- FnCall: [vdupq_laneq_f64, [b], [LANE]]
|
||||
|
||||
|
||||
# vmulq_laneq_f16
|
||||
@@ -11244,14 +11070,14 @@ intrinsics:
|
||||
static_defs: ['const LANE: i32']
|
||||
safety: safe
|
||||
types:
|
||||
- [float16x4_t, float16x8_t, '_lane', "[LANE as u32; 4]"]
|
||||
- [float16x8_t, float16x8_t, 'q_lane', "[LANE as u32; 8]"]
|
||||
- [float16x4_t, float16x8_t, '_lane']
|
||||
- [float16x8_t, float16x8_t, 'q_lane']
|
||||
compose:
|
||||
- FnCall: [static_assert_uimm_bits!, [LANE, '3']]
|
||||
- FnCall:
|
||||
- simd_mul
|
||||
- - a
|
||||
- FnCall: [simd_shuffle!, [b, b, "{type[3]}"]]
|
||||
- FnCall: ['vdup{neon_type[0].laneq_nox}', [b], [LANE]]
|
||||
|
||||
|
||||
- name: "vmul{type[1]}_{type[0]}"
|
||||
@@ -11277,7 +11103,7 @@ intrinsics:
|
||||
attr:
|
||||
- FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fmul, 'LANE = 0']]}]]
|
||||
- FnCall: [rustc_legacy_const_generics, ['2']]
|
||||
- FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
|
||||
- *neon-stable
|
||||
static_defs: ['const LANE: i32']
|
||||
safety: safe
|
||||
types:
|
||||
@@ -11286,7 +11112,7 @@ intrinsics:
|
||||
- ["f64", float64x2_t, "d_laneq_f64", '1']
|
||||
compose:
|
||||
- FnCall: [static_assert_uimm_bits!, [LANE, '{type[3]}']]
|
||||
- Let: [b, '{type[0]}', {FnCall: [simd_extract!, [b, 'LANE as u32']]}]
|
||||
- Let: [b, '{type[0]}', {FnCall: ['vget{neon_type[1].lane_nox}', [b], [LANE]]}]
|
||||
- Identifier: ['a * b', Symbol]
|
||||
|
||||
|
||||
@@ -11307,7 +11133,7 @@ intrinsics:
|
||||
- ["f16", float16x8_t, "h_laneq_f16", '3']
|
||||
compose:
|
||||
- FnCall: [static_assert_uimm_bits!, [LANE, '{type[3]}']]
|
||||
- Let: [b, '{type[0]}', {FnCall: [simd_extract!, [b, 'LANE as u32']]}]
|
||||
- Let: [b, '{type[0]}', {FnCall: ['vget{neon_type[1].lane_nox}', [b], [LANE]]}]
|
||||
- Identifier: ['a * b', Symbol]
|
||||
|
||||
|
||||
@@ -11318,7 +11144,7 @@ intrinsics:
|
||||
attr:
|
||||
- FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [srshr, 'N = 2']]}]]
|
||||
- FnCall: [rustc_legacy_const_generics, ['2']]
|
||||
- FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
|
||||
- *neon-stable
|
||||
static_defs: ['const N: i32']
|
||||
safety: safe
|
||||
types:
|
||||
@@ -11333,8 +11159,8 @@ intrinsics:
|
||||
arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}", "c: {type[2]}"]
|
||||
return_type: "{neon_type[0]}"
|
||||
attr:
|
||||
- FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [smlsl2]]}]]
|
||||
- FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
|
||||
- FnCall: [cfg_attr, [*all-test-little-endian, {FnCall: [assert_instr, [smlsl2]]}]]
|
||||
- *neon-stable
|
||||
safety: safe
|
||||
types:
|
||||
- [int32x4_t, int16x8_t, "i16"]
|
||||
@@ -11347,8 +11173,8 @@ intrinsics:
|
||||
arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}", "c: {type[2]}"]
|
||||
return_type: "{neon_type[0]}"
|
||||
attr:
|
||||
- FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [umlsl2]]}]]
|
||||
- FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
|
||||
- FnCall: [cfg_attr, [*all-test-little-endian, {FnCall: [assert_instr, [umlsl2]]}]]
|
||||
- *neon-stable
|
||||
safety: safe
|
||||
types:
|
||||
- [uint32x4_t, uint16x8_t, "u16"]
|
||||
@@ -11361,46 +11187,46 @@ intrinsics:
|
||||
arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}", "c: {neon_type[2]}"]
|
||||
return_type: "{neon_type[0]}"
|
||||
attr:
|
||||
- FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [smlsl2, 'LANE = 1']]}]]
|
||||
- FnCall: [cfg_attr, [*all-test-little-endian, {FnCall: [assert_instr, [smlsl2, 'LANE = 1']]}]]
|
||||
- FnCall: [rustc_legacy_const_generics, ['3']]
|
||||
- FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
|
||||
- *neon-stable
|
||||
static_defs: ['const LANE: i32']
|
||||
safety: safe
|
||||
types:
|
||||
- [int32x4_t, int16x8_t, int16x4_t, '2', '[LANE as u32; 8]']
|
||||
- [int32x4_t, int16x8_t, int16x8_t, '3', '[LANE as u32; 8]']
|
||||
- [int64x2_t, int32x4_t, int32x2_t, '1', '[LANE as u32; 4]']
|
||||
- [int64x2_t, int32x4_t, int32x4_t, '2', '[LANE as u32; 4]']
|
||||
- [int32x4_t, int16x8_t, int16x4_t, '2']
|
||||
- [int32x4_t, int16x8_t, int16x8_t, '3']
|
||||
- [int64x2_t, int32x4_t, int32x2_t, '1']
|
||||
- [int64x2_t, int32x4_t, int32x4_t, '2']
|
||||
compose:
|
||||
- FnCall: [static_assert_uimm_bits!, [LANE, '{type[3]}']]
|
||||
- FnCall:
|
||||
- "vmlsl_high_{neon_type[1]}"
|
||||
- - a
|
||||
- b
|
||||
- FnCall: [simd_shuffle!, [c, c, "{type[4]}"]]
|
||||
- FnCall: ['vdupq_lane{neon_type[2].no}', [c], [LANE]]
|
||||
|
||||
- name: "vmlsl_high_lane{neon_type[2].no}"
|
||||
doc: "Multiply-subtract long"
|
||||
arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}", "c: {neon_type[2]}"]
|
||||
return_type: "{neon_type[0]}"
|
||||
attr:
|
||||
- FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [umlsl2, 'LANE = 1']]}]]
|
||||
- FnCall: [cfg_attr, [*all-test-little-endian, {FnCall: [assert_instr, [umlsl2, 'LANE = 1']]}]]
|
||||
- FnCall: [rustc_legacy_const_generics, ['3']]
|
||||
- FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
|
||||
- *neon-stable
|
||||
static_defs: ['const LANE: i32']
|
||||
safety: safe
|
||||
types:
|
||||
- [uint32x4_t, uint16x8_t, uint16x4_t, '2', '[LANE as u32; 8]']
|
||||
- [uint32x4_t, uint16x8_t, uint16x8_t, '3', '[LANE as u32; 8]']
|
||||
- [uint64x2_t, uint32x4_t, uint32x2_t, '1', '[LANE as u32; 4]']
|
||||
- [uint64x2_t, uint32x4_t, uint32x4_t, '2', '[LANE as u32; 4]']
|
||||
- [uint32x4_t, uint16x8_t, uint16x4_t, '2']
|
||||
- [uint32x4_t, uint16x8_t, uint16x8_t, '3']
|
||||
- [uint64x2_t, uint32x4_t, uint32x2_t, '1']
|
||||
- [uint64x2_t, uint32x4_t, uint32x4_t, '2']
|
||||
compose:
|
||||
- FnCall: [static_assert_uimm_bits!, [LANE, '{type[3]}']]
|
||||
- FnCall:
|
||||
- "vmlsl_high_{neon_type[1]}"
|
||||
- - a
|
||||
- b
|
||||
- FnCall: [simd_shuffle!, [c, c, "{type[4]}"]]
|
||||
- FnCall: ['vdupq_lane{neon_type[2].no}', [c], [LANE]]
|
||||
|
||||
- name: "vclt{neon_type[0].no}"
|
||||
doc: "Floating-point compare less than"
|
||||
@@ -11408,7 +11234,7 @@ intrinsics:
|
||||
return_type: "{neon_type[1]}"
|
||||
attr:
|
||||
- FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcmgt]]}]]
|
||||
- FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
|
||||
- *neon-stable
|
||||
safety: safe
|
||||
types:
|
||||
- [float64x1_t, uint64x1_t]
|
||||
@@ -11422,19 +11248,19 @@ intrinsics:
|
||||
return_type: "{type[1]}"
|
||||
attr:
|
||||
- FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcmp]]}]]
|
||||
- FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
|
||||
- *neon-stable
|
||||
safety: safe
|
||||
types:
|
||||
- ["f32", "u32", 's_f32']
|
||||
- ["f64", "u64", 'd_f64']
|
||||
compose:
|
||||
- FnCall:
|
||||
- simd_extract!
|
||||
- 'vget_lane_{type[1]}'
|
||||
- - FnCall:
|
||||
- "vclt_{type[0]}"
|
||||
- - FnCall: ["vdup_n_{type[0]}", [a]]
|
||||
- FnCall: ["vdup_n_{type[0]}", [b]]
|
||||
- '0'
|
||||
- - '0'
|
||||
|
||||
|
||||
- name: "vclt{type[2]}"
|
||||
@@ -11451,29 +11277,38 @@ intrinsics:
|
||||
- ["f16", "u16", 'h_f16']
|
||||
compose:
|
||||
- FnCall:
|
||||
- simd_extract!
|
||||
- 'vget_lane_{type[1]}'
|
||||
- - FnCall:
|
||||
- "vclt_{type[0]}"
|
||||
- - FnCall: ["vdup_n_{type[0]}", [a]]
|
||||
- FnCall: ["vdup_n_{type[0]}", [b]]
|
||||
- '0'
|
||||
- - '0'
|
||||
|
||||
- name: "vabdl_high_{neon_type[0]}"
|
||||
doc: "Unsigned Absolute difference Long"
|
||||
- name: "vabdl_high{neon_type[0].noq}"
|
||||
doc: Unsigned Absolute difference Long
|
||||
arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}"]
|
||||
return_type: "{neon_type[1]}"
|
||||
attr:
|
||||
- FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [uabdl2]]}]]
|
||||
- FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
|
||||
- *neon-stable
|
||||
- FnCall: [cfg_attr, [*all-test-little-endian, {FnCall: [assert_instr, [uabdl2]]}]]
|
||||
safety: safe
|
||||
types:
|
||||
- [uint8x16_t, uint16x8_t, uint8x8_t, '[8, 9, 10, 11, 12, 13, 14, 15]']
|
||||
- [uint16x8_t, uint32x4_t, uint16x4_t, '[4, 5, 6, 7]']
|
||||
- [uint32x4_t, uint64x2_t, uint32x2_t, '[2, 3]']
|
||||
- [uint8x16_t, uint16x8_t]
|
||||
- [uint16x8_t, uint32x4_t]
|
||||
- [uint32x4_t, uint64x2_t]
|
||||
compose:
|
||||
- Let: [c, "{neon_type[2]}", {FnCall: [simd_shuffle!, [a, a, "{type[3]}"]]}]
|
||||
- Let: [d, "{neon_type[2]}", {FnCall: [simd_shuffle!, [b, b, "{type[3]}"]]}]
|
||||
- FnCall: [simd_cast, [{FnCall: ["vabd_{neon_type[0]}", [c, d]]}]]
|
||||
- Let:
|
||||
- c
|
||||
- FnCall: ['vget_high_{neon_type[0]}', [a]]
|
||||
- Let:
|
||||
- d
|
||||
- FnCall: ['vget_high_{neon_type[0]}', [b]]
|
||||
- FnCall:
|
||||
- simd_cast
|
||||
- - FnCall:
|
||||
- "vabd_{neon_type[0]}"
|
||||
- - c
|
||||
- d
|
||||
|
||||
- name: "vfms_n_f64"
|
||||
doc: "Floating-point fused Multiply-subtract to accumulator(vector)"
|
||||
@@ -11481,7 +11316,7 @@ intrinsics:
|
||||
return_type: "{neon_type[0]}"
|
||||
attr:
|
||||
- FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fmsub]]}]]
|
||||
- FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
|
||||
- *neon-stable
|
||||
safety: safe
|
||||
types:
|
||||
- [float64x1_t, "f64"]
|
||||
@@ -11498,7 +11333,7 @@ intrinsics:
|
||||
return_type: "{neon_type[0]}"
|
||||
attr:
|
||||
- FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fmls]]}]]
|
||||
- FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
|
||||
- *neon-stable
|
||||
safety: safe
|
||||
types:
|
||||
- [float64x2_t, "f64"]
|
||||
@@ -11539,8 +11374,9 @@ intrinsics:
|
||||
return_type: "{type[2]}"
|
||||
attr:
|
||||
- FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fminnmp]]}]]
|
||||
- FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
|
||||
- *neon-stable
|
||||
safety: safe
|
||||
big_endian_inverse: true
|
||||
types:
|
||||
- ['s_f32', float32x2_t, "f32"]
|
||||
- ['qd_f64', float64x2_t, "f64"]
|
||||
@@ -11557,8 +11393,9 @@ intrinsics:
|
||||
return_type: "{type[2]}"
|
||||
attr:
|
||||
- FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fmaxnmp]]}]]
|
||||
- FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
|
||||
- *neon-stable
|
||||
safety: safe
|
||||
big_endian_inverse: true
|
||||
types:
|
||||
- ['s_f32', float32x2_t, "f32"]
|
||||
- ['qd_f64', float64x2_t, "f64"]
|
||||
@@ -11575,7 +11412,7 @@ intrinsics:
|
||||
return_type: "{type[2]}"
|
||||
attr:
|
||||
- FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [cmp]]}]]
|
||||
- FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
|
||||
- *neon-stable
|
||||
safety: safe
|
||||
types:
|
||||
- ["s64", "i64", "u64"]
|
||||
@@ -11595,7 +11432,7 @@ intrinsics:
|
||||
attr:
|
||||
- FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [sqdmulh, 'LANE = 0']]}]]
|
||||
- FnCall: [rustc_legacy_const_generics, ['2']]
|
||||
- FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
|
||||
- *neon-stable
|
||||
static_defs: ['const LANE: i32']
|
||||
safety: safe
|
||||
types:
|
||||
@@ -11610,14 +11447,14 @@ intrinsics:
|
||||
- - a
|
||||
- FnCall:
|
||||
- "vdup{neon_type[0].N}"
|
||||
- - FnCall: [simd_extract!, [b, 'LANE as u32']]
|
||||
- - FnCall: ['vget{neon_type[1].lane_nox}', [b], [LANE]]
|
||||
|
||||
- name: "vqabs{type[2]}"
|
||||
doc: "Signed saturating absolute value"
|
||||
arguments: ["a: {type[0]}"]
|
||||
return_type: "{type[0]}"
|
||||
attr:
|
||||
- FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
|
||||
- *neon-stable
|
||||
- FnCall: [cfg_attr, [{FnCall: [all, [test, {FnCall: [not, ['target_env = "msvc"']]}]]}, {FnCall: [assert_instr, [sqabs]]}]]
|
||||
safety: safe
|
||||
types:
|
||||
@@ -11625,16 +11462,16 @@ intrinsics:
|
||||
- ["i16", "s16", 'h_s16']
|
||||
compose:
|
||||
- FnCall:
|
||||
- "simd_extract!"
|
||||
- 'vget_lane_{type[0]}'
|
||||
- - FnCall: ["vqabs_{type[1]}", [{FnCall: ["vdup_n_{type[1]}", [a]]}]]
|
||||
- '0'
|
||||
- - '0'
|
||||
|
||||
- name: "vqabs{type[1]}"
|
||||
doc: "Signed saturating absolute value"
|
||||
arguments: ["a: {type[0]}"]
|
||||
return_type: "{type[0]}"
|
||||
attr:
|
||||
- FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
|
||||
- *neon-stable
|
||||
- FnCall: [cfg_attr, [{FnCall: [all, [test, {FnCall: [not, ['target_env = "msvc"']]}]]}, {FnCall: [assert_instr, [sqabs]]}]]
|
||||
safety: safe
|
||||
types:
|
||||
@@ -11652,8 +11489,8 @@ intrinsics:
|
||||
arguments: ["a: {neon_type[0]}", "b: {type[1]}"]
|
||||
return_type: "{neon_type[2]}"
|
||||
attr:
|
||||
- FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [smull2]]}]]
|
||||
- FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
|
||||
- FnCall: [cfg_attr, [*all-test-little-endian, {FnCall: [assert_instr, [smull2]]}]]
|
||||
- *neon-stable
|
||||
safety: safe
|
||||
types:
|
||||
- [int16x8_t, "i16", int32x4_t]
|
||||
@@ -11669,8 +11506,8 @@ intrinsics:
|
||||
arguments: ["a: {neon_type[0]}", "b: {type[1]}"]
|
||||
return_type: "{neon_type[2]}"
|
||||
attr:
|
||||
- FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [umull2]]}]]
|
||||
- FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
|
||||
- FnCall: [cfg_attr, [*all-test-little-endian, {FnCall: [assert_instr, [umull2]]}]]
|
||||
- *neon-stable
|
||||
safety: safe
|
||||
types:
|
||||
- [uint16x8_t, "u16", uint32x4_t]
|
||||
@@ -11686,44 +11523,44 @@ intrinsics:
|
||||
arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}"]
|
||||
return_type: "{neon_type[2]}"
|
||||
attr:
|
||||
- FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [smull2, 'LANE = 1']]}]]
|
||||
- FnCall: [cfg_attr, [*all-test-little-endian, {FnCall: [assert_instr, [smull2, 'LANE = 1']]}]]
|
||||
- FnCall: [rustc_legacy_const_generics, ['2']]
|
||||
- FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
|
||||
- *neon-stable
|
||||
static_defs: ['const LANE: i32']
|
||||
safety: safe
|
||||
types:
|
||||
- [int16x8_t, int16x4_t, int32x4_t, '2', '[LANE as u32; 8]']
|
||||
- [int16x8_t, int16x8_t, int32x4_t, '3', '[LANE as u32; 8]']
|
||||
- [int32x4_t, int32x2_t, int64x2_t, '1', '[LANE as u32; 4]']
|
||||
- [int32x4_t, int32x4_t, int64x2_t, '2', '[LANE as u32; 4]']
|
||||
- [int16x8_t, int16x4_t, int32x4_t, '2']
|
||||
- [int16x8_t, int16x8_t, int32x4_t, '3']
|
||||
- [int32x4_t, int32x2_t, int64x2_t, '1']
|
||||
- [int32x4_t, int32x4_t, int64x2_t, '2']
|
||||
compose:
|
||||
- FnCall: [static_assert_uimm_bits!, [LANE, "{type[3]}"]]
|
||||
- FnCall:
|
||||
- "vmull_high_{neon_type[0]}"
|
||||
- - a
|
||||
- FnCall: [simd_shuffle!, [b, b, '{type[4]}']]
|
||||
- FnCall: ['vdupq_lane{neon_type[1].no}', [b], [LANE]]
|
||||
|
||||
- name: "vmull_high_lane{neon_type[1].no}"
|
||||
doc: "Multiply long"
|
||||
arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}"]
|
||||
return_type: "{neon_type[2]}"
|
||||
attr:
|
||||
- FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [umull2, 'LANE = 1']]}]]
|
||||
- FnCall: [cfg_attr, [*all-test-little-endian, {FnCall: [assert_instr, [umull2, 'LANE = 1']]}]]
|
||||
- FnCall: [rustc_legacy_const_generics, ['2']]
|
||||
- FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
|
||||
- *neon-stable
|
||||
static_defs: ['const LANE: i32']
|
||||
safety: safe
|
||||
types:
|
||||
- [uint16x8_t, uint16x4_t, uint32x4_t, '2', '[LANE as u32; 8]']
|
||||
- [uint16x8_t, uint16x8_t, uint32x4_t, '3', '[LANE as u32; 8]']
|
||||
- [uint32x4_t, uint32x2_t, uint64x2_t, '1', '[LANE as u32; 4]']
|
||||
- [uint32x4_t, uint32x4_t, uint64x2_t, '2', '[LANE as u32; 4]']
|
||||
- [uint16x8_t, uint16x4_t, uint32x4_t, '2']
|
||||
- [uint16x8_t, uint16x8_t, uint32x4_t, '3']
|
||||
- [uint32x4_t, uint32x2_t, uint64x2_t, '1']
|
||||
- [uint32x4_t, uint32x4_t, uint64x2_t, '2']
|
||||
compose:
|
||||
- FnCall: [static_assert_uimm_bits!, [LANE, "{type[3]}"]]
|
||||
- FnCall:
|
||||
- "vmull_high_{neon_type[0]}"
|
||||
- - a
|
||||
- FnCall: [simd_shuffle!, [b, b, '{type[4]}']]
|
||||
- FnCall: ['vdupq_lane{neon_type[1].no}', [b], [LANE]]
|
||||
|
||||
- name: "vrsqrte{neon_type.no}"
|
||||
doc: "Reciprocal square-root estimate."
|
||||
@@ -11731,7 +11568,7 @@ intrinsics:
|
||||
return_type: "{neon_type}"
|
||||
attr:
|
||||
- FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [frsqrte]]}]]
|
||||
- FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
|
||||
- *neon-stable
|
||||
safety: safe
|
||||
types:
|
||||
- float64x1_t
|
||||
@@ -11749,7 +11586,7 @@ intrinsics:
|
||||
return_type: "{type[1]}"
|
||||
attr:
|
||||
- FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [frsqrte]]}]]
|
||||
- FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
|
||||
- *neon-stable
|
||||
safety: safe
|
||||
types:
|
||||
- ["s_f32", "f32"]
|
||||
@@ -11788,8 +11625,9 @@ intrinsics:
|
||||
return_type: "{neon_type}"
|
||||
attr:
|
||||
- FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fminnmp]]}]]
|
||||
- FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
|
||||
- *neon-stable
|
||||
safety: safe
|
||||
big_endian_inverse: true
|
||||
types:
|
||||
- float32x2_t
|
||||
- float64x2_t
|
||||
@@ -11808,7 +11646,7 @@ intrinsics:
|
||||
attr:
|
||||
- FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [sqshlu, 'N = 2']]}]]
|
||||
- FnCall: [rustc_legacy_const_generics, ['1']]
|
||||
- FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
|
||||
- *neon-stable
|
||||
static_defs: ['const N: i32']
|
||||
safety: safe
|
||||
types:
|
||||
@@ -11819,11 +11657,11 @@ intrinsics:
|
||||
compose:
|
||||
- FnCall: [static_assert_uimm_bits!, [N, "{type[3]}"]]
|
||||
- FnCall:
|
||||
- simd_extract!
|
||||
- 'vget_lane_{type[2]}'
|
||||
- - FnCall:
|
||||
- "vqshlu_n_{type[4]}::<N>"
|
||||
- - FnCall: ["vdup_n_{type[4]}", [a]]
|
||||
- '0'
|
||||
- - '0'
|
||||
|
||||
- name: "vcvta{neon_type[1].no}_{neon_type[0]}"
|
||||
doc: "Floating-point convert to unsigned integer, rounding to nearest with ties to away"
|
||||
@@ -11831,7 +11669,7 @@ intrinsics:
|
||||
return_type: "{neon_type[1]}"
|
||||
attr:
|
||||
- FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcvtau]]}]]
|
||||
- FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
|
||||
- *neon-stable
|
||||
safety: safe
|
||||
types:
|
||||
- [float32x2_t, uint32x2_t]
|
||||
@@ -11873,7 +11711,7 @@ intrinsics:
|
||||
return_type: "{neon_type[1]}"
|
||||
attr:
|
||||
- FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcvtzs]]}]]
|
||||
- FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
|
||||
- *neon-stable
|
||||
safety: safe
|
||||
types:
|
||||
- [float64x1_t, int64x1_t]
|
||||
@@ -11896,6 +11734,7 @@ intrinsics:
|
||||
- *target-not-arm64ec
|
||||
safety: safe
|
||||
types:
|
||||
- ["f16", "i16", 'h']
|
||||
- ["f16", "i32", 'h']
|
||||
- ["f16", "i64", 'h']
|
||||
compose:
|
||||
@@ -11906,22 +11745,6 @@ intrinsics:
|
||||
- link: "llvm.aarch64.neon.fcvtms.{type[1]}.{type[0]}"
|
||||
arch: aarch64,arm64ec
|
||||
|
||||
- name: "vcvtm{type[2]}_{type[1]}_{type[0]}"
|
||||
doc: "Floating-point convert to integer, rounding towards minus infinity"
|
||||
arguments: ["a: {type[0]}"]
|
||||
return_type: "{type[1]}"
|
||||
attr:
|
||||
- FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcvtms]]}]]
|
||||
- *neon-fp16
|
||||
- *neon-unstable-f16
|
||||
- *target-not-arm64ec
|
||||
safety: safe
|
||||
types:
|
||||
- ["f16", "i16", 'h', 'i32']
|
||||
compose:
|
||||
- 'vcvtmh_{type[3]}_f16(a) as i16'
|
||||
|
||||
|
||||
- name: "vcvtm{type[2]}_{type[1]}_{type[0]}"
|
||||
doc: "Floating-point convert to unsigned integer, rounding towards minus infinity"
|
||||
arguments: ["a: {type[0]}"]
|
||||
@@ -11933,6 +11756,7 @@ intrinsics:
|
||||
- *target-not-arm64ec
|
||||
safety: safe
|
||||
types:
|
||||
- ["f16", "u16", 'h']
|
||||
- ["f16", "u32", 'h']
|
||||
- ["f16", "u64", 'h']
|
||||
compose:
|
||||
@@ -11943,28 +11767,13 @@ intrinsics:
|
||||
- link: "llvm.aarch64.neon.fcvtmu.{type[1]}.{type[0]}"
|
||||
arch: aarch64,arm64ec
|
||||
|
||||
- name: "vcvtm{type[2]}_{type[1]}_{type[0]}"
|
||||
doc: "Floating-point convert to integer, rounding towards minus infinity"
|
||||
arguments: ["a: {type[0]}"]
|
||||
return_type: "{type[1]}"
|
||||
attr:
|
||||
- FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcvtmu]]}]]
|
||||
- *neon-fp16
|
||||
- *neon-unstable-f16
|
||||
- *target-not-arm64ec
|
||||
safety: safe
|
||||
types:
|
||||
- ["f16", "u16", 'h', 'u32']
|
||||
compose:
|
||||
- 'vcvtmh_{type[3]}_f16(a) as u16'
|
||||
|
||||
- name: "vmlal_high_n_{neon_type[1]}"
|
||||
doc: "Multiply-add long"
|
||||
arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}", "c: {type[2]}"]
|
||||
return_type: "{neon_type[0]}"
|
||||
attr:
|
||||
- FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [smlal2]]}]]
|
||||
- FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
|
||||
- FnCall: [cfg_attr, [*all-test-little-endian, {FnCall: [assert_instr, [smlal2]]}]]
|
||||
- *neon-stable
|
||||
safety: safe
|
||||
types:
|
||||
- [int32x4_t, int16x8_t, "i16"]
|
||||
@@ -11981,8 +11790,8 @@ intrinsics:
|
||||
arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}", "c: {type[2]}"]
|
||||
return_type: "{neon_type[0]}"
|
||||
attr:
|
||||
- FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [umlal2]]}]]
|
||||
- FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
|
||||
- FnCall: [cfg_attr, [*all-test-little-endian, {FnCall: [assert_instr, [umlal2]]}]]
|
||||
- *neon-stable
|
||||
safety: safe
|
||||
types:
|
||||
- [uint32x4_t, uint16x8_t, "u16"]
|
||||
@@ -11999,38 +11808,38 @@ intrinsics:
|
||||
arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}", "c: {neon_type[2]}"]
|
||||
return_type: "{neon_type[0]}"
|
||||
attr:
|
||||
- FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [smlal2, 'LANE = 1']]}]]
|
||||
- FnCall: [cfg_attr, [*all-test-little-endian, {FnCall: [assert_instr, [smlal2, 'LANE = 1']]}]]
|
||||
- FnCall: [rustc_legacy_const_generics, ['3']]
|
||||
- FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
|
||||
- *neon-stable
|
||||
static_defs: ['const LANE: i32']
|
||||
safety: safe
|
||||
types:
|
||||
- [int32x4_t, int16x8_t, int16x4_t, '2', '[LANE as u32; 8]']
|
||||
- [int32x4_t, int16x8_t, int16x8_t, '3', '[LANE as u32; 8]']
|
||||
- [int64x2_t, int32x4_t, int32x2_t, '1', '[LANE as u32; 4]']
|
||||
- [int64x2_t, int32x4_t, int32x4_t, '2', '[LANE as u32; 4]']
|
||||
- [int32x4_t, int16x8_t, int16x4_t, '2']
|
||||
- [int32x4_t, int16x8_t, int16x8_t, '3']
|
||||
- [int64x2_t, int32x4_t, int32x2_t, '1']
|
||||
- [int64x2_t, int32x4_t, int32x4_t, '2']
|
||||
compose:
|
||||
- FnCall: [static_assert_uimm_bits!, [LANE, '{type[3]}']]
|
||||
- FnCall: ['vmlal_high_{neon_type[2]}', [a, b, {FnCall: [simd_shuffle!, [c, c, '{type[4]}']]}]]
|
||||
- FnCall: ['vmlal_high_{neon_type[2]}', [a, b, {FnCall: ['vdupq_lane{neon_type[2].no}', [c], [LANE]]}]]
|
||||
|
||||
- name: "vmlal_high_lane{neon_type[2].no}"
|
||||
doc: "Multiply-add long"
|
||||
arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}", "c: {neon_type[2]}"]
|
||||
return_type: "{neon_type[0]}"
|
||||
attr:
|
||||
- FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [umlal2, 'LANE = 1']]}]]
|
||||
- FnCall: [cfg_attr, [*all-test-little-endian, {FnCall: [assert_instr, [umlal2, 'LANE = 1']]}]]
|
||||
- FnCall: [rustc_legacy_const_generics, ['3']]
|
||||
- FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
|
||||
- *neon-stable
|
||||
static_defs: ['const LANE: i32']
|
||||
safety: safe
|
||||
types:
|
||||
- [uint32x4_t, uint16x8_t, uint16x4_t, '2', '[LANE as u32; 8]']
|
||||
- [uint32x4_t, uint16x8_t, uint16x8_t, '3', '[LANE as u32; 8]']
|
||||
- [uint64x2_t, uint32x4_t, uint32x2_t, '1', '[LANE as u32; 4]']
|
||||
- [uint64x2_t, uint32x4_t, uint32x4_t, '2', '[LANE as u32; 4]']
|
||||
- [uint32x4_t, uint16x8_t, uint16x4_t, '2']
|
||||
- [uint32x4_t, uint16x8_t, uint16x8_t, '3']
|
||||
- [uint64x2_t, uint32x4_t, uint32x2_t, '1']
|
||||
- [uint64x2_t, uint32x4_t, uint32x4_t, '2']
|
||||
compose:
|
||||
- FnCall: [static_assert_uimm_bits!, [LANE, '{type[3]}']]
|
||||
- FnCall: ['vmlal_high_{neon_type[2]}', [a, b, {FnCall: [simd_shuffle!, [c, c, '{type[4]}']]}]]
|
||||
- FnCall: ['vmlal_high_{neon_type[2]}', [a, b, {FnCall: ['vdupq_lane{neon_type[2].no}', [c], [LANE]]}]]
|
||||
|
||||
- name: "vrsrad_n_u64"
|
||||
doc: "Unsigned rounding shift right and accumulate."
|
||||
@@ -12039,7 +11848,7 @@ intrinsics:
|
||||
attr:
|
||||
- FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [urshr, 'N = 2']]}]]
|
||||
- FnCall: [rustc_legacy_const_generics, ['2']]
|
||||
- FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
|
||||
- *neon-stable
|
||||
static_defs: ['const N: i32']
|
||||
safety: safe
|
||||
types:
|
||||
@@ -12055,7 +11864,7 @@ intrinsics:
|
||||
return_type: "{neon_type}"
|
||||
attr:
|
||||
- FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [cmhs]]}]]
|
||||
- FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
|
||||
- *neon-stable
|
||||
safety: safe
|
||||
types:
|
||||
- uint64x1_t
|
||||
@@ -12069,7 +11878,7 @@ intrinsics:
|
||||
return_type: "{neon_type[1]}"
|
||||
attr:
|
||||
- FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [ld4r]]}]]
|
||||
- FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
|
||||
- *neon-stable
|
||||
safety:
|
||||
unsafe: [neon]
|
||||
types:
|
||||
@@ -12092,7 +11901,7 @@ intrinsics:
|
||||
return_type: "{neon_type[1]}"
|
||||
attr:
|
||||
- FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [ld4r]]}]]
|
||||
- FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
|
||||
- *neon-stable
|
||||
safety:
|
||||
unsafe: [neon]
|
||||
types:
|
||||
@@ -12109,7 +11918,7 @@ intrinsics:
|
||||
attr:
|
||||
- FnCall: [target_feature, ['enable = "neon,aes"']]
|
||||
- FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [ld4r]]}]]
|
||||
- FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
|
||||
- *neon-stable
|
||||
safety:
|
||||
unsafe: [neon]
|
||||
types:
|
||||
@@ -12125,21 +11934,29 @@ intrinsics:
|
||||
return_type: "{neon_type[0]}"
|
||||
attr:
|
||||
- FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [tbx]]}]]
|
||||
- FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
|
||||
- *neon-stable
|
||||
safety: safe
|
||||
types:
|
||||
- [int8x8_t, int8x8x4_t]
|
||||
- [int8x8_t, 'int8x8x4_t', 'int8x16x2', 'int8x8', 'i8x8::splat(32)']
|
||||
compose:
|
||||
- Let:
|
||||
- x
|
||||
- FnCall:
|
||||
- '{type[2]}_t'
|
||||
- - FnCall: ['vcombine{neon_type[0].no}', ['b.0', 'b.1']]
|
||||
- FnCall: ['vcombine{neon_type[0].no}', ['b.2', 'b.3']]
|
||||
- FnCall:
|
||||
- "vqtbx2"
|
||||
- - FnCall: [transmute, [a]]
|
||||
- FnCall:
|
||||
- transmute
|
||||
- - FnCall: ["vcombine{neon_type[0].noq}", ["b.0", "b.1"]]
|
||||
- FnCall:
|
||||
- transmute
|
||||
- - FnCall: ["vcombine{neon_type[0].noq}", ["b.2", "b.3"]]
|
||||
- FnCall: [transmute, [c]]
|
||||
- simd_select
|
||||
- - FnCall:
|
||||
- "simd_lt::<{type[3]}_t, int8x8_t>"
|
||||
- - c
|
||||
- FnCall: [transmute, ["{type[4]}"]]
|
||||
- FnCall:
|
||||
- 'vqtbx2{neon_type[0].no}'
|
||||
- - a
|
||||
- x
|
||||
- FnCall: ['vreinterpret_u8{neon_type[0].no}', [c]]
|
||||
- a
|
||||
|
||||
- name: "vtbx4{neon_type[0].no}"
|
||||
doc: "Extended table look-up"
|
||||
@@ -12147,25 +11964,30 @@ intrinsics:
|
||||
return_type: "{neon_type[0]}"
|
||||
attr:
|
||||
- FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [tbx]]}]]
|
||||
- FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
|
||||
big_endian_inverse: false
|
||||
- *neon-stable
|
||||
safety: safe
|
||||
types:
|
||||
- [uint8x8_t, uint8x8x4_t, uint8x8_t]
|
||||
- [poly8x8_t, poly8x8x4_t, uint8x8_t]
|
||||
- [uint8x8_t, 'uint8x8x4_t', 'uint8x8_t', 'uint8x16x2', 'uint8x8', 'u8x8::splat(32)']
|
||||
- [poly8x8_t, 'poly8x8x4_t', 'uint8x8_t', 'poly8x16x2', 'uint8x8', 'u8x8::splat(32)']
|
||||
compose:
|
||||
- Let:
|
||||
- x
|
||||
- FnCall:
|
||||
- '{type[3]}_t'
|
||||
- - FnCall: ['vcombine{neon_type[0].no}', ['b.0', 'b.1']]
|
||||
- FnCall: ['vcombine{neon_type[0].no}', ['b.2', 'b.3']]
|
||||
- FnCall:
|
||||
- transmute
|
||||
- - FnCall:
|
||||
- "vqtbx2"
|
||||
- - FnCall: [transmute, [a]]
|
||||
- FnCall:
|
||||
- transmute
|
||||
- - FnCall: ["vcombine{neon_type[0].noq}", ["b.0", "b.1"]]
|
||||
- FnCall:
|
||||
- transmute
|
||||
- - FnCall: ["vcombine{neon_type[0].noq}", ["b.2", "b.3"]]
|
||||
- c
|
||||
- simd_select
|
||||
- - FnCall:
|
||||
- "simd_lt::<{type[4]}_t, int8x8_t>"
|
||||
- - c
|
||||
- FnCall: [transmute, ["{type[5]}"]]
|
||||
- FnCall:
|
||||
- 'vqtbx2{neon_type[0].no}'
|
||||
- - a
|
||||
- x
|
||||
- c
|
||||
- a
|
||||
|
||||
- name: "vtbl1{neon_type[0].no}"
|
||||
doc: "Table look-up"
|
||||
@@ -12173,7 +11995,7 @@ intrinsics:
|
||||
return_type: "{neon_type[0]}"
|
||||
attr:
|
||||
- FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [tbl]]}]]
|
||||
- FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
|
||||
- *neon-stable
|
||||
safety: safe
|
||||
types:
|
||||
- [int8x8_t, 'int8x8_t', 'unsafe {{ transmute(b) }}']
|
||||
@@ -12188,26 +12010,21 @@ intrinsics:
|
||||
- 'unsafe {{ crate::mem::zeroed() }}'
|
||||
- Identifier: ['{type[2]}', Symbol]
|
||||
|
||||
- name: "vtbl2{neon_type[1].noq}"
|
||||
- name: "vtbl2{neon_type[2].no}"
|
||||
doc: "Table look-up"
|
||||
arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}"]
|
||||
return_type: "{neon_type[1]}"
|
||||
return_type: "{neon_type[2]}"
|
||||
attr:
|
||||
- FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [tbl]]}]]
|
||||
- FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
|
||||
- *neon-stable
|
||||
safety: safe
|
||||
types:
|
||||
- [int8x8x2_t, 'int8x8_t']
|
||||
- ['int8x8x2_t', 'int8x8_t', 'int8x8_t']
|
||||
compose:
|
||||
- FnCall:
|
||||
- vqtbl1
|
||||
- - FnCall:
|
||||
- transmute
|
||||
- - FnCall:
|
||||
- 'vcombine{neon_type[1].noq}'
|
||||
- - 'a.0'
|
||||
- 'a.1'
|
||||
- FnCall: [transmute, [b]]
|
||||
- 'vqtbl1{neon_type[2].noq}'
|
||||
- - FnCall: ['vcombine{neon_type[2].noq}', ['a.0', 'a.1']]
|
||||
- FnCall: ['vreinterpret_u8{neon_type[2].noq}', [b]]
|
||||
|
||||
- name: "vtbl2{neon_type[2].no}"
|
||||
doc: "Table look-up"
|
||||
@@ -12215,128 +12032,107 @@ intrinsics:
|
||||
return_type: "{neon_type[2]}"
|
||||
attr:
|
||||
- FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [tbl]]}]]
|
||||
- FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
|
||||
big_endian_inverse: false
|
||||
- *neon-stable
|
||||
safety: safe
|
||||
types:
|
||||
- [uint8x8x2_t, 'uint8x8_t', 'uint8x8_t']
|
||||
- [poly8x8x2_t, 'uint8x8_t', 'poly8x8_t']
|
||||
- ['uint8x8x2_t', 'uint8x8_t', 'uint8x8_t']
|
||||
- ['poly8x8x2_t', 'uint8x8_t', 'poly8x8_t']
|
||||
compose:
|
||||
- FnCall:
|
||||
- transmute
|
||||
- - FnCall:
|
||||
- vqtbl1
|
||||
- - FnCall:
|
||||
- transmute
|
||||
- - FnCall:
|
||||
- 'vcombine{neon_type[2].noq}'
|
||||
- - 'a.0'
|
||||
- 'a.1'
|
||||
- b
|
||||
- 'vqtbl1{neon_type[2].noq}'
|
||||
- - FnCall: ['vcombine{neon_type[2].noq}', ['a.0', 'a.1']]
|
||||
- b
|
||||
|
||||
- name: "vtbl3{neon_type[1].no}"
|
||||
doc: "Table look-up"
|
||||
arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}"]
|
||||
return_type: "{neon_type[1]}"
|
||||
return_type: "{neon_type[2]}"
|
||||
attr:
|
||||
- FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [tbl]]}]]
|
||||
- FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
|
||||
- *neon-stable
|
||||
safety: safe
|
||||
types:
|
||||
- [int8x8x3_t, 'int8x8_t', 'int8x16x2']
|
||||
- ['int8x8x3_t', 'int8x8_t', 'int8x8_t', 'int8x16x2']
|
||||
compose:
|
||||
- Let:
|
||||
- x
|
||||
- FnCall:
|
||||
- '{type[2]}_t'
|
||||
- - FnCall: ['vcombine{neon_type[1].no}', ['a.0', 'a.1']]
|
||||
- FnCall: ['vcombine{neon_type[1].no}', ['a.2', 'unsafe {{ crate::mem::zeroed() }}']]
|
||||
- x
|
||||
- FnCall:
|
||||
- '{type[3]}_t'
|
||||
- - FnCall: ['vcombine{neon_type[2].no}', ['a.0', 'a.1']]
|
||||
- FnCall: ['vcombine{neon_type[2].no}', ['a.2', 'unsafe {{ crate::mem::zeroed() }}']]
|
||||
- FnCall:
|
||||
- transmute
|
||||
- - FnCall:
|
||||
- vqtbl2
|
||||
- - FnCall: [transmute, ['x.0']]
|
||||
- FnCall: [transmute, ['x.1']]
|
||||
- FnCall: [transmute, [b]]
|
||||
- 'vqtbl2{neon_type[2].no}'
|
||||
- - x
|
||||
- FnCall: ['vreinterpret_u8{neon_type[2].noq}', [b]]
|
||||
|
||||
- name: "vtbl3{neon_type[3].no}"
|
||||
|
||||
- name: "vtbl3{neon_type[2].no}"
|
||||
doc: "Table look-up"
|
||||
arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}"]
|
||||
return_type: "{neon_type[3]}"
|
||||
return_type: "{neon_type[2]}"
|
||||
attr:
|
||||
- FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [tbl]]}]]
|
||||
- FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
|
||||
- *neon-stable
|
||||
safety: safe
|
||||
types:
|
||||
- [uint8x8x3_t, 'uint8x8_t', 'uint8x16x2', 'uint8x8_t']
|
||||
- [poly8x8x3_t, 'uint8x8_t', 'poly8x16x2', 'poly8x8_t']
|
||||
big_endian_inverse: false
|
||||
- [uint8x8x3_t, 'uint8x8_t', 'uint8x8_t', 'uint8x16x2']
|
||||
- [poly8x8x3_t, 'uint8x8_t', 'poly8x8_t', 'poly8x16x2']
|
||||
compose:
|
||||
- Let:
|
||||
- x
|
||||
- FnCall:
|
||||
- '{type[2]}_t'
|
||||
- - FnCall: ['vcombine{neon_type[3].no}', ['a.0', 'a.1']]
|
||||
- FnCall: ['vcombine{neon_type[3].no}', ['a.2', 'unsafe {{ crate::mem::zeroed() }}']]
|
||||
- x
|
||||
- FnCall:
|
||||
- '{type[3]}_t'
|
||||
- - FnCall: ['vcombine{neon_type[2].no}', ['a.0', 'a.1']]
|
||||
- FnCall: ['vcombine{neon_type[2].no}', ['a.2', 'unsafe {{ crate::mem::zeroed() }}']]
|
||||
- FnCall:
|
||||
- transmute
|
||||
- - FnCall:
|
||||
- vqtbl2
|
||||
- - FnCall: [transmute, ['x.0']]
|
||||
- FnCall: [transmute, ['x.1']]
|
||||
- b
|
||||
- 'vqtbl2{neon_type[2].no}'
|
||||
- - x
|
||||
- b
|
||||
|
||||
- name: "vtbl4{neon_type[1].no}"
|
||||
- name: "vtbl4{neon_type[2].no}"
|
||||
doc: "Table look-up"
|
||||
arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}"]
|
||||
return_type: "{neon_type[1]}"
|
||||
return_type: "{neon_type[2]}"
|
||||
attr:
|
||||
- FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [tbl]]}]]
|
||||
- FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
|
||||
- *neon-stable
|
||||
safety: safe
|
||||
types:
|
||||
- [int8x8x4_t, 'int8x8_t', 'int8x16x2']
|
||||
- ['int8x8x4_t', 'int8x8_t', 'int8x8_t', 'int8x16x2']
|
||||
compose:
|
||||
- Let:
|
||||
- x
|
||||
- FnCall:
|
||||
- '{type[2]}_t'
|
||||
- - FnCall: ['vcombine{neon_type[1].no}', ['a.0', 'a.1']]
|
||||
- FnCall: ['vcombine{neon_type[1].no}', ['a.2', 'a.3']]
|
||||
- x
|
||||
- FnCall:
|
||||
- '{type[3]}_t'
|
||||
- - FnCall: ['vcombine{neon_type[1].no}', ['a.0', 'a.1']]
|
||||
- FnCall: ['vcombine{neon_type[1].no}', ['a.2', 'a.3']]
|
||||
- FnCall:
|
||||
- transmute
|
||||
- - FnCall:
|
||||
- 'vqtbl2'
|
||||
- - FnCall: [transmute, ['x.0']]
|
||||
- FnCall: [transmute, ['x.1']]
|
||||
- FnCall: [transmute, [b]]
|
||||
- 'vqtbl2{neon_type[2].no}'
|
||||
- - x
|
||||
- FnCall: ['vreinterpret_u8{neon_type[2].noq}', [b]]
|
||||
|
||||
- name: "vtbl4{neon_type[3].no}"
|
||||
- name: "vtbl4{neon_type[2].no}"
|
||||
doc: "Table look-up"
|
||||
arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}"]
|
||||
return_type: "{neon_type[3]}"
|
||||
return_type: "{neon_type[2]}"
|
||||
attr:
|
||||
- FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [tbl]]}]]
|
||||
- FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
|
||||
- *neon-stable
|
||||
safety: safe
|
||||
types:
|
||||
- [uint8x8x4_t, 'uint8x8_t', 'uint8x16x2', 'uint8x8_t']
|
||||
- [poly8x8x4_t, 'uint8x8_t', 'poly8x16x2', 'poly8x8_t']
|
||||
big_endian_inverse: false
|
||||
- [uint8x8x4_t, 'uint8x8_t', 'uint8x8_t', 'uint8x16x2']
|
||||
- [poly8x8x4_t, 'uint8x8_t', 'poly8x8_t', 'poly8x16x2']
|
||||
compose:
|
||||
- Let:
|
||||
- x
|
||||
- FnCall:
|
||||
- '{type[2]}_t'
|
||||
- - FnCall: ['vcombine{neon_type[3].no}', ['a.0', 'a.1']]
|
||||
- FnCall: ['vcombine{neon_type[3].no}', ['a.2', 'a.3']]
|
||||
- x
|
||||
- FnCall:
|
||||
- '{type[3]}_t'
|
||||
- - FnCall: ['vcombine{neon_type[2].no}', ['a.0', 'a.1']]
|
||||
- FnCall: ['vcombine{neon_type[2].no}', ['a.2', 'a.3']]
|
||||
- FnCall:
|
||||
- transmute
|
||||
- - FnCall:
|
||||
- 'vqtbl2'
|
||||
- - FnCall: [transmute, ['x.0']]
|
||||
- FnCall: [transmute, ['x.1']]
|
||||
- b
|
||||
- 'vqtbl2{neon_type[2].no}'
|
||||
- - x
|
||||
- b
|
||||
|
||||
- name: "vqtbx1{neon_type[0].no}"
|
||||
doc: "Extended table look-up"
|
||||
@@ -12344,8 +12140,9 @@ intrinsics:
|
||||
return_type: "{neon_type[0]}"
|
||||
attr:
|
||||
- FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [tbx]]}]]
|
||||
- FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
|
||||
- *neon-stable
|
||||
safety: safe
|
||||
big_endian_inverse: true
|
||||
types:
|
||||
- [int8x8_t, int8x16_t, uint8x8_t, vqtbx1]
|
||||
- [int8x16_t, int8x16_t, uint8x16_t, vqtbx1q]
|
||||
@@ -12358,14 +12155,14 @@ intrinsics:
|
||||
return_type: "{neon_type[0]}"
|
||||
attr:
|
||||
- FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [tbx]]}]]
|
||||
- FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
|
||||
- *neon-stable
|
||||
safety: safe
|
||||
types:
|
||||
- [uint8x8_t, "uint8x16_t", uint8x8_t, "vqtbx1", "_u8"]
|
||||
- [poly8x8_t, "poly8x16_t", uint8x8_t, "vqtbx1", "_p8"]
|
||||
- [uint8x16_t, "uint8x16_t", uint8x16_t, "vqtbx1q", "q_u8"]
|
||||
- [poly8x16_t, "poly8x16_t", uint8x16_t, "vqtbx1q", "q_p8"]
|
||||
big_endian_inverse: false
|
||||
big_endian_inverse: true
|
||||
compose:
|
||||
- FnCall:
|
||||
- transmute
|
||||
@@ -12381,29 +12178,48 @@ intrinsics:
|
||||
return_type: "{neon_type[0]}"
|
||||
attr:
|
||||
- FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [tbx]]}]]
|
||||
- FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
|
||||
- *neon-stable
|
||||
safety: safe
|
||||
types:
|
||||
- [int8x8_t, "int8x8_t", "transmute(c)", "i8x8::splat(8)", "int8x8"]
|
||||
- [uint8x8_t, "uint8x8_t", "c", "u8x8::splat(8)", "uint8x8"]
|
||||
- [poly8x8_t, "uint8x8_t", "c", "u8x8::splat(8)", "uint8x8"]
|
||||
- [int8x8_t, "int8x8_t", "int8x8", "i8x8::splat(8)"]
|
||||
compose:
|
||||
- FnCall:
|
||||
- simd_select
|
||||
- - FnCall:
|
||||
- "simd_lt::<{type[4]}_t, int8x8_t>"
|
||||
- - c
|
||||
- FnCall: [transmute, ["{type[3]}"]]
|
||||
- FnCall:
|
||||
- transmute
|
||||
- - FnCall:
|
||||
- "vqtbx1"
|
||||
- - "transmute(a)"
|
||||
- FnCall:
|
||||
- transmute
|
||||
- - FnCall: ["vcombine{neon_type[0].no}", [b, "crate::mem::zeroed()"]]
|
||||
- "{type[2]}"
|
||||
- a
|
||||
- simd_select
|
||||
- - FnCall:
|
||||
- "simd_lt::<{type[2]}_t, int8x8_t>"
|
||||
- - c
|
||||
- FnCall: [transmute, ["{type[3]}"]]
|
||||
- FnCall:
|
||||
- 'vqtbx1{neon_type[0].no}'
|
||||
- - a
|
||||
- FnCall: ['vcombine{neon_type[0].no}', [b, 'crate::mem::zeroed()']]
|
||||
- FnCall: ['vreinterpret_u8{neon_type[0].no}', [c]]
|
||||
- a
|
||||
|
||||
- name: "vtbx1{neon_type[0].no}"
|
||||
doc: "Extended table look-up"
|
||||
arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}", "c: {neon_type[1]}"]
|
||||
return_type: "{neon_type[0]}"
|
||||
attr:
|
||||
- FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [tbx]]}]]
|
||||
- *neon-stable
|
||||
safety: safe
|
||||
types:
|
||||
- [uint8x8_t, "uint8x8_t", "uint8x8", "u8x8::splat(8)"]
|
||||
- [poly8x8_t, "uint8x8_t", "uint8x8", "u8x8::splat(8)"]
|
||||
compose:
|
||||
- FnCall:
|
||||
- simd_select
|
||||
- - FnCall:
|
||||
- "simd_lt::<{type[2]}_t, int8x8_t>"
|
||||
- - c
|
||||
- FnCall: [transmute, ["{type[3]}"]]
|
||||
- FnCall:
|
||||
- 'vqtbx1{neon_type[0].no}'
|
||||
- - a
|
||||
- FnCall: ['vcombine{neon_type[0].no}', [b, 'crate::mem::zeroed()']]
|
||||
- c
|
||||
- a
|
||||
|
||||
- name: "vtbx2{neon_type[0].no}"
|
||||
doc: "Extended table look-up"
|
||||
@@ -12411,18 +12227,23 @@ intrinsics:
|
||||
return_type: "{neon_type[0]}"
|
||||
attr:
|
||||
- FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [tbx]]}]]
|
||||
- FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
|
||||
- *neon-stable
|
||||
safety: safe
|
||||
types:
|
||||
- [int8x8_t, 'int8x8x2_t']
|
||||
- [int8x8_t, 'int8x8x2_t', 'int8x8', 'i8x8::splat(16)']
|
||||
compose:
|
||||
- FnCall:
|
||||
- vqtbx1
|
||||
- - FnCall: [transmute, [a]]
|
||||
- FnCall:
|
||||
- transmute
|
||||
- - FnCall: ["vcombine{neon_type[0].no}", ['b.0', 'b.1']]
|
||||
- FnCall: [transmute, [c]]
|
||||
- simd_select
|
||||
- - FnCall:
|
||||
- "simd_lt::<{type[2]}_t, int8x8_t>"
|
||||
- - c
|
||||
- FnCall: [transmute, ["{type[3]}"]]
|
||||
- FnCall:
|
||||
- 'vqtbx1{neon_type[0].no}'
|
||||
- - a
|
||||
- FnCall: ["vcombine{neon_type[0].no}", ['b.0', 'b.1']]
|
||||
- FnCall: ['vreinterpret_u8{neon_type[0].no}', [c]]
|
||||
- a
|
||||
|
||||
- name: "vtbx2{neon_type[0].no}"
|
||||
doc: "Extended table look-up"
|
||||
@@ -12430,22 +12251,24 @@ intrinsics:
|
||||
return_type: "{neon_type[0]}"
|
||||
attr:
|
||||
- FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [tbx]]}]]
|
||||
- FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
|
||||
big_endian_inverse: false
|
||||
- *neon-stable
|
||||
safety: safe
|
||||
types:
|
||||
- [uint8x8_t, 'uint8x8x2_t', uint8x8_t]
|
||||
- [poly8x8_t, 'poly8x8x2_t', uint8x8_t]
|
||||
- [uint8x8_t, 'uint8x8x2_t', uint8x8_t, 'uint8x8', 'u8x8::splat(16)']
|
||||
- [poly8x8_t, 'poly8x8x2_t', uint8x8_t, 'uint8x8', 'u8x8::splat(16)']
|
||||
compose:
|
||||
- FnCall:
|
||||
- transmute
|
||||
- - FnCall:
|
||||
- vqtbx1
|
||||
- - FnCall: [transmute, [a]]
|
||||
- FnCall:
|
||||
- transmute
|
||||
- - FnCall: ["vcombine{neon_type[0].no}", ['b.0', 'b.1']]
|
||||
- c
|
||||
- simd_select
|
||||
- - FnCall:
|
||||
- "simd_lt::<{type[3]}_t, int8x8_t>"
|
||||
- - c
|
||||
- FnCall: [transmute, ["{type[4]}"]]
|
||||
- FnCall:
|
||||
- 'vqtbx1{neon_type[0].no}'
|
||||
- - a
|
||||
- FnCall: ["vcombine{neon_type[0].no}", ['b.0', 'b.1']]
|
||||
- c
|
||||
- a
|
||||
|
||||
- name: "vtbx3{neon_type[0].no}"
|
||||
doc: "Extended table look-up"
|
||||
@@ -12453,34 +12276,29 @@ intrinsics:
|
||||
return_type: "{neon_type[0]}"
|
||||
attr:
|
||||
- FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [tbx]]}]]
|
||||
- FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
|
||||
- *neon-stable
|
||||
safety: safe
|
||||
types:
|
||||
- [int8x8_t, 'int8x8x3_t', 'int8x16x2', 'i8x8::splat(24)', 'int8x8']
|
||||
- [int8x8_t, 'int8x8x3_t', 'int8x16x2', 'int8x8', 'i8x8::splat(24)']
|
||||
compose:
|
||||
- Let:
|
||||
- x
|
||||
- FnCall:
|
||||
- '{type[2]}_t'
|
||||
- - FnCall: ['vcombine{neon_type[0].no}', ['b.0', 'b.1']]
|
||||
- FnCall: ['vcombine{neon_type[0].no}', ['b.2', 'unsafe {{ crate::mem::zeroed() }}']]
|
||||
- x
|
||||
- FnCall:
|
||||
- '{type[2]}_t'
|
||||
- - FnCall: ['vcombine{neon_type[0].no}', ['b.0', 'b.1']]
|
||||
- FnCall: ['vcombine{neon_type[0].no}', ['b.2', 'unsafe {{ crate::mem::zeroed() }}']]
|
||||
- FnCall:
|
||||
- transmute
|
||||
- - FnCall:
|
||||
- simd_select
|
||||
- - FnCall:
|
||||
- 'simd_lt::<{type[4]}_t, int8x8_t>'
|
||||
- - FnCall: [transmute, [c]]
|
||||
- FnCall: [transmute, ['{type[3]}']]
|
||||
- FnCall:
|
||||
- transmute
|
||||
- - FnCall:
|
||||
- 'vqtbx2'
|
||||
- - FnCall: [transmute, [a]]
|
||||
- FnCall: [transmute, ['x.0']]
|
||||
- FnCall: [transmute, ['x.1']]
|
||||
- FnCall: [transmute, [c]]
|
||||
- a
|
||||
- simd_select
|
||||
- - FnCall:
|
||||
- "simd_lt::<{type[3]}_t, int8x8_t>"
|
||||
- - c
|
||||
- FnCall: [transmute, ["{type[4]}"]]
|
||||
- FnCall:
|
||||
- 'vqtbx2{neon_type[0].no}'
|
||||
- - a
|
||||
- x
|
||||
- FnCall: ['vreinterpret_u8{neon_type[0].no}', [c]]
|
||||
- a
|
||||
|
||||
- name: "vtbx3{neon_type[0].no}"
|
||||
doc: "Extended table look-up"
|
||||
@@ -12488,12 +12306,11 @@ intrinsics:
|
||||
return_type: "{neon_type[0]}"
|
||||
attr:
|
||||
- FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [tbx]]}]]
|
||||
- FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
|
||||
- *neon-stable
|
||||
safety: safe
|
||||
types:
|
||||
- [uint8x8_t, 'uint8x8x3_t', 'uint8x16x2', 'u8x8::splat(24)', 'uint8x8']
|
||||
- [poly8x8_t, 'poly8x8x3_t', 'poly8x16x2', 'u8x8::splat(24)', 'poly8x8']
|
||||
big_endian_inverse: false
|
||||
- [uint8x8_t, 'uint8x8x3_t', 'uint8x16x2', 'uint8x8', 'u8x8::splat(24)']
|
||||
- [poly8x8_t, 'poly8x8x3_t', 'poly8x16x2', 'uint8x8', 'u8x8::splat(24)']
|
||||
compose:
|
||||
- Let:
|
||||
- x
|
||||
@@ -12502,22 +12319,17 @@ intrinsics:
|
||||
- - FnCall: ['vcombine{neon_type[0].no}', ['b.0', 'b.1']]
|
||||
- FnCall: ['vcombine{neon_type[0].no}', ['b.2', 'unsafe {{ crate::mem::zeroed() }}']]
|
||||
- FnCall:
|
||||
- transmute
|
||||
- - FnCall:
|
||||
- simd_select
|
||||
- - FnCall:
|
||||
- 'simd_lt::<{type[4]}_t, int8x8_t>'
|
||||
- - FnCall: [transmute, [c]]
|
||||
- FnCall: [transmute, ['{type[3]}']]
|
||||
- FnCall:
|
||||
- transmute
|
||||
- - FnCall:
|
||||
- 'vqtbx2'
|
||||
- - FnCall: [transmute, [a]]
|
||||
- FnCall: [transmute, ['x.0']]
|
||||
- FnCall: [transmute, ['x.1']]
|
||||
- c
|
||||
- a
|
||||
- simd_select
|
||||
- - FnCall:
|
||||
- "simd_lt::<{type[3]}_t, int8x8_t>"
|
||||
- - c
|
||||
- FnCall: [transmute, ["{type[4]}"]]
|
||||
- FnCall:
|
||||
- 'vqtbx2{neon_type[0].no}'
|
||||
- - a
|
||||
- x
|
||||
- c
|
||||
- a
|
||||
|
||||
- name: "vqtbl1{neon_type[3].no}"
|
||||
doc: "Table look-up"
|
||||
@@ -12525,8 +12337,9 @@ intrinsics:
|
||||
return_type: "{neon_type[3]}"
|
||||
attr:
|
||||
- FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [tbl]]}]]
|
||||
- FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
|
||||
- *neon-stable
|
||||
safety: safe
|
||||
big_endian_inverse: true
|
||||
types:
|
||||
- ['int8x16_t', uint8x8_t, 'vqtbl1', 'int8x8_t']
|
||||
- ['int8x16_t', uint8x16_t, 'vqtbl1q', 'int8x16_t']
|
||||
@@ -12539,14 +12352,14 @@ intrinsics:
|
||||
return_type: "{neon_type[3]}"
|
||||
attr:
|
||||
- FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [tbl]]}]]
|
||||
- FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
|
||||
- *neon-stable
|
||||
safety: safe
|
||||
types:
|
||||
- ['uint8x16_t', uint8x8_t, 'vqtbl1', 'uint8x8_t']
|
||||
- ['poly8x16_t', uint8x8_t, 'vqtbl1', 'poly8x8_t']
|
||||
- ['uint8x16_t', uint8x16_t, 'vqtbl1q', 'uint8x16_t']
|
||||
- ['poly8x16_t', uint8x16_t, 'vqtbl1q', 'poly8x16_t']
|
||||
big_endian_inverse: false
|
||||
big_endian_inverse: true
|
||||
compose:
|
||||
- FnCall:
|
||||
- transmute
|
||||
@@ -12561,8 +12374,9 @@ intrinsics:
|
||||
return_type: "{neon_type[3]}"
|
||||
attr:
|
||||
- FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [tbl]]}]]
|
||||
- FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
|
||||
- *neon-stable
|
||||
safety: safe
|
||||
big_endian_inverse: true
|
||||
types:
|
||||
- ['int8x16x2_t', uint8x8_t, 'vqtbl2', 'int8x8_t']
|
||||
- ['int8x16x2_t', uint8x16_t, 'vqtbl2q', 'int8x16_t']
|
||||
@@ -12575,8 +12389,8 @@ intrinsics:
|
||||
return_type: "{neon_type[3]}"
|
||||
attr:
|
||||
- FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [tbl]]}]]
|
||||
- FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
|
||||
big_endian_inverse: false
|
||||
- *neon-stable
|
||||
big_endian_inverse: true
|
||||
safety: safe
|
||||
types:
|
||||
- ['uint8x16x2_t', uint8x8_t, 'vqtbl2', 'uint8x8_t']
|
||||
@@ -12598,8 +12412,9 @@ intrinsics:
|
||||
return_type: "{neon_type[0]}"
|
||||
attr:
|
||||
- FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [tbx]]}]]
|
||||
- FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
|
||||
- *neon-stable
|
||||
safety: safe
|
||||
big_endian_inverse: true
|
||||
types:
|
||||
- [int8x8_t, 'int8x16x2_t', uint8x8_t, 'vqtbx2']
|
||||
- [int8x16_t, 'int8x16x2_t', uint8x16_t, 'vqtbx2q']
|
||||
@@ -12612,8 +12427,8 @@ intrinsics:
|
||||
return_type: "{neon_type[0]}"
|
||||
attr:
|
||||
- FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [tbx]]}]]
|
||||
- FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
|
||||
big_endian_inverse: false
|
||||
- *neon-stable
|
||||
big_endian_inverse: true
|
||||
safety: safe
|
||||
types:
|
||||
- [uint8x8_t, 'uint8x16x2_t', uint8x8_t, 'vqtbx2']
|
||||
@@ -12636,8 +12451,8 @@ intrinsics:
|
||||
return_type: "{neon_type[0]}"
|
||||
attr:
|
||||
- FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [tbl]]}]]
|
||||
- FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
|
||||
big_endian_inverse: false
|
||||
- *neon-stable
|
||||
big_endian_inverse: true
|
||||
safety: safe
|
||||
types:
|
||||
- ['int8x8_t', 'int8x16x3_t', uint8x8_t, 'vqtbl3']
|
||||
@@ -12651,8 +12466,8 @@ intrinsics:
|
||||
return_type: "{neon_type[0]}"
|
||||
attr:
|
||||
- FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [tbl]]}]]
|
||||
- FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
|
||||
big_endian_inverse: false
|
||||
- *neon-stable
|
||||
big_endian_inverse: true
|
||||
safety: safe
|
||||
types:
|
||||
- ['uint8x8_t', 'uint8x16x3_t', uint8x8_t, 'vqtbl3']
|
||||
@@ -12675,8 +12490,9 @@ intrinsics:
|
||||
return_type: "{neon_type[0]}"
|
||||
attr:
|
||||
- FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [tbx]]}]]
|
||||
- FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
|
||||
- *neon-stable
|
||||
safety: safe
|
||||
big_endian_inverse: true
|
||||
types:
|
||||
- [int8x8_t, 'int8x16x3_t', uint8x8_t, 'vqtbx3']
|
||||
- [int8x16_t, 'int8x16x3_t', uint8x16_t, 'vqtbx3q']
|
||||
@@ -12689,8 +12505,8 @@ intrinsics:
|
||||
return_type: "{neon_type[0]}"
|
||||
attr:
|
||||
- FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [tbx]]}]]
|
||||
- FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
|
||||
big_endian_inverse: false
|
||||
- *neon-stable
|
||||
big_endian_inverse: true
|
||||
safety: safe
|
||||
types:
|
||||
- [uint8x8_t, 'uint8x16x3_t', uint8x8_t, 'vqtbx3']
|
||||
@@ -12714,8 +12530,8 @@ intrinsics:
|
||||
return_type: "{neon_type[3]}"
|
||||
attr:
|
||||
- FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [tbl]]}]]
|
||||
- FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
|
||||
big_endian_inverse: false
|
||||
- *neon-stable
|
||||
big_endian_inverse: true
|
||||
safety: safe
|
||||
types:
|
||||
- ['int8x16x4_t', uint8x8_t, 'vqtbl4', 'int8x8_t']
|
||||
@@ -12729,8 +12545,8 @@ intrinsics:
|
||||
return_type: "{neon_type[3]}"
|
||||
attr:
|
||||
- FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [tbl]]}]]
|
||||
- FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
|
||||
big_endian_inverse: false
|
||||
- *neon-stable
|
||||
big_endian_inverse: true
|
||||
safety: safe
|
||||
types:
|
||||
- ['uint8x16x4_t', uint8x8_t, 'vqtbl4', 'uint8x8_t']
|
||||
@@ -12754,8 +12570,9 @@ intrinsics:
|
||||
return_type: "{neon_type[0]}"
|
||||
attr:
|
||||
- FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [tbx]]}]]
|
||||
- FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
|
||||
- *neon-stable
|
||||
safety: safe
|
||||
big_endian_inverse: true
|
||||
types:
|
||||
- [int8x8_t, 'int8x16x4_t', uint8x8_t, 'vqtbx4']
|
||||
- [int8x16_t, 'int8x16x4_t', uint8x16_t, 'vqtbx4q']
|
||||
@@ -12768,8 +12585,8 @@ intrinsics:
|
||||
return_type: "{neon_type[0]}"
|
||||
attr:
|
||||
- FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [tbx]]}]]
|
||||
- FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
|
||||
big_endian_inverse: false
|
||||
- *neon-stable
|
||||
big_endian_inverse: true
|
||||
safety: safe
|
||||
types:
|
||||
- [uint8x8_t, 'uint8x16x4_t', uint8x8_t, 'vqtbx4']
|
||||
@@ -12795,7 +12612,7 @@ intrinsics:
|
||||
return_type: "{neon_type[3]}"
|
||||
attr:
|
||||
- FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [tbl]]}]]
|
||||
- FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
|
||||
- *neon-stable
|
||||
safety: safe
|
||||
types:
|
||||
- ["vqtbl1", "int8x16_t", "uint8x8_t", "int8x8_t"]
|
||||
@@ -12814,7 +12631,7 @@ intrinsics:
|
||||
return_type: "{neon_type[3]}"
|
||||
attr:
|
||||
- FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [tbl]]}]]
|
||||
- FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
|
||||
- *neon-stable
|
||||
safety: safe
|
||||
types:
|
||||
- ["vqtbl2", "int8x16_t", "uint8x8_t", "int8x8_t"]
|
||||
@@ -12833,8 +12650,7 @@ intrinsics:
|
||||
return_type: "{neon_type[3]}"
|
||||
attr:
|
||||
- FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [tbl]]}]]
|
||||
- FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
|
||||
big_endian_inverse: false
|
||||
- *neon-stable
|
||||
safety: safe
|
||||
types:
|
||||
- ["vqtbl3", int8x16_t, uint8x8_t, int8x8_t]
|
||||
@@ -12853,8 +12669,7 @@ intrinsics:
|
||||
return_type: "{neon_type[3]}"
|
||||
attr:
|
||||
- FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [tbl]]}]]
|
||||
- FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
|
||||
big_endian_inverse: false
|
||||
- *neon-stable
|
||||
safety: safe
|
||||
types:
|
||||
- ["vqtbl4", int8x16_t, uint8x8_t, int8x8_t]
|
||||
@@ -12873,7 +12688,7 @@ intrinsics:
|
||||
return_type: "{neon_type[1]}"
|
||||
attr:
|
||||
- FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [tbx]]}]]
|
||||
- FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
|
||||
- *neon-stable
|
||||
safety: safe
|
||||
types:
|
||||
- [vqtbx1, "int8x8_t", "int8x16_t", "uint8x8_t"]
|
||||
@@ -12892,7 +12707,7 @@ intrinsics:
|
||||
return_type: "{neon_type[1]}"
|
||||
attr:
|
||||
- FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [tbx]]}]]
|
||||
- FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
|
||||
- *neon-stable
|
||||
safety: safe
|
||||
types:
|
||||
- [vqtbx2, "int8x8_t", "int8x16_t", "uint8x8_t"]
|
||||
@@ -12911,7 +12726,7 @@ intrinsics:
|
||||
return_type: "{neon_type[1]}"
|
||||
attr:
|
||||
- FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [tbx]]}]]
|
||||
- FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
|
||||
- *neon-stable
|
||||
safety: safe
|
||||
types:
|
||||
- [vqtbx3, "int8x8_t", "int8x16_t", "uint8x8_t"]
|
||||
@@ -12930,7 +12745,7 @@ intrinsics:
|
||||
return_type: "{neon_type[1]}"
|
||||
attr:
|
||||
- FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [tbx]]}]]
|
||||
- FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
|
||||
- *neon-stable
|
||||
safety: safe
|
||||
types:
|
||||
- [vqtbx4, "int8x8_t", "int8x16_t", "uint8x8_t"]
|
||||
@@ -12949,7 +12764,7 @@ intrinsics:
|
||||
attr:
|
||||
- FnCall: [target_feature, ['enable = "{type[2]}"']]
|
||||
- FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [ldr]]}]]
|
||||
- FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
|
||||
- *neon-stable
|
||||
safety:
|
||||
unsafe: [neon]
|
||||
types:
|
||||
@@ -13016,7 +12831,7 @@ intrinsics:
|
||||
- FnCall: [target_feature, ['enable = "{type[2]}"']]
|
||||
- FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [str]]}]]
|
||||
- FnCall: [allow, ['clippy::cast_ptr_alignment']]
|
||||
- FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
|
||||
- *neon-stable
|
||||
safety:
|
||||
unsafe: [neon]
|
||||
types:
|
||||
@@ -13218,6 +13033,7 @@ intrinsics:
|
||||
- *neon-stable
|
||||
assert_instr: [addp]
|
||||
safety: safe
|
||||
big_endian_inverse: true
|
||||
types:
|
||||
- [int8x16_t, "16"]
|
||||
- [int16x8_t, "8"]
|
||||
@@ -13506,6 +13322,7 @@ intrinsics:
|
||||
- *neon-stable
|
||||
assert_instr: ['sminp']
|
||||
safety: safe
|
||||
big_endian_inverse: true
|
||||
types:
|
||||
- int8x16_t
|
||||
- int16x8_t
|
||||
@@ -13525,6 +13342,7 @@ intrinsics:
|
||||
- *neon-stable
|
||||
assert_instr: ['uminp']
|
||||
safety: safe
|
||||
big_endian_inverse: true
|
||||
types:
|
||||
- uint8x16_t
|
||||
- uint16x8_t
|
||||
@@ -13544,6 +13362,7 @@ intrinsics:
|
||||
- *neon-stable
|
||||
assert_instr: ['fminp']
|
||||
safety: safe
|
||||
big_endian_inverse: true
|
||||
types:
|
||||
- float32x4_t
|
||||
- float64x2_t
|
||||
@@ -13562,6 +13381,7 @@ intrinsics:
|
||||
- *neon-stable
|
||||
assert_instr: ['smaxp']
|
||||
safety: safe
|
||||
big_endian_inverse: true
|
||||
types:
|
||||
- int8x16_t
|
||||
- int16x8_t
|
||||
@@ -13581,6 +13401,7 @@ intrinsics:
|
||||
- *neon-stable
|
||||
assert_instr: ['umaxp']
|
||||
safety: safe
|
||||
big_endian_inverse: true
|
||||
types:
|
||||
- uint8x16_t
|
||||
- uint16x8_t
|
||||
@@ -13600,6 +13421,7 @@ intrinsics:
|
||||
- *neon-stable
|
||||
assert_instr: ['fmaxp']
|
||||
safety: safe
|
||||
big_endian_inverse: true
|
||||
types:
|
||||
- float32x4_t
|
||||
- float64x2_t
|
||||
@@ -13754,6 +13576,7 @@ intrinsics:
|
||||
- *target-not-arm64ec
|
||||
assert_instr: [fmlal2]
|
||||
safety: safe
|
||||
big_endian_inverse: true
|
||||
types:
|
||||
- [float32x2_t, float16x4_t, '_high_']
|
||||
- [float32x4_t, float16x8_t, 'q_high_']
|
||||
@@ -13765,7 +13588,7 @@ intrinsics:
|
||||
arch: aarch64,arm64ec
|
||||
|
||||
|
||||
- name: "vfmlal{type[3]}{neon_type[1]}"
|
||||
- name: "vfmlal{type[3]}_high_{neon_type[1]}"
|
||||
doc: "Floating-point fused Multiply-Add Long to accumulator (by element)."
|
||||
arguments: ["r: {neon_type[0]}", "a: {neon_type[1]}", "b: {neon_type[2]}"]
|
||||
return_type: "{neon_type[0]}"
|
||||
@@ -13779,18 +13602,17 @@ intrinsics:
|
||||
static_defs: ['const LANE: i32']
|
||||
safety: safe
|
||||
types:
|
||||
- [float32x2_t, float16x4_t, float16x4_t, '_lane_high_', '_high_', '2']
|
||||
- [float32x2_t, float16x4_t, float16x8_t, '_laneq_high_', '_high_', '3']
|
||||
- [float32x4_t, float16x8_t, float16x4_t, 'q_lane_high_', 'q_high_', '2']
|
||||
- [float32x4_t, float16x8_t, float16x8_t, 'q_laneq_high_', 'q_high_', '3']
|
||||
- [float32x2_t, float16x4_t, float16x4_t, '_lane', '_high_', '2']
|
||||
- [float32x2_t, float16x4_t, float16x8_t, '_laneq', '_high_', '3']
|
||||
- [float32x4_t, float16x8_t, float16x4_t, 'q_lane', 'q_high_', '2']
|
||||
- [float32x4_t, float16x8_t, float16x8_t, 'q_laneq', 'q_high_', '3']
|
||||
compose:
|
||||
- FnCall: [static_assert_uimm_bits!, [LANE, "{type[5]}"]]
|
||||
- FnCall:
|
||||
- "vfmlal{type[4]}{neon_type[1]}"
|
||||
- - r
|
||||
- a
|
||||
- FnCall: ["vdup{neon_type[1].N}", [{FnCall: [simd_extract!, [b, 'LANE as u32']]}]]
|
||||
|
||||
- FnCall: ["vdup{type[3]}_{neon_type[2]}", [b], [LANE]]
|
||||
|
||||
- name: "vfmlal{type[2]}{neon_type[1]}"
|
||||
doc: "Floating-point fused Multiply-Add Long to accumulator (vector)."
|
||||
@@ -13803,6 +13625,7 @@ intrinsics:
|
||||
- *target-not-arm64ec
|
||||
assert_instr: [fmlal]
|
||||
safety: safe
|
||||
big_endian_inverse: true
|
||||
types:
|
||||
- [float32x2_t, float16x4_t, '_low_']
|
||||
- [float32x4_t, float16x8_t, 'q_low_']
|
||||
@@ -13814,7 +13637,7 @@ intrinsics:
|
||||
arch: aarch64,arm64ec
|
||||
|
||||
|
||||
- name: "vfmlal{type[3]}{neon_type[1]}"
|
||||
- name: "vfmlal{type[3]}_low_{neon_type[1]}"
|
||||
doc: "Floating-point fused Multiply-Add Long to accumulator (by element)."
|
||||
arguments: ["r: {neon_type[0]}", "a: {neon_type[1]}", "b: {neon_type[2]}"]
|
||||
return_type: "{neon_type[0]}"
|
||||
@@ -13828,18 +13651,17 @@ intrinsics:
|
||||
static_defs: ['const LANE: i32']
|
||||
safety: safe
|
||||
types:
|
||||
- [float32x2_t, float16x4_t, float16x4_t, '_lane_low_', '_low_', '2']
|
||||
- [float32x2_t, float16x4_t, float16x8_t, '_laneq_low_', '_low_', '3']
|
||||
- [float32x4_t, float16x8_t, float16x4_t, 'q_lane_low_', 'q_low_', '2']
|
||||
- [float32x4_t, float16x8_t, float16x8_t, 'q_laneq_low_', 'q_low_', '3']
|
||||
- [float32x2_t, float16x4_t, float16x4_t, '_lane', '_low_', '2']
|
||||
- [float32x2_t, float16x4_t, float16x8_t, '_laneq', '_low_', '3']
|
||||
- [float32x4_t, float16x8_t, float16x4_t, 'q_lane', 'q_low_', '2']
|
||||
- [float32x4_t, float16x8_t, float16x8_t, 'q_laneq', 'q_low_', '3']
|
||||
compose:
|
||||
- FnCall: [static_assert_uimm_bits!, [LANE, "{type[5]}"]]
|
||||
- FnCall:
|
||||
- "vfmlal{type[4]}{neon_type[1]}"
|
||||
- - r
|
||||
- a
|
||||
- FnCall: ["vdup{neon_type[1].N}", [{FnCall: [simd_extract!, [b, 'LANE as u32']]}]]
|
||||
|
||||
- FnCall: ["vdup{type[3]}_{neon_type[2]}", [b], [LANE]]
|
||||
|
||||
- name: "vfmlsl{type[2]}{neon_type[1]}"
|
||||
doc: "Floating-point fused Multiply-Subtract Long from accumulator (vector)."
|
||||
@@ -13852,6 +13674,7 @@ intrinsics:
|
||||
- *target-not-arm64ec
|
||||
assert_instr: [fmlsl2]
|
||||
safety: safe
|
||||
big_endian_inverse: true
|
||||
types:
|
||||
- [float32x2_t, float16x4_t, '_high_']
|
||||
- [float32x4_t, float16x8_t, 'q_high_']
|
||||
@@ -13862,7 +13685,7 @@ intrinsics:
|
||||
- link: "llvm.aarch64.neon.fmlsl2.{neon_type[0]}.{neon_type[1]}"
|
||||
arch: aarch64,arm64ec
|
||||
|
||||
- name: "vfmlsl{type[3]}{neon_type[1]}"
|
||||
- name: "vfmlsl{type[3]}_high_{neon_type[1]}"
|
||||
doc: "Floating-point fused Multiply-Subtract Long from accumulator (by element)."
|
||||
arguments: ["r: {neon_type[0]}", "a: {neon_type[1]}", "b: {neon_type[2]}"]
|
||||
return_type: "{neon_type[0]}"
|
||||
@@ -13876,18 +13699,17 @@ intrinsics:
|
||||
static_defs: ['const LANE: i32']
|
||||
safety: safe
|
||||
types:
|
||||
- [float32x2_t, float16x4_t, float16x4_t, '_lane_high_', '_high_', '2']
|
||||
- [float32x2_t, float16x4_t, float16x8_t, '_laneq_high_', '_high_', '3']
|
||||
- [float32x4_t, float16x8_t, float16x4_t, 'q_lane_high_', 'q_high_', '2']
|
||||
- [float32x4_t, float16x8_t, float16x8_t, 'q_laneq_high_', 'q_high_', '3']
|
||||
- [float32x2_t, float16x4_t, float16x4_t, '_lane', '_high_', '2']
|
||||
- [float32x2_t, float16x4_t, float16x8_t, '_laneq', '_high_', '3']
|
||||
- [float32x4_t, float16x8_t, float16x4_t, 'q_lane', 'q_high_', '2']
|
||||
- [float32x4_t, float16x8_t, float16x8_t, 'q_laneq', 'q_high_', '3']
|
||||
compose:
|
||||
- FnCall: [static_assert_uimm_bits!, [LANE, "{type[5]}"]]
|
||||
- FnCall:
|
||||
- "vfmlsl{type[4]}{neon_type[1]}"
|
||||
- - r
|
||||
- a
|
||||
- FnCall: ["vdup{neon_type[1].N}", [{FnCall: [simd_extract!, [b, 'LANE as u32']]}]]
|
||||
|
||||
- FnCall: ["vdup{type[3]}_{neon_type[2]}", [b], [LANE]]
|
||||
|
||||
- name: "vfmlsl{type[2]}{neon_type[1]}"
|
||||
doc: "Floating-point fused Multiply-Subtract Long from accumulator (vector)."
|
||||
@@ -13900,6 +13722,7 @@ intrinsics:
|
||||
- *target-not-arm64ec
|
||||
assert_instr: [fmlsl]
|
||||
safety: safe
|
||||
big_endian_inverse: true
|
||||
types:
|
||||
- [float32x2_t, float16x4_t, '_low_']
|
||||
- [float32x4_t, float16x8_t, 'q_low_']
|
||||
@@ -13910,7 +13733,7 @@ intrinsics:
|
||||
- link: "llvm.aarch64.neon.fmlsl.{neon_type[0]}.{neon_type[1]}"
|
||||
arch: aarch64,arm64ec
|
||||
|
||||
- name: "vfmlsl{type[3]}{neon_type[1]}"
|
||||
- name: "vfmlsl{type[3]}_low_{neon_type[1]}"
|
||||
doc: "Floating-point fused Multiply-Subtract Long from accumulator (by element)."
|
||||
arguments: ["r: {neon_type[0]}", "a: {neon_type[1]}", "b: {neon_type[2]}"]
|
||||
return_type: "{neon_type[0]}"
|
||||
@@ -13924,17 +13747,17 @@ intrinsics:
|
||||
static_defs: ['const LANE: i32']
|
||||
safety: safe
|
||||
types:
|
||||
- [float32x2_t, float16x4_t, float16x4_t, '_lane_low_', '_low_', '2']
|
||||
- [float32x2_t, float16x4_t, float16x8_t, '_laneq_low_', '_low_', '3']
|
||||
- [float32x4_t, float16x8_t, float16x4_t, 'q_lane_low_', 'q_low_', '2']
|
||||
- [float32x4_t, float16x8_t, float16x8_t, 'q_laneq_low_', 'q_low_', '3']
|
||||
- [float32x2_t, float16x4_t, float16x4_t, '_lane', '_low_', '2']
|
||||
- [float32x2_t, float16x4_t, float16x8_t, '_laneq', '_low_', '3']
|
||||
- [float32x4_t, float16x8_t, float16x4_t, 'q_lane', 'q_low_', '2']
|
||||
- [float32x4_t, float16x8_t, float16x8_t, 'q_laneq', 'q_low_', '3']
|
||||
compose:
|
||||
- FnCall: [static_assert_uimm_bits!, [LANE, "{type[5]}"]]
|
||||
- FnCall:
|
||||
- "vfmlsl{type[4]}{neon_type[1]}"
|
||||
- - r
|
||||
- a
|
||||
- FnCall: ["vdup{neon_type[1].N}", [{FnCall: [simd_extract!, [b, 'LANE as u32']]}]]
|
||||
- FnCall: ["vdup{type[3]}_{neon_type[2]}", [b], [LANE]]
|
||||
|
||||
- name: "vamax{neon_type.no}"
|
||||
doc: "Multi-vector floating-point absolute maximum"
|
||||
@@ -14352,3 +14175,49 @@ intrinsics:
|
||||
links:
|
||||
- link: "llvm.aarch64.fjcvtzs"
|
||||
arch: aarch64,arm64ec
|
||||
|
||||
- name: "{type[0]}"
|
||||
doc: "Duplicate vector element to vector or scalar"
|
||||
arguments: ["a: {type[1]}"]
|
||||
return_type: "{neon_type[2]}"
|
||||
attr:
|
||||
- *neon-stable
|
||||
assert_instr: ['{type[3]}']
|
||||
safety: safe
|
||||
big_endian_inverse: true
|
||||
types:
|
||||
- ['vget_high_f64', 'float64x2_t', 'float64x1_t', 'nop', 'float64x1_t([simd_extract!(a, 1)])']
|
||||
- ['vget_low_f64', 'float64x2_t', 'float64x1_t', 'nop', 'float64x1_t([simd_extract!(a, 0)])']
|
||||
compose:
|
||||
- Identifier: ['{type[4]}', UnsafeSymbol]
|
||||
|
||||
- name: "vcombine{neon_type[0].noq}"
|
||||
doc: Join two smaller vectors into a single larger vector
|
||||
arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}"]
|
||||
return_type: "{neon_type[1]}"
|
||||
attr:
|
||||
- *neon-stable
|
||||
assert_instr: [mov]
|
||||
safety: safe
|
||||
big_endian_inverse: true
|
||||
types:
|
||||
- [float64x1_t, float64x2_t, '[0, 1]']
|
||||
compose:
|
||||
- FnCall: [simd_shuffle!, [a, b, '{type[2]}']]
|
||||
|
||||
- name: "vgetq_lane_{type[1]}"
|
||||
doc: "Duplicate vector element to vector or scalar"
|
||||
arguments: ["a: {type[0]}"]
|
||||
return_type: "{type[1]}"
|
||||
attr:
|
||||
- *neon-stable
|
||||
- FnCall: [rustc_legacy_const_generics, ['1']]
|
||||
assert_instr: [['nop', 'IMM5 = 0']]
|
||||
safety: safe
|
||||
big_endian_inverse: true
|
||||
static_defs: ["const IMM5: i32"]
|
||||
types:
|
||||
- ['float64x2_t', 'f64']
|
||||
compose:
|
||||
- FnCall: [static_assert_uimm_bits!, [IMM5, "1"]]
|
||||
- FnCall: ['simd_extract!', [a, 'IMM5 as u32'], [] , true]
|
||||
|
||||
@@ -104,6 +104,10 @@ aarch64-crc-stable: &aarch64-crc-stable
|
||||
neon-unstable-f16: &neon-unstable-f16
|
||||
FnCall: [unstable, ['feature = "stdarch_neon_f16"', 'issue = "136306"']]
|
||||
|
||||
# all(target_endian = "little")
|
||||
all-neon-target-aarch64-arm64ec-little-endian: &all-neon-target-aarch64-arm64ec-little-endian
|
||||
FnCall: [all, [test, {FnCall: [any, ['target_arch = "aarch64"', 'target_arch = "arm64ec"']]}, 'target_endian = "little"']]
|
||||
|
||||
intrinsics:
|
||||
- name: "vand{neon_type.no}"
|
||||
doc: Vector bitwise and
|
||||
@@ -502,11 +506,11 @@ intrinsics:
|
||||
- ['h_f16', 'f16']
|
||||
compose:
|
||||
- FnCall:
|
||||
- simd_extract!
|
||||
- 'vget_lane_{type[1]}'
|
||||
- - FnCall:
|
||||
- "vabs_{type[1]}"
|
||||
- - FnCall: ["vdup_n_{type[1]}", [a]]
|
||||
- 0
|
||||
- - 0
|
||||
|
||||
- name: "vcgt{neon_type[0].no}"
|
||||
doc: "Compare signed greater than"
|
||||
@@ -1438,6 +1442,7 @@ intrinsics:
|
||||
- *neon-cfg-arm-unstable
|
||||
static_defs: ['const N: i32']
|
||||
safety: safe
|
||||
big_endian_inverse: true
|
||||
types:
|
||||
- [_lane_s8, int8x8_t, int8x8_t, '3', '[N as u32; 8]']
|
||||
- [q_lane_s8, int8x8_t, int8x16_t, '3', '[N as u32; 16]']
|
||||
@@ -1462,6 +1467,7 @@ intrinsics:
|
||||
- *neon-cfg-arm-unstable
|
||||
static_defs: ['const N: i32']
|
||||
safety: safe
|
||||
big_endian_inverse: true
|
||||
types:
|
||||
- [q_laneq_s8, int8x16_t, int8x16_t, '4', '[N as u32; 16]']
|
||||
- [_laneq_s8, int8x16_t, int8x8_t, '4', '[N as u32; 8]']
|
||||
@@ -1486,6 +1492,7 @@ intrinsics:
|
||||
- *neon-cfg-arm-unstable
|
||||
static_defs: ['const N: i32']
|
||||
safety: safe
|
||||
big_endian_inverse: true
|
||||
types:
|
||||
- [_lane_s16, int16x4_t, int16x4_t, '2', '[N as u32; 4]']
|
||||
- [q_lane_s16, int16x4_t, int16x8_t, '2', '[N as u32; 8]']
|
||||
@@ -1510,6 +1517,7 @@ intrinsics:
|
||||
- *neon-cfg-arm-unstable
|
||||
static_defs: ['const N: i32']
|
||||
safety: safe
|
||||
big_endian_inverse: true
|
||||
types:
|
||||
- [q_laneq_s16, int16x8_t, int16x8_t, '3', '[N as u32; 8]']
|
||||
- [_laneq_s16, int16x8_t, int16x4_t, '3', '[N as u32; 4]']
|
||||
@@ -1537,6 +1545,7 @@ intrinsics:
|
||||
- *target-not-arm64ec
|
||||
static_defs: ['const N: i32']
|
||||
safety: safe
|
||||
big_endian_inverse: true
|
||||
types:
|
||||
- [q_laneq_f16, float16x8_t, float16x8_t, '3', '[N as u32; 8]']
|
||||
- [_laneq_f16, float16x8_t, float16x4_t, '3', '[N as u32; 4]']
|
||||
@@ -1577,6 +1586,7 @@ intrinsics:
|
||||
- *target-not-arm64ec
|
||||
static_defs: ['const N: i32']
|
||||
safety: safe
|
||||
big_endian_inverse: true
|
||||
types:
|
||||
- [_lane_f16, float16x4_t, float16x4_t, '2', '[N as u32; 4]']
|
||||
- [q_lane_f16, float16x4_t, float16x8_t, '2', '[N as u32; 8]']
|
||||
@@ -1598,6 +1608,7 @@ intrinsics:
|
||||
- *neon-cfg-arm-unstable
|
||||
static_defs: ['const N: i32']
|
||||
safety: safe
|
||||
big_endian_inverse: true
|
||||
types:
|
||||
- [_lane_s32, int32x2_t, int32x2_t, '1', '[N as u32, N as u32]']
|
||||
- [q_lane_s32, int32x2_t, int32x4_t, '1', '[N as u32, N as u32, N as u32, N as u32]']
|
||||
@@ -1622,6 +1633,7 @@ intrinsics:
|
||||
- *neon-cfg-arm-unstable
|
||||
static_defs: ['const N: i32']
|
||||
safety: safe
|
||||
big_endian_inverse: true
|
||||
types:
|
||||
- [q_laneq_s32, int32x4_t, int32x4_t, '2', '[N as u32, N as u32, N as u32, N as u32]']
|
||||
- [_laneq_s32, int32x4_t, int32x2_t, '2', '[N as u32, N as u32]']
|
||||
@@ -1646,6 +1658,7 @@ intrinsics:
|
||||
- *neon-cfg-arm-unstable
|
||||
static_defs: ['const N: i32']
|
||||
safety: safe
|
||||
big_endian_inverse: true
|
||||
types:
|
||||
- [q_laneq_s64, int64x2_t, '1', '[N as u32, N as u32]']
|
||||
- [q_laneq_u64, uint64x2_t, '1', '[N as u32, N as u32]']
|
||||
@@ -1666,6 +1679,7 @@ intrinsics:
|
||||
- *neon-cfg-arm-unstable
|
||||
static_defs: ['const N: i32']
|
||||
safety: safe
|
||||
big_endian_inverse: true
|
||||
types:
|
||||
- [q_lane_s64, int64x1_t, int64x2_t]
|
||||
- [q_lane_u64, uint64x1_t, uint64x2_t]
|
||||
@@ -1712,13 +1726,13 @@ intrinsics:
|
||||
compose:
|
||||
- FnCall: [static_assert_uimm_bits!, [N, 1]]
|
||||
- FnCall:
|
||||
- "transmute{type[3]}"
|
||||
- - FnCall: [simd_extract!, [a, 'N as u32']]
|
||||
- "transmute"
|
||||
- - FnCall: ['vget{neon_type[1].lane_nox}', [a], [N]]
|
||||
|
||||
- name: "vext{neon_type[0].no}"
|
||||
- name: "vext{neon_type.no}"
|
||||
doc: "Extract vector from pair of vectors"
|
||||
arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}"]
|
||||
return_type: "{neon_type[0]}"
|
||||
arguments: ["a: {neon_type}", "b: {neon_type}"]
|
||||
return_type: "{neon_type}"
|
||||
attr:
|
||||
- *neon-v7
|
||||
- FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, ['"vext.8"', 'N = 7']]}]]
|
||||
@@ -1728,21 +1742,22 @@ intrinsics:
|
||||
- *neon-cfg-arm-unstable
|
||||
static_defs: ['const N: i32']
|
||||
safety: safe
|
||||
big_endian_inverse: true
|
||||
types:
|
||||
- [int8x8_t, 'match N & 0b111 { 0 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7]), 1 => simd_shuffle!(a, b, [1, 2, 3, 4, 5, 6, 7, 8]), 2 => simd_shuffle!(a, b, [2, 3, 4, 5, 6, 7, 8, 9]), 3 => simd_shuffle!(a, b, [3, 4, 5, 6, 7, 8, 9, 10]), 4 => simd_shuffle!(a, b, [4, 5, 6, 7, 8, 9, 10, 11]), 5 => simd_shuffle!(a, b, [5, 6, 7, 8, 9, 10, 11, 12]), 6 => simd_shuffle!(a, b, [6, 7, 8, 9, 10, 11, 12, 13]), 7 => simd_shuffle!(a, b, [7, 8, 9, 10, 11, 12, 13, 14]), _ => unreachable_unchecked(), }']
|
||||
- [int16x8_t, 'match N & 0b111 { 0 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7]), 1 => simd_shuffle!(a, b, [1, 2, 3, 4, 5, 6, 7, 8]), 2 => simd_shuffle!(a, b, [2, 3, 4, 5, 6, 7, 8, 9]), 3 => simd_shuffle!(a, b, [3, 4, 5, 6, 7, 8, 9, 10]), 4 => simd_shuffle!(a, b, [4, 5, 6, 7, 8, 9, 10, 11]), 5 => simd_shuffle!(a, b, [5, 6, 7, 8, 9, 10, 11, 12]), 6 => simd_shuffle!(a, b, [6, 7, 8, 9, 10, 11, 12, 13]), 7 => simd_shuffle!(a, b, [7, 8, 9, 10, 11, 12, 13, 14]), _ => unreachable_unchecked(), }']
|
||||
- [uint8x8_t, 'match N & 0b111 { 0 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7]), 1 => simd_shuffle!(a, b, [1, 2, 3, 4, 5, 6, 7, 8]), 2 => simd_shuffle!(a, b, [2, 3, 4, 5, 6, 7, 8, 9]), 3 => simd_shuffle!(a, b, [3, 4, 5, 6, 7, 8, 9, 10]), 4 => simd_shuffle!(a, b, [4, 5, 6, 7, 8, 9, 10, 11]), 5 => simd_shuffle!(a, b, [5, 6, 7, 8, 9, 10, 11, 12]), 6 => simd_shuffle!(a, b, [6, 7, 8, 9, 10, 11, 12, 13]), 7 => simd_shuffle!(a, b, [7, 8, 9, 10, 11, 12, 13, 14]), _ => unreachable_unchecked(), }']
|
||||
- [uint16x8_t, 'match N & 0b111 { 0 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7]), 1 => simd_shuffle!(a, b, [1, 2, 3, 4, 5, 6, 7, 8]), 2 => simd_shuffle!(a, b, [2, 3, 4, 5, 6, 7, 8, 9]), 3 => simd_shuffle!(a, b, [3, 4, 5, 6, 7, 8, 9, 10]), 4 => simd_shuffle!(a, b, [4, 5, 6, 7, 8, 9, 10, 11]), 5 => simd_shuffle!(a, b, [5, 6, 7, 8, 9, 10, 11, 12]), 6 => simd_shuffle!(a, b, [6, 7, 8, 9, 10, 11, 12, 13]), 7 => simd_shuffle!(a, b, [7, 8, 9, 10, 11, 12, 13, 14]), _ => unreachable_unchecked(), }']
|
||||
- [poly8x8_t, 'match N & 0b111 { 0 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7]), 1 => simd_shuffle!(a, b, [1, 2, 3, 4, 5, 6, 7, 8]), 2 => simd_shuffle!(a, b, [2, 3, 4, 5, 6, 7, 8, 9]), 3 => simd_shuffle!(a, b, [3, 4, 5, 6, 7, 8, 9, 10]), 4 => simd_shuffle!(a, b, [4, 5, 6, 7, 8, 9, 10, 11]), 5 => simd_shuffle!(a, b, [5, 6, 7, 8, 9, 10, 11, 12]), 6 => simd_shuffle!(a, b, [6, 7, 8, 9, 10, 11, 12, 13]), 7 => simd_shuffle!(a, b, [7, 8, 9, 10, 11, 12, 13, 14]), _ => unreachable_unchecked(), }']
|
||||
- [poly16x8_t, 'match N & 0b111 { 0 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7]), 1 => simd_shuffle!(a, b, [1, 2, 3, 4, 5, 6, 7, 8]), 2 => simd_shuffle!(a, b, [2, 3, 4, 5, 6, 7, 8, 9]), 3 => simd_shuffle!(a, b, [3, 4, 5, 6, 7, 8, 9, 10]), 4 => simd_shuffle!(a, b, [4, 5, 6, 7, 8, 9, 10, 11]), 5 => simd_shuffle!(a, b, [5, 6, 7, 8, 9, 10, 11, 12]), 6 => simd_shuffle!(a, b, [6, 7, 8, 9, 10, 11, 12, 13]), 7 => simd_shuffle!(a, b, [7, 8, 9, 10, 11, 12, 13, 14]), _ => unreachable_unchecked(), }']
|
||||
- int8x8_t
|
||||
- int16x8_t
|
||||
- uint8x8_t
|
||||
- uint16x8_t
|
||||
- poly8x8_t
|
||||
- poly16x8_t
|
||||
compose:
|
||||
- FnCall: [static_assert_uimm_bits!, [N, 3]]
|
||||
- Identifier: ["{type[1]}", UnsafeSymbol]
|
||||
- FnCall: [simd_shuffle!, [a, b, '[N as u32, N as u32 + 1, N as u32 + 2, N as u32 + 3, N as u32 + 4, N as u32 + 5, N as u32 + 6, N as u32 + 7]']]
|
||||
|
||||
- name: "vext{neon_type[0].no}"
|
||||
- name: "vext{neon_type.no}"
|
||||
doc: "Extract vector from pair of vectors"
|
||||
arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}"]
|
||||
return_type: "{neon_type[0]}"
|
||||
arguments: ["a: {neon_type}", "b: {neon_type}"]
|
||||
return_type: "{neon_type}"
|
||||
attr:
|
||||
- *neon-v7
|
||||
- FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, ['"vext.8"', 'N = 15']]}]]
|
||||
@@ -1752,18 +1767,19 @@ intrinsics:
|
||||
- *neon-cfg-arm-unstable
|
||||
static_defs: ['const N: i32']
|
||||
safety: safe
|
||||
big_endian_inverse: true
|
||||
types:
|
||||
- [int8x16_t, 'match N & 0b1111 { 0 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]), 1 => simd_shuffle!(a, b, [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16]), 2 => simd_shuffle!(a, b, [2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17]), 3 => simd_shuffle!(a, b, [3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18]), 4 => simd_shuffle!(a, b, [4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19]), 5 => simd_shuffle!(a, b, [5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20]), 6 => simd_shuffle!(a, b, [6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21]), 7 => simd_shuffle!(a, b, [7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22]), 8 => simd_shuffle!(a, b, [8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23]), 9 => simd_shuffle!(a, b, [9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24]), 10 => simd_shuffle!(a, b, [10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25]), 11 => simd_shuffle!(a, b, [11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26]), 12 => simd_shuffle!(a, b, [12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27]), 13 => simd_shuffle!(a, b, [13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28]), 14 => simd_shuffle!(a, b, [14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29]), 15 => simd_shuffle!(a, b, [15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30]), _ => unreachable_unchecked(), }']
|
||||
- [uint8x16_t, 'match N & 0b1111 { 0 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]), 1 => simd_shuffle!(a, b, [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16]), 2 => simd_shuffle!(a, b, [2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17]), 3 => simd_shuffle!(a, b, [3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18]), 4 => simd_shuffle!(a, b, [4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19]), 5 => simd_shuffle!(a, b, [5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20]), 6 => simd_shuffle!(a, b, [6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21]), 7 => simd_shuffle!(a, b, [7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22]), 8 => simd_shuffle!(a, b, [8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23]), 9 => simd_shuffle!(a, b, [9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24]), 10 => simd_shuffle!(a, b, [10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25]), 11 => simd_shuffle!(a, b, [11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26]), 12 => simd_shuffle!(a, b, [12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27]), 13 => simd_shuffle!(a, b, [13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28]), 14 => simd_shuffle!(a, b, [14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29]), 15 => simd_shuffle!(a, b, [15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30]), _ => unreachable_unchecked(), }']
|
||||
- [poly8x16_t, 'match N & 0b1111 { 0 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]), 1 => simd_shuffle!(a, b, [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16]), 2 => simd_shuffle!(a, b, [2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17]), 3 => simd_shuffle!(a, b, [3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18]), 4 => simd_shuffle!(a, b, [4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19]), 5 => simd_shuffle!(a, b, [5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20]), 6 => simd_shuffle!(a, b, [6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21]), 7 => simd_shuffle!(a, b, [7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22]), 8 => simd_shuffle!(a, b, [8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23]), 9 => simd_shuffle!(a, b, [9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24]), 10 => simd_shuffle!(a, b, [10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25]), 11 => simd_shuffle!(a, b, [11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26]), 12 => simd_shuffle!(a, b, [12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27]), 13 => simd_shuffle!(a, b, [13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28]), 14 => simd_shuffle!(a, b, [14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29]), 15 => simd_shuffle!(a, b, [15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30]), _ => unreachable_unchecked(), }']
|
||||
- int8x16_t
|
||||
- uint8x16_t
|
||||
- poly8x16_t
|
||||
compose:
|
||||
- FnCall: [static_assert_uimm_bits!, [N, 4]]
|
||||
- Identifier: ["{type[1]}", UnsafeSymbol]
|
||||
- FnCall: [simd_shuffle!, [a, b, '[N as u32, N as u32 + 1, N as u32 + 2, N as u32 + 3, N as u32 + 4, N as u32 + 5, N as u32 + 6, N as u32 + 7, N as u32 + 8, N as u32 + 9, N as u32 + 10, N as u32 + 11, N as u32 + 12, N as u32 + 13, N as u32 + 14, N as u32 + 15]']]
|
||||
|
||||
- name: "vext{neon_type[0].no}"
|
||||
- name: "vext{neon_type.no}"
|
||||
doc: "Extract vector from pair of vectors"
|
||||
arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}"]
|
||||
return_type: "{neon_type[0]}"
|
||||
arguments: ["a: {neon_type}", "b: {neon_type}"]
|
||||
return_type: "{neon_type}"
|
||||
attr:
|
||||
- *neon-v7
|
||||
- FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, ['"vext.8"', 'N = 3']]}]]
|
||||
@@ -1773,22 +1789,23 @@ intrinsics:
|
||||
- *neon-cfg-arm-unstable
|
||||
static_defs: ['const N: i32']
|
||||
safety: safe
|
||||
big_endian_inverse: true
|
||||
types:
|
||||
- [int16x4_t,'match N & 0b11 { 0 => simd_shuffle!(a, b, [0, 1, 2, 3]), 1 => simd_shuffle!(a, b, [1, 2, 3, 4]), 2 => simd_shuffle!(a, b, [2, 3, 4, 5]), 3 => simd_shuffle!(a, b, [3, 4, 5, 6]), _ => unreachable_unchecked(), }']
|
||||
- [int32x4_t, 'match N & 0b11 { 0 => simd_shuffle!(a, b, [0, 1, 2, 3]), 1 => simd_shuffle!(a, b, [1, 2, 3, 4]), 2 => simd_shuffle!(a, b, [2, 3, 4, 5]), 3 => simd_shuffle!(a, b, [3, 4, 5, 6]), _ => unreachable_unchecked(), }']
|
||||
- [uint16x4_t, 'match N & 0b11 { 0 => simd_shuffle!(a, b, [0, 1, 2, 3]), 1 => simd_shuffle!(a, b, [1, 2, 3, 4]), 2 => simd_shuffle!(a, b, [2, 3, 4, 5]), 3 => simd_shuffle!(a, b, [3, 4, 5, 6]), _ => unreachable_unchecked(), }']
|
||||
- [uint32x4_t, 'match N & 0b11 { 0 => simd_shuffle!(a, b, [0, 1, 2, 3]), 1 => simd_shuffle!(a, b, [1, 2, 3, 4]), 2 => simd_shuffle!(a, b, [2, 3, 4, 5]), 3 => simd_shuffle!(a, b, [3, 4, 5, 6]), _ => unreachable_unchecked(), }']
|
||||
- [poly16x4_t, 'match N & 0b11 { 0 => simd_shuffle!(a, b, [0, 1, 2, 3]), 1 => simd_shuffle!(a, b, [1, 2, 3, 4]), 2 => simd_shuffle!(a, b, [2, 3, 4, 5]), 3 => simd_shuffle!(a, b, [3, 4, 5, 6]), _ => unreachable_unchecked(), }']
|
||||
- [float32x4_t, 'match N & 0b11 { 0 => simd_shuffle!(a, b, [0, 1, 2, 3]), 1 => simd_shuffle!(a, b, [1, 2, 3, 4]), 2 => simd_shuffle!(a, b, [2, 3, 4, 5]), 3 => simd_shuffle!(a, b, [3, 4, 5, 6]), _ => unreachable_unchecked(), }']
|
||||
- int16x4_t
|
||||
- int32x4_t
|
||||
- uint16x4_t
|
||||
- uint32x4_t
|
||||
- poly16x4_t
|
||||
- float32x4_t
|
||||
compose:
|
||||
- FnCall: [static_assert_uimm_bits!, [N, 2]]
|
||||
- Identifier: ["{type[1]}", UnsafeSymbol]
|
||||
- FnCall: [simd_shuffle!, [a, b, '[N as u32, N as u32 + 1, N as u32 + 2, N as u32 + 3]']]
|
||||
|
||||
|
||||
- name: "vext{neon_type[0].no}"
|
||||
- name: "vext{neon_type.no}"
|
||||
doc: "Extract vector from pair of vectors"
|
||||
arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}"]
|
||||
return_type: "{neon_type[0]}"
|
||||
arguments: ["a: {neon_type}", "b: {neon_type}"]
|
||||
return_type: "{neon_type}"
|
||||
attr:
|
||||
- *neon-v7
|
||||
- FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, ['"vext.8"', 'N = 3']]}]]
|
||||
@@ -1800,16 +1817,17 @@ intrinsics:
|
||||
- *target-not-arm64ec
|
||||
static_defs: ['const N: i32']
|
||||
safety: safe
|
||||
big_endian_inverse: true
|
||||
types:
|
||||
- [float16x4_t, 'match N & 0b11 { 0 => simd_shuffle!(a, b, [0, 1, 2, 3]), 1 => simd_shuffle!(a, b, [1, 2, 3, 4]), 2 => simd_shuffle!(a, b, [2, 3, 4, 5]), 3 => simd_shuffle!(a, b, [3, 4, 5, 6]), _ => unreachable_unchecked(), }']
|
||||
- float16x4_t
|
||||
compose:
|
||||
- FnCall: [static_assert_uimm_bits!, [N, 2]]
|
||||
- Identifier: ["{type[1]}", UnsafeSymbol]
|
||||
- FnCall: [simd_shuffle!, [a, b, '[N as u32, N as u32 + 1, N as u32 + 2, N as u32 + 3]']]
|
||||
|
||||
- name: "vext{neon_type[0].no}"
|
||||
- name: "vext{neon_type.no}"
|
||||
doc: "Extract vector from pair of vectors"
|
||||
arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}"]
|
||||
return_type: "{neon_type[0]}"
|
||||
arguments: ["a: {neon_type}", "b: {neon_type}"]
|
||||
return_type: "{neon_type}"
|
||||
attr:
|
||||
- *neon-v7
|
||||
- FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, ['"vext.8"', 'N = 7']]}]]
|
||||
@@ -1821,18 +1839,19 @@ intrinsics:
|
||||
- *target-not-arm64ec
|
||||
static_defs: ['const N: i32']
|
||||
safety: safe
|
||||
big_endian_inverse: true
|
||||
types:
|
||||
- [float16x8_t, 'match N & 0b111 { 0 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7]), 1 => simd_shuffle!(a, b, [1, 2, 3, 4, 5, 6, 7, 8]), 2 => simd_shuffle!(a, b, [2, 3, 4, 5, 6, 7, 8, 9]), 3 => simd_shuffle!(a, b, [3, 4, 5, 6, 7, 8, 9, 10]), 4 => simd_shuffle!(a, b, [4, 5, 6, 7, 8, 9, 10, 11]), 5 => simd_shuffle!(a, b, [5, 6, 7, 8, 9, 10, 11, 12]), 6 => simd_shuffle!(a, b, [6, 7, 8, 9, 10, 11, 12, 13]), 7 => simd_shuffle!(a, b, [7, 8, 9, 10, 11, 12, 13, 14]), _ => unreachable_unchecked(), }']
|
||||
- float16x8_t
|
||||
compose:
|
||||
- FnCall: [static_assert_uimm_bits!, [N, 3]]
|
||||
- Identifier: ["{type[1]}", UnsafeSymbol]
|
||||
- FnCall: [simd_shuffle!, [a, b, '[N as u32, N as u32 + 1, N as u32 + 2, N as u32 + 3, N as u32 + 4, N as u32 + 5, N as u32 + 6, N as u32 + 7]']]
|
||||
|
||||
|
||||
|
||||
- name: "vext{neon_type[0].no}"
|
||||
- name: "vext{neon_type.no}"
|
||||
doc: "Extract vector from pair of vectors"
|
||||
arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}"]
|
||||
return_type: "{neon_type[0]}"
|
||||
arguments: ["a: {neon_type}", "b: {neon_type}"]
|
||||
return_type: "{neon_type}"
|
||||
attr:
|
||||
- *neon-v7
|
||||
- FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, ['"vext.8"', 'N = 1']]}]]
|
||||
@@ -1842,18 +1861,19 @@ intrinsics:
|
||||
- *neon-cfg-arm-unstable
|
||||
static_defs: ['const N: i32']
|
||||
safety: safe
|
||||
big_endian_inverse: true
|
||||
types:
|
||||
- [int32x2_t, 'match N & 0b1 { 0 => simd_shuffle!(a, b, [0, 1]), 1 => simd_shuffle!(a, b, [1, 2]), _ => unreachable_unchecked(), }']
|
||||
- [uint32x2_t, 'match N & 0b1 { 0 => simd_shuffle!(a, b, [0, 1]), 1 => simd_shuffle!(a, b, [1, 2]), _ => unreachable_unchecked(), }']
|
||||
- [float32x2_t, 'match N & 0b1 { 0 => simd_shuffle!(a, b, [0, 1]), 1 => simd_shuffle!(a, b, [1, 2]), _ => unreachable_unchecked(), }']
|
||||
- int32x2_t
|
||||
- uint32x2_t
|
||||
- float32x2_t
|
||||
compose:
|
||||
- FnCall: [static_assert_uimm_bits!, [N, 1]]
|
||||
- Identifier: ["{type[1]}", UnsafeSymbol]
|
||||
- FnCall: [simd_shuffle!, [a, b, '[N as u32, N as u32 + 1]']]
|
||||
|
||||
- name: "vext{neon_type[0].no}"
|
||||
- name: "vext{neon_type.no}"
|
||||
doc: "Extract vector from pair of vectors"
|
||||
arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}"]
|
||||
return_type: "{neon_type[0]}"
|
||||
arguments: ["a: {neon_type}", "b: {neon_type}"]
|
||||
return_type: "{neon_type}"
|
||||
attr:
|
||||
- *neon-v7
|
||||
- FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, [vmov, 'N = 1']]}]]
|
||||
@@ -1863,12 +1883,13 @@ intrinsics:
|
||||
- *neon-cfg-arm-unstable
|
||||
static_defs: ['const N: i32']
|
||||
safety: safe
|
||||
big_endian_inverse: true
|
||||
types:
|
||||
- [int64x2_t, 'match N & 0b1 { 0 => simd_shuffle!(a, b, [0, 1]), 1 => simd_shuffle!(a, b, [1, 2]), _ => unreachable_unchecked(), }']
|
||||
- [uint64x2_t, 'match N & 0b1 { 0 => simd_shuffle!(a, b, [0, 1]), 1 => simd_shuffle!(a, b, [1, 2]), _ => unreachable_unchecked(), }']
|
||||
- int64x2_t
|
||||
- uint64x2_t
|
||||
compose:
|
||||
- FnCall: [static_assert_uimm_bits!, [N, 1]]
|
||||
- Identifier: ["{type[1]}", UnsafeSymbol]
|
||||
- FnCall: [simd_shuffle!, [a, b, '[N as u32, N as u32 + 1]']]
|
||||
|
||||
- name: "vmla{neon_type[0].no}"
|
||||
doc: "Multiply-add to accumulator"
|
||||
@@ -1988,17 +2009,17 @@ intrinsics:
|
||||
static_defs: ['const LANE: i32']
|
||||
safety: safe
|
||||
types:
|
||||
- [int32x4_t, int16x4_t, int16x4_t, '2', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32]']
|
||||
- [int32x4_t, int16x4_t, int16x8_t, '3', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32]']
|
||||
- [int64x2_t, int32x2_t, int32x2_t, '1', '[LANE as u32, LANE as u32]']
|
||||
- [int64x2_t, int32x2_t, int32x4_t, '2', '[LANE as u32, LANE as u32]']
|
||||
- [int32x4_t, int16x4_t, int16x4_t, '2']
|
||||
- [int32x4_t, int16x4_t, int16x8_t, '3']
|
||||
- [int64x2_t, int32x2_t, int32x2_t, '1']
|
||||
- [int64x2_t, int32x2_t, int32x4_t, '2']
|
||||
compose:
|
||||
- FnCall: [static_assert_uimm_bits!, [LANE, "{type[3]}"]]
|
||||
- FnCall:
|
||||
- "vmlal_{neon_type[1]}"
|
||||
- - a
|
||||
- b
|
||||
- FnCall: [simd_shuffle!, [c, c, '{type[4]}']]
|
||||
- FnCall: ['vdup_lane{neon_type[2].no}', [c], [LANE]]
|
||||
|
||||
- name: "vmlal_lane{neon_type[2].no}"
|
||||
doc: "Vector widening multiply accumulate with scalar"
|
||||
@@ -2014,17 +2035,17 @@ intrinsics:
|
||||
static_defs: ['const LANE: i32']
|
||||
safety: safe
|
||||
types:
|
||||
- [uint32x4_t, uint16x4_t, uint16x4_t, uint32x4_t, '2', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32]']
|
||||
- [uint32x4_t, uint16x4_t, uint16x8_t, uint32x4_t, '3', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32]']
|
||||
- [uint64x2_t, uint32x2_t, uint32x2_t, uint64x2_t, '1', '[LANE as u32, LANE as u32]']
|
||||
- [uint64x2_t, uint32x2_t, uint32x4_t, uint64x2_t, '2', '[LANE as u32, LANE as u32]']
|
||||
- [uint32x4_t, uint16x4_t, uint16x4_t, '2']
|
||||
- [uint32x4_t, uint16x4_t, uint16x8_t, '3']
|
||||
- [uint64x2_t, uint32x2_t, uint32x2_t, '1']
|
||||
- [uint64x2_t, uint32x2_t, uint32x4_t, '2']
|
||||
compose:
|
||||
- FnCall: [static_assert_uimm_bits!, [LANE, "{type[4]}"]]
|
||||
- FnCall: [static_assert_uimm_bits!, [LANE, "{type[3]}"]]
|
||||
- FnCall:
|
||||
- "vmlal_{neon_type[1]}"
|
||||
- - a
|
||||
- b
|
||||
- FnCall: [simd_shuffle!, [c, c, '{type[5]}']]
|
||||
- FnCall: ['vdup_lane{neon_type[2].no}', [c], [LANE]]
|
||||
|
||||
- name: "vmlal_{neon_type[1]}"
|
||||
doc: "Unsigned multiply-add long"
|
||||
@@ -2143,15 +2164,15 @@ intrinsics:
|
||||
static_defs: ['const LANE: i32']
|
||||
safety: safe
|
||||
types:
|
||||
- [int32x4_t, int16x4_t, int16x4_t, '2', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32]']
|
||||
- [int32x4_t, int16x4_t, int16x8_t, '3', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32]']
|
||||
- [int32x4_t, int16x4_t, int16x4_t, '2']
|
||||
- [int32x4_t, int16x4_t, int16x8_t, '3']
|
||||
compose:
|
||||
- FnCall: [static_assert_uimm_bits!, [LANE, '{type[3]}']]
|
||||
- FnCall:
|
||||
- "vmlsl_{neon_type[1]}"
|
||||
- - a
|
||||
- b
|
||||
- FnCall: [simd_shuffle!, [c, c, "{type[4]}"]]
|
||||
- FnCall: ['vdup_lane{neon_type[2].no}', [c], [LANE]]
|
||||
|
||||
- name: "vmlsl_lane{neon_type[2].no}"
|
||||
doc: "Vector widening multiply subtract with scalar"
|
||||
@@ -2167,15 +2188,15 @@ intrinsics:
|
||||
static_defs: ['const LANE: i32']
|
||||
safety: safe
|
||||
types:
|
||||
- [int64x2_t, int32x2_t, int32x2_t, '[LANE as u32, LANE as u32]', '1']
|
||||
- [int64x2_t, int32x2_t, int32x4_t, '[LANE as u32, LANE as u32]', '2']
|
||||
- [int64x2_t, int32x2_t, int32x2_t, '1']
|
||||
- [int64x2_t, int32x2_t, int32x4_t, '2']
|
||||
compose:
|
||||
- FnCall: [static_assert_uimm_bits!, [LANE, "{type[4]}"]]
|
||||
- FnCall: [static_assert_uimm_bits!, [LANE, "{type[3]}"]]
|
||||
- FnCall:
|
||||
- "vmlsl_{neon_type[1]}"
|
||||
- - a
|
||||
- b
|
||||
- FnCall: [simd_shuffle!, [c, c, "{type[3]}"]]
|
||||
- FnCall: ['vdup_lane{neon_type[2].no}', [c], [LANE]]
|
||||
|
||||
- name: "vmlsl_lane{neon_type[2].no}"
|
||||
doc: "Vector widening multiply subtract with scalar"
|
||||
@@ -2191,17 +2212,17 @@ intrinsics:
|
||||
static_defs: ['const LANE: i32']
|
||||
safety: safe
|
||||
types:
|
||||
- [uint32x4_t, uint16x4_t, uint16x4_t, '2', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32]']
|
||||
- [uint32x4_t, uint16x4_t, uint16x8_t, '3', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32]']
|
||||
- [uint64x2_t, uint32x2_t, uint32x2_t, '1', '[LANE as u32, LANE as u32]']
|
||||
- [uint64x2_t, uint32x2_t, uint32x4_t, '2', '[LANE as u32, LANE as u32]']
|
||||
- [uint32x4_t, uint16x4_t, uint16x4_t, '2']
|
||||
- [uint32x4_t, uint16x4_t, uint16x8_t, '3']
|
||||
- [uint64x2_t, uint32x2_t, uint32x2_t, '1']
|
||||
- [uint64x2_t, uint32x2_t, uint32x4_t, '2']
|
||||
compose:
|
||||
- FnCall: [static_assert_uimm_bits!, [LANE, "{type[3]}"]]
|
||||
- FnCall:
|
||||
- "vmlsl_{neon_type[1]}"
|
||||
- - a
|
||||
- b
|
||||
- FnCall: [simd_shuffle!, [c, c, "{type[4]}"]]
|
||||
- FnCall: ['vdup_lane{neon_type[2].no}', [c], [LANE]]
|
||||
|
||||
- name: "vmlsl_{neon_type[1]}"
|
||||
doc: "Unsigned multiply-subtract long"
|
||||
@@ -2681,7 +2702,6 @@ intrinsics:
|
||||
- FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [ld]]}]]
|
||||
- *neon-not-arm-stable
|
||||
- *neon-cfg-arm-unstable
|
||||
big_endian_inverse: false
|
||||
safety:
|
||||
unsafe: [neon]
|
||||
types:
|
||||
@@ -2740,7 +2760,6 @@ intrinsics:
|
||||
- FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [ld]]}]]
|
||||
- *neon-not-arm-stable
|
||||
- *neon-cfg-arm-unstable
|
||||
big_endian_inverse: false
|
||||
safety:
|
||||
unsafe: [neon]
|
||||
types:
|
||||
@@ -2978,7 +2997,6 @@ intrinsics:
|
||||
- FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [ld2]]}]]
|
||||
- *neon-not-arm-stable
|
||||
- *neon-cfg-arm-unstable
|
||||
big_endian_inverse: false
|
||||
safety:
|
||||
unsafe: [neon]
|
||||
types:
|
||||
@@ -3009,7 +3027,6 @@ intrinsics:
|
||||
- FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [nop]]}]]
|
||||
- *neon-not-arm-stable
|
||||
- *neon-cfg-arm-unstable
|
||||
big_endian_inverse: false
|
||||
safety:
|
||||
unsafe: [neon]
|
||||
types:
|
||||
@@ -3106,7 +3123,6 @@ intrinsics:
|
||||
- *neon-cfg-arm-unstable
|
||||
static_defs:
|
||||
- "const LANE: i32"
|
||||
big_endian_inverse: false
|
||||
safety:
|
||||
unsafe: [neon]
|
||||
types:
|
||||
@@ -4095,7 +4111,6 @@ intrinsics:
|
||||
- *neon-not-arm-stable
|
||||
- *neon-cfg-arm-unstable
|
||||
static_defs: ['const LANE: i32']
|
||||
big_endian_inverse: false
|
||||
safety:
|
||||
unsafe: [neon]
|
||||
types:
|
||||
@@ -4126,7 +4141,6 @@ intrinsics:
|
||||
- FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [ld3]]}]]
|
||||
- *neon-not-arm-stable
|
||||
- *neon-cfg-arm-unstable
|
||||
big_endian_inverse: false
|
||||
safety:
|
||||
unsafe: [neon]
|
||||
types:
|
||||
@@ -4490,7 +4504,6 @@ intrinsics:
|
||||
- FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [ld4]]}]]
|
||||
- *neon-not-arm-stable
|
||||
- *neon-cfg-arm-unstable
|
||||
big_endian_inverse: false
|
||||
safety:
|
||||
unsafe: [neon]
|
||||
types:
|
||||
@@ -4612,7 +4625,6 @@ intrinsics:
|
||||
- *neon-not-arm-stable
|
||||
- *neon-cfg-arm-unstable
|
||||
static_defs: ["const LANE: i32"]
|
||||
big_endian_inverse: false
|
||||
safety:
|
||||
unsafe: [neon]
|
||||
types:
|
||||
@@ -6139,10 +6151,11 @@ intrinsics:
|
||||
- *neon-i8mm
|
||||
- *neon-v8
|
||||
- FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, [vusdot]]}]]
|
||||
- FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [usdot]]}]]
|
||||
- FnCall: [cfg_attr, [*all-neon-target-aarch64-arm64ec-little-endian, {FnCall: [assert_instr, [usdot]]}]]
|
||||
- *neon-unstable-i8mm
|
||||
- *neon-cfg-arm-unstable
|
||||
safety: safe
|
||||
big_endian_inverse: true
|
||||
types:
|
||||
- [int32x2_t, uint8x8_t, int8x8_t]
|
||||
- [int32x4_t, uint8x16_t, int8x16_t]
|
||||
@@ -6155,10 +6168,10 @@ intrinsics:
|
||||
- link: "llvm.arm.neon.usdot.v{neon_type[0].lane}i32.v{neon_type[1].lane}i8"
|
||||
arch: arm
|
||||
|
||||
- name: "vusdot{type[0]}"
|
||||
- name: "vusdot{neon_type[0].lane_nox}"
|
||||
doc: "Dot product index form with unsigned and signed integers"
|
||||
arguments: ["a: {neon_type[1]}", "b: {neon_type[2]}", "c: int8x8_t"]
|
||||
return_type: "{neon_type[1]}"
|
||||
arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}", "c: int8x8_t"]
|
||||
return_type: "{neon_type[0]}"
|
||||
attr:
|
||||
- *neon-i8mm
|
||||
- *neon-v8
|
||||
@@ -6168,22 +6181,19 @@ intrinsics:
|
||||
- *neon-unstable-i8mm
|
||||
- *neon-cfg-arm-unstable
|
||||
static_defs: ["const LANE: i32"]
|
||||
big_endian_inverse: true # TODO: Remove this attribute, and replace transmute with vreinterpret when https://github.com/llvm/llvm-project/pull/169337 is merged, LLVM inlining issue causing assertion failure.
|
||||
safety: safe
|
||||
types:
|
||||
- ['_lane_s32', int32x2_t, uint8x8_t, '[LANE as u32, LANE as u32]','']
|
||||
- ['q_lane_s32', int32x4_t, uint8x16_t, '[LANE as u32, LANE as u32, LANE as u32, LANE as u32]','q']
|
||||
- [int32x2_t, uint8x8_t, '']
|
||||
- [int32x4_t, uint8x16_t, 'q']
|
||||
compose:
|
||||
- FnCall: [static_assert_uimm_bits!, [LANE, '1']]
|
||||
- Let:
|
||||
- c
|
||||
- int32x2_t
|
||||
- FnCall: [transmute, [c]] #- FnCall: ['vreinterpret_s32_s8', [c]]
|
||||
- FnCall: ['vreinterpret_s32_s8', [c]]
|
||||
- Let:
|
||||
- c
|
||||
- "{type[1]}"
|
||||
- FnCall: [simd_shuffle!, [c, c, "{type[3]}"]]
|
||||
- FnCall: ["vusdot{neon_type[1].no}", [a, b, {FnCall: [transmute, [c]]}]] #'vreinterpret{type[4]}_s8_s32'
|
||||
- FnCall: ['vdup{neon_type[0].lane_nox}', [c], [LANE]]
|
||||
- FnCall: ["vusdot{neon_type[0].no}", [a, b, {FnCall: ['vreinterpret{type[2]}_s8_s32', [c]]}]]
|
||||
|
||||
- name: "vsudot{neon_type[0].lane_nox}"
|
||||
doc: "Dot product index form with signed and unsigned integers"
|
||||
@@ -6193,30 +6203,27 @@ intrinsics:
|
||||
- *neon-i8mm
|
||||
- *neon-v8
|
||||
- FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, [vsudot, 'LANE = 0']]}]]
|
||||
- FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [sudot, 'LANE = 0']]}]]
|
||||
- FnCall: [cfg_attr, [*all-neon-target-aarch64-arm64ec-little-endian, {FnCall: [assert_instr, [sudot, 'LANE = 0']]}]]
|
||||
- FnCall: [rustc_legacy_const_generics, ['3']]
|
||||
- *neon-unstable-i8mm
|
||||
- *neon-cfg-arm-unstable
|
||||
static_defs: ["const LANE: i32"]
|
||||
big_endian_inverse: true # TODO: Remove this attribute, and replace transmute with vreinterpret when https://github.com/llvm/llvm-project/pull/169337 is merged, LLVM inlining issue causing assertion failure.
|
||||
safety: safe
|
||||
types:
|
||||
- [int32x2_t, int8x8_t, uint8x8_t, '[LANE as u32, LANE as u32]', uint32x2_t,'']
|
||||
- [int32x4_t, int8x16_t, uint8x8_t, '[LANE as u32, LANE as u32, LANE as u32, LANE as u32]', uint32x4_t,'q']
|
||||
- [int32x2_t, int8x8_t, uint8x8_t, uint32x2_t, '']
|
||||
- [int32x4_t, int8x16_t, uint8x8_t, uint32x4_t, 'q']
|
||||
compose:
|
||||
- FnCall: [static_assert_uimm_bits!, [LANE, '1']]
|
||||
- Let:
|
||||
- c
|
||||
- uint32x2_t
|
||||
- FnCall: [transmute, [c]] #- FnCall: ['vreinterpret_u32_u8', [c]]
|
||||
- FnCall: ['vreinterpret_u32_u8', [c]]
|
||||
- Let:
|
||||
- c
|
||||
- "{type[4]}"
|
||||
- FnCall: [simd_shuffle!, [c, c, "{type[3]}"]]
|
||||
- FnCall: ['vdup{neon_type[3].lane_nox}', [c], [LANE]]
|
||||
- FnCall:
|
||||
- "vusdot{neon_type[0].no}"
|
||||
- - a
|
||||
- FnCall: [transmute, [c]] #- FnCall: ['vreinterpret{type[5]}_u8_u32', [c]]
|
||||
- FnCall: ['vreinterpret{type[4]}_u8_u32', [c]]
|
||||
- b
|
||||
|
||||
- name: "vmul{neon_type[1].no}"
|
||||
@@ -6298,20 +6305,20 @@ intrinsics:
|
||||
static_defs: ["const LANE: i32"]
|
||||
safety: safe
|
||||
types:
|
||||
- [int16x4_t, int16x4_t, '2', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32]']
|
||||
- [int16x8_t, int16x4_t, '2', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32]']
|
||||
- [int32x2_t, int32x2_t, '1', '[LANE as u32, LANE as u32]']
|
||||
- [int32x4_t, int32x2_t, '1', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32]']
|
||||
- [uint16x4_t, uint16x4_t, '2', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32]']
|
||||
- [uint16x8_t, uint16x4_t, '2', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32]']
|
||||
- [uint32x2_t, uint32x2_t, '1', '[LANE as u32, LANE as u32]']
|
||||
- [uint32x4_t, uint32x2_t, '1', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32]']
|
||||
- [int16x4_t, int16x4_t, '2']
|
||||
- [int16x8_t, int16x4_t, '2']
|
||||
- [int32x2_t, int32x2_t, '1']
|
||||
- [int32x4_t, int32x2_t, '1']
|
||||
- [uint16x4_t, uint16x4_t, '2']
|
||||
- [uint16x8_t, uint16x4_t, '2']
|
||||
- [uint32x2_t, uint32x2_t, '1']
|
||||
- [uint32x4_t, uint32x2_t, '1']
|
||||
compose:
|
||||
- FnCall: ["static_assert_uimm_bits!", [LANE, "{type[2]}"]]
|
||||
- FnCall:
|
||||
- simd_mul
|
||||
- - a
|
||||
- FnCall: ["simd_shuffle!", [b, b, "{type[3]}"]]
|
||||
- FnCall: ["vdup{neon_type[0].lane_nox}", [b], [LANE]]
|
||||
|
||||
|
||||
- name: "vmul{neon_type[0].lane_nox}"
|
||||
@@ -6330,14 +6337,14 @@ intrinsics:
|
||||
static_defs: ["const LANE: i32"]
|
||||
safety: safe
|
||||
types:
|
||||
- [float16x4_t, float16x4_t, '2', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32]']
|
||||
- [float16x8_t, float16x4_t, '2', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32]']
|
||||
- [float16x4_t, float16x4_t, '2']
|
||||
- [float16x8_t, float16x4_t, '2']
|
||||
compose:
|
||||
- FnCall: ["static_assert_uimm_bits!", [LANE, "{type[2]}"]]
|
||||
- FnCall:
|
||||
- simd_mul
|
||||
- - a
|
||||
- FnCall: ["simd_shuffle!", [v, v, "{type[3]}"]]
|
||||
- FnCall: ["vdup{neon_type[0].lane_nox}", [v], [LANE]]
|
||||
|
||||
|
||||
- name: "vmul{neon_type[0].laneq_nox}"
|
||||
@@ -6354,20 +6361,20 @@ intrinsics:
|
||||
static_defs: ["const LANE: i32"]
|
||||
safety: safe
|
||||
types:
|
||||
- [int16x4_t, int16x8_t, '3', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32]']
|
||||
- [int16x8_t, int16x8_t, '3', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32]']
|
||||
- [int32x2_t, int32x4_t, '2', '[LANE as u32, LANE as u32]']
|
||||
- [int32x4_t, int32x4_t, '2', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32]']
|
||||
- [uint16x4_t, uint16x8_t, '3', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32]']
|
||||
- [uint16x8_t, uint16x8_t, '3', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32]']
|
||||
- [uint32x2_t, uint32x4_t, '2', '[LANE as u32, LANE as u32]']
|
||||
- [uint32x4_t, uint32x4_t, '2', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32]']
|
||||
- [int16x4_t, int16x8_t, '3']
|
||||
- [int16x8_t, int16x8_t, '3']
|
||||
- [int32x2_t, int32x4_t, '2']
|
||||
- [int32x4_t, int32x4_t, '2']
|
||||
- [uint16x4_t, uint16x8_t, '3']
|
||||
- [uint16x8_t, uint16x8_t, '3']
|
||||
- [uint32x2_t, uint32x4_t, '2']
|
||||
- [uint32x4_t, uint32x4_t, '2']
|
||||
compose:
|
||||
- FnCall: ["static_assert_uimm_bits!", [LANE, "{type[2]}"]]
|
||||
- FnCall:
|
||||
- simd_mul
|
||||
- - a
|
||||
- FnCall: ["simd_shuffle!", [b, b, "{type[3]}"]]
|
||||
- FnCall: ["vdup{neon_type[0].laneq_nox}", [b], [LANE]]
|
||||
|
||||
- name: "vmull{neon_type[1].no}"
|
||||
doc: Signed multiply long
|
||||
@@ -6798,23 +6805,22 @@ intrinsics:
|
||||
attr:
|
||||
- *neon-v7
|
||||
- FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, ["vsubhn"]]}]]
|
||||
- FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [subhn2]]}]]
|
||||
- FnCall: [cfg_attr, [*all-neon-target-aarch64-arm64ec-little-endian, {FnCall: [assert_instr, [subhn2]]}]]
|
||||
- *neon-not-arm-stable
|
||||
- *neon-cfg-arm-unstable
|
||||
safety: safe
|
||||
types:
|
||||
- [int8x8_t, int16x8_t, int8x16_t, '[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]']
|
||||
- [int16x4_t, int32x4_t, int16x8_t, '[0, 1, 2, 3, 4, 5, 6, 7]']
|
||||
- [int32x2_t, int64x2_t, int32x4_t, '[0, 1, 2, 3]']
|
||||
- [uint8x8_t, uint16x8_t, uint8x16_t, '[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]']
|
||||
- [uint16x4_t, uint32x4_t, uint16x8_t, '[0, 1, 2, 3, 4, 5, 6, 7]']
|
||||
- [uint32x2_t, uint64x2_t, uint32x4_t, '[0, 1, 2, 3]']
|
||||
- [int8x8_t, int16x8_t, int8x16_t]
|
||||
- [int16x4_t, int32x4_t, int16x8_t]
|
||||
- [int32x2_t, int64x2_t, int32x4_t]
|
||||
- [uint8x8_t, uint16x8_t, uint8x16_t]
|
||||
- [uint16x4_t, uint32x4_t, uint16x8_t]
|
||||
- [uint32x2_t, uint64x2_t, uint32x4_t]
|
||||
compose:
|
||||
- Let:
|
||||
- d
|
||||
- "{neon_type[0]}"
|
||||
- FnCall: ["vsubhn{neon_type[1].noq}", [b, c]]
|
||||
- FnCall: [simd_shuffle!, [a, d, "{type[3]}"]]
|
||||
- FnCall: ['vcombine_{neon_type[0]}', [a, d]]
|
||||
|
||||
- name: "vhsub{neon_type[1].no}"
|
||||
doc: "Signed halving subtract"
|
||||
@@ -6968,7 +6974,6 @@ intrinsics:
|
||||
doc: "Dot product index form with unsigned and signed integers"
|
||||
arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}", "c: {neon_type[2]}"]
|
||||
return_type: "{neon_type[0]}"
|
||||
big_endian_inverse: true # TODO: Remove this attribute, and replace transmute with vreinterpret when https://github.com/llvm/llvm-project/pull/169337 is merged, LLVM inlining issue causing assertion failure.
|
||||
attr:
|
||||
- *neon-v8
|
||||
- *neon-i8mm
|
||||
@@ -6979,14 +6984,13 @@ intrinsics:
|
||||
static_defs: ["const LANE: i32"]
|
||||
safety: safe
|
||||
types:
|
||||
- [int32x2_t, uint8x8_t, int8x16_t, '[LANE as u32, LANE as u32]','']
|
||||
- [int32x4_t, uint8x16_t, int8x16_t, '[LANE as u32, LANE as u32, LANE as u32, LANE as u32]','q']
|
||||
- [int32x2_t, uint8x8_t, int8x16_t, '']
|
||||
- [int32x4_t, uint8x16_t, int8x16_t, 'q']
|
||||
compose:
|
||||
- FnCall: [static_assert_uimm_bits!, [LANE, '2']]
|
||||
- Let: [c, int32x4_t, {FnCall: [transmute, [c]]}]
|
||||
- Let: [c, "{neon_type[0]}", {FnCall: [simd_shuffle!, [c, c, "{type[3]}"]]}]
|
||||
- FnCall: ["vusdot{neon_type[0].no}", [a, b, {FnCall: [transmute, [c]]}]]
|
||||
#- FnCall: ["vusdot{neon_type[0].no}", [a, b, {FnCall: ['vreinterpret{type[4]}_s8_s32', [c]]}]]
|
||||
- Let: [c, {FnCall: [vreinterpretq_s32_s8, [c]]}]
|
||||
- Let: [c, {FnCall: ['vdup{neon_type[0].laneq_nox}', [c], [LANE]]}]
|
||||
- FnCall: ["vusdot{neon_type[0].no}", [a, b, {FnCall: ['vreinterpret{type[3]}_s8_s32', [c]]}]]
|
||||
|
||||
- name: "vsudot{neon_type[0].laneq_nox}"
|
||||
doc: "Dot product index form with signed and unsigned integers"
|
||||
@@ -6996,28 +7000,26 @@ intrinsics:
|
||||
- *neon-v8
|
||||
- *neon-i8mm
|
||||
- FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, [vsudot, 'LANE = 1']]}]]
|
||||
- FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [sudot, 'LANE = 3']]}]]
|
||||
- FnCall: [cfg_attr, [*all-neon-target-aarch64-arm64ec-little-endian, {FnCall: [assert_instr, [sudot, 'LANE = 3']]}]]
|
||||
- FnCall: [rustc_legacy_const_generics, ['3']]
|
||||
- FnCall: [unstable, ['feature = "stdarch_neon_i8mm"', 'issue = "117223"']]
|
||||
static_defs: ["const LANE: i32"]
|
||||
safety: safe
|
||||
types:
|
||||
- [int32x2_t, int8x8_t, uint8x16_t, '[LANE as u32, LANE as u32]', uint32x2_t]
|
||||
- [int32x4_t, int8x16_t, uint8x16_t, '[LANE as u32, LANE as u32, LANE as u32, LANE as u32]', uint32x4_t]
|
||||
- [int32x2_t, int8x8_t, uint8x16_t, uint32x2_t, '']
|
||||
- [int32x4_t, int8x16_t, uint8x16_t, uint32x4_t, 'q']
|
||||
compose:
|
||||
- FnCall: [static_assert_uimm_bits!, [LANE, 2]]
|
||||
- Let:
|
||||
- c
|
||||
- uint32x4_t
|
||||
- FnCall: [transmute, [c]]
|
||||
- FnCall: [vreinterpretq_u32_u8, [c]]
|
||||
- Let:
|
||||
- c
|
||||
- "{type[4]}"
|
||||
- FnCall: [simd_shuffle!, [c, c, "{type[3]}"]]
|
||||
- FnCall: ['vdup{neon_type[3].laneq_nox}', [c], [LANE]]
|
||||
- FnCall:
|
||||
- "vusdot{neon_type[0].no}"
|
||||
- - a
|
||||
- FnCall: [transmute, [c]]
|
||||
- FnCall: ['vreinterpret{type[4]}_u8_u32', [c]]
|
||||
- b
|
||||
|
||||
- name: "vdot{neon_type[0].laneq_nox}"
|
||||
@@ -7025,7 +7027,6 @@ intrinsics:
|
||||
arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}", "c: {neon_type[2]}"]
|
||||
return_type: "{neon_type[0]}"
|
||||
static_defs: ["const LANE: i32"]
|
||||
big_endian_inverse: true # TODO: Remove this attribute, and replace transmute with vreinterpret when https://github.com/llvm/llvm-project/pull/169337 is merged, LLVM inlining issue causing assertion failure.
|
||||
attr:
|
||||
- *neon-v8
|
||||
- FnCall: [target_feature, ['enable = "neon,dotprod"']]
|
||||
@@ -7035,32 +7036,27 @@ intrinsics:
|
||||
- FnCall: [unstable, ['feature = "stdarch_neon_dotprod"', 'issue = "117224"']]
|
||||
safety: safe
|
||||
types:
|
||||
- [int32x2_t, int8x8_t, int8x16_t, int32x4_t, '[LANE as u32, LANE as u32]', '']
|
||||
- [int32x4_t, int8x16_t, int8x16_t, int32x4_t, '[LANE as u32, LANE as u32, LANE as u32, LANE as u32]','q']
|
||||
- [int32x2_t, int8x8_t, int8x16_t, '']
|
||||
- [int32x4_t, int8x16_t, int8x16_t, 'q']
|
||||
compose:
|
||||
- FnCall: [static_assert_uimm_bits!, [LANE, '2']]
|
||||
- Let:
|
||||
- c
|
||||
- "{neon_type[3]}"
|
||||
- FnCall: [transmute, [c]]
|
||||
#- FnCall: ['vreinterpretq_{neon_type[0]}_{neon_type[1]}', [c]]
|
||||
- FnCall: [vreinterpretq_s32_s8, [c]]
|
||||
- Let:
|
||||
- c
|
||||
- "{neon_type[0]}"
|
||||
- FnCall: [simd_shuffle!, [c, c, '{type[4]}']]
|
||||
- FnCall: ['vdup{neon_type[0].laneq_nox}', [c], [LANE]]
|
||||
- FnCall:
|
||||
- "vdot{neon_type[0].no}"
|
||||
- - a
|
||||
- b
|
||||
- FnCall: [transmute, [c]]
|
||||
#- FnCall: ['vreinterpret{type[5]}_{neon_type[1]}_{neon_type[0]}', [c]]
|
||||
- FnCall: ['vreinterpret{type[3]}_s8_s32', [c]]
|
||||
|
||||
- name: "vdot{neon_type[0].laneq_nox}"
|
||||
doc: Dot product arithmetic (indexed)
|
||||
arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}", "c: {neon_type[2]}"]
|
||||
return_type: "{neon_type[0]}"
|
||||
static_defs: ["const LANE: i32"]
|
||||
big_endian_inverse: true # TODO: Remove this attribute, and replace transmute with vreinterpret when https://github.com/llvm/llvm-project/pull/169337 is merged, LLVM inlining issue causing assertion failure.
|
||||
attr:
|
||||
- *neon-v8
|
||||
- FnCall: [target_feature, ['enable = "neon,dotprod"']]
|
||||
@@ -7070,25 +7066,21 @@ intrinsics:
|
||||
- FnCall: [unstable, ['feature = "stdarch_neon_dotprod"', 'issue = "117224"']]
|
||||
safety: safe
|
||||
types:
|
||||
- [uint32x2_t, uint8x8_t, uint8x16_t, uint32x4_t, '[LANE as u32, LANE as u32]','']
|
||||
- [uint32x4_t, uint8x16_t, uint8x16_t, uint32x4_t, '[LANE as u32, LANE as u32, LANE as u32, LANE as u32]','q']
|
||||
- [uint32x2_t, uint8x8_t, uint8x16_t, '']
|
||||
- [uint32x4_t, uint8x16_t, uint8x16_t, 'q']
|
||||
compose:
|
||||
- FnCall: [static_assert_uimm_bits!, [LANE, '2']]
|
||||
- Let:
|
||||
- c
|
||||
- "{neon_type[3]}"
|
||||
- FnCall: [transmute, [c]]
|
||||
#- FnCall: ['vreinterpretq_{neon_type[0]}_{neon_type[1]}', [c]]
|
||||
- FnCall: ['vreinterpretq_u32_u8', [c]]
|
||||
- Let:
|
||||
- c
|
||||
- "{neon_type[0]}"
|
||||
- FnCall: [simd_shuffle!, [c, c, '{type[4]}']]
|
||||
- FnCall: ['vdup{neon_type[0].laneq_nox}', [c], [LANE]]
|
||||
- FnCall:
|
||||
- "vdot{neon_type[0].no}"
|
||||
- - a
|
||||
- b
|
||||
- FnCall: [transmute, [c]]
|
||||
#- FnCall: ['vreinterpret{type[5]}_{neon_type[1]}_{neon_type[0]}', [c]]
|
||||
- FnCall: ['vreinterpret{type[3]}_u8_u32', [c]]
|
||||
|
||||
- name: "vdot{neon_type[0].no}"
|
||||
doc: Dot product arithmetic (vector)
|
||||
@@ -7102,6 +7094,7 @@ intrinsics:
|
||||
- FnCall: [cfg_attr, [{FnCall: [not, ['target_arch = "arm"']]}, {FnCall: [unstable, ['feature = "stdarch_neon_dotprod"', 'issue = "117224"']]}]]
|
||||
- *neon-cfg-arm-unstable
|
||||
safety: safe
|
||||
big_endian_inverse: true
|
||||
types:
|
||||
- [int32x2_t, int8x8_t]
|
||||
- [int32x4_t, int8x16_t]
|
||||
@@ -7126,6 +7119,7 @@ intrinsics:
|
||||
- FnCall: [cfg_attr, [{FnCall: [not, ['target_arch = "arm"']]}, {FnCall: [unstable, ['feature = "stdarch_neon_dotprod"', 'issue = "117224"']]}]]
|
||||
- *neon-cfg-arm-unstable
|
||||
safety: safe
|
||||
big_endian_inverse: true
|
||||
types:
|
||||
- [uint32x2_t, uint8x8_t]
|
||||
- [uint32x4_t, uint8x16_t]
|
||||
@@ -7151,26 +7145,23 @@ intrinsics:
|
||||
- FnCall: [rustc_legacy_const_generics, ['3']]
|
||||
- FnCall: [cfg_attr, [{FnCall: [not, ['target_arch = "arm"']]}, {FnCall: [unstable, ['feature = "stdarch_neon_dotprod"', 'issue = "117224"']]}]]
|
||||
- *neon-cfg-arm-unstable
|
||||
big_endian_inverse: true # TODO: Remove this attribute, and replace transmute with vreinterpret when https://github.com/llvm/llvm-project/pull/169337 is merged, LLVM inlining issue causing assertion failure.
|
||||
safety: safe
|
||||
types:
|
||||
- [int32x2_t, int8x8_t, int8x8_t, int32x2_t, '[LANE as u32, LANE as u32]','']
|
||||
- [int32x4_t, int8x16_t, int8x8_t, int32x2_t, '[LANE as u32, LANE as u32, LANE as u32, LANE as u32]','q']
|
||||
- [int32x2_t, int8x8_t, int8x8_t, '']
|
||||
- [int32x4_t, int8x16_t, int8x8_t, 'q']
|
||||
compose:
|
||||
- FnCall: [static_assert_uimm_bits!, [LANE, '1']]
|
||||
- Let:
|
||||
- c
|
||||
- "{neon_type[3]}"
|
||||
- FnCall: [transmute, [c]]
|
||||
- FnCall: ['vreinterpret_s32_s8', [c]]
|
||||
- Let:
|
||||
- c
|
||||
- "{neon_type[0]}"
|
||||
- FnCall: [simd_shuffle!, [c, c, '{type[4]}']]
|
||||
- FnCall: ['vdup{neon_type[0].lane_nox}', [c], [LANE]]
|
||||
- FnCall:
|
||||
- "vdot{neon_type[0].no}"
|
||||
- - a
|
||||
- b
|
||||
- FnCall: [transmute, [c]]
|
||||
- FnCall: ['vreinterpret{type[3]}_s8_s32', [c]]
|
||||
|
||||
- name: "vdot{neon_type[0].lane_nox}"
|
||||
doc: Dot product arithmetic (indexed)
|
||||
@@ -7186,25 +7177,22 @@ intrinsics:
|
||||
- FnCall: [cfg_attr, [{FnCall: [not, ['target_arch = "arm"']]}, {FnCall: [unstable, ['feature = "stdarch_neon_dotprod"', 'issue = "117224"']]}]]
|
||||
- *neon-cfg-arm-unstable
|
||||
safety: safe
|
||||
big_endian_inverse: true # TODO: Remove this attribute, and replace transmute with vreinterpret when https://github.com/llvm/llvm-project/pull/169337 is merged, LLVM inlining issue causing assertion failure.
|
||||
types:
|
||||
- [uint32x2_t, uint8x8_t, uint8x8_t, uint32x2_t, '[LANE as u32, LANE as u32]','']
|
||||
- [uint32x4_t, uint8x16_t, uint8x8_t, uint32x2_t, '[LANE as u32, LANE as u32, LANE as u32, LANE as u32]','q']
|
||||
- [uint32x2_t, uint8x8_t, uint8x8_t, '']
|
||||
- [uint32x4_t, uint8x16_t, uint8x8_t, 'q']
|
||||
compose:
|
||||
- FnCall: [static_assert_uimm_bits!, [LANE, '1']]
|
||||
- Let:
|
||||
- c
|
||||
- "{neon_type[3]}"
|
||||
- FnCall: [transmute, [c]] #- FnCall: ['vreinterpret_{neon_type[0]}_{neon_type[1]}', [c]]
|
||||
- FnCall: ['vreinterpret_u32_u8', [c]]
|
||||
- Let:
|
||||
- c
|
||||
- "{neon_type[0]}"
|
||||
- FnCall: [simd_shuffle!, [c, c, '{type[4]}']]
|
||||
- FnCall: ['vdup{neon_type[0].lane_nox}', [c], [LANE]]
|
||||
- FnCall:
|
||||
- "vdot{neon_type[0].no}"
|
||||
- - a
|
||||
- b
|
||||
- FnCall: [transmute, [c]] #- FnCall: ['vreinterpret{type[5]}_{neon_type[1]}_{neon_type[0]}', [c]]
|
||||
- FnCall: ['vreinterpret{type[3]}_u8_u32', [c]]
|
||||
|
||||
- name: "vmax{neon_type.no}"
|
||||
doc: Maximum (vector)
|
||||
@@ -7505,6 +7493,7 @@ intrinsics:
|
||||
- *neon-not-arm-stable
|
||||
- *neon-cfg-arm-unstable
|
||||
safety: safe
|
||||
big_endian_inverse: true
|
||||
types:
|
||||
- float32x2_t
|
||||
compose:
|
||||
@@ -7529,6 +7518,7 @@ intrinsics:
|
||||
- *neon-cfg-arm-unstable
|
||||
- *target-not-arm64ec
|
||||
safety: safe
|
||||
big_endian_inverse: true
|
||||
types:
|
||||
- float16x4_t
|
||||
compose:
|
||||
@@ -7598,7 +7588,7 @@ intrinsics:
|
||||
- [int16x4_t, int16x4_t, int32x4_t, '[N as u32; 4]']
|
||||
compose:
|
||||
- FnCall: [static_assert_uimm_bits!, [N, '2']]
|
||||
- Let: [b, "{neon_type[0]}", {FnCall: [simd_shuffle!, [b, b, "{type[3]}"]]}]
|
||||
- Let: [b, {FnCall: ['vdup{neon_type[0].lane_nox}', [b], [N]]}]
|
||||
- FnCall: [vqdmull_s16, [a, b]]
|
||||
|
||||
- name: "vqdmull_lane_s32"
|
||||
@@ -7615,10 +7605,10 @@ intrinsics:
|
||||
static_defs: ['const N: i32']
|
||||
safety: safe
|
||||
types:
|
||||
- [int32x2_t, int32x2_t, int64x2_t, '[N as u32; 2]']
|
||||
- [int32x2_t, int32x2_t, int64x2_t]
|
||||
compose:
|
||||
- FnCall: [static_assert_uimm_bits!, [N, '1']]
|
||||
- Let: [b, "{neon_type[0]}", {FnCall: [simd_shuffle!, [b, b, "{type[3]}"]]}]
|
||||
- Let: [b, {FnCall: ['vdup{neon_type[0].lane_nox}', [b], [N]]}]
|
||||
- FnCall: [vqdmull_s32, [a, b]]
|
||||
|
||||
- name: "vqdmlal{neon_type[1].noq}"
|
||||
@@ -7662,7 +7652,7 @@ intrinsics:
|
||||
attr:
|
||||
- *neon-v7
|
||||
- FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, [vqdmlal, N = 2]]}]]
|
||||
- FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [sqdmlal, 'N = 2']]}]]
|
||||
- FnCall: [cfg_attr, [*all-neon-target-aarch64-arm64ec-little-endian, {FnCall: [assert_instr, [sqdmlal, 'N = 2']]}]]
|
||||
- FnCall: [rustc_legacy_const_generics, ['3']]
|
||||
- *neon-not-arm-stable
|
||||
- *neon-cfg-arm-unstable
|
||||
@@ -7681,7 +7671,7 @@ intrinsics:
|
||||
attr:
|
||||
- *neon-v7
|
||||
- FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, [vqdmlal, N = 1]]}]]
|
||||
- FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [sqdmlal, 'N = 1']]}]]
|
||||
- FnCall: [cfg_attr, [*all-neon-target-aarch64-arm64ec-little-endian, {FnCall: [assert_instr, [sqdmlal, 'N = 1']]}]]
|
||||
- FnCall: [rustc_legacy_const_generics, ['3']]
|
||||
- *neon-not-arm-stable
|
||||
- *neon-cfg-arm-unstable
|
||||
@@ -7734,7 +7724,7 @@ intrinsics:
|
||||
attr:
|
||||
- *neon-v7
|
||||
- FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, [vqdmlsl, N = 2]]}]]
|
||||
- FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [sqdmlsl, 'N = 2']]}]]
|
||||
- FnCall: [cfg_attr, [*all-neon-target-aarch64-arm64ec-little-endian, {FnCall: [assert_instr, [sqdmlsl, 'N = 2']]}]]
|
||||
- FnCall: [rustc_legacy_const_generics, ['3']]
|
||||
- *neon-not-arm-stable
|
||||
- *neon-cfg-arm-unstable
|
||||
@@ -7753,7 +7743,7 @@ intrinsics:
|
||||
attr:
|
||||
- *neon-v7
|
||||
- FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, [vqdmlsl, N = 1]]}]]
|
||||
- FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [sqdmlsl, 'N = 1']]}]]
|
||||
- FnCall: [cfg_attr, [*all-neon-target-aarch64-arm64ec-little-endian, {FnCall: [assert_instr, [sqdmlsl, 'N = 1']]}]]
|
||||
- FnCall: [rustc_legacy_const_generics, ['3']]
|
||||
- *neon-not-arm-stable
|
||||
- *neon-cfg-arm-unstable
|
||||
@@ -8500,60 +8490,18 @@ intrinsics:
|
||||
- *neon-cfg-arm-unstable
|
||||
safety: safe
|
||||
types:
|
||||
- [poly64x1_t, int32x2_t]
|
||||
- [poly64x1_t, uint32x2_t]
|
||||
- [poly64x2_t, int32x4_t]
|
||||
- [poly64x2_t, uint32x4_t]
|
||||
- [p128, int64x2_t]
|
||||
- [p128, uint64x2_t]
|
||||
- [p128, poly64x2_t]
|
||||
- [poly8x16_t, p128]
|
||||
- [p128, int8x16_t]
|
||||
- [p128, uint8x16_t]
|
||||
- [p128, poly8x16_t]
|
||||
- [int32x2_t, poly64x1_t]
|
||||
- [uint32x2_t, poly64x1_t]
|
||||
- [int32x4_t, poly64x2_t]
|
||||
- [uint32x4_t, poly64x2_t]
|
||||
- [int64x2_t, p128]
|
||||
- [uint64x2_t, p128]
|
||||
- [poly64x2_t, p128]
|
||||
- [poly64x1_t, int16x4_t]
|
||||
- [poly64x1_t, uint16x4_t]
|
||||
- [poly64x1_t, poly16x4_t]
|
||||
- [poly64x2_t, int16x8_t]
|
||||
- [poly64x2_t, uint16x8_t]
|
||||
- [poly64x2_t, poly16x8_t]
|
||||
- [p128, int32x4_t]
|
||||
- [p128, uint32x4_t]
|
||||
- [poly16x4_t, poly64x1_t]
|
||||
- [int16x4_t, poly64x1_t]
|
||||
- [uint16x4_t, poly64x1_t]
|
||||
- [poly16x8_t, poly64x2_t]
|
||||
- [int16x8_t, poly64x2_t]
|
||||
- [uint16x8_t, poly64x2_t]
|
||||
- [int32x4_t, p128]
|
||||
- [uint32x4_t, p128]
|
||||
- [poly64x1_t, int8x8_t]
|
||||
- [poly64x1_t, uint8x8_t]
|
||||
- [poly64x1_t, poly8x8_t]
|
||||
- [poly64x2_t, int8x16_t]
|
||||
- [poly64x2_t, uint8x16_t]
|
||||
- [poly64x2_t, poly8x16_t]
|
||||
- [p128, int16x8_t]
|
||||
- [p128, uint16x8_t]
|
||||
- [p128, poly16x8_t]
|
||||
- [poly8x8_t, poly64x1_t]
|
||||
- [int8x8_t, poly64x1_t]
|
||||
- [uint8x8_t, poly64x1_t]
|
||||
- [poly8x16_t, poly64x2_t]
|
||||
- [int8x16_t, poly64x2_t]
|
||||
- [uint8x16_t, poly64x2_t]
|
||||
- [int16x8_t, p128]
|
||||
- [uint16x8_t, p128]
|
||||
- [poly16x8_t, p128]
|
||||
- [int8x16_t, p128]
|
||||
- [uint8x16_t, p128]
|
||||
compose:
|
||||
- FnCall: [transmute, [a]]
|
||||
|
||||
@@ -8569,38 +8517,48 @@ intrinsics:
|
||||
- *neon-cfg-arm-unstable
|
||||
safety: safe
|
||||
types:
|
||||
- [uint8x8_t, int8x8_t]
|
||||
- [poly8x8_t, int8x8_t]
|
||||
- [poly16x4_t, int16x4_t]
|
||||
- [uint16x4_t, int16x4_t]
|
||||
- [uint32x2_t, int32x2_t]
|
||||
- [uint64x1_t, int64x1_t]
|
||||
- [uint8x16_t, int8x16_t]
|
||||
- [poly8x16_t, int8x16_t]
|
||||
- [poly16x8_t, int16x8_t]
|
||||
- [uint16x8_t, int16x8_t]
|
||||
- [uint32x4_t, int32x4_t]
|
||||
- [uint64x2_t, int64x2_t]
|
||||
- [poly8x8_t, uint8x8_t]
|
||||
- [int8x8_t, uint8x8_t]
|
||||
- [poly16x4_t, uint16x4_t]
|
||||
- [int16x4_t, uint16x4_t]
|
||||
- [int32x2_t, uint32x2_t]
|
||||
- [int64x1_t, uint64x1_t]
|
||||
- [poly8x16_t, uint8x16_t]
|
||||
- [int8x16_t, uint8x16_t]
|
||||
- [poly16x8_t, uint16x8_t]
|
||||
- [int16x8_t, uint16x8_t]
|
||||
- [int32x4_t, uint32x4_t]
|
||||
- [int64x2_t, uint64x2_t]
|
||||
- [int8x8_t, poly8x8_t]
|
||||
- [uint8x8_t, poly8x8_t]
|
||||
- [int16x4_t, poly16x4_t]
|
||||
- [uint16x4_t, poly16x4_t]
|
||||
- [int8x16_t, poly8x16_t]
|
||||
- [uint8x16_t, poly8x16_t]
|
||||
- [int16x8_t, poly16x8_t]
|
||||
- [uint16x8_t, poly16x8_t]
|
||||
- [poly64x1_t, int32x2_t]
|
||||
- [poly64x1_t, uint32x2_t]
|
||||
- [poly8x16_t, p128]
|
||||
- [int32x2_t, poly64x1_t]
|
||||
- [uint32x2_t, poly64x1_t]
|
||||
- [int32x4_t, poly64x2_t]
|
||||
- [uint32x4_t, poly64x2_t]
|
||||
- [int64x2_t, p128]
|
||||
- [uint64x2_t, p128]
|
||||
- [poly64x1_t, int16x4_t]
|
||||
- [poly64x1_t, uint16x4_t]
|
||||
- [poly64x1_t, poly16x4_t]
|
||||
- [poly64x2_t, poly16x8_t]
|
||||
- [poly16x4_t, poly64x1_t]
|
||||
- [int16x4_t, poly64x1_t]
|
||||
- [uint16x4_t, poly64x1_t]
|
||||
- [poly16x8_t, poly64x2_t]
|
||||
- [int16x8_t, poly64x2_t]
|
||||
- [uint16x8_t, poly64x2_t]
|
||||
- [int32x4_t, p128]
|
||||
- [uint32x4_t, p128]
|
||||
- [poly64x1_t, int8x8_t]
|
||||
- [poly64x1_t, uint8x8_t]
|
||||
- [poly64x1_t, poly8x8_t]
|
||||
- [poly64x2_t, poly8x16_t]
|
||||
- [poly8x8_t, poly64x1_t]
|
||||
- [int8x8_t, poly64x1_t]
|
||||
- [uint8x8_t, poly64x1_t]
|
||||
- [poly8x16_t, poly64x2_t]
|
||||
- [int8x16_t, poly64x2_t]
|
||||
- [uint8x16_t, poly64x2_t]
|
||||
- [int16x8_t, p128]
|
||||
- [uint16x8_t, p128]
|
||||
- [poly16x8_t, p128]
|
||||
- [int8x16_t, p128]
|
||||
- [uint8x16_t, p128]
|
||||
- [poly64x2_t, int32x4_t]
|
||||
- [poly64x2_t, uint32x4_t]
|
||||
- [poly64x2_t, int16x8_t]
|
||||
- [poly64x2_t, uint16x8_t]
|
||||
- [poly64x2_t, int8x16_t]
|
||||
- [poly64x2_t, uint8x16_t]
|
||||
- [int16x4_t, int8x8_t]
|
||||
- [uint16x4_t, int8x8_t]
|
||||
- [poly16x4_t, int8x8_t]
|
||||
@@ -8751,19 +8709,15 @@ intrinsics:
|
||||
- [uint8x16_t, uint64x2_t]
|
||||
- [float32x2_t, int8x8_t]
|
||||
- [float32x2_t, int16x4_t]
|
||||
- [float32x2_t, int32x2_t]
|
||||
- [float32x2_t, int64x1_t]
|
||||
- [float32x4_t, int8x16_t]
|
||||
- [float32x4_t, int16x8_t]
|
||||
- [float32x4_t, int32x4_t]
|
||||
- [float32x4_t, int64x2_t]
|
||||
- [float32x2_t, uint8x8_t]
|
||||
- [float32x2_t, uint16x4_t]
|
||||
- [float32x2_t, uint32x2_t]
|
||||
- [float32x2_t, uint64x1_t]
|
||||
- [float32x4_t, uint8x16_t]
|
||||
- [float32x4_t, uint16x8_t]
|
||||
- [float32x4_t, uint32x4_t]
|
||||
- [float32x4_t, uint64x2_t]
|
||||
- [float32x2_t, poly8x8_t]
|
||||
- [float32x2_t, poly16x4_t]
|
||||
@@ -8772,19 +8726,15 @@ intrinsics:
|
||||
- [float32x4_t, p128]
|
||||
- [int8x8_t, float32x2_t]
|
||||
- [int16x4_t, float32x2_t]
|
||||
- [int32x2_t, float32x2_t]
|
||||
- [int64x1_t, float32x2_t]
|
||||
- [int8x16_t, float32x4_t]
|
||||
- [int16x8_t, float32x4_t]
|
||||
- [int32x4_t, float32x4_t]
|
||||
- [int64x2_t, float32x4_t]
|
||||
- [uint8x8_t, float32x2_t]
|
||||
- [uint16x4_t, float32x2_t]
|
||||
- [uint32x2_t, float32x2_t]
|
||||
- [uint64x1_t, float32x2_t]
|
||||
- [uint8x16_t, float32x4_t]
|
||||
- [uint16x8_t, float32x4_t]
|
||||
- [uint32x4_t, float32x4_t]
|
||||
- [uint64x2_t, float32x4_t]
|
||||
- [poly8x8_t, float32x2_t]
|
||||
- [poly16x4_t, float32x2_t]
|
||||
@@ -8794,6 +8744,111 @@ intrinsics:
|
||||
compose:
|
||||
- FnCall: [transmute, [a]]
|
||||
|
||||
- name: "vreinterpret{neon_type[1].no}{neon_type[0].noq}"
|
||||
doc: Vector reinterpret cast operation
|
||||
arguments: ["a: {type[0]}"]
|
||||
return_type: "{type[1]}"
|
||||
attr:
|
||||
- *neon-v7
|
||||
- FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, [nop]]}]]
|
||||
- FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [nop]]}]]
|
||||
- *neon-not-arm-stable
|
||||
- *neon-cfg-arm-unstable
|
||||
safety: safe
|
||||
types:
|
||||
- [uint8x8_t, int8x8_t]
|
||||
- [poly8x8_t, int8x8_t]
|
||||
- [poly16x4_t, int16x4_t]
|
||||
- [uint16x4_t, int16x4_t]
|
||||
- [uint32x2_t, int32x2_t]
|
||||
- [uint64x1_t, int64x1_t]
|
||||
- [uint8x16_t, int8x16_t]
|
||||
- [poly8x16_t, int8x16_t]
|
||||
- [poly16x8_t, int16x8_t]
|
||||
- [uint16x8_t, int16x8_t]
|
||||
- [uint32x4_t, int32x4_t]
|
||||
- [uint64x2_t, int64x2_t]
|
||||
- [poly8x8_t, uint8x8_t]
|
||||
- [int8x8_t, uint8x8_t]
|
||||
- [poly16x4_t, uint16x4_t]
|
||||
- [int16x4_t, uint16x4_t]
|
||||
- [int32x2_t, uint32x2_t]
|
||||
- [int64x1_t, uint64x1_t]
|
||||
- [poly8x16_t, uint8x16_t]
|
||||
- [int8x16_t, uint8x16_t]
|
||||
- [poly16x8_t, uint16x8_t]
|
||||
- [int16x8_t, uint16x8_t]
|
||||
- [int32x4_t, uint32x4_t]
|
||||
- [int64x2_t, uint64x2_t]
|
||||
- [int8x8_t, poly8x8_t]
|
||||
- [uint8x8_t, poly8x8_t]
|
||||
- [int16x4_t, poly16x4_t]
|
||||
- [uint16x4_t, poly16x4_t]
|
||||
- [int8x16_t, poly8x16_t]
|
||||
- [uint8x16_t, poly8x16_t]
|
||||
- [int16x8_t, poly16x8_t]
|
||||
- [uint16x8_t, poly16x8_t]
|
||||
- [float32x2_t, int32x2_t]
|
||||
- [float32x4_t, int32x4_t]
|
||||
- [float32x2_t, uint32x2_t]
|
||||
- [float32x4_t, uint32x4_t]
|
||||
- [int32x2_t, float32x2_t]
|
||||
- [int32x4_t, float32x4_t]
|
||||
- [uint32x2_t, float32x2_t]
|
||||
- [uint32x4_t, float32x4_t]
|
||||
compose:
|
||||
- FnCall: [transmute, [a]]
|
||||
|
||||
- name: "vreinterpret{neon_type[1].no}{neon_type[0].noq}"
|
||||
doc: Vector reinterpret cast operation
|
||||
arguments: ["a: {type[0]}"]
|
||||
return_type: "{type[1]}"
|
||||
attr:
|
||||
- *neon-v7
|
||||
- FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, [nop]]}]]
|
||||
- FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [nop]]}]]
|
||||
- *neon-not-arm-stable-fp16
|
||||
- *neon-cfg-arm-unstable
|
||||
- *target-not-arm64ec
|
||||
safety: safe
|
||||
types:
|
||||
# non-q
|
||||
- [float32x2_t, float16x4_t]
|
||||
- [poly8x8_t, float16x4_t]
|
||||
- [int8x8_t, float16x4_t]
|
||||
- [int32x2_t, float16x4_t]
|
||||
- [int64x1_t, float16x4_t]
|
||||
- [uint8x8_t, float16x4_t]
|
||||
- [uint32x2_t, float16x4_t]
|
||||
- [uint64x1_t, float16x4_t]
|
||||
- [float16x4_t, float32x2_t]
|
||||
- [float16x4_t, poly8x8_t]
|
||||
- [float16x4_t, int8x8_t]
|
||||
- [float16x4_t, int32x2_t]
|
||||
- [float16x4_t, int64x1_t]
|
||||
- [float16x4_t, uint8x8_t]
|
||||
- [float16x4_t, uint32x2_t]
|
||||
- [float16x4_t, uint64x1_t]
|
||||
# q
|
||||
- [float32x4_t, float16x8_t]
|
||||
- [poly8x16_t, float16x8_t]
|
||||
- [int8x16_t, float16x8_t]
|
||||
- [int32x4_t, float16x8_t]
|
||||
- [int64x2_t, float16x8_t]
|
||||
- [uint8x16_t, float16x8_t]
|
||||
- [uint32x4_t, float16x8_t]
|
||||
- [uint64x2_t, float16x8_t]
|
||||
- [float16x8_t, float32x4_t]
|
||||
- [float16x8_t, poly8x16_t]
|
||||
- [float16x8_t, int8x16_t]
|
||||
- [float16x8_t, int32x4_t]
|
||||
- [float16x8_t, int64x2_t]
|
||||
- [float16x8_t, uint8x16_t]
|
||||
- [float16x8_t, uint32x4_t]
|
||||
- [float16x8_t, uint64x2_t]
|
||||
compose:
|
||||
- FnCall: [transmute, [a]]
|
||||
|
||||
|
||||
- name: "vreinterpret{neon_type[1].no}{neon_type[0].noq}"
|
||||
doc: Vector reinterpret cast operation
|
||||
@@ -8809,51 +8864,19 @@ intrinsics:
|
||||
safety: safe
|
||||
types:
|
||||
# non-q
|
||||
- [float32x2_t, float16x4_t]
|
||||
- [poly16x4_t, float16x4_t]
|
||||
- [poly8x8_t, float16x4_t]
|
||||
- [int8x8_t, float16x4_t]
|
||||
- [int16x4_t, float16x4_t]
|
||||
- [int32x2_t, float16x4_t]
|
||||
- [int64x1_t, float16x4_t]
|
||||
- [uint8x8_t, float16x4_t]
|
||||
- [uint16x4_t, float16x4_t]
|
||||
- [uint32x2_t, float16x4_t]
|
||||
- [uint64x1_t, float16x4_t]
|
||||
- [float16x4_t, float32x2_t]
|
||||
- [float16x4_t, poly16x4_t]
|
||||
- [float16x4_t, poly8x8_t]
|
||||
- [float16x4_t, int8x8_t]
|
||||
- [float16x4_t, int16x4_t]
|
||||
- [float16x4_t, int32x2_t]
|
||||
- [float16x4_t, int64x1_t]
|
||||
- [float16x4_t, uint8x8_t]
|
||||
- [float16x4_t, uint16x4_t]
|
||||
- [float16x4_t, uint32x2_t]
|
||||
- [float16x4_t, uint64x1_t]
|
||||
# q
|
||||
- [float32x4_t, float16x8_t]
|
||||
- [poly16x8_t, float16x8_t]
|
||||
- [poly8x16_t, float16x8_t]
|
||||
- [int8x16_t, float16x8_t]
|
||||
- [int16x8_t, float16x8_t]
|
||||
- [int32x4_t, float16x8_t]
|
||||
- [int64x2_t, float16x8_t]
|
||||
- [uint8x16_t, float16x8_t]
|
||||
- [uint16x8_t, float16x8_t]
|
||||
- [uint32x4_t, float16x8_t]
|
||||
- [uint64x2_t, float16x8_t]
|
||||
- [float16x8_t, float32x4_t]
|
||||
- [float16x8_t, poly16x8_t]
|
||||
- [float16x8_t, poly8x16_t]
|
||||
- [float16x8_t, int8x16_t]
|
||||
- [float16x8_t, int16x8_t]
|
||||
- [float16x8_t, int32x4_t]
|
||||
- [float16x8_t, int64x2_t]
|
||||
- [float16x8_t, uint8x16_t]
|
||||
- [float16x8_t, uint16x8_t]
|
||||
- [float16x8_t, uint32x4_t]
|
||||
- [float16x8_t, uint64x2_t]
|
||||
compose:
|
||||
- FnCall: [transmute, [a]]
|
||||
|
||||
@@ -8894,6 +8917,7 @@ intrinsics:
|
||||
- *neon-cfg-arm-unstable
|
||||
- *target-not-arm64ec
|
||||
safety: safe
|
||||
big_endian_inverse: true
|
||||
types:
|
||||
- [float16x4_t, "[3, 2, 1, 0]"]
|
||||
- [float16x8_t, "[3, 2, 1, 0, 7, 6, 5, 4]"]
|
||||
@@ -9246,6 +9270,7 @@ intrinsics:
|
||||
- *neon-cfg-arm-unstable
|
||||
static_defs: ['const LANE: i32']
|
||||
safety: safe
|
||||
big_endian_inverse: true
|
||||
types:
|
||||
- ["i8", int8x8_t, '3']
|
||||
- ["i16", int16x4_t, '2']
|
||||
@@ -9286,6 +9311,7 @@ intrinsics:
|
||||
- *target-not-arm64ec
|
||||
static_defs: ['const LANE: i32']
|
||||
safety: safe
|
||||
big_endian_inverse: true
|
||||
types:
|
||||
- ["f16", float16x4_t, '2']
|
||||
- ["f16", float16x8_t, '3']
|
||||
@@ -9307,6 +9333,7 @@ intrinsics:
|
||||
- *neon-cfg-arm-unstable
|
||||
static_defs: ['const LANE: i32']
|
||||
safety: safe
|
||||
big_endian_inverse: true
|
||||
types:
|
||||
- ["i64", int64x1_t, int64x1_t]
|
||||
- ["u64", uint64x1_t, uint64x1_t]
|
||||
@@ -9328,6 +9355,7 @@ intrinsics:
|
||||
- *neon-cfg-arm-unstable
|
||||
static_defs: ['const LANE: i32']
|
||||
safety: safe
|
||||
big_endian_inverse: true
|
||||
types:
|
||||
- ["p64", poly64x1_t, poly64x1_t]
|
||||
compose:
|
||||
@@ -9348,6 +9376,7 @@ intrinsics:
|
||||
- *neon-cfg-arm-unstable
|
||||
static_defs: ['const LANE: i32']
|
||||
safety: safe
|
||||
big_endian_inverse: true
|
||||
types:
|
||||
- ["p64", poly64x2_t, poly64x2_t]
|
||||
compose:
|
||||
@@ -9674,6 +9703,7 @@ intrinsics:
|
||||
- *neon-not-arm-stable
|
||||
- *neon-cfg-arm-unstable
|
||||
safety: safe
|
||||
big_endian_inverse: true
|
||||
types:
|
||||
- [int8x8_t, int8x8x2_t, '[0, 8, 2, 10, 4, 12, 6, 14]', '[1, 9, 3, 11, 5, 13, 7, 15]']
|
||||
- [int16x4_t, int16x4x2_t, '[0, 4, 2, 6]', '[1, 5, 3, 7]']
|
||||
@@ -9718,6 +9748,7 @@ intrinsics:
|
||||
- *neon-cfg-arm-unstable
|
||||
- *target-not-arm64ec
|
||||
safety: safe
|
||||
big_endian_inverse: true
|
||||
types:
|
||||
- [float16x4_t, float16x4x2_t, '[0, 4, 2, 6]', '[1, 5, 3, 7]']
|
||||
- [float16x8_t, float16x8x2_t, '[0, 8, 2, 10, 4, 12, 6, 14]', '[1, 9, 3, 11, 5, 13, 7, 15]']
|
||||
@@ -9747,6 +9778,7 @@ intrinsics:
|
||||
- *neon-not-arm-stable
|
||||
- *neon-cfg-arm-unstable
|
||||
safety: safe
|
||||
big_endian_inverse: true
|
||||
types:
|
||||
- [int32x2_t, int32x2x2_t, '[0, 2]', '[1, 3]']
|
||||
- [uint32x2_t, uint32x2x2_t, '[0, 2]', '[1, 3]']
|
||||
@@ -9776,6 +9808,7 @@ intrinsics:
|
||||
- *neon-not-arm-stable
|
||||
- *neon-cfg-arm-unstable
|
||||
safety: safe
|
||||
big_endian_inverse: true
|
||||
types:
|
||||
- [int8x16_t, int8x16x2_t, '[0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23]', '[8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31]']
|
||||
- [int16x8_t, int16x8x2_t, '[0, 8, 1, 9, 2, 10, 3, 11]', '[4, 12, 5, 13, 6, 14, 7, 15]']
|
||||
@@ -9811,6 +9844,7 @@ intrinsics:
|
||||
- *neon-not-arm-stable
|
||||
- *neon-cfg-arm-unstable
|
||||
safety: safe
|
||||
big_endian_inverse: true
|
||||
types:
|
||||
- [int32x2_t, int32x2x2_t, '[0, 2]', '[1, 3]']
|
||||
- [uint32x2_t, uint32x2x2_t, '[0, 2]', '[1, 3]']
|
||||
@@ -9840,6 +9874,7 @@ intrinsics:
|
||||
- *neon-not-arm-stable
|
||||
- *neon-cfg-arm-unstable
|
||||
safety: safe
|
||||
big_endian_inverse: true
|
||||
types:
|
||||
- [int8x8_t, int8x8x2_t, '[0, 8, 1, 9, 2, 10, 3, 11]', '[4, 12, 5, 13, 6, 14, 7, 15]']
|
||||
- [int16x4_t, int16x4x2_t, '[0, 4, 1, 5]', '[2, 6, 3, 7]']
|
||||
@@ -9875,6 +9910,7 @@ intrinsics:
|
||||
- *neon-cfg-arm-unstable
|
||||
- *target-not-arm64ec
|
||||
safety: safe
|
||||
big_endian_inverse: true
|
||||
types:
|
||||
- [float16x4_t, float16x4x2_t, '[0, 4, 1, 5]', '[2, 6, 3, 7]']
|
||||
- [float16x8_t, float16x8x2_t, '[0, 8, 1, 9, 2, 10, 3, 11]', '[4, 12, 5, 13, 6, 14, 7, 15]']
|
||||
@@ -9903,6 +9939,7 @@ intrinsics:
|
||||
- *neon-not-arm-stable
|
||||
- *neon-cfg-arm-unstable
|
||||
safety: safe
|
||||
big_endian_inverse: true
|
||||
types:
|
||||
- [int8x8_t, int8x8x2_t, '[0, 2, 4, 6, 8, 10, 12, 14]', '[1, 3, 5, 7, 9, 11, 13, 15]']
|
||||
- [int16x4_t, int16x4x2_t, '[0, 2, 4, 6]', '[1, 3, 5, 7]']
|
||||
@@ -9947,6 +9984,7 @@ intrinsics:
|
||||
- *neon-cfg-arm-unstable
|
||||
- *target-not-arm64ec
|
||||
safety: safe
|
||||
big_endian_inverse: true
|
||||
types:
|
||||
- [float16x4_t, float16x4x2_t, '[0, 2, 4, 6]', '[1, 3, 5, 7]']
|
||||
- [float16x8_t, float16x8x2_t, '[0, 2, 4, 6, 8, 10, 12, 14]', '[1, 3, 5, 7, 9, 11, 13, 15]']
|
||||
@@ -9976,6 +10014,7 @@ intrinsics:
|
||||
- *neon-not-arm-stable
|
||||
- *neon-cfg-arm-unstable
|
||||
safety: safe
|
||||
big_endian_inverse: true
|
||||
types:
|
||||
- [float32x2_t, float32x2x2_t, '[0, 2]', '[1, 3]']
|
||||
- [int32x2_t, int32x2x2_t, '[0, 2]', '[1, 3]']
|
||||
@@ -10909,21 +10948,21 @@ intrinsics:
|
||||
static_defs: ['const LANE: i32']
|
||||
safety: safe
|
||||
types:
|
||||
- [_lane_s16, int16x4_t, int16x4_t, '2', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32]']
|
||||
- [_laneq_s16, int16x4_t, int16x8_t, '3', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32]']
|
||||
- [q_lane_s16, int16x8_t, int16x4_t, '2', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32]']
|
||||
- [q_laneq_s16, int16x8_t, int16x8_t, '3', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32]']
|
||||
- [_lane_u16, uint16x4_t, uint16x4_t, '2', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32]']
|
||||
- [_laneq_u16, uint16x4_t, uint16x8_t, '3', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32]']
|
||||
- [q_lane_u16, uint16x8_t, uint16x4_t, '2', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32]']
|
||||
- [q_laneq_u16, uint16x8_t, uint16x8_t, '3', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32]']
|
||||
- [_lane_s16, int16x4_t, int16x4_t, '2']
|
||||
- [_laneq_s16, int16x4_t, int16x8_t, '3']
|
||||
- [q_lane_s16, int16x8_t, int16x4_t, '2']
|
||||
- [q_laneq_s16, int16x8_t, int16x8_t, '3']
|
||||
- [_lane_u16, uint16x4_t, uint16x4_t, '2']
|
||||
- [_laneq_u16, uint16x4_t, uint16x8_t, '3']
|
||||
- [q_lane_u16, uint16x8_t, uint16x4_t, '2']
|
||||
- [q_laneq_u16, uint16x8_t, uint16x8_t, '3']
|
||||
compose:
|
||||
- FnCall: [static_assert_uimm_bits!, [LANE, '{type[3]}']]
|
||||
- FnCall:
|
||||
- "vmla{neon_type[1].no}"
|
||||
- - a
|
||||
- b
|
||||
- FnCall: [simd_shuffle!, [c, c, "{type[4]}"]]
|
||||
- FnCall: ['vdup{type[0]}', [c], [LANE]]
|
||||
|
||||
- name: "vmla{type[0]}"
|
||||
doc: "Vector multiply accumulate with scalar"
|
||||
@@ -10939,21 +10978,21 @@ intrinsics:
|
||||
static_defs: ['const LANE: i32']
|
||||
safety: safe
|
||||
types:
|
||||
- [_lane_s32, int32x2_t, int32x2_t, '1', '[LANE as u32, LANE as u32]']
|
||||
- [_laneq_s32, int32x2_t, int32x4_t, '2', '[LANE as u32, LANE as u32]']
|
||||
- [q_lane_s32, int32x4_t, int32x2_t, '1', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32]']
|
||||
- [q_laneq_s32, int32x4_t, int32x4_t, '2', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32]']
|
||||
- [_lane_u32, uint32x2_t, uint32x2_t, '1', '[LANE as u32, LANE as u32]']
|
||||
- [_laneq_u32, uint32x2_t, uint32x4_t, '2', '[LANE as u32, LANE as u32]']
|
||||
- [q_lane_u32, uint32x4_t, uint32x2_t, '1', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32]']
|
||||
- [q_laneq_u32, uint32x4_t, uint32x4_t, '2', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32]']
|
||||
- [_lane_s32, int32x2_t, int32x2_t, '1']
|
||||
- [_laneq_s32, int32x2_t, int32x4_t, '2']
|
||||
- [q_lane_s32, int32x4_t, int32x2_t, '1']
|
||||
- [q_laneq_s32, int32x4_t, int32x4_t, '2']
|
||||
- [_lane_u32, uint32x2_t, uint32x2_t, '1']
|
||||
- [_laneq_u32, uint32x2_t, uint32x4_t, '2']
|
||||
- [q_lane_u32, uint32x4_t, uint32x2_t, '1']
|
||||
- [q_laneq_u32, uint32x4_t, uint32x4_t, '2']
|
||||
compose:
|
||||
- FnCall: [static_assert_uimm_bits!, [LANE, '{type[3]}']]
|
||||
- FnCall:
|
||||
- "vmla{neon_type[1].no}"
|
||||
- - a
|
||||
- b
|
||||
- FnCall: [simd_shuffle!, [c, c, "{type[4]}"]]
|
||||
- FnCall: ['vdup{type[0]}', [c], [LANE]]
|
||||
|
||||
- name: "vmla{type[0]}"
|
||||
doc: "Vector multiply accumulate with scalar"
|
||||
@@ -10969,17 +11008,17 @@ intrinsics:
|
||||
static_defs: ['const LANE: i32']
|
||||
safety: safe
|
||||
types:
|
||||
- [_lane_f32, float32x2_t, float32x2_t, '1', '[LANE as u32, LANE as u32]']
|
||||
- [_laneq_f32, float32x2_t, float32x4_t, '2', '[LANE as u32, LANE as u32]']
|
||||
- [q_lane_f32, float32x4_t, float32x2_t, '1', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32]']
|
||||
- [q_laneq_f32, float32x4_t, float32x4_t, '2', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32]']
|
||||
- [_lane_f32, float32x2_t, float32x2_t, '1']
|
||||
- [_laneq_f32, float32x2_t, float32x4_t, '2']
|
||||
- [q_lane_f32, float32x4_t, float32x2_t, '1']
|
||||
- [q_laneq_f32, float32x4_t, float32x4_t, '2']
|
||||
compose:
|
||||
- FnCall: [static_assert_uimm_bits!, [LANE, '{type[3]}']]
|
||||
- FnCall:
|
||||
- "vmla{neon_type[1].no}"
|
||||
- - a
|
||||
- b
|
||||
- FnCall: [simd_shuffle!, [c, c, "{type[4]}"]]
|
||||
- FnCall: ['vdup{type[0]}', [c], [LANE]]
|
||||
|
||||
- name: "vmls{neon_type[0].N}"
|
||||
doc: "Vector multiply subtract with scalar"
|
||||
@@ -11058,21 +11097,21 @@ intrinsics:
|
||||
static_defs: ['const LANE: i32']
|
||||
safety: safe
|
||||
types:
|
||||
- [_lane_s16, int16x4_t, int16x4_t, '2', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32]']
|
||||
- [_laneq_s16, int16x4_t, int16x8_t, '3', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32]']
|
||||
- [q_lane_s16, int16x8_t, int16x4_t, '2', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32]']
|
||||
- [q_laneq_s16, int16x8_t, int16x8_t, '3', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32]']
|
||||
- [_lane_u16, uint16x4_t, uint16x4_t, '2', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32]']
|
||||
- [_laneq_u16, uint16x4_t, uint16x8_t, '3', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32]']
|
||||
- [q_lane_u16, uint16x8_t, uint16x4_t, '2', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32]']
|
||||
- [q_laneq_u16, uint16x8_t, uint16x8_t, '3', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32]']
|
||||
- [_lane_s16, int16x4_t, int16x4_t, '2']
|
||||
- [_laneq_s16, int16x4_t, int16x8_t, '3']
|
||||
- [q_lane_s16, int16x8_t, int16x4_t, '2']
|
||||
- [q_laneq_s16, int16x8_t, int16x8_t, '3']
|
||||
- [_lane_u16, uint16x4_t, uint16x4_t, '2']
|
||||
- [_laneq_u16, uint16x4_t, uint16x8_t, '3']
|
||||
- [q_lane_u16, uint16x8_t, uint16x4_t, '2']
|
||||
- [q_laneq_u16, uint16x8_t, uint16x8_t, '3']
|
||||
compose:
|
||||
- FnCall: [static_assert_uimm_bits!, [LANE, '{type[3]}']]
|
||||
- FnCall:
|
||||
- "vmls{neon_type[1].no}"
|
||||
- - a
|
||||
- b
|
||||
- FnCall: [simd_shuffle!, [c, c, "{type[4]}"]]
|
||||
- FnCall: ['vdup{type[0]}', [c], [LANE]]
|
||||
|
||||
- name: "vmls{type[0]}"
|
||||
doc: "Vector multiply subtract with scalar"
|
||||
@@ -11088,21 +11127,21 @@ intrinsics:
|
||||
static_defs: ['const LANE: i32']
|
||||
safety: safe
|
||||
types:
|
||||
- [_lane_s32, int32x2_t, int32x2_t, '1', '[LANE as u32, LANE as u32]']
|
||||
- [_laneq_s32, int32x2_t, int32x4_t, '2', '[LANE as u32, LANE as u32]']
|
||||
- [q_lane_s32, int32x4_t, int32x2_t, '1', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32]']
|
||||
- [q_laneq_s32, int32x4_t, int32x4_t, '2', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32]']
|
||||
- [_lane_u32, uint32x2_t, uint32x2_t, '1', '[LANE as u32, LANE as u32]']
|
||||
- [_laneq_u32, uint32x2_t, uint32x4_t, '2', '[LANE as u32, LANE as u32]']
|
||||
- [q_lane_u32, uint32x4_t, uint32x2_t, '1', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32]']
|
||||
- [q_laneq_u32, uint32x4_t, uint32x4_t, '2', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32]']
|
||||
- [_lane_s32, int32x2_t, int32x2_t, '1']
|
||||
- [_laneq_s32, int32x2_t, int32x4_t, '2']
|
||||
- [q_lane_s32, int32x4_t, int32x2_t, '1']
|
||||
- [q_laneq_s32, int32x4_t, int32x4_t, '2']
|
||||
- [_lane_u32, uint32x2_t, uint32x2_t, '1']
|
||||
- [_laneq_u32, uint32x2_t, uint32x4_t, '2']
|
||||
- [q_lane_u32, uint32x4_t, uint32x2_t, '1']
|
||||
- [q_laneq_u32, uint32x4_t, uint32x4_t, '2']
|
||||
compose:
|
||||
- FnCall: [static_assert_uimm_bits!, [LANE, '{type[3]}']]
|
||||
- FnCall:
|
||||
- "vmls{neon_type[1].no}"
|
||||
- - a
|
||||
- b
|
||||
- FnCall: [simd_shuffle!, [c, c, "{type[4]}"]]
|
||||
- FnCall: ['vdup{type[0]}', [c], [LANE]]
|
||||
|
||||
- name: "vmls{type[0]}"
|
||||
doc: "Vector multiply subtract with scalar"
|
||||
@@ -11118,17 +11157,17 @@ intrinsics:
|
||||
static_defs: ['const LANE: i32']
|
||||
safety: safe
|
||||
types:
|
||||
- [_lane_f32, float32x2_t, float32x2_t, '1', '[LANE as u32, LANE as u32]']
|
||||
- [_laneq_f32, float32x2_t, float32x4_t, '2', '[LANE as u32, LANE as u32]']
|
||||
- [q_lane_f32, float32x4_t, float32x2_t, '1', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32]']
|
||||
- [q_laneq_f32, float32x4_t, float32x4_t, '2', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32]']
|
||||
- [_lane_f32, float32x2_t, float32x2_t, '1']
|
||||
- [_laneq_f32, float32x2_t, float32x4_t, '2']
|
||||
- [q_lane_f32, float32x4_t, float32x2_t, '1']
|
||||
- [q_laneq_f32, float32x4_t, float32x4_t, '2']
|
||||
compose:
|
||||
- FnCall: [static_assert_uimm_bits!, [LANE, '{type[3]}']]
|
||||
- FnCall:
|
||||
- "vmls{neon_type[1].no}"
|
||||
- - a
|
||||
- b
|
||||
- FnCall: [simd_shuffle!, [c, c, "{type[4]}"]]
|
||||
- FnCall: ['vdup{type[0]}', [c], [LANE]]
|
||||
|
||||
- name: "vmul{neon_type[0].N}"
|
||||
doc: "Vector multiply by scalar"
|
||||
@@ -11213,16 +11252,16 @@ intrinsics:
|
||||
static_defs: ['const LANE: i32']
|
||||
safety: safe
|
||||
types:
|
||||
- [float32x2_t, float32x2_t, '_lane_f32', '1', '[LANE as u32, LANE as u32]']
|
||||
- [float32x2_t, float32x4_t, '_laneq_f32', '2', '[LANE as u32, LANE as u32]']
|
||||
- [float32x4_t, float32x2_t, 'q_lane_f32', '1', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32]']
|
||||
- [float32x4_t, float32x4_t, 'q_laneq_f32', '2', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32]']
|
||||
- [float32x2_t, float32x2_t, '_lane_f32', '1']
|
||||
- [float32x2_t, float32x4_t, '_laneq_f32', '2']
|
||||
- [float32x4_t, float32x2_t, 'q_lane_f32', '1']
|
||||
- [float32x4_t, float32x4_t, 'q_laneq_f32', '2']
|
||||
compose:
|
||||
- FnCall: [static_assert_uimm_bits!, [LANE, '{type[3]}']]
|
||||
- FnCall:
|
||||
- simd_mul
|
||||
- - a
|
||||
- FnCall: [simd_shuffle!, [b, b, "{type[4]}"]]
|
||||
- FnCall: ['vdup{type[2]}', [b], [LANE]]
|
||||
|
||||
- name: "vqrdmulh{type[0]}"
|
||||
doc: "Vector rounding saturating doubling multiply high by scalar"
|
||||
@@ -11238,17 +11277,17 @@ intrinsics:
|
||||
static_defs: ['const LANE: i32']
|
||||
safety: safe
|
||||
types:
|
||||
- [_lane_s16, int16x4_t, int16x4_t, '2', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32]']
|
||||
- [_laneq_s16, int16x4_t, int16x8_t, '3', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32]']
|
||||
- [q_lane_s16, int16x8_t, int16x4_t, '2', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32]']
|
||||
- [q_laneq_s16, int16x8_t, int16x8_t, '3', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32]']
|
||||
- [_lane_s32, int32x2_t, int32x2_t, '1', '[LANE as u32, LANE as u32]']
|
||||
- [_laneq_s32, int32x2_t, int32x4_t, '2', '[LANE as u32, LANE as u32]']
|
||||
- [q_lane_s32, int32x4_t, int32x2_t, '1', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32]']
|
||||
- [q_laneq_s32, int32x4_t, int32x4_t, '2', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32]']
|
||||
- [_lane_s16, int16x4_t, int16x4_t, '2']
|
||||
- [_laneq_s16, int16x4_t, int16x8_t, '3']
|
||||
- [q_lane_s16, int16x8_t, int16x4_t, '2']
|
||||
- [q_laneq_s16, int16x8_t, int16x8_t, '3']
|
||||
- [_lane_s32, int32x2_t, int32x2_t, '1']
|
||||
- [_laneq_s32, int32x2_t, int32x4_t, '2']
|
||||
- [q_lane_s32, int32x4_t, int32x2_t, '1']
|
||||
- [q_laneq_s32, int32x4_t, int32x4_t, '2']
|
||||
compose:
|
||||
- FnCall: [static_assert_uimm_bits!, [LANE, "{type[3]}"]]
|
||||
- Let: [b, "{neon_type[1]}", {FnCall: [simd_shuffle!, [b, b, '{type[4]}']]}]
|
||||
- Let: [b, {FnCall: ['vdup{type[0]}', [b], [LANE]]}]
|
||||
- FnCall: ["vqrdmulh{neon_type[1].no}", [a, b]]
|
||||
|
||||
- name: "vqrdmulh{neon_type[0].N}"
|
||||
@@ -11365,16 +11404,16 @@ intrinsics:
|
||||
static_defs: ['const LANE: i32']
|
||||
safety: safe
|
||||
types:
|
||||
- [int16x4_t, int16x4_t, int32x4_t, '2', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32]']
|
||||
- [int16x4_t, int16x8_t, int32x4_t, '3', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32]']
|
||||
- [int32x2_t, int32x2_t, int64x2_t, '1', '[LANE as u32, LANE as u32]']
|
||||
- [int32x2_t, int32x4_t, int64x2_t, '2', '[LANE as u32, LANE as u32]']
|
||||
- [int16x4_t, int16x4_t, int32x4_t, '2']
|
||||
- [int16x4_t, int16x8_t, int32x4_t, '3']
|
||||
- [int32x2_t, int32x2_t, int64x2_t, '1']
|
||||
- [int32x2_t, int32x4_t, int64x2_t, '2']
|
||||
compose:
|
||||
- FnCall: [static_assert_uimm_bits!, [LANE, "{type[3]}"]]
|
||||
- FnCall:
|
||||
- "vmull_{neon_type[0]}"
|
||||
- - a
|
||||
- FnCall: [simd_shuffle!, [b, b, "{type[4]}"]]
|
||||
- FnCall: ['vdup_lane{neon_type[1].nox}', [b], [LANE]]
|
||||
|
||||
- name: "vmull_lane{neon_type[1].no}"
|
||||
doc: "Vector long multiply by scalar"
|
||||
@@ -11390,16 +11429,16 @@ intrinsics:
|
||||
static_defs: ['const LANE: i32']
|
||||
safety: safe
|
||||
types:
|
||||
- [uint16x4_t, uint16x4_t, uint32x4_t, '2', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32]']
|
||||
- [uint16x4_t, uint16x8_t, uint32x4_t, '3', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32]']
|
||||
- [uint32x2_t, uint32x2_t, uint64x2_t, '1', '[LANE as u32, LANE as u32]']
|
||||
- [uint32x2_t, uint32x4_t, uint64x2_t, '2', '[LANE as u32, LANE as u32]']
|
||||
- [uint16x4_t, uint16x4_t, uint32x4_t, '2']
|
||||
- [uint16x4_t, uint16x8_t, uint32x4_t, '3']
|
||||
- [uint32x2_t, uint32x2_t, uint64x2_t, '1']
|
||||
- [uint32x2_t, uint32x4_t, uint64x2_t, '2']
|
||||
compose:
|
||||
- FnCall: [static_assert_uimm_bits!, [LANE, "{type[3]}"]]
|
||||
- FnCall:
|
||||
- "vmull_{neon_type[0]}"
|
||||
- - a
|
||||
- FnCall: [simd_shuffle!, [b, b, "{type[4]}"]]
|
||||
- FnCall: ['vdup_lane{neon_type[1].nox}', [b], [LANE]]
|
||||
|
||||
- name: "vfms{neon_type[0].N}"
|
||||
doc: "Floating-point fused Multiply-subtract to accumulator(vector)"
|
||||
@@ -11469,7 +11508,7 @@ intrinsics:
|
||||
- - a
|
||||
- FnCall:
|
||||
- "vdup{neon_type[0].N}"
|
||||
- - FnCall: [simd_extract!, [b, 'LANE as u32']]
|
||||
- - FnCall: ['vget{neon_type[1].lane_nox}', [b], [LANE]]
|
||||
|
||||
- name: "vrecpe{neon_type.no}"
|
||||
doc: "Unsigned reciprocal estimate"
|
||||
@@ -12495,6 +12534,7 @@ intrinsics:
|
||||
- *neon-not-arm-stable
|
||||
- *neon-cfg-arm-unstable
|
||||
safety: safe
|
||||
big_endian_inverse: true
|
||||
types:
|
||||
- [float32x2_t, float32x4_t, '[0, 1, 2, 3]']
|
||||
- [poly8x8_t, poly8x16_t, '[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]']
|
||||
@@ -12522,6 +12562,7 @@ intrinsics:
|
||||
- *neon-cfg-arm-unstable
|
||||
- FnCall: [cfg_attr, [*not-arm, { FnCall: [stable, ['feature = "aarch64_neon_crypto_intrinsics"', 'since = "1.72.0"']] }]]
|
||||
safety: safe
|
||||
big_endian_inverse: true
|
||||
types:
|
||||
- uint8x16_t
|
||||
compose:
|
||||
@@ -12544,6 +12585,7 @@ intrinsics:
|
||||
- *neon-cfg-arm-unstable
|
||||
- FnCall: [cfg_attr, [*not-arm, { FnCall: [stable, ['feature = "aarch64_neon_crypto_intrinsics"', 'since = "1.72.0"']] }]]
|
||||
safety: safe
|
||||
big_endian_inverse: true
|
||||
types:
|
||||
- uint8x16_t
|
||||
compose:
|
||||
@@ -12566,6 +12608,7 @@ intrinsics:
|
||||
- *neon-cfg-arm-unstable
|
||||
- FnCall: [cfg_attr, [*not-arm, { FnCall: [stable, ['feature = "aarch64_neon_crypto_intrinsics"', 'since = "1.72.0"']] }]]
|
||||
safety: safe
|
||||
big_endian_inverse: true
|
||||
types:
|
||||
- [uint8x16_t, "aesmc"]
|
||||
compose:
|
||||
@@ -12588,6 +12631,7 @@ intrinsics:
|
||||
- *neon-cfg-arm-unstable
|
||||
- FnCall: [cfg_attr, [*not-arm, { FnCall: [stable, ['feature = "aarch64_neon_crypto_intrinsics"', 'since = "1.72.0"']] }]]
|
||||
safety: safe
|
||||
big_endian_inverse: true
|
||||
types:
|
||||
- [uint8x16_t, "aesimc"]
|
||||
compose:
|
||||
@@ -12610,6 +12654,7 @@ intrinsics:
|
||||
- *neon-cfg-arm-unstable
|
||||
- FnCall: [cfg_attr, [*not-arm, { FnCall: [stable, ['feature = "aarch64_neon_crypto_intrinsics"', 'since = "1.72.0"']] }]]
|
||||
safety: safe
|
||||
big_endian_inverse: true
|
||||
types:
|
||||
- [u32, "sha1h"]
|
||||
compose:
|
||||
@@ -12632,6 +12677,7 @@ intrinsics:
|
||||
- *neon-cfg-arm-unstable
|
||||
- FnCall: [cfg_attr, [*not-arm, { FnCall: [stable, ['feature = "aarch64_neon_crypto_intrinsics"', 'since = "1.72.0"']] }]]
|
||||
safety: safe
|
||||
big_endian_inverse: true
|
||||
types:
|
||||
- [u32, "sha1c", "uint32x4_t"]
|
||||
compose:
|
||||
@@ -12654,6 +12700,7 @@ intrinsics:
|
||||
- *neon-cfg-arm-unstable
|
||||
- FnCall: [cfg_attr, [*not-arm, { FnCall: [stable, ['feature = "aarch64_neon_crypto_intrinsics"', 'since = "1.72.0"']] }]]
|
||||
safety: safe
|
||||
big_endian_inverse: true
|
||||
types:
|
||||
- [u32, "sha1m", "uint32x4_t"]
|
||||
compose:
|
||||
@@ -12676,6 +12723,7 @@ intrinsics:
|
||||
- *neon-cfg-arm-unstable
|
||||
- FnCall: [cfg_attr, [*not-arm, { FnCall: [stable, ['feature = "aarch64_neon_crypto_intrinsics"', 'since = "1.72.0"']] }]]
|
||||
safety: safe
|
||||
big_endian_inverse: true
|
||||
types:
|
||||
- [u32, "sha1p", "uint32x4_t"]
|
||||
compose:
|
||||
@@ -12698,6 +12746,7 @@ intrinsics:
|
||||
- *neon-cfg-arm-unstable
|
||||
- FnCall: [cfg_attr, [*not-arm, { FnCall: [stable, ['feature = "aarch64_neon_crypto_intrinsics"', 'since = "1.72.0"']] }]]
|
||||
safety: safe
|
||||
big_endian_inverse: true
|
||||
types:
|
||||
- [uint32x4_t, "sha1su0"]
|
||||
compose:
|
||||
@@ -12720,6 +12769,7 @@ intrinsics:
|
||||
- *neon-cfg-arm-unstable
|
||||
- FnCall: [cfg_attr, [*not-arm, { FnCall: [stable, ['feature = "aarch64_neon_crypto_intrinsics"', 'since = "1.72.0"']] }]]
|
||||
safety: safe
|
||||
big_endian_inverse: true
|
||||
types:
|
||||
- [uint32x4_t, "sha1su1"]
|
||||
compose:
|
||||
@@ -12742,6 +12792,7 @@ intrinsics:
|
||||
- *neon-cfg-arm-unstable
|
||||
- FnCall: [cfg_attr, [*not-arm, { FnCall: [stable, ['feature = "aarch64_neon_crypto_intrinsics"', 'since = "1.72.0"']] }]]
|
||||
safety: safe
|
||||
big_endian_inverse: true
|
||||
types:
|
||||
- [uint32x4_t, "sha256h"]
|
||||
compose:
|
||||
@@ -12764,6 +12815,7 @@ intrinsics:
|
||||
- *neon-cfg-arm-unstable
|
||||
- FnCall: [cfg_attr, [*not-arm, { FnCall: [stable, ['feature = "aarch64_neon_crypto_intrinsics"', 'since = "1.72.0"']] }]]
|
||||
safety: safe
|
||||
big_endian_inverse: true
|
||||
types:
|
||||
- [uint32x4_t, "sha256h2"]
|
||||
compose:
|
||||
@@ -12786,6 +12838,7 @@ intrinsics:
|
||||
- *neon-cfg-arm-unstable
|
||||
- FnCall: [cfg_attr, [*not-arm, { FnCall: [stable, ['feature = "aarch64_neon_crypto_intrinsics"', 'since = "1.72.0"']] }]]
|
||||
safety: safe
|
||||
big_endian_inverse: true
|
||||
types:
|
||||
- [uint32x4_t, "sha256su0"]
|
||||
compose:
|
||||
@@ -12808,6 +12861,7 @@ intrinsics:
|
||||
- *neon-cfg-arm-unstable
|
||||
- FnCall: [cfg_attr, [*not-arm, { FnCall: [stable, ['feature = "aarch64_neon_crypto_intrinsics"', 'since = "1.72.0"']] }]]
|
||||
safety: safe
|
||||
big_endian_inverse: true
|
||||
types:
|
||||
- [uint32x4_t, "sha256su1"]
|
||||
compose:
|
||||
@@ -13045,6 +13099,7 @@ intrinsics:
|
||||
- *neon-not-arm-stable
|
||||
- *neon-cfg-arm-unstable
|
||||
safety: safe
|
||||
big_endian_inverse: true
|
||||
types:
|
||||
- int8x8_t
|
||||
- int16x4_t
|
||||
@@ -13069,6 +13124,7 @@ intrinsics:
|
||||
- *neon-not-arm-stable
|
||||
- *neon-cfg-arm-unstable
|
||||
safety: safe
|
||||
big_endian_inverse: true
|
||||
types:
|
||||
- uint8x8_t
|
||||
- uint16x4_t
|
||||
@@ -13093,6 +13149,7 @@ intrinsics:
|
||||
- *neon-not-arm-stable
|
||||
- *neon-cfg-arm-unstable
|
||||
safety: safe
|
||||
big_endian_inverse: true
|
||||
types:
|
||||
- float32x2_t
|
||||
compose:
|
||||
@@ -13115,6 +13172,7 @@ intrinsics:
|
||||
- *neon-not-arm-stable
|
||||
- *neon-cfg-arm-unstable
|
||||
safety: safe
|
||||
big_endian_inverse: true
|
||||
types:
|
||||
- int8x8_t
|
||||
- int16x4_t
|
||||
@@ -13139,6 +13197,7 @@ intrinsics:
|
||||
- *neon-not-arm-stable
|
||||
- *neon-cfg-arm-unstable
|
||||
safety: safe
|
||||
big_endian_inverse: true
|
||||
types:
|
||||
- uint8x8_t
|
||||
- uint16x4_t
|
||||
@@ -13163,6 +13222,7 @@ intrinsics:
|
||||
- *neon-not-arm-stable
|
||||
- *neon-cfg-arm-unstable
|
||||
safety: safe
|
||||
big_endian_inverse: true
|
||||
types:
|
||||
- float32x2_t
|
||||
compose:
|
||||
@@ -13228,14 +13288,14 @@ intrinsics:
|
||||
attr:
|
||||
- *neon-v7
|
||||
- FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, ['"{type[3]}"']]}]]
|
||||
- FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [raddhn2]]}]]
|
||||
- FnCall: [cfg_attr, [*all-neon-target-aarch64-arm64ec-little-endian, {FnCall: [assert_instr, [raddhn2]]}]]
|
||||
- *neon-not-arm-stable
|
||||
- *neon-cfg-arm-unstable
|
||||
safety: safe
|
||||
types:
|
||||
- [uint8x8_t , uint16x8_t, uint8x16_t, 'vraddhn.i16', int16x8_t, '[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]']
|
||||
- [uint16x4_t, uint32x4_t, uint16x8_t, 'vraddhn.i32', int32x4_t, '[0, 1, 2, 3, 4, 5, 6, 7]']
|
||||
- [uint32x2_t, uint64x2_t, uint32x4_t, 'vraddhn.i64', int64x2_t, '[0, 1, 2, 3]']
|
||||
- [uint8x8_t , uint16x8_t, uint8x16_t, 'vraddhn.i16', int16x8_t]
|
||||
- [uint16x4_t, uint32x4_t, uint16x8_t, 'vraddhn.i32', int32x4_t]
|
||||
- [uint32x2_t, uint64x2_t, uint32x4_t, 'vraddhn.i64', int64x2_t]
|
||||
compose:
|
||||
- Let:
|
||||
- x
|
||||
@@ -13246,7 +13306,7 @@ intrinsics:
|
||||
- "vraddhn{neon_type[4].noq}"
|
||||
- - FnCall: [transmute, [b]]
|
||||
- FnCall: [transmute, [c]]
|
||||
- FnCall: ["simd_shuffle!", [a, x, '{type[5]}']]
|
||||
- FnCall: ['vcombine_{neon_type[0]}', [a, x]]
|
||||
|
||||
- name: "vraddhn_high{neon_type[1].noq}"
|
||||
doc: "Rounding Add returning High Narrow (high half)."
|
||||
@@ -13255,14 +13315,14 @@ intrinsics:
|
||||
attr:
|
||||
- *neon-v7
|
||||
- FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, ['"{type[3]}"']]}]]
|
||||
- FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [raddhn2]]}]]
|
||||
- FnCall: [cfg_attr, [*all-neon-target-aarch64-arm64ec-little-endian, {FnCall: [assert_instr, [raddhn2]]}]]
|
||||
- *neon-not-arm-stable
|
||||
- *neon-cfg-arm-unstable
|
||||
safety: safe
|
||||
types:
|
||||
- [int8x8_t , int16x8_t, int8x16_t, 'vraddhn.i16', '[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]']
|
||||
- [int16x4_t, int32x4_t, int16x8_t, 'vraddhn.i32', '[0, 1, 2, 3, 4, 5, 6, 7]']
|
||||
- [int32x2_t, int64x2_t, int32x4_t, 'vraddhn.i64', '[0, 1, 2, 3]']
|
||||
- [int8x8_t , int16x8_t, int8x16_t, 'vraddhn.i16']
|
||||
- [int16x4_t, int32x4_t, int16x8_t, 'vraddhn.i32']
|
||||
- [int32x2_t, int64x2_t, int32x4_t, 'vraddhn.i64']
|
||||
compose:
|
||||
- Let:
|
||||
- x
|
||||
@@ -13270,7 +13330,7 @@ intrinsics:
|
||||
- "vraddhn{neon_type[1].noq}"
|
||||
- - b
|
||||
- c
|
||||
- FnCall: ["simd_shuffle!", [a, x, '{type[4]}']]
|
||||
- FnCall: ['vcombine_{neon_type[0]}', [a, x]]
|
||||
|
||||
- name: "vpadd{neon_type.no}"
|
||||
doc: "Add pairwise."
|
||||
@@ -13283,6 +13343,7 @@ intrinsics:
|
||||
- *neon-not-arm-stable
|
||||
- *neon-cfg-arm-unstable
|
||||
safety: safe
|
||||
big_endian_inverse: true
|
||||
types:
|
||||
- int8x8_t
|
||||
- int16x4_t
|
||||
@@ -13300,7 +13361,6 @@ intrinsics:
|
||||
doc: "Add pairwise."
|
||||
arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}"]
|
||||
return_type: "{neon_type[0]}"
|
||||
big_endian_inverse: false
|
||||
attr:
|
||||
- *neon-v7
|
||||
- FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, [vpadd]]}]]
|
||||
@@ -14134,6 +14194,7 @@ intrinsics:
|
||||
- *target-not-arm64ec
|
||||
assert_instr: [nop]
|
||||
safety: safe
|
||||
big_endian_inverse: true
|
||||
types:
|
||||
- [float16x4_t, float16x8_t]
|
||||
compose:
|
||||
@@ -14151,6 +14212,7 @@ intrinsics:
|
||||
- *target-not-arm64ec
|
||||
assert_instr: [nop]
|
||||
safety: safe
|
||||
big_endian_inverse: true
|
||||
types:
|
||||
- [float16x4_t, float16x8_t, 'low', "[0, 1, 2, 3]"]
|
||||
- [float16x4_t, float16x8_t, 'high', "[4, 5, 6, 7]"]
|
||||
@@ -14171,6 +14233,7 @@ intrinsics:
|
||||
- *target-not-arm64ec
|
||||
static_defs: ['const LANE: i32']
|
||||
safety: safe
|
||||
big_endian_inverse: true
|
||||
types:
|
||||
- [float16x4_t, f16, '_lane_f16', '2']
|
||||
- [float16x8_t, f16, 'q_lane_f16', '3']
|
||||
@@ -14325,7 +14388,6 @@ intrinsics:
|
||||
doc: "Load one single-element structure and Replicate to all lanes (of one register)."
|
||||
arguments: ["ptr: {type[1]}"]
|
||||
return_type: "{neon_type[2]}"
|
||||
big_endian_inverse: false
|
||||
attr:
|
||||
- *neon-v7
|
||||
- FnCall: [cfg_attr, [*test-is-arm, { FnCall: [assert_instr, ['"{type[3]}"']] } ]]
|
||||
@@ -14481,26 +14543,26 @@ intrinsics:
|
||||
attr:
|
||||
- *neon-v7
|
||||
- FnCall: [cfg_attr, [*test-is-arm, { FnCall: [assert_instr, ['{type[3]}']] } ]]
|
||||
- FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, { FnCall: [assert_instr, ['{type[4]}']]}] ]
|
||||
- FnCall: [cfg_attr, [*all-neon-target-aarch64-arm64ec-little-endian, { FnCall: [assert_instr, ['{type[4]}']]}] ]
|
||||
- *neon-not-arm-stable
|
||||
- *neon-cfg-arm-unstable
|
||||
safety: safe
|
||||
types:
|
||||
- ['vaddl_high_s8', 'int8x16_t', 'int16x8_t', 'vaddl', 'saddl2', 'int8x8_t', '[8, 9, 10, 11, 12, 13, 14, 15]']
|
||||
- ['vaddl_high_s16', 'int16x8_t', 'int32x4_t', 'vaddl', 'saddl2', 'int16x4_t', '[4, 5, 6, 7]']
|
||||
- ['vaddl_high_s32', 'int32x4_t', 'int64x2_t', 'vaddl', 'saddl2', 'int32x2_t', '[2, 3]']
|
||||
- ['vaddl_high_u8', 'uint8x16_t', 'uint16x8_t', 'vaddl', 'uaddl2', 'uint8x8_t', '[8, 9, 10, 11, 12, 13, 14, 15]']
|
||||
- ['vaddl_high_u16', 'uint16x8_t', 'uint32x4_t', 'vaddl', 'uaddl2', 'uint16x4_t', '[4, 5, 6, 7]']
|
||||
- ['vaddl_high_u32', 'uint32x4_t', 'uint64x2_t', 'vaddl', 'uaddl2', 'uint32x2_t', '[2, 3]']
|
||||
- ['vaddl_high_s8', 'int8x16_t', 'int16x8_t', 'vaddl', 'saddl2', 'int8x8_t']
|
||||
- ['vaddl_high_s16', 'int16x8_t', 'int32x4_t', 'vaddl', 'saddl2', 'int16x4_t']
|
||||
- ['vaddl_high_s32', 'int32x4_t', 'int64x2_t', 'vaddl', 'saddl2', 'int32x2_t']
|
||||
- ['vaddl_high_u8', 'uint8x16_t', 'uint16x8_t', 'vaddl', 'uaddl2', 'uint8x8_t']
|
||||
- ['vaddl_high_u16', 'uint16x8_t', 'uint32x4_t', 'vaddl', 'uaddl2', 'uint16x4_t']
|
||||
- ['vaddl_high_u32', 'uint32x4_t', 'uint64x2_t', 'vaddl', 'uaddl2', 'uint32x2_t']
|
||||
compose:
|
||||
- Let:
|
||||
- a
|
||||
- '{neon_type[5]}'
|
||||
- FnCall: ['simd_shuffle!', [a, a, '{type[6]}']]
|
||||
- FnCall: ['vget_high_{neon_type[1]}', [a]]
|
||||
- Let:
|
||||
- b
|
||||
- '{neon_type[5]}'
|
||||
- FnCall: ['simd_shuffle!', [b, b, '{type[6]}']]
|
||||
- FnCall: ['vget_high_{neon_type[1]}', [b]]
|
||||
- Let: [a, '{neon_type[2]}', {FnCall: [simd_cast, [a]]}]
|
||||
- Let: [b, '{neon_type[2]}', {FnCall: [simd_cast, [b]]}]
|
||||
- FnCall: [simd_add, [a, b]]
|
||||
@@ -14537,22 +14599,21 @@ intrinsics:
|
||||
attr:
|
||||
- *neon-v7
|
||||
- FnCall: [cfg_attr, [*test-is-arm, { FnCall: [assert_instr, ['{type[3]}']] } ]]
|
||||
- FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, { FnCall: [assert_instr, ['{type[4]}']]}] ]
|
||||
- FnCall: [cfg_attr, [*all-neon-target-aarch64-arm64ec-little-endian, { FnCall: [assert_instr, ['{type[4]}']]}] ]
|
||||
- *neon-not-arm-stable
|
||||
- *neon-cfg-arm-unstable
|
||||
safety: safe
|
||||
types:
|
||||
- ['vaddw_high_s8', 'int16x8_t', 'int8x16_t', 'vaddw', 'saddw2', 'int8x8_t', '[8, 9, 10, 11, 12, 13, 14, 15]']
|
||||
- ['vaddw_high_s16', 'int32x4_t', 'int16x8_t', 'vaddw', 'saddw2', 'int16x4_t', '[4, 5, 6, 7]']
|
||||
- ['vaddw_high_s32', 'int64x2_t', 'int32x4_t', 'vaddw', 'saddw2', 'int32x2_t', '[2, 3]']
|
||||
- ['vaddw_high_u8', 'uint16x8_t', 'uint8x16_t', 'vaddw', 'uaddw2', 'uint8x8_t', '[8, 9, 10, 11, 12, 13, 14, 15]']
|
||||
- ['vaddw_high_u16', 'uint32x4_t', 'uint16x8_t', 'vaddw', 'uaddw2', 'uint16x4_t', '[4, 5, 6, 7]']
|
||||
- ['vaddw_high_u32', 'uint64x2_t', 'uint32x4_t', 'vaddw', 'uaddw2', 'uint32x2_t', '[2, 3]']
|
||||
- ['vaddw_high_s8', 'int16x8_t', 'int8x16_t', 'vaddw', 'saddw2', 'int8x8_t']
|
||||
- ['vaddw_high_s16', 'int32x4_t', 'int16x8_t', 'vaddw', 'saddw2', 'int16x4_t']
|
||||
- ['vaddw_high_s32', 'int64x2_t', 'int32x4_t', 'vaddw', 'saddw2', 'int32x2_t']
|
||||
- ['vaddw_high_u8', 'uint16x8_t', 'uint8x16_t', 'vaddw', 'uaddw2', 'uint8x8_t']
|
||||
- ['vaddw_high_u16', 'uint32x4_t', 'uint16x8_t', 'vaddw', 'uaddw2', 'uint16x4_t']
|
||||
- ['vaddw_high_u32', 'uint64x2_t', 'uint32x4_t', 'vaddw', 'uaddw2', 'uint32x2_t']
|
||||
compose:
|
||||
- Let:
|
||||
- b
|
||||
- '{neon_type[5]}'
|
||||
- FnCall: ['simd_shuffle!', [b, b, '{type[6]}']]
|
||||
- FnCall: ['vget_high_{neon_type[2]}', [b]]
|
||||
- Let:
|
||||
- b
|
||||
- '{neon_type[1]}'
|
||||
@@ -14595,17 +14656,17 @@ intrinsics:
|
||||
attr:
|
||||
- *neon-v7
|
||||
- FnCall: [cfg_attr, [*test-is-arm, { FnCall: [assert_instr, ['vaddhn']] } ]]
|
||||
- FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, { FnCall: [assert_instr, ['addhn2']]}] ]
|
||||
- FnCall: [cfg_attr, [*all-neon-target-aarch64-arm64ec-little-endian, { FnCall: [assert_instr, ['addhn2']]}] ]
|
||||
- *neon-not-arm-stable
|
||||
- *neon-cfg-arm-unstable
|
||||
safety: safe
|
||||
types:
|
||||
- ['vaddhn_high_s16', 'int8x8_t', 'int16x8_t', 'int8x16_t', 'int16x8_t::splat(8)', '[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]']
|
||||
- ['vaddhn_high_s32', 'int16x4_t', 'int32x4_t', 'int16x8_t', 'int32x4_t::splat(16)', '[0, 1, 2, 3, 4, 5, 6, 7]']
|
||||
- ['vaddhn_high_s64', 'int32x2_t', 'int64x2_t', 'int32x4_t', 'int64x2_t::splat(32)', '[0, 1, 2, 3]']
|
||||
- ['vaddhn_high_u16', 'uint8x8_t', 'uint16x8_t', 'uint8x16_t', 'uint16x8_t::splat(8)', '[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]']
|
||||
- ['vaddhn_high_u32', 'uint16x4_t', 'uint32x4_t', 'uint16x8_t', 'uint32x4_t::splat(16)', '[0, 1, 2, 3, 4, 5, 6, 7]']
|
||||
- ['vaddhn_high_u64', 'uint32x2_t', 'uint64x2_t', 'uint32x4_t', 'uint64x2_t::splat(32)', '[0, 1, 2, 3]']
|
||||
- ['vaddhn_high_s16', 'int8x8_t', 'int16x8_t', 'int8x16_t', 'int16x8_t::splat(8)']
|
||||
- ['vaddhn_high_s32', 'int16x4_t', 'int32x4_t', 'int16x8_t', 'int32x4_t::splat(16)']
|
||||
- ['vaddhn_high_s64', 'int32x2_t', 'int64x2_t', 'int32x4_t', 'int64x2_t::splat(32)']
|
||||
- ['vaddhn_high_u16', 'uint8x8_t', 'uint16x8_t', 'uint8x16_t', 'uint16x8_t::splat(8)']
|
||||
- ['vaddhn_high_u32', 'uint16x4_t', 'uint32x4_t', 'uint16x8_t', 'uint32x4_t::splat(16)']
|
||||
- ['vaddhn_high_u64', 'uint32x2_t', 'uint64x2_t', 'uint32x4_t', 'uint64x2_t::splat(32)']
|
||||
compose:
|
||||
- Let:
|
||||
- x
|
||||
@@ -14618,7 +14679,7 @@ intrinsics:
|
||||
- - a
|
||||
- b
|
||||
- '{type[4]}'
|
||||
- FnCall: ['simd_shuffle!', [r, x, '{type[5]}']]
|
||||
- FnCall: ['vcombine_{neon_type[1]}', [r, x]]
|
||||
|
||||
- name: "{type[0]}"
|
||||
doc: "Vector narrow integer."
|
||||
@@ -14924,6 +14985,7 @@ intrinsics:
|
||||
arguments: ["v: {neon_type[1]}"]
|
||||
return_type: "{type[2]}"
|
||||
safety: safe
|
||||
big_endian_inverse: true
|
||||
static_defs: ['const IMM5: i32']
|
||||
attr:
|
||||
- *neon-v7
|
||||
@@ -14962,6 +15024,7 @@ intrinsics:
|
||||
arguments: ["v: {neon_type[1]}"]
|
||||
return_type: "{type[2]}"
|
||||
safety: safe
|
||||
big_endian_inverse: true
|
||||
static_defs: ['const IMM5: i32']
|
||||
attr:
|
||||
- *neon-v7
|
||||
@@ -15008,9 +15071,11 @@ intrinsics:
|
||||
- *neon-not-arm-stable
|
||||
- *neon-cfg-arm-unstable
|
||||
safety: safe
|
||||
big_endian_inverse: true
|
||||
types:
|
||||
- ['vget_high_s64', 'int64x2_t', 'int64x1_t', 'vmov', 'ext', 'int64x1_t([simd_extract!(a, 1)])']
|
||||
- ['vget_high_u64', 'uint64x2_t', 'uint64x1_t', 'vmov', 'ext', 'uint64x1_t([simd_extract!(a, 1)])']
|
||||
- ['vget_high_s64', 'int64x2_t', 'int64x1_t', 'vmov', 'ext', 'int64x1_t([simd_extract!(a, 1)])']
|
||||
- ['vget_high_u64', 'uint64x2_t', 'uint64x1_t', 'vmov', 'ext', 'uint64x1_t([simd_extract!(a, 1)])']
|
||||
- ['vget_high_p64', 'poly64x2_t', 'poly64x1_t', 'vmov', 'ext', 'transmute(u64x1::new(simd_extract!(a, 1)))']
|
||||
compose:
|
||||
- Identifier: ['{type[5]}', UnsafeSymbol]
|
||||
|
||||
@@ -15024,9 +15089,11 @@ intrinsics:
|
||||
- *neon-not-arm-stable
|
||||
- *neon-cfg-arm-unstable
|
||||
safety: safe
|
||||
big_endian_inverse: true
|
||||
types:
|
||||
- ['vget_low_s64', 'int64x2_t', 'int64x1_t', 'int64x1_t([simd_extract!(a, 0)])']
|
||||
- ['vget_low_u64', 'uint64x2_t', 'uint64x1_t', 'uint64x1_t([simd_extract!(a, 0)])']
|
||||
- ['vget_low_s64', 'int64x2_t', 'int64x1_t', 'int64x1_t([simd_extract!(a, 0)])']
|
||||
- ['vget_low_u64', 'uint64x2_t', 'uint64x1_t', 'uint64x1_t([simd_extract!(a, 0)])']
|
||||
- ['vget_low_p64', 'poly64x2_t', 'poly64x1_t', 'transmute(u64x1::new(simd_extract!(a, 0)))']
|
||||
compose:
|
||||
- Identifier: ['{type[3]}', UnsafeSymbol]
|
||||
|
||||
@@ -15041,6 +15108,7 @@ intrinsics:
|
||||
- *neon-not-arm-stable
|
||||
- *neon-cfg-arm-unstable
|
||||
safety: safe
|
||||
big_endian_inverse: true
|
||||
types:
|
||||
- ['vget_high_s8', 'int8x16_t', 'int8x8_t', 'vmov', 'ext', '[8, 9, 10, 11, 12, 13, 14, 15]']
|
||||
- ['vget_high_u8', 'uint8x16_t', 'uint8x8_t', 'vmov', 'ext', '[8, 9, 10, 11, 12, 13, 14, 15]']
|
||||
@@ -15064,6 +15132,7 @@ intrinsics:
|
||||
- *neon-not-arm-stable
|
||||
- *neon-cfg-arm-unstable
|
||||
safety: safe
|
||||
big_endian_inverse: true
|
||||
types:
|
||||
- ['vget_low_s8', 'int8x16_t', 'int8x8_t', '[0, 1, 2, 3, 4, 5, 6, 7]']
|
||||
- ['vget_low_u8', 'uint8x16_t', 'uint8x8_t','[0, 1, 2, 3, 4, 5, 6, 7]']
|
||||
@@ -15216,6 +15285,7 @@ intrinsics:
|
||||
- *neon-not-arm-stable
|
||||
- *neon-cfg-arm-unstable
|
||||
safety: safe
|
||||
big_endian_inverse: true
|
||||
types:
|
||||
- ['vrev16_s8', 'int8x8_t', 'vrev16.8', 'rev16', '[1, 0, 3, 2, 5, 4, 7, 6]']
|
||||
- ['vrev16q_s8', 'int8x16_t', 'vrev16.8', 'rev16', '[1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14]']
|
||||
|
||||
@@ -188,7 +188,7 @@ fn from_str(s: &str) -> Result<Self, Self::Err> {
|
||||
"rot90_lane" => Ok(SuffixKind::Rot90Lane),
|
||||
"rot90_laneq" => Ok(SuffixKind::Rot90LaneQ),
|
||||
"rot180" => Ok(SuffixKind::Rot180),
|
||||
"rot180_lane" => Ok(SuffixKind::Rot180LaneQ),
|
||||
"rot180_lane" => Ok(SuffixKind::Rot180Lane),
|
||||
"rot180_laneq" => Ok(SuffixKind::Rot180LaneQ),
|
||||
"u" => Ok(SuffixKind::Unsigned),
|
||||
"nox" => Ok(SuffixKind::NoX),
|
||||
|
||||
@@ -1059,23 +1059,8 @@ pub fn generate_variant(
|
||||
|
||||
/// Add a big endian implementation
|
||||
fn generate_big_endian(&self, variant: &mut Intrinsic) {
|
||||
/* We can't always blindly reverse the bits only in certain conditions
|
||||
* do we need a different order - thus this allows us to have the
|
||||
* ability to do so without having to play codegolf with the yaml AST */
|
||||
let should_reverse = {
|
||||
if let Some(should_reverse) = variant.big_endian_inverse {
|
||||
should_reverse
|
||||
} else if variant.compose.len() == 1 {
|
||||
match &variant.compose[0] {
|
||||
Expression::FnCall(fn_call) => fn_call.0.to_string() == "transmute",
|
||||
_ => false,
|
||||
}
|
||||
} else {
|
||||
false
|
||||
}
|
||||
};
|
||||
|
||||
if !should_reverse {
|
||||
// We only reverse if it was specifically requested
|
||||
if !variant.big_endian_inverse.unwrap_or(false) {
|
||||
return;
|
||||
}
|
||||
|
||||
|
||||
@@ -228,21 +228,25 @@ asm-fmts = xd, xj, ui6
|
||||
data-types = V4DI, V4DI, UQI
|
||||
|
||||
/// lasx_xvbitclr_b
|
||||
impl = portable
|
||||
name = lasx_xvbitclr_b
|
||||
asm-fmts = xd, xj, xk
|
||||
data-types = UV32QI, UV32QI, UV32QI
|
||||
|
||||
/// lasx_xvbitclr_h
|
||||
impl = portable
|
||||
name = lasx_xvbitclr_h
|
||||
asm-fmts = xd, xj, xk
|
||||
data-types = UV16HI, UV16HI, UV16HI
|
||||
|
||||
/// lasx_xvbitclr_w
|
||||
impl = portable
|
||||
name = lasx_xvbitclr_w
|
||||
asm-fmts = xd, xj, xk
|
||||
data-types = UV8SI, UV8SI, UV8SI
|
||||
|
||||
/// lasx_xvbitclr_d
|
||||
impl = portable
|
||||
name = lasx_xvbitclr_d
|
||||
asm-fmts = xd, xj, xk
|
||||
data-types = UV4DI, UV4DI, UV4DI
|
||||
@@ -268,21 +272,25 @@ asm-fmts = xd, xj, ui6
|
||||
data-types = UV4DI, UV4DI, UQI
|
||||
|
||||
/// lasx_xvbitset_b
|
||||
impl = portable
|
||||
name = lasx_xvbitset_b
|
||||
asm-fmts = xd, xj, xk
|
||||
data-types = UV32QI, UV32QI, UV32QI
|
||||
|
||||
/// lasx_xvbitset_h
|
||||
impl = portable
|
||||
name = lasx_xvbitset_h
|
||||
asm-fmts = xd, xj, xk
|
||||
data-types = UV16HI, UV16HI, UV16HI
|
||||
|
||||
/// lasx_xvbitset_w
|
||||
impl = portable
|
||||
name = lasx_xvbitset_w
|
||||
asm-fmts = xd, xj, xk
|
||||
data-types = UV8SI, UV8SI, UV8SI
|
||||
|
||||
/// lasx_xvbitset_d
|
||||
impl = portable
|
||||
name = lasx_xvbitset_d
|
||||
asm-fmts = xd, xj, xk
|
||||
data-types = UV4DI, UV4DI, UV4DI
|
||||
@@ -308,21 +316,25 @@ asm-fmts = xd, xj, ui6
|
||||
data-types = UV4DI, UV4DI, UQI
|
||||
|
||||
/// lasx_xvbitrev_b
|
||||
impl = portable
|
||||
name = lasx_xvbitrev_b
|
||||
asm-fmts = xd, xj, xk
|
||||
data-types = UV32QI, UV32QI, UV32QI
|
||||
|
||||
/// lasx_xvbitrev_h
|
||||
impl = portable
|
||||
name = lasx_xvbitrev_h
|
||||
asm-fmts = xd, xj, xk
|
||||
data-types = UV16HI, UV16HI, UV16HI
|
||||
|
||||
/// lasx_xvbitrev_w
|
||||
impl = portable
|
||||
name = lasx_xvbitrev_w
|
||||
asm-fmts = xd, xj, xk
|
||||
data-types = UV8SI, UV8SI, UV8SI
|
||||
|
||||
/// lasx_xvbitrev_d
|
||||
impl = portable
|
||||
name = lasx_xvbitrev_d
|
||||
asm-fmts = xd, xj, xk
|
||||
data-types = UV4DI, UV4DI, UV4DI
|
||||
@@ -912,61 +924,73 @@ asm-fmts = xd, xj, ui6
|
||||
data-types = UV4DI, UV4DI, UQI
|
||||
|
||||
/// lasx_xvadda_b
|
||||
impl = portable
|
||||
name = lasx_xvadda_b
|
||||
asm-fmts = xd, xj, xk
|
||||
data-types = V32QI, V32QI, V32QI
|
||||
|
||||
/// lasx_xvadda_h
|
||||
impl = portable
|
||||
name = lasx_xvadda_h
|
||||
asm-fmts = xd, xj, xk
|
||||
data-types = V16HI, V16HI, V16HI
|
||||
|
||||
/// lasx_xvadda_w
|
||||
impl = portable
|
||||
name = lasx_xvadda_w
|
||||
asm-fmts = xd, xj, xk
|
||||
data-types = V8SI, V8SI, V8SI
|
||||
|
||||
/// lasx_xvadda_d
|
||||
impl = portable
|
||||
name = lasx_xvadda_d
|
||||
asm-fmts = xd, xj, xk
|
||||
data-types = V4DI, V4DI, V4DI
|
||||
|
||||
/// lasx_xvsadd_b
|
||||
impl = portable
|
||||
name = lasx_xvsadd_b
|
||||
asm-fmts = xd, xj, xk
|
||||
data-types = V32QI, V32QI, V32QI
|
||||
|
||||
/// lasx_xvsadd_h
|
||||
impl = portable
|
||||
name = lasx_xvsadd_h
|
||||
asm-fmts = xd, xj, xk
|
||||
data-types = V16HI, V16HI, V16HI
|
||||
|
||||
/// lasx_xvsadd_w
|
||||
impl = portable
|
||||
name = lasx_xvsadd_w
|
||||
asm-fmts = xd, xj, xk
|
||||
data-types = V8SI, V8SI, V8SI
|
||||
|
||||
/// lasx_xvsadd_d
|
||||
impl = portable
|
||||
name = lasx_xvsadd_d
|
||||
asm-fmts = xd, xj, xk
|
||||
data-types = V4DI, V4DI, V4DI
|
||||
|
||||
/// lasx_xvsadd_bu
|
||||
impl = portable
|
||||
name = lasx_xvsadd_bu
|
||||
asm-fmts = xd, xj, xk
|
||||
data-types = UV32QI, UV32QI, UV32QI
|
||||
|
||||
/// lasx_xvsadd_hu
|
||||
impl = portable
|
||||
name = lasx_xvsadd_hu
|
||||
asm-fmts = xd, xj, xk
|
||||
data-types = UV16HI, UV16HI, UV16HI
|
||||
|
||||
/// lasx_xvsadd_wu
|
||||
impl = portable
|
||||
name = lasx_xvsadd_wu
|
||||
asm-fmts = xd, xj, xk
|
||||
data-types = UV8SI, UV8SI, UV8SI
|
||||
|
||||
/// lasx_xvsadd_du
|
||||
impl = portable
|
||||
name = lasx_xvsadd_du
|
||||
asm-fmts = xd, xj, xk
|
||||
data-types = UV4DI, UV4DI, UV4DI
|
||||
@@ -1052,81 +1076,97 @@ asm-fmts = xd, xj, xk
|
||||
data-types = UV4DI, UV4DI, UV4DI
|
||||
|
||||
/// lasx_xvssub_b
|
||||
impl = portable
|
||||
name = lasx_xvssub_b
|
||||
asm-fmts = xd, xj, xk
|
||||
data-types = V32QI, V32QI, V32QI
|
||||
|
||||
/// lasx_xvssub_h
|
||||
impl = portable
|
||||
name = lasx_xvssub_h
|
||||
asm-fmts = xd, xj, xk
|
||||
data-types = V16HI, V16HI, V16HI
|
||||
|
||||
/// lasx_xvssub_w
|
||||
impl = portable
|
||||
name = lasx_xvssub_w
|
||||
asm-fmts = xd, xj, xk
|
||||
data-types = V8SI, V8SI, V8SI
|
||||
|
||||
/// lasx_xvssub_d
|
||||
impl = portable
|
||||
name = lasx_xvssub_d
|
||||
asm-fmts = xd, xj, xk
|
||||
data-types = V4DI, V4DI, V4DI
|
||||
|
||||
/// lasx_xvssub_bu
|
||||
impl = portable
|
||||
name = lasx_xvssub_bu
|
||||
asm-fmts = xd, xj, xk
|
||||
data-types = UV32QI, UV32QI, UV32QI
|
||||
|
||||
/// lasx_xvssub_hu
|
||||
impl = portable
|
||||
name = lasx_xvssub_hu
|
||||
asm-fmts = xd, xj, xk
|
||||
data-types = UV16HI, UV16HI, UV16HI
|
||||
|
||||
/// lasx_xvssub_wu
|
||||
impl = portable
|
||||
name = lasx_xvssub_wu
|
||||
asm-fmts = xd, xj, xk
|
||||
data-types = UV8SI, UV8SI, UV8SI
|
||||
|
||||
/// lasx_xvssub_du
|
||||
impl = portable
|
||||
name = lasx_xvssub_du
|
||||
asm-fmts = xd, xj, xk
|
||||
data-types = UV4DI, UV4DI, UV4DI
|
||||
|
||||
/// lasx_xvabsd_b
|
||||
impl = portable
|
||||
name = lasx_xvabsd_b
|
||||
asm-fmts = xd, xj, xk
|
||||
data-types = V32QI, V32QI, V32QI
|
||||
|
||||
/// lasx_xvabsd_h
|
||||
impl = portable
|
||||
name = lasx_xvabsd_h
|
||||
asm-fmts = xd, xj, xk
|
||||
data-types = V16HI, V16HI, V16HI
|
||||
|
||||
/// lasx_xvabsd_w
|
||||
impl = portable
|
||||
name = lasx_xvabsd_w
|
||||
asm-fmts = xd, xj, xk
|
||||
data-types = V8SI, V8SI, V8SI
|
||||
|
||||
/// lasx_xvabsd_d
|
||||
impl = portable
|
||||
name = lasx_xvabsd_d
|
||||
asm-fmts = xd, xj, xk
|
||||
data-types = V4DI, V4DI, V4DI
|
||||
|
||||
/// lasx_xvabsd_bu
|
||||
impl = portable
|
||||
name = lasx_xvabsd_bu
|
||||
asm-fmts = xd, xj, xk
|
||||
data-types = UV32QI, UV32QI, UV32QI
|
||||
|
||||
/// lasx_xvabsd_hu
|
||||
impl = portable
|
||||
name = lasx_xvabsd_hu
|
||||
asm-fmts = xd, xj, xk
|
||||
data-types = UV16HI, UV16HI, UV16HI
|
||||
|
||||
/// lasx_xvabsd_wu
|
||||
impl = portable
|
||||
name = lasx_xvabsd_wu
|
||||
asm-fmts = xd, xj, xk
|
||||
data-types = UV8SI, UV8SI, UV8SI
|
||||
|
||||
/// lasx_xvabsd_du
|
||||
impl = portable
|
||||
name = lasx_xvabsd_du
|
||||
asm-fmts = xd, xj, xk
|
||||
data-types = UV4DI, UV4DI, UV4DI
|
||||
@@ -1380,41 +1420,49 @@ asm-fmts = xd, xj, ui1
|
||||
data-types = V4DI, V4DI, UQI
|
||||
|
||||
/// lasx_xvpickev_b
|
||||
impl = portable
|
||||
name = lasx_xvpickev_b
|
||||
asm-fmts = xd, xj, xk
|
||||
data-types = V32QI, V32QI, V32QI
|
||||
|
||||
/// lasx_xvpickev_h
|
||||
impl = portable
|
||||
name = lasx_xvpickev_h
|
||||
asm-fmts = xd, xj, xk
|
||||
data-types = V16HI, V16HI, V16HI
|
||||
|
||||
/// lasx_xvpickev_w
|
||||
impl = portable
|
||||
name = lasx_xvpickev_w
|
||||
asm-fmts = xd, xj, xk
|
||||
data-types = V8SI, V8SI, V8SI
|
||||
|
||||
/// lasx_xvpickev_d
|
||||
impl = portable
|
||||
name = lasx_xvpickev_d
|
||||
asm-fmts = xd, xj, xk
|
||||
data-types = V4DI, V4DI, V4DI
|
||||
|
||||
/// lasx_xvpickod_b
|
||||
impl = portable
|
||||
name = lasx_xvpickod_b
|
||||
asm-fmts = xd, xj, xk
|
||||
data-types = V32QI, V32QI, V32QI
|
||||
|
||||
/// lasx_xvpickod_h
|
||||
impl = portable
|
||||
name = lasx_xvpickod_h
|
||||
asm-fmts = xd, xj, xk
|
||||
data-types = V16HI, V16HI, V16HI
|
||||
|
||||
/// lasx_xvpickod_w
|
||||
impl = portable
|
||||
name = lasx_xvpickod_w
|
||||
asm-fmts = xd, xj, xk
|
||||
data-types = V8SI, V8SI, V8SI
|
||||
|
||||
/// lasx_xvpickod_d
|
||||
impl = portable
|
||||
name = lasx_xvpickod_d
|
||||
asm-fmts = xd, xj, xk
|
||||
data-types = V4DI, V4DI, V4DI
|
||||
|
||||
@@ -228,21 +228,25 @@ asm-fmts = vd, vj, ui6
|
||||
data-types = V2DI, V2DI, UQI
|
||||
|
||||
/// lsx_vbitclr_b
|
||||
impl = portable
|
||||
name = lsx_vbitclr_b
|
||||
asm-fmts = vd, vj, vk
|
||||
data-types = UV16QI, UV16QI, UV16QI
|
||||
|
||||
/// lsx_vbitclr_h
|
||||
impl = portable
|
||||
name = lsx_vbitclr_h
|
||||
asm-fmts = vd, vj, vk
|
||||
data-types = UV8HI, UV8HI, UV8HI
|
||||
|
||||
/// lsx_vbitclr_w
|
||||
impl = portable
|
||||
name = lsx_vbitclr_w
|
||||
asm-fmts = vd, vj, vk
|
||||
data-types = UV4SI, UV4SI, UV4SI
|
||||
|
||||
/// lsx_vbitclr_d
|
||||
impl = portable
|
||||
name = lsx_vbitclr_d
|
||||
asm-fmts = vd, vj, vk
|
||||
data-types = UV2DI, UV2DI, UV2DI
|
||||
@@ -268,21 +272,25 @@ asm-fmts = vd, vj, ui6
|
||||
data-types = UV2DI, UV2DI, UQI
|
||||
|
||||
/// lsx_vbitset_b
|
||||
impl = portable
|
||||
name = lsx_vbitset_b
|
||||
asm-fmts = vd, vj, vk
|
||||
data-types = UV16QI, UV16QI, UV16QI
|
||||
|
||||
/// lsx_vbitset_h
|
||||
impl = portable
|
||||
name = lsx_vbitset_h
|
||||
asm-fmts = vd, vj, vk
|
||||
data-types = UV8HI, UV8HI, UV8HI
|
||||
|
||||
/// lsx_vbitset_w
|
||||
impl = portable
|
||||
name = lsx_vbitset_w
|
||||
asm-fmts = vd, vj, vk
|
||||
data-types = UV4SI, UV4SI, UV4SI
|
||||
|
||||
/// lsx_vbitset_d
|
||||
impl = portable
|
||||
name = lsx_vbitset_d
|
||||
asm-fmts = vd, vj, vk
|
||||
data-types = UV2DI, UV2DI, UV2DI
|
||||
@@ -308,21 +316,25 @@ asm-fmts = vd, vj, ui6
|
||||
data-types = UV2DI, UV2DI, UQI
|
||||
|
||||
/// lsx_vbitrev_b
|
||||
impl = portable
|
||||
name = lsx_vbitrev_b
|
||||
asm-fmts = vd, vj, vk
|
||||
data-types = UV16QI, UV16QI, UV16QI
|
||||
|
||||
/// lsx_vbitrev_h
|
||||
impl = portable
|
||||
name = lsx_vbitrev_h
|
||||
asm-fmts = vd, vj, vk
|
||||
data-types = UV8HI, UV8HI, UV8HI
|
||||
|
||||
/// lsx_vbitrev_w
|
||||
impl = portable
|
||||
name = lsx_vbitrev_w
|
||||
asm-fmts = vd, vj, vk
|
||||
data-types = UV4SI, UV4SI, UV4SI
|
||||
|
||||
/// lsx_vbitrev_d
|
||||
impl = portable
|
||||
name = lsx_vbitrev_d
|
||||
asm-fmts = vd, vj, vk
|
||||
data-types = UV2DI, UV2DI, UV2DI
|
||||
@@ -912,61 +924,73 @@ asm-fmts = vd, vj, ui6
|
||||
data-types = UV2DI, UV2DI, UQI
|
||||
|
||||
/// lsx_vadda_b
|
||||
impl = portable
|
||||
name = lsx_vadda_b
|
||||
asm-fmts = vd, vj, vk
|
||||
data-types = V16QI, V16QI, V16QI
|
||||
|
||||
/// lsx_vadda_h
|
||||
impl = portable
|
||||
name = lsx_vadda_h
|
||||
asm-fmts = vd, vj, vk
|
||||
data-types = V8HI, V8HI, V8HI
|
||||
|
||||
/// lsx_vadda_w
|
||||
impl = portable
|
||||
name = lsx_vadda_w
|
||||
asm-fmts = vd, vj, vk
|
||||
data-types = V4SI, V4SI, V4SI
|
||||
|
||||
/// lsx_vadda_d
|
||||
impl = portable
|
||||
name = lsx_vadda_d
|
||||
asm-fmts = vd, vj, vk
|
||||
data-types = V2DI, V2DI, V2DI
|
||||
|
||||
/// lsx_vsadd_b
|
||||
impl = portable
|
||||
name = lsx_vsadd_b
|
||||
asm-fmts = vd, vj, vk
|
||||
data-types = V16QI, V16QI, V16QI
|
||||
|
||||
/// lsx_vsadd_h
|
||||
impl = portable
|
||||
name = lsx_vsadd_h
|
||||
asm-fmts = vd, vj, vk
|
||||
data-types = V8HI, V8HI, V8HI
|
||||
|
||||
/// lsx_vsadd_w
|
||||
impl = portable
|
||||
name = lsx_vsadd_w
|
||||
asm-fmts = vd, vj, vk
|
||||
data-types = V4SI, V4SI, V4SI
|
||||
|
||||
/// lsx_vsadd_d
|
||||
impl = portable
|
||||
name = lsx_vsadd_d
|
||||
asm-fmts = vd, vj, vk
|
||||
data-types = V2DI, V2DI, V2DI
|
||||
|
||||
/// lsx_vsadd_bu
|
||||
impl = portable
|
||||
name = lsx_vsadd_bu
|
||||
asm-fmts = vd, vj, vk
|
||||
data-types = UV16QI, UV16QI, UV16QI
|
||||
|
||||
/// lsx_vsadd_hu
|
||||
impl = portable
|
||||
name = lsx_vsadd_hu
|
||||
asm-fmts = vd, vj, vk
|
||||
data-types = UV8HI, UV8HI, UV8HI
|
||||
|
||||
/// lsx_vsadd_wu
|
||||
impl = portable
|
||||
name = lsx_vsadd_wu
|
||||
asm-fmts = vd, vj, vk
|
||||
data-types = UV4SI, UV4SI, UV4SI
|
||||
|
||||
/// lsx_vsadd_du
|
||||
impl = portable
|
||||
name = lsx_vsadd_du
|
||||
asm-fmts = vd, vj, vk
|
||||
data-types = UV2DI, UV2DI, UV2DI
|
||||
@@ -1052,81 +1076,97 @@ asm-fmts = vd, vj, vk
|
||||
data-types = UV2DI, UV2DI, UV2DI
|
||||
|
||||
/// lsx_vssub_b
|
||||
impl = portable
|
||||
name = lsx_vssub_b
|
||||
asm-fmts = vd, vj, vk
|
||||
data-types = V16QI, V16QI, V16QI
|
||||
|
||||
/// lsx_vssub_h
|
||||
impl = portable
|
||||
name = lsx_vssub_h
|
||||
asm-fmts = vd, vj, vk
|
||||
data-types = V8HI, V8HI, V8HI
|
||||
|
||||
/// lsx_vssub_w
|
||||
impl = portable
|
||||
name = lsx_vssub_w
|
||||
asm-fmts = vd, vj, vk
|
||||
data-types = V4SI, V4SI, V4SI
|
||||
|
||||
/// lsx_vssub_d
|
||||
impl = portable
|
||||
name = lsx_vssub_d
|
||||
asm-fmts = vd, vj, vk
|
||||
data-types = V2DI, V2DI, V2DI
|
||||
|
||||
/// lsx_vssub_bu
|
||||
impl = portable
|
||||
name = lsx_vssub_bu
|
||||
asm-fmts = vd, vj, vk
|
||||
data-types = UV16QI, UV16QI, UV16QI
|
||||
|
||||
/// lsx_vssub_hu
|
||||
impl = portable
|
||||
name = lsx_vssub_hu
|
||||
asm-fmts = vd, vj, vk
|
||||
data-types = UV8HI, UV8HI, UV8HI
|
||||
|
||||
/// lsx_vssub_wu
|
||||
impl = portable
|
||||
name = lsx_vssub_wu
|
||||
asm-fmts = vd, vj, vk
|
||||
data-types = UV4SI, UV4SI, UV4SI
|
||||
|
||||
/// lsx_vssub_du
|
||||
impl = portable
|
||||
name = lsx_vssub_du
|
||||
asm-fmts = vd, vj, vk
|
||||
data-types = UV2DI, UV2DI, UV2DI
|
||||
|
||||
/// lsx_vabsd_b
|
||||
impl = portable
|
||||
name = lsx_vabsd_b
|
||||
asm-fmts = vd, vj, vk
|
||||
data-types = V16QI, V16QI, V16QI
|
||||
|
||||
/// lsx_vabsd_h
|
||||
impl = portable
|
||||
name = lsx_vabsd_h
|
||||
asm-fmts = vd, vj, vk
|
||||
data-types = V8HI, V8HI, V8HI
|
||||
|
||||
/// lsx_vabsd_w
|
||||
impl = portable
|
||||
name = lsx_vabsd_w
|
||||
asm-fmts = vd, vj, vk
|
||||
data-types = V4SI, V4SI, V4SI
|
||||
|
||||
/// lsx_vabsd_d
|
||||
impl = portable
|
||||
name = lsx_vabsd_d
|
||||
asm-fmts = vd, vj, vk
|
||||
data-types = V2DI, V2DI, V2DI
|
||||
|
||||
/// lsx_vabsd_bu
|
||||
impl = portable
|
||||
name = lsx_vabsd_bu
|
||||
asm-fmts = vd, vj, vk
|
||||
data-types = UV16QI, UV16QI, UV16QI
|
||||
|
||||
/// lsx_vabsd_hu
|
||||
impl = portable
|
||||
name = lsx_vabsd_hu
|
||||
asm-fmts = vd, vj, vk
|
||||
data-types = UV8HI, UV8HI, UV8HI
|
||||
|
||||
/// lsx_vabsd_wu
|
||||
impl = portable
|
||||
name = lsx_vabsd_wu
|
||||
asm-fmts = vd, vj, vk
|
||||
data-types = UV4SI, UV4SI, UV4SI
|
||||
|
||||
/// lsx_vabsd_du
|
||||
impl = portable
|
||||
name = lsx_vabsd_du
|
||||
asm-fmts = vd, vj, vk
|
||||
data-types = UV2DI, UV2DI, UV2DI
|
||||
@@ -1400,41 +1440,49 @@ asm-fmts = vd, vj, ui1
|
||||
data-types = V2DI, V2DI, UQI
|
||||
|
||||
/// lsx_vpickev_b
|
||||
impl = portable
|
||||
name = lsx_vpickev_b
|
||||
asm-fmts = vd, vj, vk
|
||||
data-types = V16QI, V16QI, V16QI
|
||||
|
||||
/// lsx_vpickev_h
|
||||
impl = portable
|
||||
name = lsx_vpickev_h
|
||||
asm-fmts = vd, vj, vk
|
||||
data-types = V8HI, V8HI, V8HI
|
||||
|
||||
/// lsx_vpickev_w
|
||||
impl = portable
|
||||
name = lsx_vpickev_w
|
||||
asm-fmts = vd, vj, vk
|
||||
data-types = V4SI, V4SI, V4SI
|
||||
|
||||
/// lsx_vpickev_d
|
||||
impl = portable
|
||||
name = lsx_vpickev_d
|
||||
asm-fmts = vd, vj, vk
|
||||
data-types = V2DI, V2DI, V2DI
|
||||
|
||||
/// lsx_vpickod_b
|
||||
impl = portable
|
||||
name = lsx_vpickod_b
|
||||
asm-fmts = vd, vj, vk
|
||||
data-types = V16QI, V16QI, V16QI
|
||||
|
||||
/// lsx_vpickod_h
|
||||
impl = portable
|
||||
name = lsx_vpickod_h
|
||||
asm-fmts = vd, vj, vk
|
||||
data-types = V8HI, V8HI, V8HI
|
||||
|
||||
/// lsx_vpickod_w
|
||||
impl = portable
|
||||
name = lsx_vpickod_w
|
||||
asm-fmts = vd, vj, vk
|
||||
data-types = V4SI, V4SI, V4SI
|
||||
|
||||
/// lsx_vpickod_d
|
||||
impl = portable
|
||||
name = lsx_vpickod_d
|
||||
asm-fmts = vd, vj, vk
|
||||
data-types = V2DI, V2DI, V2DI
|
||||
|
||||
@@ -191,6 +191,54 @@ lsx_vrepli_b
|
||||
lsx_vrepli_h
|
||||
lsx_vrepli_w
|
||||
lsx_vrepli_d
|
||||
lsx_vbitclr_b
|
||||
lsx_vbitclr_h
|
||||
lsx_vbitclr_w
|
||||
lsx_vbitclr_d
|
||||
lsx_vbitset_b
|
||||
lsx_vbitset_h
|
||||
lsx_vbitset_w
|
||||
lsx_vbitset_d
|
||||
lsx_vbitrev_b
|
||||
lsx_vbitrev_h
|
||||
lsx_vbitrev_w
|
||||
lsx_vbitrev_d
|
||||
lsx_vsadd_b
|
||||
lsx_vsadd_h
|
||||
lsx_vsadd_w
|
||||
lsx_vsadd_d
|
||||
lsx_vsadd_bu
|
||||
lsx_vsadd_hu
|
||||
lsx_vsadd_wu
|
||||
lsx_vsadd_du
|
||||
lsx_vssub_b
|
||||
lsx_vssub_h
|
||||
lsx_vssub_w
|
||||
lsx_vssub_d
|
||||
lsx_vssub_bu
|
||||
lsx_vssub_hu
|
||||
lsx_vssub_wu
|
||||
lsx_vssub_du
|
||||
lsx_vadda_b
|
||||
lsx_vadda_h
|
||||
lsx_vadda_w
|
||||
lsx_vadda_d
|
||||
lsx_vabsd_b
|
||||
lsx_vabsd_h
|
||||
lsx_vabsd_w
|
||||
lsx_vabsd_d
|
||||
lsx_vabsd_bu
|
||||
lsx_vabsd_hu
|
||||
lsx_vabsd_wu
|
||||
lsx_vabsd_du
|
||||
lsx_vpickev_b
|
||||
lsx_vpickev_h
|
||||
lsx_vpickev_w
|
||||
lsx_vpickev_d
|
||||
lsx_vpickod_b
|
||||
lsx_vpickod_h
|
||||
lsx_vpickod_w
|
||||
lsx_vpickod_d
|
||||
|
||||
# LASX intrinsics
|
||||
lasx_xvsll_b
|
||||
@@ -379,3 +427,51 @@ lasx_xvrepli_b
|
||||
lasx_xvrepli_h
|
||||
lasx_xvrepli_w
|
||||
lasx_xvrepli_d
|
||||
lasx_xvbitclr_b
|
||||
lasx_xvbitclr_h
|
||||
lasx_xvbitclr_w
|
||||
lasx_xvbitclr_d
|
||||
lasx_xvbitset_b
|
||||
lasx_xvbitset_h
|
||||
lasx_xvbitset_w
|
||||
lasx_xvbitset_d
|
||||
lasx_xvbitrev_b
|
||||
lasx_xvbitrev_h
|
||||
lasx_xvbitrev_w
|
||||
lasx_xvbitrev_d
|
||||
lasx_xvsadd_b
|
||||
lasx_xvsadd_h
|
||||
lasx_xvsadd_w
|
||||
lasx_xvsadd_d
|
||||
lasx_xvsadd_bu
|
||||
lasx_xvsadd_hu
|
||||
lasx_xvsadd_wu
|
||||
lasx_xvsadd_du
|
||||
lasx_xvssub_b
|
||||
lasx_xvssub_h
|
||||
lasx_xvssub_w
|
||||
lasx_xvssub_d
|
||||
lasx_xvssub_bu
|
||||
lasx_xvssub_hu
|
||||
lasx_xvssub_wu
|
||||
lasx_xvssub_du
|
||||
lasx_xvadda_b
|
||||
lasx_xvadda_h
|
||||
lasx_xvadda_w
|
||||
lasx_xvadda_d
|
||||
lasx_xvabsd_b
|
||||
lasx_xvabsd_h
|
||||
lasx_xvabsd_w
|
||||
lasx_xvabsd_d
|
||||
lasx_xvabsd_bu
|
||||
lasx_xvabsd_hu
|
||||
lasx_xvabsd_wu
|
||||
lasx_xvabsd_du
|
||||
lasx_xvpickev_b
|
||||
lasx_xvpickev_h
|
||||
lasx_xvpickev_w
|
||||
lasx_xvpickev_d
|
||||
lasx_xvpickod_b
|
||||
lasx_xvpickod_h
|
||||
lasx_xvpickod_w
|
||||
lasx_xvpickod_d
|
||||
|
||||
@@ -5145,4 +5145,128 @@ fn parse_ty_base(s: &str) -> &'static Type {
|
||||
"vzipq_p16",
|
||||
"__rndr",
|
||||
"__rndrrs",
|
||||
"vcopy_laneq_f64",
|
||||
"vcopy_laneq_f64",
|
||||
"vcopy_laneq_s64",
|
||||
"vcopy_laneq_s64",
|
||||
"vcopy_laneq_u64",
|
||||
"vcopy_laneq_u64",
|
||||
"vcopy_laneq_p64",
|
||||
"vcopy_laneq_p64",
|
||||
"vget_high_f64",
|
||||
"vget_high_f64",
|
||||
"vget_high_p64",
|
||||
"vget_high_p64",
|
||||
"vget_low_f64",
|
||||
"vget_low_f64",
|
||||
"vget_low_p64",
|
||||
"vget_low_p64",
|
||||
"vgetq_lane_f64",
|
||||
"vgetq_lane_f64",
|
||||
"vaddl_high_s16",
|
||||
"vaddl_high_s16",
|
||||
"vaddl_high_s32",
|
||||
"vaddl_high_s32",
|
||||
"vaddl_high_s8",
|
||||
"vaddl_high_s8",
|
||||
"vaddl_high_u16",
|
||||
"vaddl_high_u16",
|
||||
"vaddl_high_u32",
|
||||
"vaddl_high_u32",
|
||||
"vaddl_high_u8",
|
||||
"vaddl_high_u8",
|
||||
"vget_high_f32",
|
||||
"vget_high_f32",
|
||||
"vget_high_p16",
|
||||
"vget_high_p16",
|
||||
"vget_high_p8",
|
||||
"vget_high_p8",
|
||||
"vget_high_s16",
|
||||
"vget_high_s16",
|
||||
"vget_high_s32",
|
||||
"vget_high_s32",
|
||||
"vget_high_s8",
|
||||
"vget_high_s8",
|
||||
"vget_high_u16",
|
||||
"vget_high_u16",
|
||||
"vget_high_u32",
|
||||
"vget_high_u32",
|
||||
"vget_high_u8",
|
||||
"vget_high_u8",
|
||||
"vget_high_s64",
|
||||
"vget_high_s64",
|
||||
"vget_high_u64",
|
||||
"vget_high_u64",
|
||||
"vget_lane_f32",
|
||||
"vget_lane_f32",
|
||||
"vget_lane_p16",
|
||||
"vget_lane_p16",
|
||||
"vget_lane_p8",
|
||||
"vget_lane_p8",
|
||||
"vget_lane_s16",
|
||||
"vget_lane_s16",
|
||||
"vget_lane_s32",
|
||||
"vget_lane_s32",
|
||||
"vget_lane_s8",
|
||||
"vget_lane_s8",
|
||||
"vget_lane_u16",
|
||||
"vget_lane_u16",
|
||||
"vget_lane_u32",
|
||||
"vget_lane_u32",
|
||||
"vget_lane_u8",
|
||||
"vget_lane_u8",
|
||||
"vgetq_lane_f32",
|
||||
"vgetq_lane_f32",
|
||||
"vgetq_lane_p16",
|
||||
"vgetq_lane_p16",
|
||||
"vgetq_lane_p64",
|
||||
"vgetq_lane_p64",
|
||||
"vgetq_lane_p8",
|
||||
"vgetq_lane_p8",
|
||||
"vgetq_lane_s16",
|
||||
"vgetq_lane_s16",
|
||||
"vgetq_lane_s32",
|
||||
"vgetq_lane_s32",
|
||||
"vgetq_lane_s64",
|
||||
"vgetq_lane_s64",
|
||||
"vgetq_lane_s8",
|
||||
"vgetq_lane_s8",
|
||||
"vgetq_lane_u16",
|
||||
"vgetq_lane_u16",
|
||||
"vgetq_lane_u32",
|
||||
"vgetq_lane_u32",
|
||||
"vgetq_lane_u8",
|
||||
"vgetq_lane_u8",
|
||||
"vget_lane_p64",
|
||||
"vget_lane_s64",
|
||||
"vget_lane_u64",
|
||||
"vget_low_f32",
|
||||
"vget_low_f32",
|
||||
"vget_low_p16",
|
||||
"vget_low_p16",
|
||||
"vget_low_p8",
|
||||
"vget_low_p8",
|
||||
"vget_low_s16",
|
||||
"vget_low_s16",
|
||||
"vget_low_s32",
|
||||
"vget_low_s32",
|
||||
"vget_low_s8",
|
||||
"vget_low_s8",
|
||||
"vget_low_u16",
|
||||
"vget_low_u16",
|
||||
"vget_low_u32",
|
||||
"vget_low_u32",
|
||||
"vget_low_u8",
|
||||
"vget_low_u8",
|
||||
"vget_low_s64",
|
||||
"vget_low_s64",
|
||||
"vget_low_u64",
|
||||
"vget_low_u64",
|
||||
"vaddw_high_s16",
|
||||
"vaddw_high_s32",
|
||||
"vaddw_high_s8",
|
||||
"vaddw_high_u16",
|
||||
"vaddw_high_u32",
|
||||
"vaddw_high_u8",
|
||||
"vgetq_lane_u64",
|
||||
];
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
[assign]
|
||||
|
||||
[assign.owners]
|
||||
"*" = ["@Amanieu", "@folkertdev", "@sayantn"]
|
||||
"*" = ["@Amanieu", "@folkertdev", "@sayantn", "@davidtwco", "@adamgemmell"]
|
||||
|
||||
[ping.windows]
|
||||
message = """\
|
||||
|
||||
Reference in New Issue
Block a user