Merge pull request #2106 from sayantn/cvtf16s16

Use LLVM intrinsics for `f16` to `{i,u}16` intrinsics
This commit is contained in:
Folkert de Vries
2026-05-11 08:16:06 +00:00
committed by GitHub
6 changed files with 103 additions and 139 deletions
@@ -10,8 +10,14 @@ RUN apt-get update && apt-get install -y --no-install-recommends \
qemu-user \
make \
file \
clang \
lld
xz-utils \
wget
# Fetch the prebuilt LLVM 22.1.4 toolchain and put its binaries first on PATH.
# The URL names a gzip archive, so it is saved under a matching `.tar.gz` name;
# `tar -xf` detects the compression from the file contents either way.
RUN wget https://mirrors.edge.kernel.org/pub/tools/llvm/files/llvm-22.1.4-x86_64.tar.gz -O llvm.tar.gz
RUN mkdir llvm
RUN tar -xvf llvm.tar.gz --strip-components=1 -C llvm
ENV PATH="/llvm/bin:$PATH"
ENV CARGO_TARGET_AARCH64_UNKNOWN_LINUX_GNU_LINKER=aarch64-linux-gnu-gcc \
CARGO_TARGET_AARCH64_UNKNOWN_LINUX_GNU_RUNNER="qemu-aarch64 -cpu max -L /usr/aarch64-linux-gnu" \
@@ -9,10 +9,9 @@ RUN apt-get update && apt-get install -y --no-install-recommends \
qemu-user \
make \
file \
clang \
curl \
xz-utils \
lld
wget
ENV TOOLCHAIN="arm-gnu-toolchain-14.3.rel1-x86_64-aarch64_be-none-linux-gnu"
@@ -21,6 +20,12 @@ RUN curl -L "https://developer.arm.com/-/media/Files/downloads/gnu/14.3.rel1/bin
RUN tar -xvf "${TOOLCHAIN}.tar.xz"
RUN mkdir /toolchains && mv "./${TOOLCHAIN}" /toolchains
# Fetch the prebuilt LLVM 22.1.4 toolchain and put its binaries first on PATH.
# The URL names a gzip archive, so it is saved under a matching `.tar.gz` name;
# `tar -xf` detects the compression from the file contents either way.
RUN wget https://mirrors.edge.kernel.org/pub/tools/llvm/files/llvm-22.1.4-x86_64.tar.gz -O llvm.tar.gz
RUN mkdir llvm
RUN tar -xvf llvm.tar.gz --strip-components=1 -C llvm
ENV PATH="/llvm/bin:$PATH"
ENV AARCH64_BE_TOOLCHAIN="/toolchains/${TOOLCHAIN}"
ENV AARCH64_BE_LIBC="${AARCH64_BE_TOOLCHAIN}/aarch64_be-none-linux-gnu/libc"
@@ -10,8 +10,14 @@ RUN apt-get update && apt-get install -y --no-install-recommends \
qemu-user \
make \
file \
clang \
lld
wget
# Fetch the prebuilt LLVM 22.1.4 toolchain and put its binaries first on PATH.
# The URL names a gzip archive, so it is saved under a matching `.tar.gz` name;
# `tar -xf` detects the compression from the file contents either way.
RUN wget https://mirrors.edge.kernel.org/pub/tools/llvm/files/llvm-22.1.4-x86_64.tar.gz -O llvm.tar.gz
RUN mkdir llvm
RUN tar -xvf llvm.tar.gz --strip-components=1 -C llvm
ENV PATH="/llvm/bin:$PATH"
ENV CARGO_TARGET_ARMV7_UNKNOWN_LINUX_GNUEABIHF_LINKER=arm-linux-gnueabihf-gcc \
CARGO_TARGET_ARMV7_UNKNOWN_LINUX_GNUEABIHF_RUNNER="qemu-arm -cpu max -L /usr/arm-linux-gnueabihf" \
OBJDUMP=arm-linux-gnueabihf-objdump
@@ -15,6 +15,13 @@ RUN apt-get update && apt-get install -y --no-install-recommends \
RUN wget http://ci-mirrors.rust-lang.org/sde-external-10.8.0-2026-03-15-lin.tar.xz -O sde.tar.xz
RUN mkdir intel-sde
RUN tar -xJf sde.tar.xz --strip-components=1 -C intel-sde
# Fetch the prebuilt LLVM 22.1.4 toolchain and put its binaries first on PATH.
# The URL names a gzip archive, so it is saved under a matching `.tar.gz` name;
# `tar -xf` detects the compression from the file contents either way.
RUN wget https://mirrors.edge.kernel.org/pub/tools/llvm/files/llvm-22.1.4-x86_64.tar.gz -O llvm.tar.gz
RUN mkdir llvm
RUN tar -xvf llvm.tar.gz --strip-components=1 -C llvm
ENV PATH="/llvm/bin:$PATH"
ENV CARGO_TARGET_X86_64_UNKNOWN_LINUX_GNU_RUNNER="/intel-sde/sde64 \
-cpuid-in /checkout/ci/docker/x86_64-unknown-linux-gnu/cpuid.def \
-rtm-mode full -tsx --"
@@ -5516,7 +5516,14 @@ pub fn vcvtaq_u64_f64(a: float64x2_t) -> uint64x2_t {
#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
#[cfg(not(target_arch = "arm64ec"))]
pub fn vcvtah_s16_f16(a: f16) -> i16 {
vcvtah_s32_f16(a) as i16
unsafe extern "unadjusted" {
#[cfg_attr(
any(target_arch = "aarch64", target_arch = "arm64ec"),
link_name = "llvm.aarch64.neon.fcvtas.i16.f16"
)]
fn _vcvtah_s16_f16(a: f16) -> i16;
}
unsafe { _vcvtah_s16_f16(a) }
}
#[doc = "Floating-point convert to integer, rounding to nearest with ties to away"]
#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvtah_s32_f16)"]
@@ -5560,7 +5567,14 @@ pub fn vcvtah_s64_f16(a: f16) -> i64 {
#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
#[cfg(not(target_arch = "arm64ec"))]
pub fn vcvtah_u16_f16(a: f16) -> u16 {
vcvtah_u32_f16(a) as u16
unsafe extern "unadjusted" {
#[cfg_attr(
any(target_arch = "aarch64", target_arch = "arm64ec"),
link_name = "llvm.aarch64.neon.fcvtau.i16.f16"
)]
fn _vcvtah_u16_f16(a: f16) -> u16;
}
unsafe { _vcvtah_u16_f16(a) }
}
#[doc = "Floating-point convert to integer, rounding to nearest with ties to away"]
#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvtah_u32_f16)"]
@@ -6202,7 +6216,14 @@ pub fn vcvtmq_u64_f64(a: float64x2_t) -> uint64x2_t {
#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
#[cfg(not(target_arch = "arm64ec"))]
pub fn vcvtmh_s16_f16(a: f16) -> i16 {
vcvtmh_s32_f16(a) as i16
unsafe extern "unadjusted" {
#[cfg_attr(
any(target_arch = "aarch64", target_arch = "arm64ec"),
link_name = "llvm.aarch64.neon.fcvtms.i16.f16"
)]
fn _vcvtmh_s16_f16(a: f16) -> i16;
}
unsafe { _vcvtmh_s16_f16(a) }
}
#[doc = "Floating-point convert to integer, rounding towards minus infinity"]
#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvtmh_s32_f16)"]
@@ -6238,7 +6259,7 @@ pub fn vcvtmh_s64_f16(a: f16) -> i64 {
}
unsafe { _vcvtmh_s64_f16(a) }
}
#[doc = "Floating-point convert to integer, rounding towards minus infinity"]
#[doc = "Floating-point convert to unsigned integer, rounding towards minus infinity"]
#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvtmh_u16_f16)"]
#[inline]
#[cfg_attr(test, assert_instr(fcvtmu))]
@@ -6246,7 +6267,14 @@ pub fn vcvtmh_s64_f16(a: f16) -> i64 {
#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
#[cfg(not(target_arch = "arm64ec"))]
pub fn vcvtmh_u16_f16(a: f16) -> u16 {
vcvtmh_u32_f16(a) as u16
unsafe extern "unadjusted" {
#[cfg_attr(
any(target_arch = "aarch64", target_arch = "arm64ec"),
link_name = "llvm.aarch64.neon.fcvtmu.i16.f16"
)]
fn _vcvtmh_u16_f16(a: f16) -> u16;
}
unsafe { _vcvtmh_u16_f16(a) }
}
#[doc = "Floating-point convert to unsigned integer, rounding towards minus infinity"]
#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvtmh_u32_f16)"]
@@ -6550,7 +6578,14 @@ pub fn vcvtnq_u64_f64(a: float64x2_t) -> uint64x2_t {
#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
#[cfg(not(target_arch = "arm64ec"))]
pub fn vcvtnh_s16_f16(a: f16) -> i16 {
vcvtnh_s32_f16(a) as i16
unsafe extern "unadjusted" {
#[cfg_attr(
any(target_arch = "aarch64", target_arch = "arm64ec"),
link_name = "llvm.aarch64.neon.fcvtns.i16.f16"
)]
fn _vcvtnh_s16_f16(a: f16) -> i16;
}
unsafe { _vcvtnh_s16_f16(a) }
}
#[doc = "Floating-point convert to integer, rounding to nearest with ties to even"]
#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvtnh_s32_f16)"]
@@ -6594,7 +6629,14 @@ pub fn vcvtnh_s64_f16(a: f16) -> i64 {
#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
#[cfg(not(target_arch = "arm64ec"))]
pub fn vcvtnh_u16_f16(a: f16) -> u16 {
vcvtnh_u32_f16(a) as u16
unsafe extern "unadjusted" {
#[cfg_attr(
any(target_arch = "aarch64", target_arch = "arm64ec"),
link_name = "llvm.aarch64.neon.fcvtnu.i16.f16"
)]
fn _vcvtnh_u16_f16(a: f16) -> u16;
}
unsafe { _vcvtnh_u16_f16(a) }
}
#[doc = "Floating-point convert to unsigned integer, rounding to nearest with ties to even"]
#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvtnh_u32_f16)"]
@@ -6898,7 +6940,14 @@ pub fn vcvtpq_u64_f64(a: float64x2_t) -> uint64x2_t {
#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
#[cfg(not(target_arch = "arm64ec"))]
pub fn vcvtph_s16_f16(a: f16) -> i16 {
vcvtph_s32_f16(a) as i16
unsafe extern "unadjusted" {
#[cfg_attr(
any(target_arch = "aarch64", target_arch = "arm64ec"),
link_name = "llvm.aarch64.neon.fcvtps.i16.f16"
)]
fn _vcvtph_s16_f16(a: f16) -> i16;
}
unsafe { _vcvtph_s16_f16(a) }
}
#[doc = "Floating-point convert to integer, rounding to plus infinity"]
#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvtph_s32_f16)"]
@@ -6942,7 +6991,14 @@ pub fn vcvtph_s64_f16(a: f16) -> i64 {
#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
#[cfg(not(target_arch = "arm64ec"))]
pub fn vcvtph_u16_f16(a: f16) -> u16 {
vcvtph_u32_f16(a) as u16
unsafe extern "unadjusted" {
#[cfg_attr(
any(target_arch = "aarch64", target_arch = "arm64ec"),
link_name = "llvm.aarch64.neon.fcvtpu.i16.f16"
)]
fn _vcvtph_u16_f16(a: f16) -> u16;
}
unsafe { _vcvtph_u16_f16(a) }
}
#[doc = "Floating-point convert to unsigned integer, rounding to plus infinity"]
#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvtph_u32_f16)"]
@@ -1767,9 +1767,9 @@ intrinsics:
- *target-not-arm64ec
safety: safe
types:
- ["f16", "u16", 'h_u16_f16']
- ["f16", "u32", 'h_u32_f16']
- ["f16", "u64", 'h_u64_f16']
compose:
- LLVMLink:
name: "vcvta{type[2]}"
@@ -1789,6 +1789,7 @@ intrinsics:
- *target-not-arm64ec
safety: safe
types:
- ["f16", "i16", 'h_s16_f16']
- ["f16", "i32", 'h_s32_f16']
- ["f16", "i64", 'h_s64_f16']
compose:
@@ -1799,37 +1800,6 @@ intrinsics:
- link: "llvm.aarch64.neon.fcvtas.{type[1]}.{type[0]}"
arch: aarch64,arm64ec
- name: "vcvta{type[2]}"
doc: "Floating-point convert to integer, rounding to nearest with ties to away"
arguments: ["a: {type[0]}"]
return_type: "{type[1]}"
attr:
- FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcvtas]]}]]
- *neon-fp16
- *neon-unstable-f16
- *target-not-arm64ec
safety: safe
types:
- ["f16", "i16", 'h_s16_f16', 's32']
compose:
- 'vcvtah_{type[3]}_f16(a) as i16'
- name: "vcvta{type[2]}"
doc: "Floating-point convert to integer, rounding to nearest with ties to away"
arguments: ["a: {type[0]}"]
return_type: "{type[1]}"
attr:
- FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcvtau]]}]]
- *neon-fp16
- *neon-unstable-f16
- *target-not-arm64ec
safety: safe
types:
- ["f16", "u16", 'h_u16_f16', 'u32']
compose:
- 'vcvtah_{type[3]}_f16(a) as u16'
- name: "vcvta{type[2]}"
doc: "Floating-point convert to integer, rounding to nearest with ties to away"
arguments: ["a: {type[0]}"]
@@ -1939,6 +1909,7 @@ intrinsics:
- *target-not-arm64ec
safety: safe
types:
- ["f16", "i16", 'h']
- ["f16", "i32", 'h']
- ["f16", "i64", 'h']
compose:
@@ -1949,22 +1920,6 @@ intrinsics:
- link: "llvm.aarch64.neon.fcvtns.{type[1]}.{type[0]}"
arch: aarch64,arm64ec
- name: "vcvtn{type[2]}_{type[1]}_{type[0]}"
doc: "Floating-point convert to integer, rounding to nearest with ties to even"
arguments: ["a: {type[0]}"]
return_type: "{type[1]}"
attr:
- FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcvtns]]}]]
- *neon-fp16
- *neon-unstable-f16
- *target-not-arm64ec
safety: safe
types:
- ["f16", "i16", 'h', 'i32']
compose:
- 'vcvtnh_{type[3]}_f16(a) as i16'
- name: "vcvtn{type[2]}_{type[1]}_{type[0]}"
doc: "Floating-point convert to unsigned integer, rounding to nearest with ties to even"
arguments: ["a: {type[0]}"]
@@ -1976,6 +1931,7 @@ intrinsics:
- *target-not-arm64ec
safety: safe
types:
- ["f16", "u16", 'h']
- ["f16", "u32", 'h']
- ["f16", "u64", 'h']
compose:
@@ -1986,21 +1942,6 @@ intrinsics:
- link: "llvm.aarch64.neon.fcvtnu.{type[1]}.{type[0]}"
arch: aarch64,arm64ec
- name: "vcvtn{type[2]}_{type[1]}_{type[0]}"
doc: "Floating-point convert to unsigned integer, rounding to nearest with ties to even"
arguments: ["a: {type[0]}"]
return_type: "{type[1]}"
attr:
- FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcvtnu]]}]]
- *neon-fp16
- *neon-unstable-f16
- *target-not-arm64ec
safety: safe
types:
- ["f16", "u16", 'h', 'u32']
compose:
- 'vcvtnh_{type[3]}_f16(a) as u16'
- name: "vcvtm{neon_type[1].no}_{neon_type[0]}"
doc: "Floating-point convert to signed integer, rounding toward minus infinity"
arguments: ["a: {neon_type[0]}"]
@@ -2291,6 +2232,7 @@ intrinsics:
- *target-not-arm64ec
safety: safe
types:
- ["f16", "i16", 'h']
- ["f16", "i32", 'h']
- ["f16", "i64", 'h']
compose:
@@ -2301,21 +2243,6 @@ intrinsics:
- link: "llvm.aarch64.neon.fcvtps.{type[1]}.{type[0]}"
arch: aarch64,arm64ec
- name: "vcvtp{type[2]}_{type[1]}_{type[0]}"
doc: "Floating-point convert to integer, rounding to plus infinity"
arguments: ["a: {type[0]}"]
return_type: "{type[1]}"
attr:
- FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcvtps]]}]]
- *neon-fp16
- *neon-unstable-f16
- *target-not-arm64ec
safety: safe
types:
- ["f16", "i16", 'h', 'i32']
compose:
- 'vcvtph_{type[3]}_f16(a) as i16'
- name: "vcvtp{type[2]}_{type[1]}_{type[0]}"
doc: "Floating-point convert to unsigned integer, rounding to plus infinity"
arguments: ["a: {type[0]}"]
@@ -2327,6 +2254,7 @@ intrinsics:
- *target-not-arm64ec
safety: safe
types:
- ["f16", "u16", 'h']
- ["f16", "u32", 'h']
- ["f16", "u64", 'h']
compose:
@@ -2337,21 +2265,6 @@ intrinsics:
- link: "llvm.aarch64.neon.fcvtpu.{type[1]}.{type[0]}"
arch: aarch64,arm64ec
- name: "vcvtp{type[2]}_{type[1]}_{type[0]}"
doc: "Floating-point convert to unsigned integer, rounding to plus infinity"
arguments: ["a: {type[0]}"]
return_type: "{type[1]}"
attr:
- FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcvtpu]]}]]
- *neon-fp16
- *neon-unstable-f16
- *target-not-arm64ec
safety: safe
types:
- ["f16", "u16", 'h', 'u32']
compose:
- 'vcvtph_{type[3]}_f16(a) as u16'
- name: "vdup{neon_type.laneq_nox}"
doc: "Set all vector lanes to the same value"
arguments: ["a: {neon_type}"]
@@ -11782,6 +11695,7 @@ intrinsics:
- *target-not-arm64ec
safety: safe
types:
- ["f16", "i16", 'h']
- ["f16", "i32", 'h']
- ["f16", "i64", 'h']
compose:
@@ -11792,22 +11706,6 @@ intrinsics:
- link: "llvm.aarch64.neon.fcvtms.{type[1]}.{type[0]}"
arch: aarch64,arm64ec
- name: "vcvtm{type[2]}_{type[1]}_{type[0]}"
doc: "Floating-point convert to integer, rounding towards minus infinity"
arguments: ["a: {type[0]}"]
return_type: "{type[1]}"
attr:
- FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcvtms]]}]]
- *neon-fp16
- *neon-unstable-f16
- *target-not-arm64ec
safety: safe
types:
- ["f16", "i16", 'h', 'i32']
compose:
- 'vcvtmh_{type[3]}_f16(a) as i16'
- name: "vcvtm{type[2]}_{type[1]}_{type[0]}"
doc: "Floating-point convert to unsigned integer, rounding towards minus infinity"
arguments: ["a: {type[0]}"]
@@ -11819,6 +11717,7 @@ intrinsics:
- *target-not-arm64ec
safety: safe
types:
- ["f16", "u16", 'h']
- ["f16", "u32", 'h']
- ["f16", "u64", 'h']
compose:
@@ -11829,21 +11728,6 @@ intrinsics:
- link: "llvm.aarch64.neon.fcvtmu.{type[1]}.{type[0]}"
arch: aarch64,arm64ec
- name: "vcvtm{type[2]}_{type[1]}_{type[0]}"
doc: "Floating-point convert to integer, rounding towards minus infinity"
arguments: ["a: {type[0]}"]
return_type: "{type[1]}"
attr:
- FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcvtmu]]}]]
- *neon-fp16
- *neon-unstable-f16
- *target-not-arm64ec
safety: safe
types:
- ["f16", "u16", 'h', 'u32']
compose:
- 'vcvtmh_{type[3]}_f16(a) as u16'
- name: "vmlal_high_n_{neon_type[1]}"
doc: "Multiply-add long"
arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}", "c: {type[2]}"]