Merge pull request #2106 from sayantn/cvtf16s16

Use LLVM intrinsics for `f16` to `{i,u}16` intrinsics
2026-05-31 21:47:15 +03:00 · 2026-05-11 08:16:06 +00:00
parent f42d908f99 7fb11ae763
commit 1330bfbb28
6 changed files with 103 additions and 139 deletions
@@ -10,8 +10,14 @@ RUN apt-get update && apt-get install -y --no-install-recommends \
  qemu-user \
  make \
  file \
-  clang \
-  lld
+  xz-utils \
+  wget
+
+RUN wget https://mirrors.edge.kernel.org/pub/tools/llvm/files/llvm-22.1.4-x86_64.tar.gz -O llvm.tar.gz
+RUN mkdir llvm
+RUN tar -xvf llvm.tar.gz --strip-components=1 -C llvm
+
+ENV PATH="/llvm/bin:$PATH"

 ENV CARGO_TARGET_AARCH64_UNKNOWN_LINUX_GNU_LINKER=aarch64-linux-gnu-gcc \
    CARGO_TARGET_AARCH64_UNKNOWN_LINUX_GNU_RUNNER="qemu-aarch64 -cpu max -L /usr/aarch64-linux-gnu" \
@@ -9,10 +9,9 @@ RUN apt-get update && apt-get install -y --no-install-recommends \
  qemu-user \
  make \
  file \
-  clang \
  curl \
  xz-utils \
-  lld
+  wget

 ENV TOOLCHAIN="arm-gnu-toolchain-14.3.rel1-x86_64-aarch64_be-none-linux-gnu"

@@ -21,6 +20,12 @@ RUN curl -L "https://developer.arm.com/-/media/Files/downloads/gnu/14.3.rel1/bin
 RUN tar -xvf "${TOOLCHAIN}.tar.xz"
 RUN mkdir /toolchains && mv "./${TOOLCHAIN}" /toolchains

+RUN wget https://mirrors.edge.kernel.org/pub/tools/llvm/files/llvm-22.1.4-x86_64.tar.gz -O llvm.tar.gz
+RUN mkdir llvm
+RUN tar -xvf llvm.tar.gz --strip-components=1 -C llvm
+
+ENV PATH="/llvm/bin:$PATH"
+
 ENV AARCH64_BE_TOOLCHAIN="/toolchains/${TOOLCHAIN}"
 ENV AARCH64_BE_LIBC="${AARCH64_BE_TOOLCHAIN}/aarch64_be-none-linux-gnu/libc"

@@ -10,8 +10,14 @@ RUN apt-get update && apt-get install -y --no-install-recommends \
  qemu-user \
  make \
  file \
-  clang \
-  lld
+  wget
+
+RUN wget https://mirrors.edge.kernel.org/pub/tools/llvm/files/llvm-22.1.4-x86_64.tar.gz -O llvm.tar.gz
+RUN mkdir llvm
+RUN tar -xvf llvm.tar.gz --strip-components=1 -C llvm
+
+ENV PATH="/llvm/bin:$PATH"
+
 ENV CARGO_TARGET_ARMV7_UNKNOWN_LINUX_GNUEABIHF_LINKER=arm-linux-gnueabihf-gcc \
    CARGO_TARGET_ARMV7_UNKNOWN_LINUX_GNUEABIHF_RUNNER="qemu-arm -cpu max -L /usr/arm-linux-gnueabihf" \
    OBJDUMP=arm-linux-gnueabihf-objdump
@@ -15,6 +15,13 @@ RUN apt-get update && apt-get install -y --no-install-recommends \
 RUN wget http://ci-mirrors.rust-lang.org/sde-external-10.8.0-2026-03-15-lin.tar.xz -O sde.tar.xz
 RUN mkdir intel-sde
 RUN tar -xJf sde.tar.xz --strip-components=1 -C intel-sde
+
+RUN wget https://mirrors.edge.kernel.org/pub/tools/llvm/files/llvm-22.1.4-x86_64.tar.gz -O llvm.tar.gz
+RUN mkdir llvm
+RUN tar -xvf llvm.tar.gz --strip-components=1 -C llvm
+
+ENV PATH="/llvm/bin:$PATH"
+
 ENV CARGO_TARGET_X86_64_UNKNOWN_LINUX_GNU_RUNNER="/intel-sde/sde64 \
            -cpuid-in /checkout/ci/docker/x86_64-unknown-linux-gnu/cpuid.def \
            -rtm-mode full -tsx --"
@@ -5516,7 +5516,14 @@ pub fn vcvtaq_u64_f64(a: float64x2_t) -> uint64x2_t {
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
 #[cfg(not(target_arch = "arm64ec"))]
 pub fn vcvtah_s16_f16(a: f16) -> i16 {
-    vcvtah_s32_f16(a) as i16
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.fcvtas.i16.f16"
+        )]
+        fn _vcvtah_s16_f16(a: f16) -> i16;
+    }
+    unsafe { _vcvtah_s16_f16(a) }
 }
 #[doc = "Floating-point convert to integer, rounding to nearest with ties to away"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvtah_s32_f16)"]
@@ -5560,7 +5567,14 @@ pub fn vcvtah_s64_f16(a: f16) -> i64 {
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
 #[cfg(not(target_arch = "arm64ec"))]
 pub fn vcvtah_u16_f16(a: f16) -> u16 {
-    vcvtah_u32_f16(a) as u16
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.fcvtau.i16.f16"
+        )]
+        fn _vcvtah_u16_f16(a: f16) -> u16;
+    }
+    unsafe { _vcvtah_u16_f16(a) }
 }
 #[doc = "Floating-point convert to integer, rounding to nearest with ties to away"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvtah_u32_f16)"]
@@ -6202,7 +6216,14 @@ pub fn vcvtmq_u64_f64(a: float64x2_t) -> uint64x2_t {
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
 #[cfg(not(target_arch = "arm64ec"))]
 pub fn vcvtmh_s16_f16(a: f16) -> i16 {
-    vcvtmh_s32_f16(a) as i16
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.fcvtms.i16.f16"
+        )]
+        fn _vcvtmh_s16_f16(a: f16) -> i16;
+    }
+    unsafe { _vcvtmh_s16_f16(a) }
 }
 #[doc = "Floating-point convert to integer, rounding towards minus infinity"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvtmh_s32_f16)"]
@@ -6238,7 +6259,7 @@ pub fn vcvtmh_s64_f16(a: f16) -> i64 {
    }
    unsafe { _vcvtmh_s64_f16(a) }
 }
-#[doc = "Floating-point convert to integer, rounding towards minus infinity"]
+#[doc = "Floating-point convert to unsigned integer, rounding towards minus infinity"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvtmh_u16_f16)"]
 #[inline]
 #[cfg_attr(test, assert_instr(fcvtmu))]
@@ -6246,7 +6267,14 @@ pub fn vcvtmh_s64_f16(a: f16) -> i64 {
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
 #[cfg(not(target_arch = "arm64ec"))]
 pub fn vcvtmh_u16_f16(a: f16) -> u16 {
-    vcvtmh_u32_f16(a) as u16
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.fcvtmu.i16.f16"
+        )]
+        fn _vcvtmh_u16_f16(a: f16) -> u16;
+    }
+    unsafe { _vcvtmh_u16_f16(a) }
 }
 #[doc = "Floating-point convert to unsigned integer, rounding towards minus infinity"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvtmh_u32_f16)"]
@@ -6550,7 +6578,14 @@ pub fn vcvtnq_u64_f64(a: float64x2_t) -> uint64x2_t {
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
 #[cfg(not(target_arch = "arm64ec"))]
 pub fn vcvtnh_s16_f16(a: f16) -> i16 {
-    vcvtnh_s32_f16(a) as i16
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.fcvtns.i16.f16"
+        )]
+        fn _vcvtnh_s16_f16(a: f16) -> i16;
+    }
+    unsafe { _vcvtnh_s16_f16(a) }
 }
 #[doc = "Floating-point convert to integer, rounding to nearest with ties to even"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvtnh_s32_f16)"]
@@ -6594,7 +6629,14 @@ pub fn vcvtnh_s64_f16(a: f16) -> i64 {
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
 #[cfg(not(target_arch = "arm64ec"))]
 pub fn vcvtnh_u16_f16(a: f16) -> u16 {
-    vcvtnh_u32_f16(a) as u16
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.fcvtnu.i16.f16"
+        )]
+        fn _vcvtnh_u16_f16(a: f16) -> u16;
+    }
+    unsafe { _vcvtnh_u16_f16(a) }
 }
 #[doc = "Floating-point convert to unsigned integer, rounding to nearest with ties to even"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvtnh_u32_f16)"]
@@ -6898,7 +6940,14 @@ pub fn vcvtpq_u64_f64(a: float64x2_t) -> uint64x2_t {
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
 #[cfg(not(target_arch = "arm64ec"))]
 pub fn vcvtph_s16_f16(a: f16) -> i16 {
-    vcvtph_s32_f16(a) as i16
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.fcvtps.i16.f16"
+        )]
+        fn _vcvtph_s16_f16(a: f16) -> i16;
+    }
+    unsafe { _vcvtph_s16_f16(a) }
 }
 #[doc = "Floating-point convert to integer, rounding to plus infinity"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvtph_s32_f16)"]
@@ -6942,7 +6991,14 @@ pub fn vcvtph_s64_f16(a: f16) -> i64 {
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
 #[cfg(not(target_arch = "arm64ec"))]
 pub fn vcvtph_u16_f16(a: f16) -> u16 {
-    vcvtph_u32_f16(a) as u16
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.fcvtpu.i16.f16"
+        )]
+        fn _vcvtph_u16_f16(a: f16) -> u16;
+    }
+    unsafe { _vcvtph_u16_f16(a) }
 }
 #[doc = "Floating-point convert to unsigned integer, rounding to plus infinity"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvtph_u32_f16)"]
@@ -1767,9 +1767,9 @@ intrinsics:
      - *target-not-arm64ec
    safety: safe
    types:
+      - ["f16", "u16", 'h_u16_f16']
      - ["f16", "u32", 'h_u32_f16']
      - ["f16", "u64", 'h_u64_f16']
-
    compose:
      - LLVMLink:
          name: "vcvta{type[2]}"
@@ -1789,6 +1789,7 @@ intrinsics:
      - *target-not-arm64ec
    safety: safe
    types:
+      - ["f16", "i16", 'h_s16_f16']
      - ["f16", "i32", 'h_s32_f16']
      - ["f16", "i64", 'h_s64_f16']
    compose:
@@ -1799,37 +1800,6 @@ intrinsics:
            - link: "llvm.aarch64.neon.fcvtas.{type[1]}.{type[0]}"
              arch: aarch64,arm64ec

-
-  - name: "vcvta{type[2]}"
-    doc: "Floating-point convert to integer, rounding to nearest with ties to away"
-    arguments: ["a: {type[0]}"]
-    return_type: "{type[1]}"
-    attr:
-      - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcvtas]]}]]
-      - *neon-fp16
-      - *neon-unstable-f16
-      - *target-not-arm64ec
-    safety: safe
-    types:
-      - ["f16", "i16", 'h_s16_f16', 's32']
-    compose:
-      - 'vcvtah_{type[3]}_f16(a) as i16'
-
-  - name: "vcvta{type[2]}"
-    doc: "Floating-point convert to integer, rounding to nearest with ties to away"
-    arguments: ["a: {type[0]}"]
-    return_type: "{type[1]}"
-    attr:
-      - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcvtau]]}]]
-      - *neon-fp16
-      - *neon-unstable-f16
-      - *target-not-arm64ec
-    safety: safe
-    types:
-      - ["f16", "u16", 'h_u16_f16', 'u32']
-    compose:
-      - 'vcvtah_{type[3]}_f16(a) as u16'
-
  - name: "vcvta{type[2]}"
    doc: "Floating-point convert to integer, rounding to nearest with ties to away"
    arguments: ["a: {type[0]}"]
@@ -1939,6 +1909,7 @@ intrinsics:
      - *target-not-arm64ec
    safety: safe
    types:
+      - ["f16", "i16", 'h']
      - ["f16", "i32", 'h']
      - ["f16", "i64", 'h']
    compose:
@@ -1949,22 +1920,6 @@ intrinsics:
            - link: "llvm.aarch64.neon.fcvtns.{type[1]}.{type[0]}"
              arch: aarch64,arm64ec

-  - name: "vcvtn{type[2]}_{type[1]}_{type[0]}"
-    doc: "Floating-point convert to integer, rounding to nearest with ties to even"
-    arguments: ["a: {type[0]}"]
-    return_type: "{type[1]}"
-    attr:
-      - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcvtns]]}]]
-      - *neon-fp16
-      - *neon-unstable-f16
-      - *target-not-arm64ec
-    safety: safe
-    types:
-      - ["f16", "i16", 'h', 'i32']
-    compose:
-      - 'vcvtnh_{type[3]}_f16(a) as i16'
-
-
  - name: "vcvtn{type[2]}_{type[1]}_{type[0]}"
    doc: "Floating-point convert to unsigned integer, rounding to nearest with ties to even"
    arguments: ["a: {type[0]}"]
@@ -1976,6 +1931,7 @@ intrinsics:
      - *target-not-arm64ec
    safety: safe
    types:
+      - ["f16", "u16", 'h']
      - ["f16", "u32", 'h']
      - ["f16", "u64", 'h']
    compose:
@@ -1986,21 +1942,6 @@ intrinsics:
            - link: "llvm.aarch64.neon.fcvtnu.{type[1]}.{type[0]}"
              arch: aarch64,arm64ec

-  - name: "vcvtn{type[2]}_{type[1]}_{type[0]}"
-    doc: "Floating-point convert to unsigned integer, rounding to nearest with ties to even"
-    arguments: ["a: {type[0]}"]
-    return_type: "{type[1]}"
-    attr:
-      - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcvtnu]]}]]
-      - *neon-fp16
-      - *neon-unstable-f16
-      - *target-not-arm64ec
-    safety: safe
-    types:
-      - ["f16", "u16", 'h', 'u32']
-    compose:
-      - 'vcvtnh_{type[3]}_f16(a) as u16'
-
  - name: "vcvtm{neon_type[1].no}_{neon_type[0]}"
    doc: "Floating-point convert to signed integer, rounding toward minus infinity"
    arguments: ["a: {neon_type[0]}"]
@@ -2291,6 +2232,7 @@ intrinsics:
      - *target-not-arm64ec
    safety: safe
    types:
+      - ["f16", "i16", 'h']
      - ["f16", "i32", 'h']
      - ["f16", "i64", 'h']
    compose:
@@ -2301,21 +2243,6 @@ intrinsics:
            - link: "llvm.aarch64.neon.fcvtps.{type[1]}.{type[0]}"
              arch: aarch64,arm64ec

-  - name: "vcvtp{type[2]}_{type[1]}_{type[0]}"
-    doc: "Floating-point convert to integer, rounding to plus infinity"
-    arguments: ["a: {type[0]}"]
-    return_type: "{type[1]}"
-    attr:
-      - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcvtps]]}]]
-      - *neon-fp16
-      - *neon-unstable-f16
-      - *target-not-arm64ec
-    safety: safe
-    types:
-      - ["f16", "i16", 'h', 'i32']
-    compose:
-      - 'vcvtph_{type[3]}_f16(a) as i16'
-
  - name: "vcvtp{type[2]}_{type[1]}_{type[0]}"
    doc: "Floating-point convert to unsigned integer, rounding to plus infinity"
    arguments: ["a: {type[0]}"]
@@ -2327,6 +2254,7 @@ intrinsics:
      - *target-not-arm64ec
    safety: safe
    types:
+      - ["f16", "u16", 'h']
      - ["f16", "u32", 'h']
      - ["f16", "u64", 'h']
    compose:
@@ -2337,21 +2265,6 @@ intrinsics:
            - link: "llvm.aarch64.neon.fcvtpu.{type[1]}.{type[0]}"
              arch: aarch64,arm64ec

-  - name: "vcvtp{type[2]}_{type[1]}_{type[0]}"
-    doc: "Floating-point convert to unsigned integer, rounding to plus infinity"
-    arguments: ["a: {type[0]}"]
-    return_type: "{type[1]}"
-    attr:
-      - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcvtpu]]}]]
-      - *neon-fp16
-      - *neon-unstable-f16
-      - *target-not-arm64ec
-    safety: safe
-    types:
-      - ["f16", "u16", 'h', 'u32']
-    compose:
-      - 'vcvtph_{type[3]}_f16(a) as u16'
-
  - name: "vdup{neon_type.laneq_nox}"
    doc: "Set all vector lanes to the same value"
    arguments: ["a: {neon_type}"]
@@ -11782,6 +11695,7 @@ intrinsics:
      - *target-not-arm64ec
    safety: safe
    types:
+      - ["f16", "i16", 'h']
      - ["f16", "i32", 'h']
      - ["f16", "i64", 'h']
    compose:
@@ -11792,22 +11706,6 @@ intrinsics:
            - link: "llvm.aarch64.neon.fcvtms.{type[1]}.{type[0]}"
              arch: aarch64,arm64ec

-  - name: "vcvtm{type[2]}_{type[1]}_{type[0]}"
-    doc: "Floating-point convert to integer, rounding towards minus infinity"
-    arguments: ["a: {type[0]}"]
-    return_type: "{type[1]}"
-    attr:
-      - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcvtms]]}]]
-      - *neon-fp16
-      - *neon-unstable-f16
-      - *target-not-arm64ec
-    safety: safe
-    types:
-      - ["f16", "i16", 'h', 'i32']
-    compose:
-      - 'vcvtmh_{type[3]}_f16(a) as i16'
-
-
  - name: "vcvtm{type[2]}_{type[1]}_{type[0]}"
    doc: "Floating-point convert to unsigned integer, rounding towards minus infinity"
    arguments: ["a: {type[0]}"]
@@ -11819,6 +11717,7 @@ intrinsics:
      - *target-not-arm64ec
    safety: safe
    types:
+      - ["f16", "u16", 'h']
      - ["f16", "u32", 'h']
      - ["f16", "u64", 'h']
    compose:
@@ -11829,21 +11728,6 @@ intrinsics:
            - link: "llvm.aarch64.neon.fcvtmu.{type[1]}.{type[0]}"
              arch: aarch64,arm64ec

-  - name: "vcvtm{type[2]}_{type[1]}_{type[0]}"
-    doc: "Floating-point convert to integer, rounding towards minus infinity"
-    arguments: ["a: {type[0]}"]
-    return_type: "{type[1]}"
-    attr:
-      - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcvtmu]]}]]
-      - *neon-fp16
-      - *neon-unstable-f16
-      - *target-not-arm64ec
-    safety: safe
-    types:
-      - ["f16", "u16", 'h', 'u32']
-    compose:
-      - 'vcvtmh_{type[3]}_f16(a) as u16'
-
  - name: "vmlal_high_n_{neon_type[1]}"
    doc: "Multiply-add long"
    arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}", "c: {type[2]}"]