From 7050d6998275df25a8d21dc7aeb8a44866a98b4a Mon Sep 17 00:00:00 2001 From: David Wood Date: Mon, 20 Apr 2026 10:00:45 +0100 Subject: [PATCH 1/2] gen-arm: use LLVM intrinsics for `f16` to `{i,u}16` Instead of doing an `as {i,u}16` cast, we can use the same LLVM intrinsics as when converting to `{i,u}{32,64}`, which is what Clang does and ensures the intrinsic result matches. --- .../core_arch/src/aarch64/neon/generated.rs | 74 ++++++++-- .../spec/neon/aarch64.spec.yml | 132 ++---------------- 2 files changed, 73 insertions(+), 133 deletions(-) diff --git a/library/stdarch/crates/core_arch/src/aarch64/neon/generated.rs b/library/stdarch/crates/core_arch/src/aarch64/neon/generated.rs index 5a0bbfa09552..f70595419422 100644 --- a/library/stdarch/crates/core_arch/src/aarch64/neon/generated.rs +++ b/library/stdarch/crates/core_arch/src/aarch64/neon/generated.rs @@ -8007,7 +8007,14 @@ pub fn vcvtaq_u64_f64(a: float64x2_t) -> uint64x2_t { #[unstable(feature = "stdarch_neon_f16", issue = "136306")] #[cfg(not(target_arch = "arm64ec"))] pub fn vcvtah_s16_f16(a: f16) -> i16 { - vcvtah_s32_f16(a) as i16 + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.fcvtas.i16.f16" + )] + fn _vcvtah_s16_f16(a: f16) -> i16; + } + unsafe { _vcvtah_s16_f16(a) } } #[doc = "Floating-point convert to integer, rounding to nearest with ties to away"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvtah_s32_f16)"] @@ -8051,7 +8058,14 @@ pub fn vcvtah_s64_f16(a: f16) -> i64 { #[unstable(feature = "stdarch_neon_f16", issue = "136306")] #[cfg(not(target_arch = "arm64ec"))] pub fn vcvtah_u16_f16(a: f16) -> u16 { - vcvtah_u32_f16(a) as u16 + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.fcvtau.i16.f16" + )] + fn _vcvtah_u16_f16(a: f16) -> u16; + } + unsafe { _vcvtah_u16_f16(a) } } 
#[doc = "Floating-point convert to integer, rounding to nearest with ties to away"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvtah_u32_f16)"] @@ -8693,7 +8707,14 @@ pub fn vcvtmq_u64_f64(a: float64x2_t) -> uint64x2_t { #[unstable(feature = "stdarch_neon_f16", issue = "136306")] #[cfg(not(target_arch = "arm64ec"))] pub fn vcvtmh_s16_f16(a: f16) -> i16 { - vcvtmh_s32_f16(a) as i16 + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.fcvtms.i16.f16" + )] + fn _vcvtmh_s16_f16(a: f16) -> i16; + } + unsafe { _vcvtmh_s16_f16(a) } } #[doc = "Floating-point convert to integer, rounding towards minus infinity"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvtmh_s32_f16)"] @@ -8729,7 +8750,7 @@ pub fn vcvtmh_s64_f16(a: f16) -> i64 { } unsafe { _vcvtmh_s64_f16(a) } } -#[doc = "Floating-point convert to integer, rounding towards minus infinity"] +#[doc = "Floating-point convert to unsigned integer, rounding towards minus infinity"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvtmh_u16_f16)"] #[inline] #[cfg_attr(test, assert_instr(fcvtmu))] @@ -8737,7 +8758,14 @@ pub fn vcvtmh_s64_f16(a: f16) -> i64 { #[unstable(feature = "stdarch_neon_f16", issue = "136306")] #[cfg(not(target_arch = "arm64ec"))] pub fn vcvtmh_u16_f16(a: f16) -> u16 { - vcvtmh_u32_f16(a) as u16 + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.fcvtmu.i16.f16" + )] + fn _vcvtmh_u16_f16(a: f16) -> u16; + } + unsafe { _vcvtmh_u16_f16(a) } } #[doc = "Floating-point convert to unsigned integer, rounding towards minus infinity"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvtmh_u32_f16)"] @@ -9041,7 +9069,14 @@ pub fn 
vcvtnq_u64_f64(a: float64x2_t) -> uint64x2_t { #[unstable(feature = "stdarch_neon_f16", issue = "136306")] #[cfg(not(target_arch = "arm64ec"))] pub fn vcvtnh_s16_f16(a: f16) -> i16 { - vcvtnh_s32_f16(a) as i16 + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.fcvtns.i16.f16" + )] + fn _vcvtnh_s16_f16(a: f16) -> i16; + } + unsafe { _vcvtnh_s16_f16(a) } } #[doc = "Floating-point convert to integer, rounding to nearest with ties to even"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvtnh_s32_f16)"] @@ -9085,7 +9120,14 @@ pub fn vcvtnh_s64_f16(a: f16) -> i64 { #[unstable(feature = "stdarch_neon_f16", issue = "136306")] #[cfg(not(target_arch = "arm64ec"))] pub fn vcvtnh_u16_f16(a: f16) -> u16 { - vcvtnh_u32_f16(a) as u16 + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.fcvtnu.i16.f16" + )] + fn _vcvtnh_u16_f16(a: f16) -> u16; + } + unsafe { _vcvtnh_u16_f16(a) } } #[doc = "Floating-point convert to unsigned integer, rounding to nearest with ties to even"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvtnh_u32_f16)"] @@ -9389,7 +9431,14 @@ pub fn vcvtpq_u64_f64(a: float64x2_t) -> uint64x2_t { #[unstable(feature = "stdarch_neon_f16", issue = "136306")] #[cfg(not(target_arch = "arm64ec"))] pub fn vcvtph_s16_f16(a: f16) -> i16 { - vcvtph_s32_f16(a) as i16 + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.fcvtps.i16.f16" + )] + fn _vcvtph_s16_f16(a: f16) -> i16; + } + unsafe { _vcvtph_s16_f16(a) } } #[doc = "Floating-point convert to integer, rounding to plus infinity"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvtph_s32_f16)"] @@ -9433,7 +9482,14 @@ pub fn 
vcvtph_s64_f16(a: f16) -> i64 { #[unstable(feature = "stdarch_neon_f16", issue = "136306")] #[cfg(not(target_arch = "arm64ec"))] pub fn vcvtph_u16_f16(a: f16) -> u16 { - vcvtph_u32_f16(a) as u16 + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.fcvtpu.i16.f16" + )] + fn _vcvtph_u16_f16(a: f16) -> u16; + } + unsafe { _vcvtph_u16_f16(a) } } #[doc = "Floating-point convert to unsigned integer, rounding to plus infinity"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvtph_u32_f16)"] diff --git a/library/stdarch/crates/stdarch-gen-arm/spec/neon/aarch64.spec.yml b/library/stdarch/crates/stdarch-gen-arm/spec/neon/aarch64.spec.yml index 6950f69731c3..342e66cab4da 100644 --- a/library/stdarch/crates/stdarch-gen-arm/spec/neon/aarch64.spec.yml +++ b/library/stdarch/crates/stdarch-gen-arm/spec/neon/aarch64.spec.yml @@ -1869,9 +1869,9 @@ intrinsics: - *target-not-arm64ec safety: safe types: + - ["f16", "u16", 'h_u16_f16'] - ["f16", "u32", 'h_u32_f16'] - ["f16", "u64", 'h_u64_f16'] - compose: - LLVMLink: name: "vcvta{type[2]}" @@ -1891,6 +1891,7 @@ intrinsics: - *target-not-arm64ec safety: safe types: + - ["f16", "i16", 'h_s16_f16'] - ["f16", "i32", 'h_s32_f16'] - ["f16", "i64", 'h_s64_f16'] compose: @@ -1901,37 +1902,6 @@ intrinsics: - link: "llvm.aarch64.neon.fcvtas.{type[1]}.{type[0]}" arch: aarch64,arm64ec - - - name: "vcvta{type[2]}" - doc: "Floating-point convert to integer, rounding to nearest with ties to away" - arguments: ["a: {type[0]}"] - return_type: "{type[1]}" - attr: - - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcvtas]]}]] - - *neon-fp16 - - *neon-unstable-f16 - - *target-not-arm64ec - safety: safe - types: - - ["f16", "i16", 'h_s16_f16', 's32'] - compose: - - 'vcvtah_{type[3]}_f16(a) as i16' - - - name: "vcvta{type[2]}" - doc: "Floating-point convert to integer, rounding to nearest with ties to away" - arguments: 
["a: {type[0]}"] - return_type: "{type[1]}" - attr: - - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcvtau]]}]] - - *neon-fp16 - - *neon-unstable-f16 - - *target-not-arm64ec - safety: safe - types: - - ["f16", "u16", 'h_u16_f16', 'u32'] - compose: - - 'vcvtah_{type[3]}_f16(a) as u16' - - name: "vcvta{type[2]}" doc: "Floating-point convert to integer, rounding to nearest with ties to away" arguments: ["a: {type[0]}"] @@ -2041,6 +2011,7 @@ intrinsics: - *target-not-arm64ec safety: safe types: + - ["f16", "i16", 'h'] - ["f16", "i32", 'h'] - ["f16", "i64", 'h'] compose: @@ -2051,22 +2022,6 @@ intrinsics: - link: "llvm.aarch64.neon.fcvtns.{type[1]}.{type[0]}" arch: aarch64,arm64ec - - name: "vcvtn{type[2]}_{type[1]}_{type[0]}" - doc: "Floating-point convert to integer, rounding to nearest with ties to even" - arguments: ["a: {type[0]}"] - return_type: "{type[1]}" - attr: - - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcvtns]]}]] - - *neon-fp16 - - *neon-unstable-f16 - - *target-not-arm64ec - safety: safe - types: - - ["f16", "i16", 'h', 'i32'] - compose: - - 'vcvtnh_{type[3]}_f16(a) as i16' - - - name: "vcvtn{type[2]}_{type[1]}_{type[0]}" doc: "Floating-point convert to unsigned integer, rounding to nearest with ties to even" arguments: ["a: {type[0]}"] @@ -2078,6 +2033,7 @@ intrinsics: - *target-not-arm64ec safety: safe types: + - ["f16", "u16", 'h'] - ["f16", "u32", 'h'] - ["f16", "u64", 'h'] compose: @@ -2088,21 +2044,6 @@ intrinsics: - link: "llvm.aarch64.neon.fcvtnu.{type[1]}.{type[0]}" arch: aarch64,arm64ec - - name: "vcvtn{type[2]}_{type[1]}_{type[0]}" - doc: "Floating-point convert to unsigned integer, rounding to nearest with ties to even" - arguments: ["a: {type[0]}"] - return_type: "{type[1]}" - attr: - - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcvtnu]]}]] - - *neon-fp16 - - *neon-unstable-f16 - - *target-not-arm64ec - safety: safe - types: - - ["f16", "u16", 'h', 'u32'] - compose: - - 'vcvtnh_{type[3]}_f16(a) as u16' - - name: 
"vcvtm{neon_type[1].no}_{neon_type[0]}" doc: "Floating-point convert to signed integer, rounding toward minus infinity" arguments: ["a: {neon_type[0]}"] @@ -2393,6 +2334,7 @@ intrinsics: - *target-not-arm64ec safety: safe types: + - ["f16", "i16", 'h'] - ["f16", "i32", 'h'] - ["f16", "i64", 'h'] compose: @@ -2403,21 +2345,6 @@ intrinsics: - link: "llvm.aarch64.neon.fcvtps.{type[1]}.{type[0]}" arch: aarch64,arm64ec - - name: "vcvtp{type[2]}_{type[1]}_{type[0]}" - doc: "Floating-point convert to integer, rounding to plus infinity" - arguments: ["a: {type[0]}"] - return_type: "{type[1]}" - attr: - - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcvtps]]}]] - - *neon-fp16 - - *neon-unstable-f16 - - *target-not-arm64ec - safety: safe - types: - - ["f16", "i16", 'h', 'i32'] - compose: - - 'vcvtph_{type[3]}_f16(a) as i16' - - name: "vcvtp{type[2]}_{type[1]}_{type[0]}" doc: "Floating-point convert to unsigned integer, rounding to plus infinity" arguments: ["a: {type[0]}"] @@ -2429,6 +2356,7 @@ intrinsics: - *target-not-arm64ec safety: safe types: + - ["f16", "u16", 'h'] - ["f16", "u32", 'h'] - ["f16", "u64", 'h'] compose: @@ -2439,21 +2367,6 @@ intrinsics: - link: "llvm.aarch64.neon.fcvtpu.{type[1]}.{type[0]}" arch: aarch64,arm64ec - - name: "vcvtp{type[2]}_{type[1]}_{type[0]}" - doc: "Floating-point convert to unsigned integer, rounding to plus infinity" - arguments: ["a: {type[0]}"] - return_type: "{type[1]}" - attr: - - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcvtpu]]}]] - - *neon-fp16 - - *neon-unstable-f16 - - *target-not-arm64ec - safety: safe - types: - - ["f16", "u16", 'h', 'u32'] - compose: - - 'vcvtph_{type[3]}_f16(a) as u16' - - name: "vdup{neon_type.laneq_nox}" doc: "Set all vector lanes to the same value" arguments: ["a: {neon_type}"] @@ -12034,6 +11947,7 @@ intrinsics: - *target-not-arm64ec safety: safe types: + - ["f16", "i16", 'h'] - ["f16", "i32", 'h'] - ["f16", "i64", 'h'] compose: @@ -12044,22 +11958,6 @@ intrinsics: - link: 
"llvm.aarch64.neon.fcvtms.{type[1]}.{type[0]}" arch: aarch64,arm64ec - - name: "vcvtm{type[2]}_{type[1]}_{type[0]}" - doc: "Floating-point convert to integer, rounding towards minus infinity" - arguments: ["a: {type[0]}"] - return_type: "{type[1]}" - attr: - - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcvtms]]}]] - - *neon-fp16 - - *neon-unstable-f16 - - *target-not-arm64ec - safety: safe - types: - - ["f16", "i16", 'h', 'i32'] - compose: - - 'vcvtmh_{type[3]}_f16(a) as i16' - - - name: "vcvtm{type[2]}_{type[1]}_{type[0]}" doc: "Floating-point convert to unsigned integer, rounding towards minus infinity" arguments: ["a: {type[0]}"] @@ -12071,6 +11969,7 @@ intrinsics: - *target-not-arm64ec safety: safe types: + - ["f16", "u16", 'h'] - ["f16", "u32", 'h'] - ["f16", "u64", 'h'] compose: @@ -12081,21 +11980,6 @@ intrinsics: - link: "llvm.aarch64.neon.fcvtmu.{type[1]}.{type[0]}" arch: aarch64,arm64ec - - name: "vcvtm{type[2]}_{type[1]}_{type[0]}" - doc: "Floating-point convert to integer, rounding towards minus infinity" - arguments: ["a: {type[0]}"] - return_type: "{type[1]}" - attr: - - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcvtmu]]}]] - - *neon-fp16 - - *neon-unstable-f16 - - *target-not-arm64ec - safety: safe - types: - - ["f16", "u16", 'h', 'u32'] - compose: - - 'vcvtmh_{type[3]}_f16(a) as u16' - - name: "vmlal_high_n_{neon_type[1]}" doc: "Multiply-add long" arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}", "c: {type[2]}"] From 7fb11ae763ccbb8857460ef54a972c681c3026ce Mon Sep 17 00:00:00 2001 From: sayantn Date: Thu, 30 Apr 2026 06:12:54 +0530 Subject: [PATCH 2/2] Use latest clang versions from kernel.org --- .../ci/docker/aarch64-unknown-linux-gnu/Dockerfile | 10 ++++++++-- .../ci/docker/aarch64_be-unknown-linux-gnu/Dockerfile | 9 +++++++-- .../ci/docker/armv7-unknown-linux-gnueabihf/Dockerfile | 10 ++++++++-- .../ci/docker/x86_64-unknown-linux-gnu/Dockerfile | 7 +++++++ 4 files changed, 30 insertions(+), 6 deletions(-) diff --git 
a/library/stdarch/ci/docker/aarch64-unknown-linux-gnu/Dockerfile b/library/stdarch/ci/docker/aarch64-unknown-linux-gnu/Dockerfile index 2768c521ebcc..8435dd3dedd3 100644 --- a/library/stdarch/ci/docker/aarch64-unknown-linux-gnu/Dockerfile +++ b/library/stdarch/ci/docker/aarch64-unknown-linux-gnu/Dockerfile @@ -10,8 +10,14 @@ RUN apt-get update && apt-get install -y --no-install-recommends \ qemu-user \ make \ file \ - clang \ - lld + xz-utils \ + wget + +RUN wget https://mirrors.edge.kernel.org/pub/tools/llvm/files/llvm-22.1.4-x86_64.tar.gz -O llvm.tar.gz +RUN mkdir llvm +RUN tar -xvf llvm.tar.gz --strip-components=1 -C llvm + +ENV PATH="/llvm/bin:$PATH" ENV CARGO_TARGET_AARCH64_UNKNOWN_LINUX_GNU_LINKER=aarch64-linux-gnu-gcc \ CARGO_TARGET_AARCH64_UNKNOWN_LINUX_GNU_RUNNER="qemu-aarch64 -cpu max -L /usr/aarch64-linux-gnu" \ diff --git a/library/stdarch/ci/docker/aarch64_be-unknown-linux-gnu/Dockerfile b/library/stdarch/ci/docker/aarch64_be-unknown-linux-gnu/Dockerfile index f85c6a2592e9..0e8efc64bbff 100644 --- a/library/stdarch/ci/docker/aarch64_be-unknown-linux-gnu/Dockerfile +++ b/library/stdarch/ci/docker/aarch64_be-unknown-linux-gnu/Dockerfile @@ -9,10 +9,9 @@ RUN apt-get update && apt-get install -y --no-install-recommends \ qemu-user \ make \ file \ - clang \ curl \ xz-utils \ - lld + wget ENV TOOLCHAIN="arm-gnu-toolchain-14.3.rel1-x86_64-aarch64_be-none-linux-gnu" @@ -21,6 +20,12 @@ RUN curl -L "https://developer.arm.com/-/media/Files/downloads/gnu/14.3.rel1/bin RUN tar -xvf "${TOOLCHAIN}.tar.xz" RUN mkdir /toolchains && mv "./${TOOLCHAIN}" /toolchains +RUN wget https://mirrors.edge.kernel.org/pub/tools/llvm/files/llvm-22.1.4-x86_64.tar.gz -O llvm.tar.gz +RUN mkdir llvm +RUN tar -xvf llvm.tar.gz --strip-components=1 -C llvm + +ENV PATH="/llvm/bin:$PATH" + ENV AARCH64_BE_TOOLCHAIN="/toolchains/${TOOLCHAIN}" ENV AARCH64_BE_LIBC="${AARCH64_BE_TOOLCHAIN}/aarch64_be-none-linux-gnu/libc" diff --git
a/library/stdarch/ci/docker/armv7-unknown-linux-gnueabihf/Dockerfile b/library/stdarch/ci/docker/armv7-unknown-linux-gnueabihf/Dockerfile index 602249c0ece5..c0a4ed3e706d 100644 --- a/library/stdarch/ci/docker/armv7-unknown-linux-gnueabihf/Dockerfile +++ b/library/stdarch/ci/docker/armv7-unknown-linux-gnueabihf/Dockerfile @@ -10,8 +10,14 @@ RUN apt-get update && apt-get install -y --no-install-recommends \ qemu-user \ make \ file \ - clang \ - lld + wget + +RUN wget https://mirrors.edge.kernel.org/pub/tools/llvm/files/llvm-22.1.4-x86_64.tar.gz -O llvm.tar.gz +RUN mkdir llvm +RUN tar -xvf llvm.tar.gz --strip-components=1 -C llvm + +ENV PATH="/llvm/bin:$PATH" + ENV CARGO_TARGET_ARMV7_UNKNOWN_LINUX_GNUEABIHF_LINKER=arm-linux-gnueabihf-gcc \ CARGO_TARGET_ARMV7_UNKNOWN_LINUX_GNUEABIHF_RUNNER="qemu-arm -cpu max -L /usr/arm-linux-gnueabihf" \ OBJDUMP=arm-linux-gnueabihf-objdump diff --git a/library/stdarch/ci/docker/x86_64-unknown-linux-gnu/Dockerfile b/library/stdarch/ci/docker/x86_64-unknown-linux-gnu/Dockerfile index 17c6d25215ae..ca6192a38d95 100644 --- a/library/stdarch/ci/docker/x86_64-unknown-linux-gnu/Dockerfile +++ b/library/stdarch/ci/docker/x86_64-unknown-linux-gnu/Dockerfile @@ -15,6 +15,13 @@ RUN apt-get update && apt-get install -y --no-install-recommends \ RUN wget http://ci-mirrors.rust-lang.org/sde-external-10.8.0-2026-03-15-lin.tar.xz -O sde.tar.xz RUN mkdir intel-sde RUN tar -xJf sde.tar.xz --strip-components=1 -C intel-sde + +RUN wget https://mirrors.edge.kernel.org/pub/tools/llvm/files/llvm-22.1.4-x86_64.tar.gz -O llvm.tar.gz +RUN mkdir llvm +RUN tar -xvf llvm.tar.gz --strip-components=1 -C llvm + +ENV PATH="/llvm/bin:$PATH" + ENV CARGO_TARGET_X86_64_UNKNOWN_LINUX_GNU_RUNNER="/intel-sde/sde64 \ -cpuid-in /checkout/ci/docker/x86_64-unknown-linux-gnu/cpuid.def \ -rtm-mode full -tsx --"