From 4fbcb031de05becc6cf2984446d0ce9972376757 Mon Sep 17 00:00:00 2001 From: David Wood Date: Thu, 19 Feb 2026 11:03:14 +0000 Subject: [PATCH] cg_llvm: `sve_tuple_{create,get,set}` intrinsics Clang changed to representing tuples of scalable vectors as structs rather than as wide vectors (that is, scalable vector types where the `N` part of the `` type was multiplied by the number of vectors). rustc mirrored this in the initial implementation of scalable vectors. Earlier versions of our patches used the wide vector representation and our intrinsic patches used the legacy `llvm.aarch64.sve.tuple.{create,get,set}{2,3,4}` intrinsics for creating these tuples/getting/setting the vectors, which were only supported due to LLVM's `AutoUpgrade` pass converting these intrinsics into `llvm.vector.insert`. `AutoUpgrade` only supports these legacy intrinsics with the wide vector representation. With the current struct representation, Clang has special handling in codegen for generating `insertvalue`/`extractvalue` instructions for these operations, which must be replicated by rustc's codegen for our intrinsics to use. This patch implements new intrinsics in `core::intrinsics::scalable` (mirroring the structure of `core::intrinsics::simd`) which rustc lowers to the appropriate `insertvalue`/`extractvalue` instructions. --- compiler/rustc_codegen_llvm/src/intrinsic.rs | 112 +++++++++++++++++- .../rustc_hir_analysis/src/check/intrinsic.rs | 6 + compiler/rustc_span/src/symbol.rs | 5 + .../src/intrinsics/{simd.rs => simd/mod.rs} | 2 + library/core/src/intrinsics/simd/scalable.rs | 71 +++++++++++ .../scalable-vectors/tuple-intrinsics.rs | 100 ++++++++++++++++ .../simd/masked-load-store-check-fail.stderr | 4 +- triagebot.toml | 2 +- 8 files changed, 298 insertions(+), 4 deletions(-) rename library/core/src/intrinsics/{simd.rs => simd/mod.rs} (99%) create mode 100644 library/core/src/intrinsics/simd/scalable.rs create mode 100644 tests/codegen-llvm/scalable-vectors/tuple-intrinsics.rs diff --git a/compiler/rustc_codegen_llvm/src/intrinsic.rs b/compiler/rustc_codegen_llvm/src/intrinsic.rs index 39bf4c10dab1..ad2c23c99820 100644 --- a/compiler/rustc_codegen_llvm/src/intrinsic.rs +++ b/compiler/rustc_codegen_llvm/src/intrinsic.rs @@ -3,7 +3,8 @@ use std::{assert_matches, ptr}; use rustc_abi::{ - Align, BackendRepr, ExternAbi, Float, HasDataLayout, Primitive, Size, WrappingRange, + Align, BackendRepr, ExternAbi, Float, HasDataLayout, NumScalableVectors, Primitive, Size, + WrappingRange, }; use rustc_codegen_ssa::base::{compare_simd_types, wants_msvc_seh, wants_wasm_eh}; use rustc_codegen_ssa::common::{IntPredicate, TypeKind}; @@ -605,6 +606,115 @@ fn codegen_intrinsic_call( self.pointercast(val, self.type_ptr()) } + sym::sve_tuple_create2 => { + assert_matches!( + self.layout_of(fn_args.type_at(0)).backend_repr, + BackendRepr::SimdScalableVector { + number_of_vectors: NumScalableVectors(1), + .. + } + ); + let tuple_ty = self.layout_of(fn_args.type_at(1)); + assert_matches!( + tuple_ty.backend_repr, + BackendRepr::SimdScalableVector { + number_of_vectors: NumScalableVectors(2), + .. + } + ); + let ret = self.const_poison(self.backend_type(tuple_ty)); + let ret = self.insert_value(ret, args[0].immediate(), 0); + self.insert_value(ret, args[1].immediate(), 1) + } + + sym::sve_tuple_create3 => { + assert_matches!( + self.layout_of(fn_args.type_at(0)).backend_repr, + BackendRepr::SimdScalableVector { + number_of_vectors: NumScalableVectors(1), + .. + } + ); + let tuple_ty = self.layout_of(fn_args.type_at(1)); + assert_matches!( + tuple_ty.backend_repr, + BackendRepr::SimdScalableVector { + number_of_vectors: NumScalableVectors(3), + .. + } + ); + let ret = self.const_poison(self.backend_type(tuple_ty)); + let ret = self.insert_value(ret, args[0].immediate(), 0); + let ret = self.insert_value(ret, args[1].immediate(), 1); + self.insert_value(ret, args[2].immediate(), 2) + } + + sym::sve_tuple_create4 => { + assert_matches!( + self.layout_of(fn_args.type_at(0)).backend_repr, + BackendRepr::SimdScalableVector { + number_of_vectors: NumScalableVectors(1), + .. + } + ); + let tuple_ty = self.layout_of(fn_args.type_at(1)); + assert_matches!( + tuple_ty.backend_repr, + BackendRepr::SimdScalableVector { + number_of_vectors: NumScalableVectors(4), + .. + } + ); + let ret = self.const_poison(self.backend_type(tuple_ty)); + let ret = self.insert_value(ret, args[0].immediate(), 0); + let ret = self.insert_value(ret, args[1].immediate(), 1); + let ret = self.insert_value(ret, args[2].immediate(), 2); + self.insert_value(ret, args[3].immediate(), 3) + } + + sym::sve_tuple_get => { + assert_matches!( + self.layout_of(fn_args.type_at(0)).backend_repr, + BackendRepr::SimdScalableVector { + number_of_vectors: NumScalableVectors(2 | 3 | 4 | 5 | 6 | 7 | 8), + .. + } + ); + assert_matches!( + self.layout_of(fn_args.type_at(1)).backend_repr, + BackendRepr::SimdScalableVector { + number_of_vectors: NumScalableVectors(1), + .. + } + ); + self.extract_value( + args[0].immediate(), + fn_args.const_at(2).to_leaf().to_i32() as u64, + ) + } + + sym::sve_tuple_set => { + assert_matches!( + self.layout_of(fn_args.type_at(0)).backend_repr, + BackendRepr::SimdScalableVector { + number_of_vectors: NumScalableVectors(2 | 3 | 4 | 5 | 6 | 7 | 8), + .. + } + ); + assert_matches!( + self.layout_of(fn_args.type_at(1)).backend_repr, + BackendRepr::SimdScalableVector { + number_of_vectors: NumScalableVectors(1), + .. + } + ); + self.insert_value( + args[0].immediate(), + args[1].immediate(), + fn_args.const_at(2).to_leaf().to_i32() as u64, + ) + } + _ if name.as_str().starts_with("simd_") => { // Unpack non-power-of-2 #[repr(packed, simd)] arguments. // This gives them the expected layout of a regular #[repr(simd)] vector. diff --git a/compiler/rustc_hir_analysis/src/check/intrinsic.rs b/compiler/rustc_hir_analysis/src/check/intrinsic.rs index b1dc593331c6..ca57921089fa 100644 --- a/compiler/rustc_hir_analysis/src/check/intrinsic.rs +++ b/compiler/rustc_hir_analysis/src/check/intrinsic.rs @@ -783,6 +783,12 @@ pub(crate) fn check_intrinsic_type( sym::simd_shuffle => (3, 0, vec![param(0), param(0), param(1)], param(2)), sym::simd_shuffle_const_generic => (2, 1, vec![param(0), param(0)], param(1)), + sym::sve_tuple_create2 => (2, 0, vec![param(0), param(0)], param(1)), + sym::sve_tuple_create3 => (2, 0, vec![param(0), param(0), param(0)], param(1)), + sym::sve_tuple_create4 => (2, 0, vec![param(0), param(0), param(0), param(0)], param(1)), + sym::sve_tuple_get => (2, 1, vec![param(0)], param(1)), + sym::sve_tuple_set => (2, 1, vec![param(0), param(1)], param(0)), + sym::atomic_cxchg | sym::atomic_cxchgweak => ( 1, 2, diff --git a/compiler/rustc_span/src/symbol.rs b/compiler/rustc_span/src/symbol.rs index 53e2527057bc..8b57421f2045 100644 --- a/compiler/rustc_span/src/symbol.rs +++ b/compiler/rustc_span/src/symbol.rs @@ -1979,6 +1979,11 @@ suggestion, super_let, supertrait_item_shadowing, + sve_tuple_create2, + sve_tuple_create3, + sve_tuple_create4, + sve_tuple_get, + sve_tuple_set, sym, sync, synthetic, diff --git a/library/core/src/intrinsics/simd.rs b/library/core/src/intrinsics/simd/mod.rs similarity index 99% rename from library/core/src/intrinsics/simd.rs rename to library/core/src/intrinsics/simd/mod.rs index ae86690dc418..084d8a3f1f24 100644 --- a/library/core/src/intrinsics/simd.rs +++ b/library/core/src/intrinsics/simd/mod.rs @@ -2,6 +2,8 @@ //! //! In this module, a "vector" is any `repr(simd)` type. +pub mod scalable; + use crate::marker::ConstParamTy; /// Inserts an element into a vector, returning the updated vector. diff --git a/library/core/src/intrinsics/simd/scalable.rs b/library/core/src/intrinsics/simd/scalable.rs new file mode 100644 index 000000000000..5e7b64f18e53 --- /dev/null +++ b/library/core/src/intrinsics/simd/scalable.rs @@ -0,0 +1,71 @@ +//! Scalable vector compiler intrinsics. +//! +//! In this module, a "vector" is any `#[rustc_scalable_vector]`-annotated type. + +/// Create a tuple of two vectors. +/// +/// `SVecTup` must be a scalable vector tuple (`#[rustc_scalable_vector]`) and `SVec` must be a +/// scalable vector (`#[rustc_scalable_vector(N)]`). `SVecTup` must be a tuple of vectors of +/// type `SVec`. +/// +/// Corresponds to Clang's `__builtin_sve_svcreate2*` builtins. +#[cfg(target_arch = "aarch64")] +#[rustc_nounwind] +#[rustc_intrinsic] +pub unsafe fn sve_tuple_create2(x0: SVec, x1: SVec) -> SVecTup; + +/// Create a tuple of three vectors. +/// +/// `SVecTup` must be a scalable vector tuple (`#[rustc_scalable_vector]`) and `SVec` must be a +/// scalable vector (`#[rustc_scalable_vector(N)]`). `SVecTup` must be a tuple of vectors of +/// type `SVec`. +/// +/// Corresponds to Clang's `__builtin_sve_svcreate3*` builtins. +#[cfg(target_arch = "aarch64")] +#[rustc_intrinsic] +#[rustc_nounwind] +pub unsafe fn sve_tuple_create3(x0: SVec, x1: SVec, x2: SVec) -> SVecTup; + +/// Create a tuple of four vectors. +/// +/// `SVecTup` must be a scalable vector tuple (`#[rustc_scalable_vector]`) and `SVec` must be a +/// scalable vector (`#[rustc_scalable_vector(N)]`). `SVecTup` must be a tuple of vectors of +/// type `SVec`. +/// +/// Corresponds to Clang's `__builtin_sve_svcreate4*` builtins. +#[cfg(target_arch = "aarch64")] +#[rustc_intrinsic] +#[rustc_nounwind] +pub unsafe fn sve_tuple_create4(x0: SVec, x1: SVec, x2: SVec, x3: SVec) -> SVecTup; + +/// Get one vector from a tuple of vectors. +/// +/// `SVecTup` must be a scalable vector tuple (`#[rustc_scalable_vector]`) and `SVec` must be a +/// scalable vector (`#[rustc_scalable_vector(N)]`). `SVecTup` must be a tuple of vectors of +/// type `SVec`. +/// +/// Corresponds to Clang's `__builtin_sve_svget*` builtins. +/// +/// # Safety +/// +/// `IDX` must be in-bounds of the tuple. +#[cfg(target_arch = "aarch64")] +#[rustc_intrinsic] +#[rustc_nounwind] +pub unsafe fn sve_tuple_get(tuple: SVecTup) -> SVec; + +/// Change one vector in a tuple of vectors. +/// +/// `SVecTup` must be a scalable vector tuple (`#[rustc_scalable_vector]`) and `SVec` must be a +/// scalable vector (`#[rustc_scalable_vector(N)]`). `SVecTup` must be a tuple of vectors of +/// type `SVec`. +/// +/// Corresponds to Clang's `__builtin_sve_svset*` builtins. +/// +/// # Safety +/// +/// `IDX` must be in-bounds of the tuple. +#[cfg(target_arch = "aarch64")] +#[rustc_intrinsic] +#[rustc_nounwind] +pub unsafe fn sve_tuple_set(tuple: SVecTup, x: SVec) -> SVecTup; diff --git a/tests/codegen-llvm/scalable-vectors/tuple-intrinsics.rs b/tests/codegen-llvm/scalable-vectors/tuple-intrinsics.rs new file mode 100644 index 000000000000..e19fc40cb9d6 --- /dev/null +++ b/tests/codegen-llvm/scalable-vectors/tuple-intrinsics.rs @@ -0,0 +1,100 @@ +//@ build-pass +//@ only-aarch64 +#![crate_type = "lib"] +#![allow(incomplete_features, internal_features)] +#![feature(abi_unadjusted, core_intrinsics, link_llvm_intrinsics, rustc_attrs)] + +// Tests that tuples of scalable vectors are passed as immediates and that the intrinsics for +// creating/getting/setting tuples of scalable vectors generate the correct assembly + +#[derive(Copy, Clone)] +#[rustc_scalable_vector(4)] +#[allow(non_camel_case_types)] +pub struct svfloat32_t(f32); + +#[derive(Copy, Clone)] +#[rustc_scalable_vector] +#[allow(non_camel_case_types)] +pub struct svfloat32x2_t(svfloat32_t, svfloat32_t); + +#[derive(Copy, Clone)] +#[rustc_scalable_vector] +#[allow(non_camel_case_types)] +pub struct svfloat32x3_t(svfloat32_t, svfloat32_t, svfloat32_t); + +#[derive(Copy, Clone)] +#[rustc_scalable_vector] +#[allow(non_camel_case_types)] +pub struct svfloat32x4_t(svfloat32_t, svfloat32_t, svfloat32_t, svfloat32_t); + +#[inline(never)] +#[target_feature(enable = "sve")] +pub fn svdup_n_f32(op: f32) -> svfloat32_t { + extern "C" { + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.dup.x.nxv4f32")] + fn _svdup_n_f32(op: f32) -> svfloat32_t; + } + unsafe { _svdup_n_f32(op) } +} + +// CHECK: define { , } @svcreate2_f32( %x0, %x1) +#[no_mangle] +#[target_feature(enable = "sve")] +pub fn svcreate2_f32(x0: svfloat32_t, x1: svfloat32_t) -> svfloat32x2_t { + // CHECK: %1 = insertvalue { , } poison, %x0, 0 + // CHECK-NEXT: %2 = insertvalue { , } %1, %x1, 1 + unsafe { std::intrinsics::simd::scalable::sve_tuple_create2(x0, x1) } +} + +// CHECK: define { , , } @svcreate3_f32( %x0, %x1, %x2) +#[no_mangle] +#[target_feature(enable = "sve")] +pub fn svcreate3_f32(x0: svfloat32_t, x1: svfloat32_t, x2: svfloat32_t) -> svfloat32x3_t { + // CHECK-LABEL: @_RNvCsk3YxfLN8zWY_6tuples13svcreate3_f32 + // CHECK: %1 = insertvalue { , , } poison, %x0, 0 + // CHECK-NEXT: %2 = insertvalue { , , } %1, %x1, 1 + // CHECK-NEXT: %3 = insertvalue { , , } %2, %x2, 2 + unsafe { std::intrinsics::simd::scalable::sve_tuple_create3(x0, x1, x2) } +} + +// CHECK: define { , , , } @svcreate4_f32( %x0, %x1, %x2, %x3) +#[no_mangle] +#[target_feature(enable = "sve")] +pub fn svcreate4_f32( + x0: svfloat32_t, + x1: svfloat32_t, + x2: svfloat32_t, + x3: svfloat32_t, +) -> svfloat32x4_t { + // CHECK-LABEL: @_RNvCsk3YxfLN8zWY_6tuples13svcreate4_f32 + // CHECK: %1 = insertvalue { , , , } poison, %x0, 0 + // CHECK-NEXT: %2 = insertvalue { , , , } %1, %x1, 1 + // CHECK-NEXT: %3 = insertvalue { , , , } %2, %x2, 2 + // CHECK-NEXT: %4 = insertvalue { , , , } %3, %x3, 3 + unsafe { std::intrinsics::simd::scalable::sve_tuple_create4(x0, x1, x2, x3) } +} + +// CHECK: define @svget2_f32({ , } %tup) +#[no_mangle] +#[target_feature(enable = "sve")] +pub fn svget2_f32(tup: svfloat32x2_t) -> svfloat32_t { + // CHECK: %1 = extractvalue { , } %tup, 0 + unsafe { std::intrinsics::simd::scalable::sve_tuple_get::<_, _, { IDX }>(tup) } +} + +// CHECK: define { , } @svset2_f32({ , } %tup, %x) +#[no_mangle] +#[target_feature(enable = "sve")] +pub fn svset2_f32(tup: svfloat32x2_t, x: svfloat32_t) -> svfloat32x2_t { + // CHECK: %1 = insertvalue { , } %tup, %x, 0 + unsafe { std::intrinsics::simd::scalable::sve_tuple_set::<_, _, { IDX }>(tup, x) } +} + +// This function exists only so there are calls to the generic functions +#[target_feature(enable = "sve")] +pub fn test() { + let x = svdup_n_f32(2f32); + let tup = svcreate2_f32(x, x); + let x = svget2_f32::<0>(tup); + let tup = svset2_f32::<0>(tup, x); +} diff --git a/tests/ui/simd/masked-load-store-check-fail.stderr b/tests/ui/simd/masked-load-store-check-fail.stderr index 4e63d04a3b15..037855c8ec9c 100644 --- a/tests/ui/simd/masked-load-store-check-fail.stderr +++ b/tests/ui/simd/masked-load-store-check-fail.stderr @@ -21,7 +21,7 @@ LL | | Simd::([9; 4]), LL | | ); | |_________^ note: function defined here - --> $SRC_DIR/core/src/intrinsics/simd.rs:LL:COL + --> $SRC_DIR/core/src/intrinsics/simd/mod.rs:LL:COL error[E0308]: mismatched types --> $DIR/masked-load-store-check-fail.rs:25:13 @@ -46,7 +46,7 @@ LL | | default, LL | | ); | |_________^ note: function defined here - --> $SRC_DIR/core/src/intrinsics/simd.rs:LL:COL + --> $SRC_DIR/core/src/intrinsics/simd/mod.rs:LL:COL error: aborting due to 2 previous errors diff --git a/triagebot.toml b/triagebot.toml index f99d700310df..719fe2a75c82 100644 --- a/triagebot.toml +++ b/triagebot.toml @@ -1077,7 +1077,7 @@ cc = ["@Amanieu", "@folkertdev", "@sayantn"] message = "Some changes occurred in `std_detect`" cc = ["@Amanieu", "@folkertdev", "@sayantn"] -[mentions."library/core/src/intrinsics/simd.rs"] +[mentions."library/core/src/intrinsics/simd/mod.rs"] message = """ Some changes occurred to the platform-builtins intrinsics. Make sure the LLVM backend as well as portable-simd gets adapted for the changes.