From 826ab8ba0ec76ba9ebb81f7fe10733fdb1944f40 Mon Sep 17 00:00:00 2001 From: David Wood Date: Thu, 15 Jan 2026 15:53:53 +0000 Subject: [PATCH] core_arch: sve types Add the SVE types (without any of the generated intrinsics) and empty modules where the generated intrinsics will be. Enables the `adt_const_params` crate feature that the generated intrinsics will use. Co-authored-by: Jamie Cunliffe Co-authored-by: Luca Vizzarro Co-authored-by: Adam Gemmell Co-authored-by: Jacob Bramley --- .../crates/core_arch/src/aarch64/mod.rs | 8 + .../core_arch/src/aarch64/sve/generated.rs | 1 + .../crates/core_arch/src/aarch64/sve/mod.rs | 379 ++++++++++++++++++ .../core_arch/src/aarch64/sve2/generated.rs | 1 + .../crates/core_arch/src/aarch64/sve2/mod.rs | 17 + library/stdarch/crates/core_arch/src/lib.rs | 3 +- 6 files changed, 408 insertions(+), 1 deletion(-) create mode 100644 library/stdarch/crates/core_arch/src/aarch64/sve/generated.rs create mode 100644 library/stdarch/crates/core_arch/src/aarch64/sve/mod.rs create mode 100644 library/stdarch/crates/core_arch/src/aarch64/sve2/generated.rs create mode 100644 library/stdarch/crates/core_arch/src/aarch64/sve2/mod.rs diff --git a/library/stdarch/crates/core_arch/src/aarch64/mod.rs b/library/stdarch/crates/core_arch/src/aarch64/mod.rs index d7295659c3c9..9376e04b3b53 100644 --- a/library/stdarch/crates/core_arch/src/aarch64/mod.rs +++ b/library/stdarch/crates/core_arch/src/aarch64/mod.rs @@ -25,6 +25,14 @@ #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub use self::neon::*; +mod sve; +#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")] +pub use self::sve::*; + +mod sve2; +#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")] +pub use self::sve2::*; + mod prefetch; #[unstable(feature = "stdarch_aarch64_prefetch", issue = "117217")] pub use self::prefetch::*; diff --git a/library/stdarch/crates/core_arch/src/aarch64/sve/generated.rs b/library/stdarch/crates/core_arch/src/aarch64/sve/generated.rs new file mode 100644 index 000000000000..8b137891791f --- /dev/null +++ b/library/stdarch/crates/core_arch/src/aarch64/sve/generated.rs @@ -0,0 +1 @@ + diff --git a/library/stdarch/crates/core_arch/src/aarch64/sve/mod.rs b/library/stdarch/crates/core_arch/src/aarch64/sve/mod.rs new file mode 100644 index 000000000000..a3f70ab61c40 --- /dev/null +++ b/library/stdarch/crates/core_arch/src/aarch64/sve/mod.rs @@ -0,0 +1,379 @@ +//! SVE intrinsics + +#![allow(non_camel_case_types)] + +// `generated.rs` has a `super::*` and this import is for that +use crate::intrinsics::{simd::*, *}; + +#[rustfmt::skip] +mod generated; +#[rustfmt::skip] +#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")] +pub use self::generated::*; + +use crate::{marker::ConstParamTy, mem::transmute}; + +pub(super) trait AsUnsigned { + type Unsigned; + unsafe fn as_unsigned(self) -> Self::Unsigned; +} + +pub(super) trait AsSigned { + type Signed; + unsafe fn as_signed(self) -> Self::Signed; +} + +/// Same as `Into` but with into being unsafe so that it can have the required `target_feature` +pub(super) trait SveInto: Sized { + unsafe fn sve_into(self) -> T; +} + +macro_rules! impl_sve_type { + ($(($v:vis, $elem_type:ty, $name:ident, $elt:literal))*) => ($( + #[doc = concat!("Scalable vector of type ", stringify!($elem_type))] + #[derive(Clone, Copy, Debug)] + #[rustc_scalable_vector($elt)] + #[unstable(feature = "stdarch_aarch64_sve", issue = "145052")] + $v struct $name($elem_type); + )*) +} + +macro_rules! impl_sve_tuple_type { + ($(($v:vis, $vec_type:ty, $elt:tt, $name:ident))*) => ($( + impl_sve_tuple_type!(@ ($v, $vec_type, $elt, $name)); + )*); + (@ ($v:vis, $vec_type:ty, 2, $name:ident)) => ( + #[doc = concat!("Two-element tuple of scalable vectors of type ", stringify!($vec_type))] + #[derive(Clone, Copy, Debug)] + #[rustc_scalable_vector] + #[unstable(feature = "stdarch_aarch64_sve", issue = "145052")] + $v struct $name($vec_type, $vec_type); + ); + (@ ($v:vis, $vec_type:ty, 3, $name:ident)) => ( + #[doc = concat!("Three-element tuple of scalable vectors of type ", stringify!($vec_type))] + #[derive(Clone, Copy, Debug)] + #[rustc_scalable_vector] + #[unstable(feature = "stdarch_aarch64_sve", issue = "145052")] + $v struct $name($vec_type, $vec_type, $vec_type); + ); + (@ ($v:vis, $vec_type:ty, 4, $name:ident)) => ( + #[doc = concat!("Four-element tuple of scalable vectors of type ", stringify!($vec_type))] + #[derive(Clone, Copy, Debug)] + #[rustc_scalable_vector] + #[unstable(feature = "stdarch_aarch64_sve", issue = "145052")] + $v struct $name($vec_type, $vec_type, $vec_type, $vec_type); + ); +} + +macro_rules! impl_sign_conversions_sv { + ($(($signed:ty, $unsigned:ty))*) => ($( + impl AsUnsigned for $signed { + type Unsigned = $unsigned; + + #[inline] + #[target_feature(enable = "sve")] + unsafe fn as_unsigned(self) -> $unsigned { + transmute_unchecked(self) + } + } + + impl AsSigned for $unsigned { + type Signed = $signed; + + #[inline] + #[target_feature(enable = "sve")] + unsafe fn as_signed(self) -> $signed { + transmute_unchecked(self) + } + } + )*) +} + +macro_rules! impl_sign_conversions { + ($(($signed:ty, $unsigned:ty))*) => ($( + impl AsUnsigned for $signed { + type Unsigned = $unsigned; + + #[inline] + #[target_feature(enable = "sve")] + unsafe fn as_unsigned(self) -> $unsigned { + transmute(self) + } + } + + impl AsSigned for $unsigned { + type Signed = $signed; + + #[inline] + #[target_feature(enable = "sve")] + unsafe fn as_signed(self) -> $signed { + transmute(self) + } + } + )*) +} + +/// LLVM requires the predicate lane count to be the same as the lane count +/// it's working with. However the ACLE only defines one bool type and the +/// instruction set doesn't have this distinction. As a result we have to +/// create these internal types so we can match the LLVM signature. Each of +/// these internal types can be converted to the public `svbool_t` type and +/// the `svbool_t` type can be converted into these. +macro_rules! impl_internal_sve_predicate { + ($(($name:ident, $elt:literal))*) => ($( + impl_sve_type! { + (pub(super), bool, $name, $elt) + } + + impl SveInto for $name { + #[inline] + #[target_feature(enable = "sve")] + unsafe fn sve_into(self) -> svbool_t { + #[allow(improper_ctypes)] + unsafe extern "C" { + #[cfg_attr( + target_arch = "aarch64", + link_name = concat!("llvm.aarch64.sve.convert.to.svbool.nxv", $elt, "i1") + )] + fn convert_to_svbool(b: $name) -> svbool_t; + } + unsafe { convert_to_svbool(self) } + } + } + + #[unstable(feature = "stdarch_aarch64_sve", issue = "145052")] + impl SveInto<$name> for svbool_t { + #[inline] + #[target_feature(enable = "sve")] + unsafe fn sve_into(self) -> $name { + #[allow(improper_ctypes)] + unsafe extern "C" { + #[cfg_attr( + target_arch = "aarch64", + link_name = concat!("llvm.aarch64.sve.convert.from.svbool.nxv", $elt, "i1") + )] + fn convert_from_svbool(b: svbool_t) -> $name; + } + unsafe { convert_from_svbool(self) } + } + } + )*) +} + +impl_sve_type! { + (pub, bool, svbool_t, 16) + + (pub, i8, svint8_t, 16) + (pub, u8, svuint8_t, 16) + + (pub, i16, svint16_t, 8) + (pub, u16, svuint16_t, 8) + (pub, f32, svfloat32_t, 4) + (pub, i32, svint32_t, 4) + (pub, u32, svuint32_t, 4) + (pub, f64, svfloat64_t, 2) + (pub, i64, svint64_t, 2) + (pub, u64, svuint64_t, 2) + + // Internal types: + (pub(super), i8, nxv2i8, 2) + (pub(super), i8, nxv4i8, 4) + (pub(super), i8, nxv8i8, 8) + + (pub(super), i16, nxv2i16, 2) + (pub(super), i16, nxv4i16, 4) + + (pub(super), i32, nxv2i32, 2) + + (pub(super), u8, nxv2u8, 2) + (pub(super), u8, nxv4u8, 4) + (pub(super), u8, nxv8u8, 8) + + (pub(super), u16, nxv2u16, 2) + (pub(super), u16, nxv4u16, 4) + + (pub(super), u32, nxv2u32, 2) +} + +impl_sve_tuple_type! { + (pub, svint8_t, 2, svint8x2_t) + (pub, svuint8_t, 2, svuint8x2_t) + (pub, svint16_t, 2, svint16x2_t) + (pub, svuint16_t, 2, svuint16x2_t) + (pub, svfloat32_t, 2, svfloat32x2_t) + (pub, svint32_t, 2, svint32x2_t) + (pub, svuint32_t, 2, svuint32x2_t) + (pub, svfloat64_t, 2, svfloat64x2_t) + (pub, svint64_t, 2, svint64x2_t) + (pub, svuint64_t, 2, svuint64x2_t) + + (pub, svint8_t, 3, svint8x3_t) + (pub, svuint8_t, 3, svuint8x3_t) + (pub, svint16_t, 3, svint16x3_t) + (pub, svuint16_t, 3, svuint16x3_t) + (pub, svfloat32_t, 3, svfloat32x3_t) + (pub, svint32_t, 3, svint32x3_t) + (pub, svuint32_t, 3, svuint32x3_t) + (pub, svfloat64_t, 3, svfloat64x3_t) + (pub, svint64_t, 3, svint64x3_t) + (pub, svuint64_t, 3, svuint64x3_t) + + (pub, svint8_t, 4, svint8x4_t) + (pub, svuint8_t, 4, svuint8x4_t) + (pub, svint16_t, 4, svint16x4_t) + (pub, svuint16_t, 4, svuint16x4_t) + (pub, svfloat32_t, 4, svfloat32x4_t) + (pub, svint32_t, 4, svint32x4_t) + (pub, svuint32_t, 4, svuint32x4_t) + (pub, svfloat64_t, 4, svfloat64x4_t) + (pub, svint64_t, 4, svint64x4_t) + (pub, svuint64_t, 4, svuint64x4_t) +} + +impl_sign_conversions! { + (i8, u8) + (i16, u16) + (i32, u32) + (i64, u64) + (*const i8, *const u8) + (*const i16, *const u16) + (*const i32, *const u32) + (*const i64, *const u64) + (*mut i8, *mut u8) + (*mut i16, *mut u16) + (*mut i32, *mut u32) + (*mut i64, *mut u64) +} + +impl_sign_conversions_sv! { + (svint8_t, svuint8_t) + (svint16_t, svuint16_t) + (svint32_t, svuint32_t) + (svint64_t, svuint64_t) + + (svint8x2_t, svuint8x2_t) + (svint16x2_t, svuint16x2_t) + (svint32x2_t, svuint32x2_t) + (svint64x2_t, svuint64x2_t) + + (svint8x3_t, svuint8x3_t) + (svint16x3_t, svuint16x3_t) + (svint32x3_t, svuint32x3_t) + (svint64x3_t, svuint64x3_t) + + (svint8x4_t, svuint8x4_t) + (svint16x4_t, svuint16x4_t) + (svint32x4_t, svuint32x4_t) + (svint64x4_t, svuint64x4_t) + + // Internal types: + (nxv2i8, nxv2u8) + (nxv4i8, nxv4u8) + (nxv8i8, nxv8u8) + + (nxv2i16, nxv2u16) + (nxv4i16, nxv4u16) + + (nxv2i32, nxv2u32) +} + +impl_internal_sve_predicate! { + (svbool2_t, 2) + (svbool4_t, 4) + (svbool8_t, 8) +} + +/// Patterns returned by a `PTRUE` +#[repr(i32)] +#[allow(non_camel_case_types)] +#[derive(Clone, Copy, Debug, PartialEq, Eq, ConstParamTy)] +#[non_exhaustive] +#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")] +pub enum svpattern { + /// Activate the largest power-of-two number of elements that is less than the vector length + SV_POW2 = 0, + /// Activate the first element + SV_VL1 = 1, + /// Activate the first two elements + SV_VL2 = 2, + /// Activate the first three elements + SV_VL3 = 3, + /// Activate the first four elements + SV_VL4 = 4, + /// Activate the first five elements + SV_VL5 = 5, + /// Activate the first six elements + SV_VL6 = 6, + /// Activate the first seven elements + SV_VL7 = 7, + /// Activate the first eight elements + SV_VL8 = 8, + /// Activate the first sixteen elements + SV_VL16 = 9, + /// Activate the first thirty-two elements + SV_VL32 = 10, + /// Activate the first sixty-four elements + SV_VL64 = 11, + /// Activate the first one-hundred-and-twenty-eight elements + SV_VL128 = 12, + /// Activate the first two-hundred-and-fifty-six elements + SV_VL256 = 13, + /// Activate the largest multiple-of-four number of elements that is less than the vector length + SV_MUL4 = 29, + /// Activate the largest multiple-of-three number of elements that is less than the vector + /// length + SV_MUL3 = 30, + /// Activate all elements + SV_ALL = 31, +} + +/// Addressing mode for prefetch intrinsics - allows the specification of the expected access +/// kind (read or write), the cache level to load the data, the data retention policy +/// (temporal or streaming) +#[repr(i32)] +#[allow(non_camel_case_types)] +#[derive(Clone, Copy, Debug, PartialEq, Eq, ConstParamTy)] +#[non_exhaustive] +#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")] +pub enum svprfop { + /// Temporal fetch of the addressed location for reading to the L1 cache (i.e. allocate in + /// cache normally) + SV_PLDL1KEEP = 0, + /// Streaming fetch of the addressed location for reading to the L1 cache (i.e. memory only + /// used once) + SV_PLDL1STRM = 1, + /// Temporal fetch of the addressed location for reading to the L2 cache (i.e. allocate in + /// cache normally) + SV_PLDL2KEEP = 2, + /// Streaming fetch of the addressed location for reading to the L2 cache (i.e. memory only + /// used once) + SV_PLDL2STRM = 3, + /// Temporal fetch of the addressed location for reading to the L3 cache (i.e. allocate in + /// cache normally) + SV_PLDL3KEEP = 4, + /// Streaming fetch of the addressed location for reading to the L3 cache (i.e. memory only + /// used once) + SV_PLDL3STRM = 5, + /// Temporal fetch of the addressed location for writing to the L1 cache (i.e. allocate in + /// cache normally) + SV_PSTL1KEEP = 8, + /// Temporal fetch of the addressed location for writing to the L1 cache (i.e. memory only + /// used once) + SV_PSTL1STRM = 9, + /// Temporal fetch of the addressed location for writing to the L2 cache (i.e. allocate in + /// cache normally) + SV_PSTL2KEEP = 10, + /// Temporal fetch of the addressed location for writing to the L2 cache (i.e. memory only + /// used once) + SV_PSTL2STRM = 11, + /// Temporal fetch of the addressed location for writing to the L3 cache (i.e. allocate in + /// cache normally) + SV_PSTL3KEEP = 12, + /// Temporal fetch of the addressed location for writing to the L3 cache (i.e. memory only + /// used once) + SV_PSTL3STRM = 13, +} + +#[cfg(test)] +#[path = "ld_st_tests_aarch64.rs"] +mod ld_st_tests; diff --git a/library/stdarch/crates/core_arch/src/aarch64/sve2/generated.rs b/library/stdarch/crates/core_arch/src/aarch64/sve2/generated.rs new file mode 100644 index 000000000000..8b137891791f --- /dev/null +++ b/library/stdarch/crates/core_arch/src/aarch64/sve2/generated.rs @@ -0,0 +1 @@ + diff --git a/library/stdarch/crates/core_arch/src/aarch64/sve2/mod.rs b/library/stdarch/crates/core_arch/src/aarch64/sve2/mod.rs new file mode 100644 index 000000000000..acf907021457 --- /dev/null +++ b/library/stdarch/crates/core_arch/src/aarch64/sve2/mod.rs @@ -0,0 +1,17 @@ +//! SVE2 intrinsics + +#![allow(non_camel_case_types)] + +// `generated.rs` has a `super::*` and this import is for that +use super::sve::*; +use crate::intrinsics::*; + +#[rustfmt::skip] +mod generated; +#[rustfmt::skip] +#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")] +pub use self::generated::*; + +#[cfg(test)] +#[path = "ld_st_tests_aarch64.rs"] +mod ld_st_tests; diff --git a/library/stdarch/crates/core_arch/src/lib.rs b/library/stdarch/crates/core_arch/src/lib.rs index 9255994e5ee8..f2f19eba2670 100644 --- a/library/stdarch/crates/core_arch/src/lib.rs +++ b/library/stdarch/crates/core_arch/src/lib.rs @@ -40,7 +40,8 @@ const_cmp, const_eval_select, maybe_uninit_as_bytes, - movrs_target_feature + movrs_target_feature, + min_adt_const_params )] #![cfg_attr(test, feature(test, abi_vectorcall, stdarch_internal))] #![deny(clippy::missing_inline_in_public_items)]