core_arch: sve types

Add the SVE types (without any of the generated intrinsics) and empty
modules where the generated intrinsics will be. Enables the
`adt_const_params` crate feature that the generated intrinsics will use.

Co-authored-by: Jamie Cunliffe <Jamie.Cunliffe@arm.com>
Co-authored-by: Luca Vizzarro <Luca.Vizzarro@arm.com>
Co-authored-by: Adam Gemmell <adam.gemmell@arm.com>
Co-authored-by: Jacob Bramley <jacob.bramley@arm.com>
This commit is contained in:
David Wood
2026-01-15 15:53:53 +00:00
parent 2120148275
commit 826ab8ba0e
6 changed files with 408 additions and 1 deletions
@@ -25,6 +25,14 @@
#[stable(feature = "neon_intrinsics", since = "1.59.0")]
pub use self::neon::*;
mod sve;
#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
pub use self::sve::*;
mod sve2;
#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
pub use self::sve2::*;
mod prefetch;
#[unstable(feature = "stdarch_aarch64_prefetch", issue = "117217")]
pub use self::prefetch::*;
@@ -0,0 +1 @@
@@ -0,0 +1,379 @@
//! SVE intrinsics
#![allow(non_camel_case_types)]
// `generated.rs` has a `super::*` and this import is for that
use crate::intrinsics::{simd::*, *};
#[rustfmt::skip]
mod generated;
#[rustfmt::skip]
#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
pub use self::generated::*;
use crate::{marker::ConstParamTy, mem::transmute};
pub(super) trait AsUnsigned {
type Unsigned;
unsafe fn as_unsigned(self) -> Self::Unsigned;
}
pub(super) trait AsSigned {
type Signed;
unsafe fn as_signed(self) -> Self::Signed;
}
/// Same as `Into` but with into being unsafe so that it can have the required `target_feature`
pub(super) trait SveInto<T>: Sized {
unsafe fn sve_into(self) -> T;
}
macro_rules! impl_sve_type {
($(($v:vis, $elem_type:ty, $name:ident, $elt:literal))*) => ($(
#[doc = concat!("Scalable vector of type ", stringify!($elem_type))]
#[derive(Clone, Copy, Debug)]
#[rustc_scalable_vector($elt)]
#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
$v struct $name($elem_type);
)*)
}
macro_rules! impl_sve_tuple_type {
($(($v:vis, $vec_type:ty, $elt:tt, $name:ident))*) => ($(
impl_sve_tuple_type!(@ ($v, $vec_type, $elt, $name));
)*);
(@ ($v:vis, $vec_type:ty, 2, $name:ident)) => (
#[doc = concat!("Two-element tuple of scalable vectors of type ", stringify!($vec_type))]
#[derive(Clone, Copy, Debug)]
#[rustc_scalable_vector]
#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
$v struct $name($vec_type, $vec_type);
);
(@ ($v:vis, $vec_type:ty, 3, $name:ident)) => (
#[doc = concat!("Three-element tuple of scalable vectors of type ", stringify!($vec_type))]
#[derive(Clone, Copy, Debug)]
#[rustc_scalable_vector]
#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
$v struct $name($vec_type, $vec_type, $vec_type);
);
(@ ($v:vis, $vec_type:ty, 4, $name:ident)) => (
#[doc = concat!("Four-element tuple of scalable vectors of type ", stringify!($vec_type))]
#[derive(Clone, Copy, Debug)]
#[rustc_scalable_vector]
#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
$v struct $name($vec_type, $vec_type, $vec_type, $vec_type);
);
}
macro_rules! impl_sign_conversions_sv {
($(($signed:ty, $unsigned:ty))*) => ($(
impl AsUnsigned for $signed {
type Unsigned = $unsigned;
#[inline]
#[target_feature(enable = "sve")]
unsafe fn as_unsigned(self) -> $unsigned {
transmute_unchecked(self)
}
}
impl AsSigned for $unsigned {
type Signed = $signed;
#[inline]
#[target_feature(enable = "sve")]
unsafe fn as_signed(self) -> $signed {
transmute_unchecked(self)
}
}
)*)
}
macro_rules! impl_sign_conversions {
($(($signed:ty, $unsigned:ty))*) => ($(
impl AsUnsigned for $signed {
type Unsigned = $unsigned;
#[inline]
#[target_feature(enable = "sve")]
unsafe fn as_unsigned(self) -> $unsigned {
transmute(self)
}
}
impl AsSigned for $unsigned {
type Signed = $signed;
#[inline]
#[target_feature(enable = "sve")]
unsafe fn as_signed(self) -> $signed {
transmute(self)
}
}
)*)
}
/// LLVM requires the predicate lane count to be the same as the lane count
/// it's working with. However the ACLE only defines one bool type and the
/// instruction set doesn't have this distinction. As a result we have to
/// create these internal types so we can match the LLVM signature. Each of
/// these internal types can be converted to the public `svbool_t` type and
/// the `svbool_t` type can be converted into these.
macro_rules! impl_internal_sve_predicate {
($(($name:ident, $elt:literal))*) => ($(
impl_sve_type! {
(pub(super), bool, $name, $elt)
}
impl SveInto<svbool_t> for $name {
#[inline]
#[target_feature(enable = "sve")]
unsafe fn sve_into(self) -> svbool_t {
#[allow(improper_ctypes)]
unsafe extern "C" {
#[cfg_attr(
target_arch = "aarch64",
link_name = concat!("llvm.aarch64.sve.convert.to.svbool.nxv", $elt, "i1")
)]
fn convert_to_svbool(b: $name) -> svbool_t;
}
unsafe { convert_to_svbool(self) }
}
}
#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
impl SveInto<$name> for svbool_t {
#[inline]
#[target_feature(enable = "sve")]
unsafe fn sve_into(self) -> $name {
#[allow(improper_ctypes)]
unsafe extern "C" {
#[cfg_attr(
target_arch = "aarch64",
link_name = concat!("llvm.aarch64.sve.convert.from.svbool.nxv", $elt, "i1")
)]
fn convert_from_svbool(b: svbool_t) -> $name;
}
unsafe { convert_from_svbool(self) }
}
}
)*)
}
impl_sve_type! {
(pub, bool, svbool_t, 16)
(pub, i8, svint8_t, 16)
(pub, u8, svuint8_t, 16)
(pub, i16, svint16_t, 8)
(pub, u16, svuint16_t, 8)
(pub, f32, svfloat32_t, 4)
(pub, i32, svint32_t, 4)
(pub, u32, svuint32_t, 4)
(pub, f64, svfloat64_t, 2)
(pub, i64, svint64_t, 2)
(pub, u64, svuint64_t, 2)
// Internal types:
(pub(super), i8, nxv2i8, 2)
(pub(super), i8, nxv4i8, 4)
(pub(super), i8, nxv8i8, 8)
(pub(super), i16, nxv2i16, 2)
(pub(super), i16, nxv4i16, 4)
(pub(super), i32, nxv2i32, 2)
(pub(super), u8, nxv2u8, 2)
(pub(super), u8, nxv4u8, 4)
(pub(super), u8, nxv8u8, 8)
(pub(super), u16, nxv2u16, 2)
(pub(super), u16, nxv4u16, 4)
(pub(super), u32, nxv2u32, 2)
}
impl_sve_tuple_type! {
(pub, svint8_t, 2, svint8x2_t)
(pub, svuint8_t, 2, svuint8x2_t)
(pub, svint16_t, 2, svint16x2_t)
(pub, svuint16_t, 2, svuint16x2_t)
(pub, svfloat32_t, 2, svfloat32x2_t)
(pub, svint32_t, 2, svint32x2_t)
(pub, svuint32_t, 2, svuint32x2_t)
(pub, svfloat64_t, 2, svfloat64x2_t)
(pub, svint64_t, 2, svint64x2_t)
(pub, svuint64_t, 2, svuint64x2_t)
(pub, svint8_t, 3, svint8x3_t)
(pub, svuint8_t, 3, svuint8x3_t)
(pub, svint16_t, 3, svint16x3_t)
(pub, svuint16_t, 3, svuint16x3_t)
(pub, svfloat32_t, 3, svfloat32x3_t)
(pub, svint32_t, 3, svint32x3_t)
(pub, svuint32_t, 3, svuint32x3_t)
(pub, svfloat64_t, 3, svfloat64x3_t)
(pub, svint64_t, 3, svint64x3_t)
(pub, svuint64_t, 3, svuint64x3_t)
(pub, svint8_t, 4, svint8x4_t)
(pub, svuint8_t, 4, svuint8x4_t)
(pub, svint16_t, 4, svint16x4_t)
(pub, svuint16_t, 4, svuint16x4_t)
(pub, svfloat32_t, 4, svfloat32x4_t)
(pub, svint32_t, 4, svint32x4_t)
(pub, svuint32_t, 4, svuint32x4_t)
(pub, svfloat64_t, 4, svfloat64x4_t)
(pub, svint64_t, 4, svint64x4_t)
(pub, svuint64_t, 4, svuint64x4_t)
}
impl_sign_conversions! {
(i8, u8)
(i16, u16)
(i32, u32)
(i64, u64)
(*const i8, *const u8)
(*const i16, *const u16)
(*const i32, *const u32)
(*const i64, *const u64)
(*mut i8, *mut u8)
(*mut i16, *mut u16)
(*mut i32, *mut u32)
(*mut i64, *mut u64)
}
impl_sign_conversions_sv! {
(svint8_t, svuint8_t)
(svint16_t, svuint16_t)
(svint32_t, svuint32_t)
(svint64_t, svuint64_t)
(svint8x2_t, svuint8x2_t)
(svint16x2_t, svuint16x2_t)
(svint32x2_t, svuint32x2_t)
(svint64x2_t, svuint64x2_t)
(svint8x3_t, svuint8x3_t)
(svint16x3_t, svuint16x3_t)
(svint32x3_t, svuint32x3_t)
(svint64x3_t, svuint64x3_t)
(svint8x4_t, svuint8x4_t)
(svint16x4_t, svuint16x4_t)
(svint32x4_t, svuint32x4_t)
(svint64x4_t, svuint64x4_t)
// Internal types:
(nxv2i8, nxv2u8)
(nxv4i8, nxv4u8)
(nxv8i8, nxv8u8)
(nxv2i16, nxv2u16)
(nxv4i16, nxv4u16)
(nxv2i32, nxv2u32)
}
impl_internal_sve_predicate! {
(svbool2_t, 2)
(svbool4_t, 4)
(svbool8_t, 8)
}
/// Patterns returned by a `PTRUE`
#[repr(i32)]
#[allow(non_camel_case_types)]
#[derive(Clone, Copy, Debug, PartialEq, Eq, ConstParamTy)]
#[non_exhaustive]
#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
pub enum svpattern {
/// Activate the largest power-of-two number of elements that is less than the vector length
SV_POW2 = 0,
/// Activate the first element
SV_VL1 = 1,
/// Activate the first two elements
SV_VL2 = 2,
/// Activate the first three elements
SV_VL3 = 3,
/// Activate the first four elements
SV_VL4 = 4,
/// Activate the first five elements
SV_VL5 = 5,
/// Activate the first six elements
SV_VL6 = 6,
/// Activate the first seven elements
SV_VL7 = 7,
/// Activate the first eight elements
SV_VL8 = 8,
/// Activate the first sixteen elements
SV_VL16 = 9,
/// Activate the first thirty-two elements
SV_VL32 = 10,
/// Activate the first sixty-four elements
SV_VL64 = 11,
/// Activate the first one-hundred-and-twenty-eight elements
SV_VL128 = 12,
/// Activate the first two-hundred-and-fifty-six elements
SV_VL256 = 13,
/// Activate the largest multiple-of-four number of elements that is less than the vector length
SV_MUL4 = 29,
/// Activate the largest multiple-of-three number of elements that is less than the vector
/// length
SV_MUL3 = 30,
/// Activate all elements
SV_ALL = 31,
}
/// Addressing mode for prefetch intrinsics - allows the specification of the expected access
/// kind (read or write), the cache level to load the data, the data retention policy
/// (temporal or streaming)
#[repr(i32)]
#[allow(non_camel_case_types)]
#[derive(Clone, Copy, Debug, PartialEq, Eq, ConstParamTy)]
#[non_exhaustive]
#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
pub enum svprfop {
/// Temporal fetch of the addressed location for reading to the L1 cache (i.e. allocate in
/// cache normally)
SV_PLDL1KEEP = 0,
/// Streaming fetch of the addressed location for reading to the L1 cache (i.e. memory only
/// used once)
SV_PLDL1STRM = 1,
/// Temporal fetch of the addressed location for reading to the L2 cache (i.e. allocate in
/// cache normally)
SV_PLDL2KEEP = 2,
/// Streaming fetch of the addressed location for reading to the L2 cache (i.e. memory only
/// used once)
SV_PLDL2STRM = 3,
/// Temporal fetch of the addressed location for reading to the L3 cache (i.e. allocate in
/// cache normally)
SV_PLDL3KEEP = 4,
/// Streaming fetch of the addressed location for reading to the L3 cache (i.e. memory only
/// used once)
SV_PLDL3STRM = 5,
/// Temporal fetch of the addressed location for writing to the L1 cache (i.e. allocate in
/// cache normally)
SV_PSTL1KEEP = 8,
/// Temporal fetch of the addressed location for writing to the L1 cache (i.e. memory only
/// used once)
SV_PSTL1STRM = 9,
/// Temporal fetch of the addressed location for writing to the L2 cache (i.e. allocate in
/// cache normally)
SV_PSTL2KEEP = 10,
/// Temporal fetch of the addressed location for writing to the L2 cache (i.e. memory only
/// used once)
SV_PSTL2STRM = 11,
/// Temporal fetch of the addressed location for writing to the L3 cache (i.e. allocate in
/// cache normally)
SV_PSTL3KEEP = 12,
/// Temporal fetch of the addressed location for writing to the L3 cache (i.e. memory only
/// used once)
SV_PSTL3STRM = 13,
}
#[cfg(test)]
#[path = "ld_st_tests_aarch64.rs"]
mod ld_st_tests;
@@ -0,0 +1,17 @@
//! SVE2 intrinsics
#![allow(non_camel_case_types)]
// `generated.rs` has a `super::*` and this import is for that
use super::sve::*;
use crate::intrinsics::*;
#[rustfmt::skip]
mod generated;
#[rustfmt::skip]
#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
pub use self::generated::*;
#[cfg(test)]
#[path = "ld_st_tests_aarch64.rs"]
mod ld_st_tests;
+2 -1
View File
@@ -40,7 +40,8 @@
const_cmp,
const_eval_select,
maybe_uninit_as_bytes,
movrs_target_feature
movrs_target_feature,
min_adt_const_params
)]
#![cfg_attr(test, feature(test, abi_vectorcall, stdarch_internal))]
#![deny(clippy::missing_inline_in_public_items)]