From f0a751d7268d0511d8097f00c3e28d58626f572f Mon Sep 17 00:00:00 2001 From: Lilith River Date: Sun, 26 Apr 2026 18:38:04 -0600 Subject: [PATCH] std_detect: support detecting more features on aarch64 Windows MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Wire IsProcessorFeaturePresent for the PF_ARM_* constants exposed in Windows SDK 26100 (Win11 24H2): fp16 PF_ARM_V82_FP16_INSTRUCTIONS_AVAILABLE (67) i8mm PF_ARM_V82_I8MM_INSTRUCTIONS_AVAILABLE (66) bf16 PF_ARM_V86_BF16_INSTRUCTIONS_AVAILABLE (68) sha3 PF_ARM_SHA3_INSTRUCTIONS_AVAILABLE (64) AND PF_ARM_SHA512_INSTRUCTIONS_AVAILABLE (65) lse2 PF_ARM_LSE2_AVAILABLE (62) f32mm PF_ARM_SVE_F32MM_INSTRUCTIONS_AVAILABLE (58) f64mm PF_ARM_SVE_F64MM_INSTRUCTIONS_AVAILABLE (59) Also derive `rdm` from FEAT_DotProd. There is no PF_ARM_RDM_* constant; FEAT_DotProd is an optional v8.2-A feature only present on cores that implement at least v8.1-A, and v8.1-A with AdvSIMD mandates FEAT_RDM (Arm ARM K.a §D17.2.91). AdvSIMD is universal on Windows-on-ARM. See PR description for full rationale and .NET 10 precedent. All eight feature names have been stable in `is_aarch64_feature_detected!` on Linux/Darwin/BSD since Rust 1.60. 
--- .../src/detect/os/windows/aarch64.rs | 58 +++++++++++++++++-- library/std_detect/tests/cpu-detection.rs | 8 +++ 2 files changed, 61 insertions(+), 5 deletions(-) diff --git a/library/std_detect/src/detect/os/windows/aarch64.rs b/library/std_detect/src/detect/os/windows/aarch64.rs index 937f9f26eedc..825e16bc2f86 100644 --- a/library/std_detect/src/detect/os/windows/aarch64.rs +++ b/library/std_detect/src/detect/os/windows/aarch64.rs @@ -31,8 +31,14 @@ pub(crate) fn detect_features() -> cache::Initializer { const PF_ARM_SVE_SHA3_INSTRUCTIONS_AVAILABLE: u32 = 55; const PF_ARM_SVE_SM4_INSTRUCTIONS_AVAILABLE: u32 = 56; // const PF_ARM_SVE_I8MM_INSTRUCTIONS_AVAILABLE: u32 = 57; - // const PF_ARM_SVE_F32MM_INSTRUCTIONS_AVAILABLE: u32 = 58; - // const PF_ARM_SVE_F64MM_INSTRUCTIONS_AVAILABLE: u32 = 59; + const PF_ARM_SVE_F32MM_INSTRUCTIONS_AVAILABLE: u32 = 58; + const PF_ARM_SVE_F64MM_INSTRUCTIONS_AVAILABLE: u32 = 59; + const PF_ARM_LSE2_AVAILABLE: u32 = 62; + const PF_ARM_SHA3_INSTRUCTIONS_AVAILABLE: u32 = 64; + const PF_ARM_SHA512_INSTRUCTIONS_AVAILABLE: u32 = 65; + const PF_ARM_V82_I8MM_INSTRUCTIONS_AVAILABLE: u32 = 66; + const PF_ARM_V82_FP16_INSTRUCTIONS_AVAILABLE: u32 = 67; + const PF_ARM_V86_BF16_INSTRUCTIONS_AVAILABLE: u32 = 68; unsafe extern "system" { fn IsProcessorFeaturePresent(ProcessorFeature: DWORD) -> BOOL; @@ -46,9 +52,11 @@ pub(crate) fn detect_features() -> cache::Initializer { } }; - // Some features may be supported on current CPU, - // but no way to detect it by OS API. - // Also, we require unsafe block for the extern "system" calls. + // Some features may be supported on the current CPU but have no + // detection path through the Win32 API; those report `false`. + // SAFETY: `IsProcessorFeaturePresent` is a Win32 entry point taking a + // `DWORD` by value and returning a `BOOL`. No pointer parameters, + // no out-parameters, no thread-safety constraints. 
unsafe { enable_feature( Feature::fp, @@ -112,6 +120,46 @@ pub(crate) fn detect_features() -> cache::Initializer { Feature::sve2_sm4, IsProcessorFeaturePresent(PF_ARM_SVE_SM4_INSTRUCTIONS_AVAILABLE) != FALSE, ); + enable_feature( + Feature::f32mm, + IsProcessorFeaturePresent(PF_ARM_SVE_F32MM_INSTRUCTIONS_AVAILABLE) != FALSE, + ); + enable_feature( + Feature::f64mm, + IsProcessorFeaturePresent(PF_ARM_SVE_F64MM_INSTRUCTIONS_AVAILABLE) != FALSE, + ); + enable_feature( + Feature::lse2, + IsProcessorFeaturePresent(PF_ARM_LSE2_AVAILABLE) != FALSE, + ); + enable_feature( + Feature::fp16, + IsProcessorFeaturePresent(PF_ARM_V82_FP16_INSTRUCTIONS_AVAILABLE) != FALSE, + ); + enable_feature( + Feature::i8mm, + IsProcessorFeaturePresent(PF_ARM_V82_I8MM_INSTRUCTIONS_AVAILABLE) != FALSE, + ); + enable_feature( + Feature::bf16, + IsProcessorFeaturePresent(PF_ARM_V86_BF16_INSTRUCTIONS_AVAILABLE) != FALSE, + ); + // stdarch `sha3` is FEAT_SHA3 + FEAT_SHA512 together; Windows + // exposes them as two separate flags. + enable_feature( + Feature::sha3, + IsProcessorFeaturePresent(PF_ARM_SHA3_INSTRUCTIONS_AVAILABLE) != FALSE + && IsProcessorFeaturePresent(PF_ARM_SHA512_INSTRUCTIONS_AVAILABLE) != FALSE, + ); + // No PF_ARM_RDM_* constant exists. Derive FEAT_RDM from FEAT_DotProd: + // DotProd is an optional v8.2-A feature only present on cores that + // implement at least v8.1-A; v8.1-A with AdvSIMD mandates FEAT_RDM + // (Arm ARM K.a §D17.2.91), and AdvSIMD is universal on Windows-on-ARM. + // Same inference shipped in .NET 10 (dotnet/runtime PR 109493). 
+ enable_feature( + Feature::rdm, + IsProcessorFeaturePresent(PF_ARM_V82_DP_INSTRUCTIONS_AVAILABLE) != FALSE, + ); // PF_ARM_V8_CRYPTO_INSTRUCTIONS_AVAILABLE means aes, sha1, sha2 and // pmull support let crypto = diff --git a/library/std_detect/tests/cpu-detection.rs b/library/std_detect/tests/cpu-detection.rs index 0aad088af7de..f0b276072108 100644 --- a/library/std_detect/tests/cpu-detection.rs +++ b/library/std_detect/tests/cpu-detection.rs @@ -150,14 +150,22 @@ fn aarch64_linux() { fn aarch64_windows() { println!("asimd: {:?}", is_aarch64_feature_detected!("asimd")); println!("fp: {:?}", is_aarch64_feature_detected!("fp")); + println!("fp16: {:?}", is_aarch64_feature_detected!("fp16")); println!("crc: {:?}", is_aarch64_feature_detected!("crc")); println!("lse: {:?}", is_aarch64_feature_detected!("lse")); + println!("lse2: {:?}", is_aarch64_feature_detected!("lse2")); + println!("rdm: {:?}", is_aarch64_feature_detected!("rdm")); println!("dotprod: {:?}", is_aarch64_feature_detected!("dotprod")); + println!("i8mm: {:?}", is_aarch64_feature_detected!("i8mm")); + println!("bf16: {:?}", is_aarch64_feature_detected!("bf16")); println!("jsconv: {:?}", is_aarch64_feature_detected!("jsconv")); println!("rcpc: {:?}", is_aarch64_feature_detected!("rcpc")); println!("aes: {:?}", is_aarch64_feature_detected!("aes")); println!("pmull: {:?}", is_aarch64_feature_detected!("pmull")); println!("sha2: {:?}", is_aarch64_feature_detected!("sha2")); + println!("sha3: {:?}", is_aarch64_feature_detected!("sha3")); + println!("f32mm: {:?}", is_aarch64_feature_detected!("f32mm")); + println!("f64mm: {:?}", is_aarch64_feature_detected!("f64mm")); } #[test]