diff --git a/library/std_detect/src/detect/os/windows/aarch64.rs b/library/std_detect/src/detect/os/windows/aarch64.rs index 937f9f26eedc..825e16bc2f86 100644 --- a/library/std_detect/src/detect/os/windows/aarch64.rs +++ b/library/std_detect/src/detect/os/windows/aarch64.rs @@ -31,8 +31,14 @@ pub(crate) fn detect_features() -> cache::Initializer { const PF_ARM_SVE_SHA3_INSTRUCTIONS_AVAILABLE: u32 = 55; const PF_ARM_SVE_SM4_INSTRUCTIONS_AVAILABLE: u32 = 56; // const PF_ARM_SVE_I8MM_INSTRUCTIONS_AVAILABLE: u32 = 57; - // const PF_ARM_SVE_F32MM_INSTRUCTIONS_AVAILABLE: u32 = 58; - // const PF_ARM_SVE_F64MM_INSTRUCTIONS_AVAILABLE: u32 = 59; + const PF_ARM_SVE_F32MM_INSTRUCTIONS_AVAILABLE: u32 = 58; + const PF_ARM_SVE_F64MM_INSTRUCTIONS_AVAILABLE: u32 = 59; + const PF_ARM_LSE2_AVAILABLE: u32 = 62; + const PF_ARM_SHA3_INSTRUCTIONS_AVAILABLE: u32 = 64; + const PF_ARM_SHA512_INSTRUCTIONS_AVAILABLE: u32 = 65; + const PF_ARM_V82_I8MM_INSTRUCTIONS_AVAILABLE: u32 = 66; + const PF_ARM_V82_FP16_INSTRUCTIONS_AVAILABLE: u32 = 67; + const PF_ARM_V86_BF16_INSTRUCTIONS_AVAILABLE: u32 = 68; unsafe extern "system" { fn IsProcessorFeaturePresent(ProcessorFeature: DWORD) -> BOOL; @@ -46,9 +52,11 @@ pub(crate) fn detect_features() -> cache::Initializer { } }; - // Some features may be supported on current CPU, - // but no way to detect it by OS API. - // Also, we require unsafe block for the extern "system" calls. + // Some features may be supported on the current CPU but have no + // detection path through the Win32 API; those report `false`. + // SAFETY: `IsProcessorFeaturePresent` is a Win32 entry point taking a + // `DWORD` by value and returning a `BOOL`. No pointer parameters, + // no out-parameters, no thread-safety constraints. unsafe { enable_feature( Feature::fp, @@ -112,6 +120,46 @@ pub(crate) fn detect_features() -> cache::Initializer { Feature::sve2_sm4, IsProcessorFeaturePresent(PF_ARM_SVE_SM4_INSTRUCTIONS_AVAILABLE) != FALSE, ); + enable_feature( + Feature::f32mm, + IsProcessorFeaturePresent(PF_ARM_SVE_F32MM_INSTRUCTIONS_AVAILABLE) != FALSE, + ); + enable_feature( + Feature::f64mm, + IsProcessorFeaturePresent(PF_ARM_SVE_F64MM_INSTRUCTIONS_AVAILABLE) != FALSE, + ); + enable_feature( + Feature::lse2, + IsProcessorFeaturePresent(PF_ARM_LSE2_AVAILABLE) != FALSE, + ); + enable_feature( + Feature::fp16, + IsProcessorFeaturePresent(PF_ARM_V82_FP16_INSTRUCTIONS_AVAILABLE) != FALSE, + ); + enable_feature( + Feature::i8mm, + IsProcessorFeaturePresent(PF_ARM_V82_I8MM_INSTRUCTIONS_AVAILABLE) != FALSE, + ); + enable_feature( + Feature::bf16, + IsProcessorFeaturePresent(PF_ARM_V86_BF16_INSTRUCTIONS_AVAILABLE) != FALSE, + ); + // stdarch `sha3` is FEAT_SHA3 + FEAT_SHA512 together; Windows + // exposes them as two separate flags. + enable_feature( + Feature::sha3, + IsProcessorFeaturePresent(PF_ARM_SHA3_INSTRUCTIONS_AVAILABLE) != FALSE + && IsProcessorFeaturePresent(PF_ARM_SHA512_INSTRUCTIONS_AVAILABLE) != FALSE, + ); + // No PF_ARM_RDM_* constant exists. Derive FEAT_RDM from FEAT_DotProd: + // DotProd is an optional v8.2-A feature only present on cores that + // implement at least v8.1-A; v8.1-A with AdvSIMD mandates FEAT_RDM + // (Arm ARM K.a §D17.2.91), and AdvSIMD is universal on Windows-on-ARM. + // Same inference shipped in .NET 10 (dotnet/runtime PR 109493). + enable_feature( + Feature::rdm, + IsProcessorFeaturePresent(PF_ARM_V82_DP_INSTRUCTIONS_AVAILABLE) != FALSE, + ); // PF_ARM_V8_CRYPTO_INSTRUCTIONS_AVAILABLE means aes, sha1, sha2 and // pmull support let crypto = diff --git a/library/std_detect/tests/cpu-detection.rs b/library/std_detect/tests/cpu-detection.rs index 0aad088af7de..f0b276072108 100644 --- a/library/std_detect/tests/cpu-detection.rs +++ b/library/std_detect/tests/cpu-detection.rs @@ -150,14 +150,22 @@ fn aarch64_linux() { fn aarch64_windows() { println!("asimd: {:?}", is_aarch64_feature_detected!("asimd")); println!("fp: {:?}", is_aarch64_feature_detected!("fp")); + println!("fp16: {:?}", is_aarch64_feature_detected!("fp16")); println!("crc: {:?}", is_aarch64_feature_detected!("crc")); println!("lse: {:?}", is_aarch64_feature_detected!("lse")); + println!("lse2: {:?}", is_aarch64_feature_detected!("lse2")); + println!("rdm: {:?}", is_aarch64_feature_detected!("rdm")); println!("dotprod: {:?}", is_aarch64_feature_detected!("dotprod")); + println!("i8mm: {:?}", is_aarch64_feature_detected!("i8mm")); + println!("bf16: {:?}", is_aarch64_feature_detected!("bf16")); println!("jsconv: {:?}", is_aarch64_feature_detected!("jsconv")); println!("rcpc: {:?}", is_aarch64_feature_detected!("rcpc")); println!("aes: {:?}", is_aarch64_feature_detected!("aes")); println!("pmull: {:?}", is_aarch64_feature_detected!("pmull")); println!("sha2: {:?}", is_aarch64_feature_detected!("sha2")); + println!("sha3: {:?}", is_aarch64_feature_detected!("sha3")); + println!("f32mm: {:?}", is_aarch64_feature_detected!("f32mm")); + println!("f64mm: {:?}", is_aarch64_feature_detected!("f64mm")); } #[test]