diff --git a/compiler/rustc_target/src/target_features.rs b/compiler/rustc_target/src/target_features.rs index c3de7e257662..aa272ebd4b77 100644 --- a/compiler/rustc_target/src/target_features.rs +++ b/compiler/rustc_target/src/target_features.rs @@ -392,7 +392,11 @@ pub fn toggle_allowed(&self) -> Result<(), &'static str> { "avx512vpopcntdq", ], ), - ("avx10.2", Unstable(sym::avx10_target_feature), &["avx10.1"]), + ( + "avx10.2", + Unstable(sym::avx10_target_feature), + &["avx10.1", "avxvnni", "avxvnniint8", "avxvnniint16"], + ), ("avx512bf16", Stable, &["avx512bw"]), ("avx512bitalg", Stable, &["avx512bw"]), ("avx512bw", Stable, &["avx512f"]), diff --git a/library/std_detect/src/detect/os/x86.rs b/library/std_detect/src/detect/os/x86.rs index f2205ba07dd4..b24ef6a37ef5 100644 --- a/library/std_detect/src/detect/os/x86.rs +++ b/library/std_detect/src/detect/os/x86.rs @@ -202,6 +202,28 @@ pub(crate) fn detect_features() -> cache::Initializer { // Test `XCR0.APX[19]` with the mask `0b1000_0000_0000_0000_0000 == 0x80000` let os_apx_support = xcr0 & 0x80000 == 0x80000; + if os_amx_support { + enable(extended_features_edx, 24, Feature::amx_tile); + enable(extended_features_edx, 25, Feature::amx_int8); + enable(extended_features_edx, 22, Feature::amx_bf16); + enable(extended_features_eax_leaf_1, 21, Feature::amx_fp16); + enable(extended_features_edx_leaf_1, 8, Feature::amx_complex); + + if max_basic_leaf >= 0x1e { + let CpuidResult { eax: amx_feature_flags_eax, .. } = + __cpuid_count(0x1e_u32, 1); + + enable(amx_feature_flags_eax, 4, Feature::amx_fp8); + enable(amx_feature_flags_eax, 6, Feature::amx_tf32); + enable(amx_feature_flags_eax, 7, Feature::amx_avx512); + enable(amx_feature_flags_eax, 8, Feature::amx_movrs); + } + } + + if os_apx_support { + enable(extended_features_edx_leaf_1, 21, Feature::apxf); + } + // Only if the OS and the CPU support saving/restoring the AVX // registers we enable `xsave` support: if os_avx_support { @@ -236,9 +258,10 @@ pub(crate) fn detect_features() -> cache::Initializer { enable(extended_features_ebx, 5, Feature::avx2); // "Short" versions of AVX512 instructions - enable(extended_features_eax_leaf_1, 4, Feature::avxvnni); - enable(extended_features_eax_leaf_1, 23, Feature::avxifma); - enable(extended_features_edx_leaf_1, 4, Feature::avxvnniint8); + let avxvnni = enable(extended_features_eax_leaf_1, 4, Feature::avxvnni); + let avxvnniint8 = enable(extended_features_eax_leaf_1, 23, Feature::avxifma); + let avxvnniint16 = + enable(extended_features_edx_leaf_1, 4, Feature::avxvnniint8); enable(extended_features_edx_leaf_1, 5, Feature::avxneconvert); enable(extended_features_edx_leaf_1, 10, Feature::avxvnniint16); @@ -269,37 +292,18 @@ pub(crate) fn detect_features() -> cache::Initializer { enable(extended_features_edx, 8, Feature::avx512vp2intersect); enable(extended_features_edx, 23, Feature::avx512fp16); enable(extended_features_eax_leaf_1, 5, Feature::avx512bf16); - } - } - if os_amx_support { - enable(extended_features_edx, 24, Feature::amx_tile); - enable(extended_features_edx, 25, Feature::amx_int8); - enable(extended_features_edx, 22, Feature::amx_bf16); - enable(extended_features_eax_leaf_1, 21, Feature::amx_fp16); - enable(extended_features_edx_leaf_1, 8, Feature::amx_complex); + let avx10_1 = enable(extended_features_edx_leaf_1, 19, Feature::avx10_1); + if avx10_1 { + let CpuidResult { ebx, .. } = __cpuid(0x24); + let avx10_version = ebx & 0xff; - if max_basic_leaf >= 0x1e { - let CpuidResult { eax: amx_feature_flags_eax, .. } = - __cpuid_count(0x1e_u32, 1); - - enable(amx_feature_flags_eax, 4, Feature::amx_fp8); - enable(amx_feature_flags_eax, 6, Feature::amx_tf32); - enable(amx_feature_flags_eax, 7, Feature::amx_avx512); - enable(amx_feature_flags_eax, 8, Feature::amx_movrs); - } - } - - if os_apx_support { - enable(extended_features_edx_leaf_1, 21, Feature::apxf); - } - - let avx10_1 = enable(extended_features_edx_leaf_1, 19, Feature::avx10_1); - if avx10_1 { - let CpuidResult { ebx, .. } = __cpuid(0x24); - let avx10_version = ebx & 0xff; - if avx10_version >= 2 { - value.set(Feature::avx10_2 as u32); + // AVX10.2 supports masked versions of dot-product instructions available in avxvnni etc, + // so it doesn't make sense to have it without the unmasked versions + if avx10_version >= 2 && avxvnni && avxvnniint8 && avxvnniint16 { + value.set(Feature::avx10_2 as u32); + } + } } } }