Auto merge of #154008 - JonathanBrouwer:rollup-tfGnRi2, r=JonathanBrouwer

Rollup of 14 pull requests

Successful merges:

 - rust-lang/rust#153972 (stdarch subtree update)
 - rust-lang/rust#153801 (Add the option to run UI tests with the parallel frontend)
 - rust-lang/rust#153959 (Fix non-module `parent_module` in stripped cfg diagnostics)
 - rust-lang/rust#153967 (Tweak wording of failed predicate in inference error)
 - rust-lang/rust#152968 (Flip "region lattice" in RegionKind doc comment)
 - rust-lang/rust#153531 (Fix LegacyKeyValueFormat report from docker build: various)
 - rust-lang/rust#153622 (remove concept of soft-unstable features)
 - rust-lang/rust#153709 (Fix hypothetical ICE in `variances_of`)
 - rust-lang/rust#153884 (test `classify-runtime-const` for `f16`)
 - rust-lang/rust#153894 (Point at unit structs on foreign crates in type errors when they are the pattern of a binding)
 - rust-lang/rust#153920 (improve `#[track_caller]` invalid ABI error)
 - rust-lang/rust#153946 (dissolve `tests/ui/cross`)
 - rust-lang/rust#153965 (Fix minor kasan bugs)
 - rust-lang/rust#153991 (Small report_cycle refactor)
2026-04-27 18:57:42 +03:00 · 2026-03-17 16:52:08 +00:00
parent b711f95f86 880d85f409
commit 85e19b8ce8
231 changed files with 1646 additions and 908 deletions
@@ -1487,6 +1487,15 @@ fn visit_foreign_item(&mut self, fi: &'a ForeignItem) {
                    ident,
                    sig,
                );
+
+                if let Some(attr) = attr::find_by_name(fi.attrs(), sym::track_caller)
+                    && self.extern_mod_abi != Some(ExternAbi::Rust)
+                {
+                    self.dcx().emit_err(errors::RequiresRustAbi {
+                        track_caller_span: attr.span,
+                        extern_abi_span: self.current_extern_span(),
+                    });
+                }
            }
            ForeignItemKind::TyAlias(box TyAlias {
                defaultness,
@@ -1672,10 +1681,19 @@ fn visit_fn(&mut self, fk: FnKind<'a>, attrs: &AttrVec, span: Span, id: NodeId)
        }

        if let FnKind::Fn(ctxt, _, fun) = fk
-            && let Extern::Explicit(str_lit, _) = fun.sig.header.ext
+            && let Extern::Explicit(str_lit, extern_abi_span) = fun.sig.header.ext
            && let Ok(abi) = ExternAbi::from_str(str_lit.symbol.as_str())
        {
            self.check_extern_fn_signature(abi, ctxt, &fun.ident, &fun.sig);
+
+            if let Some(attr) = attr::find_by_name(attrs, sym::track_caller)
+                && abi != ExternAbi::Rust
+            {
+                self.dcx().emit_err(errors::RequiresRustAbi {
+                    track_caller_span: attr.span,
+                    extern_abi_span,
+                });
+            }
        }

        self.check_c_variadic_type(fk, attrs);
@@ -1140,3 +1140,13 @@ pub(crate) struct ScalableVectorBadArch {
    #[primary_span]
    pub span: Span,
 }
+
+#[derive(Diagnostic)]
+#[diag("`#[track_caller]` can only be used with the Rust ABI", code = E0737)]
+pub(crate) struct RequiresRustAbi { // E0737 diagnostic carrying two labelled spans.
+    #[primary_span]
+    #[label("using `#[track_caller]` here")]
+    pub track_caller_span: Span, // Primary span; labelled as the `#[track_caller]` use site.
+    #[label("not using the Rust ABI because of this")]
+    pub extern_abi_span: Span, // Secondary label; the span blamed for the non-Rust ABI.
+}
@@ -671,6 +671,7 @@ fn convert(cx: &mut AcceptContext<'_, '_, S>, args: &ArgParser) -> Option<Attrib
                        item.path().span(),
                        &[
                            sym::address,
+                            sym::kernel_address,
                            sym::cfi,
                            sym::kcfi,
                            sym::memory,
@@ -376,7 +376,6 @@ pub(crate) fn parse_unstability<S: Stage>(
    let mut reason = None;
    let mut issue = None;
    let mut issue_num = None;
-    let mut is_soft = false;
    let mut implied_by = None;
    let mut old_name = None;

@@ -423,12 +422,6 @@ pub(crate) fn parse_unstability<S: Stage>(
                    },
                };
            }
-            Some(sym::soft) => {
-                if let Err(span) = args.no_args() {
-                    cx.emit_err(session_diagnostics::SoftNoArgs { span });
-                }
-                is_soft = true;
-            }
            Some(sym::implied_by) => {
                insert_value_into_option_or_error(cx, &param, &mut implied_by, word.unwrap())?
            }
@@ -438,14 +431,7 @@ pub(crate) fn parse_unstability<S: Stage>(
            _ => {
                cx.expected_specific_argument(
                    param.span(),
-                    &[
-                        sym::feature,
-                        sym::reason,
-                        sym::issue,
-                        sym::soft,
-                        sym::implied_by,
-                        sym::old_name,
-                    ],
+                    &[sym::feature, sym::reason, sym::issue, sym::implied_by, sym::old_name],
                );
                return None;
            }
@@ -468,7 +454,6 @@ pub(crate) fn parse_unstability<S: Stage>(
            let level = StabilityLevel::Unstable {
                reason: UnstableReason::from_opt_reason(reason),
                issue: issue_num,
-                is_soft,
                implied_by,
                old_name,
            };
@@ -374,13 +374,6 @@ pub(crate) struct InvalidSince {
    pub span: Span,
 }

-#[derive(Diagnostic)]
-#[diag("`soft` should not have any arguments")]
-pub(crate) struct SoftNoArgs {
-    #[primary_span]
-    pub span: Span,
-}
-
 #[derive(Diagnostic)]
 #[diag("unknown version literal format, assuming it refers to a future version")]
 pub(crate) struct UnknownVersionLiteral {
@@ -150,7 +150,8 @@ fn process_builtin_attrs(
                    && let Some(fn_sig) = try_fn_sig(tcx, did, *attr_span)
                    && fn_sig.skip_binder().abi() != ExternAbi::Rust
                {
-                    tcx.dcx().emit_err(errors::RequiresRustAbi { span: *attr_span });
+                    // This error is already reported in `rustc_ast_passes/src/ast_validation.rs`.
+                    tcx.dcx().delayed_bug("`#[track_caller]` requires the Rust ABI");
                }
                if is_closure
                    && !tcx.features().closure_track_caller()
@@ -112,13 +112,6 @@ pub(crate) struct NoSavedObjectFile<'a> {
    pub cgu_name: &'a str,
 }

-#[derive(Diagnostic)]
-#[diag("`#[track_caller]` requires Rust ABI", code = E0737)]
-pub(crate) struct RequiresRustAbi {
-    #[primary_span]
-    pub span: Span,
-}
-
 #[derive(Diagnostic)]
 #[diag("unable to copy {$source_file} to {$output_path}: {$error}")]
 pub(crate) struct CopyPathBuf {
@@ -138,6 +138,14 @@ pub fn unwrap_tag(self) -> Vec<CodeSuggestion> {
            Suggestions::Disabled => Vec::new(),
        }
    }
+
+    pub fn len(&self) -> usize { // Number of suggestions currently held.
+        match self {
+            Suggestions::Enabled(suggestions) => suggestions.len(),
+            Suggestions::Sealed(suggestions) => suggestions.len(),
+            Suggestions::Disabled => 0, // `Disabled` carries no list, so it counts as zero.
+        }
+    }
 }

 impl Default for Suggestions {
@@ -277,15 +277,15 @@ fn default() -> Self {
 }

 #[derive(Debug, Clone, Encodable, Decodable, HashStable_Generic)]
-pub struct StrippedCfgItem<ModId = DefId> {
-    pub parent_module: ModId,
+pub struct StrippedCfgItem<ScopeId = DefId> { // Generic over the id type of the parent scope.
+    pub parent_scope: ScopeId, // Id of the enclosing scope, expressed as a `ScopeId`.
     pub ident: Ident,
     pub cfg: (CfgEntry, Span),
 }

-impl<ModId> StrippedCfgItem<ModId> {
-    pub fn map_mod_id<New>(self, f: impl FnOnce(ModId) -> New) -> StrippedCfgItem<New> {
-        StrippedCfgItem { parent_module: f(self.parent_module), ident: self.ident, cfg: self.cfg }
+impl<ScopeId> StrippedCfgItem<ScopeId> { // `map_scope_id` converts only `parent_scope` via `f`.
+    pub fn map_scope_id<New>(self, f: impl FnOnce(ScopeId) -> New) -> StrippedCfgItem<New> {
+        StrippedCfgItem { parent_scope: f(self.parent_scope), ident: self.ident, cfg: self.cfg }
     }
 }

@@ -112,7 +112,6 @@ pub enum StabilityLevel {
        reason: UnstableReason,
        /// Relevant `rust-lang/rust` issue.
        issue: Option<NonZero<u32>>,
-        is_soft: bool,
        /// If part of a feature is stabilized and a new feature is added for the remaining parts,
        /// then the `implied_by` attribute is used to indicate which now-stable feature previously
        /// contained an item.
@@ -1636,69 +1636,77 @@ fn emit_bad_pat_path(
            span_bug!(pat_span, "unexpected resolution for path pattern: {resolved_pat:?}");
        };

-        if let Some(span) = self.tcx.hir_res_span(pat_res) {
+        let span = match (self.tcx.hir_res_span(pat_res), res.opt_def_id()) {
+            (Some(span), _) => span,
+            (None, Some(def_id)) => self.tcx.def_span(def_id),
+            (None, None) => {
+                e.emit();
+                return;
+            }
+        };
+        if let [hir::PathSegment { ident, args: None, .. }] = segments
+            && e.suggestions.len() == 0
+        {
            e.span_label(span, format!("{} defined here", res.descr()));
-            if let [hir::PathSegment { ident, .. }] = segments {
-                e.span_label(
-                    pat_span,
-                    format!(
-                        "`{}` is interpreted as {} {}, not a new binding",
-                        ident,
-                        res.article(),
-                        res.descr(),
-                    ),
-                );
-                match self.tcx.parent_hir_node(hir_id) {
-                    hir::Node::PatField(..) => {
+            e.span_label(
+                pat_span,
+                format!(
+                    "`{}` is interpreted as {} {}, not a new binding",
+                    ident,
+                    res.article(),
+                    res.descr(),
+                ),
+            );
+            match self.tcx.parent_hir_node(hir_id) {
+                hir::Node::PatField(..) => {
+                    e.span_suggestion_verbose(
+                        ident.span.shrink_to_hi(),
+                        "bind the struct field to a different name instead",
+                        format!(": other_{}", ident.as_str().to_lowercase()),
+                        Applicability::HasPlaceholders,
+                    );
+                }
+                _ => {
+                    let (type_def_id, item_def_id) = match resolved_pat.ty.kind() {
+                        ty::Adt(def, _) => match res {
+                            Res::Def(DefKind::Const { .. }, def_id) => {
+                                (Some(def.did()), Some(def_id))
+                            }
+                            _ => (None, None),
+                        },
+                        _ => (None, None),
+                    };
+
+                    let is_range = matches!(
+                        type_def_id.and_then(|id| self.tcx.as_lang_item(id)),
+                        Some(
+                            LangItem::Range
+                                | LangItem::RangeFrom
+                                | LangItem::RangeTo
+                                | LangItem::RangeFull
+                                | LangItem::RangeInclusiveStruct
+                                | LangItem::RangeToInclusive,
+                        )
+                    );
+                    if is_range {
+                        if !self.maybe_suggest_range_literal(&mut e, item_def_id, *ident) {
+                            let msg = "constants only support matching by type, \
+                                if you meant to match against a range of values, \
+                                consider using a range pattern like `min ..= max` in the match block";
+                            e.note(msg);
+                        }
+                    } else {
+                        let msg = "introduce a new binding instead";
+                        let sugg = format!("other_{}", ident.as_str().to_lowercase());
                        e.span_suggestion_verbose(
-                            ident.span.shrink_to_hi(),
-                            "bind the struct field to a different name instead",
-                            format!(": other_{}", ident.as_str().to_lowercase()),
+                            ident.span,
+                            msg,
+                            sugg,
                            Applicability::HasPlaceholders,
                        );
                    }
-                    _ => {
-                        let (type_def_id, item_def_id) = match resolved_pat.ty.kind() {
-                            ty::Adt(def, _) => match res {
-                                Res::Def(DefKind::Const { .. }, def_id) => {
-                                    (Some(def.did()), Some(def_id))
-                                }
-                                _ => (None, None),
-                            },
-                            _ => (None, None),
-                        };
-
-                        let is_range = matches!(
-                            type_def_id.and_then(|id| self.tcx.as_lang_item(id)),
-                            Some(
-                                LangItem::Range
-                                    | LangItem::RangeFrom
-                                    | LangItem::RangeTo
-                                    | LangItem::RangeFull
-                                    | LangItem::RangeInclusiveStruct
-                                    | LangItem::RangeToInclusive,
-                            )
-                        );
-                        if is_range {
-                            if !self.maybe_suggest_range_literal(&mut e, item_def_id, *ident) {
-                                let msg = "constants only support matching by type, \
-                                    if you meant to match against a range of values, \
-                                    consider using a range pattern like `min ..= max` in the match block";
-                                e.note(msg);
-                            }
-                        } else {
-                            let msg = "introduce a new binding instead";
-                            let sugg = format!("other_{}", ident.as_str().to_lowercase());
-                            e.span_suggestion(
-                                ident.span,
-                                msg,
-                                sugg,
-                                Applicability::HasPlaceholders,
-                            );
-                        }
-                    }
-                };
-            }
+                }
+            };
        }
        e.emit();
    }
@@ -642,6 +642,7 @@ macro_rules! add_lint_group {
         see <https://github.com/rust-lang/rust/issues/40107> for more information",
    );
    store.register_removed("wasm_c_abi", "the wasm C ABI has been fixed");
+    store.register_removed("soft_unstable", "the general soft-unstable mechanism has been removed");
 }

 fn register_internals(store: &mut LintStore) {
@@ -106,7 +106,6 @@
        SEMICOLON_IN_EXPRESSIONS_FROM_MACROS,
        SHADOWING_SUPERTRAIT_ITEMS,
        SINGLE_USE_LIFETIMES,
-        SOFT_UNSTABLE,
        STABLE_FEATURES,
        TAIL_EXPR_DROP_ORDER,
        TEST_UNSTABLE_LINT,
@@ -2345,22 +2344,6 @@
    };
 }

-declare_lint! {
-    /// The `soft_unstable` lint detects unstable features that were unintentionally allowed on
-    /// stable. This is a [future-incompatible] lint to transition this to a hard error in the
-    /// future. See [issue #64266] for more details.
-    ///
-    /// [issue #64266]: https://github.com/rust-lang/rust/issues/64266
-    /// [future-incompatible]: ../index.md#future-incompatible-lints
-    pub SOFT_UNSTABLE,
-    Deny,
-    "a feature gate that doesn't break dependent crates",
-    @future_incompatible = FutureIncompatibleInfo {
-        reason: fcw!(FutureReleaseError #64266),
-        report_in_deps: true,
-    };
-}
-
 declare_lint! {
    /// The `inline_no_sanitize` lint detects incompatible use of
    /// [`#[inline(always)]`][inline] and [`#[sanitize(xyz = "off")]`][sanitize].
@@ -1223,7 +1223,7 @@ fn get_stripped_cfg_items<'tcx>(
            .root
            .stripped_cfg_items
            .decode((self, tcx))
-            .map(|item| item.map_mod_id(|index| DefId { krate: cnum, index }));
+            .map(|item| item.map_scope_id(|index| DefId { krate: cnum, index }));
        tcx.arena.alloc_from_iter(item_names)
    }

@@ -2145,7 +2145,7 @@ fn encode_stripped_cfg_items(&mut self) -> LazyArray<StrippedCfgItem<DefIndex>>
            self.tcx
                .stripped_cfg_items(LOCAL_CRATE)
                .into_iter()
-                .map(|item| item.clone().map_mod_id(|def_id| def_id.index)),
+                .map(|item| item.clone().map_scope_id(|def_id| def_id.index)),
        )
    }

@@ -11,7 +11,7 @@
 use rustc_hir::{self as hir, ConstStability, DefaultBodyStability, HirId, Stability};
 use rustc_macros::{Decodable, Encodable, HashStable, Subdiagnostic};
 use rustc_session::Session;
-use rustc_session::lint::builtin::{DEPRECATED, DEPRECATED_IN_FUTURE, SOFT_UNSTABLE};
+use rustc_session::lint::builtin::{DEPRECATED, DEPRECATED_IN_FUTURE};
 use rustc_session::lint::{BuiltinLintDiag, DeprecatedSinceKind, Level, Lint};
 use rustc_session::parse::feature_err_issue;
 use rustc_span::{Span, Symbol, sym};
@@ -68,9 +68,7 @@ pub fn report_unstable(
    reason: Option<Symbol>,
    issue: Option<NonZero<u32>>,
    suggestion: Option<(Span, String, String, Applicability)>,
-    is_soft: bool,
    span: Span,
-    soft_handler: impl FnOnce(&'static Lint, Span, String),
    kind: UnstableKind,
 ) {
    let qual = match kind {
@@ -83,18 +81,14 @@ pub fn report_unstable(
        None => format!("use of unstable{qual} library feature `{feature}`"),
    };

-    if is_soft {
-        soft_handler(SOFT_UNSTABLE, span, msg)
-    } else {
-        let mut err = feature_err_issue(sess, feature, span, GateIssue::Library(issue), msg);
-        if let Some((inner_types, msg, sugg, applicability)) = suggestion {
-            err.span_suggestion(inner_types, msg, sugg, applicability);
-        }
-        if let UnstableKind::Const(kw) = kind {
-            err.span_label(kw, "trait is not stable as const yet");
-        }
-        err.emit();
+    let mut err = feature_err_issue(sess, feature, span, GateIssue::Library(issue), msg);
+    if let Some((inner_types, msg, sugg, applicability)) = suggestion {
+        err.span_suggestion(inner_types, msg, sugg, applicability);
    }
+    if let UnstableKind::Const(kw) = kind {
+        err.span_label(kw, "trait is not stable as const yet");
+    }
+    err.emit();
 }

 fn deprecation_lint(is_in_effect: bool) -> &'static Lint {
@@ -266,7 +260,6 @@ pub enum EvalResult {
        reason: Option<Symbol>,
        issue: Option<NonZero<u32>>,
        suggestion: Option<(Span, String, String, Applicability)>,
-        is_soft: bool,
    },
    /// The item does not have the `#[stable]` or `#[unstable]` marker assigned.
    Unmarked,
@@ -386,7 +379,7 @@ pub fn eval_stability_allow_unstable(

        match stability {
            Some(Stability {
-                level: hir::StabilityLevel::Unstable { reason, issue, is_soft, implied_by, .. },
+                level: hir::StabilityLevel::Unstable { reason, issue, implied_by, .. },
                feature,
                ..
            }) => {
@@ -428,13 +421,7 @@ pub fn eval_stability_allow_unstable(
                }

                let suggestion = suggestion_for_allocator_api(self, def_id, span, feature);
-                EvalResult::Deny {
-                    feature,
-                    reason: reason.to_opt_reason(),
-                    issue,
-                    suggestion,
-                    is_soft,
-                }
+                EvalResult::Deny { feature, reason: reason.to_opt_reason(), issue, suggestion }
            }
            Some(_) => {
                // Stable APIs are always ok to call and deprecated APIs are
@@ -469,7 +456,7 @@ pub fn eval_default_body_stability(self, def_id: DefId, span: Span) -> EvalResul

        match stability {
            Some(DefaultBodyStability {
-                level: hir::StabilityLevel::Unstable { reason, issue, is_soft, .. },
+                level: hir::StabilityLevel::Unstable { reason, issue, .. },
                feature,
            }) => {
                if span.allows_unstable(feature) {
@@ -485,7 +472,6 @@ pub fn eval_default_body_stability(self, def_id: DefId, span: Span) -> EvalResul
                    reason: reason.to_opt_reason(),
                    issue,
                    suggestion: None,
-                    is_soft,
                }
            }
            Some(_) => {
@@ -563,30 +549,18 @@ pub fn check_optional_stability(
        allow_unstable: AllowUnstable,
        unmarked: impl FnOnce(Span, DefId),
    ) -> bool {
-        let soft_handler = |lint, span, msg: String| {
-            self.emit_node_span_lint(
-                lint,
-                id.unwrap_or(hir::CRATE_HIR_ID),
-                span,
-                rustc_errors::DiagDecorator(|lint| {
-                    lint.primary_message(msg);
-                }),
-            );
-        };
        let eval_result =
            self.eval_stability_allow_unstable(def_id, id, span, method_span, allow_unstable);
        let is_allowed = matches!(eval_result, EvalResult::Allow);
        match eval_result {
            EvalResult::Allow => {}
-            EvalResult::Deny { feature, reason, issue, suggestion, is_soft } => report_unstable(
+            EvalResult::Deny { feature, reason, issue, suggestion } => report_unstable(
                self.sess,
                feature,
                reason,
                issue,
                suggestion,
-                is_soft,
                span,
-                soft_handler,
                UnstableKind::Regular,
            ),
            EvalResult::Unmarked => unmarked(span, def_id),
@@ -623,12 +597,10 @@ pub fn check_const_stability(self, def_id: DefId, span: Span, const_kw_span: Spa

        match stability {
            Some(ConstStability {
-                level: hir::StabilityLevel::Unstable { reason, issue, is_soft, implied_by, .. },
+                level: hir::StabilityLevel::Unstable { reason, issue, implied_by, .. },
                feature,
                ..
            }) => {
-                assert!(!is_soft);
-
                if span.allows_unstable(feature) {
                    debug!("body stability: skipping span={:?} since it is internal", span);
                    return;
@@ -652,9 +624,7 @@ pub fn check_const_stability(self, def_id: DefId, span: Span, const_kw_span: Spa
                    reason.to_opt_reason(),
                    issue,
                    None,
-                    false,
                    span,
-                    |_, _, _| {},
                    UnstableKind::Const(const_kw_span),
                );
            }
@@ -131,7 +131,6 @@ fn inherit_stability(def_kind: DefKind) -> bool {
    level: StabilityLevel::Unstable {
        reason: UnstableReason::Default,
        issue: NonZero::new(27812),
-        is_soft: false,
        implied_by: None,
        old_name: None,
    },
@@ -8,12 +8,12 @@
 use rustc_errors::{Applicability, Diag, MultiSpan, pluralize, struct_span_code_err};
 use rustc_hir as hir;
 use rustc_hir::def::{DefKind, Res};
+use rustc_middle::bug;
 use rustc_middle::queries::{QueryVTables, TaggedQueryKey};
 use rustc_middle::query::CycleError;
 use rustc_middle::query::erase::erase_val;
 use rustc_middle::ty::layout::LayoutError;
 use rustc_middle::ty::{self, Ty, TyCtxt};
-use rustc_middle::{bug, span_bug};
 use rustc_span::def_id::{DefId, LocalDefId};
 use rustc_span::{ErrorGuaranteed, Span};

@@ -31,9 +31,9 @@ pub(crate) fn specialize_query_vtables<'tcx>(vtables: &mut QueryVTables<'tcx>) {
    vtables.check_representability_adt_ty.value_from_cycle_error =
        |tcx, _, cycle, _err| check_representability(tcx, cycle);

-    vtables.variances_of.value_from_cycle_error = |tcx, _, cycle, err| {
+    vtables.variances_of.value_from_cycle_error = |tcx, key, _, err| {
        let _guar = err.delay_as_bug();
-        erase_val(variances_of(tcx, cycle))
+        erase_val(variances_of(tcx, key))
    };

    vtables.layout_of.value_from_cycle_error = |tcx, _, cycle, err| {
@@ -103,26 +103,9 @@ fn check_representability<'tcx>(tcx: TyCtxt<'tcx>, cycle_error: CycleError<'tcx>
    guar.raise_fatal()
 }

-fn variances_of<'tcx>(tcx: TyCtxt<'tcx>, cycle_error: CycleError<'tcx>) -> &'tcx [ty::Variance] {
-    search_for_cycle_permutation(
-        &cycle_error.cycle,
-        |cycle| {
-            if let Some(frame) = cycle.get(0)
-                && let TaggedQueryKey::variances_of(def_id) = frame.node.tagged_key
-            {
-                let n = tcx.generics_of(def_id).own_params.len();
-                ControlFlow::Break(tcx.arena.alloc_from_iter(iter::repeat_n(ty::Bivariant, n)))
-            } else {
-                ControlFlow::Continue(())
-            }
-        },
-        || {
-            span_bug!(
-                cycle_error.usage.as_ref().unwrap().span,
-                "only `variances_of` returns `&[ty::Variance]`"
-            )
-        },
-    )
+fn variances_of<'tcx>(tcx: TyCtxt<'tcx>, def_id: DefId) -> &'tcx [ty::Variance] {
+    let n = tcx.generics_of(def_id).own_params.len(); // One entry per own generic parameter.
+    tcx.arena.alloc_from_iter(iter::repeat_n(ty::Bivariant, n)) // Arena slice of `n` `Bivariant`s.
+}

 // Take a cycle of `Q` and try `try_cycle` on every permutation, falling back to `otherwise`.
@@ -475,13 +475,10 @@ pub(crate) fn report_cycle<'tcx>(
        cycle_stack.push(crate::error::CycleStack { span, desc: node.tagged_key.description(tcx) });
    }

-    let mut cycle_usage = None;
-    if let Some(usage) = usage {
-        cycle_usage = Some(crate::error::CycleUsage {
-            span: usage.node.tagged_key.default_span(tcx, usage.span),
-            usage: usage.node.tagged_key.description(tcx),
-        });
-    }
+    let cycle_usage = usage.as_ref().map(|usage| crate::error::CycleUsage {
+        span: usage.node.tagged_key.default_span(tcx, usage.span),
+        usage: usage.node.tagged_key.description(tcx),
+    });

    let alias = if stack
        .iter()
@@ -3083,12 +3083,8 @@ pub(crate) fn find_cfg_stripped(&self, err: &mut Diag<'_>, segment: &Symbol, mod
                .stripped_cfg_items
                .iter()
                .filter_map(|item| {
-                    let parent_module = self.opt_local_def_id(item.parent_module)?.to_def_id();
-                    Some(StrippedCfgItem {
-                        parent_module,
-                        ident: item.ident,
-                        cfg: item.cfg.clone(),
-                    })
+                    let parent_scope = self.opt_local_def_id(item.parent_scope)?.to_def_id();
+                    Some(StrippedCfgItem { parent_scope, ident: item.ident, cfg: item.cfg.clone() })
                })
                .collect::<Vec<_>>();
            local_items.as_slice()
@@ -3096,11 +3092,13 @@ pub(crate) fn find_cfg_stripped(&self, err: &mut Diag<'_>, segment: &Symbol, mod
            self.tcx.stripped_cfg_items(module.krate)
        };

-        for &StrippedCfgItem { parent_module, ident, ref cfg } in symbols {
+        for &StrippedCfgItem { parent_scope, ident, ref cfg } in symbols {
            if ident.name != *segment {
                continue;
            }

+            let parent_module = self.get_nearest_non_block_module(parent_scope).def_id();
+
            fn comes_from_same_module_for_glob(
                r: &Resolver<'_, '_>,
                parent_module: DefId,
@@ -1,7 +1,7 @@
 use rustc_errors::codes::*;
 use rustc_errors::formatting::DiagMessageAddArg;
 use rustc_errors::{
-    Applicability, Diag, DiagCtxtHandle, DiagMessage, Diagnostic, ElidedLifetimeInPathSubdiag,
+    Applicability, Diag, DiagCtxtHandle, Diagnostic, ElidedLifetimeInPathSubdiag,
    EmissionGuarantee, IntoDiagArg, Level, MultiSpan, Subdiagnostic, msg,
 };
 use rustc_macros::{Diagnostic, Subdiagnostic};
@@ -1453,17 +1453,6 @@ pub(crate) struct MacroRuleNeverUsed {
    pub name: Symbol,
 }

-pub(crate) struct UnstableFeature {
-    pub msg: DiagMessage,
-}
-
-impl<'a> Diagnostic<'a, ()> for UnstableFeature {
-    fn into_diag(self, dcx: DiagCtxtHandle<'a>, level: Level) -> Diag<'a, ()> {
-        let Self { msg } = self;
-        Diag::new(dcx, level, msg)
-    }
-}
-
 #[derive(Diagnostic)]
 #[diag("`extern crate` is not idiomatic in the new edition")]
 pub(crate) struct ExternCrateNotIdiomatic {
@@ -1817,9 +1817,9 @@ pub fn into_outputs(self) -> ResolverOutputs<'tcx> {
            .stripped_cfg_items
            .into_iter()
            .filter_map(|item| {
-                let parent_module =
-                    self.node_id_to_def_id.get(&item.parent_module)?.key().to_def_id();
-                Some(StrippedCfgItem { parent_module, ident: item.ident, cfg: item.cfg })
+                let parent_scope =
+                    self.node_id_to_def_id.get(&item.parent_scope)?.key().to_def_id();
+                Some(StrippedCfgItem { parent_scope, ident: item.ident, cfg: item.cfg })
            })
            .collect();

@@ -512,7 +512,7 @@ fn append_stripped_cfg_item(
        cfg_span: Span,
    ) {
        self.stripped_cfg_items.push(StrippedCfgItem {
-            parent_module: parent_node,
+            parent_scope: parent_node,
            ident,
            cfg: (cfg, cfg_span),
        });
@@ -1064,8 +1064,7 @@ fn check_stability_and_deprecation(
    ) {
        let span = path.span;
        if let Some(stability) = &ext.stability
-            && let StabilityLevel::Unstable { reason, issue, is_soft, implied_by, .. } =
-                stability.level
+            && let StabilityLevel::Unstable { reason, issue, implied_by, .. } = stability.level
        {
            let feature = stability.feature;

@@ -1073,25 +1072,13 @@ fn check_stability_and_deprecation(
                |feature| self.tcx.features().enabled(feature) || span.allows_unstable(feature);
            let allowed_by_implication = implied_by.is_some_and(|feature| is_allowed(feature));
            if !is_allowed(feature) && !allowed_by_implication {
-                let lint_buffer = &mut self.lint_buffer;
-                let soft_handler = |lint, span, msg: String| {
-                    lint_buffer.buffer_lint(
-                        lint,
-                        node_id,
-                        span,
-                        // FIXME make this translatable
-                        errors::UnstableFeature { msg: msg.into() },
-                    )
-                };
                stability::report_unstable(
                    self.tcx.sess,
                    feature,
                    reason.to_opt_reason(),
                    issue,
                    None,
-                    is_soft,
                    span,
-                    soft_handler,
                    stability::UnstableKind::Regular,
                );
            }
@@ -1915,7 +1915,6 @@
        slice_len_fn,
        slice_patterns,
        slicing_syntax,
-        soft,
        sparc,
        sparc64,
        sparc_target_feature,
@@ -11,6 +11,7 @@
 use rustc_infer::traits::{
    Obligation, ObligationCause, ObligationCauseCode, PolyTraitObligation, PredicateObligation,
 };
+use rustc_middle::ty::print::PrintPolyTraitPredicateExt;
 use rustc_middle::ty::{self, Ty, TyCtxt, TypeVisitable as _, TypeVisitableExt as _};
 use rustc_session::parse::feature_err_unstable_feature_bound;
 use rustc_span::{DUMMY_SP, ErrorGuaranteed, Span};
@@ -306,8 +307,18 @@ pub(super) fn maybe_report_ambiguity(
                        err.cancel();
                        return e;
                    }
-                    let pred = self.tcx.short_string(predicate, &mut err.long_ty_path());
-                    err.note(format!("cannot satisfy `{pred}`"));
+                    if let Some(clause) = predicate.as_trait_clause()
+                        && let ty::Infer(_) = clause.self_ty().skip_binder().kind()
+                    {
+                        let tr = self.tcx.short_string(
+                            clause.print_modifiers_and_trait_path(),
+                            &mut err.long_ty_path(),
+                        );
+                        err.note(format!("the type must implement `{tr}`"));
+                    } else {
+                        let pred = self.tcx.short_string(predicate, &mut err.long_ty_path());
+                        err.note(format!("cannot satisfy `{pred}`"));
+                    }
                    let impl_candidates =
                        self.find_similar_impl_candidates(predicate.as_trait_clause().unwrap());
                    if impl_candidates.len() < 40 {
@@ -34,26 +34,26 @@ pub struct RegionVid {}
 /// In general, the region lattice looks like
 ///
 /// ```text
-/// static ----------+-----...------+       (greatest)
+/// empty(Un) --------                      (smallest)
+/// |                 \
+/// ...                \
+/// |                   \
+/// empty(U1) --         \
+/// |           \         placeholder(Un)
+/// |            \                  |
+/// empty(root)   placeholder(U1)   |
+/// |                |              |
+/// |                |              |
 /// |                |              |
 /// param regions    |              |
 /// |                |              |
-/// |                |              |
-/// |                |              |
-/// empty(root)   placeholder(U1)   |
-/// |            /                  |
-/// |           /         placeholder(Un)
-/// empty(U1) --         /
-/// |                   /
-/// ...                /
-/// |                 /
-/// empty(Un) --------                      (smallest)
+/// static ----------+-----...------+       (greatest)
 /// ```
 ///
-/// Early-bound/free regions are the named lifetimes in scope from the
-/// function declaration. They have relationships to one another
-/// determined based on the declared relationships from the
-/// function.
+/// Lifetimes in scope from a function declaration are represented via
+/// [`RegionKind::ReEarlyParam`]/[`RegionKind::ReLateParam`]. They
+/// have relationships to one another and to `'static` based on the
+/// declared relationships from the function.
 ///
 /// Note that inference variables and bound regions are not included
 /// in this diagram. In the case of inference variables, they should
@@ -62,29 +62,36 @@ pub struct RegionVid {}
 /// include -- the diagram indicates the relationship between free
 /// regions.
 ///
+/// You can read more about the distinction between early and late bound
+/// parameters in the rustc dev guide: [Early vs Late bound parameters].
+///
+/// A note on subtyping: if we assume that references are covariant in their
+/// region, and use that to define the subtyping relationship of regions, it
+/// may be somewhat surprising that `'empty` is Top and `'static` is Bottom,
+/// and that "`'a` is a subtype of `'b`" is defined as "`'a` is bigger than
+/// `'b`" -- good to keep in mind.
+///
 /// ## Inference variables
 ///
 /// During region inference, we sometimes create inference variables,
-/// represented as `ReVar`. These will be inferred by the code in
-/// `infer::lexical_region_resolve` to some free region from the
-/// lattice above (the minimal region that meets the
+/// represented as [`RegionKind::ReVar`]. These will be inferred by
+/// the code in `infer::lexical_region_resolve` to some free region
+/// from the lattice above (the minimal region that meets the
 /// constraints).
 ///
 /// During NLL checking, where regions are defined differently, we
-/// also use `ReVar` -- in that case, the index is used to index into
-/// the NLL region checker's data structures. The variable may in fact
-/// represent either a free region or an inference variable, in that
-/// case.
+/// also use [`RegionKind::ReVar`] -- in that case, the index is used
+/// to index into the NLL region checker's data structures. The
+/// variable may in fact represent either a free region or an
+/// inference variable, in that case.
 ///
 /// ## Bound Regions
 ///
 /// These are regions that are stored behind a binder and must be instantiated
-/// with some concrete region before being used. There are two kind of
-/// bound regions: early-bound, which are bound in an item's `Generics`,
-/// and are instantiated by an `GenericArgs`, and late-bound, which are part of
-/// higher-ranked types (e.g., `for<'a> fn(&'a ())`), and are instantiated by
-/// the likes of `liberate_late_bound_regions`. The distinction exists
-/// because higher-ranked lifetimes aren't supported in all places. See [1][2].
+/// with some concrete region before being used. A type can be wrapped in a
+/// `Binder`, which introduces new type/const/lifetime variables (e.g., `for<'a>
+/// fn(&'a ())`). These parameters are referred to via [`RegionKind::ReBound`].
+/// You can instantiate them with the likes of `liberate_late_bound_regions`.
 ///
 /// Unlike `Param`s, bound regions are not supposed to exist "in the wild"
 /// outside their binder, e.g., in types passed to type inference, and
@@ -123,8 +130,7 @@ pub struct RegionVid {}
 /// happen, you can use `leak_check`. This is more clearly explained
 /// by the [rustc dev guide].
 ///
-/// [1]: https://smallcultfollowing.com/babysteps/blog/2013/10/29/intermingled-parameter-lists/
-/// [2]: https://smallcultfollowing.com/babysteps/blog/2013/11/04/intermingled-parameter-lists/
+/// [Early vs Late bound parameters]: https://rustc-dev-guide.rust-lang.org/early-late-parameters.html
 /// [rustc dev guide]: https://rustc-dev-guide.rust-lang.org/traits/hrtb.html
 #[derive_where(Clone, Copy, Hash, PartialEq; I: Interner)]
 #[derive(GenericTypeVisitable)]
@@ -160,7 +166,7 @@ pub enum RegionKind<I: Interner> {
    /// more info about early and late bound lifetime parameters.
    ReLateParam(I::LateParamRegion),

-    /// Static data that has an "infinite" lifetime. Top in the region lattice.
+    /// Static data that has an "infinite" lifetime. Bottom in the region lattice.
    ReStatic,

    /// A region variable. Should not exist outside of type inference.
@@ -182,6 +182,7 @@
 #![feature(hexagon_target_feature)]
 #![feature(loongarch_target_feature)]
 #![feature(mips_target_feature)]
+#![feature(movrs_target_feature)]
 #![feature(nvptx_target_feature)]
 #![feature(powerpc_target_feature)]
 #![feature(riscv_target_feature)]
@@ -8,7 +8,7 @@ jobs:
    name: Check Style
    runs-on: ubuntu-latest
    steps:
-    - uses: actions/checkout@v4
+    - uses: actions/checkout@v6
    - name: Install Rust
      run: rustup update nightly --no-self-update && rustup default nightly
    - run: ci/style.sh
@@ -18,7 +18,7 @@ jobs:
    needs: [style]
    runs-on: ubuntu-latest
    steps:
-    - uses: actions/checkout@v4
+    - uses: actions/checkout@v6
    - name: Install Rust
      run: rustup update nightly --no-self-update && rustup default nightly
    - run: ci/dox.sh
@@ -30,7 +30,7 @@ jobs:
    needs: [style]
    runs-on: ubuntu-latest
    steps:
-    - uses: actions/checkout@v4
+    - uses: actions/checkout@v6
    - name: Install Rust
      run: rustup update nightly --no-self-update && rustup default nightly
    - run: cargo test --manifest-path crates/stdarch-verify/Cargo.toml
@@ -216,7 +216,7 @@ jobs:
          build_std: true

    steps:
-    - uses: actions/checkout@v4
+    - uses: actions/checkout@v6
    - name: Install Rust
      run: |
        rustup update nightly --no-self-update
@@ -285,7 +285,7 @@ jobs:
            build_std: true

    steps:
-    - uses: actions/checkout@v4
+    - uses: actions/checkout@v6
    - name: Install Rust
      run: |
        rustup update nightly --no-self-update
@@ -310,7 +310,7 @@ jobs:
    name: Check stdarch-gen-{arm, loongarch, hexagon} output
    runs-on: ubuntu-latest
    steps:
-    - uses: actions/checkout@v4
+    - uses: actions/checkout@v6
    - name: Install Rust
      run: rustup update nightly && rustup default nightly && rustup component add rustfmt
    - name: Check arm spec
@@ -17,6 +17,10 @@
 #[unstable(feature = "stdarch_aarch64_mte", issue = "129010")]
 pub use self::mte::*;

+mod rand;
+#[unstable(feature = "stdarch_aarch64_rand", issue = "153514")]
+pub use self::rand::*;
+
 mod neon;
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub use self::neon::*;
@@ -3,35 +3,17 @@
 //! [ACLE documentation](https://arm-software.github.io/acle/main/acle.html#markdown-toc-mte-intrinsics)

 unsafe extern "unadjusted" {
-    #[cfg_attr(
-        any(target_arch = "aarch64", target_arch = "arm64ec"),
-        link_name = "llvm.aarch64.irg"
-    )]
+    #[link_name = "llvm.aarch64.irg"]
    fn irg_(ptr: *const (), exclude: i64) -> *const ();
-    #[cfg_attr(
-        any(target_arch = "aarch64", target_arch = "arm64ec"),
-        link_name = "llvm.aarch64.gmi"
-    )]
+    #[link_name = "llvm.aarch64.gmi"]
    fn gmi_(ptr: *const (), exclude: i64) -> i64;
-    #[cfg_attr(
-        any(target_arch = "aarch64", target_arch = "arm64ec"),
-        link_name = "llvm.aarch64.ldg"
-    )]
+    #[link_name = "llvm.aarch64.ldg"]
    fn ldg_(ptr: *const (), tag_ptr: *const ()) -> *const ();
-    #[cfg_attr(
-        any(target_arch = "aarch64", target_arch = "arm64ec"),
-        link_name = "llvm.aarch64.stg"
-    )]
+    #[link_name = "llvm.aarch64.stg"]
    fn stg_(tagged_ptr: *const (), addr_to_tag: *const ());
-    #[cfg_attr(
-        any(target_arch = "aarch64", target_arch = "arm64ec"),
-        link_name = "llvm.aarch64.addg"
-    )]
+    #[link_name = "llvm.aarch64.addg"]
    fn addg_(ptr: *const (), value: i64) -> *const ();
-    #[cfg_attr(
-        any(target_arch = "aarch64", target_arch = "arm64ec"),
-        link_name = "llvm.aarch64.subp"
-    )]
+    #[link_name = "llvm.aarch64.subp"]
    fn subp_(ptr_a: *const (), ptr_b: *const ()) -> i64;
 }

@@ -127,42 +109,46 @@ mod test {
    use super::*;
    use stdarch_test::assert_instr;

-    #[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(irg))] // FIXME: MSVC  `dumpbin` doesn't support MTE
+    // Instruction tests are separate because the functions use generics.
+    //
+    // FIXME: As of 2026, MSVC `dumpbin` doesn't support MTE, so `assert_instr` is skipped there.
+
+    #[cfg_attr(not(target_env = "msvc"), assert_instr(irg))]
    #[allow(dead_code)]
    #[target_feature(enable = "mte")]
    unsafe fn test_arm_mte_create_random_tag(src: *const (), mask: u64) -> *const () {
        __arm_mte_create_random_tag(src, mask)
    }

-    #[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(addg))]
+    #[cfg_attr(not(target_env = "msvc"), assert_instr(addg))]
    #[allow(dead_code)]
    #[target_feature(enable = "mte")]
    unsafe fn test_arm_mte_increment_tag(src: *const ()) -> *const () {
        __arm_mte_increment_tag::<1, _>(src)
    }

-    #[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(gmi))]
+    #[cfg_attr(not(target_env = "msvc"), assert_instr(gmi))]
    #[allow(dead_code)]
    #[target_feature(enable = "mte")]
    unsafe fn test_arm_mte_exclude_tag(src: *const (), excluded: u64) -> u64 {
        __arm_mte_exclude_tag(src, excluded)
    }

-    #[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(stg))]
+    #[cfg_attr(not(target_env = "msvc"), assert_instr(stg))]
    #[allow(dead_code)]
    #[target_feature(enable = "mte")]
    unsafe fn test_arm_mte_set_tag(src: *const ()) {
        __arm_mte_set_tag(src)
    }

-    #[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(ldg))]
+    #[cfg_attr(not(target_env = "msvc"), assert_instr(ldg))]
    #[allow(dead_code)]
    #[target_feature(enable = "mte")]
    unsafe fn test_arm_mte_get_tag(src: *const ()) -> *const () {
        __arm_mte_get_tag(src)
    }

-    #[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(subp))]
+    #[cfg_attr(not(target_env = "msvc"), assert_instr(subp))]
    #[allow(dead_code)]
    #[target_feature(enable = "mte")]
    unsafe fn test_arm_mte_ptrdiff(a: *const (), b: *const ()) -> i64 {
@@ -13532,7 +13532,14 @@ pub fn vmaxh_f16(a: f16, b: f16) -> f16 {
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(test, assert_instr(fmaxnm))]
 pub fn vmaxnm_f64(a: float64x1_t, b: float64x1_t) -> float64x1_t {
-    unsafe { simd_fmax(a, b) }
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.fmaxnm.v1f64"
+        )]
+        fn _vmaxnm_f64(a: float64x1_t, b: float64x1_t) -> float64x1_t;
+    }
+    unsafe { _vmaxnm_f64(a, b) }
 }
 #[doc = "Floating-point Maximum Number (vector)"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmaxnmq_f64)"]
@@ -13541,7 +13548,14 @@ pub fn vmaxnm_f64(a: float64x1_t, b: float64x1_t) -> float64x1_t {
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(test, assert_instr(fmaxnm))]
 pub fn vmaxnmq_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t {
-    unsafe { simd_fmax(a, b) }
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.fmaxnm.v2f64"
+        )]
+        fn _vmaxnmq_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t;
+    }
+    unsafe { _vmaxnmq_f64(a, b) }
 }
 #[doc = "Floating-point Maximum Number"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmaxnmh_f16)"]
@@ -13551,7 +13565,14 @@ pub fn vmaxnmq_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t {
 #[cfg(not(target_arch = "arm64ec"))]
 #[cfg_attr(test, assert_instr(fmaxnm))]
 pub fn vmaxnmh_f16(a: f16, b: f16) -> f16 {
-    f16::max(a, b)
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.fmaxnm.f16"
+        )]
+        fn _vmaxnmh_f16(a: f16, b: f16) -> f16;
+    }
+    unsafe { _vmaxnmh_f16(a, b) }
 }
 #[doc = "Floating-point maximum number across vector"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmaxnmv_f16)"]
@@ -13561,7 +13582,14 @@ pub fn vmaxnmh_f16(a: f16, b: f16) -> f16 {
 #[cfg(not(target_arch = "arm64ec"))]
 #[cfg_attr(test, assert_instr(fmaxnmv))]
 pub fn vmaxnmv_f16(a: float16x4_t) -> f16 {
-    unsafe { simd_reduce_max(a) }
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.fmaxnmv.f16.v4f16"
+        )]
+        fn _vmaxnmv_f16(a: float16x4_t) -> f16;
+    }
+    unsafe { _vmaxnmv_f16(a) }
 }
 #[doc = "Floating-point maximum number across vector"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmaxnmvq_f16)"]
@@ -13571,7 +13599,14 @@ pub fn vmaxnmv_f16(a: float16x4_t) -> f16 {
 #[cfg(not(target_arch = "arm64ec"))]
 #[cfg_attr(test, assert_instr(fmaxnmv))]
 pub fn vmaxnmvq_f16(a: float16x8_t) -> f16 {
-    unsafe { simd_reduce_max(a) }
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.fmaxnmv.f16.v8f16"
+        )]
+        fn _vmaxnmvq_f16(a: float16x8_t) -> f16;
+    }
+    unsafe { _vmaxnmvq_f16(a) }
 }
 #[doc = "Floating-point maximum number across vector"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmaxnmv_f32)"]
@@ -13580,7 +13615,14 @@ pub fn vmaxnmvq_f16(a: float16x8_t) -> f16 {
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(test, assert_instr(fmaxnmp))]
 pub fn vmaxnmv_f32(a: float32x2_t) -> f32 {
-    unsafe { simd_reduce_max(a) }
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.fmaxnmv.f32.v2f32"
+        )]
+        fn _vmaxnmv_f32(a: float32x2_t) -> f32;
+    }
+    unsafe { _vmaxnmv_f32(a) }
 }
 #[doc = "Floating-point maximum number across vector"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmaxnmvq_f64)"]
@@ -13589,7 +13631,14 @@ pub fn vmaxnmv_f32(a: float32x2_t) -> f32 {
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(test, assert_instr(fmaxnmp))]
 pub fn vmaxnmvq_f64(a: float64x2_t) -> f64 {
-    unsafe { simd_reduce_max(a) }
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.fmaxnmv.f64.v2f64"
+        )]
+        fn _vmaxnmvq_f64(a: float64x2_t) -> f64;
+    }
+    unsafe { _vmaxnmvq_f64(a) }
 }
 #[doc = "Floating-point maximum number across vector"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmaxnmvq_f32)"]
@@ -13598,7 +13647,14 @@ pub fn vmaxnmvq_f64(a: float64x2_t) -> f64 {
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(test, assert_instr(fmaxnmv))]
 pub fn vmaxnmvq_f32(a: float32x4_t) -> f32 {
-    unsafe { simd_reduce_max(a) }
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.fmaxnmv.f32.v4f32"
+        )]
+        fn _vmaxnmvq_f32(a: float32x4_t) -> f32;
+    }
+    unsafe { _vmaxnmvq_f32(a) }
 }
 #[doc = "Floating-point maximum number across vector"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmaxv_f16)"]
@@ -13846,7 +13902,14 @@ pub fn vminh_f16(a: f16, b: f16) -> f16 {
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(test, assert_instr(fminnm))]
 pub fn vminnm_f64(a: float64x1_t, b: float64x1_t) -> float64x1_t {
-    unsafe { simd_fmin(a, b) }
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.fminnm.v1f64"
+        )]
+        fn _vminnm_f64(a: float64x1_t, b: float64x1_t) -> float64x1_t;
+    }
+    unsafe { _vminnm_f64(a, b) }
 }
 #[doc = "Floating-point Minimum Number (vector)"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vminnmq_f64)"]
@@ -13855,7 +13918,14 @@ pub fn vminnm_f64(a: float64x1_t, b: float64x1_t) -> float64x1_t {
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(test, assert_instr(fminnm))]
 pub fn vminnmq_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t {
-    unsafe { simd_fmin(a, b) }
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.fminnm.v2f64"
+        )]
+        fn _vminnmq_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t;
+    }
+    unsafe { _vminnmq_f64(a, b) }
 }
 #[doc = "Floating-point Minimum Number"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vminnmh_f16)"]
@@ -13865,7 +13935,14 @@ pub fn vminnmq_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t {
 #[cfg(not(target_arch = "arm64ec"))]
 #[cfg_attr(test, assert_instr(fminnm))]
 pub fn vminnmh_f16(a: f16, b: f16) -> f16 {
-    f16::min(a, b)
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.fminnm.f16"
+        )]
+        fn _vminnmh_f16(a: f16, b: f16) -> f16;
+    }
+    unsafe { _vminnmh_f16(a, b) }
 }
 #[doc = "Floating-point minimum number across vector"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vminnmv_f16)"]
@@ -13875,7 +13952,14 @@ pub fn vminnmh_f16(a: f16, b: f16) -> f16 {
 #[cfg(not(target_arch = "arm64ec"))]
 #[cfg_attr(test, assert_instr(fminnmv))]
 pub fn vminnmv_f16(a: float16x4_t) -> f16 {
-    unsafe { simd_reduce_min(a) }
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.fminnmv.f16.v4f16"
+        )]
+        fn _vminnmv_f16(a: float16x4_t) -> f16;
+    }
+    unsafe { _vminnmv_f16(a) }
 }
 #[doc = "Floating-point minimum number across vector"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vminnmvq_f16)"]
@@ -13885,7 +13969,14 @@ pub fn vminnmv_f16(a: float16x4_t) -> f16 {
 #[cfg(not(target_arch = "arm64ec"))]
 #[cfg_attr(test, assert_instr(fminnmv))]
 pub fn vminnmvq_f16(a: float16x8_t) -> f16 {
-    unsafe { simd_reduce_min(a) }
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.fminnmv.f16.v8f16"
+        )]
+        fn _vminnmvq_f16(a: float16x8_t) -> f16;
+    }
+    unsafe { _vminnmvq_f16(a) }
 }
 #[doc = "Floating-point minimum number across vector"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vminnmv_f32)"]
@@ -13894,7 +13985,14 @@ pub fn vminnmvq_f16(a: float16x8_t) -> f16 {
 #[cfg_attr(test, assert_instr(fminnmp))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vminnmv_f32(a: float32x2_t) -> f32 {
-    unsafe { simd_reduce_min(a) }
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.fminnmv.f32.v2f32"
+        )]
+        fn _vminnmv_f32(a: float32x2_t) -> f32;
+    }
+    unsafe { _vminnmv_f32(a) }
 }
 #[doc = "Floating-point minimum number across vector"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vminnmvq_f64)"]
@@ -13903,7 +14001,14 @@ pub fn vminnmv_f32(a: float32x2_t) -> f32 {
 #[cfg_attr(test, assert_instr(fminnmp))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vminnmvq_f64(a: float64x2_t) -> f64 {
-    unsafe { simd_reduce_min(a) }
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.fminnmv.f64.v2f64"
+        )]
+        fn _vminnmvq_f64(a: float64x2_t) -> f64;
+    }
+    unsafe { _vminnmvq_f64(a) }
 }
 #[doc = "Floating-point minimum number across vector"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vminnmvq_f32)"]
@@ -13912,7 +14017,14 @@ pub fn vminnmvq_f64(a: float64x2_t) -> f64 {
 #[cfg_attr(test, assert_instr(fminnmv))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vminnmvq_f32(a: float32x4_t) -> f32 {
-    unsafe { simd_reduce_min(a) }
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.fminnmv.f32.v4f32"
+        )]
+        fn _vminnmvq_f32(a: float32x4_t) -> f32;
+    }
+    unsafe { _vminnmvq_f32(a) }
 }
 #[doc = "Floating-point minimum number across vector"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vminv_f16)"]
@@ -0,0 +1,48 @@
+//! AArch64 Random Number intrinsics
+//!
+//! [ACLE documentation](https://arm-software.github.io/acle/main/acle.html#random-number-generation-intrinsics)
+
+#[cfg(test)]
+use stdarch_test::assert_instr;
+
+unsafe extern "unadjusted" {
+    #[link_name = "llvm.aarch64.rndr"]
+    fn rndr_() -> Tuple;
+
+    #[link_name = "llvm.aarch64.rndrrs"]
+    fn rndrrs_() -> Tuple;
+}
+
+#[repr(C)]
+struct Tuple {
+    bits: u64,
+    status: bool,
+}
+
+/// Stores a 64-bit random number into the object pointed to by the argument and returns
+/// zero. If the implementation could not generate a random number within a reasonable
+/// period of time the object pointed to by the input is set to zero and a non-zero value
+/// is returned.
+#[inline]
+#[target_feature(enable = "rand")]
+#[cfg_attr(test, assert_instr(mrs))]
+#[unstable(feature = "stdarch_aarch64_rand", issue = "153514")]
+pub unsafe fn __rndr(value: *mut u64) -> i32 {
+    let Tuple { bits, status } = rndr_();
+    unsafe { *value = bits };
+    status as i32
+}
+
+/// Reseeds the random number generator. After that stores a 64-bit random number into
+/// the object pointed to by the argument and returns zero. If the implementation could
+/// not generate a random number within a reasonable period of time the object pointed
+/// to by the input is set to zero and a non-zero value is returned.
+#[inline]
+#[target_feature(enable = "rand")]
+#[cfg_attr(test, assert_instr(mrs))]
+#[unstable(feature = "stdarch_aarch64_rand", issue = "153514")]
+pub unsafe fn __rndrrs(value: *mut u64) -> i32 {
+    let Tuple { bits, status } = rndrrs_();
+    unsafe { *value = bits };
+    status as i32
+}
@@ -25891,7 +25891,15 @@ pub fn vmaxq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t {
 )]
 #[cfg(not(target_arch = "arm64ec"))]
 pub fn vmaxnm_f16(a: float16x4_t, b: float16x4_t) -> float16x4_t {
-    unsafe { simd_fmax(a, b) }
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vmaxnm.v4f16")]
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.fmaxnm.v4f16"
+        )]
+        fn _vmaxnm_f16(a: float16x4_t, b: float16x4_t) -> float16x4_t;
+    }
+    unsafe { _vmaxnm_f16(a, b) }
 }
 #[doc = "Floating-point Maximum Number (vector)"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmaxnmq_f16)"]
@@ -25913,7 +25921,15 @@ pub fn vmaxnm_f16(a: float16x4_t, b: float16x4_t) -> float16x4_t {
 )]
 #[cfg(not(target_arch = "arm64ec"))]
 pub fn vmaxnmq_f16(a: float16x8_t, b: float16x8_t) -> float16x8_t {
-    unsafe { simd_fmax(a, b) }
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vmaxnm.v8f16")]
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.fmaxnm.v8f16"
+        )]
+        fn _vmaxnmq_f16(a: float16x8_t, b: float16x8_t) -> float16x8_t;
+    }
+    unsafe { _vmaxnmq_f16(a, b) }
 }
 #[doc = "Floating-point Maximum Number (vector)"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmaxnm_f32)"]
@@ -25934,7 +25950,15 @@ pub fn vmaxnmq_f16(a: float16x8_t, b: float16x8_t) -> float16x8_t {
    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
 pub fn vmaxnm_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t {
-    unsafe { simd_fmax(a, b) }
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vmaxnm.v2f32")]
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.fmaxnm.v2f32"
+        )]
+        fn _vmaxnm_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t;
+    }
+    unsafe { _vmaxnm_f32(a, b) }
 }
 #[doc = "Floating-point Maximum Number (vector)"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmaxnmq_f32)"]
@@ -25955,7 +25979,15 @@ pub fn vmaxnm_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t {
    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
 pub fn vmaxnmq_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t {
-    unsafe { simd_fmax(a, b) }
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vmaxnm.v4f32")]
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.fmaxnm.v4f32"
+        )]
+        fn _vmaxnmq_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t;
+    }
+    unsafe { _vmaxnmq_f32(a, b) }
 }
 #[doc = "Minimum (vector)"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmin_f16)"]
@@ -26383,7 +26415,15 @@ pub fn vminq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t {
 )]
 #[cfg(not(target_arch = "arm64ec"))]
 pub fn vminnm_f16(a: float16x4_t, b: float16x4_t) -> float16x4_t {
-    unsafe { simd_fmin(a, b) }
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vminnm.v4f16")]
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.fminnm.v4f16"
+        )]
+        fn _vminnm_f16(a: float16x4_t, b: float16x4_t) -> float16x4_t;
+    }
+    unsafe { _vminnm_f16(a, b) }
 }
 #[doc = "Floating-point Minimum Number (vector)"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vminnmq_f16)"]
@@ -26405,7 +26445,15 @@ pub fn vminnm_f16(a: float16x4_t, b: float16x4_t) -> float16x4_t {
 )]
 #[cfg(not(target_arch = "arm64ec"))]
 pub fn vminnmq_f16(a: float16x8_t, b: float16x8_t) -> float16x8_t {
-    unsafe { simd_fmin(a, b) }
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vminnm.v8f16")]
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.fminnm.v8f16"
+        )]
+        fn _vminnmq_f16(a: float16x8_t, b: float16x8_t) -> float16x8_t;
+    }
+    unsafe { _vminnmq_f16(a, b) }
 }
 #[doc = "Floating-point Minimum Number (vector)"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vminnm_f32)"]
@@ -26426,7 +26474,15 @@ pub fn vminnmq_f16(a: float16x8_t, b: float16x8_t) -> float16x8_t {
    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
 pub fn vminnm_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t {
-    unsafe { simd_fmin(a, b) }
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vminnm.v2f32")]
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.fminnm.v2f32"
+        )]
+        fn _vminnm_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t;
+    }
+    unsafe { _vminnm_f32(a, b) }
 }
 #[doc = "Floating-point Minimum Number (vector)"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vminnmq_f32)"]
@@ -26447,7 +26503,15 @@ pub fn vminnm_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t {
    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
 pub fn vminnmq_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t {
-    unsafe { simd_fmin(a, b) }
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vminnm.v4f32")]
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.fminnm.v4f32"
+        )]
+        fn _vminnmq_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t;
+    }
+    unsafe { _vminnmq_f32(a, b) }
 }
 #[doc = "Floating-point multiply-add to accumulator"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmla_f32)"]
@@ -39,7 +39,8 @@
    const_trait_impl,
    const_cmp,
    const_eval_select,
-    maybe_uninit_as_bytes
+    maybe_uninit_as_bytes,
+    movrs_target_feature
 )]
 #![cfg_attr(test, feature(test, abi_vectorcall, stdarch_internal))]
 #![deny(clippy::missing_inline_in_public_items)]
@@ -335,6 +335,20 @@ struct PackedTuple<T, U> {
    #[link_name = "llvm.s390.vcfn"] fn vcfn(a: vector_signed_short, immarg: i32) -> vector_signed_short;
    #[link_name = "llvm.s390.vcnf"] fn vcnf(a: vector_signed_short, immarg: i32) -> vector_signed_short;
    #[link_name = "llvm.s390.vcrnfs"] fn vcrnfs(a: vector_float, b: vector_float, immarg: i32) -> vector_signed_short;
+
+    // These are the intrinsics we'd like to use (with mode 0). However, they require
+    // "vector-enhancements-1" and don't have a fallback, whereas `vec_min`/`vec_max` should be
+    // available with just "vector". Therefore, we cannot use them.
+    // #[link_name = "llvm.s390.vfmaxsb"] fn vfmaxsb(a: vector_float, b: vector_float, mode: i32) -> vector_float;
+    // #[link_name = "llvm.s390.vfmaxdb"] fn vfmaxdb(a: vector_double, b: vector_double, mode: i32) -> vector_double;
+    // #[link_name = "llvm.s390.vfminsb"] fn vfminsb(a: vector_float, b: vector_float, mode: i32) -> vector_float;
+    // #[link_name = "llvm.s390.vfmindb"] fn vfmindb(a: vector_double, b: vector_double, mode: i32) -> vector_double;
+    // Instead, we use "portable" LLVM intrinsics -- even though those have the wrong semantics
+    // (https://github.com/rust-lang/stdarch/issues/2060), they usually do the right thing.
+    #[link_name = "llvm.minnum.v4f32"] fn minnum_v4f32(a: vector_float, b: vector_float) -> vector_float;
+    #[link_name = "llvm.minnum.v2f64"] fn minnum_v2f64(a: vector_double, b: vector_double) -> vector_double;
+    #[link_name = "llvm.maxnum.v4f32"] fn maxnum_v4f32(a: vector_float, b: vector_float) -> vector_float;
+    #[link_name = "llvm.maxnum.v2f64"] fn maxnum_v2f64(a: vector_double, b: vector_double) -> vector_double;
 }

 #[repr(simd)]
@@ -780,8 +794,8 @@ mod impl_max {
        impl_max!(vec_vmxslg, vector_unsigned_long_long, vmxlg);
    }

-    test_impl! { vec_vfmaxsb (a: vector_float, b: vector_float) -> vector_float [simd_fmax, "vector-enhancements-1" vfmaxsb ] }
-    test_impl! { vec_vfmaxdb (a: vector_double, b: vector_double) -> vector_double [simd_fmax, "vector-enhancements-1" vfmaxdb] }
+    test_impl! { vec_vfmaxsb (a: vector_float, b: vector_float) -> vector_float [maxnum_v4f32, "vector-enhancements-1" vfmaxsb] }
+    test_impl! { vec_vfmaxdb (a: vector_double, b: vector_double) -> vector_double [maxnum_v2f64, "vector-enhancements-1" vfmaxdb] }

    impl_vec_trait!([VectorMax vec_max] vec_vfmaxsb (vector_float, vector_float) -> vector_float);
    impl_vec_trait!([VectorMax vec_max] vec_vfmaxdb (vector_double, vector_double) -> vector_double);
@@ -827,8 +841,8 @@ mod impl_min {
        impl_min!(vec_vmnslg, vector_unsigned_long_long, vmnlg);
    }

-    test_impl! { vec_vfminsb (a: vector_float, b: vector_float) -> vector_float [simd_fmin, "vector-enhancements-1" vfminsb]  }
-    test_impl! { vec_vfmindb (a: vector_double, b: vector_double) -> vector_double [simd_fmin, "vector-enhancements-1" vfmindb]  }
+    test_impl! { vec_vfminsb (a: vector_float, b: vector_float) -> vector_float [minnum_v4f32, "vector-enhancements-1" vfminsb] }
+    test_impl! { vec_vfmindb (a: vector_double, b: vector_double) -> vector_double [minnum_v2f64, "vector-enhancements-1" vfmindb] }

    impl_vec_trait!([VectorMin vec_min] vec_vfminsb (vector_float, vector_float) -> vector_float);
    impl_vec_trait!([VectorMin vec_min] vec_vfmindb (vector_double, vector_double) -> vector_double);
@@ -7477,6 +7491,30 @@ fn test_vec_cmpnrg_or_0_idx() {
        [0, !0, !0, !0]
    }

+    test_vec_2! { test_vec_max_f32, vec_max, f32x4, f32x4 -> f32x4,
+        [1.0,   f32::NAN, f32::INFINITY, 2.0],
+        [-10.0, -10.0,    5.0,           f32::NAN],
+        [1.0,   -10.0,    f32::INFINITY, 2.0]
+    }
+
+    test_vec_2! { test_vec_min_f32, vec_min, f32x4, f32x4 -> f32x4,
+        [1.0,   f32::NAN, f32::INFINITY, 2.0],
+        [-10.0, -10.0,    5.0,           f32::NAN],
+        [-10.0, -10.0,    5.0,           2.0]
+    }
+
+    test_vec_2! { test_vec_max_f64, vec_max, f64x2, f64x2 -> f64x2,
+        [f64::NAN, 2.0],
+        [-10.0,    f64::NAN],
+        [-10.0,    2.0]
+    }
+
+    test_vec_2! { test_vec_min_f64, vec_min, f64x2, f64x2 -> f64x2,
+        [f64::NAN, 2.0],
+        [-10.0,    f64::NAN],
+        [-10.0,    2.0]
+    }
+
    #[simd_test(enable = "vector")]
    fn test_vec_meadd() {
        let a = vector_unsigned_short([1, 0, 2, 0, 3, 0, 4, 0]);
@@ -11753,7 +11753,7 @@ pub const fn _mm_maskz_cvtepi16_epi8(k: __mmask8, a: __m128i) -> __m128i {
 #[target_feature(enable = "avx512bw")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpmovswb))]
-#[rustc_const_unstable(feature = "stdarch_const_helpers", issue = "none")]
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
 pub const fn _mm512_cvtsepi16_epi8(a: __m512i) -> __m256i {
    unsafe {
        simd_cast::<_, i8x32>(simd_imax(
@@ -11771,7 +11771,7 @@ pub const fn _mm512_cvtsepi16_epi8(a: __m512i) -> __m256i {
 #[target_feature(enable = "avx512bw")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpmovswb))]
-#[rustc_const_unstable(feature = "stdarch_const_helpers", issue = "none")]
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
 pub const fn _mm512_mask_cvtsepi16_epi8(src: __m256i, k: __mmask32, a: __m512i) -> __m256i {
    unsafe {
        simd_select_bitmask(k, _mm512_cvtsepi16_epi8(a).as_i8x32(), src.as_i8x32()).as_m256i()
@@ -11785,7 +11785,7 @@ pub const fn _mm512_mask_cvtsepi16_epi8(src: __m256i, k: __mmask32, a: __m512i)
 #[target_feature(enable = "avx512bw")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpmovswb))]
-#[rustc_const_unstable(feature = "stdarch_const_helpers", issue = "none")]
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
 pub const fn _mm512_maskz_cvtsepi16_epi8(k: __mmask32, a: __m512i) -> __m256i {
    unsafe { simd_select_bitmask(k, _mm512_cvtsepi16_epi8(a).as_i8x32(), i8x32::ZERO).as_m256i() }
 }
@@ -11797,7 +11797,7 @@ pub const fn _mm512_maskz_cvtsepi16_epi8(k: __mmask32, a: __m512i) -> __m256i {
 #[target_feature(enable = "avx512bw,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpmovswb))]
-#[rustc_const_unstable(feature = "stdarch_const_helpers", issue = "none")]
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
 pub const fn _mm256_cvtsepi16_epi8(a: __m256i) -> __m128i {
    unsafe {
        simd_cast::<_, i8x16>(simd_imax(
@@ -11815,7 +11815,7 @@ pub const fn _mm256_cvtsepi16_epi8(a: __m256i) -> __m128i {
 #[target_feature(enable = "avx512bw,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpmovswb))]
-#[rustc_const_unstable(feature = "stdarch_const_helpers", issue = "none")]
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
 pub const fn _mm256_mask_cvtsepi16_epi8(src: __m128i, k: __mmask16, a: __m256i) -> __m128i {
    unsafe {
        simd_select_bitmask(k, _mm256_cvtsepi16_epi8(a).as_i8x16(), src.as_i8x16()).as_m128i()
@@ -11829,7 +11829,7 @@ pub const fn _mm256_mask_cvtsepi16_epi8(src: __m128i, k: __mmask16, a: __m256i)
 #[target_feature(enable = "avx512bw,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpmovswb))]
-#[rustc_const_unstable(feature = "stdarch_const_helpers", issue = "none")]
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
 pub const fn _mm256_maskz_cvtsepi16_epi8(k: __mmask16, a: __m256i) -> __m128i {
    unsafe { simd_select_bitmask(k, _mm256_cvtsepi16_epi8(a).as_i8x16(), i8x16::ZERO).as_m128i() }
 }
@@ -11874,7 +11874,7 @@ pub fn _mm_maskz_cvtsepi16_epi8(k: __mmask8, a: __m128i) -> __m128i {
 #[target_feature(enable = "avx512bw")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpmovuswb))]
-#[rustc_const_unstable(feature = "stdarch_const_helpers", issue = "none")]
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
 pub const fn _mm512_cvtusepi16_epi8(a: __m512i) -> __m256i {
    unsafe {
        simd_cast::<_, u8x32>(simd_imin(a.as_u16x32(), u16x32::splat(u8::MAX as _))).as_m256i()
@@ -11888,7 +11888,7 @@ pub const fn _mm512_cvtusepi16_epi8(a: __m512i) -> __m256i {
 #[target_feature(enable = "avx512bw")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpmovuswb))]
-#[rustc_const_unstable(feature = "stdarch_const_helpers", issue = "none")]
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
 pub const fn _mm512_mask_cvtusepi16_epi8(src: __m256i, k: __mmask32, a: __m512i) -> __m256i {
    unsafe {
        simd_select_bitmask(k, _mm512_cvtusepi16_epi8(a).as_u8x32(), src.as_u8x32()).as_m256i()
@@ -11902,7 +11902,7 @@ pub const fn _mm512_mask_cvtusepi16_epi8(src: __m256i, k: __mmask32, a: __m512i)
 #[target_feature(enable = "avx512bw")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpmovuswb))]
-#[rustc_const_unstable(feature = "stdarch_const_helpers", issue = "none")]
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
 pub const fn _mm512_maskz_cvtusepi16_epi8(k: __mmask32, a: __m512i) -> __m256i {
    unsafe { simd_select_bitmask(k, _mm512_cvtusepi16_epi8(a).as_u8x32(), u8x32::ZERO).as_m256i() }
 }
@@ -11914,7 +11914,7 @@ pub const fn _mm512_maskz_cvtusepi16_epi8(k: __mmask32, a: __m512i) -> __m256i {
 #[target_feature(enable = "avx512bw,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpmovuswb))]
-#[rustc_const_unstable(feature = "stdarch_const_helpers", issue = "none")]
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
 pub const fn _mm256_cvtusepi16_epi8(a: __m256i) -> __m128i {
    unsafe {
        simd_cast::<_, u8x16>(simd_imin(a.as_u16x16(), u16x16::splat(u8::MAX as _))).as_m128i()
@@ -11928,7 +11928,7 @@ pub const fn _mm256_cvtusepi16_epi8(a: __m256i) -> __m128i {
 #[target_feature(enable = "avx512bw,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpmovuswb))]
-#[rustc_const_unstable(feature = "stdarch_const_helpers", issue = "none")]
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
 pub const fn _mm256_mask_cvtusepi16_epi8(src: __m128i, k: __mmask16, a: __m256i) -> __m128i {
    unsafe {
        simd_select_bitmask(k, _mm256_cvtusepi16_epi8(a).as_u8x16(), src.as_u8x16()).as_m128i()
@@ -11942,7 +11942,7 @@ pub const fn _mm256_mask_cvtusepi16_epi8(src: __m128i, k: __mmask16, a: __m256i)
 #[target_feature(enable = "avx512bw,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpmovuswb))]
-#[rustc_const_unstable(feature = "stdarch_const_helpers", issue = "none")]
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
 pub const fn _mm256_maskz_cvtusepi16_epi8(k: __mmask16, a: __m256i) -> __m128i {
    unsafe { simd_select_bitmask(k, _mm256_cvtusepi16_epi8(a).as_u8x16(), u8x16::ZERO).as_m128i() }
 }
@@ -12678,7 +12678,7 @@ pub unsafe fn _mm_mask_cvtsepi16_storeu_epi8(mem_addr: *mut i8, k: __mmask8, a:
 #[target_feature(enable = "avx512bw")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpmovwb))]
-#[rustc_const_unstable(feature = "stdarch_const_helpers", issue = "none")]
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
 pub const unsafe fn _mm512_mask_cvtepi16_storeu_epi8(mem_addr: *mut i8, k: __mmask32, a: __m512i) {
    let result = _mm512_cvtepi16_epi8(a).as_i8x32();
    let mask = simd_select_bitmask(k, i8x32::splat(!0), i8x32::ZERO);
@@ -12692,7 +12692,7 @@ pub unsafe fn _mm_mask_cvtsepi16_storeu_epi8(mem_addr: *mut i8, k: __mmask8, a:
 #[target_feature(enable = "avx512bw,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpmovwb))]
-#[rustc_const_unstable(feature = "stdarch_const_helpers", issue = "none")]
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
 pub const unsafe fn _mm256_mask_cvtepi16_storeu_epi8(mem_addr: *mut i8, k: __mmask16, a: __m256i) {
    let result = _mm256_cvtepi16_epi8(a).as_i8x16();
    let mask = simd_select_bitmask(k, i8x16::splat(!0), i8x16::ZERO);
@@ -12706,7 +12706,7 @@ pub unsafe fn _mm_mask_cvtsepi16_storeu_epi8(mem_addr: *mut i8, k: __mmask8, a:
 #[target_feature(enable = "avx512bw,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpmovwb))]
-#[rustc_const_unstable(feature = "stdarch_const_helpers", issue = "none")]
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
 pub const unsafe fn _mm_mask_cvtepi16_storeu_epi8(mem_addr: *mut i8, k: __mmask8, a: __m128i) {
    let result: i8x8 = simd_shuffle!(
        _mm_cvtepi16_epi8(a).as_i8x16(),
@@ -12,7 +12,7 @@
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpdpwssd))]
 pub fn _mm512_dpwssd_epi32(src: __m512i, a: __m512i, b: __m512i) -> __m512i {
-    unsafe { transmute(vpdpwssd(src.as_i32x16(), a.as_i32x16(), b.as_i32x16())) }
+    unsafe { transmute(vpdpwssd(src.as_i32x16(), a.as_i16x32(), b.as_i16x32())) }
 }

 /// Multiply groups of 2 adjacent pairs of signed 16-bit integers in a with corresponding 16-bit integers in b, producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding 32-bit integer in src, and store the packed 32-bit results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
@@ -51,7 +51,7 @@ pub fn _mm512_maskz_dpwssd_epi32(k: __mmask16, src: __m512i, a: __m512i, b: __m5
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpdpwssd))]
 pub fn _mm256_dpwssd_avx_epi32(src: __m256i, a: __m256i, b: __m256i) -> __m256i {
-    unsafe { transmute(vpdpwssd256(src.as_i32x8(), a.as_i32x8(), b.as_i32x8())) }
+    unsafe { transmute(vpdpwssd256(src.as_i32x8(), a.as_i16x16(), b.as_i16x16())) }
 }

 /// Multiply groups of 2 adjacent pairs of signed 16-bit integers in a with corresponding 16-bit integers in b, producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding 32-bit integer in src, and store the packed 32-bit results in dst.
@@ -62,7 +62,7 @@ pub fn _mm256_dpwssd_avx_epi32(src: __m256i, a: __m256i, b: __m256i) -> __m256i
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpdpwssd))]
 pub fn _mm256_dpwssd_epi32(src: __m256i, a: __m256i, b: __m256i) -> __m256i {
-    unsafe { transmute(vpdpwssd256(src.as_i32x8(), a.as_i32x8(), b.as_i32x8())) }
+    unsafe { transmute(vpdpwssd256(src.as_i32x8(), a.as_i16x16(), b.as_i16x16())) }
 }

 /// Multiply groups of 2 adjacent pairs of signed 16-bit integers in a with corresponding 16-bit integers in b, producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding 32-bit integer in src, and store the packed 32-bit results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
@@ -101,7 +101,7 @@ pub fn _mm256_maskz_dpwssd_epi32(k: __mmask8, src: __m256i, a: __m256i, b: __m25
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpdpwssd))]
 pub fn _mm_dpwssd_avx_epi32(src: __m128i, a: __m128i, b: __m128i) -> __m128i {
-    unsafe { transmute(vpdpwssd128(src.as_i32x4(), a.as_i32x4(), b.as_i32x4())) }
+    unsafe { transmute(vpdpwssd128(src.as_i32x4(), a.as_i16x8(), b.as_i16x8())) }
 }

 /// Multiply groups of 2 adjacent pairs of signed 16-bit integers in a with corresponding 16-bit integers in b, producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding 32-bit integer in src, and store the packed 32-bit results in dst.
@@ -112,7 +112,7 @@ pub fn _mm_dpwssd_avx_epi32(src: __m128i, a: __m128i, b: __m128i) -> __m128i {
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpdpwssd))]
 pub fn _mm_dpwssd_epi32(src: __m128i, a: __m128i, b: __m128i) -> __m128i {
-    unsafe { transmute(vpdpwssd128(src.as_i32x4(), a.as_i32x4(), b.as_i32x4())) }
+    unsafe { transmute(vpdpwssd128(src.as_i32x4(), a.as_i16x8(), b.as_i16x8())) }
 }

 /// Multiply groups of 2 adjacent pairs of signed 16-bit integers in a with corresponding 16-bit integers in b, producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding 32-bit integer in src, and store the packed 32-bit results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
@@ -151,7 +151,7 @@ pub fn _mm_maskz_dpwssd_epi32(k: __mmask8, src: __m128i, a: __m128i, b: __m128i)
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpdpwssds))]
 pub fn _mm512_dpwssds_epi32(src: __m512i, a: __m512i, b: __m512i) -> __m512i {
-    unsafe { transmute(vpdpwssds(src.as_i32x16(), a.as_i32x16(), b.as_i32x16())) }
+    unsafe { transmute(vpdpwssds(src.as_i32x16(), a.as_i16x32(), b.as_i16x32())) }
 }

 /// Multiply groups of 2 adjacent pairs of signed 16-bit integers in a with corresponding 16-bit integers in b, producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding 32-bit integer in src using signed saturation, and store the packed 32-bit results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
@@ -190,7 +190,7 @@ pub fn _mm512_maskz_dpwssds_epi32(k: __mmask16, src: __m512i, a: __m512i, b: __m
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpdpwssds))]
 pub fn _mm256_dpwssds_avx_epi32(src: __m256i, a: __m256i, b: __m256i) -> __m256i {
-    unsafe { transmute(vpdpwssds256(src.as_i32x8(), a.as_i32x8(), b.as_i32x8())) }
+    unsafe { transmute(vpdpwssds256(src.as_i32x8(), a.as_i16x16(), b.as_i16x16())) }
 }

 /// Multiply groups of 2 adjacent pairs of signed 16-bit integers in a with corresponding 16-bit integers in b, producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding 32-bit integer in src using signed saturation, and store the packed 32-bit results in dst.
@@ -201,7 +201,7 @@ pub fn _mm256_dpwssds_avx_epi32(src: __m256i, a: __m256i, b: __m256i) -> __m256i
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpdpwssds))]
 pub fn _mm256_dpwssds_epi32(src: __m256i, a: __m256i, b: __m256i) -> __m256i {
-    unsafe { transmute(vpdpwssds256(src.as_i32x8(), a.as_i32x8(), b.as_i32x8())) }
+    unsafe { transmute(vpdpwssds256(src.as_i32x8(), a.as_i16x16(), b.as_i16x16())) }
 }

 /// Multiply groups of 2 adjacent pairs of signed 16-bit integers in a with corresponding 16-bit integers in b, producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding 32-bit integer in src using signed saturation, and store the packed 32-bit results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
@@ -240,7 +240,7 @@ pub fn _mm256_maskz_dpwssds_epi32(k: __mmask8, src: __m256i, a: __m256i, b: __m2
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpdpwssds))]
 pub fn _mm_dpwssds_avx_epi32(src: __m128i, a: __m128i, b: __m128i) -> __m128i {
-    unsafe { transmute(vpdpwssds128(src.as_i32x4(), a.as_i32x4(), b.as_i32x4())) }
+    unsafe { transmute(vpdpwssds128(src.as_i32x4(), a.as_i16x8(), b.as_i16x8())) }
 }

 /// Multiply groups of 2 adjacent pairs of signed 16-bit integers in a with corresponding 16-bit integers in b, producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding 32-bit integer in src using signed saturation, and store the packed 32-bit results in dst.
@@ -251,7 +251,7 @@ pub fn _mm_dpwssds_avx_epi32(src: __m128i, a: __m128i, b: __m128i) -> __m128i {
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpdpwssds))]
 pub fn _mm_dpwssds_epi32(src: __m128i, a: __m128i, b: __m128i) -> __m128i {
-    unsafe { transmute(vpdpwssds128(src.as_i32x4(), a.as_i32x4(), b.as_i32x4())) }
+    unsafe { transmute(vpdpwssds128(src.as_i32x4(), a.as_i16x8(), b.as_i16x8())) }
 }

 /// Multiply groups of 2 adjacent pairs of signed 16-bit integers in a with corresponding 16-bit integers in b, producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding 32-bit integer in src using signed saturation, and store the packed 32-bit results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
@@ -290,7 +290,7 @@ pub fn _mm_maskz_dpwssds_epi32(k: __mmask8, src: __m128i, a: __m128i, b: __m128i
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpdpbusd))]
 pub fn _mm512_dpbusd_epi32(src: __m512i, a: __m512i, b: __m512i) -> __m512i {
-    unsafe { transmute(vpdpbusd(src.as_i32x16(), a.as_i32x16(), b.as_i32x16())) }
+    unsafe { transmute(vpdpbusd(src.as_i32x16(), a.as_u8x64(), b.as_i8x64())) }
 }

 /// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in a with corresponding signed 8-bit integers in b, producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding 32-bit integer in src, and store the packed 32-bit results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
@@ -329,7 +329,7 @@ pub fn _mm512_maskz_dpbusd_epi32(k: __mmask16, src: __m512i, a: __m512i, b: __m5
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpdpbusd))]
 pub fn _mm256_dpbusd_avx_epi32(src: __m256i, a: __m256i, b: __m256i) -> __m256i {
-    unsafe { transmute(vpdpbusd256(src.as_i32x8(), a.as_i32x8(), b.as_i32x8())) }
+    unsafe { transmute(vpdpbusd256(src.as_i32x8(), a.as_u8x32(), b.as_i8x32())) }
 }

 /// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in a with corresponding signed 8-bit integers in b, producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding 32-bit integer in src, and store the packed 32-bit results in dst.
@@ -340,7 +340,7 @@ pub fn _mm256_dpbusd_avx_epi32(src: __m256i, a: __m256i, b: __m256i) -> __m256i
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpdpbusd))]
 pub fn _mm256_dpbusd_epi32(src: __m256i, a: __m256i, b: __m256i) -> __m256i {
-    unsafe { transmute(vpdpbusd256(src.as_i32x8(), a.as_i32x8(), b.as_i32x8())) }
+    unsafe { transmute(vpdpbusd256(src.as_i32x8(), a.as_u8x32(), b.as_i8x32())) }
 }

 /// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in a with corresponding signed 8-bit integers in b, producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding 32-bit integer in src, and store the packed 32-bit results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
@@ -379,7 +379,7 @@ pub fn _mm256_maskz_dpbusd_epi32(k: __mmask8, src: __m256i, a: __m256i, b: __m25
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpdpbusd))]
 pub fn _mm_dpbusd_avx_epi32(src: __m128i, a: __m128i, b: __m128i) -> __m128i {
-    unsafe { transmute(vpdpbusd128(src.as_i32x4(), a.as_i32x4(), b.as_i32x4())) }
+    unsafe { transmute(vpdpbusd128(src.as_i32x4(), a.as_u8x16(), b.as_i8x16())) }
 }

 /// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in a with corresponding signed 8-bit integers in b, producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding 32-bit integer in src, and store the packed 32-bit results in dst.
@@ -390,7 +390,7 @@ pub fn _mm_dpbusd_avx_epi32(src: __m128i, a: __m128i, b: __m128i) -> __m128i {
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpdpbusd))]
 pub fn _mm_dpbusd_epi32(src: __m128i, a: __m128i, b: __m128i) -> __m128i {
-    unsafe { transmute(vpdpbusd128(src.as_i32x4(), a.as_i32x4(), b.as_i32x4())) }
+    unsafe { transmute(vpdpbusd128(src.as_i32x4(), a.as_u8x16(), b.as_i8x16())) }
 }

 /// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in a with corresponding signed 8-bit integers in b, producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding 32-bit integer in src, and store the packed 32-bit results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
@@ -429,7 +429,7 @@ pub fn _mm_maskz_dpbusd_epi32(k: __mmask8, src: __m128i, a: __m128i, b: __m128i)
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpdpbusds))]
 pub fn _mm512_dpbusds_epi32(src: __m512i, a: __m512i, b: __m512i) -> __m512i {
-    unsafe { transmute(vpdpbusds(src.as_i32x16(), a.as_i32x16(), b.as_i32x16())) }
+    unsafe { transmute(vpdpbusds(src.as_i32x16(), a.as_u8x64(), b.as_i8x64())) }
 }

 /// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in a with corresponding signed 8-bit integers in b, producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding 32-bit integer in src using signed saturation, and store the packed 32-bit results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
@@ -468,7 +468,7 @@ pub fn _mm512_maskz_dpbusds_epi32(k: __mmask16, src: __m512i, a: __m512i, b: __m
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpdpbusds))]
 pub fn _mm256_dpbusds_avx_epi32(src: __m256i, a: __m256i, b: __m256i) -> __m256i {
-    unsafe { transmute(vpdpbusds256(src.as_i32x8(), a.as_i32x8(), b.as_i32x8())) }
+    unsafe { transmute(vpdpbusds256(src.as_i32x8(), a.as_u8x32(), b.as_i8x32())) }
 }

 /// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in a with corresponding signed 8-bit integers in b, producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding 32-bit integer in src using signed saturation, and store the packed 32-bit results in dst.
@@ -479,7 +479,7 @@ pub fn _mm256_dpbusds_avx_epi32(src: __m256i, a: __m256i, b: __m256i) -> __m256i
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpdpbusds))]
 pub fn _mm256_dpbusds_epi32(src: __m256i, a: __m256i, b: __m256i) -> __m256i {
-    unsafe { transmute(vpdpbusds256(src.as_i32x8(), a.as_i32x8(), b.as_i32x8())) }
+    unsafe { transmute(vpdpbusds256(src.as_i32x8(), a.as_u8x32(), b.as_i8x32())) }
 }

 /// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in a with corresponding signed 8-bit integers in b, producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding 32-bit integer in src using signed saturation, and store the packed 32-bit results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
@@ -518,7 +518,7 @@ pub fn _mm256_maskz_dpbusds_epi32(k: __mmask8, src: __m256i, a: __m256i, b: __m2
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpdpbusds))]
 pub fn _mm_dpbusds_avx_epi32(src: __m128i, a: __m128i, b: __m128i) -> __m128i {
-    unsafe { transmute(vpdpbusds128(src.as_i32x4(), a.as_i32x4(), b.as_i32x4())) }
+    unsafe { transmute(vpdpbusds128(src.as_i32x4(), a.as_u8x16(), b.as_i8x16())) }
 }

 /// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in a with corresponding signed 8-bit integers in b, producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding 32-bit integer in src using signed saturation, and store the packed 32-bit results in dst.
@@ -529,7 +529,7 @@ pub fn _mm_dpbusds_avx_epi32(src: __m128i, a: __m128i, b: __m128i) -> __m128i {
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpdpbusds))]
 pub fn _mm_dpbusds_epi32(src: __m128i, a: __m128i, b: __m128i) -> __m128i {
-    unsafe { transmute(vpdpbusds128(src.as_i32x4(), a.as_i32x4(), b.as_i32x4())) }
+    unsafe { transmute(vpdpbusds128(src.as_i32x4(), a.as_u8x16(), b.as_i8x16())) }
 }

 /// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in a with corresponding signed 8-bit integers in b, producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding 32-bit integer in src using signed saturation, and store the packed 32-bit results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
@@ -570,7 +570,7 @@ pub fn _mm_maskz_dpbusds_epi32(k: __mmask8, src: __m128i, a: __m128i, b: __m128i
 #[cfg_attr(test, assert_instr(vpdpbssd))]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 pub fn _mm_dpbssd_epi32(src: __m128i, a: __m128i, b: __m128i) -> __m128i {
-    unsafe { transmute(vpdpbssd_128(src.as_i32x4(), a.as_i32x4(), b.as_i32x4())) }
+    unsafe { transmute(vpdpbssd_128(src.as_i32x4(), a.as_i8x16(), b.as_i8x16())) }
 }

 /// Multiply groups of 4 adjacent pairs of signed 8-bit integers in a with corresponding signed 8-bit
@@ -583,7 +583,7 @@ pub fn _mm_dpbssd_epi32(src: __m128i, a: __m128i, b: __m128i) -> __m128i {
 #[cfg_attr(test, assert_instr(vpdpbssd))]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 pub fn _mm256_dpbssd_epi32(src: __m256i, a: __m256i, b: __m256i) -> __m256i {
-    unsafe { transmute(vpdpbssd_256(src.as_i32x8(), a.as_i32x8(), b.as_i32x8())) }
+    unsafe { transmute(vpdpbssd_256(src.as_i32x8(), a.as_i8x32(), b.as_i8x32())) }
 }

 /// Multiply groups of 4 adjacent pairs of signed 8-bit integers in a with corresponding signed 8-bit
@@ -596,7 +596,7 @@ pub fn _mm256_dpbssd_epi32(src: __m256i, a: __m256i, b: __m256i) -> __m256i {
 #[cfg_attr(test, assert_instr(vpdpbssds))]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 pub fn _mm_dpbssds_epi32(src: __m128i, a: __m128i, b: __m128i) -> __m128i {
-    unsafe { transmute(vpdpbssds_128(src.as_i32x4(), a.as_i32x4(), b.as_i32x4())) }
+    unsafe { transmute(vpdpbssds_128(src.as_i32x4(), a.as_i8x16(), b.as_i8x16())) }
 }

 /// Multiply groups of 4 adjacent pairs of signed 8-bit integers in a with corresponding signed 8-bit
@@ -609,7 +609,7 @@ pub fn _mm_dpbssds_epi32(src: __m128i, a: __m128i, b: __m128i) -> __m128i {
 #[cfg_attr(test, assert_instr(vpdpbssds))]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 pub fn _mm256_dpbssds_epi32(src: __m256i, a: __m256i, b: __m256i) -> __m256i {
-    unsafe { transmute(vpdpbssds_256(src.as_i32x8(), a.as_i32x8(), b.as_i32x8())) }
+    unsafe { transmute(vpdpbssds_256(src.as_i32x8(), a.as_i8x32(), b.as_i8x32())) }
 }

 /// Multiply groups of 4 adjacent pairs of signed 8-bit integers in a with corresponding unsigned 8-bit
@@ -622,7 +622,7 @@ pub fn _mm256_dpbssds_epi32(src: __m256i, a: __m256i, b: __m256i) -> __m256i {
 #[cfg_attr(test, assert_instr(vpdpbsud))]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 pub fn _mm_dpbsud_epi32(src: __m128i, a: __m128i, b: __m128i) -> __m128i {
-    unsafe { transmute(vpdpbsud_128(src.as_i32x4(), a.as_i32x4(), b.as_i32x4())) }
+    unsafe { transmute(vpdpbsud_128(src.as_i32x4(), a.as_i8x16(), b.as_u8x16())) }
 }

 /// Multiply groups of 4 adjacent pairs of signed 8-bit integers in a with corresponding unsigned 8-bit
@@ -635,7 +635,7 @@ pub fn _mm_dpbsud_epi32(src: __m128i, a: __m128i, b: __m128i) -> __m128i {
 #[cfg_attr(test, assert_instr(vpdpbsud))]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 pub fn _mm256_dpbsud_epi32(src: __m256i, a: __m256i, b: __m256i) -> __m256i {
-    unsafe { transmute(vpdpbsud_256(src.as_i32x8(), a.as_i32x8(), b.as_i32x8())) }
+    unsafe { transmute(vpdpbsud_256(src.as_i32x8(), a.as_i8x32(), b.as_u8x32())) }
 }

 /// Multiply groups of 4 adjacent pairs of signed 8-bit integers in a with corresponding unsigned 8-bit
@@ -648,7 +648,7 @@ pub fn _mm256_dpbsud_epi32(src: __m256i, a: __m256i, b: __m256i) -> __m256i {
 #[cfg_attr(test, assert_instr(vpdpbsuds))]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 pub fn _mm_dpbsuds_epi32(src: __m128i, a: __m128i, b: __m128i) -> __m128i {
-    unsafe { transmute(vpdpbsuds_128(src.as_i32x4(), a.as_i32x4(), b.as_i32x4())) }
+    unsafe { transmute(vpdpbsuds_128(src.as_i32x4(), a.as_i8x16(), b.as_u8x16())) }
 }

 /// Multiply groups of 4 adjacent pairs of signed 8-bit integers in a with corresponding unsigned 8-bit
@@ -661,7 +661,7 @@ pub fn _mm_dpbsuds_epi32(src: __m128i, a: __m128i, b: __m128i) -> __m128i {
 #[cfg_attr(test, assert_instr(vpdpbsuds))]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 pub fn _mm256_dpbsuds_epi32(src: __m256i, a: __m256i, b: __m256i) -> __m256i {
-    unsafe { transmute(vpdpbsuds_256(src.as_i32x8(), a.as_i32x8(), b.as_i32x8())) }
+    unsafe { transmute(vpdpbsuds_256(src.as_i32x8(), a.as_i8x32(), b.as_u8x32())) }
 }

 /// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in a with corresponding unsigned 8-bit
@@ -674,7 +674,7 @@ pub fn _mm256_dpbsuds_epi32(src: __m256i, a: __m256i, b: __m256i) -> __m256i {
 #[cfg_attr(test, assert_instr(vpdpbuud))]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 pub fn _mm_dpbuud_epi32(src: __m128i, a: __m128i, b: __m128i) -> __m128i {
-    unsafe { transmute(vpdpbuud_128(src.as_i32x4(), a.as_i32x4(), b.as_i32x4())) }
+    unsafe { transmute(vpdpbuud_128(src.as_i32x4(), a.as_u8x16(), b.as_u8x16())) }
 }

 /// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in a with corresponding unsigned 8-bit
@@ -687,7 +687,7 @@ pub fn _mm_dpbuud_epi32(src: __m128i, a: __m128i, b: __m128i) -> __m128i {
 #[cfg_attr(test, assert_instr(vpdpbuud))]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 pub fn _mm256_dpbuud_epi32(src: __m256i, a: __m256i, b: __m256i) -> __m256i {
-    unsafe { transmute(vpdpbuud_256(src.as_i32x8(), a.as_i32x8(), b.as_i32x8())) }
+    unsafe { transmute(vpdpbuud_256(src.as_i32x8(), a.as_u8x32(), b.as_u8x32())) }
 }

 /// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in a with corresponding unsigned 8-bit
@@ -700,7 +700,7 @@ pub fn _mm256_dpbuud_epi32(src: __m256i, a: __m256i, b: __m256i) -> __m256i {
 #[cfg_attr(test, assert_instr(vpdpbuuds))]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 pub fn _mm_dpbuuds_epi32(src: __m128i, a: __m128i, b: __m128i) -> __m128i {
-    unsafe { transmute(vpdpbuuds_128(src.as_i32x4(), a.as_i32x4(), b.as_i32x4())) }
+    unsafe { transmute(vpdpbuuds_128(src.as_i32x4(), a.as_u8x16(), b.as_u8x16())) }
 }

 /// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in a with corresponding unsigned 8-bit
@@ -713,7 +713,7 @@ pub fn _mm_dpbuuds_epi32(src: __m128i, a: __m128i, b: __m128i) -> __m128i {
 #[cfg_attr(test, assert_instr(vpdpbuuds))]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 pub fn _mm256_dpbuuds_epi32(src: __m256i, a: __m256i, b: __m256i) -> __m256i {
-    unsafe { transmute(vpdpbuuds_256(src.as_i32x8(), a.as_i32x8(), b.as_i32x8())) }
+    unsafe { transmute(vpdpbuuds_256(src.as_i32x8(), a.as_u8x32(), b.as_u8x32())) }
 }

 /// Multiply groups of 2 adjacent pairs of signed 16-bit integers in a with corresponding unsigned 16-bit
@@ -726,7 +726,7 @@ pub fn _mm256_dpbuuds_epi32(src: __m256i, a: __m256i, b: __m256i) -> __m256i {
 #[cfg_attr(test, assert_instr(vpdpwsud))]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 pub fn _mm_dpwsud_epi32(src: __m128i, a: __m128i, b: __m128i) -> __m128i {
-    unsafe { transmute(vpdpwsud_128(src.as_i32x4(), a.as_i32x4(), b.as_i32x4())) }
+    unsafe { transmute(vpdpwsud_128(src.as_i32x4(), a.as_i16x8(), b.as_u16x8())) }
 }

 /// Multiply groups of 2 adjacent pairs of signed 16-bit integers in a with corresponding unsigned 16-bit
@@ -739,7 +739,7 @@ pub fn _mm_dpwsud_epi32(src: __m128i, a: __m128i, b: __m128i) -> __m128i {
 #[cfg_attr(test, assert_instr(vpdpwsud))]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 pub fn _mm256_dpwsud_epi32(src: __m256i, a: __m256i, b: __m256i) -> __m256i {
-    unsafe { transmute(vpdpwsud_256(src.as_i32x8(), a.as_i32x8(), b.as_i32x8())) }
+    unsafe { transmute(vpdpwsud_256(src.as_i32x8(), a.as_i16x16(), b.as_u16x16())) }
 }

 /// Multiply groups of 2 adjacent pairs of signed 16-bit integers in a with corresponding unsigned 16-bit
@@ -752,7 +752,7 @@ pub fn _mm256_dpwsud_epi32(src: __m256i, a: __m256i, b: __m256i) -> __m256i {
 #[cfg_attr(test, assert_instr(vpdpwsuds))]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 pub fn _mm_dpwsuds_epi32(src: __m128i, a: __m128i, b: __m128i) -> __m128i {
-    unsafe { transmute(vpdpwsuds_128(src.as_i32x4(), a.as_i32x4(), b.as_i32x4())) }
+    unsafe { transmute(vpdpwsuds_128(src.as_i32x4(), a.as_i16x8(), b.as_u16x8())) }
 }

 /// Multiply groups of 2 adjacent pairs of signed 16-bit integers in a with corresponding unsigned 16-bit
@@ -765,7 +765,7 @@ pub fn _mm_dpwsuds_epi32(src: __m128i, a: __m128i, b: __m128i) -> __m128i {
 #[cfg_attr(test, assert_instr(vpdpwsuds))]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 pub fn _mm256_dpwsuds_epi32(src: __m256i, a: __m256i, b: __m256i) -> __m256i {
-    unsafe { transmute(vpdpwsuds_256(src.as_i32x8(), a.as_i32x8(), b.as_i32x8())) }
+    unsafe { transmute(vpdpwsuds_256(src.as_i32x8(), a.as_i16x16(), b.as_u16x16())) }
 }

 /// Multiply groups of 2 adjacent pairs of unsigned 16-bit integers in a with corresponding signed 16-bit
@@ -778,7 +778,7 @@ pub fn _mm256_dpwsuds_epi32(src: __m256i, a: __m256i, b: __m256i) -> __m256i {
 #[cfg_attr(test, assert_instr(vpdpwusd))]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 pub fn _mm_dpwusd_epi32(src: __m128i, a: __m128i, b: __m128i) -> __m128i {
-    unsafe { transmute(vpdpwusd_128(src.as_i32x4(), a.as_i32x4(), b.as_i32x4())) }
+    unsafe { transmute(vpdpwusd_128(src.as_i32x4(), a.as_u16x8(), b.as_i16x8())) }
 }

 /// Multiply groups of 2 adjacent pairs of unsigned 16-bit integers in a with corresponding signed 16-bit
@@ -791,7 +791,7 @@ pub fn _mm_dpwusd_epi32(src: __m128i, a: __m128i, b: __m128i) -> __m128i {
 #[cfg_attr(test, assert_instr(vpdpwusd))]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 pub fn _mm256_dpwusd_epi32(src: __m256i, a: __m256i, b: __m256i) -> __m256i {
-    unsafe { transmute(vpdpwusd_256(src.as_i32x8(), a.as_i32x8(), b.as_i32x8())) }
+    unsafe { transmute(vpdpwusd_256(src.as_i32x8(), a.as_u16x16(), b.as_i16x16())) }
 }

 /// Multiply groups of 2 adjacent pairs of unsigned 16-bit integers in a with corresponding signed 16-bit
@@ -804,7 +804,7 @@ pub fn _mm256_dpwusd_epi32(src: __m256i, a: __m256i, b: __m256i) -> __m256i {
 #[cfg_attr(test, assert_instr(vpdpwusds))]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 pub fn _mm_dpwusds_epi32(src: __m128i, a: __m128i, b: __m128i) -> __m128i {
-    unsafe { transmute(vpdpwusds_128(src.as_i32x4(), a.as_i32x4(), b.as_i32x4())) }
+    unsafe { transmute(vpdpwusds_128(src.as_i32x4(), a.as_u16x8(), b.as_i16x8())) }
 }

 /// Multiply groups of 2 adjacent pairs of unsigned 16-bit integers in a with corresponding signed 16-bit
@@ -817,7 +817,7 @@ pub fn _mm_dpwusds_epi32(src: __m128i, a: __m128i, b: __m128i) -> __m128i {
 #[cfg_attr(test, assert_instr(vpdpwusds))]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 pub fn _mm256_dpwusds_epi32(src: __m256i, a: __m256i, b: __m256i) -> __m256i {
-    unsafe { transmute(vpdpwusds_256(src.as_i32x8(), a.as_i32x8(), b.as_i32x8())) }
+    unsafe { transmute(vpdpwusds_256(src.as_i32x8(), a.as_u16x16(), b.as_i16x16())) }
 }

 /// Multiply groups of 2 adjacent pairs of unsigned 16-bit integers in a with corresponding unsigned 16-bit
@@ -830,7 +830,7 @@ pub fn _mm256_dpwusds_epi32(src: __m256i, a: __m256i, b: __m256i) -> __m256i {
 #[cfg_attr(test, assert_instr(vpdpwuud))]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 pub fn _mm_dpwuud_epi32(src: __m128i, a: __m128i, b: __m128i) -> __m128i {
-    unsafe { transmute(vpdpwuud_128(src.as_i32x4(), a.as_i32x4(), b.as_i32x4())) }
+    unsafe { transmute(vpdpwuud_128(src.as_i32x4(), a.as_u16x8(), b.as_u16x8())) }
 }

 /// Multiply groups of 2 adjacent pairs of unsigned 16-bit integers in a with corresponding unsigned 16-bit
@@ -843,7 +843,7 @@ pub fn _mm_dpwuud_epi32(src: __m128i, a: __m128i, b: __m128i) -> __m128i {
 #[cfg_attr(test, assert_instr(vpdpwuud))]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 pub fn _mm256_dpwuud_epi32(src: __m256i, a: __m256i, b: __m256i) -> __m256i {
-    unsafe { transmute(vpdpwuud_256(src.as_i32x8(), a.as_i32x8(), b.as_i32x8())) }
+    unsafe { transmute(vpdpwuud_256(src.as_i32x8(), a.as_u16x16(), b.as_u16x16())) }
 }

 /// Multiply groups of 2 adjacent pairs of unsigned 16-bit integers in a with corresponding unsigned 16-bit
@@ -856,7 +856,7 @@ pub fn _mm256_dpwuud_epi32(src: __m256i, a: __m256i, b: __m256i) -> __m256i {
 #[cfg_attr(test, assert_instr(vpdpwuuds))]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 pub fn _mm_dpwuuds_epi32(src: __m128i, a: __m128i, b: __m128i) -> __m128i {
-    unsafe { transmute(vpdpwuuds_128(src.as_i32x4(), a.as_i32x4(), b.as_i32x4())) }
+    unsafe { transmute(vpdpwuuds_128(src.as_i32x4(), a.as_u16x8(), b.as_u16x8())) }
 }

 /// Multiply groups of 2 adjacent pairs of unsigned 16-bit integers in a with corresponding unsigned 16-bit
@@ -869,98 +869,98 @@ pub fn _mm_dpwuuds_epi32(src: __m128i, a: __m128i, b: __m128i) -> __m128i {
 #[cfg_attr(test, assert_instr(vpdpwuuds))]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 pub fn _mm256_dpwuuds_epi32(src: __m256i, a: __m256i, b: __m256i) -> __m256i {
-    unsafe { transmute(vpdpwuuds_256(src.as_i32x8(), a.as_i32x8(), b.as_i32x8())) }
+    unsafe { transmute(vpdpwuuds_256(src.as_i32x8(), a.as_u16x16(), b.as_u16x16())) }
 }

 #[allow(improper_ctypes)]
 unsafe extern "C" {
    #[link_name = "llvm.x86.avx512.vpdpwssd.512"]
-    fn vpdpwssd(src: i32x16, a: i32x16, b: i32x16) -> i32x16;
+    fn vpdpwssd(src: i32x16, a: i16x32, b: i16x32) -> i32x16;
    #[link_name = "llvm.x86.avx512.vpdpwssd.256"]
-    fn vpdpwssd256(src: i32x8, a: i32x8, b: i32x8) -> i32x8;
+    fn vpdpwssd256(src: i32x8, a: i16x16, b: i16x16) -> i32x8;
    #[link_name = "llvm.x86.avx512.vpdpwssd.128"]
-    fn vpdpwssd128(src: i32x4, a: i32x4, b: i32x4) -> i32x4;
+    fn vpdpwssd128(src: i32x4, a: i16x8, b: i16x8) -> i32x4;

    #[link_name = "llvm.x86.avx512.vpdpwssds.512"]
-    fn vpdpwssds(src: i32x16, a: i32x16, b: i32x16) -> i32x16;
+    fn vpdpwssds(src: i32x16, a: i16x32, b: i16x32) -> i32x16;
    #[link_name = "llvm.x86.avx512.vpdpwssds.256"]
-    fn vpdpwssds256(src: i32x8, a: i32x8, b: i32x8) -> i32x8;
+    fn vpdpwssds256(src: i32x8, a: i16x16, b: i16x16) -> i32x8;
    #[link_name = "llvm.x86.avx512.vpdpwssds.128"]
-    fn vpdpwssds128(src: i32x4, a: i32x4, b: i32x4) -> i32x4;
+    fn vpdpwssds128(src: i32x4, a: i16x8, b: i16x8) -> i32x4;

    #[link_name = "llvm.x86.avx512.vpdpbusd.512"]
-    fn vpdpbusd(src: i32x16, a: i32x16, b: i32x16) -> i32x16;
+    fn vpdpbusd(src: i32x16, a: u8x64, b: i8x64) -> i32x16;
    #[link_name = "llvm.x86.avx512.vpdpbusd.256"]
-    fn vpdpbusd256(src: i32x8, a: i32x8, b: i32x8) -> i32x8;
+    fn vpdpbusd256(src: i32x8, a: u8x32, b: i8x32) -> i32x8;
    #[link_name = "llvm.x86.avx512.vpdpbusd.128"]
-    fn vpdpbusd128(src: i32x4, a: i32x4, b: i32x4) -> i32x4;
+    fn vpdpbusd128(src: i32x4, a: u8x16, b: i8x16) -> i32x4;

    #[link_name = "llvm.x86.avx512.vpdpbusds.512"]
-    fn vpdpbusds(src: i32x16, a: i32x16, b: i32x16) -> i32x16;
+    fn vpdpbusds(src: i32x16, a: u8x64, b: i8x64) -> i32x16;
    #[link_name = "llvm.x86.avx512.vpdpbusds.256"]
-    fn vpdpbusds256(src: i32x8, a: i32x8, b: i32x8) -> i32x8;
+    fn vpdpbusds256(src: i32x8, a: u8x32, b: i8x32) -> i32x8;
    #[link_name = "llvm.x86.avx512.vpdpbusds.128"]
-    fn vpdpbusds128(src: i32x4, a: i32x4, b: i32x4) -> i32x4;
+    fn vpdpbusds128(src: i32x4, a: u8x16, b: i8x16) -> i32x4;

    #[link_name = "llvm.x86.avx2.vpdpbssd.128"]
-    fn vpdpbssd_128(src: i32x4, a: i32x4, b: i32x4) -> i32x4;
+    fn vpdpbssd_128(src: i32x4, a: i8x16, b: i8x16) -> i32x4;
    #[link_name = "llvm.x86.avx2.vpdpbssd.256"]
-    fn vpdpbssd_256(src: i32x8, a: i32x8, b: i32x8) -> i32x8;
+    fn vpdpbssd_256(src: i32x8, a: i8x32, b: i8x32) -> i32x8;

    #[link_name = "llvm.x86.avx2.vpdpbssds.128"]
-    fn vpdpbssds_128(src: i32x4, a: i32x4, b: i32x4) -> i32x4;
+    fn vpdpbssds_128(src: i32x4, a: i8x16, b: i8x16) -> i32x4;
    #[link_name = "llvm.x86.avx2.vpdpbssds.256"]
-    fn vpdpbssds_256(src: i32x8, a: i32x8, b: i32x8) -> i32x8;
+    fn vpdpbssds_256(src: i32x8, a: i8x32, b: i8x32) -> i32x8;

    #[link_name = "llvm.x86.avx2.vpdpbsud.128"]
-    fn vpdpbsud_128(src: i32x4, a: i32x4, b: i32x4) -> i32x4;
+    fn vpdpbsud_128(src: i32x4, a: i8x16, b: u8x16) -> i32x4;
    #[link_name = "llvm.x86.avx2.vpdpbsud.256"]
-    fn vpdpbsud_256(src: i32x8, a: i32x8, b: i32x8) -> i32x8;
+    fn vpdpbsud_256(src: i32x8, a: i8x32, b: u8x32) -> i32x8;

    #[link_name = "llvm.x86.avx2.vpdpbsuds.128"]
-    fn vpdpbsuds_128(src: i32x4, a: i32x4, b: i32x4) -> i32x4;
+    fn vpdpbsuds_128(src: i32x4, a: i8x16, b: u8x16) -> i32x4;
    #[link_name = "llvm.x86.avx2.vpdpbsuds.256"]
-    fn vpdpbsuds_256(src: i32x8, a: i32x8, b: i32x8) -> i32x8;
+    fn vpdpbsuds_256(src: i32x8, a: i8x32, b: u8x32) -> i32x8;

    #[link_name = "llvm.x86.avx2.vpdpbuud.128"]
-    fn vpdpbuud_128(src: i32x4, a: i32x4, b: i32x4) -> i32x4;
+    fn vpdpbuud_128(src: i32x4, a: u8x16, b: u8x16) -> i32x4;
    #[link_name = "llvm.x86.avx2.vpdpbuud.256"]
-    fn vpdpbuud_256(src: i32x8, a: i32x8, b: i32x8) -> i32x8;
+    fn vpdpbuud_256(src: i32x8, a: u8x32, b: u8x32) -> i32x8;

    #[link_name = "llvm.x86.avx2.vpdpbuuds.128"]
-    fn vpdpbuuds_128(src: i32x4, a: i32x4, b: i32x4) -> i32x4;
+    fn vpdpbuuds_128(src: i32x4, a: u8x16, b: u8x16) -> i32x4;
    #[link_name = "llvm.x86.avx2.vpdpbuuds.256"]
-    fn vpdpbuuds_256(src: i32x8, a: i32x8, b: i32x8) -> i32x8;
+    fn vpdpbuuds_256(src: i32x8, a: u8x32, b: u8x32) -> i32x8;

    #[link_name = "llvm.x86.avx2.vpdpwsud.128"]
-    fn vpdpwsud_128(src: i32x4, a: i32x4, b: i32x4) -> i32x4;
+    fn vpdpwsud_128(src: i32x4, a: i16x8, b: u16x8) -> i32x4;
    #[link_name = "llvm.x86.avx2.vpdpwsud.256"]
-    fn vpdpwsud_256(src: i32x8, a: i32x8, b: i32x8) -> i32x8;
+    fn vpdpwsud_256(src: i32x8, a: i16x16, b: u16x16) -> i32x8;

    #[link_name = "llvm.x86.avx2.vpdpwsuds.128"]
-    fn vpdpwsuds_128(src: i32x4, a: i32x4, b: i32x4) -> i32x4;
+    fn vpdpwsuds_128(src: i32x4, a: i16x8, b: u16x8) -> i32x4;
    #[link_name = "llvm.x86.avx2.vpdpwsuds.256"]
-    fn vpdpwsuds_256(src: i32x8, a: i32x8, b: i32x8) -> i32x8;
+    fn vpdpwsuds_256(src: i32x8, a: i16x16, b: u16x16) -> i32x8;

    #[link_name = "llvm.x86.avx2.vpdpwusd.128"]
-    fn vpdpwusd_128(src: i32x4, a: i32x4, b: i32x4) -> i32x4;
+    fn vpdpwusd_128(src: i32x4, a: u16x8, b: i16x8) -> i32x4;
    #[link_name = "llvm.x86.avx2.vpdpwusd.256"]
-    fn vpdpwusd_256(src: i32x8, a: i32x8, b: i32x8) -> i32x8;
+    fn vpdpwusd_256(src: i32x8, a: u16x16, b: i16x16) -> i32x8;

    #[link_name = "llvm.x86.avx2.vpdpwusds.128"]
-    fn vpdpwusds_128(src: i32x4, a: i32x4, b: i32x4) -> i32x4;
+    fn vpdpwusds_128(src: i32x4, a: u16x8, b: i16x8) -> i32x4;
    #[link_name = "llvm.x86.avx2.vpdpwusds.256"]
-    fn vpdpwusds_256(src: i32x8, a: i32x8, b: i32x8) -> i32x8;
+    fn vpdpwusds_256(src: i32x8, a: u16x16, b: i16x16) -> i32x8;

    #[link_name = "llvm.x86.avx2.vpdpwuud.128"]
-    fn vpdpwuud_128(src: i32x4, a: i32x4, b: i32x4) -> i32x4;
+    fn vpdpwuud_128(src: i32x4, a: u16x8, b: u16x8) -> i32x4;
    #[link_name = "llvm.x86.avx2.vpdpwuud.256"]
-    fn vpdpwuud_256(src: i32x8, a: i32x8, b: i32x8) -> i32x8;
+    fn vpdpwuud_256(src: i32x8, a: u16x16, b: u16x16) -> i32x8;

    #[link_name = "llvm.x86.avx2.vpdpwuuds.128"]
-    fn vpdpwuuds_128(src: i32x4, a: i32x4, b: i32x4) -> i32x4;
+    fn vpdpwuuds_128(src: i32x4, a: u16x8, b: u16x8) -> i32x4;
    #[link_name = "llvm.x86.avx2.vpdpwuuds.256"]
-    fn vpdpwuuds_256(src: i32x8, a: i32x8, b: i32x8) -> i32x8;
+    fn vpdpwuuds_256(src: i32x8, a: u16x16, b: u16x16) -> i32x8;
 }

 #[cfg(test)]
@@ -774,3 +774,7 @@ pub(crate) const fn $as_from(self) -> $from {
 mod kl;
 #[stable(feature = "keylocker_x86", since = "1.89.0")]
 pub use self::kl::*;
+
+mod movrs;
+#[unstable(feature = "movrs_target_feature", issue = "137976")]
+pub use self::movrs::*;
@@ -0,0 +1,23 @@
+//! Read-shared move intrinsics
+
+#[cfg(test)]
+use stdarch_test::assert_instr;
+
+unsafe extern "unadjusted" {
+    #[link_name = "llvm.x86.prefetchrs"]
+    fn prefetchrs(p: *const u8);
+}
+
+/// Prefetches the cache line that contains address `p`, with an indication that the source memory
+/// location is likely to become read-shared by multiple processors, i.e., read in the future by at
+/// least one other processor before it is written, assuming it is ever written in the future.
+///
+/// Note: this intrinsic is safe to use even though it takes a raw pointer argument. In general, it
+/// cannot change the behavior of the program, and it does not trap on invalid pointers.
+#[inline]
+#[target_feature(enable = "movrs")]
+#[cfg_attr(all(test, not(target_vendor = "apple")), assert_instr(prefetchrst2))]
+#[unstable(feature = "movrs_target_feature", issue = "137976")]
+pub fn _m_prefetchrs(p: *const u8) {
+    unsafe { prefetchrs(p) }
+}
@@ -398,6 +398,22 @@ pub unsafe fn _tile_cvtrowd2ps<const TILE: i32>(row: u32) -> __m512 {
    tcvtrowd2ps(TILE as i8, row).as_m512()
 }

+/// Moves row `ROW` of tile `TILE` to a zmm register, converting the packed 32-bit signed integer
+/// elements to packed single-precision (32-bit) floating-point elements.
+#[inline]
+#[rustc_legacy_const_generics(0, 1)]
+#[target_feature(enable = "amx-avx512,avx10.2")]
+#[cfg_attr(
+    all(test, any(target_os = "linux", target_env = "msvc")),
+    assert_instr(tcvtrowd2ps, TILE = 0, ROW = 0)
+)]
+#[unstable(feature = "x86_amx_intrinsics", issue = "126622")]
+pub unsafe fn _tile_cvtrowd2psi<const TILE: i32, const ROW: i32>() -> __m512 {
+    static_assert_uimm_bits!(TILE, 3);
+    static_assert_uimm_bits!(ROW, 6);
+    tcvtrowd2psi(TILE as i8, ROW as u32).as_m512()
+}
+
 /// Moves a row from a tile register to a zmm register, converting the packed single-precision (32-bit)
 /// floating-point elements to packed half-precision (16-bit) floating-point elements. The resulting
 /// 16-bit elements are placed in the high 16-bits within each 32-bit element of the returned vector.
@@ -414,6 +430,23 @@ pub unsafe fn _tile_cvtrowps2phh<const TILE: i32>(row: u32) -> __m512h {
    tcvtrowps2phh(TILE as i8, row).as_m512h()
 }

+/// Moves row `ROW` of tile `TILE` to a zmm register, converting the packed single-precision (32-bit)
+/// floating-point elements to packed half-precision (16-bit) floating-point elements. The resulting
+/// 16-bit elements are placed in the high 16-bits within each 32-bit element of the returned vector.
+#[inline]
+#[rustc_legacy_const_generics(0, 1)]
+#[target_feature(enable = "amx-avx512,avx10.2")]
+#[cfg_attr(
+    all(test, any(target_os = "linux", target_env = "msvc")),
+    assert_instr(tcvtrowps2phh, TILE = 0, ROW = 0)
+)]
+#[unstable(feature = "x86_amx_intrinsics", issue = "126622")]
+pub unsafe fn _tile_cvtrowps2phhi<const TILE: i32, const ROW: i32>() -> __m512h {
+    static_assert_uimm_bits!(TILE, 3);
+    static_assert_uimm_bits!(ROW, 6);
+    tcvtrowps2phhi(TILE as i8, ROW as u32).as_m512h()
+}
+
 /// Moves a row from a tile register to a zmm register, converting the packed single-precision (32-bit)
 /// floating-point elements to packed half-precision (16-bit) floating-point elements. The resulting
 /// 16-bit elements are placed in the low 16-bits within each 32-bit element of the returned vector.
@@ -430,6 +463,23 @@ pub unsafe fn _tile_cvtrowps2phl<const TILE: i32>(row: u32) -> __m512h {
    tcvtrowps2phl(TILE as i8, row).as_m512h()
 }

+/// Moves row `ROW` of tile `TILE` to a zmm register, converting the packed single-precision (32-bit)
+/// floating-point elements to packed half-precision (16-bit) floating-point elements. The resulting
+/// 16-bit elements are placed in the low 16-bits within each 32-bit element of the returned vector.
+#[inline]
+#[rustc_legacy_const_generics(0, 1)]
+#[target_feature(enable = "amx-avx512,avx10.2")]
+#[cfg_attr(
+    all(test, any(target_os = "linux", target_env = "msvc")),
+    assert_instr(tcvtrowps2phl, TILE = 0, ROW = 0)
+)]
+#[unstable(feature = "x86_amx_intrinsics", issue = "126622")]
+pub unsafe fn _tile_cvtrowps2phli<const TILE: i32, const ROW: i32>() -> __m512h {
+    static_assert_uimm_bits!(TILE, 3);
+    static_assert_uimm_bits!(ROW, 6);
+    tcvtrowps2phli(TILE as i8, ROW as u32).as_m512h()
+}
+
 /// Moves one row of tile data into a zmm vector register
 #[inline]
 #[rustc_legacy_const_generics(0)]
@@ -444,6 +494,21 @@ pub unsafe fn _tile_movrow<const TILE: i32>(row: u32) -> __m512i {
    tilemovrow(TILE as i8, row).as_m512i()
 }

+/// Moves row `ROW` of tile `TILE` into a zmm vector register.
+#[inline]
+#[rustc_legacy_const_generics(0, 1)]
+#[target_feature(enable = "amx-avx512,avx10.2")]
+#[cfg_attr(
+    all(test, any(target_os = "linux", target_env = "msvc")),
+    assert_instr(tilemovrow, TILE = 0, ROW = 0)
+)]
+#[unstable(feature = "x86_amx_intrinsics", issue = "126622")]
+pub unsafe fn _tile_movrowi<const TILE: i32, const ROW: i32>() -> __m512i {
+    static_assert_uimm_bits!(TILE, 3);
+    static_assert_uimm_bits!(ROW, 6);
+    tilemovrowi(TILE as i8, ROW as u32).as_m512i()
+}
+
 #[allow(improper_ctypes)]
 unsafe extern "C" {
    #[link_name = "llvm.x86.ldtilecfg"]
@@ -492,12 +557,20 @@ pub unsafe fn _tile_movrow<const TILE: i32>(row: u32) -> __m512i {
    fn tmmultf32ps(dst: i8, a: i8, b: i8);
    #[link_name = "llvm.x86.tcvtrowd2ps"]
    fn tcvtrowd2ps(tile: i8, row: u32) -> f32x16;
+    #[link_name = "llvm.x86.tcvtrowd2psi"]
+    fn tcvtrowd2psi(tile: i8, row: u32) -> f32x16;
    #[link_name = "llvm.x86.tcvtrowps2phh"]
    fn tcvtrowps2phh(tile: i8, row: u32) -> f16x32;
+    #[link_name = "llvm.x86.tcvtrowps2phhi"]
+    fn tcvtrowps2phhi(tile: i8, row: u32) -> f16x32;
    #[link_name = "llvm.x86.tcvtrowps2phl"]
    fn tcvtrowps2phl(tile: i8, row: u32) -> f16x32;
+    #[link_name = "llvm.x86.tcvtrowps2phli"]
+    fn tcvtrowps2phli(tile: i8, row: u32) -> f16x32;
    #[link_name = "llvm.x86.tilemovrow"]
    fn tilemovrow(tile: i8, row: u32) -> i32x16;
+    #[link_name = "llvm.x86.tilemovrowi"]
+    fn tilemovrowi(tile: i8, row: u32) -> i32x16;
 }

 #[cfg(test)]
@@ -1032,6 +1105,50 @@ fn test_tile_movrow() {
        }
    }

+    macro_rules! wrap_imm4 {
+        ($name:ident :: <$TILE:literal>, $row:expr) => {
+            match $row {
+                0 => $name::<$TILE, 0>(),
+                1 => $name::<$TILE, 1>(),
+                2 => $name::<$TILE, 2>(),
+                3 => $name::<$TILE, 3>(),
+                4 => $name::<$TILE, 4>(),
+                5 => $name::<$TILE, 5>(),
+                6 => $name::<$TILE, 6>(),
+                7 => $name::<$TILE, 7>(),
+                8 => $name::<$TILE, 8>(),
+                9 => $name::<$TILE, 9>(),
+                10 => $name::<$TILE, 10>(),
+                11 => $name::<$TILE, 11>(),
+                12 => $name::<$TILE, 12>(),
+                13 => $name::<$TILE, 13>(),
+                14 => $name::<$TILE, 14>(),
+                15 => $name::<$TILE, 15>(),
+                _ => panic!("row index out of range"),
+            }
+        };
+    }
+
+    #[simd_test(enable = "amx-avx512,avx10.2")]
+    fn test_tile_movrowi() {
+        unsafe {
+            _init_amx();
+            let array: [[u8; 64]; 16] = array::from_fn(|i| [i as _; _]);
+
+            let mut config = __tilecfg::default();
+            config.palette = 1;
+            config.colsb[0] = 64;
+            config.rows[0] = 16;
+            _tile_loadconfig(config.as_ptr());
+            _tile_loadd::<0>(array.as_ptr().cast(), 64);
+
+            for i in 0..16 {
+                let row = wrap_imm4!(_tile_movrowi::<0>, i);
+                assert_eq!(*row.as_u8x64().as_array(), [i as _; _]);
+            }
+        }
+    }
+
    #[simd_test(enable = "amx-avx512,avx10.2")]
    fn test_tile_cvtrowd2ps() {
        unsafe {
@@ -1051,6 +1168,26 @@ fn test_tile_cvtrowd2ps() {
        }
    }

+    #[simd_test(enable = "amx-avx512,avx10.2")]
+    fn test_tile_cvtrowd2psi() {
+        unsafe {
+            _init_amx();
+            let array: [[u32; 16]; 16] = array::from_fn(|i| [i as _; _]);
+
+            let mut config = __tilecfg::default();
+            config.palette = 1;
+            config.colsb[0] = 64;
+            config.rows[0] = 16;
+            _tile_loadconfig(config.as_ptr());
+            _tile_loadd::<0>(array.as_ptr().cast(), 64);
+
+            for i in 0..16 {
+                let row = wrap_imm4!(_tile_cvtrowd2psi::<0>, i);
+                assert_eq!(*row.as_f32x16().as_array(), [i as _; _]);
+            }
+        }
+    }
+
    #[simd_test(enable = "amx-avx512,avx10.2")]
    fn test_tile_cvtrowps2phh() {
        unsafe {
@@ -1073,6 +1210,28 @@ fn test_tile_cvtrowps2phh() {
        }
    }

+    #[simd_test(enable = "amx-avx512,avx10.2")]
+    fn test_tile_cvtrowps2phhi() {
+        unsafe {
+            _init_amx();
+            let array: [[f32; 16]; 16] = array::from_fn(|i| [i as _; _]);
+
+            let mut config = __tilecfg::default();
+            config.palette = 1;
+            config.colsb[0] = 64;
+            config.rows[0] = 16;
+            _tile_loadconfig(config.as_ptr());
+            _tile_loadd::<0>(array.as_ptr().cast(), 64);
+            for i in 0..16 {
+                let row = wrap_imm4!(_tile_cvtrowps2phhi::<0>, i);
+                assert_eq!(
+                    *row.as_f16x32().as_array(),
+                    array::from_fn(|j| if j & 1 == 0 { 0.0 } else { i as _ })
+                );
+            }
+        }
+    }
+
    #[simd_test(enable = "amx-avx512,avx10.2")]
    fn test_tile_cvtrowps2phl() {
        unsafe {
@@ -1095,6 +1254,28 @@ fn test_tile_cvtrowps2phl() {
        }
    }

+    #[simd_test(enable = "amx-avx512,avx10.2")]
+    fn test_tile_cvtrowps2phli() {
+        unsafe {
+            _init_amx();
+            let array: [[f32; 16]; 16] = array::from_fn(|i| [i as _; _]);
+
+            let mut config = __tilecfg::default();
+            config.palette = 1;
+            config.colsb[0] = 64;
+            config.rows[0] = 16;
+            _tile_loadconfig(config.as_ptr());
+            _tile_loadd::<0>(array.as_ptr().cast(), 64);
+            for i in 0..16 {
+                let row = wrap_imm4!(_tile_cvtrowps2phli::<0>, i);
+                assert_eq!(
+                    *row.as_f16x32().as_array(),
+                    array::from_fn(|j| if j & 1 == 0 { i as _ } else { 0.0 })
+                );
+            }
+        }
+    }
+
    #[simd_test(enable = "amx-tf32")]
    fn test_tile_mmultf32ps() {
        unsafe {
@@ -81,3 +81,7 @@
 mod amx;
 #[unstable(feature = "x86_amx_intrinsics", issue = "126622")]
 pub use self::amx::*;
+
+mod movrs;
+#[unstable(feature = "movrs_target_feature", issue = "137976")]
+pub use self::movrs::*;
@@ -0,0 +1,94 @@
+//! Read-shared move intrinsics
+
+#[cfg(test)]
+use stdarch_test::assert_instr;
+
+unsafe extern "unadjusted" {
+    #[link_name = "llvm.x86.movrsqi"]
+    fn movrsqi(src: *const i8) -> i8;
+    #[link_name = "llvm.x86.movrshi"]
+    fn movrshi(src: *const i16) -> i16;
+    #[link_name = "llvm.x86.movrssi"]
+    fn movrssi(src: *const i32) -> i32;
+    #[link_name = "llvm.x86.movrsdi"]
+    fn movrsdi(src: *const i64) -> i64;
+}
+
+/// Moves a byte from the source to the destination, with an indication that the source memory
+/// location is likely to become read-shared by multiple processors, i.e., read in the future by at
+/// least one other processor before it is written, assuming it is ever written in the future.
+#[inline]
+#[target_feature(enable = "movrs")]
+#[cfg_attr(all(test, not(target_vendor = "apple")), assert_instr(movrs))]
+#[unstable(feature = "movrs_target_feature", issue = "137976")]
+pub unsafe fn _movrs_i8(src: *const i8) -> i8 {
+    movrsqi(src)
+}
+
+/// Moves a 16-bit word from the source to the destination, with an indication that the source memory
+/// location is likely to become read-shared by multiple processors, i.e., read in the future by at
+/// least one other processor before it is written, assuming it is ever written in the future.
+#[inline]
+#[target_feature(enable = "movrs")]
+#[cfg_attr(all(test, not(target_vendor = "apple")), assert_instr(movrs))]
+#[unstable(feature = "movrs_target_feature", issue = "137976")]
+pub unsafe fn _movrs_i16(src: *const i16) -> i16 {
+    movrshi(src)
+}
+
+/// Moves a 32-bit doubleword from the source to the destination, with an indication that the source
+/// memory location is likely to become read-shared by multiple processors, i.e., read in the future
+/// by at least one other processor before it is written, assuming it is ever written in the future.
+#[inline]
+#[target_feature(enable = "movrs")]
+#[cfg_attr(all(test, not(target_vendor = "apple")), assert_instr(movrs))]
+#[unstable(feature = "movrs_target_feature", issue = "137976")]
+pub unsafe fn _movrs_i32(src: *const i32) -> i32 {
+    movrssi(src)
+}
+
+/// Moves a 64-bit quadword from the source to the destination, with an indication that the source
+/// memory location is likely to become read-shared by multiple processors, i.e., read in the future
+/// by at least one other processor before it is written, assuming it is ever written in the future.
+#[inline]
+#[target_feature(enable = "movrs")]
+#[cfg_attr(all(test, not(target_vendor = "apple")), assert_instr(movrs))]
+#[unstable(feature = "movrs_target_feature", issue = "137976")]
+pub unsafe fn _movrs_i64(src: *const i64) -> i64 {
+    movrsdi(src)
+}
+
+#[cfg(test)]
+mod tests {
+    use stdarch_test::simd_test;
+
+    use super::*;
+
+    #[simd_test(enable = "movrs")]
+    fn test_movrs_i8() {
+        let x: i8 = 42;
+        let y = unsafe { _movrs_i8(&x) };
+        assert_eq!(x, y);
+    }
+
+    #[simd_test(enable = "movrs")]
+    fn test_movrs_i16() {
+        let x: i16 = 42;
+        let y = unsafe { _movrs_i16(&x) };
+        assert_eq!(x, y);
+    }
+
+    #[simd_test(enable = "movrs")]
+    fn test_movrs_i32() {
+        let x: i32 = 42;
+        let y = unsafe { _movrs_i32(&x) };
+        assert_eq!(x, y);
+    }
+
+    #[simd_test(enable = "movrs")]
+    fn test_movrs_i64() {
+        let x: i64 = 42;
+        let y = unsafe { _movrs_i64(&x) };
+        assert_eq!(x, y);
+    }
+}
@@ -6625,6 +6625,7 @@ intrinsics:
              arch: aarch64,arm64ec


+
  - name: "vmaxnm{neon_type.no}"
    doc: Floating-point Maximum Number (vector)
    arguments: ["a: {neon_type}", "b: {neon_type}"]
@@ -6636,7 +6637,11 @@ intrinsics:
      - float64x1_t
      - float64x2_t
    compose:
-      - FnCall: [simd_fmax, [a, b]]
+      - LLVMLink:
+          name: "fmaxnm.{neon_type}"
+          links:
+            - link: "llvm.aarch64.neon.fmaxnm.{neon_type}"
+              arch: aarch64,arm64ec


  - name: "vmaxnmh_{type}"
@@ -6652,7 +6657,11 @@ intrinsics:
    types:
      - f16
    compose:
-      - FnCall: ["f16::max", [a, b]]
+      - LLVMLink:
+          name: "vmaxh.{neon_type}"
+          links:
+            - link: "llvm.aarch64.neon.fmaxnm.{type}"
+              arch: aarch64,arm64ec


  - name: "vminnmh_{type}"
@@ -6668,7 +6677,11 @@ intrinsics:
    types:
      - f16
    compose:
-      - FnCall: ["f16::min", [a, b]]
+      - LLVMLink:
+          name: "vminh.{neon_type}"
+          links:
+            - link: "llvm.aarch64.neon.fminnm.{type}"
+              arch: aarch64,arm64ec


  - name: "vmaxnmv{neon_type[0].no}"
@@ -6682,7 +6695,11 @@ intrinsics:
      - [float32x2_t, f32]
      - [float64x2_t, f64]
    compose:
-      - FnCall: [simd_reduce_max, [a]]
+      - LLVMLink:
+          name: "fmaxnmv.{neon_type[0]}"
+          links:
+            - link: "llvm.aarch64.neon.fmaxnmv.{type[1]}.{neon_type[0]}"
+              arch: aarch64,arm64ec

  - name: "vmaxnmv{neon_type[0].no}"
    doc: Floating-point maximum number across vector
@@ -6694,7 +6711,11 @@ intrinsics:
    types:
      - [float32x4_t, f32]
    compose:
-      - FnCall: [simd_reduce_max, [a]]
+      - LLVMLink:
+          name: "fmaxnmv.{neon_type[0]}"
+          links:
+            - link: "llvm.aarch64.neon.fmaxnmv.{type[1]}.{neon_type[0]}"
+              arch: aarch64,arm64ec


  - name: "vmaxnmv{neon_type[0].no}"
@@ -6711,7 +6732,11 @@ intrinsics:
      - [float16x4_t, f16]
      - [float16x8_t, f16]
    compose:
-      - FnCall: [simd_reduce_max, [a]]
+      - LLVMLink:
+          name: "fmaxnmv.{neon_type[0]}"
+          links:
+            - link: "llvm.aarch64.neon.fmaxnmv.{type[1]}.{neon_type[0]}"
+              arch: aarch64,arm64ec


  - name: "vminnmv{neon_type[0].no}"
@@ -6728,7 +6753,11 @@ intrinsics:
      - [float16x4_t, f16]
      - [float16x8_t, f16]
    compose:
-      - FnCall: [simd_reduce_min, [a]]
+      - LLVMLink:
+          name: "fminnmv.{neon_type[0]}"
+          links:
+            - link: "llvm.aarch64.neon.fminnmv.{type[1]}.{neon_type[0]}"
+              arch: aarch64,arm64ec


  - name: "vmaxv{neon_type[0].no}"
@@ -6837,7 +6866,11 @@ intrinsics:
      - float64x1_t
      - float64x2_t
    compose:
-      - FnCall: [simd_fmin, [a, b]]
+      - LLVMLink:
+          name: "fminnm.{neon_type}"
+          links:
+            - link: "llvm.aarch64.neon.fminnm.{neon_type}"
+              arch: aarch64,arm64ec

  - name: "vminnmv{neon_type[0].no}"
    doc: "Floating-point minimum number across vector"
@@ -6851,7 +6884,11 @@ intrinsics:
      - [float32x2_t, "f32"]
      - [float64x2_t, "f64"]
    compose:
-      - FnCall: [simd_reduce_min, [a]]
+      - LLVMLink:
+          name: "vminnmv.{neon_type[0]}"
+          links:
+            - link: "llvm.aarch64.neon.fminnmv.{type[1]}.{neon_type[0]}"
+              arch: aarch64,arm64ec

  - name: "vminnmv{neon_type[0].no}"
    doc: "Floating-point minimum number across vector"
@@ -6864,7 +6901,11 @@ intrinsics:
    types:
      - [float32x4_t, "f32"]
    compose:
-      - FnCall: [simd_reduce_min, [a]]
+      - LLVMLink:
+          name: "vminnmv.{neon_type[0]}"
+          links:
+            - link: "llvm.aarch64.neon.fminnmv.{type[1]}.{neon_type[0]}"
+              arch: aarch64,arm64ec

  - name: "vmovl_high{neon_type[0].noq}"
    doc: Vector move
@@ -7324,7 +7324,13 @@ intrinsics:
      - float32x2_t
      - float32x4_t
    compose:
-      - FnCall: [simd_fmax, [a, b]]
+      - LLVMLink:
+          name: "fmaxnm.{neon_type}"
+          links:
+            - link: "llvm.arm.neon.vmaxnm.{neon_type}"
+              arch: arm
+            - link: "llvm.aarch64.neon.fmaxnm.{neon_type}"
+              arch: aarch64,arm64ec


  - name: "vmaxnm{neon_type.no}"
@@ -7344,7 +7350,13 @@ intrinsics:
      - float16x4_t
      - float16x8_t
    compose:
-      - FnCall: [simd_fmax, [a, b]]
+      - LLVMLink:
+          name: "fmaxnm.{neon_type}"
+          links:
+            - link: "llvm.arm.neon.vmaxnm.{neon_type}"
+              arch: arm
+            - link: "llvm.aarch64.neon.fmaxnm.{neon_type}"
+              arch: aarch64,arm64ec


  - name: "vminnm{neon_type.no}"
@@ -7364,7 +7376,13 @@ intrinsics:
      - float16x4_t
      - float16x8_t
    compose:
-      - FnCall: [simd_fmin, [a, b]]
+      - LLVMLink:
+          name: "fminnm.{neon_type}"
+          links:
+            - link: "llvm.arm.neon.vminnm.{neon_type}"
+              arch: arm
+            - link: "llvm.aarch64.neon.fminnm.{neon_type}"
+              arch: aarch64,arm64ec


  - name: "vmin{neon_type.no}"
@@ -7477,7 +7495,13 @@ intrinsics:
      - float32x2_t
      - float32x4_t
    compose:
-      - FnCall: [simd_fmin, [a, b]]
+      - LLVMLink:
+          name: "fminnm.{neon_type}"
+          links:
+            - link: "llvm.arm.neon.vminnm.{neon_type}"
+              arch: arm
+            - link: "llvm.aarch64.neon.fminnm.{neon_type}"
+              arch: aarch64,arm64ec

  - name: "vpadd{neon_type.no}"
    doc: Floating-point add pairwise
@@ -445,6 +445,7 @@ fn verify_all_signatures() {
                    && !rust.file.ends_with("v7.rs\"")
                    && !rust.file.ends_with("v8.rs\"")
                    && !rust.file.ends_with("mte.rs\"")
+                    && !rust.file.ends_with("rand.rs\"")
                    && !rust.file.ends_with("ex.rs\"")
                    && !skip_intrinsic_verify.contains(&rust.name)
                {
@@ -211,6 +211,7 @@ fn verify_all_signatures() {
                "_rdseed64_step",
                // Prefetch
                "_mm_prefetch",
+                "_m_prefetchrs",
                // CMPXCHG
                "cmpxchg16b",
                // Undefined
@@ -305,7 +306,7 @@ fn verify_all_signatures() {
            }

            // FIXME: these have not been added to Intrinsics Guide yet
-            if ["amx-avx512", "amx-fp8", "amx-movrs", "amx-tf32"]
+            if ["amx-avx512", "amx-fp8", "amx-movrs", "amx-tf32", "movrs"]
                .iter()
                .any(|f| feature.contains(f))
            {
@@ -13,7 +13,6 @@
 //! and you should see `746573740a` get printed out.

 #![allow(internal_features)]
-#![feature(wasm_target_feature)]
 #![cfg_attr(test, feature(test))]
 #![cfg_attr(
    any(target_arch = "x86", target_arch = "x86_64"),
@@ -1 +1 @@
-139651428df86cf88443295542c12ea617cbb587
+eda4fc7733ee89e484d7120cafbd80dcb2fce66e
@@ -152,16 +152,14 @@ ENV CFLAGS_armv5te_unknown_linux_musleabi="-march=armv5te -marm -mfloat-abi=soft
    CC_riscv64gc_unknown_none_elf=riscv64-unknown-elf-gcc \
    CFLAGS_riscv64gc_unknown_none_elf=-march=rv64gc -mabi=lp64

-ENV RUST_CONFIGURE_ARGS \
-      --musl-root-armv5te=/musl-armv5te \
+ENV RUST_CONFIGURE_ARGS="--musl-root-armv5te=/musl-armv5te \
      --musl-root-arm=/musl-arm \
      --musl-root-armhf=/musl-armhf \
      --musl-root-armv7hf=/musl-armv7hf \
-      --disable-docs
+      --disable-docs"

-ENV SCRIPT \
-      python3 ../x.py --stage 2 test --host='' --target $RUN_MAKE_TARGETS tests/run-make tests/run-make-cargo && \
-      python3 ../x.py dist --host='' --target $TARGETS
+ENV SCRIPT="python3 ../x.py --stage 2 test --host= --target $RUN_MAKE_TARGETS tests/run-make tests/run-make-cargo && \
+      python3 ../x.py dist --host= --target $TARGETS"

 # sccache
 COPY scripts/sccache.sh /scripts/
@@ -95,16 +95,14 @@ RUN /tmp/freebsd-toolchain.sh i686
 COPY scripts/sccache.sh /scripts/
 RUN sh /scripts/sccache.sh

-ENV CARGO_TARGET_X86_64_UNKNOWN_FUCHSIA_AR /usr/local/bin/llvm-ar
-ENV CARGO_TARGET_X86_64_UNKNOWN_FUCHSIA_RUSTFLAGS \
-C link-arg=--sysroot=/usr/local/core-linux-amd64-fuchsia-sdk/arch/x64/sysroot \
+ENV CARGO_TARGET_X86_64_UNKNOWN_FUCHSIA_AR="/usr/local/bin/llvm-ar"
+ENV CARGO_TARGET_X86_64_UNKNOWN_FUCHSIA_RUSTFLAGS="-C link-arg=--sysroot=/usr/local/core-linux-amd64-fuchsia-sdk/arch/x64/sysroot \
 -Lnative=/usr/local/core-linux-amd64-fuchsia-sdk/arch/x64/sysroot/lib \
-Lnative=/usr/local/core-linux-amd64-fuchsia-sdk/arch/x64/lib
-ENV CARGO_TARGET_AARCH64_UNKNOWN_FUCHSIA_AR /usr/local/bin/llvm-ar
-ENV CARGO_TARGET_AARCH64_UNKNOWN_FUCHSIA_RUSTFLAGS \
-C link-arg=--sysroot=/usr/local/core-linux-amd64-fuchsia-sdk/arch/arm64/sysroot \
+-Lnative=/usr/local/core-linux-amd64-fuchsia-sdk/arch/x64/lib"
+ENV CARGO_TARGET_AARCH64_UNKNOWN_FUCHSIA_AR="/usr/local/bin/llvm-ar"
+ENV CARGO_TARGET_AARCH64_UNKNOWN_FUCHSIA_RUSTFLAGS="-C link-arg=--sysroot=/usr/local/core-linux-amd64-fuchsia-sdk/arch/arm64/sysroot \
 -Lnative=/usr/local/core-linux-amd64-fuchsia-sdk/arch/arm64/sysroot/lib \
-Lnative=/usr/local/core-linux-amd64-fuchsia-sdk/arch/arm64/lib
+-Lnative=/usr/local/core-linux-amd64-fuchsia-sdk/arch/arm64/lib"

 ENV TARGETS=x86_64-unknown-fuchsia
 ENV TARGETS=$TARGETS,aarch64-unknown-fuchsia
@@ -136,8 +134,8 @@ RUN ln -s /usr/include/x86_64-linux-gnu/asm /usr/local/include/asm
 # musl-gcc can't find libgcc_s.so.1 since it doesn't use the standard search paths.
 RUN ln -s /usr/riscv64-linux-gnu/lib/libgcc_s.so.1 /usr/lib/gcc-cross/riscv64-linux-gnu/11/

-ENV RUST_CONFIGURE_ARGS --enable-extended --enable-lld --enable-llvm-bitcode-linker --disable-docs \
+ENV RUST_CONFIGURE_ARGS="--enable-extended --enable-lld --enable-llvm-bitcode-linker --disable-docs \
  --musl-root-armv7=/musl-armv7 \
-  --musl-root-riscv64gc=/musl-riscv64gc
+  --musl-root-riscv64gc=/musl-riscv64gc"

-ENV SCRIPT python3 ../x.py dist --host='' --target $TARGETS && python3 ../x.py dist --host='' --set build.sanitizers=true --target $TARGETS_SANITIZERS
+ENV SCRIPT="python3 ../x.py dist --host= --target $TARGETS && python3 ../x.py dist --host= --set build.sanitizers=true --target $TARGETS_SANITIZERS"
@@ -45,9 +45,8 @@ RUN curl -L https://github.com/WebAssembly/wasi-sdk/releases/download/wasi-sdk-3
  tar -xz
 ENV WASI_SDK_PATH=/wasi-sdk-30.0-x86_64-linux

-ENV RUST_CONFIGURE_ARGS \
-  --musl-root-x86_64=/usr/local/x86_64-linux-musl \
-  --set rust.lld
+ENV RUST_CONFIGURE_ARGS="--musl-root-x86_64=/usr/local/x86_64-linux-musl \
+  --set rust.lld"

 # Some run-make tests have assertions about code size, and enabling debug
 # assertions in libstd causes the binary to be much bigger than it would
@@ -58,10 +57,10 @@ ENV NO_OVERFLOW_CHECKS=1

 RUN curl -L https://github.com/bytecodealliance/wasmtime/releases/download/v38.0.4/wasmtime-v38.0.4-x86_64-linux.tar.xz | \
  tar -xJ
-ENV PATH "$PATH:/wasmtime-v38.0.4-x86_64-linux"
+ENV PATH="$PATH:/wasmtime-v38.0.4-x86_64-linux"

 ENV WASM_WASIP_TARGET=wasm32-wasip1
-ENV WASM_WASIP_SCRIPT python3 /checkout/x.py --stage 2 test --host='' --target $WASM_WASIP_TARGET \
+ENV WASM_WASIP_SCRIPT="python3 /checkout/x.py --stage 2 test --host= --target $WASM_WASIP_TARGET \
  tests/run-make \
  tests/run-make-cargo \
  tests/ui \
@@ -69,18 +68,18 @@ ENV WASM_WASIP_SCRIPT python3 /checkout/x.py --stage 2 test --host='' --target $
  tests/codegen-units \
  tests/codegen-llvm \
  tests/assembly-llvm \
-  library/core
+  library/core"

 ENV NVPTX_TARGETS=nvptx64-nvidia-cuda
-ENV NVPTX_SCRIPT python3 /checkout/x.py --stage 2 test --host='' --target $NVPTX_TARGETS \
+ENV NVPTX_SCRIPT="python3 /checkout/x.py --stage 2 test --host= --target $NVPTX_TARGETS \
  tests/run-make \
  tests/run-make-cargo \
-  tests/assembly-llvm
+  tests/assembly-llvm"

 ENV MUSL_TARGETS=x86_64-unknown-linux-musl \
    CC_x86_64_unknown_linux_musl=x86_64-linux-musl-gcc \
    CXX_x86_64_unknown_linux_musl=x86_64-linux-musl-g++
-ENV MUSL_SCRIPT python3 /checkout/x.py --stage 2 test --host='' --target $MUSL_TARGETS
+ENV MUSL_SCRIPT="python3 /checkout/x.py --stage 2 test --host= --target $MUSL_TARGETS"

 ENV UEFI_TARGETS=aarch64-unknown-uefi,i686-unknown-uefi,x86_64-unknown-uefi \
    CC_aarch64_unknown_uefi=clang-11 \
@@ -89,9 +88,9 @@ ENV UEFI_TARGETS=aarch64-unknown-uefi,i686-unknown-uefi,x86_64-unknown-uefi \
    CXX_i686_unknown_uefi=clang++-11 \
    CC_x86_64_unknown_uefi=clang-11 \
    CXX_x86_64_unknown_uefi=clang++-11
-ENV UEFI_SCRIPT python3 /checkout/x.py --stage 2 build --host='' --target $UEFI_TARGETS && \
+ENV UEFI_SCRIPT="python3 /checkout/x.py --stage 2 build --host= --target $UEFI_TARGETS && \
  python3 /checkout/x.py --stage 2 test tests/run-make-cargo/uefi-qemu/rmake.rs --target aarch64-unknown-uefi && \
  python3 /checkout/x.py --stage 2 test tests/run-make-cargo/uefi-qemu/rmake.rs --target i686-unknown-uefi && \
-  python3 /checkout/x.py --stage 2 test tests/run-make-cargo/uefi-qemu/rmake.rs --target x86_64-unknown-uefi
+  python3 /checkout/x.py --stage 2 test tests/run-make-cargo/uefi-qemu/rmake.rs --target x86_64-unknown-uefi"

-ENV SCRIPT $WASM_WASIP_SCRIPT && $NVPTX_SCRIPT && $MUSL_SCRIPT && $UEFI_SCRIPT
+ENV SCRIPT="$WASM_WASIP_SCRIPT && $NVPTX_SCRIPT && $MUSL_SCRIPT && $UEFI_SCRIPT"
@@ -3,6 +3,12 @@

 > **NOTE**: This chapter largely talks about early/late bound as being solely relevant when discussing function item types/function definitions. This is potentially not completely true, async blocks and closures should likely be discussed somewhat in this chapter.

+See also these blog posts from when the distinction between early and late bound parameters was
+introduced: [Intermingled parameter lists] and [Intermingled parameter lists, take 2].
+
+[Intermingled parameter lists]: https://smallcultfollowing.com/babysteps/blog/2013/10/29/intermingled-parameter-lists/
+[Intermingled parameter lists, take 2]: https://smallcultfollowing.com/babysteps/blog/2013/11/04/intermingled-parameter-lists/
+
 ## What does it mean to be "early" bound or "late" bound

 Every function definition has a corresponding ZST that implements the `Fn*` traits known as a [function item type][function_item_type]. This part of the chapter will talk a little bit about the "desugaring" of function item types as it is useful context for explaining the difference between early bound and late bound generic parameters.
@@ -836,3 +836,20 @@ In CI, compare modes are only used in one Linux builder, and only with the follo
 Note that compare modes are separate to [revisions](#revisions).
 All revisions are tested when running `./x test tests/ui`, however compare-modes must be
 manually run individually via the `--compare-mode` flag.
+
+## Parallel frontend
+
+Compiletest can be run with the `--parallel-frontend-threads` flag to run the compiler in parallel mode.
+This can be used to check that the compiler produces the same output in parallel mode as in non-parallel mode, and to check for any issues that might arise in parallel mode.
+
+To run the tests in parallel mode, you need to pass the `--parallel-frontend-threads` CLI flag:
+
+```bash
+./x test tests/ui -- --parallel-frontend-threads=N --iteration-count=M
+```
+
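+Where `N` is the number of threads to use for the parallel frontend (`0` auto-detects the number of available hardware threads, and the default of `1` leaves the parallel frontend disabled), and `M` is the number of times to run each test (a positive integer, `1` by default; higher values increase the chances of catching any non-determinism).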
+
+Also, when `--parallel-frontend-threads` is set to anything other than `1`, the `compare-output-by-lines` directive is implied for all UI tests, since the order of the output lines from the parallel frontend can be non-deterministic.
+
+The parallel frontend is available in UI tests only at the moment, and is not currently supported in other test suites.
@@ -148,6 +148,7 @@ Some examples of `X` in `ignore-X` or `only-X`:
 - When [remote testing] is used: `remote`
 - When particular debuggers are being tested: `cdb`, `gdb`, `lldb`
 - When particular debugger versions are matched: `ignore-gdb-version`
+- When the [parallel frontend] is enabled: `ignore-parallel-frontend`
 - Specific [compare modes]: `compare-mode-polonius`, `compare-mode-chalk`,
  `compare-mode-split-dwarf`, `compare-mode-split-dwarf-single`
 - The two different test modes used by coverage tests:
@@ -233,6 +234,7 @@ The following directives will check LLVM support:
 See also [Debuginfo tests](compiletest.md#debuginfo-tests) for directives for ignoring debuggers.

 [remote testing]: running.md#running-tests-on-a-remote-machine
+[parallel frontend]: compiletest.md#parallel-frontend
 [compare modes]: ui.md#compare-modes
 [`x86_64-gnu-debug`]: https://github.com/rust-lang/rust/blob/ab3dba92db355b8d97db915a2dca161a117e959c/src/ci/docker/host-x86_64/x86_64-gnu-debug/Dockerfile#L32
 [`aarch64-gnu-debug`]: https://github.com/rust-lang/rust/blob/20c909ff9cdd88d33768a4ddb8952927a675b0ad/src/ci/docker/host-aarch64/aarch64-gnu-debug/Dockerfile#L32
@@ -2430,7 +2430,7 @@ mod size_asserts {
    static_assert_size!(GenericParamDef, 40);
    static_assert_size!(Generics, 16);
    static_assert_size!(Item, 8);
-    static_assert_size!(ItemInner, 144);
+    static_assert_size!(ItemInner, 136);
    static_assert_size!(ItemKind, 48);
    static_assert_size!(PathSegment, 32);
    static_assert_size!(Type, 32);
@@ -721,9 +721,17 @@ pub struct Config {
    ///
    /// This is forwarded from bootstrap's `jobs` configuration.
    pub jobs: u32,
+
+    /// Number of parallel threads to use for the frontend when building test artifacts.
+    pub parallel_frontend_threads: u32,
+    /// Number of times to execute each test.
+    pub iteration_count: u32,
 }

 impl Config {
+    pub const DEFAULT_PARALLEL_FRONTEND_THREADS: u32 = 1;
+    pub const DEFAULT_ITERATION_COUNT: u32 = 1;
+
    /// FIXME: this run scheme is... confusing.
    pub fn run_enabled(&self) -> bool {
        self.run.unwrap_or_else(|| {
@@ -834,6 +842,17 @@ pub fn has_subprocess_support(&self) -> bool {
            || self.target_cfg().os == "emscripten";
        !unsupported_target
    }
+
+    /// Whether the parallel frontend is enabled,
+    /// which is the case when `parallel_frontend_threads` is not set to `1`.
+    ///
+    /// - `0` means auto-detect: use the number of available hardware threads on the host.
+    ///   This still counts as the parallel frontend being enabled.
+    /// - `1` means single-threaded (parallel frontend disabled).
+    /// - `>1` means an explicitly configured thread count.
+    pub fn parallel_frontend_enabled(&self) -> bool {
+        self.parallel_frontend_threads != 1
+    }
 }

 /// Known widths of `target_has_atomic`.
@@ -67,7 +67,7 @@ pub(crate) fn from_file_directives(
        let mut props = EarlyProps::default();

        iter_directives(
-            config.mode,
+            config,
            file_directives,
            // (dummy comment to force args into vertical layout)
            &mut |ln: &DirectiveLine<'_>| {
@@ -362,7 +362,7 @@ fn load_from(&mut self, testfile: &Utf8Path, test_revision: Option<&str>, config
            let file_directives = FileDirectives::from_file_contents(testfile, &file_contents);

            iter_directives(
-                config.mode,
+                config,
                &file_directives,
                // (dummy comment to force args into vertical layout)
                &mut |ln: &DirectiveLine<'_>| {
@@ -574,43 +574,51 @@ fn check_directive<'a>(
 }

 fn iter_directives(
-    mode: TestMode,
+    config: &Config,
    file_directives: &FileDirectives<'_>,
    it: &mut dyn FnMut(&DirectiveLine<'_>),
 ) {
    let testfile = file_directives.path;

-    // Coverage tests in coverage-run mode always have these extra directives, without needing to
-    // specify them manually in every test file.
-    //
-    // FIXME(jieyouxu): I feel like there's a better way to do this, leaving for later.
-    if mode == TestMode::CoverageRun {
-        let extra_directives: &[&str] = &[
-            "//@ needs-profiler-runtime",
-            // FIXME(pietroalbini): this test currently does not work on cross-compiled targets
-            // because remote-test is not capable of sending back the *.profraw files generated by
-            // the LLVM instrumentation.
-            "//@ ignore-cross-compile",
-        ];
-        // Process the extra implied directives, with a dummy line number of 0.
-        for directive_str in extra_directives {
-            let directive_line = line_directive(testfile, LineNumber::ZERO, directive_str)
-                .unwrap_or_else(|| panic!("bad extra-directive line: {directive_str:?}"));
-            it(&directive_line);
+    let extra_directives = match config.mode {
+        TestMode::CoverageRun => {
+            // Coverage tests in coverage-run mode always have these extra directives, without needing to
+            // specify them manually in every test file.
+            //
+            // FIXME(jieyouxu): I feel like there's a better way to do this, leaving for later.
+            vec![
+                "//@ needs-profiler-runtime",
+                // FIXME(pietroalbini): this test currently does not work on cross-compiled targets
+                // because remote-test is not capable of sending back the *.profraw files generated by
+                // the LLVM instrumentation.
+                "//@ ignore-cross-compile",
+            ]
+        }
+        TestMode::Codegen if !file_directives.has_explicit_no_std_core_attribute => {
+            // Note: affects all codegen test suites under test mode `codegen`, e.g. `codegen-llvm`.
+            //
+            // Codegen tests automatically receive implied `//@ needs-target-std`, unless
+            // `#![no_std]`/`#![no_core]` attribute was explicitly seen. The rationale is basically to avoid
+            // having to manually maintain a bunch of `//@ needs-target-std` directives esp. for targets
+            // tested/built out-of-tree.
+            vec!["//@ needs-target-std"]
+        }
+        TestMode::Ui if config.parallel_frontend_enabled() => {
+            // UI tests in parallel-frontend mode always have this extra directive, without needing to
+            // specify it manually in every test file.
+            vec!["//@ compare-output-by-lines"]
        }
-    }

-    // Note: affects all codegen test suites under test mode `codegen`, e.g. `codegen-llvm`.
-    //
-    // Codegen tests automatically receive implied `//@ needs-target-std`, unless
-    // `#![no_std]`/`#![no_core]` attribute was explicitly seen. The rationale is basically to avoid
-    // having to manually maintain a bunch of `//@ needs-target-std` directives esp. for targets
-    // tested/built out-of-tree.
-    if mode == TestMode::Codegen && !file_directives.has_explicit_no_std_core_attribute {
-        let implied_needs_target_std_line =
-            line_directive(testfile, LineNumber::ZERO, "//@ needs-target-std")
-                .expect("valid `needs-target-std` directive line");
-        it(&implied_needs_target_std_line);
+        _ => {
+            // No extra directives for other test modes.
+            vec![]
+        }
+    };
+
+    for directive_str in extra_directives {
+        let directive_line = line_directive(testfile, LineNumber::ZERO, directive_str)
+            .unwrap_or_else(|| panic!("bad extra-directive line: {directive_str:?}"));
+        it(&directive_line);
    }

    for directive_line in &file_directives.lines {
@@ -951,55 +959,52 @@ pub(crate) fn make_test_description(
    let mut should_fail = false;

    // Scan through the test file to handle `ignore-*`, `only-*`, and `needs-*` directives.
-    iter_directives(
-        config.mode,
-        file_directives,
-        &mut |ln @ &DirectiveLine { line_number, .. }| {
-            if !ln.applies_to_test_revision(test_revision) {
-                return;
-            }
+    iter_directives(config, file_directives, &mut |ln @ &DirectiveLine { line_number, .. }| {
+        if !ln.applies_to_test_revision(test_revision) {
+            return;
+        }

-            // Parse `aux-*` directives, for use by up-to-date checks.
-            parse_and_update_aux(config, ln, aux_props);
+        // Parse `aux-*` directives, for use by up-to-date checks.
+        parse_and_update_aux(config, ln, aux_props);

-            macro_rules! decision {
-                ($e:expr) => {
-                    match $e {
-                        IgnoreDecision::Ignore { reason } => {
-                            ignore = true;
-                            ignore_message = Some(reason.into());
-                        }
-                        IgnoreDecision::Error { message } => {
-                            error!("{path}:{line_number}: {message}");
-                            *poisoned = true;
-                            return;
-                        }
-                        IgnoreDecision::Continue => {}
+        macro_rules! decision {
+            ($e:expr) => {
+                match $e {
+                    IgnoreDecision::Ignore { reason } => {
+                        ignore = true;
+                        ignore_message = Some(reason.into());
                    }
-                };
-            }
+                    IgnoreDecision::Error { message } => {
+                        error!("{path}:{line_number}: {message}");
+                        *poisoned = true;
+                        return;
+                    }
+                    IgnoreDecision::Continue => {}
+                }
+            };
+        }

-            decision!(cfg::handle_ignore(&cache.cfg_conditions, ln));
-            decision!(cfg::handle_only(&cache.cfg_conditions, ln));
-            decision!(needs::handle_needs(&cache.needs, config, ln));
-            decision!(ignore_llvm(config, ln));
-            decision!(ignore_backends(config, ln));
-            decision!(needs_backends(config, ln));
-            decision!(ignore_cdb(config, ln));
-            decision!(ignore_gdb(config, ln));
-            decision!(ignore_lldb(config, ln));
+        decision!(cfg::handle_ignore(&cache.cfg_conditions, ln));
+        decision!(cfg::handle_only(&cache.cfg_conditions, ln));
+        decision!(needs::handle_needs(&cache.needs, config, ln));
+        decision!(ignore_llvm(config, ln));
+        decision!(ignore_backends(config, ln));
+        decision!(needs_backends(config, ln));
+        decision!(ignore_cdb(config, ln));
+        decision!(ignore_gdb(config, ln));
+        decision!(ignore_lldb(config, ln));
+        decision!(ignore_parallel_frontend(config, ln));

-            if config.target == "wasm32-unknown-unknown"
-                && config.parse_name_directive(ln, directives::CHECK_RUN_RESULTS)
-            {
-                decision!(IgnoreDecision::Ignore {
-                    reason: "ignored on WASM as the run results cannot be checked there".into(),
-                });
-            }
+        if config.target == "wasm32-unknown-unknown"
+            && config.parse_name_directive(ln, directives::CHECK_RUN_RESULTS)
+        {
+            decision!(IgnoreDecision::Ignore {
+                reason: "ignored on WASM as the run results cannot be checked there".into(),
+            });
+        }

-            should_fail |= config.parse_name_directive(ln, "should-fail");
-        },
-    );
+        should_fail |= config.parse_name_directive(ln, "should-fail");
+    });

    // The `should-fail` annotation doesn't apply to pretty tests,
    // since we run the pretty printer across all tests by default.
@@ -1270,6 +1275,17 @@ fn ignore_llvm(config: &Config, line: &DirectiveLine<'_>) -> IgnoreDecision {
    IgnoreDecision::Continue
 }

+fn ignore_parallel_frontend(config: &Config, line: &DirectiveLine<'_>) -> IgnoreDecision {
+    if config.parallel_frontend_enabled()
+        && config.parse_name_directive(line, "ignore-parallel-frontend")
+    {
+        return IgnoreDecision::Ignore {
+            reason: "ignored when the parallel frontend is enabled".into(),
+        };
+    }
+    IgnoreDecision::Continue
+}
+
 enum IgnoreDecision {
    Ignore { reason: String },
    Continue,
@@ -11,6 +11,7 @@
    "ignore-backends",
    "ignore-gdb-version",
    "ignore-llvm-version",
+    "ignore-parallel-frontend",
    "ignore-pass",
    // tidy-alphabetical-end
 ];
@@ -101,6 +101,7 @@
    "ignore-nvptx64",
    "ignore-nvptx64-nvidia-cuda",
    "ignore-openbsd",
+    "ignore-parallel-frontend",
    "ignore-pass",
    "ignore-powerpc",
    "ignore-powerpc64",
@@ -176,6 +177,7 @@
    "needs-sanitizer-cfi",
    "needs-sanitizer-dataflow",
    "needs-sanitizer-hwaddress",
+    "needs-sanitizer-kasan",
    "needs-sanitizer-kcfi",
    "needs-sanitizer-leak",
    "needs-sanitizer-memory",
@@ -219,7 +219,14 @@ fn parse_config(args: Vec<String>) -> Config {
            "CODEGEN BACKEND [NAME | PATH]",
        )
        .optflag("", "bypass-ignore-backends", "ignore `//@ ignore-backends` directives")
-        .reqopt("", "jobs", "number of parallel jobs bootstrap was configured with", "JOBS");
+        .reqopt("", "jobs", "number of parallel jobs bootstrap was configured with", "JOBS")
+        .optopt(
+            "",
+            "parallel-frontend-threads",
+            "number of parallel threads to use for the frontend when building test artifacts",
+            "THREADS_COUNT",
+        )
+        .optopt("", "iteration-count", "number of times to execute each test", "COUNT");

    let (argv0, args_) = args.split_first().unwrap();
    if args.len() == 1 || args[1] == "-h" || args[1] == "--help" {
@@ -369,6 +376,20 @@ fn opt_path(m: &getopts::Matches, nm: &str) -> Utf8PathBuf {
        None => panic!("`--jobs` is required"),
    };

+    let parallel_frontend_threads = match matches.opt_str("parallel-frontend-threads") {
+        Some(threads) => {
+            threads.parse::<u32>().expect("expected `--parallel-frontend-threads` to be an `u32`")
+        }
+        None => Config::DEFAULT_PARALLEL_FRONTEND_THREADS,
+    };
+    let iteration_count = match matches.opt_str("iteration-count") {
+        Some(count) => {
+            count.parse::<u32>().expect("expected `--iteration-count` to be a positive integer")
+        }
+        None => Config::DEFAULT_ITERATION_COUNT,
+    };
+    assert!(iteration_count > 0, "`--iteration-count` must be a positive integer");
+
    Config {
        bless: matches.opt_present("bless"),
        fail_fast: matches.opt_present("fail-fast")
@@ -489,6 +510,9 @@ fn opt_path(m: &getopts::Matches, nm: &str) -> Utf8PathBuf {
        bypass_ignore_backends: matches.opt_present("bypass-ignore-backends"),

        jobs,
+
+        parallel_frontend_threads,
+        iteration_count,
    }
 }

@@ -272,22 +272,26 @@ fn run_revision(&self) {
        {
            self.fatal("cannot use should-ice in a test that is not cfail");
        }
-        match self.config.mode {
-            TestMode::Pretty => self.run_pretty_test(),
-            TestMode::DebugInfo => self.run_debuginfo_test(),
-            TestMode::Codegen => self.run_codegen_test(),
-            TestMode::RustdocHtml => self.run_rustdoc_html_test(),
-            TestMode::RustdocJson => self.run_rustdoc_json_test(),
-            TestMode::CodegenUnits => self.run_codegen_units_test(),
-            TestMode::Incremental => self.run_incremental_test(),
-            TestMode::RunMake => self.run_rmake_test(),
-            TestMode::Ui => self.run_ui_test(),
-            TestMode::MirOpt => self.run_mir_opt_test(),
-            TestMode::Assembly => self.run_assembly_test(),
-            TestMode::RustdocJs => self.run_rustdoc_js_test(),
-            TestMode::CoverageMap => self.run_coverage_map_test(), // see self::coverage
-            TestMode::CoverageRun => self.run_coverage_run_test(), // see self::coverage
-            TestMode::Crashes => self.run_crash_test(),
+        // Run the test multiple times if requested.
+        // This is useful for catching flaky tests under the parallel frontend.
+        for _ in 0..self.config.iteration_count {
+            match self.config.mode {
+                TestMode::Pretty => self.run_pretty_test(),
+                TestMode::DebugInfo => self.run_debuginfo_test(),
+                TestMode::Codegen => self.run_codegen_test(),
+                TestMode::RustdocHtml => self.run_rustdoc_html_test(),
+                TestMode::RustdocJson => self.run_rustdoc_json_test(),
+                TestMode::CodegenUnits => self.run_codegen_units_test(),
+                TestMode::Incremental => self.run_incremental_test(),
+                TestMode::RunMake => self.run_rmake_test(),
+                TestMode::Ui => self.run_ui_test(),
+                TestMode::MirOpt => self.run_mir_opt_test(),
+                TestMode::Assembly => self.run_assembly_test(),
+                TestMode::RustdocJs => self.run_rustdoc_js_test(),
+                TestMode::CoverageMap => self.run_coverage_map_test(), // see self::coverage
+                TestMode::CoverageRun => self.run_coverage_run_test(), // see self::coverage
+                TestMode::Crashes => self.run_crash_test(),
+            }
        }
    }

@@ -1752,6 +1756,14 @@ fn make_compile_args(
                compiler.arg("-Zwrite-long-types-to-disk=no");
                // FIXME: use this for other modes too, for perf?
                compiler.arg("-Cstrip=debuginfo");
+
+                if self.config.parallel_frontend_enabled() {
+                    // Currently, we only use multiple threads for the UI test suite,
+                    // because UI tests can effectively verify the parallel frontend and
+                    // require minimal modification. The option will later be extended to
+                    // other test suites.
+                    compiler.arg(&format!("-Zthreads={}", self.config.parallel_frontend_threads));
+                }
            }
            TestMode::MirOpt => {
                // We check passes under test to minimize the mir-opt test dump
@@ -140,5 +140,7 @@ fn incomplete_config_for_rustdoc_gui_test() -> Config {
        override_codegen_backend: None,
        bypass_ignore_backends: Default::default(),
        jobs: Default::default(),
+        parallel_frontend_threads: Config::DEFAULT_PARALLEL_FRONTEND_THREADS,
+        iteration_count: Config::DEFAULT_ITERATION_COUNT,
    }
 }
@@ -188,23 +188,26 @@ fn vpdpbusd<'tcx>(
    let (b, b_len) = ecx.project_to_simd(b)?;
    let (dest, dest_len) = ecx.project_to_simd(dest)?;

-    // fn vpdpbusd(src: i32x16, a: i32x16, b: i32x16) -> i32x16;
-    // fn vpdpbusd256(src: i32x8, a: i32x8, b: i32x8) -> i32x8;
-    // fn vpdpbusd128(src: i32x4, a: i32x4, b: i32x4) -> i32x4;
+    // fn vpdpbusd(src: i32x16, a: u8x64, b: i8x64) -> i32x16;
+    // fn vpdpbusd256(src: i32x8, a: u8x32, b: i8x32) -> i32x8;
+    // fn vpdpbusd128(src: i32x4, a: u8x16, b: i8x16) -> i32x4;
    assert_eq!(dest_len, src_len);
-    assert_eq!(dest_len, a_len);
-    assert_eq!(dest_len, b_len);
+    assert_eq!(dest_len * 4, a_len);
+    assert_eq!(a_len, b_len);

    for i in 0..dest_len {
        let src = ecx.read_scalar(&ecx.project_index(&src, i)?)?.to_i32()?;
-        let a = ecx.read_scalar(&ecx.project_index(&a, i)?)?.to_u32()?;
-        let b = ecx.read_scalar(&ecx.project_index(&b, i)?)?.to_u32()?;
        let dest = ecx.project_index(&dest, i)?;

-        let zipped = a.to_le_bytes().into_iter().zip(b.to_le_bytes());
-        let intermediate_sum: i32 = zipped
-            .map(|(a, b)| i32::from(a).strict_mul(i32::from(b.cast_signed())))
-            .fold(0, |x, y| x.strict_add(y));
+        let mut intermediate_sum: i32 = 0;
+        for j in 0..4 {
+            let idx = i.strict_mul(4).strict_add(j);
+            let a = ecx.read_scalar(&ecx.project_index(&a, idx)?)?.to_u8()?;
+            let b = ecx.read_scalar(&ecx.project_index(&b, idx)?)?.to_i8()?;
+
+            let product = i32::from(a).strict_mul(i32::from(b));
+            intermediate_sum = intermediate_sum.strict_add(product);
+        }

        // Use `wrapping_add` because `src` is an arbitrary i32 and the addition can overflow.
        let res = Scalar::from_i32(intermediate_sum.wrapping_add(src));
@@ -98,8 +98,8 @@ pub unsafe fn issue_75761() {

 macro_rules! check {
    ($func:ident $ty:ident $class:ident $mov:literal $modifier:literal) => {
-        // FIXME(f16_f128): Change back to `$func(x: $ty) -> $ty` once arm64ec can pass and return
-        // `f16` and `f128` without LLVM erroring.
+        // FIXME(f128): Change back to `$func(x: $ty) -> $ty` once arm64ec can pass and return
+        // `f128` without LLVM erroring.
        // LLVM issue: <https://github.com/llvm/llvm-project/issues/94434>
        #[no_mangle]
        pub unsafe fn $func(inp: &$ty, out: &mut $ty) {
@@ -117,7 +117,7 @@ pub unsafe fn $func(inp: &$ty, out: &mut $ty) {

 macro_rules! check_reg {
    ($func:ident $ty:ident $reg:tt $mov:literal) => {
-        // FIXME(f16_f128): See FIXME in `check!`
+        // FIXME(f128): See FIXME in `check!`
        #[no_mangle]
        pub unsafe fn $func(inp: &$ty, out: &mut $ty) {
            let x = *inp;
@@ -344,10 +344,6 @@ Tests for `#![feature(coverage_attribute)]`. See [Tracking issue for function at

 Tests for crate resolution and loading behavior, including `extern crate` declarations, `--extern` flags, or the `use` keyword.

-## `tests/ui/cross/`: Various tests related to the concept of "cross"
-
-**FIXME**: The unifying topic of these tests appears to be that their filenames begin with the word "cross". The similarities end there - one test is about "cross-borrowing" a `Box<T>` into `&T`, while another is about a global trait used "across" files. Some of this terminology is really outdated and does not match the current terminology. Additionally, "cross" is also way too generic, it's easy to confuse with cross-compile.
-
 ## `tests/ui/cross-crate/`: Cross-Crate Interaction

 Tests for behavior spanning multiple crates, including visibility rules, trait implementations, and type resolution across crate boundaries.
@@ -1,7 +1,7 @@
 //@ only-x86_64
 //@ build-fail
 //@ compile-flags: -C target-feature=-avx
-
+//@ ignore-parallel-frontend post-monomorphization errors
 #![feature(portable_simd)]
 #![feature(simd_ffi)]
 #![allow(improper_ctypes_definitions)]
@@ -1,7 +1,7 @@
 //@ edition:2018
 // Test that impl trait does not allow creating recursive types that are
 // otherwise forbidden when using `async` and `await`.
-
+//@ ignore-parallel-frontend  query cycle
 async fn rec_1() { //~ ERROR recursion in an async fn
    rec_2().await;
 }
@@ -9,7 +9,11 @@ LL |     let bar = 5;
   |         |
   |         expected integer, found `bar`
   |         `bar` is interpreted as a unit struct, not a new binding
-   |         help: introduce a new binding instead: `other_bar`
+   |
+help: introduce a new binding instead
+   |
+LL |     let other_bar = 5;
+   |         ++++++

 error: aborting due to 1 previous error

@@ -0,0 +1,11 @@
+// Test that implicitly converting from `Box<T>` to `&T` is
+// forbidden when `T` is a trait.
+
+struct Foo;
+trait Trait { fn foo(&self) {} }
+impl Trait for Foo {}
+
+pub fn main() {
+    let x: Box<dyn Trait> = Box::new(Foo);
+    let _y: &dyn Trait = x; //~ ERROR E0308
+}
@@ -1,5 +1,5 @@
 error[E0308]: mismatched types
-  --> $DIR/cross-borrow-trait.rs:12:26
+  --> $DIR/no-implicit-box-to-ref-coercion.rs:10:26
   |
 LL |     let _y: &dyn Trait = x;
   |             ----------   ^ expected `&dyn Trait`, found `Box<dyn Trait>`
@@ -1,5 +1,5 @@
 //@ dont-require-annotations: NOTE
-
+//@ ignore-parallel-frontend different alloc ids
 #![allow(incomplete_features)]
 #![feature(adt_const_params, unsized_const_params)]

@@ -6,7 +6,7 @@ LL |     let _ = foo([0; 1]);
   |             |
   |             required by a bound introduced by this call
   |
-   = note: cannot satisfy `_: Foo`
+   = note: the type must implement `Foo`
 help: the trait `Foo` is implemented for `u8`
  --> $DIR/issue-83249.rs:8:1
   |
@@ -1,6 +1,6 @@
 //@ stderr-per-bitwidth
 //@ dont-require-annotations: NOTE
-
+//@ ignore-parallel-frontend different alloc ids
 use std::mem::transmute;

 fn get_flag<const FlagSet: bool, const ShortName: char>() -> Option<char> {
@@ -1,7 +1,7 @@
 // Strip out raw byte dumps to make comparison platform-independent:
 //@ normalize-stderr: "(the raw bytes of the constant) \(size: [0-9]*, align: [0-9]*\)" -> "$1 (size: $$SIZE, align: $$ALIGN)"
 //@ normalize-stderr: "([0-9a-f][0-9a-f] |╾─*A(LLOC)?[0-9]+(\+[a-z0-9]+)?(<imm>)?─*╼ )+ *│.*" -> "HEX_DUMP"
-
+//@ ignore-parallel-frontend  different alloc ids
 #![feature(
    slice_from_ptr_range,
    const_slice_from_ptr_range,
@@ -1,6 +1,6 @@
 fn main() {
    use std::ptr;
-
+//@ ignore-parallel-frontend different alloc ids
    const DATA: [u32; 1] = [42];

    const PAST_END_PTR: *const u32 = unsafe { DATA.as_ptr().add(1) };
@@ -1,5 +1,5 @@
 //@ run-pass
-
+//@ ignore-parallel-frontend queries overflow the depth limit
 // https://github.com/rust-lang/rust/issues/34997

 pub const CST_1: u32 = 0;
@@ -1,5 +1,5 @@
 //@ check-fail
-
+//@ ignore-parallel-frontend different alloc ids
 #![feature(core_intrinsics, const_cmp)]
 use std::intrinsics::compare_bytes;
 use std::mem::MaybeUninit;
@@ -1,5 +1,5 @@
 //@ stderr-per-bitwidth
-
+//@ ignore-parallel-frontend different alloc ids
 #[derive(Copy, Clone)]
 union Foo {
    a: isize,
@@ -1,5 +1,5 @@
 //@ build-fail
-
+//@ ignore-parallel-frontend different alloc ids
 #![feature(c_variadic)]
 #![feature(const_c_variadic)]
 #![feature(const_trait_impl)]
@@ -1,7 +1,7 @@
 //@ only-x86_64
 //@ stderr-per-bitwidth
 //@ dont-require-annotations: NOTE
-
+//@ ignore-parallel-frontend different alloc ids
 #[repr(C)]
 union Nonsense {
    u: usize,
@@ -3,7 +3,7 @@
 #![feature(core_intrinsics)]
 #![feature(const_heap)]
 use std::intrinsics;
-
+//@ ignore-parallel-frontend different alloc ids
 const BAR: &i32 = unsafe { //~ ERROR: uninitialized memory
    // Make the pointer immutable to avoid errors related to mutable pointers in constants.
    &*(intrinsics::const_make_global(intrinsics::const_allocate(4, 4)) as *const i32)
@@ -1,6 +1,6 @@
 #![feature(core_intrinsics)]
 #![feature(const_heap)]
-
+//@ ignore-parallel-frontend different alloc ids
 // Strip out raw byte dumps to make comparison platform-independent:
 //@ normalize-stderr: "(the raw bytes of the constant) \(size: [0-9]*, align: [0-9]*\)" -> "$1 (size: $$SIZE, align: $$ALIGN)"
 //@ normalize-stderr: "([0-9a-f][0-9a-f] |╾─*A(LLOC)?[0-9]+(\+[a-z0-9]+)?(<imm>)?─*╼ )+ *│.*" -> "HEX_DUMP"
@@ -1,6 +1,6 @@
 #![feature(core_intrinsics)]
 #![feature(const_heap)]
-
+//@ ignore-parallel-frontend different alloc ids
 use std::intrinsics;

 const _X: () = unsafe {
@@ -1,6 +1,6 @@
 #![feature(core_intrinsics)]
 #![feature(const_heap)]
-
+//@ ignore-parallel-frontend different alloc ids
 use std::intrinsics;

 const _X: () = unsafe {
@@ -1,5 +1,5 @@
 // Ensure that we can't call `const_make_global` on dangling pointers.
-
+//@ ignore-parallel-frontend different alloc ids
 #![feature(core_intrinsics)]
 #![feature(const_heap)]

@@ -1,5 +1,5 @@
 // Ensure that we can't call `const_make_global` on pointers not in the current interpreter.
-
+//@ ignore-parallel-frontend different alloc ids
 #![feature(core_intrinsics)]
 #![feature(const_heap)]

@@ -1,5 +1,5 @@
 // Ensure that we can't call `const_make_global` twice.
-
+//@ ignore-parallel-frontend different alloc ids
 #![feature(core_intrinsics)]
 #![feature(const_heap)]

@@ -2,7 +2,7 @@
 #![feature(core_intrinsics)]
 #![feature(const_heap)]
 use std::intrinsics;
-
+//@ ignore-parallel-frontend different alloc ids
 const A: &u8 = unsafe {
    let ptr = intrinsics::const_allocate(1, 1);
    *ptr = 1;
@@ -1,5 +1,5 @@
 //@ build-fail
-
+//@ ignore-parallel-frontend post-monomorphization errors
 // Regression test for #66975
 #![warn(unconditional_panic)]
 #![feature(never_type)]
@@ -1,5 +1,5 @@
 // issue-49296: Unsafe shenanigans in constants can result in missing errors
-
+//@ ignore-parallel-frontend different alloc ids
 use std::mem::transmute;

 const fn wat(x: u64) -> &'static u64 {
@@ -2,7 +2,7 @@
 //@ revisions: normal mir-opt
 //@ [mir-opt]compile-flags: -Zmir-opt-level=4
 //@ dont-require-annotations: NOTE
-
+//@ ignore-parallel-frontend post-monomorphization errors
 trait C {
    const BOO: usize;
 }
@@ -1,6 +1,6 @@
 //@ build-fail
 //@ dont-require-annotations: NOTE
-
+//@ ignore-parallel-frontend post-monomorphization errors
 trait Unsigned {
    const MAX: u8;
 }
@@ -1,6 +1,6 @@
 //! This mixes fragments from different pointers, in a way that we should not accept.
 //! See <https://github.com/rust-lang/rust/issues/146291>.
-
+//@ ignore-parallel-frontend different alloc ids
 static A: u8 = 123;
 static B: u8 = 123;

@@ -3,7 +3,7 @@
 // ignore-tidy-linelength
 //@ normalize-stderr: "╾─*ALLOC[0-9]+(\+[a-z0-9]+)?(<imm>)?─*╼" -> "╾ALLOC_ID$1╼"
 //@ dont-require-annotations: NOTE
-
+//@ ignore-parallel-frontend different alloc ids
 #![allow(invalid_value, unnecessary_transmutes)]
 #![feature(never_type, rustc_attrs, ptr_metadata, slice_from_ptr_range, const_slice_from_ptr_range)]

@@ -4,7 +4,7 @@
    let _val = *ptr; //~NOTE: failed here
    //~^ERROR: based on pointer with alignment 1, but alignment 4 is required
 };
-
+//@ ignore-parallel-frontend different alloc ids
 const MISALIGNED_STORE: () = unsafe {
    let mut mem = [0u32; 8];
    let ptr = mem.as_mut_ptr().byte_add(1);
@@ -1,5 +1,5 @@
 //! Ensure we error when trying to load from a pointer whose provenance has been messed with.
-
+//@ ignore-parallel-frontend different alloc ids
 const PARTIAL_OVERWRITE: () = {
    let mut p = &42;
    // Overwrite one byte with a no-provenance value.
@@ -2,7 +2,7 @@ enum E {
    A(u8),
    B,
 }
-
+//@ ignore-parallel-frontend different alloc ids
 const _: u8 = {
    let mut e = E::A(1);
    let p = if let E::A(x) = &mut e { x as *mut u8 } else { unreachable!() };
@@ -4,7 +4,7 @@
 //@ normalize-stderr: "0x0+" -> "0x0"
 //@ normalize-stderr: "0x[0-9](\.\.|\])" -> "0x%$1"
 //@ dont-require-annotations: NOTE
-
+//@ ignore-parallel-frontend different alloc ids
 #![feature(never_type)]
 #![allow(invalid_value, unnecessary_transmutes)]

@@ -12,7 +12,7 @@

 //@ stderr-per-bitwidth
 //@ dont-require-annotations: NOTE
-
+//@ ignore-parallel-frontend different alloc ids
 trait Trait {}

 const INVALID_VTABLE_ALIGNMENT: &dyn Trait =
@@ -2,7 +2,7 @@
 //@ normalize-stderr: "(the raw bytes of the constant) \(size: [0-9]*, align: [0-9]*\)" -> "$1 (size: $$SIZE, align: $$ALIGN)"
 //@ normalize-stderr: "([0-9a-f][0-9a-f] |╾─*ALLOC[0-9]+(\+[a-z0-9]+)?─*╼ )+ *│.*" -> "HEX_DUMP"
 //@ dont-require-annotations: NOTE
-
+//@ ignore-parallel-frontend different alloc ids
 #![allow(invalid_value)] // make sure we cannot allow away the errors tested here
 #![feature(rustc_attrs, ptr_metadata)]

--- a/Show More
+++ b/Show More