diff --git a/CMakeLists.txt b/CMakeLists.txt
index 6061b22354..579967a0a1 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -133,9 +133,9 @@ else()
     set(ZIG_SYSTEM_LIBCXX "stdc++" CACHE STRING "system libcxx name for build.zig")
 endif()
 
-find_package(llvm 21)
-find_package(clang 21)
-find_package(lld 21)
+find_package(llvm 22)
+find_package(clang 22)
+find_package(lld 22)
 
 if(ZIG_STATIC_ZLIB)
     if (MSVC)
diff --git a/build.zig b/build.zig
index 3ac840c309..ddbb9c5db0 100644
--- a/build.zig
+++ b/build.zig
@@ -1206,13 +1206,16 @@ const zig_cpp_sources = [_][]const u8{
 const clang_libs = [_][]const u8{
     "clangFrontendTool",
     "clangCodeGen",
-    "clangFrontend",
-    "clangDriver",
-    "clangSerialization",
-    "clangSema",
     "clangStaticAnalyzerFrontend",
     "clangStaticAnalyzerCheckers",
     "clangStaticAnalyzerCore",
+    "clangCrossTU",
+    "clangFrontend",
+    "clangDriver",
+    "clangOptions",
+    "clangSerialization",
+    "clangSema",
+    "clangAnalysisLifetimeSafety",
     "clangAnalysis",
     "clangASTMatchers",
     "clangAST",
@@ -1224,8 +1227,9 @@ const clang_libs = [_][]const u8{
     "clangLex",
     "clangRewriteFrontend",
     "clangRewrite",
-    "clangCrossTU",
     "clangIndex",
+    "clangFormat",
+    "clangToolingInclusions",
     "clangToolingCore",
     "clangExtractAPI",
     "clangSupport",
@@ -1373,11 +1377,12 @@ const llvm_libs = [_][]const u8{
     "LLVMObjCopy",
     "LLVMMCA",
     "LLVMMCDisassembler",
+    "LLVMDTLTO",
     "LLVMLTO",
     "LLVMFrontendOpenACC",
-    "LLVMFrontendHLSL",
     "LLVMFrontendDriver",
     "LLVMExtensions",
+    "LLVMPlugins",
     "LLVMPasses",
     "LLVMHipStdPar",
     "LLVMCoroutines",
@@ -1404,6 +1409,7 @@ const llvm_libs = [_][]const u8{
     "LLVMObjCARCOpts",
     "LLVMCodeGenTypes",
     "LLVMCGData",
+    "LLVMCAS",
     "LLVMIRPrinter",
     "LLVMInterfaceStub",
     "LLVMFileCheck",
@@ -1422,15 +1428,17 @@ const llvm_libs = [_][]const u8{
     "LLVMDebugInfoCodeView",
     "LLVMDebugInfoGSYM",
     "LLVMDebugInfoDWARF",
-    "LLVMDebugInfoDWARFLowLevel",
     "LLVMObject",
     "LLVMTextAPI",
     "LLVMMCParser",
     "LLVMIRReader",
     "LLVMAsmParser",
     "LLVMMC",
+    "LLVMDebugInfoDWARFLowLevel",
     "LLVMBitReader",
+    "LLVMFrontendHLSL",
     "LLVMFuzzerCLI",
+    "LLVMABI",
     "LLVMCore",
     "LLVMRemarks",
     "LLVMBitstreamReader",
diff --git a/ci/aarch64-freebsd-debug.sh b/ci/aarch64-freebsd-debug.sh
index 2f0ebdc723..ad852e4601 100755
--- a/ci/aarch64-freebsd-debug.sh
+++ b/ci/aarch64-freebsd-debug.sh
@@ -7,7 +7,7 @@ set -e
 
 TARGET="aarch64-freebsd-none"
 MCPU="baseline"
-CACHE_BASENAME="zig+llvm+lld+clang-$TARGET-0.16.0-dev.2287+eb3f16db5"
+CACHE_BASENAME="zig+llvm+lld+clang-$TARGET-0.17.0-dev.203+073889523"
 PREFIX="$HOME/deps/$CACHE_BASENAME"
 ZIG="$PREFIX/bin/zig"
 
diff --git a/ci/aarch64-freebsd-release.sh b/ci/aarch64-freebsd-release.sh
index 4f12e8367b..3db9e9a21d 100755
--- a/ci/aarch64-freebsd-release.sh
+++ b/ci/aarch64-freebsd-release.sh
@@ -7,7 +7,7 @@ set -e
 
 TARGET="aarch64-freebsd-none"
 MCPU="baseline"
-CACHE_BASENAME="zig+llvm+lld+clang-$TARGET-0.16.0-dev.2287+eb3f16db5"
+CACHE_BASENAME="zig+llvm+lld+clang-$TARGET-0.17.0-dev.203+073889523"
 PREFIX="$HOME/deps/$CACHE_BASENAME"
 ZIG="$PREFIX/bin/zig"
 
diff --git a/ci/aarch64-linux-debug.sh b/ci/aarch64-linux-debug.sh
index 7a4a6daa2a..5ee0a33c1c 100755
--- a/ci/aarch64-linux-debug.sh
+++ b/ci/aarch64-linux-debug.sh
@@ -7,7 +7,7 @@ set -e
 
 TARGET="aarch64-linux-musl"
 MCPU="baseline"
-CACHE_BASENAME="zig+llvm+lld+clang-$TARGET-0.16.0-dev.104+689461e31"
+CACHE_BASENAME="zig+llvm+lld+clang-$TARGET-0.17.0-dev.203+073889523"
 PREFIX="$HOME/deps/$CACHE_BASENAME"
 ZIG="$PREFIX/bin/zig"
 
diff --git a/ci/aarch64-linux-release.sh b/ci/aarch64-linux-release.sh
index 39ad9767ab..bcdeade117 100755
--- a/ci/aarch64-linux-release.sh
+++ b/ci/aarch64-linux-release.sh
@@ -7,7 +7,7 @@ set -e
 
 TARGET="aarch64-linux-musl"
 MCPU="baseline"
-CACHE_BASENAME="zig+llvm+lld+clang-$TARGET-0.16.0-dev.104+689461e31"
+CACHE_BASENAME="zig+llvm+lld+clang-$TARGET-0.17.0-dev.203+073889523"
 PREFIX="$HOME/deps/$CACHE_BASENAME"
 ZIG="$PREFIX/bin/zig"
 
diff --git a/ci/aarch64-macos-debug.sh b/ci/aarch64-macos-debug.sh
index 3a8a6c6484..9592a825de 100755
--- a/ci/aarch64-macos-debug.sh
+++ b/ci/aarch64-macos-debug.sh
@@ -8,7 +8,7 @@ set -e
 ZIGDIR="$PWD"
 TARGET="aarch64-macos-none"
 MCPU="baseline"
-CACHE_BASENAME="zig+llvm+lld+clang-$TARGET-0.16.0-dev.104+689461e31"
+CACHE_BASENAME="zig+llvm+lld+clang-$TARGET-0.17.0-dev.203+073889523"
 PREFIX="$HOME/$CACHE_BASENAME"
 ZIG="$PREFIX/bin/zig"
 
diff --git a/ci/aarch64-macos-release.sh b/ci/aarch64-macos-release.sh
index 4c4c240786..dc9837012c 100755
--- a/ci/aarch64-macos-release.sh
+++ b/ci/aarch64-macos-release.sh
@@ -8,7 +8,7 @@ set -e
 ZIGDIR="$PWD"
 TARGET="aarch64-macos-none"
 MCPU="baseline"
-CACHE_BASENAME="zig+llvm+lld+clang-$TARGET-0.16.0-dev.104+689461e31"
+CACHE_BASENAME="zig+llvm+lld+clang-$TARGET-0.17.0-dev.203+073889523"
 PREFIX="$HOME/$CACHE_BASENAME"
 ZIG="$PREFIX/bin/zig"
 
diff --git a/ci/aarch64-netbsd-debug.sh b/ci/aarch64-netbsd-debug.sh
index 4f5eb0d410..3445cd6526 100755
--- a/ci/aarch64-netbsd-debug.sh
+++ b/ci/aarch64-netbsd-debug.sh
@@ -7,7 +7,7 @@ set -e
 
 TARGET="aarch64-netbsd-none"
 MCPU="baseline"
-CACHE_BASENAME="zig+llvm+lld+clang-$TARGET-0.16.0-dev.2287+eb3f16db5"
+CACHE_BASENAME="zig+llvm+lld+clang-$TARGET-0.17.0-dev.203+073889523"
 PREFIX="$HOME/deps/$CACHE_BASENAME"
 ZIG="$PREFIX/bin/zig"
 
diff --git a/ci/aarch64-netbsd-release.sh b/ci/aarch64-netbsd-release.sh
index d9d9477904..e866720e30 100755
--- a/ci/aarch64-netbsd-release.sh
+++ b/ci/aarch64-netbsd-release.sh
@@ -7,7 +7,7 @@ set -e
 
 TARGET="aarch64-netbsd-none"
 MCPU="baseline"
-CACHE_BASENAME="zig+llvm+lld+clang-$TARGET-0.16.0-dev.2287+eb3f16db5"
+CACHE_BASENAME="zig+llvm+lld+clang-$TARGET-0.17.0-dev.203+073889523"
 PREFIX="$HOME/deps/$CACHE_BASENAME"
 ZIG="$PREFIX/bin/zig"
 
diff --git a/ci/aarch64-windows.ps1 b/ci/aarch64-windows.ps1
index 96e0764256..c711127a61 100644
--- a/ci/aarch64-windows.ps1
+++ b/ci/aarch64-windows.ps1
@@ -1,5 +1,5 @@
 $TARGET = "aarch64-windows-gnu"
-$ZIG_LLVM_CLANG_LLD_NAME = "zig+llvm+lld+clang-$TARGET-0.16.0-dev.104+689461e31"
+$ZIG_LLVM_CLANG_LLD_NAME = "zig+llvm+lld+clang-$TARGET-0.17.0-dev.203+073889523"
 $MCPU = "baseline"
 $ZIG_LLVM_CLANG_LLD_URL = "https://ziglang.org/deps/$ZIG_LLVM_CLANG_LLD_NAME.zip"
 $PREFIX_PATH = "$(Get-Location)\..\$ZIG_LLVM_CLANG_LLD_NAME"
diff --git a/ci/loongarch64-linux-debug.sh b/ci/loongarch64-linux-debug.sh
index 4cba17b031..a31239d41a 100755
--- a/ci/loongarch64-linux-debug.sh
+++ b/ci/loongarch64-linux-debug.sh
@@ -7,7 +7,7 @@ set -e
 
 TARGET="loongarch64-linux-musl"
 MCPU="baseline"
-CACHE_BASENAME="zig+llvm+lld+clang-$TARGET-0.16.0-dev.157+7fdd60df1"
+CACHE_BASENAME="zig+llvm+lld+clang-$TARGET-0.17.0-dev.203+073889523"
 PREFIX="$HOME/deps/$CACHE_BASENAME"
 ZIG="$PREFIX/bin/zig"
 
diff --git a/ci/loongarch64-linux-release.sh b/ci/loongarch64-linux-release.sh
index 5b05284d26..5163b07fde 100755
--- a/ci/loongarch64-linux-release.sh
+++ b/ci/loongarch64-linux-release.sh
@@ -7,7 +7,7 @@ set -e
 
 TARGET="loongarch64-linux-musl"
 MCPU="baseline"
-CACHE_BASENAME="zig+llvm+lld+clang-$TARGET-0.16.0-dev.157+7fdd60df1"
+CACHE_BASENAME="zig+llvm+lld+clang-$TARGET-0.17.0-dev.203+073889523"
 PREFIX="$HOME/deps/$CACHE_BASENAME"
 ZIG="$PREFIX/bin/zig"
 
diff --git a/ci/powerpc64le-linux-debug.sh b/ci/powerpc64le-linux-debug.sh
index 1b9a51e44d..5950a5cd8b 100755
--- a/ci/powerpc64le-linux-debug.sh
+++ b/ci/powerpc64le-linux-debug.sh
@@ -7,7 +7,7 @@ set -e
 
 TARGET="powerpc64le-linux-musl"
 MCPU="baseline"
-CACHE_BASENAME="zig+llvm+lld+clang-$TARGET-0.16.0-dev.1594+9fa433d71"
+CACHE_BASENAME="zig+llvm+lld+clang-$TARGET-0.17.0-dev.203+073889523"
 PREFIX="$HOME/deps/$CACHE_BASENAME"
 ZIG="$PREFIX/bin/zig"
 
diff --git a/ci/powerpc64le-linux-release.sh b/ci/powerpc64le-linux-release.sh
index 77e1ca803a..2b911a9aef 100755
--- a/ci/powerpc64le-linux-release.sh
+++ b/ci/powerpc64le-linux-release.sh
@@ -7,7 +7,7 @@ set -e
 
 TARGET="powerpc64le-linux-musl"
 MCPU="baseline"
-CACHE_BASENAME="zig+llvm+lld+clang-$TARGET-0.16.0-dev.1594+9fa433d71"
+CACHE_BASENAME="zig+llvm+lld+clang-$TARGET-0.17.0-dev.203+073889523"
 PREFIX="$HOME/deps/$CACHE_BASENAME"
 ZIG="$PREFIX/bin/zig"
 
diff --git a/ci/riscv64-linux-debug.sh b/ci/riscv64-linux-debug.sh
index 6eace21297..631573cfec 100755
--- a/ci/riscv64-linux-debug.sh
+++ b/ci/riscv64-linux-debug.sh
@@ -7,7 +7,7 @@ set -e
 
 TARGET="riscv64-linux-musl"
 MCPU="baseline"
-CACHE_BASENAME="zig+llvm+lld+clang-riscv64-linux-musl-0.16.0-dev.104+689461e31"
+CACHE_BASENAME="zig+llvm+lld+clang-$TARGET-0.17.0-dev.203+073889523"
 PREFIX="$HOME/deps/$CACHE_BASENAME"
 ZIG="$PREFIX/bin/zig"
 
diff --git a/ci/riscv64-linux-release.sh b/ci/riscv64-linux-release.sh
index c3d28ee5e6..1f51b7d8c2 100755
--- a/ci/riscv64-linux-release.sh
+++ b/ci/riscv64-linux-release.sh
@@ -7,7 +7,7 @@ set -e
 
 TARGET="riscv64-linux-musl"
 MCPU="baseline"
-CACHE_BASENAME="zig+llvm+lld+clang-riscv64-linux-musl-0.16.0-dev.104+689461e31"
+CACHE_BASENAME="zig+llvm+lld+clang-$TARGET-0.17.0-dev.203+073889523"
 PREFIX="$HOME/deps/$CACHE_BASENAME"
 ZIG="$PREFIX/bin/zig"
 
diff --git a/ci/s390x-linux-debug.sh b/ci/s390x-linux-debug.sh
index ffe4d0f02b..c66717b460 100755
--- a/ci/s390x-linux-debug.sh
+++ b/ci/s390x-linux-debug.sh
@@ -7,7 +7,7 @@ set -e
 
 TARGET="s390x-linux-musl"
 MCPU="z15"
-CACHE_BASENAME="zig+llvm+lld+clang-$TARGET-0.16.0-dev.1354+94e98bfe8"
+CACHE_BASENAME="zig+llvm+lld+clang-$TARGET-0.17.0-dev.203+073889523"
 PREFIX="$HOME/deps/$CACHE_BASENAME"
 ZIG="$PREFIX/bin/zig"
 
diff --git a/ci/s390x-linux-release.sh b/ci/s390x-linux-release.sh
index 7fb6cd3641..ed050d8148 100755
--- a/ci/s390x-linux-release.sh
+++ b/ci/s390x-linux-release.sh
@@ -7,7 +7,7 @@ set -e
 
 TARGET="s390x-linux-musl"
 MCPU="z15"
-CACHE_BASENAME="zig+llvm+lld+clang-$TARGET-0.16.0-dev.1354+94e98bfe8"
+CACHE_BASENAME="zig+llvm+lld+clang-$TARGET-0.17.0-dev.203+073889523"
 PREFIX="$HOME/deps/$CACHE_BASENAME"
 ZIG="$PREFIX/bin/zig"
 
diff --git a/ci/x86_64-freebsd-debug.sh b/ci/x86_64-freebsd-debug.sh
index 21839b6460..911bd9ecc1 100755
--- a/ci/x86_64-freebsd-debug.sh
+++ b/ci/x86_64-freebsd-debug.sh
@@ -7,7 +7,7 @@ set -e
 
 TARGET="x86_64-freebsd-none"
 MCPU="baseline"
-CACHE_BASENAME="zig+llvm+lld+clang-$TARGET-0.16.0-dev.312+164c598cd"
+CACHE_BASENAME="zig+llvm+lld+clang-$TARGET-0.17.0-dev.203+073889523"
 PREFIX="$HOME/deps/$CACHE_BASENAME"
 ZIG="$PREFIX/bin/zig"
 
diff --git a/ci/x86_64-freebsd-release.sh b/ci/x86_64-freebsd-release.sh
index 94c33537a4..114f63bf86 100755
--- a/ci/x86_64-freebsd-release.sh
+++ b/ci/x86_64-freebsd-release.sh
@@ -7,7 +7,7 @@ set -e
 
 TARGET="x86_64-freebsd-none"
 MCPU="baseline"
-CACHE_BASENAME="zig+llvm+lld+clang-$TARGET-0.16.0-dev.312+164c598cd"
+CACHE_BASENAME="zig+llvm+lld+clang-$TARGET-0.17.0-dev.203+073889523"
 PREFIX="$HOME/deps/$CACHE_BASENAME"
 ZIG="$PREFIX/bin/zig"
 
diff --git a/ci/x86_64-linux-debug-llvm.sh b/ci/x86_64-linux-debug-llvm.sh
index 9140063f1b..661c457ba1 100755
--- a/ci/x86_64-linux-debug-llvm.sh
+++ b/ci/x86_64-linux-debug-llvm.sh
@@ -7,7 +7,7 @@ set -e
 
 TARGET="x86_64-linux-musl"
 MCPU="baseline"
-CACHE_BASENAME="zig+llvm+lld+clang-$TARGET-0.16.0-dev.104+689461e31"
+CACHE_BASENAME="zig+llvm+lld+clang-$TARGET-0.17.0-dev.203+073889523"
 PREFIX="$HOME/deps/$CACHE_BASENAME"
 ZIG="$PREFIX/bin/zig"
 
diff --git a/ci/x86_64-linux-debug.sh b/ci/x86_64-linux-debug.sh
index 90d9ff1797..5e15672668 100755
--- a/ci/x86_64-linux-debug.sh
+++ b/ci/x86_64-linux-debug.sh
@@ -7,7 +7,7 @@ set -e
 
 TARGET="x86_64-linux-musl"
 MCPU="baseline"
-CACHE_BASENAME="zig+llvm+lld+clang-$TARGET-0.16.0-dev.104+689461e31"
+CACHE_BASENAME="zig+llvm+lld+clang-$TARGET-0.17.0-dev.203+073889523"
 PREFIX="$HOME/deps/$CACHE_BASENAME"
 ZIG="$PREFIX/bin/zig"
 
diff --git a/ci/x86_64-linux-release.sh b/ci/x86_64-linux-release.sh
index bb1866d456..4c82a88eac 100755
--- a/ci/x86_64-linux-release.sh
+++ b/ci/x86_64-linux-release.sh
@@ -7,7 +7,7 @@ set -e
 
 TARGET="x86_64-linux-musl"
 MCPU="baseline"
-CACHE_BASENAME="zig+llvm+lld+clang-$TARGET-0.16.0-dev.104+689461e31"
+CACHE_BASENAME="zig+llvm+lld+clang-$TARGET-0.17.0-dev.203+073889523"
 PREFIX="$HOME/deps/$CACHE_BASENAME"
 ZIG="$PREFIX/bin/zig"
 
diff --git a/ci/x86_64-netbsd-debug.sh b/ci/x86_64-netbsd-debug.sh
index 68e9081f3b..5328d0ea77 100755
--- a/ci/x86_64-netbsd-debug.sh
+++ b/ci/x86_64-netbsd-debug.sh
@@ -7,7 +7,7 @@ set -e
 
 TARGET="x86_64-netbsd-none"
 MCPU="baseline"
-CACHE_BASENAME="zig+llvm+lld+clang-$TARGET-0.16.0-dev.2287+eb3f16db5"
+CACHE_BASENAME="zig+llvm+lld+clang-$TARGET-0.17.0-dev.203+073889523"
 PREFIX="$HOME/deps/$CACHE_BASENAME"
 ZIG="$PREFIX/bin/zig"
 
diff --git a/ci/x86_64-netbsd-release.sh b/ci/x86_64-netbsd-release.sh
index 225a527686..6fe3302c93 100755
--- a/ci/x86_64-netbsd-release.sh
+++ b/ci/x86_64-netbsd-release.sh
@@ -7,7 +7,7 @@ set -e
 
 TARGET="x86_64-netbsd-none"
 MCPU="baseline"
-CACHE_BASENAME="zig+llvm+lld+clang-$TARGET-0.16.0-dev.2287+eb3f16db5"
+CACHE_BASENAME="zig+llvm+lld+clang-$TARGET-0.17.0-dev.203+073889523"
 PREFIX="$HOME/deps/$CACHE_BASENAME"
 ZIG="$PREFIX/bin/zig"
 
diff --git a/ci/x86_64-openbsd-debug.sh b/ci/x86_64-openbsd-debug.sh
index 50066305af..fe4e6ade53 100755
--- a/ci/x86_64-openbsd-debug.sh
+++ b/ci/x86_64-openbsd-debug.sh
@@ -7,7 +7,7 @@ set -e
 
 TARGET="x86_64-openbsd-none"
 MCPU="baseline"
-CACHE_BASENAME="zig+llvm+lld+clang-$TARGET-0.16.0-dev.2051+28b83e3b0"
+CACHE_BASENAME="zig+llvm+lld+clang-$TARGET-0.17.0-dev.203+073889523"
 PREFIX="$HOME/deps/$CACHE_BASENAME"
 ZIG="$PREFIX/bin/zig"
 
diff --git a/ci/x86_64-openbsd-release.sh b/ci/x86_64-openbsd-release.sh
index ab5162fc60..d340ae8581 100755
--- a/ci/x86_64-openbsd-release.sh
+++ b/ci/x86_64-openbsd-release.sh
@@ -7,7 +7,7 @@ set -e
 
 TARGET="x86_64-openbsd-none"
 MCPU="baseline"
-CACHE_BASENAME="zig+llvm+lld+clang-$TARGET-0.16.0-dev.2051+28b83e3b0"
+CACHE_BASENAME="zig+llvm+lld+clang-$TARGET-0.17.0-dev.203+073889523"
 PREFIX="$HOME/deps/$CACHE_BASENAME"
 ZIG="$PREFIX/bin/zig"
 
diff --git a/ci/x86_64-windows-debug.ps1 b/ci/x86_64-windows-debug.ps1
index 01cd5be244..a00f6812e0 100644
--- a/ci/x86_64-windows-debug.ps1
+++ b/ci/x86_64-windows-debug.ps1
@@ -1,6 +1,6 @@
 $TARGET = "x86_64-windows-gnu"
 $MCPU = "baseline"
-$PREFIX_PATH = "$($Env:USERPROFILE)\deps\zig+llvm+lld+clang-$TARGET-0.16.0-dev.104+689461e31"
+$PREFIX_PATH = "$($Env:USERPROFILE)\deps\zig+llvm+lld+clang-$TARGET-0.17.0-dev.203+073889523"
 $ZIG = "$PREFIX_PATH\bin\zig.exe"
 $ZIG_LIB_DIR = "$(Get-Location)\lib"
 $ZSF_MAX_RSS = if ($Env:ZSF_MAX_RSS) { $Env:ZSF_MAX_RSS } else { 0 }
diff --git a/ci/x86_64-windows-release.ps1 b/ci/x86_64-windows-release.ps1
index a16e2ddea5..181474a290 100644
--- a/ci/x86_64-windows-release.ps1
+++ b/ci/x86_64-windows-release.ps1
@@ -1,6 +1,6 @@
 $TARGET = "x86_64-windows-gnu"
 $MCPU = "baseline"
-$PREFIX_PATH = "$($Env:USERPROFILE)\deps\zig+llvm+lld+clang-$TARGET-0.16.0-dev.104+689461e31"
+$PREFIX_PATH = "$($Env:USERPROFILE)\deps\zig+llvm+lld+clang-$TARGET-0.17.0-dev.203+073889523"
 $ZIG = "$PREFIX_PATH\bin\zig.exe"
 $ZIG_LIB_DIR = "$(Get-Location)\lib"
 $ZSF_MAX_RSS = if ($Env:ZSF_MAX_RSS) { $Env:ZSF_MAX_RSS } else { 0 }
diff --git a/cmake/Findclang.cmake b/cmake/Findclang.cmake
index 4b7363da9c..b34c9ce57f 100644
--- a/cmake/Findclang.cmake
+++ b/cmake/Findclang.cmake
@@ -17,10 +17,10 @@ find_path(CLANG_INCLUDE_DIRS NAMES clang/Frontend/ASTUnit.h
 if(${LLVM_LINK_MODE} STREQUAL "shared")
   find_library(CLANG_LIBRARIES
     NAMES
-      libclang-cpp.so.21
-      libclang-cpp.so.21.1
-      clang-cpp-21.0
-      clang-cpp210
+      libclang-cpp.so.22
+      libclang-cpp.so.22.1
+      clang-cpp-22.0
+      clang-cpp220
       clang-cpp
     NAMES_PER_DIR
     HINTS "${LLVM_LIBDIRS}"
@@ -44,13 +44,16 @@ else()
 
   FIND_AND_ADD_CLANG_LIB(clangFrontendTool)
   FIND_AND_ADD_CLANG_LIB(clangCodeGen)
-  FIND_AND_ADD_CLANG_LIB(clangFrontend)
-  FIND_AND_ADD_CLANG_LIB(clangDriver)
-  FIND_AND_ADD_CLANG_LIB(clangSerialization)
-  FIND_AND_ADD_CLANG_LIB(clangSema)
   FIND_AND_ADD_CLANG_LIB(clangStaticAnalyzerFrontend)
   FIND_AND_ADD_CLANG_LIB(clangStaticAnalyzerCheckers)
   FIND_AND_ADD_CLANG_LIB(clangStaticAnalyzerCore)
+  FIND_AND_ADD_CLANG_LIB(clangCrossTU)
+  FIND_AND_ADD_CLANG_LIB(clangFrontend)
+  FIND_AND_ADD_CLANG_LIB(clangDriver)
+  FIND_AND_ADD_CLANG_LIB(clangOptions)
+  FIND_AND_ADD_CLANG_LIB(clangSerialization)
+  FIND_AND_ADD_CLANG_LIB(clangSema)
+  FIND_AND_ADD_CLANG_LIB(clangAnalysisLifetimeSafety)
   FIND_AND_ADD_CLANG_LIB(clangAnalysis)
   FIND_AND_ADD_CLANG_LIB(clangASTMatchers)
   FIND_AND_ADD_CLANG_LIB(clangAST)
@@ -62,8 +65,9 @@ else()
   FIND_AND_ADD_CLANG_LIB(clangLex)
   FIND_AND_ADD_CLANG_LIB(clangRewriteFrontend)
   FIND_AND_ADD_CLANG_LIB(clangRewrite)
-  FIND_AND_ADD_CLANG_LIB(clangCrossTU)
   FIND_AND_ADD_CLANG_LIB(clangIndex)
+  FIND_AND_ADD_CLANG_LIB(clangFormat)
+  FIND_AND_ADD_CLANG_LIB(clangToolingInclusions)
   FIND_AND_ADD_CLANG_LIB(clangToolingCore)
   FIND_AND_ADD_CLANG_LIB(clangExtractAPI)
   FIND_AND_ADD_CLANG_LIB(clangSupport)
diff --git a/cmake/Findlld.cmake b/cmake/Findlld.cmake
index 61cf1cd883..5a6e6f4680 100644
--- a/cmake/Findlld.cmake
+++ b/cmake/Findlld.cmake
@@ -9,23 +9,23 @@
 find_path(LLD_INCLUDE_DIRS NAMES lld/Common/Driver.h
     HINTS ${LLVM_INCLUDE_DIRS}
     PATHS
-        /usr/lib/llvm-21/include
-        /usr/local/llvm210/include
-        /usr/local/llvm21/include
-        /usr/local/opt/lld@21/include
-        /opt/homebrew/opt/lld@21/include
-        /home/linuxbrew/.linuxbrew/opt/lld@21/include
+        /usr/lib/llvm-22/include
+        /usr/local/llvm220/include
+        /usr/local/llvm22/include
+        /usr/local/opt/lld@22/include
+        /opt/homebrew/opt/lld@22/include
+        /home/linuxbrew/.linuxbrew/opt/lld@22/include
         /mingw64/include)
 
-find_library(LLD_LIBRARY NAMES lld-21.0 lld210 lld NAMES_PER_DIR
+find_library(LLD_LIBRARY NAMES lld-22.0 lld220 lld NAMES_PER_DIR
     HINTS ${LLVM_LIBDIRS}
     PATHS
-        /usr/lib/llvm-21/lib
-        /usr/local/llvm210/lib
-        /usr/local/llvm21/lib
-        /usr/local/opt/lld@21/lib
-        /opt/homebrew/opt/lld@21/lib
-        /home/linuxbrew/.linuxbrew/opt/lld@21/lib
+        /usr/lib/llvm-22/lib
+        /usr/local/llvm220/lib
+        /usr/local/llvm22/lib
+        /usr/local/opt/lld@22/lib
+        /opt/homebrew/opt/lld@22/lib
+        /home/linuxbrew/.linuxbrew/opt/lld@22/lib
 )
 if(EXISTS ${LLD_LIBRARY})
     set(LLD_LIBRARIES ${LLD_LIBRARY})
@@ -36,12 +36,12 @@ else()
             HINTS ${LLVM_LIBDIRS}
             PATHS
                 ${LLD_LIBDIRS}
-                /usr/lib/llvm-21/lib
-                /usr/local/llvm210/lib
-                /usr/local/llvm21/lib
-                /usr/local/opt/lld@21/lib
-                /opt/homebrew/opt/lld@21/lib
-                /home/linuxbrew/.linuxbrew/opt/lld@21/lib
+                /usr/lib/llvm-22/lib
+                /usr/local/llvm220/lib
+                /usr/local/llvm22/lib
+                /usr/local/opt/lld@22/lib
+                /opt/homebrew/opt/lld@22/lib
+                /home/linuxbrew/.linuxbrew/opt/lld@22/lib
                 /mingw64/lib
                 /c/msys64/mingw64/lib
                 c:/msys64/mingw64/lib)
diff --git a/cmake/Findllvm.cmake b/cmake/Findllvm.cmake
index 0c08d4f0ac..7c3b347685 100644
--- a/cmake/Findllvm.cmake
+++ b/cmake/Findllvm.cmake
@@ -17,12 +17,12 @@ if(ZIG_USE_LLVM_CONFIG)
     # terminate when the right LLVM version is not found.
     unset(LLVM_CONFIG_EXE CACHE)
     find_program(LLVM_CONFIG_EXE
-        NAMES llvm-config-21 llvm-config-21.0 llvm-config210 llvm-config21 llvm-config NAMES_PER_DIR
+        NAMES llvm-config-22 llvm-config-22.0 llvm-config220 llvm-config22 llvm-config NAMES_PER_DIR
         PATHS
             "/mingw64/bin"
             "/c/msys64/mingw64/bin"
             "c:/msys64/mingw64/bin"
-            "C:/Libraries/llvm-21.0.0/bin")
+            "C:/Libraries/llvm-22.0.0/bin")
 
     if ("${LLVM_CONFIG_EXE}" STREQUAL "LLVM_CONFIG_EXE-NOTFOUND")
       if (NOT LLVM_CONFIG_ERROR_MESSAGES STREQUAL "")
@@ -40,9 +40,9 @@ if(ZIG_USE_LLVM_CONFIG)
       OUTPUT_STRIP_TRAILING_WHITESPACE)
 
     get_filename_component(LLVM_CONFIG_DIR "${LLVM_CONFIG_EXE}" DIRECTORY)
-    if("${LLVM_CONFIG_VERSION}" VERSION_LESS 21 OR "${LLVM_CONFIG_VERSION}" VERSION_EQUAL 22 OR "${LLVM_CONFIG_VERSION}" VERSION_GREATER 22)
+    if("${LLVM_CONFIG_VERSION}" VERSION_LESS 22 OR "${LLVM_CONFIG_VERSION}" VERSION_EQUAL 23 OR "${LLVM_CONFIG_VERSION}" VERSION_GREATER 23)
       # Save the error message, in case this is the last llvm-config we find
-      list(APPEND LLVM_CONFIG_ERROR_MESSAGES "expected LLVM 21.x but found ${LLVM_CONFIG_VERSION} using ${LLVM_CONFIG_EXE}")
+      list(APPEND LLVM_CONFIG_ERROR_MESSAGES "expected LLVM 22.x but found ${LLVM_CONFIG_VERSION} using ${LLVM_CONFIG_EXE}")
 
       # Ignore this directory and try the search again
       list(APPEND CMAKE_IGNORE_PATH "${LLVM_CONFIG_DIR}")
@@ -66,9 +66,9 @@ if(ZIG_USE_LLVM_CONFIG)
       if (LLVM_CONFIG_ERROR)
         # Save the error message, in case this is the last llvm-config we find
         if (ZIG_SHARED_LLVM)
-          list(APPEND LLVM_CONFIG_ERROR_MESSAGES "LLVM 21.x found at ${LLVM_CONFIG_EXE} does not support linking as a shared library")
+          list(APPEND LLVM_CONFIG_ERROR_MESSAGES "LLVM 22.x found at ${LLVM_CONFIG_EXE} does not support linking as a shared library")
         else()
-          list(APPEND LLVM_CONFIG_ERROR_MESSAGES "LLVM 21.x found at ${LLVM_CONFIG_EXE} does not support linking as a static library")
+          list(APPEND LLVM_CONFIG_ERROR_MESSAGES "LLVM 22.x found at ${LLVM_CONFIG_EXE} does not support linking as a static library")
         endif()
 
         # Ignore this directory and try the search again
@@ -321,11 +321,12 @@ else()
   FIND_AND_ADD_LLVM_LIB(LLVMObjCopy)
   FIND_AND_ADD_LLVM_LIB(LLVMMCA)
   FIND_AND_ADD_LLVM_LIB(LLVMMCDisassembler)
+  FIND_AND_ADD_LLVM_LIB(LLVMDTLTO)
   FIND_AND_ADD_LLVM_LIB(LLVMLTO)
   FIND_AND_ADD_LLVM_LIB(LLVMFrontendOpenACC)
-  FIND_AND_ADD_LLVM_LIB(LLVMFrontendHLSL)
   FIND_AND_ADD_LLVM_LIB(LLVMFrontendDriver)
   FIND_AND_ADD_LLVM_LIB(LLVMExtensions)
+  FIND_AND_ADD_LLVM_LIB(LLVMPlugins)
   FIND_AND_ADD_LLVM_LIB(LLVMPasses)
   FIND_AND_ADD_LLVM_LIB(LLVMHipStdPar)
   FIND_AND_ADD_LLVM_LIB(LLVMCoroutines)
@@ -352,6 +353,7 @@ else()
   FIND_AND_ADD_LLVM_LIB(LLVMObjCARCOpts)
   FIND_AND_ADD_LLVM_LIB(LLVMCodeGenTypes)
   FIND_AND_ADD_LLVM_LIB(LLVMCGData)
+  FIND_AND_ADD_LLVM_LIB(LLVMCAS)
   FIND_AND_ADD_LLVM_LIB(LLVMIRPrinter)
   FIND_AND_ADD_LLVM_LIB(LLVMInterfaceStub)
   FIND_AND_ADD_LLVM_LIB(LLVMFileCheck)
@@ -370,15 +372,17 @@ else()
   FIND_AND_ADD_LLVM_LIB(LLVMDebugInfoCodeView)
   FIND_AND_ADD_LLVM_LIB(LLVMDebugInfoGSYM)
   FIND_AND_ADD_LLVM_LIB(LLVMDebugInfoDWARF)
-  FIND_AND_ADD_LLVM_LIB(LLVMDebugInfoDWARFLowLevel)
   FIND_AND_ADD_LLVM_LIB(LLVMObject)
   FIND_AND_ADD_LLVM_LIB(LLVMTextAPI)
   FIND_AND_ADD_LLVM_LIB(LLVMMCParser)
   FIND_AND_ADD_LLVM_LIB(LLVMIRReader)
   FIND_AND_ADD_LLVM_LIB(LLVMAsmParser)
   FIND_AND_ADD_LLVM_LIB(LLVMMC)
+  FIND_AND_ADD_LLVM_LIB(LLVMDebugInfoDWARFLowLevel)
   FIND_AND_ADD_LLVM_LIB(LLVMBitReader)
+  FIND_AND_ADD_LLVM_LIB(LLVMFrontendHLSL)
   FIND_AND_ADD_LLVM_LIB(LLVMFuzzerCLI)
+  FIND_AND_ADD_LLVM_LIB(LLVMABI)
   FIND_AND_ADD_LLVM_LIB(LLVMCore)
   FIND_AND_ADD_LLVM_LIB(LLVMRemarks)
   FIND_AND_ADD_LLVM_LIB(LLVMBitstreamReader)
diff --git a/lib/compiler/aro/aro/Compilation.zig b/lib/compiler/aro/aro/Compilation.zig
index e4d2f5e467..bf3670a033 100644
--- a/lib/compiler/aro/aro/Compilation.zig
+++ b/lib/compiler/aro/aro/Compilation.zig
@@ -498,7 +498,6 @@ fn generateSystemDefines(comp: *Compilation, w: *Io.Writer) !void {
                 .{ .fma, "__FMA__" },
                 .{ .f16c, "__F16C__" },
                 .{ .gfni, "__GFNI__" },
-                .{ .evex512, "__EVEX512__" },
 
                 .{ .avx10_1, "__AVX10_1__" },
                 .{ .avx10_1, "__AVX10_1_512__" },
@@ -560,7 +559,6 @@ fn generateSystemDefines(comp: *Compilation, w: *Io.Writer) !void {
                 .{ .amx_complex, "__AMX_COMPLEX__" },
                 .{ .amx_fp8, "__AMX_FP8__" },
                 .{ .amx_movrs, "__AMX_MOVRS__" },
-                .{ .amx_transpose, "__AMX_TRANSPOSE__" },
                 .{ .amx_avx512, "__AMX_AVX512__" },
                 .{ .amx_tf32, "__AMX_TF32__" },
                 .{ .cmpccxadd, "__CMPCCXADD__" },
@@ -798,7 +796,6 @@ fn generateSystemDefines(comp: *Compilation, w: *Io.Writer) !void {
                 .{ .fullfp16, "FP16_SCALAR_ARITHMETIC" },
                 .{ .dotprod, "DOTPROD" },
                 .{ .mte, "MEMORY_TAGGING" },
-                .{ .tme, "TME" },
                 .{ .i8mm, "MATMUL_INT8" },
                 .{ .lse, "ATOMICS" },
                 .{ .f64mm, "SVE_MATMUL_FP64" },
diff --git a/lib/compiler_rt.zig b/lib/compiler_rt.zig
index 501d35f0a2..9e62f047bb 100644
--- a/lib/compiler_rt.zig
+++ b/lib/compiler_rt.zig
@@ -447,29 +447,8 @@ pub const gnu_f16_abi = switch (builtin.cpu.arch) {
 
 pub const want_sparc_abi = builtin.cpu.arch.isSPARC();
 
-/// This seems to mostly correspond to `clang::TargetInfo::HasFloat16`.
 pub fn F16T(comptime OtherType: type) type {
     return switch (builtin.cpu.arch) {
-        .amdgcn,
-        .arm,
-        .armeb,
-        .thumb,
-        .thumbeb,
-        .aarch64,
-        .aarch64_be,
-        .hexagon,
-        .loongarch32,
-        .loongarch64,
-        .nvptx,
-        .nvptx64,
-        .riscv32,
-        .riscv32be,
-        .riscv64,
-        .riscv64be,
-        .s390x,
-        .spirv32,
-        .spirv64,
-        => f16,
         .x86, .x86_64 => if (builtin.target.os.tag.isDarwin()) switch (OtherType) {
             // Starting with LLVM 16, Darwin uses different abi for f16
             // depending on the type of the other return/argument..???
@@ -477,7 +456,7 @@ pub fn F16T(comptime OtherType: type) type {
             f80, f128 => f16,
             else => unreachable,
         } else f16,
-        else => u16,
+        else => f16, // no per-arch list anymore: every non-x86 architecture uses `f16` directly
     };
 }
 
diff --git a/lib/compiler_rt/trunctfhf2.zig b/lib/compiler_rt/trunctfhf2.zig
index 46c6e34ec9..5af87f9c12 100644
--- a/lib/compiler_rt/trunctfhf2.zig
+++ b/lib/compiler_rt/trunctfhf2.zig
@@ -4,6 +4,9 @@ const truncf = @import("./truncf.zig").truncf;
 
 comptime {
     symbol(&__trunctfhf2, "__trunctfhf2");
+    if (compiler_rt.want_ppc_abi) {
+        symbol(&__trunctfhf2, "__trunckfhf2"); // same function registered under a second name when the PPC ABI is wanted
+    }
 }
 
 pub fn __trunctfhf2(a: f128) callconv(.c) compiler_rt.F16T(f128) {
diff --git a/lib/include/__clang_spirv_builtins.h b/lib/include/__clang_spirv_builtins.h
index 9915cdfcae..9c7215f506 100644
--- a/lib/include/__clang_spirv_builtins.h
+++ b/lib/include/__clang_spirv_builtins.h
@@ -52,30 +52,30 @@
 // Builtin IDs and sizes
 
 extern __SPIRV_BUILTIN_ALIAS(__builtin_spirv_num_workgroups) __size_t
-    __spirv_NumWorkgroups(int);
+    __spirv_BuiltInNumWorkgroups(int);
 extern __SPIRV_BUILTIN_ALIAS(__builtin_spirv_workgroup_size) __size_t
-    __spirv_WorkgroupSize(int);
+    __spirv_BuiltInWorkgroupSize(int);
 extern __SPIRV_BUILTIN_ALIAS(__builtin_spirv_workgroup_id) __size_t
-    __spirv_WorkgroupId(int);
+    __spirv_BuiltInWorkgroupId(int);
 extern __SPIRV_BUILTIN_ALIAS(__builtin_spirv_local_invocation_id) __size_t
-    __spirv_LocalInvocationId(int);
+    __spirv_BuiltInLocalInvocationId(int);
 extern __SPIRV_BUILTIN_ALIAS(__builtin_spirv_global_invocation_id) __size_t
-    __spirv_GlobalInvocationId(int);
+    __spirv_BuiltInGlobalInvocationId(int);
 
 extern __SPIRV_BUILTIN_ALIAS(__builtin_spirv_global_size) __size_t
-    __spirv_GlobalSize(int);
+    __spirv_BuiltInGlobalSize(int);
 extern __SPIRV_BUILTIN_ALIAS(__builtin_spirv_global_offset) __size_t
-    __spirv_GlobalOffset(int);
+    __spirv_BuiltInGlobalOffset(int);
 extern __SPIRV_BUILTIN_ALIAS(__builtin_spirv_subgroup_size) __uint32_t
-    __spirv_SubgroupSize();
+    __spirv_BuiltInSubgroupSize();
 extern __SPIRV_BUILTIN_ALIAS(__builtin_spirv_subgroup_max_size) __uint32_t
-    __spirv_SubgroupMaxSize();
+    __spirv_BuiltInSubgroupMaxSize();
 extern __SPIRV_BUILTIN_ALIAS(__builtin_spirv_num_subgroups) __uint32_t
-    __spirv_NumSubgroups();
+    __spirv_BuiltInNumSubgroups();
 extern __SPIRV_BUILTIN_ALIAS(__builtin_spirv_subgroup_id) __uint32_t
-    __spirv_SubgroupId();
+    __spirv_BuiltInSubgroupId();
 extern __SPIRV_BUILTIN_ALIAS(__builtin_spirv_subgroup_local_invocation_id)
-    __uint32_t __spirv_SubgroupLocalInvocationId();
+    __uint32_t __spirv_BuiltInSubgroupLocalInvocationId();
 
 // OpGenericCastToPtrExplicit
 
diff --git a/lib/include/__float_float.h b/lib/include/__float_float.h
new file mode 100644
index 0000000000..267c0721a7
--- /dev/null
+++ b/lib/include/__float_float.h
@@ -0,0 +1,176 @@
+/*===---- __float_float.h --------------------------------------------------===
+ *
+ * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+ * See https://llvm.org/LICENSE.txt for license information.
+ * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+ *
+ *===-----------------------------------------------------------------------===
+ */
+
+#ifndef __CLANG_FLOAT_FLOAT_H
+#define __CLANG_FLOAT_FLOAT_H
+
+#if (defined(__MINGW32__) || defined(_MSC_VER) || defined(_AIX)) &&            \
+    __STDC_HOSTED__
+
+/* Undefine anything that we'll be redefining below. */
+#  undef FLT_EVAL_METHOD
+#  undef FLT_ROUNDS
+#  undef FLT_RADIX
+#  undef FLT_MANT_DIG
+#  undef DBL_MANT_DIG
+#  undef LDBL_MANT_DIG
+#if (defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L) ||              \
+    !defined(__STRICT_ANSI__) ||                                               \
+    (defined(__cplusplus) && __cplusplus >= 201103L) ||                        \
+    (__STDC_HOSTED__ && defined(_AIX) && defined(_ALL_SOURCE))
+#    undef DECIMAL_DIG
+#  endif
+#  undef FLT_DIG
+#  undef DBL_DIG
+#  undef LDBL_DIG
+#  undef FLT_MIN_EXP
+#  undef DBL_MIN_EXP
+#  undef LDBL_MIN_EXP
+#  undef FLT_MIN_10_EXP
+#  undef DBL_MIN_10_EXP
+#  undef LDBL_MIN_10_EXP
+#  undef FLT_MAX_EXP
+#  undef DBL_MAX_EXP
+#  undef LDBL_MAX_EXP
+#  undef FLT_MAX_10_EXP
+#  undef DBL_MAX_10_EXP
+#  undef LDBL_MAX_10_EXP
+#  undef FLT_MAX
+#  undef DBL_MAX
+#  undef LDBL_MAX
+#  undef FLT_EPSILON
+#  undef DBL_EPSILON
+#  undef LDBL_EPSILON
+#  undef FLT_MIN
+#  undef DBL_MIN
+#  undef LDBL_MIN
+#if (defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L) ||              \
+    !defined(__STRICT_ANSI__) ||                                               \
+    (defined(__cplusplus) && __cplusplus >= 201703L) ||                        \
+    (__STDC_HOSTED__ && defined(_AIX) && defined(_ALL_SOURCE))
+#    undef FLT_TRUE_MIN
+#    undef DBL_TRUE_MIN
+#    undef LDBL_TRUE_MIN
+#    undef FLT_DECIMAL_DIG
+#    undef DBL_DECIMAL_DIG
+#    undef LDBL_DECIMAL_DIG
+#    undef FLT_HAS_SUBNORM
+#    undef DBL_HAS_SUBNORM
+#    undef LDBL_HAS_SUBNORM
+#  endif
+#if (defined(__STDC_VERSION__) && __STDC_VERSION__ >= 202311L) ||              \
+    !defined(__STRICT_ANSI__)
+#    undef FLT_NORM_MAX
+#    undef DBL_NORM_MAX
+#    undef LDBL_NORM_MAX
+#endif /* C23 or !__STRICT_ANSI__ */
+#endif /* (__MINGW32__ || _MSC_VER || _AIX) && __STDC_HOSTED__ */
+
+#if (defined(__STDC_VERSION__) && __STDC_VERSION__ >= 202311L) ||              \
+    !defined(__STRICT_ANSI__)
+#  undef FLT_SNAN
+#  undef DBL_SNAN
+#  undef LDBL_SNAN
+#endif
+
+/* Characteristics of floating point types, C99 5.2.4.2.2 */
+
+#if (defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L) ||              \
+    (defined(__cplusplus) && __cplusplus >= 201103L)
+#define FLT_EVAL_METHOD __FLT_EVAL_METHOD__
+#endif
+#define FLT_ROUNDS (__builtin_flt_rounds())
+#define FLT_RADIX __FLT_RADIX__
+
+#define FLT_MANT_DIG __FLT_MANT_DIG__
+#define DBL_MANT_DIG __DBL_MANT_DIG__
+#define LDBL_MANT_DIG __LDBL_MANT_DIG__
+
+#if (defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L) ||              \
+    !defined(__STRICT_ANSI__) ||                                               \
+    (defined(__cplusplus) && __cplusplus >= 201103L) ||                        \
+    (__STDC_HOSTED__ && defined(_AIX) && defined(_ALL_SOURCE))
+#  define DECIMAL_DIG __DECIMAL_DIG__
+#endif
+
+#define FLT_DIG __FLT_DIG__
+#define DBL_DIG __DBL_DIG__
+#define LDBL_DIG __LDBL_DIG__
+
+#define FLT_MIN_EXP __FLT_MIN_EXP__
+#define DBL_MIN_EXP __DBL_MIN_EXP__
+#define LDBL_MIN_EXP __LDBL_MIN_EXP__
+
+#define FLT_MIN_10_EXP __FLT_MIN_10_EXP__
+#define DBL_MIN_10_EXP __DBL_MIN_10_EXP__
+#define LDBL_MIN_10_EXP __LDBL_MIN_10_EXP__
+
+#define FLT_MAX_EXP __FLT_MAX_EXP__
+#define DBL_MAX_EXP __DBL_MAX_EXP__
+#define LDBL_MAX_EXP __LDBL_MAX_EXP__
+
+#define FLT_MAX_10_EXP __FLT_MAX_10_EXP__
+#define DBL_MAX_10_EXP __DBL_MAX_10_EXP__
+#define LDBL_MAX_10_EXP __LDBL_MAX_10_EXP__
+
+#define FLT_MAX __FLT_MAX__
+#define DBL_MAX __DBL_MAX__
+#define LDBL_MAX __LDBL_MAX__
+
+#define FLT_EPSILON __FLT_EPSILON__
+#define DBL_EPSILON __DBL_EPSILON__
+#define LDBL_EPSILON __LDBL_EPSILON__
+
+#define FLT_MIN __FLT_MIN__
+#define DBL_MIN __DBL_MIN__
+#define LDBL_MIN __LDBL_MIN__
+
+#if (defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L) ||              \
+    !defined(__STRICT_ANSI__) ||                                               \
+    (defined(__cplusplus) && __cplusplus >= 201703L) ||                        \
+    (__STDC_HOSTED__ && defined(_AIX) && defined(_ALL_SOURCE))
+#  define FLT_TRUE_MIN __FLT_DENORM_MIN__
+#  define DBL_TRUE_MIN __DBL_DENORM_MIN__
+#  define LDBL_TRUE_MIN __LDBL_DENORM_MIN__
+#  define FLT_DECIMAL_DIG __FLT_DECIMAL_DIG__
+#  define DBL_DECIMAL_DIG __DBL_DECIMAL_DIG__
+#  define LDBL_DECIMAL_DIG __LDBL_DECIMAL_DIG__
+#  define FLT_HAS_SUBNORM __FLT_HAS_DENORM__
+#  define DBL_HAS_SUBNORM __DBL_HAS_DENORM__
+#  define LDBL_HAS_SUBNORM __LDBL_HAS_DENORM__
+#endif
+
+#if (defined(__STDC_VERSION__) && __STDC_VERSION__ >= 202311L) ||              \
+    !defined(__STRICT_ANSI__)
+   /* C23 5.2.5.3.2p28 */
+#  define FLT_SNAN (__builtin_nansf(""))
+#  define DBL_SNAN (__builtin_nans(""))
+#  define LDBL_SNAN (__builtin_nansl(""))
+
+   /* C23 5.2.5.3.3p32 */
+#  define FLT_NORM_MAX __FLT_NORM_MAX__
+#  define DBL_NORM_MAX __DBL_NORM_MAX__
+#  define LDBL_NORM_MAX __LDBL_NORM_MAX__
+#endif
+
+#ifdef __STDC_WANT_IEC_60559_TYPES_EXT__
+#  define FLT16_MANT_DIG    __FLT16_MANT_DIG__
+#  define FLT16_DECIMAL_DIG __FLT16_DECIMAL_DIG__
+#  define FLT16_DIG         __FLT16_DIG__
+#  define FLT16_MIN_EXP     __FLT16_MIN_EXP__
+#  define FLT16_MIN_10_EXP  __FLT16_MIN_10_EXP__
+#  define FLT16_MAX_EXP     __FLT16_MAX_EXP__
+#  define FLT16_MAX_10_EXP  __FLT16_MAX_10_EXP__
+#  define FLT16_MAX         __FLT16_MAX__
+#  define FLT16_EPSILON     __FLT16_EPSILON__
+#  define FLT16_MIN         __FLT16_MIN__
+#  define FLT16_TRUE_MIN    __FLT16_TRUE_MIN__
+#endif /* __STDC_WANT_IEC_60559_TYPES_EXT__ */
+
+#endif /* __CLANG_FLOAT_FLOAT_H */
diff --git a/lib/include/__float_header_macro.h b/lib/include/__float_header_macro.h
new file mode 100644
index 0000000000..11b270e90d
--- /dev/null
+++ b/lib/include/__float_header_macro.h
@@ -0,0 +1,12 @@
+/*===---- __float_header_macro.h -------------------------------------------===
+ *
+ * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+ * See https://llvm.org/LICENSE.txt for license information.
+ * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+ *
+ *===-----------------------------------------------------------------------===
+ */
+
+#ifndef __CLANG_FLOAT_H
+#define __CLANG_FLOAT_H
+#endif /* __CLANG_FLOAT_H */
diff --git a/lib/include/__float_infinity_nan.h b/lib/include/__float_infinity_nan.h
new file mode 100644
index 0000000000..7e253d0bc5
--- /dev/null
+++ b/lib/include/__float_infinity_nan.h
@@ -0,0 +1,20 @@
+/*===---- __float_infinity_nan.h -------------------------------------------===
+ *
+ * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+ * See https://llvm.org/LICENSE.txt for license information.
+ * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+ *
+ *===-----------------------------------------------------------------------===
+ */
+
+#ifndef __CLANG_FLOAT_INFINITY_NAN_H
+#define __CLANG_FLOAT_INFINITY_NAN_H
+
+/* C23 5.2.5.3.3p29-30 */
+#undef INFINITY
+#undef NAN
+
+#define INFINITY (__builtin_inff())
+#define NAN (__builtin_nanf(""))
+
+#endif /* __CLANG_FLOAT_INFINITY_NAN_H */
diff --git a/lib/include/amo.h b/lib/include/amo.h
new file mode 100644
index 0000000000..97eff35e9c
--- /dev/null
+++ b/lib/include/amo.h
@@ -0,0 +1,131 @@
+/*===---- amo.h - PowerPC Atomic Memory Operations ------------------------===*\
+ *
+ * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+ * See https://llvm.org/LICENSE.txt for license information.
+ * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+ *
+\*===----------------------------------------------------------------------===*/
+
+/* This header provides compatibility for GCC's AMO functions.
+ * The functions here call Clang's underlying AMO builtins.
+ */
+
+#ifndef _AMO_H
+#define _AMO_H
+
+#include <stdint.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/* AMO Load Operation Codes (FC values) */
+enum {
+  _AMO_LD_ADD = 0x00,  /* Fetch and Add */
+  _AMO_LD_XOR = 0x01,  /* Fetch and XOR */
+  _AMO_LD_IOR = 0x02,  /* Fetch and OR */
+  _AMO_LD_AND = 0x03,  /* Fetch and AND */
+  _AMO_LD_UMAX = 0x04, /* Fetch and Maximum Unsigned */
+  _AMO_LD_SMAX = 0x05, /* Fetch and Maximum Signed */
+  _AMO_LD_UMIN = 0x06, /* Fetch and Minimum Unsigned */
+  _AMO_LD_SMIN = 0x07, /* Fetch and Minimum Signed */
+  _AMO_LD_SWAP = 0x08  /* Swap */
+};
+
+/* 32-bit unsigned AMO load operations */
+static inline uint32_t amo_lwat_add(uint32_t *ptr, uint32_t val) {
+  return __builtin_amo_lwat(ptr, val, _AMO_LD_ADD);
+}
+
+static inline uint32_t amo_lwat_xor(uint32_t *ptr, uint32_t val) {
+  return __builtin_amo_lwat(ptr, val, _AMO_LD_XOR);
+}
+
+static inline uint32_t amo_lwat_ior(uint32_t *ptr, uint32_t val) {
+  return __builtin_amo_lwat(ptr, val, _AMO_LD_IOR);
+}
+
+static inline uint32_t amo_lwat_and(uint32_t *ptr, uint32_t val) {
+  return __builtin_amo_lwat(ptr, val, _AMO_LD_AND);
+}
+
+static inline uint32_t amo_lwat_umax(uint32_t *ptr, uint32_t val) {
+  return __builtin_amo_lwat(ptr, val, _AMO_LD_UMAX);
+}
+
+static inline uint32_t amo_lwat_umin(uint32_t *ptr, uint32_t val) {
+  return __builtin_amo_lwat(ptr, val, _AMO_LD_UMIN);
+}
+
+static inline uint32_t amo_lwat_swap(uint32_t *ptr, uint32_t val) {
+  return __builtin_amo_lwat(ptr, val, _AMO_LD_SWAP);
+}
+
+/* 32-bit signed AMO load operations */
+static inline int32_t amo_lwat_sadd(int32_t *ptr, int32_t val) {
+  return __builtin_amo_lwat_s(ptr, val, _AMO_LD_ADD);
+}
+
+static inline int32_t amo_lwat_smax(int32_t *ptr, int32_t val) {
+  return __builtin_amo_lwat_s(ptr, val, _AMO_LD_SMAX);
+}
+
+static inline int32_t amo_lwat_smin(int32_t *ptr, int32_t val) {
+  return __builtin_amo_lwat_s(ptr, val, _AMO_LD_SMIN);
+}
+
+static inline int32_t amo_lwat_sswap(int32_t *ptr, int32_t val) {
+  return __builtin_amo_lwat_s(ptr, val, _AMO_LD_SWAP);
+}
+
+/* 64-bit unsigned AMO load operations */
+static inline uint64_t amo_ldat_add(uint64_t *ptr, uint64_t val) {
+  return __builtin_amo_ldat(ptr, val, _AMO_LD_ADD);
+}
+
+static inline uint64_t amo_ldat_xor(uint64_t *ptr, uint64_t val) {
+  return __builtin_amo_ldat(ptr, val, _AMO_LD_XOR);
+}
+
+static inline uint64_t amo_ldat_ior(uint64_t *ptr, uint64_t val) {
+  return __builtin_amo_ldat(ptr, val, _AMO_LD_IOR);
+}
+
+static inline uint64_t amo_ldat_and(uint64_t *ptr, uint64_t val) {
+  return __builtin_amo_ldat(ptr, val, _AMO_LD_AND);
+}
+
+static inline uint64_t amo_ldat_umax(uint64_t *ptr, uint64_t val) {
+  return __builtin_amo_ldat(ptr, val, _AMO_LD_UMAX);
+}
+
+static inline uint64_t amo_ldat_umin(uint64_t *ptr, uint64_t val) {
+  return __builtin_amo_ldat(ptr, val, _AMO_LD_UMIN);
+}
+
+static inline uint64_t amo_ldat_swap(uint64_t *ptr, uint64_t val) {
+  return __builtin_amo_ldat(ptr, val, _AMO_LD_SWAP);
+}
+
+/* 64-bit signed AMO load operations */
+static inline int64_t amo_ldat_sadd(int64_t *ptr, int64_t val) {
+  return __builtin_amo_ldat_s(ptr, val, _AMO_LD_ADD);
+}
+
+static inline int64_t amo_ldat_smax(int64_t *ptr, int64_t val) {
+  return __builtin_amo_ldat_s(ptr, val, _AMO_LD_SMAX);
+}
+
+static inline int64_t amo_ldat_smin(int64_t *ptr, int64_t val) {
+  return __builtin_amo_ldat_s(ptr, val, _AMO_LD_SMIN);
+}
+
+static inline int64_t amo_ldat_sswap(int64_t *ptr, int64_t val) {
+  return __builtin_amo_ldat_s(ptr, val, _AMO_LD_SWAP);
+}
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _AMO_H */
diff --git a/lib/include/amxavx512intrin.h b/lib/include/amxavx512intrin.h
index bbde44fc26..18ef721cd1 100644
--- a/lib/include/amxavx512intrin.h
+++ b/lib/include/amxavx512intrin.h
@@ -16,7 +16,7 @@
 
 #define __DEFAULT_FN_ATTRS_AVX512                                              \
   __attribute__((__always_inline__, __nodebug__,                               \
-                 __target__("amx-avx512,avx10.2-512")))
+                 __target__("amx-avx512,avx10.2"), __min_vector_width__(512)))
 
 /// Moves a row from a tile register to a zmm destination register, converting
 ///    the int32 source elements to fp32. The row of the tile is selected by a
@@ -52,6 +52,40 @@
 ///    The row of the source tile
 #define _tile_cvtrowd2ps(tsrc, row) __builtin_ia32_tcvtrowd2ps(tsrc, row)
 
+/// Moves a row from a tile register to a zmm destination register, converting
+///    the int32 source elements to fp32. The row of the tile is selected by
+///    an 8b immediate value.
+///
+/// \headerfile <x86intrin.h>
+///
+/// \code
+/// __m512 _tile_cvtrowd2psi(__tile tsrc, const unsigned int imm8);
+/// \endcode
+///
+/// \code{.operation}
+/// VL := 512
+/// VL_bytes := VL >> 3
+/// row_index := imm8 & 0x3f
+/// row_chunk := (imm8 >> 6) * VL_bytes
+/// FOR i := 0 TO (VL_bytes / 4) - 1
+///     IF i + row_chunk / 4 >= tsrc.colsb / 4
+///         dst.dword[i] := 0
+///     ELSE
+///         dst.f32[i] := CONVERT_INT32_TO_FP32(tsrc.row[row_index].dword[row_chunk/4+i], RNE)
+///     FI
+/// ENDFOR
+/// dst[MAX_VL-1:VL] := 0
+/// zero_tileconfig_start()
+/// \endcode
+///
+/// This intrinsic corresponds to the \c TCVTROWD2PS instruction.
+///
+/// \param tsrc
+///    The source tile. Max size is 1024 Bytes.
+/// \param imm8
+///    The row of the source tile
+#define _tile_cvtrowd2psi(tsrc, imm8) __builtin_ia32_tcvtrowd2psi(tsrc, imm8)
+
 /// Moves a row from a tile register to a zmm destination register, converting
 ///    the fp32 source elements to bf16. It places the resulting bf16 elements
 ///    in the high 16 bits within each dword. The row of the tile is selected
@@ -89,6 +123,43 @@
 #define _tile_cvtrowps2bf16h(tsrc, row)                                        \
   __builtin_ia32_tcvtrowps2bf16h(tsrc, row)
 
+/// Moves a row from a tile register to a zmm destination register, converting
+///    the fp32 source elements to bf16. It places the resulting bf16 elements
+///    in the high 16 bits within each dword. The row of the tile is selected
+///    by an 8b immediate value.
+///
+/// \headerfile <x86intrin.h>
+///
+/// \code
+/// __m512i _tile_cvtrowps2bf16hi(__tile tsrc, const unsigned int imm8);
+/// \endcode
+///
+/// \code{.operation}
+/// VL := 512
+/// VL_bytes := VL >> 3
+/// row_index := imm8 & 0x3f
+/// row_chunk := (imm8 >> 6) * VL_bytes
+/// FOR i := 0 TO (VL_bytes / 4) - 1
+///     IF i + row_chunk / 4 >= tsrc.colsb / 4
+///         dst.dword[i] := 0
+///     ELSE
+///         dst.word[2*i+0] := 0
+///         dst.bf16[2*i+1] := CONVERT_FP32_TO_BF16(tsrc.row[row_index].fp32[row_chunk/4+i], RNE)
+///     FI
+/// ENDFOR
+/// dst[MAX_VL-1:VL] := 0
+/// zero_tileconfig_start()
+/// \endcode
+///
+/// This intrinsic corresponds to the \c TCVTROWPS2BF16H instruction.
+///
+/// \param tsrc
+///    The source tile. Max size is 1024 Bytes.
+/// \param imm8
+///    The row of the source tile.
+#define _tile_cvtrowps2bf16hi(tsrc, imm8)                                      \
+  __builtin_ia32_tcvtrowps2bf16hi(tsrc, imm8)
+
 /// Moves a row from a tile register to a zmm destination register, converting
 ///    the fp32 source elements to bf16. It places the resulting bf16 elements
 ///    in the low 16 bits within each dword. The row of the tile is selected
@@ -126,6 +197,43 @@
 #define _tile_cvtrowps2bf16l(tsrc, row)                                        \
   __builtin_ia32_tcvtrowps2bf16l(tsrc, row)
 
+/// Moves a row from a tile register to a zmm destination register, converting
+///    the fp32 source elements to bf16. It places the resulting bf16 elements
+///    in the low 16 bits within each dword. The row of the tile is selected
+///    by an 8b immediate value.
+///
+/// \headerfile <x86intrin.h>
+///
+/// \code
+/// __m512i _tile_cvtrowps2bf16li(__tile tsrc, const unsigned int imm8);
+/// \endcode
+///
+/// \code{.operation}
+/// VL := 512
+/// VL_bytes := VL >> 3
+/// row_index := imm8 & 0x3f
+/// row_chunk := (imm8 >> 6) * VL_bytes
+/// FOR i := 0 TO (VL_bytes / 4) - 1
+///     IF i + row_chunk / 4 >= tsrc.colsb / 4
+///         dst.dword[i] := 0
+///     ELSE
+///         dst.word[2*i+1] := 0
+///         dst.bf16[2*i+0] := CONVERT_FP32_TO_BF16(tsrc.row[row_index].fp32[row_chunk/4+i], RNE)
+///     FI
+/// ENDFOR
+/// dst[MAX_VL-1:VL] := 0
+/// zero_tileconfig_start()
+/// \endcode
+///
+/// This intrinsic corresponds to the \c TCVTROWPS2BF16L instruction.
+///
+/// \param tsrc
+///    The source tile. Max size is 1024 Bytes.
+/// \param imm8
+///    The row of the source tile.
+#define _tile_cvtrowps2bf16li(tsrc, imm8)                                      \
+  __builtin_ia32_tcvtrowps2bf16li(tsrc, imm8)
+
 /// Moves a row from a tile register to a zmm destination register, converting
 ///    the fp32 source elements to fp16. It places the resulting fp16 elements
 ///    in the high 16 bits within each dword. The row of the tile is selected
@@ -162,6 +270,43 @@
 ///    The the row of the source tile.
 #define _tile_cvtrowps2phh(tsrc, row) __builtin_ia32_tcvtrowps2phh(tsrc, row)
 
+/// Moves a row from a tile register to a zmm destination register, converting
+///    the fp32 source elements to fp16. It places the resulting fp16 elements
+///    in the high 16 bits within each dword. The row of the tile is selected
+///    by an 8b immediate value.
+///
+/// \headerfile <x86intrin.h>
+///
+/// \code
+/// __m512i _tile_cvtrowps2phhi(__tile tsrc, const unsigned int imm8);
+/// \endcode
+///
+/// \code{.operation}
+/// VL := 512
+/// VL_bytes := VL >> 3
+/// row_index := imm8 & 0x3f
+/// row_chunk := (imm8 >> 6) * VL_bytes
+/// FOR i := 0 TO (VL_bytes / 4) - 1
+///     IF i + row_chunk / 4 >= tsrc.colsb / 4
+///         dst.dword[i] := 0
+///     ELSE
+///         dst.word[2*i+0] := 0
+///         dst.fp16[2*i+1] := CONVERT_FP32_TO_FP16(tsrc.row[row_index].fp32[row_chunk/4+i], RNE)
+///     FI
+/// ENDFOR
+/// dst[MAX_VL-1:VL] := 0
+/// zero_tileconfig_start()
+/// \endcode
+///
+/// This intrinsic corresponds to the \c TCVTROWPS2PHH instruction.
+///
+/// \param tsrc
+///    The source tile. Max size is 1024 Bytes.
+/// \param imm8
+///    The row of the source tile.
+#define _tile_cvtrowps2phhi(tsrc, imm8)                                        \
+  __builtin_ia32_tcvtrowps2phhi(tsrc, imm8)
+
 /// Moves a row from a tile register to a zmm destination register, converting
 ///    the fp32 source elements to fp16. It places the resulting fp16 elements
 ///    in the low 16 bits within each dword. The row of the tile is selected
@@ -198,6 +343,43 @@
 ///    The the row of the source tile.
 #define _tile_cvtrowps2phl(tsrc, row) __builtin_ia32_tcvtrowps2phl(tsrc, row)
 
+/// Moves a row from a tile register to a zmm destination register, converting
+///    the fp32 source elements to fp16. It places the resulting fp16 elements
+///    in the low 16 bits within each dword. The row of the tile is selected
+///    by an 8b immediate value.
+///
+/// \headerfile <x86intrin.h>
+///
+/// \code
+/// __m512i _tile_cvtrowps2phli(__tile tsrc, const unsigned int imm8);
+/// \endcode
+///
+/// \code{.operation}
+/// VL := 512
+/// VL_bytes := VL >> 3
+/// row_index := imm8 & 0x3f
+/// row_chunk := (imm8 >> 6) * VL_bytes
+/// FOR i := 0 TO (VL_bytes / 4) - 1
+///     IF i + row_chunk / 4 >= tsrc.colsb / 4
+///         dst.dword[i] := 0
+///     ELSE
+///         dst.word[2*i+1] := 0
+///         dst.fp16[2*i+0] := CONVERT_FP32_TO_FP16(tsrc.row[row_index].fp32[row_chunk/4+i], RNE)
+///     FI
+/// ENDFOR
+/// dst[MAX_VL-1:VL] := 0
+/// zero_tileconfig_start()
+/// \endcode
+///
+/// This intrinsic corresponds to the \c TCVTROWPS2PHL instruction.
+///
+/// \param tsrc
+///    The source tile. Max size is 1024 Bytes.
+/// \param imm8
+///    The row of the source tile.
+#define _tile_cvtrowps2phli(tsrc, imm8)                                        \
+  __builtin_ia32_tcvtrowps2phli(tsrc, imm8)
+
 /// Move one row of a tile data to a v16f32 data.
 /// The row of the tile is selected by a 32b GPR.
 ///
@@ -230,6 +412,38 @@
 /// \endcode
 #define _tile_movrow(a, b) ((__m512i)__builtin_ia32_tilemovrow(a, b))
 
+/// Move one row of a tile data to a v16f32 data.
+/// The row of the tile is selected by an 8b immediate value.
+///
+/// \headerfile <immintrin.h>
+///
+/// \code
+/// __m512 _tile_movrowi(__tile a, const unsigned b);
+/// \endcode
+///
+/// This intrinsic corresponds to the <c> TILEMOVROW </c> instruction.
+///
+/// \param a
+///     The 1st source tile. Max size is 1024 Bytes.
+/// \param b
+///     The 8-bit immediate selecting the row index and row chunk.
+/// \returns
+///     The destination v16f32 data. Size is 64 Bytes.
+///
+/// \code{.operation}
+/// VL := 512
+/// VL_bytes := VL>>3
+/// row_index := b&0x3f
+/// row_chunk := (b>>6) * VL_bytes
+/// FOR i := 0 TO (VL_bytes-1)
+///     IF (row_chunk + i >= a.colsb)
+///             dst.byte[i] := 0
+///     ELSE
+///             dst.byte[i] := a.row[row_index].byte[row_chunk+i]
+/// ENDFOR
+/// \endcode
+#define _tile_movrowi(a, b) ((__m512i)__builtin_ia32_tilemovrowi(a, b))
+
 /// This is internal intrinsic. C/C++ user should avoid calling it directly.
 
 static __inline__ __m512 __DEFAULT_FN_ATTRS_AVX512 _tile_cvtrowd2ps_internal(
diff --git a/lib/include/amxbf16transposeintrin.h b/lib/include/amxbf16transposeintrin.h
deleted file mode 100644
index 86f09f2ad8..0000000000
--- a/lib/include/amxbf16transposeintrin.h
+++ /dev/null
@@ -1,94 +0,0 @@
-/*===----- amxbf16transposeintrin.h - AMX-BF16 and AMX-TRANSPOSE ------------===
- *
- * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
- * See https://llvm.org/LICENSE.txt for license information.
- * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
- *
- *===------------------------------------------------------------------------===
- */
-
-#ifndef __IMMINTRIN_H
-#error                                                                         \
-    "Never use <amxbf16transposeintrin.h> directly; use <immintrin.h> instead."
-#endif /* __IMMINTRIN_H */
-
-#ifndef __AMX_BF16TRANSPOSEINTRIN_H
-#define __AMX_BF16TRANSPOSEINTRIN_H
-#ifdef __x86_64__
-
-/* Define the default attributes for the functions in this file. */
-#define __DEFAULT_FN_ATTRS                                                     \
-  __attribute__((__always_inline__, __nodebug__,                               \
-                 __target__("amx-bf16,amx-transpose")))
-
-/// Compute transpose and dot-product of BF16 (16-bit) floating-point pairs in
-///    tiles \a a and \a b, accumulating the intermediate single-precision
-///    (32-bit) floating-point elements with elements in \a dst, and store the
-///    32-bit result back to tile \a dst.
-///
-/// \headerfile <immintrin.h>
-///
-/// \code
-/// void _tile_tdpbf16ps (__tile dst, __tile a, __tile b)
-/// \endcode
-///
-/// \code{.operation}
-/// FOR m := 0 TO dst.rows - 1
-///	tmp := dst.row[m]
-///	FOR k := 0 TO (a.colsb / 4) - 1
-///		FOR n := 0 TO (dst.colsb / 4) - 1
-///			tmp.bf32[n] += FP32(a.row[m].bf16[2*k+0]) *
-///					FP32(b.row[k].bf16[2*n+0])
-///			tmp.bf32[n] += FP32(a.row[m].bf16[2*k+1]) *
-///					FP32(b.row[k].bf16[2*n+1])
-///		ENDFOR
-///	ENDFOR
-///	write_row_and_zero(dst, m, tmp, dst.colsb)
-/// ENDFOR
-/// zero_upper_rows(dst, dst.rows)
-/// zero_tileconfig_start()
-/// \endcode
-///
-/// This intrinsic corresponds to the \c TTDPBF16PS instruction.
-///
-/// \param dst
-///    The destination tile. Max size is 1024 Bytes.
-/// \param a
-///    The 1st source tile. Max size is 1024 Bytes.
-/// \param b
-///    The 2nd source tile. Max size is 1024 Bytes.
-#define _tile_tdpbf16ps(dst, a, b) __builtin_ia32_ttdpbf16ps((dst), (a), (b))
-
-/// This is internal intrinsic. C/C++ user should avoid calling it directly.
-static __inline__ _tile1024i __DEFAULT_FN_ATTRS
-_tile_tdpbf16ps_internal(unsigned short m, unsigned short n, unsigned short k,
-                         _tile1024i dst, _tile1024i src1, _tile1024i src2) {
-  return __builtin_ia32_ttdpbf16ps_internal(m, n, k, dst, src1, src2);
-}
-
-/// Compute transpose and dot-product of BF16 (16-bit) floating-point pairs in
-///    tiles src0 and src1, accumulating the intermediate single-precision
-///    (32-bit) floating-point elements with elements in "dst", and store the
-///    32-bit result back to tile "dst".
-///
-/// \headerfile <immintrin.h>
-///
-/// This intrinsic corresponds to the <c> TTDPBF16PS </c> instruction.
-///
-/// \param dst
-///    The destination tile. Max size is 1024 Bytes.
-/// \param src0
-///    The 1st source tile. Max size is 1024 Bytes.
-/// \param src1
-///    The 2nd source tile. Max size is 1024 Bytes.
-__DEFAULT_FN_ATTRS
-static __inline__ void __tile_tdpbf16ps(__tile1024i *dst, __tile1024i src0,
-                                        __tile1024i src1) {
-  dst->tile = _tile_tdpbf16ps_internal(src0.row, src1.col, src0.col, dst->tile,
-                                       src0.tile, src1.tile);
-}
-
-#undef __DEFAULT_FN_ATTRS
-
-#endif /* __x86_64__ */
-#endif /* __AMX_BF16TRANSPOSEINTRIN_H */
diff --git a/lib/include/amxcomplextransposeintrin.h b/lib/include/amxcomplextransposeintrin.h
deleted file mode 100644
index 11abaf98e9..0000000000
--- a/lib/include/amxcomplextransposeintrin.h
+++ /dev/null
@@ -1,303 +0,0 @@
-/*===----- amxcomplextransposeintrin.h - AMX-COMPLEX and AMX-TRANSPOSE ------===
- *
- * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
- * See https://llvm.org/LICENSE.txt for license information.
- * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
- *
- *===------------------------------------------------------------------------===
- */
-
-#ifndef __IMMINTRIN_H
-#error                                                                         \
-    "Never use <amxcomplextransposeintrin.h> directly; include <immintrin.h> instead."
-#endif // __IMMINTRIN_H
-
-#ifndef __AMX_COMPLEXTRANSPOSEINTRIN_H
-#define __AMX_COMPLEXTRANSPOSEINTRIN_H
-#ifdef __x86_64__
-
-#define __DEFAULT_FN_ATTRS                                                     \
-  __attribute__((__always_inline__, __nodebug__,                               \
-                 __target__("amx-complex,amx-transpose")))
-
-/// Perform matrix multiplication of two tiles containing complex elements and
-///    accumulate the results into a packed single precision tile. Each dword
-///    element in input tiles \a a and \a b is interpreted as a complex number
-///    with FP16 real part and FP16 imaginary part.
-/// Calculates the imaginary part of the result. For each possible combination
-///    of (transposed column of \a a, column of \a b), it performs a set of
-///    multiplication and accumulations on all corresponding complex numbers
-///    (one from \a a and one from \a b). The imaginary part of the \a a element
-///    is multiplied with the real part of the corresponding \a b element, and
-///    the real part of the \a a element is multiplied with the imaginary part
-///    of the corresponding \a b elements. The two accumulated results are
-///    added, and then accumulated into the corresponding row and column of
-///    \a dst.
-///
-/// \headerfile <x86intrin.h>
-///
-/// \code
-/// void _tile_tcmmimfp16ps(__tile dst, __tile a, __tile b);
-/// \endcode
-///
-/// \code{.operation}
-/// FOR m := 0 TO dst.rows - 1
-///	tmp := dst.row[m]
-///	FOR k := 0 TO a.rows - 1
-///		FOR n := 0 TO (dst.colsb / 4) - 1
-///			tmp.fp32[n] += FP32(a.row[m].fp16[2*k+0]) * FP32(b.row[k].fp16[2*n+1])
-///			tmp.fp32[n] += FP32(a.row[m].fp16[2*k+1]) * FP32(b.row[k].fp16[2*n+0])
-///		ENDFOR
-///	ENDFOR
-///	write_row_and_zero(dst, m, tmp, dst.colsb)
-/// ENDFOR
-/// zero_upper_rows(dst, dst.rows)
-/// zero_tileconfig_start()
-/// \endcode
-///
-/// This intrinsic corresponds to the \c TTCMMIMFP16PS instruction.
-///
-/// \param dst
-///    The destination tile. Max size is 1024 Bytes.
-/// \param a
-///    The 1st source tile. Max size is 1024 Bytes.
-/// \param b
-///    The 2nd source tile. Max size is 1024 Bytes.
-#define _tile_tcmmimfp16ps(dst, a, b)                                          \
-  __builtin_ia32_ttcmmimfp16ps((dst), (a), (b))
-
-/// Perform matrix multiplication of two tiles containing complex elements and
-///    accumulate the results into a packed single precision tile. Each dword
-///    element in input tiles \a a and \a b is interpreted as a complex number
-///    with FP16 real part and FP16 imaginary part.
-/// Calculates the real part of the result. For each possible combination
-///    of (rtransposed colum of \a a, column of \a b), it performs a set of
-///    multiplication and accumulations on all corresponding complex numbers
-///    (one from \a a and one from \a b). The real part of the \a a element is
-///    multiplied with the real part of the corresponding \a b element, and the
-///    negated imaginary part of the \a a element is multiplied with the
-///    imaginary part of the corresponding \a b elements. The two accumulated
-///    results are added, and then accumulated into the corresponding row and
-///    column of \a dst.
-///
-/// \headerfile <x86intrin.h>
-///
-/// \code
-/// void _tile_tcmmrlfp16ps(__tile dst, __tile a, __tile b);
-/// \endcode
-///
-/// \code{.operation}
-/// FOR m := 0 TO dst.rows - 1
-///	tmp := dst.row[m]
-///	FOR k := 0 TO a.rows - 1
-///		FOR n := 0 TO (dst.colsb / 4) - 1
-///			tmp.fp32[n] += FP32(a.row[m].fp16[2*k+0]) * FP32(b.row[k].fp16[2*n+0])
-///			tmp.fp32[n] += FP32(-a.row[m].fp16[2*k+1]) * FP32(b.row[k].fp16[2*n+1])
-///		ENDFOR
-///	ENDFOR
-///	write_row_and_zero(dst, m, tmp, dst.colsb)
-/// ENDFOR
-/// zero_upper_rows(dst, dst.rows)
-/// zero_tileconfig_start()
-/// \endcode
-///
-/// This intrinsic corresponds to the \c TTCMMIMFP16PS instruction.
-///
-/// \param dst
-///    The destination tile. Max size is 1024 Bytes.
-/// \param a
-///    The 1st source tile. Max size is 1024 Bytes.
-/// \param b
-///    The 2nd source tile. Max size is 1024 Bytes.
-#define _tile_tcmmrlfp16ps(dst, a, b)                                          \
-  __builtin_ia32_ttcmmrlfp16ps((dst), (a), (b))
-
-/// Perform matrix conjugate transpose and multiplication of two tiles
-///    containing complex elements and accumulate the results into a packed
-///    single precision tile. Each dword element in input tiles \a a and \a b
-///    is interpreted as a complex number with FP16 real part and FP16 imaginary
-///    part.
-/// Calculates the imaginary part of the result. For each possible combination
-///    of (transposed column of \a a, column of \a b), it performs a set of
-///    multiplication and accumulations on all corresponding complex numbers
-///    (one from \a a and one from \a b). The negated imaginary part of the \a a
-///    element is multiplied with the real part of the corresponding \a b
-///    element, and the real part of the \a a element is multiplied with the
-///    imaginary part of the corresponding \a b elements. The two accumulated
-///    results are added, and then accumulated into the corresponding row and
-///    column of \a dst.
-///
-/// \headerfile <x86intrin.h>
-///
-/// \code
-/// void _tile_conjtcmmimfp16ps(__tile dst, __tile a, __tile b);
-/// \endcode
-///
-/// \code{.operation}
-/// FOR m := 0 TO dst.rows - 1
-///	tmp := dst.row[m]
-///	FOR k := 0 TO a.rows - 1
-///		FOR n := 0 TO (dst.colsb / 4) - 1
-///			tmp.fp32[n] += FP32(a.row[m].fp16[2*k+0]) * FP32(b.row[k].fp16[2*n+1])
-///			tmp.fp32[n] += FP32(-a.row[m].fp16[2*k+1]) * FP32(b.row[k].fp16[2*n+0])
-///		ENDFOR
-///	ENDFOR
-///	write_row_and_zero(dst, m, tmp, dst.colsb)
-/// ENDFOR
-/// zero_upper_rows(dst, dst.rows)
-/// zero_tileconfig_start()
-/// \endcode
-///
-/// This intrinsic corresponds to the \c TCONJTCMMIMFP16PS instruction.
-///
-/// \param dst
-///    The destination tile. Max size is 1024 Bytes.
-/// \param a
-///    The 1st source tile. Max size is 1024 Bytes.
-/// \param b
-///    The 2nd source tile. Max size is 1024 Bytes.
-#define _tile_conjtcmmimfp16ps(dst, a, b)                                      \
-  __builtin_ia32_tconjtcmmimfp16ps((dst), (a), (b))
-
-/// Perform conjugate transpose of an FP16-pair of complex elements from \a a
-///    and writes the result to \a dst.
-///
-/// \headerfile <x86intrin.h>
-///
-/// \code
-/// void _tile_conjtfp16(__tile dst, __tile a);
-/// \endcode
-///
-/// \code{.operation}
-/// FOR i := 0 TO dst.rows - 1
-///	FOR j := 0 TO (dst.colsb / 4) - 1
-///		tmp.fp16[2*j+0] := a.row[j].fp16[2*i+0]
-///		tmp.fp16[2*j+1] := -a.row[j].fp16[2*i+1]
-///	ENDFOR
-///	write_row_and_zero(dst, i, tmp, dst.colsb)
-/// ENDFOR
-/// zero_upper_rows(dst, dst.rows)
-/// zero_tileconfig_start()
-/// \endcode
-///
-/// This intrinsic corresponds to the \c TCONJTFP16 instruction.
-///
-/// \param dst
-///    The destination tile. Max size is 1024 Bytes.
-/// \param a
-///    The source tile. Max size is 1024 Bytes.
-#define _tile_conjtfp16(dst, a) __builtin_ia32_tconjtfp16((dst), (a))
-
-static __inline__ _tile1024i __DEFAULT_FN_ATTRS _tile_tcmmimfp16ps_internal(
-    unsigned short m, unsigned short n, unsigned short k, _tile1024i dst,
-    _tile1024i src1, _tile1024i src2) {
-  return __builtin_ia32_ttcmmimfp16ps_internal(m, n, k, dst, src1, src2);
-}
-
-static __inline__ _tile1024i __DEFAULT_FN_ATTRS _tile_tcmmrlfp16ps_internal(
-    unsigned short m, unsigned short n, unsigned short k, _tile1024i dst,
-    _tile1024i src1, _tile1024i src2) {
-  return __builtin_ia32_ttcmmrlfp16ps_internal(m, n, k, dst, src1, src2);
-}
-
-static __inline__ _tile1024i __DEFAULT_FN_ATTRS _tile_conjtcmmimfp16ps_internal(
-    unsigned short m, unsigned short n, unsigned short k, _tile1024i dst,
-    _tile1024i src1, _tile1024i src2) {
-  return __builtin_ia32_tconjtcmmimfp16ps_internal(m, n, k, dst, src1, src2);
-}
-
-static __inline__ _tile1024i __DEFAULT_FN_ATTRS
-_tile_conjtfp16_internal(unsigned short m, unsigned short n, _tile1024i src) {
-  return __builtin_ia32_tconjtfp16_internal(m, n, src);
-}
-
-/// Perform matrix multiplication of two tiles containing complex elements and
-///    accumulate the results into a packed single precision tile. Each dword
-///    element in input tiles src0 and src1 is interpreted as a complex number
-///    with FP16 real part and FP16 imaginary part.
-///    This function calculates the imaginary part of the result.
-///
-/// \headerfile <immintrin.h>
-///
-/// This intrinsic corresponds to the <c> TTCMMIMFP16PS </c> instruction.
-///
-/// \param dst
-///    The destination tile. Max size is 1024 Bytes.
-/// \param src0
-///    The 1st source tile. Max size is 1024 Bytes.
-/// \param src1
-///    The 2nd source tile. Max size is 1024 Bytes.
-__DEFAULT_FN_ATTRS
-static void __tile_tcmmimfp16ps(__tile1024i *dst, __tile1024i src0,
-                                __tile1024i src1) {
-  dst->tile = _tile_tcmmimfp16ps_internal(src0.row, src1.col, src0.col,
-                                          dst->tile, src0.tile, src1.tile);
-}
-
-/// Perform matrix multiplication of two tiles containing complex elements and
-///    accumulate the results into a packed single precision tile. Each dword
-///    element in input tiles src0 and src1 is interpreted as a complex number
-///    with FP16 real part and FP16 imaginary part.
-///    This function calculates the real part of the result.
-///
-/// \headerfile <immintrin.h>
-///
-/// This intrinsic corresponds to the <c> TTCMMRLFP16PS </c> instruction.
-///
-/// \param dst
-///    The destination tile. Max size is 1024 Bytes.
-/// \param src0
-///    The 1st source tile. Max size is 1024 Bytes.
-/// \param src1
-///    The 2nd source tile. Max size is 1024 Bytes.
-__DEFAULT_FN_ATTRS
-static void __tile_tcmmrlfp16ps(__tile1024i *dst, __tile1024i src0,
-                                __tile1024i src1) {
-  dst->tile = _tile_tcmmrlfp16ps_internal(src0.row, src1.col, src0.col,
-                                          dst->tile, src0.tile, src1.tile);
-}
-
-/// Perform matrix conjugate transpose and multiplication of two tiles
-///    containing complex elements and accumulate the results into a packed
-///    single precision tile. Each dword element in input tiles src0 and src1
-///    is interpreted as a complex number with FP16 real part and FP16 imaginary
-///    part.
-///    This function calculates the imaginary part of the result.
-///
-/// \headerfile <immintrin.h>
-///
-/// This intrinsic corresponds to the <c> TCONJTCMMIMFP16PS </c> instruction.
-///
-/// \param dst
-///    The destination tile. Max size is 1024 Bytes.
-/// \param src0
-///    The 1st source tile. Max size is 1024 Bytes.
-/// \param src1
-///    The 2nd source tile. Max size is 1024 Bytes.
-__DEFAULT_FN_ATTRS
-static void __tile_conjtcmmimfp16ps(__tile1024i *dst, __tile1024i src0,
-                                    __tile1024i src1) {
-  dst->tile = _tile_conjtcmmimfp16ps_internal(src0.row, src1.col, src0.col,
-                                              dst->tile, src0.tile, src1.tile);
-}
-
-/// Perform conjugate transpose of an FP16-pair of complex elements from src and
-///    writes the result to dst.
-///
-/// \headerfile <immintrin.h>
-///
-/// This intrinsic corresponds to the <c> TCONJTFP16 </c> instruction.
-///
-/// \param dst
-///    The destination tile. Max size is 1024 Bytes.
-/// \param src
-///    The source tile. Max size is 1024 Bytes.
-__DEFAULT_FN_ATTRS
-static void __tile_conjtfp16(__tile1024i *dst, __tile1024i src) {
-  dst->tile = _tile_conjtfp16_internal(src.row, src.col, src.tile);
-}
-
-#undef __DEFAULT_FN_ATTRS
-
-#endif // __x86_64__
-#endif // __AMX_COMPLEXTRANSPOSEINTRIN_H
diff --git a/lib/include/amxfp16transposeintrin.h b/lib/include/amxfp16transposeintrin.h
deleted file mode 100644
index 191f8c6097..0000000000
--- a/lib/include/amxfp16transposeintrin.h
+++ /dev/null
@@ -1,94 +0,0 @@
-/*===----- amxfp16transposeintrin.h - AMX-FP16 and AMX-TRANSPOSE ------------===
- *
- * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
- * See https://llvm.org/LICENSE.txt for license information.
- * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
- *
- *===------------------------------------------------------------------------===
- */
-
-#ifndef __IMMINTRIN_H
-#error                                                                         \
-    "Never use <amxfp16transposeintrin.h> directly; use <immintrin.h> instead."
-#endif /* __IMMINTRIN_H */
-
-#ifndef __AMX_FP16TRANSPOSEINTRIN_H
-#define __AMX_FP16TRANSPOSEINTRIN_H
-#ifdef __x86_64__
-
-/* Define the default attributes for the functions in this file. */
-#define __DEFAULT_FN_ATTRS                                                     \
-  __attribute__((__always_inline__, __nodebug__,                               \
-                 __target__("amx-fp16,amx-transpose")))
-
-/// Compute transpose and dot-product of FP16 (16-bit) floating-point pairs in
-///    tiles \a a and \a b, accumulating the intermediate single-precision
-///    (32-bit) floating-point elements with elements in \a dst, and store the
-///    32-bit result back to tile \a dst.
-///
-/// \headerfile <immintrin.h>
-///
-/// \code
-/// void _tile_tdpfp16ps (__tile dst, __tile a, __tile b)
-/// \endcode
-///
-/// \code{.operation}
-/// FOR m := 0 TO dst.rows - 1
-///	tmp := dst.row[m]
-///	FOR k := 0 TO (a.colsb / 4) - 1
-///		FOR n := 0 TO (dst.colsb / 4) - 1
-///			tmp.fp32[n] += FP32(a.row[m].fp16[2*k+0]) *
-///					FP32(b.row[k].fp16[2*n+0])
-///			tmp.fp32[n] += FP32(a.row[m].fp16[2*k+1]) *
-///					FP32(b.row[k].fp16[2*n+1])
-///		ENDFOR
-///	ENDFOR
-///	write_row_and_zero(dst, m, tmp, dst.colsb)
-/// ENDFOR
-/// zero_upper_rows(dst, dst.rows)
-/// zero_tileconfig_start()
-/// \endcode
-///
-/// This intrinsic corresponds to the \c TTDPFP16PS instruction.
-///
-/// \param dst
-///    The destination tile. Max size is 1024 Bytes.
-/// \param a
-///    The 1st source tile. Max size is 1024 Bytes.
-/// \param b
-///    The 2nd source tile. Max size is 1024 Bytes.
-#define _tile_tdpfp16ps(dst, a, b) __builtin_ia32_ttdpfp16ps((dst), (a), (b))
-
-/// This is internal intrinsic. C/C++ user should avoid calling it directly.
-static __inline__ _tile1024i __DEFAULT_FN_ATTRS
-_tile_tdpfp16ps_internal(unsigned short m, unsigned short n, unsigned short k,
-                         _tile1024i dst, _tile1024i src1, _tile1024i src2) {
-  return __builtin_ia32_ttdpfp16ps_internal(m, n, k, dst, src1, src2);
-}
-
-/// Compute transpose and dot-product of FP16 (16-bit) floating-point pairs in
-///    tiles src0 and src1, accumulating the intermediate single-precision
-///    (32-bit) floating-point elements with elements in "dst", and store the
-///    32-bit result back to tile "dst".
-///
-/// \headerfile <immintrin.h>
-///
-/// This intrinsic corresponds to the <c> TTDPFP16PS </c> instruction.
-///
-/// \param dst
-///    The destination tile. Max size is 1024 Bytes.
-/// \param src0
-///    The 1st source tile. Max size is 1024 Bytes.
-/// \param src1
-///    The 2nd source tile. Max size is 1024 Bytes.
-__DEFAULT_FN_ATTRS
-static __inline__ void __tile_tdpfp16ps(__tile1024i *dst, __tile1024i src0,
-                                        __tile1024i src1) {
-  dst->tile = _tile_tdpfp16ps_internal(src0.row, src1.col, src0.col, dst->tile,
-                                       src0.tile, src1.tile);
-}
-
-#undef __DEFAULT_FN_ATTRS
-
-#endif /* __x86_64__ */
-#endif /* __AMX_FP16TRANSPOSEINTRIN_H */
diff --git a/lib/include/amxintrin.h b/lib/include/amxintrin.h
index a7da10d995..208aa35806 100644
--- a/lib/include/amxintrin.h
+++ b/lib/include/amxintrin.h
@@ -230,8 +230,6 @@ static __inline__ void __DEFAULT_FN_ATTRS_TILE _tile_release(void) {
 /// bytes. Since there is no 2D type in llvm IR, we use vector type to
 /// represent 2D tile and the fixed size is maximum amx tile register size.
 typedef int _tile1024i __attribute__((__vector_size__(1024), __aligned__(64)));
-typedef int _tile1024i_1024a
-    __attribute__((__vector_size__(1024), __aligned__(1024)));
 
 /// This is internal intrinsic. C/C++ user should avoid calling it directly.
 static __inline__ _tile1024i __DEFAULT_FN_ATTRS_TILE
diff --git a/lib/include/amxmovrstransposeintrin.h b/lib/include/amxmovrstransposeintrin.h
deleted file mode 100644
index 5f48cba949..0000000000
--- a/lib/include/amxmovrstransposeintrin.h
+++ /dev/null
@@ -1,200 +0,0 @@
-/* ===--- amxmovrstransposeintrin.h - AMX_MOVRS_TRANSPOSE intrinsics --------===
- *
- * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
- * See https://llvm.org/LICENSE.txt for license information.
- * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
- *
- * ===-----------------------------------------------------------------------===
- */
-
-#ifndef __IMMINTRIN_H
-#error                                                                         \
-    "Never use <amxmovrstransposeintrin.h> directly; use <immintrin.h> instead."
-#endif /* __IMMINTRIN_H */
-
-#ifndef __AMX_MOVRS_TRANSPOSEINTRIN_H
-#define __AMX_MOVRS_TRANSPOSEINTRIN_H
-#ifdef __x86_64__
-
-#define __DEFAULT_FN_ATTRS                                                     \
-  __attribute__((__always_inline__, __nodebug__,                               \
-                 __target__("amx-transpose,amx-movrs")))
-
-#define _tile_2rpntlvwz0rs(tdst, base, stride)                                 \
-  __builtin_ia32_t2rpntlvwz0rs(tdst, base, stride)
-#define _tile_2rpntlvwz0rst1(tdst, base, stride)                               \
-  __builtin_ia32_t2rpntlvwz0rst1(tdst, base, stride)
-#define _tile_2rpntlvwz1rs(tdst, base, stride)                                 \
-  __builtin_ia32_t2rpntlvwz1rs(tdst, base, stride)
-#define _tile_2rpntlvwz1rst1(tdst, base, stride)                               \
-  __builtin_ia32_t2rpntlvwz1rst1(tdst, base, stride)
-
-static __inline__ void __DEFAULT_FN_ATTRS _tile_2rpntlvwz0rs_internal(
-    unsigned short row, unsigned short col0, unsigned short col1,
-    _tile1024i *dst0, _tile1024i *dst1, const void *base,
-    __SIZE_TYPE__ stride) {
-  // Use __tile1024i_1024a* to escape the alignment check in
-  // clang/test/Headers/x86-intrinsics-headers-clean.cpp
-  __builtin_ia32_t2rpntlvwz0rs_internal(
-      row, col0, col1, (_tile1024i_1024a *)dst0, (_tile1024i_1024a *)dst1, base,
-      (__SIZE_TYPE__)(stride));
-}
-
-static __inline__ void __DEFAULT_FN_ATTRS _tile_2rpntlvwz0rst1_internal(
-    unsigned short row, unsigned short col0, unsigned short col1,
-    _tile1024i *dst0, _tile1024i *dst1, const void *base,
-    __SIZE_TYPE__ stride) {
-  __builtin_ia32_t2rpntlvwz0rst1_internal(
-      row, col0, col1, (_tile1024i_1024a *)dst0, (_tile1024i_1024a *)dst1, base,
-      (__SIZE_TYPE__)(stride));
-}
-
-static __inline__ void __DEFAULT_FN_ATTRS _tile_2rpntlvwz1rs_internal(
-    unsigned short row, unsigned short col0, unsigned short col1,
-    _tile1024i *dst0, _tile1024i *dst1, const void *base,
-    __SIZE_TYPE__ stride) {
-  __builtin_ia32_t2rpntlvwz1rs_internal(
-      row, col0, col1, (_tile1024i_1024a *)dst0, (_tile1024i_1024a *)dst1, base,
-      (__SIZE_TYPE__)(stride));
-}
-
-static __inline__ void __DEFAULT_FN_ATTRS _tile_2rpntlvwz1rst1_internal(
-    unsigned short row, unsigned short col0, unsigned short col1,
-    _tile1024i *dst0, _tile1024i *dst1, const void *base,
-    __SIZE_TYPE__ stride) {
-  __builtin_ia32_t2rpntlvwz1rst1_internal(
-      row, col0, col1, (_tile1024i_1024a *)dst0, (_tile1024i_1024a *)dst1, base,
-      (__SIZE_TYPE__)(stride));
-}
-
-/// Converts a pair of tiles from memory into VNNI format, and places the
-/// results in a pair of destinations specified by dst. The pair of tiles
-/// in memory is specified via a tsib; the second tile is after the first
-/// one, separated by the same stride that separates each row.
-/// The tile configuration for the destination tiles indicates the amount
-/// of data to read from memory. The instruction will load a number of rows
-/// that is equal to twice the number of rows in tmm1. The size of each row
-/// is equal to the average width of the destination tiles. If the second
-/// tile is configured with zero rows and columns, only the first tile will
-/// be written.
-/// Provides a hint to the implementation that the data will likely become
-/// read shared in the near future and the data caching can be optimized.
-///
-/// \headerfile <immintrin.h>
-///
-/// This intrinsic corresponds to the <c> T2RPNTLVWZ0RS </c> instruction.
-///
-/// \param dst0
-///    First tile of destination tile pair. Max size is 1024i*2 Bytes.
-/// \param dst1
-///    Second tile of destination tile pair. Max size is 1024i*2 Bytes.
-/// \param base
-///    A pointer to base address.
-/// \param stride
-///    The stride between the rows' data to be loaded in memory.
-__DEFAULT_FN_ATTRS
-static void __tile_2rpntlvwz0rs(__tile1024i *dst0, __tile1024i *dst1,
-                                const void *base, __SIZE_TYPE__ stride) {
-  _tile_2rpntlvwz0rs_internal(dst0->row, dst0->col, dst1->col, &dst0->tile,
-                              &dst1->tile, base, stride);
-}
-
-/// Converts a pair of tiles from memory into VNNI format, and places the
-/// results in a pair of destinations specified by dst. The pair of tiles
-/// in memory is specified via a tsib; the second tile is after the first
-/// one, separated by the same stride that separates each row.
-/// The tile configuration for the destination tiles indicates the amount
-/// of data to read from memory. The instruction will load a number of rows
-/// that is equal to twice the number of rows in tmm1. The size of each row
-/// is equal to the average width of the destination tiles. If the second
-/// tile is configured with zero rows and columns, only the first tile will
-/// be written.
-///
-/// \headerfile <immintrin.h>
-///
-/// This intrinsic corresponds to the <c> T2RPNTLVWZ0T1RS </c> instruction.
-///
-/// \param dst0
-///    First tile of destination tile pair. Max size is 1024i*2 Bytes.
-/// \param dst1
-///    Second tile of destination tile pair. Max size is 1024i*2 Bytes.
-/// \param base
-///    A pointer to base address.
-/// \param stride
-///    The stride between the rows' data to be loaded in memory.
-__DEFAULT_FN_ATTRS
-static void __tile_2rpntlvwz0rst1(__tile1024i *dst0, __tile1024i *dst1,
-                                  const void *base, __SIZE_TYPE__ stride) {
-  _tile_2rpntlvwz0rst1_internal(dst0->row, dst0->col, dst1->col, &dst0->tile,
-                                &dst1->tile, base, stride);
-}
-
-/// Converts a pair of tiles from memory into VNNI format, and places the
-/// results in a pair of destinations specified by dst. The pair of tiles
-/// in memory is specified via a tsib; the second tile is after the first
-/// one, separated by the same stride that separates each row.
-/// The tile configuration for the destination tiles indicates the amount
-/// of data to read from memory. The instruction will load a number of rows
-/// that is equal to twice the number of rows in tmm1. The size of each row
-/// is equal to the average width of the destination tiles. If the second
-/// tile is configured with zero rows and columns, only the first tile will
-/// be written. The last row will be not be read from memory but instead
-/// filled with zeros.
-/// Provides a hint to the implementation that the data will likely become
-/// read shared in the near future and the data caching can be optimized.
-///
-/// \headerfile <immintrin.h>
-///
-/// This intrinsic corresponds to the <c> T2RPNTLVWZ1 </c> instruction.
-///
-/// \param dst0
-///    First tile of destination tile pair. Max size is 1024i*2 Bytes.
-/// \param dst1
-///    Second tile of destination tile pair. Max size is 1024i*2 Bytes.
-/// \param base
-///    A pointer to base address.
-/// \param stride
-///    The stride between the rows' data to be loaded in memory.
-__DEFAULT_FN_ATTRS
-static void __tile_2rpntlvwz1rs(__tile1024i *dst0, __tile1024i *dst1,
-                                const void *base, __SIZE_TYPE__ stride) {
-  _tile_2rpntlvwz1rs_internal(dst0->row, dst0->col, dst1->col, &dst0->tile,
-                              &dst1->tile, base, stride);
-}
-
-/// Converts a pair of tiles from memory into VNNI format, and places the
-/// results in a pair of destinations specified by dst. The pair of tiles
-/// in memory is specified via a tsib; the second tile is after the first
-/// one, separated by the same stride that separates each row.
-/// The tile configuration for the destination tiles indicates the amount
-/// of data to read from memory. The instruction will load a number of rows
-/// that is equal to twice the number of rows in tmm1. The size of each row
-/// is equal to the average width of the destination tiles. If the second
-/// tile is configured with zero rows and columns, only the first tile will
-/// be written. The last row will be not be read from memory but instead
-/// filled with zeros.
-/// Provides a hint to the implementation that the data will likely become
-/// read shared in the near future and the data caching can be optimized.
-///
-/// \headerfile <immintrin.h>
-///
-/// This intrinsic corresponds to the <c> T2RPNTLVWZ1T1RS </c> instruction.
-///
-/// \param dst0
-///    First tile of destination tile pair. Max size is 1024i*2 Bytes.
-/// \param dst1
-///    Second tile of destination tile pair. Max size is 1024i*2 Bytes.
-/// \param base
-///    A pointer to base address.
-/// \param stride
-///    The stride between the rows' data to be loaded in memory.
-__DEFAULT_FN_ATTRS
-static void __tile_2rpntlvwz1rst1(__tile1024i *dst0, __tile1024i *dst1,
-                                  const void *base, __SIZE_TYPE__ stride) {
-  _tile_2rpntlvwz1rst1_internal(dst0->row, dst0->col, dst1->col, &dst0->tile,
-                                &dst1->tile, base, stride);
-}
-
-#undef __DEFAULT_FN_ATTRS
-#endif /* __x86_64__ */
-#endif /* __AMX_MOVRS_TRANSPOSEINTRIN_H */
diff --git a/lib/include/amxtf32transposeintrin.h b/lib/include/amxtf32transposeintrin.h
deleted file mode 100644
index e1b90c1adf..0000000000
--- a/lib/include/amxtf32transposeintrin.h
+++ /dev/null
@@ -1,105 +0,0 @@
-/*===--------- amxtf32transposeintrin.h - AMX-TF32 and AMX-TRANSPOSE --------===
- *
- * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
- * See https://llvm.org/LICENSE.txt for license information.
- * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
- *
- *===------------------------------------------------------------------------===
- */
-#ifndef __IMMINTRIN_H
-#error                                                                         \
-    "Never use <amxtf32transposeintrin.h> directly; include <immintrin.h> instead."
-#endif // __IMMINTRIN_H
-
-#ifndef __AMX_TF32TRANSPOSEINTRIN_H
-#define __AMX_TF32TRANSPOSEINTRIN_H
-#ifdef __x86_64__
-
-#define __DEFAULT_FN_ATTRS_TF32_TRANSPOSE                                      \
-  __attribute__((__always_inline__, __nodebug__,                               \
-                 __target__("amx-tf32,amx-transpose")))
-
-/// \code
-/// void _tile_tmmultf32ps(constexpr int srcdst, constexpr int a, \
-///                        constexpr int b);
-/// \endcode
-///
-/// This intrinsic corresponds to the <c> TTMMULTF32PS </c> instruction.
-///
-/// \param srcdst
-/// 	The destination tile. Max size is 1024 Bytes.
-/// \param a
-/// 	The 1st source tile. Max size is 1024 Bytes.
-/// \param b
-/// 	The 2nd source tile. Max size is 1024 Bytes.
-///
-/// \code{.operation}
-/// DEFINE zero_lower_mantissa_bits_fp32(x[31:0]) {
-/// 	dword[12:0] := 0
-/// 	dword[31:13] := x[31:13]
-/// 	return dword
-/// }
-///
-/// DEFINE silence_snan_fp32(x[31:0]) {
-/// 	IF (x.exponent == 255 and x.fraction != 0 and x.fraction[22] == 0)
-/// 		x.fraction[22] := 1
-/// 	return x
-/// }
-///
-/// elements_dest:= srcdst.colsb/4
-///
-/// FOR m := 0 TO (srcdst.rows-1)
-/// 	tmp[511:0] := 0
-/// 	FOR k := 0 TO (a.rows-1)
-/// 		FOR n := 0 TO (elements_dest-1)
-/// 			a1e := silence_snan_fp32(a.row[k].fp32[m])
-/// 			a2e := silence_snan_fp32(b.row[k].fp32[n])
-/// 			s1e := zero_lower_mantissa_bits_fp32(a1e)
-/// 			s2e := zero_lower_mantissa_bits_fp32(a2e)
-/// 			tmp.fp32[n] += s1e * s2e
-/// 		ENDFOR
-/// 	ENDFOR
-///
-/// 	FOR n := 0 TO (elements_dest-1)
-/// 		tmp.fp32[n] += srcdst.row[m].fp32[n]
-/// 	ENDFOR
-///	write_row_and_zero(srcdst, m, tmp, srcdst.colsb)
-///
-/// ENDFOR
-///
-/// zero_upper_rows(srcdst, srcdst.rows)
-/// zero_tileconfig_start()
-/// \endcode
-#define _tile_tmmultf32ps(srcdst, a, b)                                        \
-  __builtin_ia32_ttmmultf32ps((srcdst), (a), (b))
-
-// dst = m x n (srcdest), src1 = k x m, src2 = k x n
-static __inline__ _tile1024i __DEFAULT_FN_ATTRS_TF32_TRANSPOSE
-_tile_tmmultf32ps_internal(unsigned short m, unsigned short n, unsigned short k,
-                           _tile1024i dst, _tile1024i src1, _tile1024i src2) {
-  return __builtin_ia32_ttmmultf32ps_internal(m, n, k, dst, src1, src2);
-}
-
-/// Compute transpose and do Matrix Multiplication of src0 and src1, and then do
-/// Matrix Plus with dst. All the calculation is base on float32 but with the
-/// lower 13-bit set to 0.
-///
-/// \headerfile <immintrin.h>
-///
-/// This intrinsic corresponds to the <c> TTMMULTF32PS </c> instruction.
-///
-/// \param dst
-///    The destination tile. Max size is 1024 Bytes.
-/// \param src0
-///    The 1st source tile. Max size is 1024 Bytes.
-/// \param src1
-///    The 2nd source tile. Max size is 1024 Bytes.
-__DEFAULT_FN_ATTRS_TF32_TRANSPOSE
-static void __tile_tmmultf32ps(__tile1024i *dst, __tile1024i src0,
-                               __tile1024i src1) {
-  dst->tile = _tile_tmmultf32ps_internal(src0.row, src1.col, src0.col,
-                                         dst->tile, src0.tile, src1.tile);
-}
-
-#endif // __x86_64__
-#endif // __AMX_TF32TRANSPOSEINTRIN_H
diff --git a/lib/include/amxtransposeintrin.h b/lib/include/amxtransposeintrin.h
deleted file mode 100644
index b3fa37d766..0000000000
--- a/lib/include/amxtransposeintrin.h
+++ /dev/null
@@ -1,248 +0,0 @@
-/* ===--- amxtransposeintrin.h - AMX_TRANSPOSE intrinsics -*- C++ -*---------===
- *
- * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
- * See https://llvm.org/LICENSE.txt for license information.
- * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
- *
- * ===-----------------------------------------------------------------------===
- */
-
-#ifndef __IMMINTRIN_H
-#error "Never use <amxtransposeintrin.h> directly; use <immintrin.h> instead."
-#endif /* __IMMINTRIN_H */
-
-#ifndef __AMX_TRANSPOSEINTRIN_H
-#define __AMX_TRANSPOSEINTRIN_H
-#ifdef __x86_64__
-
-#define __DEFAULT_FN_ATTRS_TRANSPOSE                                           \
-  __attribute__((__always_inline__, __nodebug__, __target__("amx-transpose")))
-
-#define _tile_2rpntlvwz0(tdst, base, stride)                                   \
-  __builtin_ia32_t2rpntlvwz0(tdst, base, stride)
-#define _tile_2rpntlvwz0t1(tdst, base, stride)                                 \
-  __builtin_ia32_t2rpntlvwz0t1(tdst, base, stride)
-#define _tile_2rpntlvwz1(tdst, base, stride)                                   \
-  __builtin_ia32_t2rpntlvwz1(tdst, base, stride)
-#define _tile_2rpntlvwz1t1(tdst, base, stride)                                 \
-  __builtin_ia32_t2rpntlvwz1t1(tdst, base, stride)
-
-/// Transpose 32-bit elements from \a src and write the result to \a dst.
-///
-/// \headerfile <immintrin.h>
-///
-/// \code
-/// void _tile_transposed(__tile dst, __tile src);
-/// \endcode
-///
-/// This intrinsic corresponds to the <c> TTRANSPOSED </c> instruction.
-///
-/// \param dst
-/// 	The destination tile. Max size is 1024 Bytes.
-/// \param src
-/// 	The source tile. Max size is 1024 Bytes.
-///
-/// \code{.operation}
-///
-/// FOR i := 0 TO (dst.rows-1)
-/// 	tmp[511:0] := 0
-/// 	FOR j := 0 TO (dst.colsb/4-1)
-/// 		tmp.dword[j] := src.row[j].dword[i]
-/// 	ENDFOR
-/// 	dst.row[i] := tmp
-/// ENDFOR
-///
-/// zero_upper_rows(dst, dst.rows)
-/// zero_tileconfig_start()
-/// \endcode
-#define _tile_transposed(dst, src) __builtin_ia32_ttransposed(dst, src)
-
-static __inline__ void __DEFAULT_FN_ATTRS_TRANSPOSE _tile_2rpntlvwz0_internal(
-    unsigned short row, unsigned short col0, unsigned short col1,
-    _tile1024i *dst0, _tile1024i *dst1, const void *base,
-    __SIZE_TYPE__ stride) {
-  // Use __tile1024i_1024a* to escape the alignment check in
-  // clang/test/Headers/x86-intrinsics-headers-clean.cpp
-  __builtin_ia32_t2rpntlvwz0_internal(row, col0, col1, (_tile1024i_1024a *)dst0,
-                                      (_tile1024i_1024a *)dst1, base,
-                                      (__SIZE_TYPE__)(stride));
-}
-
-static __inline__ void __DEFAULT_FN_ATTRS_TRANSPOSE _tile_2rpntlvwz0t1_internal(
-    unsigned short row, unsigned short col0, unsigned short col1,
-    _tile1024i *dst0, _tile1024i *dst1, const void *base,
-    __SIZE_TYPE__ stride) {
-  __builtin_ia32_t2rpntlvwz0t1_internal(
-      row, col0, col1, (_tile1024i_1024a *)dst0, (_tile1024i_1024a *)dst1, base,
-      (__SIZE_TYPE__)(stride));
-}
-
-static __inline__ void __DEFAULT_FN_ATTRS_TRANSPOSE _tile_2rpntlvwz1_internal(
-    unsigned short row, unsigned short col0, unsigned short col1,
-    _tile1024i *dst0, _tile1024i *dst1, const void *base,
-    __SIZE_TYPE__ stride) {
-  __builtin_ia32_t2rpntlvwz1_internal(row, col0, col1, (_tile1024i_1024a *)dst0,
-                                      (_tile1024i_1024a *)dst1, base,
-                                      (__SIZE_TYPE__)(stride));
-}
-
-static __inline__ void __DEFAULT_FN_ATTRS_TRANSPOSE _tile_2rpntlvwz1t1_internal(
-    unsigned short row, unsigned short col0, unsigned short col1,
-    _tile1024i *dst0, _tile1024i *dst1, const void *base,
-    __SIZE_TYPE__ stride) {
-  __builtin_ia32_t2rpntlvwz1t1_internal(
-      row, col0, col1, (_tile1024i_1024a *)dst0, (_tile1024i_1024a *)dst1, base,
-      (__SIZE_TYPE__)(stride));
-}
-
-// This is internal intrinsic. C/C++ user should avoid calling it directly.
-static __inline__ _tile1024i __DEFAULT_FN_ATTRS_TRANSPOSE
-_tile_transposed_internal(unsigned short m, unsigned short n, _tile1024i src) {
-  return __builtin_ia32_ttransposed_internal(m, n, src);
-}
-
-/// Converts a pair of tiles from memory into VNNI format, and places the
-/// results in a pair of destinations specified by dst. The pair of tiles
-/// in memory is specified via a tsib; the second tile is after the first
-/// one, separated by the same stride that separates each row.
-/// The tile configuration for the destination tiles indicates the amount
-/// of data to read from memory. The instruction will load a number of rows
-/// that is equal to twice the number of rows in tmm1. The size of each row
-/// is equal to the average width of the destination tiles. If the second
-/// tile is configured with zero rows and columns, only the first tile will
-/// be written.
-/// Provides a hint to the implementation that the data will likely not be
-/// reused in the near future and the data caching can be optimized.
-///
-/// \headerfile <immintrin.h>
-///
-/// This intrinsic corresponds to the <c> T2RPNTLVWZ0 </c> instruction.
-///
-/// \param dst0
-///    First tile of destination tile pair. Max size is 1024i*2 Bytes.
-/// \param dst1
-///    Second tile of destination tile pair. Max size is 1024i*2 Bytes.
-/// \param base
-///    A pointer to base address.
-/// \param stride
-///    The stride between the rows' data to be loaded in memory.
-__DEFAULT_FN_ATTRS_TRANSPOSE
-static void __tile_2rpntlvwz0(__tile1024i *dst0, __tile1024i *dst1,
-                              const void *base, __SIZE_TYPE__ stride) {
-  _tile_2rpntlvwz0_internal(dst0->row, dst0->col, dst1->col, &dst0->tile,
-                            &dst1->tile, base, stride);
-}
-
-/// Converts a pair of tiles from memory into VNNI format, and places the
-/// results in a pair of destinations specified by dst. The pair of tiles
-/// in memory is specified via a tsib; the second tile is after the first
-/// one, separated by the same stride that separates each row.
-/// The tile configuration for the destination tiles indicates the amount
-/// of data to read from memory. The instruction will load a number of rows
-/// that is equal to twice the number of rows in tmm1. The size of each row
-/// is equal to the average width of the destination tiles. If the second
-/// tile is configured with zero rows and columns, only the first tile will
-/// be written.
-///
-/// \headerfile <immintrin.h>
-///
-/// This intrinsic corresponds to the <c> T2RPNTLVWZ0T1 </c> instruction.
-///
-/// \param dst0
-///    First tile of destination tile pair. Max size is 1024i*2 Bytes.
-/// \param dst1
-///    Second tile of destination tile pair. Max size is 1024i*2 Bytes.
-/// \param base
-///    A pointer to base address.
-/// \param stride
-///    The stride between the rows' data to be loaded in memory.
-__DEFAULT_FN_ATTRS_TRANSPOSE
-static void __tile_2rpntlvwz0t1(__tile1024i *dst0, __tile1024i *dst1,
-                                const void *base, __SIZE_TYPE__ stride) {
-  _tile_2rpntlvwz0t1_internal(dst0->row, dst0->col, dst1->col, &dst0->tile,
-                              &dst1->tile, base, stride);
-}
-
-/// Converts a pair of tiles from memory into VNNI format, and places the
-/// results in a pair of destinations specified by dst. The pair of tiles
-/// in memory is specified via a tsib; the second tile is after the first
-/// one, separated by the same stride that separates each row.
-/// The tile configuration for the destination tiles indicates the amount
-/// of data to read from memory. The instruction will load a number of rows
-/// that is equal to twice the number of rows in tmm1. The size of each row
-/// is equal to the average width of the destination tiles. If the second
-/// tile is configured with zero rows and columns, only the first tile will
-/// be written. The last row will be not be read from memory but instead
-/// filled with zeros.
-/// Provides a hint to the implementation that the data will likely not be
-/// reused in the near future and the data caching can be optimized.
-///
-/// \headerfile <immintrin.h>
-///
-/// This intrinsic corresponds to the <c> T2RPNTLVWZ1 </c> instruction.
-///
-/// \param dst0
-///    First tile of destination tile pair. Max size is 1024i*2 Bytes.
-/// \param dst1
-///    Second tile of destination tile pair. Max size is 1024i*2 Bytes.
-/// \param base
-///    A pointer to base address.
-/// \param stride
-///    The stride between the rows' data to be loaded in memory.
-__DEFAULT_FN_ATTRS_TRANSPOSE
-static void __tile_2rpntlvwz1(__tile1024i *dst0, __tile1024i *dst1,
-                              const void *base, __SIZE_TYPE__ stride) {
-  _tile_2rpntlvwz1_internal(dst0->row, dst0->col, dst1->col, &dst0->tile,
-                            &dst1->tile, base, stride);
-}
-
-/// Converts a pair of tiles from memory into VNNI format, and places the
-/// results in a pair of destinations specified by dst. The pair of tiles
-/// in memory is specified via a tsib; the second tile is after the first
-/// one, separated by the same stride that separates each row.
-/// The tile configuration for the destination tiles indicates the amount
-/// of data to read from memory. The instruction will load a number of rows
-/// that is equal to twice the number of rows in tmm1. The size of each row
-/// is equal to the average width of the destination tiles. If the second
-/// tile is configured with zero rows and columns, only the first tile will
-/// be written. The last row will be not be read from memory but instead
-/// filled with zeros.
-/// Provides a hint to the implementation that the data will likely not be
-/// reused in the near future and the data caching can be optimized.
-///
-/// \headerfile <immintrin.h>
-///
-/// This intrinsic corresponds to the <c> T2RPNTLVWZ1T1 </c> instruction.
-///
-/// \param dst0
-///    First tile of destination tile pair. Max size is 1024i*2 Bytes.
-/// \param dst1
-///    Second tile of destination tile pair. Max size is 1024i*2 Bytes.
-/// \param base
-///    A pointer to base address.
-/// \param stride
-///    The stride between the rows' data to be loaded in memory.
-__DEFAULT_FN_ATTRS_TRANSPOSE
-static void __tile_2rpntlvwz1t1(__tile1024i *dst0, __tile1024i *dst1,
-                                const void *base, __SIZE_TYPE__ stride) {
-  _tile_2rpntlvwz1t1_internal(dst0->row, dst0->col, dst1->col, &dst0->tile,
-                              &dst1->tile, base, stride);
-}
-
-/// Transpose 32-bit elements from src and write the result to dst.
-///
-/// \headerfile <immintrin.h>
-///
-/// This intrinsic corresponds to the <c> TTRANSPOSED </c> instruction.
-///
-/// \param dst
-///    The destination tile. Max size is 1024 Bytes.
-/// \param src
-///    The source tile. Max size is 1024 Bytes.
-__DEFAULT_FN_ATTRS_TRANSPOSE
-static void __tile_transposed(__tile1024i *dst, __tile1024i src) {
-  dst->tile = _tile_transposed_internal(dst->row, dst->col, src.tile);
-}
-
-#endif /* __x86_64__ */
-#endif /* __AMX_TRANSPOSEINTRIN_H */
diff --git a/lib/include/arm_acle.h b/lib/include/arm_acle.h
index 5cfa3d023a..622e8f3d6a 100644
--- a/lib/include/arm_acle.h
+++ b/lib/include/arm_acle.h
@@ -55,11 +55,37 @@ __chkfeat(uint64_t __features) {
 /* 7.5 Swap */
 static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))
 __swp(uint32_t __x, volatile uint32_t *__p) {
-  uint32_t v;
-  do
-    v = __builtin_arm_ldrex(__p);
-  while (__builtin_arm_strex(__x, __p));
-  return v;
+  uint32_t __v;
+#if (__ARM_FEATURE_LDREX & 4) || __ARM_ARCH_6M__ || __linux__
+  /*
+   * Using this clang builtin is sensible in most situations. Where
+   * LDREX and STREX are available, it will compile to a loop using
+   * them. Otherwise it will compile to a libcall, requiring the
+   * runtime to provide that library function.
+   *
+   * That's unavoidable on Armv6-M, which has no atomic instructions
+   * at all (not even SWP), so in that situation the user will just
+   * have to provide an implementation of __atomic_exchange_4 (perhaps
+   * it would temporarily disable interrupts, and then do a separate
+   * load and store).
+   *
+   * We also use the libcall strategy on pre-Armv7 Linux targets, on
+   * the theory that Linux's runtime support library _will_ provide a
+   * suitable libcall, and it's better to use that than the SWP
+   * instruction because then when the same binary is run on a later
+   * Linux system the libcall implementation will use LDREX instead.
+   */
+  __v = __atomic_exchange_n(__p, __x, __ATOMIC_RELAXED);
+#else
+  /*
+   * But for older Arm architectures when the target is not Linux, we
+   * fall back to using the SWP instruction via inline assembler. ACLE
+   * is clear that we're allowed to do this, but shouldn't do it if we
+   * have a better alternative.
+   */
+  __asm__("swp %0, %1, [%2]" : "=r"(__v) : "r"(__x), "r"(__p) : "memory");
+#endif
+  return __v;
 }
 
 /* 7.6 Memory prefetch intrinsics */
@@ -72,6 +98,12 @@ __swp(uint32_t __x, volatile uint32_t *__p) {
 #else
 #define __pldx(access_kind, cache_level, retention_policy, addr) \
   __builtin_arm_prefetch(addr, access_kind, cache_level, retention_policy, 1)
+#define __pldx_range(access_kind, retention_policy, length, count, stride,     \
+                     reuse_distance, addr)                                     \
+  __builtin_arm_range_prefetch_x(addr, access_kind, retention_policy, length,  \
+                                 count, stride, reuse_distance)
+#define __pld_range(access_kind, retention_policy, metadata, addr)             \
+  __builtin_arm_range_prefetch(addr, access_kind, retention_policy, metadata)
 #endif
 
 /* 7.6.2 Instruction prefetch */
@@ -795,28 +827,6 @@ __arm_st64bv0(void *__addr, data512_t __value) {
 
 #endif // __ARM_FEATURE_COPROC
 
-/* 17 Transactional Memory Extension (TME) Intrinsics */
-#if defined(__ARM_FEATURE_TME) && __ARM_FEATURE_TME
-
-#define _TMFAILURE_REASON  0x00007fffu
-#define _TMFAILURE_RTRY    0x00008000u
-#define _TMFAILURE_CNCL    0x00010000u
-#define _TMFAILURE_MEM     0x00020000u
-#define _TMFAILURE_IMP     0x00040000u
-#define _TMFAILURE_ERR     0x00080000u
-#define _TMFAILURE_SIZE    0x00100000u
-#define _TMFAILURE_NEST    0x00200000u
-#define _TMFAILURE_DBG     0x00400000u
-#define _TMFAILURE_INT     0x00800000u
-#define _TMFAILURE_TRIVIAL 0x01000000u
-
-#define __tstart()        __builtin_arm_tstart()
-#define __tcommit()       __builtin_arm_tcommit()
-#define __tcancel(__arg)  __builtin_arm_tcancel(__arg)
-#define __ttest()         __builtin_arm_ttest()
-
-#endif /* __ARM_FEATURE_TME */
-
 /* 8.7 Armv8.5-A Random number generation intrinsics */
 #if defined(__ARM_64BIT_STATE) && __ARM_64BIT_STATE
 static __inline__ int __attribute__((__always_inline__, __nodebug__, target("rand")))
diff --git a/lib/include/arm_neon.h b/lib/include/arm_neon.h
index 476158a2cb..392184c61b 100644
--- a/lib/include/arm_neon.h
+++ b/lib/include/arm_neon.h
@@ -10562,7 +10562,7 @@ __ai __attribute__((target("neon"))) int16x4_t __noswap_vget_high_s16(int16x8_t
 #define vget_lane_p8(__p0, __p1) __extension__ ({ \
   poly8_t __ret; \
   poly8x8_t __s0 = __p0; \
-  __ret = __builtin_bit_cast(poly8_t, __builtin_neon_vget_lane_i8(__s0, __p1)); \
+  __ret = __builtin_bit_cast(poly8_t, __builtin_neon_vget_lane_i8(__builtin_bit_cast(int8x8_t, __s0), __p1)); \
   __ret; \
 })
 #else
@@ -10570,13 +10570,13 @@ __ai __attribute__((target("neon"))) int16x4_t __noswap_vget_high_s16(int16x8_t
   poly8_t __ret; \
   poly8x8_t __s0 = __p0; \
   poly8x8_t __rev0;  __rev0 = __builtin_shufflevector(__s0, __s0, __lane_reverse_64_8); \
-  __ret = __builtin_bit_cast(poly8_t, __builtin_neon_vget_lane_i8(__rev0, __p1)); \
+  __ret = __builtin_bit_cast(poly8_t, __builtin_neon_vget_lane_i8(__builtin_bit_cast(int8x8_t, __rev0), __p1)); \
   __ret; \
 })
 #define __noswap_vget_lane_p8(__p0, __p1) __extension__ ({ \
   poly8_t __ret; \
   poly8x8_t __s0 = __p0; \
-  __ret = __builtin_bit_cast(poly8_t, __builtin_neon_vget_lane_i8(__s0, __p1)); \
+  __ret = __builtin_bit_cast(poly8_t, __builtin_neon_vget_lane_i8(__builtin_bit_cast(int8x8_t, __s0), __p1)); \
   __ret; \
 })
 #endif
@@ -10585,7 +10585,7 @@ __ai __attribute__((target("neon"))) int16x4_t __noswap_vget_high_s16(int16x8_t
 #define vget_lane_p16(__p0, __p1) __extension__ ({ \
   poly16_t __ret; \
   poly16x4_t __s0 = __p0; \
-  __ret = __builtin_bit_cast(poly16_t, __builtin_neon_vget_lane_i16(__s0, __p1)); \
+  __ret = __builtin_bit_cast(poly16_t, __builtin_neon_vget_lane_i16(__builtin_bit_cast(int16x4_t, __s0), __p1)); \
   __ret; \
 })
 #else
@@ -10593,13 +10593,13 @@ __ai __attribute__((target("neon"))) int16x4_t __noswap_vget_high_s16(int16x8_t
   poly16_t __ret; \
   poly16x4_t __s0 = __p0; \
   poly16x4_t __rev0;  __rev0 = __builtin_shufflevector(__s0, __s0, __lane_reverse_64_16); \
-  __ret = __builtin_bit_cast(poly16_t, __builtin_neon_vget_lane_i16(__rev0, __p1)); \
+  __ret = __builtin_bit_cast(poly16_t, __builtin_neon_vget_lane_i16(__builtin_bit_cast(int16x4_t, __rev0), __p1)); \
   __ret; \
 })
 #define __noswap_vget_lane_p16(__p0, __p1) __extension__ ({ \
   poly16_t __ret; \
   poly16x4_t __s0 = __p0; \
-  __ret = __builtin_bit_cast(poly16_t, __builtin_neon_vget_lane_i16(__s0, __p1)); \
+  __ret = __builtin_bit_cast(poly16_t, __builtin_neon_vget_lane_i16(__builtin_bit_cast(int16x4_t, __s0), __p1)); \
   __ret; \
 })
 #endif
@@ -10608,7 +10608,7 @@ __ai __attribute__((target("neon"))) int16x4_t __noswap_vget_high_s16(int16x8_t
 #define vgetq_lane_p8(__p0, __p1) __extension__ ({ \
   poly8_t __ret; \
   poly8x16_t __s0 = __p0; \
-  __ret = __builtin_bit_cast(poly8_t, __builtin_neon_vgetq_lane_i8(__s0, __p1)); \
+  __ret = __builtin_bit_cast(poly8_t, __builtin_neon_vgetq_lane_i8(__builtin_bit_cast(int8x16_t, __s0), __p1)); \
   __ret; \
 })
 #else
@@ -10616,13 +10616,13 @@ __ai __attribute__((target("neon"))) int16x4_t __noswap_vget_high_s16(int16x8_t
   poly8_t __ret; \
   poly8x16_t __s0 = __p0; \
   poly8x16_t __rev0;  __rev0 = __builtin_shufflevector(__s0, __s0, __lane_reverse_128_8); \
-  __ret = __builtin_bit_cast(poly8_t, __builtin_neon_vgetq_lane_i8(__rev0, __p1)); \
+  __ret = __builtin_bit_cast(poly8_t, __builtin_neon_vgetq_lane_i8(__builtin_bit_cast(int8x16_t, __rev0), __p1)); \
   __ret; \
 })
 #define __noswap_vgetq_lane_p8(__p0, __p1) __extension__ ({ \
   poly8_t __ret; \
   poly8x16_t __s0 = __p0; \
-  __ret = __builtin_bit_cast(poly8_t, __builtin_neon_vgetq_lane_i8(__s0, __p1)); \
+  __ret = __builtin_bit_cast(poly8_t, __builtin_neon_vgetq_lane_i8(__builtin_bit_cast(int8x16_t, __s0), __p1)); \
   __ret; \
 })
 #endif
@@ -10631,7 +10631,7 @@ __ai __attribute__((target("neon"))) int16x4_t __noswap_vget_high_s16(int16x8_t
 #define vgetq_lane_p16(__p0, __p1) __extension__ ({ \
   poly16_t __ret; \
   poly16x8_t __s0 = __p0; \
-  __ret = __builtin_bit_cast(poly16_t, __builtin_neon_vgetq_lane_i16(__s0, __p1)); \
+  __ret = __builtin_bit_cast(poly16_t, __builtin_neon_vgetq_lane_i16(__builtin_bit_cast(int16x8_t, __s0), __p1)); \
   __ret; \
 })
 #else
@@ -10639,13 +10639,13 @@ __ai __attribute__((target("neon"))) int16x4_t __noswap_vget_high_s16(int16x8_t
   poly16_t __ret; \
   poly16x8_t __s0 = __p0; \
   poly16x8_t __rev0;  __rev0 = __builtin_shufflevector(__s0, __s0, __lane_reverse_128_16); \
-  __ret = __builtin_bit_cast(poly16_t, __builtin_neon_vgetq_lane_i16(__rev0, __p1)); \
+  __ret = __builtin_bit_cast(poly16_t, __builtin_neon_vgetq_lane_i16(__builtin_bit_cast(int16x8_t, __rev0), __p1)); \
   __ret; \
 })
 #define __noswap_vgetq_lane_p16(__p0, __p1) __extension__ ({ \
   poly16_t __ret; \
   poly16x8_t __s0 = __p0; \
-  __ret = __builtin_bit_cast(poly16_t, __builtin_neon_vgetq_lane_i16(__s0, __p1)); \
+  __ret = __builtin_bit_cast(poly16_t, __builtin_neon_vgetq_lane_i16(__builtin_bit_cast(int16x8_t, __s0), __p1)); \
   __ret; \
 })
 #endif
@@ -27027,7 +27027,7 @@ __ai __attribute__((target("neon"))) int8x8_t __noswap_vrsubhn_s16(int16x8_t __p
   poly8x8_t __ret; \
   poly8_t __s0 = __p0; \
   poly8x8_t __s1 = __p1; \
-  __ret = __builtin_bit_cast(poly8x8_t, __builtin_neon_vset_lane_i8(__s0, __s1, __p2)); \
+  __ret = __builtin_bit_cast(poly8x8_t, __builtin_neon_vset_lane_i8(__s0, __builtin_bit_cast(int8x8_t, __s1), __p2)); \
   __ret; \
 })
 #else
@@ -27036,7 +27036,7 @@ __ai __attribute__((target("neon"))) int8x8_t __noswap_vrsubhn_s16(int16x8_t __p
   poly8_t __s0 = __p0; \
   poly8x8_t __s1 = __p1; \
   poly8x8_t __rev1;  __rev1 = __builtin_shufflevector(__s1, __s1, __lane_reverse_64_8); \
-  __ret = __builtin_bit_cast(poly8x8_t, __builtin_neon_vset_lane_i8(__s0, __rev1, __p2)); \
+  __ret = __builtin_bit_cast(poly8x8_t, __builtin_neon_vset_lane_i8(__s0, __builtin_bit_cast(int8x8_t, __rev1), __p2)); \
   __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_8); \
   __ret; \
 })
@@ -27044,7 +27044,7 @@ __ai __attribute__((target("neon"))) int8x8_t __noswap_vrsubhn_s16(int16x8_t __p
   poly8x8_t __ret; \
   poly8_t __s0 = __p0; \
   poly8x8_t __s1 = __p1; \
-  __ret = __builtin_bit_cast(poly8x8_t, __builtin_neon_vset_lane_i8(__s0, __s1, __p2)); \
+  __ret = __builtin_bit_cast(poly8x8_t, __builtin_neon_vset_lane_i8(__s0, __builtin_bit_cast(int8x8_t, __s1), __p2)); \
   __ret; \
 })
 #endif
@@ -27054,7 +27054,7 @@ __ai __attribute__((target("neon"))) int8x8_t __noswap_vrsubhn_s16(int16x8_t __p
   poly16x4_t __ret; \
   poly16_t __s0 = __p0; \
   poly16x4_t __s1 = __p1; \
-  __ret = __builtin_bit_cast(poly16x4_t, __builtin_neon_vset_lane_i16(__s0, __s1, __p2)); \
+  __ret = __builtin_bit_cast(poly16x4_t, __builtin_neon_vset_lane_i16(__s0, __builtin_bit_cast(int16x4_t, __s1), __p2)); \
   __ret; \
 })
 #else
@@ -27063,7 +27063,7 @@ __ai __attribute__((target("neon"))) int8x8_t __noswap_vrsubhn_s16(int16x8_t __p
   poly16_t __s0 = __p0; \
   poly16x4_t __s1 = __p1; \
   poly16x4_t __rev1;  __rev1 = __builtin_shufflevector(__s1, __s1, __lane_reverse_64_16); \
-  __ret = __builtin_bit_cast(poly16x4_t, __builtin_neon_vset_lane_i16(__s0, __rev1, __p2)); \
+  __ret = __builtin_bit_cast(poly16x4_t, __builtin_neon_vset_lane_i16(__s0, __builtin_bit_cast(int16x4_t, __rev1), __p2)); \
   __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_16); \
   __ret; \
 })
@@ -27071,7 +27071,7 @@ __ai __attribute__((target("neon"))) int8x8_t __noswap_vrsubhn_s16(int16x8_t __p
   poly16x4_t __ret; \
   poly16_t __s0 = __p0; \
   poly16x4_t __s1 = __p1; \
-  __ret = __builtin_bit_cast(poly16x4_t, __builtin_neon_vset_lane_i16(__s0, __s1, __p2)); \
+  __ret = __builtin_bit_cast(poly16x4_t, __builtin_neon_vset_lane_i16(__s0, __builtin_bit_cast(int16x4_t, __s1), __p2)); \
   __ret; \
 })
 #endif
@@ -27081,7 +27081,7 @@ __ai __attribute__((target("neon"))) int8x8_t __noswap_vrsubhn_s16(int16x8_t __p
   poly8x16_t __ret; \
   poly8_t __s0 = __p0; \
   poly8x16_t __s1 = __p1; \
-  __ret = __builtin_bit_cast(poly8x16_t, __builtin_neon_vsetq_lane_i8(__s0, __s1, __p2)); \
+  __ret = __builtin_bit_cast(poly8x16_t, __builtin_neon_vsetq_lane_i8(__s0, __builtin_bit_cast(int8x16_t, __s1), __p2)); \
   __ret; \
 })
 #else
@@ -27090,7 +27090,7 @@ __ai __attribute__((target("neon"))) int8x8_t __noswap_vrsubhn_s16(int16x8_t __p
   poly8_t __s0 = __p0; \
   poly8x16_t __s1 = __p1; \
   poly8x16_t __rev1;  __rev1 = __builtin_shufflevector(__s1, __s1, __lane_reverse_128_8); \
-  __ret = __builtin_bit_cast(poly8x16_t, __builtin_neon_vsetq_lane_i8(__s0, __rev1, __p2)); \
+  __ret = __builtin_bit_cast(poly8x16_t, __builtin_neon_vsetq_lane_i8(__s0, __builtin_bit_cast(int8x16_t, __rev1), __p2)); \
   __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_8); \
   __ret; \
 })
@@ -27098,7 +27098,7 @@ __ai __attribute__((target("neon"))) int8x8_t __noswap_vrsubhn_s16(int16x8_t __p
   poly8x16_t __ret; \
   poly8_t __s0 = __p0; \
   poly8x16_t __s1 = __p1; \
-  __ret = __builtin_bit_cast(poly8x16_t, __builtin_neon_vsetq_lane_i8(__s0, __s1, __p2)); \
+  __ret = __builtin_bit_cast(poly8x16_t, __builtin_neon_vsetq_lane_i8(__s0, __builtin_bit_cast(int8x16_t, __s1), __p2)); \
   __ret; \
 })
 #endif
@@ -27108,7 +27108,7 @@ __ai __attribute__((target("neon"))) int8x8_t __noswap_vrsubhn_s16(int16x8_t __p
   poly16x8_t __ret; \
   poly16_t __s0 = __p0; \
   poly16x8_t __s1 = __p1; \
-  __ret = __builtin_bit_cast(poly16x8_t, __builtin_neon_vsetq_lane_i16(__s0, __s1, __p2)); \
+  __ret = __builtin_bit_cast(poly16x8_t, __builtin_neon_vsetq_lane_i16(__s0, __builtin_bit_cast(int16x8_t, __s1), __p2)); \
   __ret; \
 })
 #else
@@ -27117,7 +27117,7 @@ __ai __attribute__((target("neon"))) int8x8_t __noswap_vrsubhn_s16(int16x8_t __p
   poly16_t __s0 = __p0; \
   poly16x8_t __s1 = __p1; \
   poly16x8_t __rev1;  __rev1 = __builtin_shufflevector(__s1, __s1, __lane_reverse_128_16); \
-  __ret = __builtin_bit_cast(poly16x8_t, __builtin_neon_vsetq_lane_i16(__s0, __rev1, __p2)); \
+  __ret = __builtin_bit_cast(poly16x8_t, __builtin_neon_vsetq_lane_i16(__s0, __builtin_bit_cast(int16x8_t, __rev1), __p2)); \
   __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_16); \
   __ret; \
 })
@@ -27125,7 +27125,7 @@ __ai __attribute__((target("neon"))) int8x8_t __noswap_vrsubhn_s16(int16x8_t __p
   poly16x8_t __ret; \
   poly16_t __s0 = __p0; \
   poly16x8_t __s1 = __p1; \
-  __ret = __builtin_bit_cast(poly16x8_t, __builtin_neon_vsetq_lane_i16(__s0, __s1, __p2)); \
+  __ret = __builtin_bit_cast(poly16x8_t, __builtin_neon_vsetq_lane_i16(__s0, __builtin_bit_cast(int16x8_t, __s1), __p2)); \
   __ret; \
 })
 #endif
@@ -41141,6 +41141,42 @@ __ai __attribute__((target("neon"))) float32x2_t vfms_f32(float32x2_t __p0, floa
 
 #endif
 #if defined(__aarch64__)
+#ifdef __LITTLE_ENDIAN__
+__ai __attribute__((target("f8f16mm,neon"))) float16x8_t vmmlaq_f16_mf8_fpm(float16x8_t __p0, mfloat8x16_t __p1, mfloat8x16_t __p2, fpm_t __p3) {
+  float16x8_t __ret;
+  __ret = __builtin_bit_cast(float16x8_t, __builtin_neon_vmmlaq_f16_mf8_fpm(__builtin_bit_cast(int8x16_t, __p0), __p1, __p2, __p3));
+  return __ret;
+}
+#else
+__ai __attribute__((target("f8f16mm,neon"))) float16x8_t vmmlaq_f16_mf8_fpm(float16x8_t __p0, mfloat8x16_t __p1, mfloat8x16_t __p2, fpm_t __p3) {
+  float16x8_t __ret;
+  float16x8_t __rev0;  __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_16);
+  mfloat8x16_t __rev1;  __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_8);
+  mfloat8x16_t __rev2;  __rev2 = __builtin_shufflevector(__p2, __p2, __lane_reverse_128_8);
+  __ret = __builtin_bit_cast(float16x8_t, __builtin_neon_vmmlaq_f16_mf8_fpm(__builtin_bit_cast(int8x16_t, __rev0), __rev1, __rev2, __p3));
+  __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_16);
+  return __ret;
+}
+#endif
+
+#ifdef __LITTLE_ENDIAN__
+__ai __attribute__((target("f8f32mm,neon"))) float32x4_t vmmlaq_f32_mf8_fpm(float32x4_t __p0, mfloat8x16_t __p1, mfloat8x16_t __p2, fpm_t __p3) {
+  float32x4_t __ret;
+  __ret = __builtin_bit_cast(float32x4_t, __builtin_neon_vmmlaq_f32_mf8_fpm(__p0, __p1, __p2, __p3));
+  return __ret;
+}
+#else
+__ai __attribute__((target("f8f32mm,neon"))) float32x4_t vmmlaq_f32_mf8_fpm(float32x4_t __p0, mfloat8x16_t __p1, mfloat8x16_t __p2, fpm_t __p3) {
+  float32x4_t __ret;
+  float32x4_t __rev0;  __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_32);
+  mfloat8x16_t __rev1;  __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_8);
+  mfloat8x16_t __rev2;  __rev2 = __builtin_shufflevector(__p2, __p2, __lane_reverse_128_8);
+  __ret = __builtin_bit_cast(float32x4_t, __builtin_neon_vmmlaq_f32_mf8_fpm(__rev0, __rev1, __rev2, __p3));
+  __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_32);
+  return __ret;
+}
+#endif
+
 #ifdef __LITTLE_ENDIAN__
 __ai __attribute__((target("fp8,neon"))) bfloat16x8_t vcvt1_bf16_mf8_fpm(mfloat8x8_t __p0, fpm_t __p1) {
   bfloat16x8_t __ret;
@@ -49847,6 +49883,16 @@ __ai __attribute__((target("neon"))) int32_t vcvts_s32_f32(float32_t __p0) {
   __ret = __builtin_bit_cast(int32_t, __builtin_neon_vcvts_s32_f32(__p0));
   return __ret;
 }
+__ai __attribute__((target("neon"))) int32_t vcvtd_s32_f64(float64_t __p0) {
+  int32_t __ret;
+  __ret = __builtin_bit_cast(int32_t, __builtin_neon_vcvtd_s32_f64(__p0));
+  return __ret;
+}
+__ai __attribute__((target("neon"))) int64_t vcvts_s64_f32(float32_t __p0) {
+  int64_t __ret;
+  __ret = __builtin_bit_cast(int64_t, __builtin_neon_vcvts_s64_f32(__p0));
+  return __ret;
+}
 __ai __attribute__((target("neon"))) int64_t vcvtd_s64_f64(float64_t __p0) {
   int64_t __ret;
   __ret = __builtin_bit_cast(int64_t, __builtin_neon_vcvtd_s64_f64(__p0));
@@ -49878,6 +49924,16 @@ __ai __attribute__((target("neon"))) uint32_t vcvts_u32_f32(float32_t __p0) {
   __ret = __builtin_bit_cast(uint32_t, __builtin_neon_vcvts_u32_f32(__p0));
   return __ret;
 }
+__ai __attribute__((target("neon"))) uint32_t vcvtd_u32_f64(float64_t __p0) {
+  uint32_t __ret;
+  __ret = __builtin_bit_cast(uint32_t, __builtin_neon_vcvtd_u32_f64(__p0));
+  return __ret;
+}
+__ai __attribute__((target("neon"))) uint64_t vcvts_u64_f32(float32_t __p0) {
+  uint64_t __ret;
+  __ret = __builtin_bit_cast(uint64_t, __builtin_neon_vcvts_u64_f32(__p0));
+  return __ret;
+}
 __ai __attribute__((target("neon"))) uint64_t vcvtd_u64_f64(float64_t __p0) {
   uint64_t __ret;
   __ret = __builtin_bit_cast(uint64_t, __builtin_neon_vcvtd_u64_f64(__p0));
@@ -49909,6 +49965,11 @@ __ai __attribute__((target("neon"))) int32_t vcvtas_s32_f32(float32_t __p0) {
   __ret = __builtin_bit_cast(int32_t, __builtin_neon_vcvtas_s32_f32(__p0));
   return __ret;
 }
+__ai __attribute__((target("neon"))) int32_t vcvtad_s32_f64(float64_t __p0) {
+  int32_t __ret;
+  __ret = __builtin_bit_cast(int32_t, __builtin_neon_vcvtad_s32_f64(__p0));
+  return __ret;
+}
 #ifdef __LITTLE_ENDIAN__
 __ai __attribute__((target("neon"))) int64x2_t vcvtaq_s64_f64(float64x2_t __p0) {
   int64x2_t __ret;
@@ -49930,6 +49991,11 @@ __ai __attribute__((target("neon"))) int64x1_t vcvta_s64_f64(float64x1_t __p0) {
   __ret = __builtin_bit_cast(int64x1_t, __builtin_neon_vcvta_s64_v(__builtin_bit_cast(int8x8_t, __p0), 3));
   return __ret;
 }
+__ai __attribute__((target("neon"))) int64_t vcvtas_s64_f32(float32_t __p0) {
+  int64_t __ret;
+  __ret = __builtin_bit_cast(int64_t, __builtin_neon_vcvtas_s64_f32(__p0));
+  return __ret;
+}
 __ai __attribute__((target("neon"))) int64_t vcvtad_s64_f64(float64_t __p0) {
   int64_t __ret;
   __ret = __builtin_bit_cast(int64_t, __builtin_neon_vcvtad_s64_f64(__p0));
@@ -49940,6 +50006,11 @@ __ai __attribute__((target("neon"))) uint32_t vcvtas_u32_f32(float32_t __p0) {
   __ret = __builtin_bit_cast(uint32_t, __builtin_neon_vcvtas_u32_f32(__p0));
   return __ret;
 }
+__ai __attribute__((target("neon"))) uint32_t vcvtad_u32_f64(float64_t __p0) {
+  uint32_t __ret;
+  __ret = __builtin_bit_cast(uint32_t, __builtin_neon_vcvtad_u32_f64(__p0));
+  return __ret;
+}
 #ifdef __LITTLE_ENDIAN__
 __ai __attribute__((target("neon"))) uint64x2_t vcvtaq_u64_f64(float64x2_t __p0) {
   uint64x2_t __ret;
@@ -49961,6 +50032,11 @@ __ai __attribute__((target("neon"))) uint64x1_t vcvta_u64_f64(float64x1_t __p0)
   __ret = __builtin_bit_cast(uint64x1_t, __builtin_neon_vcvta_u64_v(__builtin_bit_cast(int8x8_t, __p0), 19));
   return __ret;
 }
+__ai __attribute__((target("neon"))) uint64_t vcvtas_u64_f32(float32_t __p0) {
+  uint64_t __ret;
+  __ret = __builtin_bit_cast(uint64_t, __builtin_neon_vcvtas_u64_f32(__p0));
+  return __ret;
+}
 __ai __attribute__((target("neon"))) uint64_t vcvtad_u64_f64(float64_t __p0) {
   uint64_t __ret;
   __ret = __builtin_bit_cast(uint64_t, __builtin_neon_vcvtad_u64_f64(__p0));
@@ -49971,6 +50047,11 @@ __ai __attribute__((target("neon"))) int32_t vcvtms_s32_f32(float32_t __p0) {
   __ret = __builtin_bit_cast(int32_t, __builtin_neon_vcvtms_s32_f32(__p0));
   return __ret;
 }
+__ai __attribute__((target("neon"))) int32_t vcvtmd_s32_f64(float64_t __p0) {
+  int32_t __ret;
+  __ret = __builtin_bit_cast(int32_t, __builtin_neon_vcvtmd_s32_f64(__p0));
+  return __ret;
+}
 #ifdef __LITTLE_ENDIAN__
 __ai __attribute__((target("neon"))) int64x2_t vcvtmq_s64_f64(float64x2_t __p0) {
   int64x2_t __ret;
@@ -49992,6 +50073,11 @@ __ai __attribute__((target("neon"))) int64x1_t vcvtm_s64_f64(float64x1_t __p0) {
   __ret = __builtin_bit_cast(int64x1_t, __builtin_neon_vcvtm_s64_v(__builtin_bit_cast(int8x8_t, __p0), 3));
   return __ret;
 }
+__ai __attribute__((target("neon"))) int64_t vcvtms_s64_f32(float32_t __p0) {
+  int64_t __ret;
+  __ret = __builtin_bit_cast(int64_t, __builtin_neon_vcvtms_s64_f32(__p0));
+  return __ret;
+}
 __ai __attribute__((target("neon"))) int64_t vcvtmd_s64_f64(float64_t __p0) {
   int64_t __ret;
   __ret = __builtin_bit_cast(int64_t, __builtin_neon_vcvtmd_s64_f64(__p0));
@@ -50002,6 +50088,11 @@ __ai __attribute__((target("neon"))) uint32_t vcvtms_u32_f32(float32_t __p0) {
   __ret = __builtin_bit_cast(uint32_t, __builtin_neon_vcvtms_u32_f32(__p0));
   return __ret;
 }
+__ai __attribute__((target("neon"))) uint32_t vcvtmd_u32_f64(float64_t __p0) {
+  uint32_t __ret;
+  __ret = __builtin_bit_cast(uint32_t, __builtin_neon_vcvtmd_u32_f64(__p0));
+  return __ret;
+}
 #ifdef __LITTLE_ENDIAN__
 __ai __attribute__((target("neon"))) uint64x2_t vcvtmq_u64_f64(float64x2_t __p0) {
   uint64x2_t __ret;
@@ -50023,6 +50114,11 @@ __ai __attribute__((target("neon"))) uint64x1_t vcvtm_u64_f64(float64x1_t __p0)
   __ret = __builtin_bit_cast(uint64x1_t, __builtin_neon_vcvtm_u64_v(__builtin_bit_cast(int8x8_t, __p0), 19));
   return __ret;
 }
+__ai __attribute__((target("neon"))) uint64_t vcvtms_u64_f32(float32_t __p0) {
+  uint64_t __ret;
+  __ret = __builtin_bit_cast(uint64_t, __builtin_neon_vcvtms_u64_f32(__p0));
+  return __ret;
+}
 __ai __attribute__((target("neon"))) uint64_t vcvtmd_u64_f64(float64_t __p0) {
   uint64_t __ret;
   __ret = __builtin_bit_cast(uint64_t, __builtin_neon_vcvtmd_u64_f64(__p0));
@@ -50033,6 +50129,11 @@ __ai __attribute__((target("neon"))) int32_t vcvtns_s32_f32(float32_t __p0) {
   __ret = __builtin_bit_cast(int32_t, __builtin_neon_vcvtns_s32_f32(__p0));
   return __ret;
 }
+__ai __attribute__((target("neon"))) int32_t vcvtnd_s32_f64(float64_t __p0) {
+  int32_t __ret;
+  __ret = __builtin_bit_cast(int32_t, __builtin_neon_vcvtnd_s32_f64(__p0));
+  return __ret;
+}
 #ifdef __LITTLE_ENDIAN__
 __ai __attribute__((target("neon"))) int64x2_t vcvtnq_s64_f64(float64x2_t __p0) {
   int64x2_t __ret;
@@ -50054,6 +50155,11 @@ __ai __attribute__((target("neon"))) int64x1_t vcvtn_s64_f64(float64x1_t __p0) {
   __ret = __builtin_bit_cast(int64x1_t, __builtin_neon_vcvtn_s64_v(__builtin_bit_cast(int8x8_t, __p0), 3));
   return __ret;
 }
+__ai __attribute__((target("neon"))) int64_t vcvtns_s64_f32(float32_t __p0) {
+  int64_t __ret;
+  __ret = __builtin_bit_cast(int64_t, __builtin_neon_vcvtns_s64_f32(__p0));
+  return __ret;
+}
 __ai __attribute__((target("neon"))) int64_t vcvtnd_s64_f64(float64_t __p0) {
   int64_t __ret;
   __ret = __builtin_bit_cast(int64_t, __builtin_neon_vcvtnd_s64_f64(__p0));
@@ -50064,6 +50170,11 @@ __ai __attribute__((target("neon"))) uint32_t vcvtns_u32_f32(float32_t __p0) {
   __ret = __builtin_bit_cast(uint32_t, __builtin_neon_vcvtns_u32_f32(__p0));
   return __ret;
 }
+__ai __attribute__((target("neon"))) uint32_t vcvtnd_u32_f64(float64_t __p0) {
+  uint32_t __ret;
+  __ret = __builtin_bit_cast(uint32_t, __builtin_neon_vcvtnd_u32_f64(__p0));
+  return __ret;
+}
 #ifdef __LITTLE_ENDIAN__
 __ai __attribute__((target("neon"))) uint64x2_t vcvtnq_u64_f64(float64x2_t __p0) {
   uint64x2_t __ret;
@@ -50085,6 +50196,11 @@ __ai __attribute__((target("neon"))) uint64x1_t vcvtn_u64_f64(float64x1_t __p0)
   __ret = __builtin_bit_cast(uint64x1_t, __builtin_neon_vcvtn_u64_v(__builtin_bit_cast(int8x8_t, __p0), 19));
   return __ret;
 }
+__ai __attribute__((target("neon"))) uint64_t vcvtns_u64_f32(float32_t __p0) {
+  uint64_t __ret;
+  __ret = __builtin_bit_cast(uint64_t, __builtin_neon_vcvtns_u64_f32(__p0));
+  return __ret;
+}
 __ai __attribute__((target("neon"))) uint64_t vcvtnd_u64_f64(float64_t __p0) {
   uint64_t __ret;
   __ret = __builtin_bit_cast(uint64_t, __builtin_neon_vcvtnd_u64_f64(__p0));
@@ -50095,6 +50211,11 @@ __ai __attribute__((target("neon"))) int32_t vcvtps_s32_f32(float32_t __p0) {
   __ret = __builtin_bit_cast(int32_t, __builtin_neon_vcvtps_s32_f32(__p0));
   return __ret;
 }
+__ai __attribute__((target("neon"))) int32_t vcvtpd_s32_f64(float64_t __p0) {
+  int32_t __ret;
+  __ret = __builtin_bit_cast(int32_t, __builtin_neon_vcvtpd_s32_f64(__p0));
+  return __ret;
+}
 #ifdef __LITTLE_ENDIAN__
 __ai __attribute__((target("neon"))) int64x2_t vcvtpq_s64_f64(float64x2_t __p0) {
   int64x2_t __ret;
@@ -50116,6 +50237,11 @@ __ai __attribute__((target("neon"))) int64x1_t vcvtp_s64_f64(float64x1_t __p0) {
   __ret = __builtin_bit_cast(int64x1_t, __builtin_neon_vcvtp_s64_v(__builtin_bit_cast(int8x8_t, __p0), 3));
   return __ret;
 }
+__ai __attribute__((target("neon"))) int64_t vcvtps_s64_f32(float32_t __p0) {
+  int64_t __ret;
+  __ret = __builtin_bit_cast(int64_t, __builtin_neon_vcvtps_s64_f32(__p0));
+  return __ret;
+}
 __ai __attribute__((target("neon"))) int64_t vcvtpd_s64_f64(float64_t __p0) {
   int64_t __ret;
   __ret = __builtin_bit_cast(int64_t, __builtin_neon_vcvtpd_s64_f64(__p0));
@@ -50126,6 +50252,11 @@ __ai __attribute__((target("neon"))) uint32_t vcvtps_u32_f32(float32_t __p0) {
   __ret = __builtin_bit_cast(uint32_t, __builtin_neon_vcvtps_u32_f32(__p0));
   return __ret;
 }
+__ai __attribute__((target("neon"))) uint32_t vcvtpd_u32_f64(float64_t __p0) {
+  uint32_t __ret;
+  __ret = __builtin_bit_cast(uint32_t, __builtin_neon_vcvtpd_u32_f64(__p0));
+  return __ret;
+}
 #ifdef __LITTLE_ENDIAN__
 __ai __attribute__((target("neon"))) uint64x2_t vcvtpq_u64_f64(float64x2_t __p0) {
   uint64x2_t __ret;
@@ -50147,6 +50278,11 @@ __ai __attribute__((target("neon"))) uint64x1_t vcvtp_u64_f64(float64x1_t __p0)
   __ret = __builtin_bit_cast(uint64x1_t, __builtin_neon_vcvtp_u64_v(__builtin_bit_cast(int8x8_t, __p0), 19));
   return __ret;
 }
+__ai __attribute__((target("neon"))) uint64_t vcvtps_u64_f32(float32_t __p0) {
+  uint64_t __ret;
+  __ret = __builtin_bit_cast(uint64_t, __builtin_neon_vcvtps_u64_f32(__p0));
+  return __ret;
+}
 __ai __attribute__((target("neon"))) uint64_t vcvtpd_u64_f64(float64_t __p0) {
   uint64_t __ret;
   __ret = __builtin_bit_cast(uint64_t, __builtin_neon_vcvtpd_u64_f64(__p0));
@@ -50255,7 +50391,7 @@ __ai __attribute__((target("neon"))) float32x2_t vdiv_f32(float32x2_t __p0, floa
 #define vdupb_lane_p8(__p0, __p1) __extension__ ({ \
   poly8_t __ret; \
   poly8x8_t __s0 = __p0; \
-  __ret = __builtin_bit_cast(poly8_t, __builtin_neon_vdupb_lane_i8(__s0, __p1)); \
+  __ret = __builtin_bit_cast(poly8_t, __builtin_neon_vdupb_lane_i8(__builtin_bit_cast(int8x8_t, __s0), __p1)); \
   __ret; \
 })
 #else
@@ -50263,7 +50399,7 @@ __ai __attribute__((target("neon"))) float32x2_t vdiv_f32(float32x2_t __p0, floa
   poly8_t __ret; \
   poly8x8_t __s0 = __p0; \
   poly8x8_t __rev0;  __rev0 = __builtin_shufflevector(__s0, __s0, __lane_reverse_64_8); \
-  __ret = __builtin_bit_cast(poly8_t, __builtin_neon_vdupb_lane_i8(__rev0, __p1)); \
+  __ret = __builtin_bit_cast(poly8_t, __builtin_neon_vdupb_lane_i8(__builtin_bit_cast(int8x8_t, __rev0), __p1)); \
   __ret; \
 })
 #endif
@@ -50272,7 +50408,7 @@ __ai __attribute__((target("neon"))) float32x2_t vdiv_f32(float32x2_t __p0, floa
 #define vduph_lane_p16(__p0, __p1) __extension__ ({ \
   poly16_t __ret; \
   poly16x4_t __s0 = __p0; \
-  __ret = __builtin_bit_cast(poly16_t, __builtin_neon_vduph_lane_i16(__s0, __p1)); \
+  __ret = __builtin_bit_cast(poly16_t, __builtin_neon_vduph_lane_i16(__builtin_bit_cast(int16x4_t, __s0), __p1)); \
   __ret; \
 })
 #else
@@ -50280,7 +50416,7 @@ __ai __attribute__((target("neon"))) float32x2_t vdiv_f32(float32x2_t __p0, floa
   poly16_t __ret; \
   poly16x4_t __s0 = __p0; \
   poly16x4_t __rev0;  __rev0 = __builtin_shufflevector(__s0, __s0, __lane_reverse_64_16); \
-  __ret = __builtin_bit_cast(poly16_t, __builtin_neon_vduph_lane_i16(__rev0, __p1)); \
+  __ret = __builtin_bit_cast(poly16_t, __builtin_neon_vduph_lane_i16(__builtin_bit_cast(int16x4_t, __rev0), __p1)); \
   __ret; \
 })
 #endif
@@ -50506,7 +50642,7 @@ __ai __attribute__((target("neon"))) float32x2_t vdiv_f32(float32x2_t __p0, floa
 #define vdupb_laneq_p8(__p0, __p1) __extension__ ({ \
   poly8_t __ret; \
   poly8x16_t __s0 = __p0; \
-  __ret = __builtin_bit_cast(poly8_t, __builtin_neon_vdupb_laneq_i8(__s0, __p1)); \
+  __ret = __builtin_bit_cast(poly8_t, __builtin_neon_vdupb_laneq_i8(__builtin_bit_cast(int8x16_t, __s0), __p1)); \
   __ret; \
 })
 #else
@@ -50514,7 +50650,7 @@ __ai __attribute__((target("neon"))) float32x2_t vdiv_f32(float32x2_t __p0, floa
   poly8_t __ret; \
   poly8x16_t __s0 = __p0; \
   poly8x16_t __rev0;  __rev0 = __builtin_shufflevector(__s0, __s0, __lane_reverse_128_8); \
-  __ret = __builtin_bit_cast(poly8_t, __builtin_neon_vdupb_laneq_i8(__rev0, __p1)); \
+  __ret = __builtin_bit_cast(poly8_t, __builtin_neon_vdupb_laneq_i8(__builtin_bit_cast(int8x16_t, __rev0), __p1)); \
   __ret; \
 })
 #endif
@@ -50523,7 +50659,7 @@ __ai __attribute__((target("neon"))) float32x2_t vdiv_f32(float32x2_t __p0, floa
 #define vduph_laneq_p16(__p0, __p1) __extension__ ({ \
   poly16_t __ret; \
   poly16x8_t __s0 = __p0; \
-  __ret = __builtin_bit_cast(poly16_t, __builtin_neon_vduph_laneq_i16(__s0, __p1)); \
+  __ret = __builtin_bit_cast(poly16_t, __builtin_neon_vduph_laneq_i16(__builtin_bit_cast(int16x8_t, __s0), __p1)); \
   __ret; \
 })
 #else
@@ -50531,7 +50667,7 @@ __ai __attribute__((target("neon"))) float32x2_t vdiv_f32(float32x2_t __p0, floa
   poly16_t __ret; \
   poly16x8_t __s0 = __p0; \
   poly16x8_t __rev0;  __rev0 = __builtin_shufflevector(__s0, __s0, __lane_reverse_128_16); \
-  __ret = __builtin_bit_cast(poly16_t, __builtin_neon_vduph_laneq_i16(__rev0, __p1)); \
+  __ret = __builtin_bit_cast(poly16_t, __builtin_neon_vduph_laneq_i16(__builtin_bit_cast(int16x8_t, __rev0), __p1)); \
   __ret; \
 })
 #endif
@@ -52105,14 +52241,14 @@ __ai __attribute__((target("neon"))) float64x1_t vget_high_f64(float64x2_t __p0)
 #define vget_lane_p64(__p0, __p1) __extension__ ({ \
   poly64_t __ret; \
   poly64x1_t __s0 = __p0; \
-  __ret = __builtin_bit_cast(poly64_t, __builtin_neon_vget_lane_i64(__s0, __p1)); \
+  __ret = __builtin_bit_cast(poly64_t, __builtin_neon_vget_lane_i64(__builtin_bit_cast(int64x1_t, __s0), __p1)); \
   __ret; \
 })
 #ifdef __LITTLE_ENDIAN__
 #define vgetq_lane_p64(__p0, __p1) __extension__ ({ \
   poly64_t __ret; \
   poly64x2_t __s0 = __p0; \
-  __ret = __builtin_bit_cast(poly64_t, __builtin_neon_vgetq_lane_i64(__s0, __p1)); \
+  __ret = __builtin_bit_cast(poly64_t, __builtin_neon_vgetq_lane_i64(__builtin_bit_cast(int64x2_t, __s0), __p1)); \
   __ret; \
 })
 #else
@@ -52120,13 +52256,13 @@ __ai __attribute__((target("neon"))) float64x1_t vget_high_f64(float64x2_t __p0)
   poly64_t __ret; \
   poly64x2_t __s0 = __p0; \
   poly64x2_t __rev0;  __rev0 = __builtin_shufflevector(__s0, __s0, __lane_reverse_128_64); \
-  __ret = __builtin_bit_cast(poly64_t, __builtin_neon_vgetq_lane_i64(__rev0, __p1)); \
+  __ret = __builtin_bit_cast(poly64_t, __builtin_neon_vgetq_lane_i64(__builtin_bit_cast(int64x2_t, __rev0), __p1)); \
   __ret; \
 })
 #define __noswap_vgetq_lane_p64(__p0, __p1) __extension__ ({ \
   poly64_t __ret; \
   poly64x2_t __s0 = __p0; \
-  __ret = __builtin_bit_cast(poly64_t, __builtin_neon_vgetq_lane_i64(__s0, __p1)); \
+  __ret = __builtin_bit_cast(poly64_t, __builtin_neon_vgetq_lane_i64(__builtin_bit_cast(int64x2_t, __s0), __p1)); \
   __ret; \
 })
 #endif
@@ -59743,20 +59879,20 @@ __ai __attribute__((target("neon"))) int16_t vqrshlh_s16(int16_t __p0, int16_t _
 })
 #ifdef __LITTLE_ENDIAN__
 #define vqrshrun_high_n_s32(__p0_716, __p1_716, __p2_716) __extension__ ({ \
-  int16x8_t __ret_716; \
-  int16x4_t __s0_716 = __p0_716; \
+  uint16x8_t __ret_716; \
+  uint16x4_t __s0_716 = __p0_716; \
   int32x4_t __s1_716 = __p1_716; \
-  __ret_716 = __builtin_bit_cast(int16x8_t, vcombine_s16(__builtin_bit_cast(int16x4_t, __s0_716), __builtin_bit_cast(int16x4_t, vqrshrun_n_s32(__s1_716, __p2_716)))); \
+  __ret_716 = __builtin_bit_cast(uint16x8_t, vcombine_u16(__builtin_bit_cast(uint16x4_t, __s0_716), __builtin_bit_cast(uint16x4_t, vqrshrun_n_s32(__s1_716, __p2_716)))); \
   __ret_716; \
 })
 #else
 #define vqrshrun_high_n_s32(__p0_717, __p1_717, __p2_717) __extension__ ({ \
-  int16x8_t __ret_717; \
-  int16x4_t __s0_717 = __p0_717; \
+  uint16x8_t __ret_717; \
+  uint16x4_t __s0_717 = __p0_717; \
   int32x4_t __s1_717 = __p1_717; \
-  int16x4_t __rev0_717;  __rev0_717 = __builtin_shufflevector(__s0_717, __s0_717, __lane_reverse_64_16); \
+  uint16x4_t __rev0_717;  __rev0_717 = __builtin_shufflevector(__s0_717, __s0_717, __lane_reverse_64_16); \
   int32x4_t __rev1_717;  __rev1_717 = __builtin_shufflevector(__s1_717, __s1_717, __lane_reverse_128_32); \
-  __ret_717 = __builtin_bit_cast(int16x8_t, __noswap_vcombine_s16(__builtin_bit_cast(int16x4_t, __rev0_717), __builtin_bit_cast(int16x4_t, __noswap_vqrshrun_n_s32(__rev1_717, __p2_717)))); \
+  __ret_717 = __builtin_bit_cast(uint16x8_t, __noswap_vcombine_u16(__builtin_bit_cast(uint16x4_t, __rev0_717), __builtin_bit_cast(uint16x4_t, __noswap_vqrshrun_n_s32(__rev1_717, __p2_717)))); \
   __ret_717 = __builtin_shufflevector(__ret_717, __ret_717, __lane_reverse_128_16); \
   __ret_717; \
 })
@@ -59764,20 +59900,20 @@ __ai __attribute__((target("neon"))) int16_t vqrshlh_s16(int16_t __p0, int16_t _
 
 #ifdef __LITTLE_ENDIAN__
 #define vqrshrun_high_n_s64(__p0_718, __p1_718, __p2_718) __extension__ ({ \
-  int32x4_t __ret_718; \
-  int32x2_t __s0_718 = __p0_718; \
+  uint32x4_t __ret_718; \
+  uint32x2_t __s0_718 = __p0_718; \
   int64x2_t __s1_718 = __p1_718; \
-  __ret_718 = __builtin_bit_cast(int32x4_t, vcombine_s32(__builtin_bit_cast(int32x2_t, __s0_718), __builtin_bit_cast(int32x2_t, vqrshrun_n_s64(__s1_718, __p2_718)))); \
+  __ret_718 = __builtin_bit_cast(uint32x4_t, vcombine_u32(__builtin_bit_cast(uint32x2_t, __s0_718), __builtin_bit_cast(uint32x2_t, vqrshrun_n_s64(__s1_718, __p2_718)))); \
   __ret_718; \
 })
 #else
 #define vqrshrun_high_n_s64(__p0_719, __p1_719, __p2_719) __extension__ ({ \
-  int32x4_t __ret_719; \
-  int32x2_t __s0_719 = __p0_719; \
+  uint32x4_t __ret_719; \
+  uint32x2_t __s0_719 = __p0_719; \
   int64x2_t __s1_719 = __p1_719; \
-  int32x2_t __rev0_719;  __rev0_719 = __builtin_shufflevector(__s0_719, __s0_719, __lane_reverse_64_32); \
+  uint32x2_t __rev0_719;  __rev0_719 = __builtin_shufflevector(__s0_719, __s0_719, __lane_reverse_64_32); \
   int64x2_t __rev1_719;  __rev1_719 = __builtin_shufflevector(__s1_719, __s1_719, __lane_reverse_128_64); \
-  __ret_719 = __builtin_bit_cast(int32x4_t, __noswap_vcombine_s32(__builtin_bit_cast(int32x2_t, __rev0_719), __builtin_bit_cast(int32x2_t, __noswap_vqrshrun_n_s64(__rev1_719, __p2_719)))); \
+  __ret_719 = __builtin_bit_cast(uint32x4_t, __noswap_vcombine_u32(__builtin_bit_cast(uint32x2_t, __rev0_719), __builtin_bit_cast(uint32x2_t, __noswap_vqrshrun_n_s64(__rev1_719, __p2_719)))); \
   __ret_719 = __builtin_shufflevector(__ret_719, __ret_719, __lane_reverse_128_32); \
   __ret_719; \
 })
@@ -59785,20 +59921,20 @@ __ai __attribute__((target("neon"))) int16_t vqrshlh_s16(int16_t __p0, int16_t _
 
 #ifdef __LITTLE_ENDIAN__
 #define vqrshrun_high_n_s16(__p0_720, __p1_720, __p2_720) __extension__ ({ \
-  int8x16_t __ret_720; \
-  int8x8_t __s0_720 = __p0_720; \
+  uint8x16_t __ret_720; \
+  uint8x8_t __s0_720 = __p0_720; \
   int16x8_t __s1_720 = __p1_720; \
-  __ret_720 = __builtin_bit_cast(int8x16_t, vcombine_s8(__builtin_bit_cast(int8x8_t, __s0_720), __builtin_bit_cast(int8x8_t, vqrshrun_n_s16(__s1_720, __p2_720)))); \
+  __ret_720 = __builtin_bit_cast(uint8x16_t, vcombine_u8(__builtin_bit_cast(uint8x8_t, __s0_720), __builtin_bit_cast(uint8x8_t, vqrshrun_n_s16(__s1_720, __p2_720)))); \
   __ret_720; \
 })
 #else
 #define vqrshrun_high_n_s16(__p0_721, __p1_721, __p2_721) __extension__ ({ \
-  int8x16_t __ret_721; \
-  int8x8_t __s0_721 = __p0_721; \
+  uint8x16_t __ret_721; \
+  uint8x8_t __s0_721 = __p0_721; \
   int16x8_t __s1_721 = __p1_721; \
-  int8x8_t __rev0_721;  __rev0_721 = __builtin_shufflevector(__s0_721, __s0_721, __lane_reverse_64_8); \
+  uint8x8_t __rev0_721;  __rev0_721 = __builtin_shufflevector(__s0_721, __s0_721, __lane_reverse_64_8); \
   int16x8_t __rev1_721;  __rev1_721 = __builtin_shufflevector(__s1_721, __s1_721, __lane_reverse_128_16); \
-  __ret_721 = __builtin_bit_cast(int8x16_t, __noswap_vcombine_s8(__builtin_bit_cast(int8x8_t, __rev0_721), __builtin_bit_cast(int8x8_t, __noswap_vqrshrun_n_s16(__rev1_721, __p2_721)))); \
+  __ret_721 = __builtin_bit_cast(uint8x16_t, __noswap_vcombine_u8(__builtin_bit_cast(uint8x8_t, __rev0_721), __builtin_bit_cast(uint8x8_t, __noswap_vqrshrun_n_s16(__rev1_721, __p2_721)))); \
   __ret_721 = __builtin_shufflevector(__ret_721, __ret_721, __lane_reverse_128_8); \
   __ret_721; \
 })
@@ -60098,20 +60234,20 @@ __ai __attribute__((target("neon"))) int16_t vqshlh_s16(int16_t __p0, int16_t __
 })
 #ifdef __LITTLE_ENDIAN__
 #define vqshrun_high_n_s32(__p0_734, __p1_734, __p2_734) __extension__ ({ \
-  int16x8_t __ret_734; \
-  int16x4_t __s0_734 = __p0_734; \
+  uint16x8_t __ret_734; \
+  uint16x4_t __s0_734 = __p0_734; \
   int32x4_t __s1_734 = __p1_734; \
-  __ret_734 = __builtin_bit_cast(int16x8_t, vcombine_s16(__builtin_bit_cast(int16x4_t, __s0_734), __builtin_bit_cast(int16x4_t, vqshrun_n_s32(__s1_734, __p2_734)))); \
+  __ret_734 = __builtin_bit_cast(uint16x8_t, vcombine_u16(__builtin_bit_cast(uint16x4_t, __s0_734), __builtin_bit_cast(uint16x4_t, vqshrun_n_s32(__s1_734, __p2_734)))); \
   __ret_734; \
 })
 #else
 #define vqshrun_high_n_s32(__p0_735, __p1_735, __p2_735) __extension__ ({ \
-  int16x8_t __ret_735; \
-  int16x4_t __s0_735 = __p0_735; \
+  uint16x8_t __ret_735; \
+  uint16x4_t __s0_735 = __p0_735; \
   int32x4_t __s1_735 = __p1_735; \
-  int16x4_t __rev0_735;  __rev0_735 = __builtin_shufflevector(__s0_735, __s0_735, __lane_reverse_64_16); \
+  uint16x4_t __rev0_735;  __rev0_735 = __builtin_shufflevector(__s0_735, __s0_735, __lane_reverse_64_16); \
   int32x4_t __rev1_735;  __rev1_735 = __builtin_shufflevector(__s1_735, __s1_735, __lane_reverse_128_32); \
-  __ret_735 = __builtin_bit_cast(int16x8_t, __noswap_vcombine_s16(__builtin_bit_cast(int16x4_t, __rev0_735), __builtin_bit_cast(int16x4_t, __noswap_vqshrun_n_s32(__rev1_735, __p2_735)))); \
+  __ret_735 = __builtin_bit_cast(uint16x8_t, __noswap_vcombine_u16(__builtin_bit_cast(uint16x4_t, __rev0_735), __builtin_bit_cast(uint16x4_t, __noswap_vqshrun_n_s32(__rev1_735, __p2_735)))); \
   __ret_735 = __builtin_shufflevector(__ret_735, __ret_735, __lane_reverse_128_16); \
   __ret_735; \
 })
@@ -60119,20 +60255,20 @@ __ai __attribute__((target("neon"))) int16_t vqshlh_s16(int16_t __p0, int16_t __
 
 #ifdef __LITTLE_ENDIAN__
 #define vqshrun_high_n_s64(__p0_736, __p1_736, __p2_736) __extension__ ({ \
-  int32x4_t __ret_736; \
-  int32x2_t __s0_736 = __p0_736; \
+  uint32x4_t __ret_736; \
+  uint32x2_t __s0_736 = __p0_736; \
   int64x2_t __s1_736 = __p1_736; \
-  __ret_736 = __builtin_bit_cast(int32x4_t, vcombine_s32(__builtin_bit_cast(int32x2_t, __s0_736), __builtin_bit_cast(int32x2_t, vqshrun_n_s64(__s1_736, __p2_736)))); \
+  __ret_736 = __builtin_bit_cast(uint32x4_t, vcombine_u32(__builtin_bit_cast(uint32x2_t, __s0_736), __builtin_bit_cast(uint32x2_t, vqshrun_n_s64(__s1_736, __p2_736)))); \
   __ret_736; \
 })
 #else
 #define vqshrun_high_n_s64(__p0_737, __p1_737, __p2_737) __extension__ ({ \
-  int32x4_t __ret_737; \
-  int32x2_t __s0_737 = __p0_737; \
+  uint32x4_t __ret_737; \
+  uint32x2_t __s0_737 = __p0_737; \
   int64x2_t __s1_737 = __p1_737; \
-  int32x2_t __rev0_737;  __rev0_737 = __builtin_shufflevector(__s0_737, __s0_737, __lane_reverse_64_32); \
+  uint32x2_t __rev0_737;  __rev0_737 = __builtin_shufflevector(__s0_737, __s0_737, __lane_reverse_64_32); \
   int64x2_t __rev1_737;  __rev1_737 = __builtin_shufflevector(__s1_737, __s1_737, __lane_reverse_128_64); \
-  __ret_737 = __builtin_bit_cast(int32x4_t, __noswap_vcombine_s32(__builtin_bit_cast(int32x2_t, __rev0_737), __builtin_bit_cast(int32x2_t, __noswap_vqshrun_n_s64(__rev1_737, __p2_737)))); \
+  __ret_737 = __builtin_bit_cast(uint32x4_t, __noswap_vcombine_u32(__builtin_bit_cast(uint32x2_t, __rev0_737), __builtin_bit_cast(uint32x2_t, __noswap_vqshrun_n_s64(__rev1_737, __p2_737)))); \
   __ret_737 = __builtin_shufflevector(__ret_737, __ret_737, __lane_reverse_128_32); \
   __ret_737; \
 })
@@ -60140,20 +60276,20 @@ __ai __attribute__((target("neon"))) int16_t vqshlh_s16(int16_t __p0, int16_t __
 
 #ifdef __LITTLE_ENDIAN__
 #define vqshrun_high_n_s16(__p0_738, __p1_738, __p2_738) __extension__ ({ \
-  int8x16_t __ret_738; \
-  int8x8_t __s0_738 = __p0_738; \
+  uint8x16_t __ret_738; \
+  uint8x8_t __s0_738 = __p0_738; \
   int16x8_t __s1_738 = __p1_738; \
-  __ret_738 = __builtin_bit_cast(int8x16_t, vcombine_s8(__builtin_bit_cast(int8x8_t, __s0_738), __builtin_bit_cast(int8x8_t, vqshrun_n_s16(__s1_738, __p2_738)))); \
+  __ret_738 = __builtin_bit_cast(uint8x16_t, vcombine_u8(__builtin_bit_cast(uint8x8_t, __s0_738), __builtin_bit_cast(uint8x8_t, vqshrun_n_s16(__s1_738, __p2_738)))); \
   __ret_738; \
 })
 #else
 #define vqshrun_high_n_s16(__p0_739, __p1_739, __p2_739) __extension__ ({ \
-  int8x16_t __ret_739; \
-  int8x8_t __s0_739 = __p0_739; \
+  uint8x16_t __ret_739; \
+  uint8x8_t __s0_739 = __p0_739; \
   int16x8_t __s1_739 = __p1_739; \
-  int8x8_t __rev0_739;  __rev0_739 = __builtin_shufflevector(__s0_739, __s0_739, __lane_reverse_64_8); \
+  uint8x8_t __rev0_739;  __rev0_739 = __builtin_shufflevector(__s0_739, __s0_739, __lane_reverse_64_8); \
   int16x8_t __rev1_739;  __rev1_739 = __builtin_shufflevector(__s1_739, __s1_739, __lane_reverse_128_16); \
-  __ret_739 = __builtin_bit_cast(int8x16_t, __noswap_vcombine_s8(__builtin_bit_cast(int8x8_t, __rev0_739), __builtin_bit_cast(int8x8_t, __noswap_vqshrun_n_s16(__rev1_739, __p2_739)))); \
+  __ret_739 = __builtin_bit_cast(uint8x16_t, __noswap_vcombine_u8(__builtin_bit_cast(uint8x8_t, __rev0_739), __builtin_bit_cast(uint8x8_t, __noswap_vqshrun_n_s16(__rev1_739, __p2_739)))); \
   __ret_739 = __builtin_shufflevector(__ret_739, __ret_739, __lane_reverse_128_8); \
   __ret_739; \
 })
@@ -64345,7 +64481,7 @@ __ai __attribute__((target("neon"))) int8x16_t vrsubhn_high_s16(int8x8_t __p0, i
   poly64x1_t __ret; \
   poly64_t __s0 = __p0; \
   poly64x1_t __s1 = __p1; \
-  __ret = __builtin_bit_cast(poly64x1_t, __builtin_neon_vset_lane_i64(__s0, __s1, __p2)); \
+  __ret = __builtin_bit_cast(poly64x1_t, __builtin_neon_vset_lane_i64(__s0, __builtin_bit_cast(int64x1_t, __s1), __p2)); \
   __ret; \
 })
 #ifdef __LITTLE_ENDIAN__
@@ -64353,7 +64489,7 @@ __ai __attribute__((target("neon"))) int8x16_t vrsubhn_high_s16(int8x8_t __p0, i
   poly64x2_t __ret; \
   poly64_t __s0 = __p0; \
   poly64x2_t __s1 = __p1; \
-  __ret = __builtin_bit_cast(poly64x2_t, __builtin_neon_vsetq_lane_i64(__s0, __s1, __p2)); \
+  __ret = __builtin_bit_cast(poly64x2_t, __builtin_neon_vsetq_lane_i64(__s0, __builtin_bit_cast(int64x2_t, __s1), __p2)); \
   __ret; \
 })
 #else
@@ -64362,7 +64498,7 @@ __ai __attribute__((target("neon"))) int8x16_t vrsubhn_high_s16(int8x8_t __p0, i
   poly64_t __s0 = __p0; \
   poly64x2_t __s1 = __p1; \
   poly64x2_t __rev1;  __rev1 = __builtin_shufflevector(__s1, __s1, __lane_reverse_128_64); \
-  __ret = __builtin_bit_cast(poly64x2_t, __builtin_neon_vsetq_lane_i64(__s0, __rev1, __p2)); \
+  __ret = __builtin_bit_cast(poly64x2_t, __builtin_neon_vsetq_lane_i64(__s0, __builtin_bit_cast(int64x2_t, __rev1), __p2)); \
   __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_64); \
   __ret; \
 })
@@ -64370,7 +64506,7 @@ __ai __attribute__((target("neon"))) int8x16_t vrsubhn_high_s16(int8x8_t __p0, i
   poly64x2_t __ret; \
   poly64_t __s0 = __p0; \
   poly64x2_t __s1 = __p1; \
-  __ret = __builtin_bit_cast(poly64x2_t, __builtin_neon_vsetq_lane_i64(__s0, __s1, __p2)); \
+  __ret = __builtin_bit_cast(poly64x2_t, __builtin_neon_vsetq_lane_i64(__s0, __builtin_bit_cast(int64x2_t, __s1), __p2)); \
   __ret; \
 })
 #endif
diff --git a/lib/include/arm_sme.h b/lib/include/arm_sme.h
index 6da5ca0b51..0983e4a58d 100644
--- a/lib/include/arm_sme.h
+++ b/lib/include/arm_sme.h
@@ -796,8 +796,6 @@ __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svdot_za16_mf8_vg1x2
 void svdot_za16_mf8_vg1x2_fpm(uint32_t, svmfloat8x2_t, svmfloat8x2_t, fpm_t);
 __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svdot_za16_mf8_vg1x4_fpm)))
 void svdot_za16_mf8_vg1x4_fpm(uint32_t, svmfloat8x4_t, svmfloat8x4_t, fpm_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_single_za16_mf8_vg2x1_fpm)))
-void svmla_single_za16_mf8_vg2x1_fpm(uint32_t, svmfloat8_t, svmfloat8_t, fpm_t);
 __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_single_za16_mf8_vg2x2_fpm)))
 void svmla_single_za16_mf8_vg2x2_fpm(uint32_t, svmfloat8x2_t, svmfloat8_t, fpm_t);
 __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_single_za16_mf8_vg2x4_fpm)))
@@ -808,6 +806,8 @@ __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_lane_za16_mf8_
 void svmla_lane_za16_mf8_vg2x2_fpm(uint32_t, svmfloat8x2_t, svmfloat8_t, uint64_t, fpm_t);
 __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_lane_za16_mf8_vg2x4_fpm)))
 void svmla_lane_za16_mf8_vg2x4_fpm(uint32_t, svmfloat8x4_t, svmfloat8_t, uint64_t, fpm_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_za16_mf8_vg2x1_fpm)))
+void svmla_za16_mf8_vg2x1_fpm(uint32_t, svmfloat8_t, svmfloat8_t, fpm_t);
 __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_za16_mf8_vg2x2_fpm)))
 void svmla_za16_mf8_vg2x2_fpm(uint32_t, svmfloat8x2_t, svmfloat8x2_t, fpm_t);
 __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_za16_mf8_vg2x4_fpm)))
@@ -828,8 +828,6 @@ __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svdot_za16_mf8_vg1x
 void svdot_za16_vg1x2_fpm(uint32_t, svmfloat8x2_t, svmfloat8x2_t, fpm_t);
 __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svdot_za16_mf8_vg1x4_fpm)))
 void svdot_za16_vg1x4_fpm(uint32_t, svmfloat8x4_t, svmfloat8x4_t, fpm_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_single_za16_mf8_vg2x1_fpm)))
-void svmla_za16_vg2x1_fpm(uint32_t, svmfloat8_t, svmfloat8_t, fpm_t);
 __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_single_za16_mf8_vg2x2_fpm)))
 void svmla_za16_vg2x2_fpm(uint32_t, svmfloat8x2_t, svmfloat8_t, fpm_t);
 __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_single_za16_mf8_vg2x4_fpm)))
@@ -840,6 +838,8 @@ __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_lane_za16_mf8
 void svmla_lane_za16_vg2x2_fpm(uint32_t, svmfloat8x2_t, svmfloat8_t, uint64_t, fpm_t);
 __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_lane_za16_mf8_vg2x4_fpm)))
 void svmla_lane_za16_vg2x4_fpm(uint32_t, svmfloat8x4_t, svmfloat8_t, uint64_t, fpm_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_za16_mf8_vg2x1_fpm)))
+void svmla_za16_vg2x1_fpm(uint32_t, svmfloat8_t, svmfloat8_t, fpm_t);
 __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_za16_mf8_vg2x2_fpm)))
 void svmla_za16_vg2x2_fpm(uint32_t, svmfloat8x2_t, svmfloat8x2_t, fpm_t);
 __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_za16_mf8_vg2x4_fpm)))
@@ -860,8 +860,6 @@ __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svdot_za32_mf8_vg1x2
 void svdot_za32_mf8_vg1x2_fpm(uint32_t, svmfloat8x2_t, svmfloat8x2_t, fpm_t);
 __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svdot_za32_mf8_vg1x4_fpm)))
 void svdot_za32_mf8_vg1x4_fpm(uint32_t, svmfloat8x4_t, svmfloat8x4_t, fpm_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_single_za32_mf8_vg4x1_fpm)))
-void svmla_single_za32_mf8_vg4x1_fpm(uint32_t, svmfloat8_t, svmfloat8_t, fpm_t);
 __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_single_za32_mf8_vg4x2_fpm)))
 void svmla_single_za32_mf8_vg4x2_fpm(uint32_t, svmfloat8x2_t, svmfloat8_t, fpm_t);
 __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_single_za32_mf8_vg4x4_fpm)))
@@ -872,6 +870,8 @@ __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_lane_za32_mf8_
 void svmla_lane_za32_mf8_vg4x2_fpm(uint32_t, svmfloat8x2_t, svmfloat8_t, uint64_t, fpm_t);
 __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_lane_za32_mf8_vg4x4_fpm)))
 void svmla_lane_za32_mf8_vg4x4_fpm(uint32_t, svmfloat8x4_t, svmfloat8_t, uint64_t, fpm_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_za32_mf8_vg4x1_fpm)))
+void svmla_za32_mf8_vg4x1_fpm(uint32_t, svmfloat8_t, svmfloat8_t, fpm_t);
 __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_za32_mf8_vg4x2_fpm)))
 void svmla_za32_mf8_vg4x2_fpm(uint32_t, svmfloat8x2_t, svmfloat8x2_t, fpm_t);
 __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_za32_mf8_vg4x4_fpm)))
@@ -894,8 +894,6 @@ __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svdot_za32_mf8_vg1x
 void svdot_za32_vg1x2_fpm(uint32_t, svmfloat8x2_t, svmfloat8x2_t, fpm_t);
 __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svdot_za32_mf8_vg1x4_fpm)))
 void svdot_za32_vg1x4_fpm(uint32_t, svmfloat8x4_t, svmfloat8x4_t, fpm_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_single_za32_mf8_vg4x1_fpm)))
-void svmla_za32_vg4x1_fpm(uint32_t, svmfloat8_t, svmfloat8_t, fpm_t);
 __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_single_za32_mf8_vg4x2_fpm)))
 void svmla_za32_vg4x2_fpm(uint32_t, svmfloat8x2_t, svmfloat8_t, fpm_t);
 __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_single_za32_mf8_vg4x4_fpm)))
@@ -906,6 +904,8 @@ __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_lane_za32_mf8
 void svmla_lane_za32_vg4x2_fpm(uint32_t, svmfloat8x2_t, svmfloat8_t, uint64_t, fpm_t);
 __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_lane_za32_mf8_vg4x4_fpm)))
 void svmla_lane_za32_vg4x4_fpm(uint32_t, svmfloat8x4_t, svmfloat8_t, uint64_t, fpm_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_za32_mf8_vg4x1_fpm)))
+void svmla_za32_vg4x1_fpm(uint32_t, svmfloat8_t, svmfloat8_t, fpm_t);
 __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_za32_mf8_vg4x2_fpm)))
 void svmla_za32_vg4x2_fpm(uint32_t, svmfloat8x2_t, svmfloat8x2_t, fpm_t);
 __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_za32_mf8_vg4x4_fpm)))
diff --git a/lib/include/arm_sve.h b/lib/include/arm_sve.h
index 6a036be08c..d56bd34530 100644
--- a/lib/include/arm_sve.h
+++ b/lib/include/arm_sve.h
@@ -4617,6 +4617,86 @@ __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svminnm_bf16_x2)))
 svbfloat16x2_t svminnm(svbfloat16x2_t, svbfloat16x2_t);
 __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svminnm_bf16_x4)))
 svbfloat16x4_t svminnm(svbfloat16x4_t, svbfloat16x4_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmul_single_bf16_x2)))
+svbfloat16x2_t svmul_single_bf16_x2(svbfloat16x2_t, svbfloat16_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmul_single_bf16_x4)))
+svbfloat16x4_t svmul_single_bf16_x4(svbfloat16x4_t, svbfloat16_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmul_bf16_x2)))
+svbfloat16x2_t svmul_bf16_x2(svbfloat16x2_t, svbfloat16x2_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmul_bf16_x4)))
+svbfloat16x4_t svmul_bf16_x4(svbfloat16x4_t, svbfloat16x4_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svscale_single_bf16_x2)))
+svbfloat16x2_t svscale_single_bf16_x2(svbfloat16x2_t, svint16_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svscale_single_bf16_x4)))
+svbfloat16x4_t svscale_single_bf16_x4(svbfloat16x4_t, svint16_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svscale_bf16_x2)))
+svbfloat16x2_t svscale_bf16_x2(svbfloat16x2_t, svint16x2_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svscale_bf16_x4)))
+svbfloat16x4_t svscale_bf16_x4(svbfloat16x4_t, svint16x4_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmul_single_bf16_x2)))
+svbfloat16x2_t svmul(svbfloat16x2_t, svbfloat16_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmul_single_bf16_x4)))
+svbfloat16x4_t svmul(svbfloat16x4_t, svbfloat16_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmul_bf16_x2)))
+svbfloat16x2_t svmul(svbfloat16x2_t, svbfloat16x2_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmul_bf16_x4)))
+svbfloat16x4_t svmul(svbfloat16x4_t, svbfloat16x4_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svscale_single_bf16_x2)))
+svbfloat16x2_t svscale(svbfloat16x2_t, svint16_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svscale_single_bf16_x4)))
+svbfloat16x4_t svscale(svbfloat16x4_t, svint16_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svscale_bf16_x2)))
+svbfloat16x2_t svscale(svbfloat16x2_t, svint16x2_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svscale_bf16_x4)))
+svbfloat16x4_t svscale(svbfloat16x4_t, svint16x4_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmul_single_f64_x2)))
+svfloat64x2_t svmul_single_f64_x2(svfloat64x2_t, svfloat64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmul_single_f32_x2)))
+svfloat32x2_t svmul_single_f32_x2(svfloat32x2_t, svfloat32_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmul_single_f16_x2)))
+svfloat16x2_t svmul_single_f16_x2(svfloat16x2_t, svfloat16_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmul_single_f64_x4)))
+svfloat64x4_t svmul_single_f64_x4(svfloat64x4_t, svfloat64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmul_single_f32_x4)))
+svfloat32x4_t svmul_single_f32_x4(svfloat32x4_t, svfloat32_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmul_single_f16_x4)))
+svfloat16x4_t svmul_single_f16_x4(svfloat16x4_t, svfloat16_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmul_f64_x2)))
+svfloat64x2_t svmul_f64_x2(svfloat64x2_t, svfloat64x2_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmul_f32_x2)))
+svfloat32x2_t svmul_f32_x2(svfloat32x2_t, svfloat32x2_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmul_f16_x2)))
+svfloat16x2_t svmul_f16_x2(svfloat16x2_t, svfloat16x2_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmul_f64_x4)))
+svfloat64x4_t svmul_f64_x4(svfloat64x4_t, svfloat64x4_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmul_f32_x4)))
+svfloat32x4_t svmul_f32_x4(svfloat32x4_t, svfloat32x4_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmul_f16_x4)))
+svfloat16x4_t svmul_f16_x4(svfloat16x4_t, svfloat16x4_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmul_single_f64_x2)))
+svfloat64x2_t svmul(svfloat64x2_t, svfloat64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmul_single_f32_x2)))
+svfloat32x2_t svmul(svfloat32x2_t, svfloat32_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmul_single_f16_x2)))
+svfloat16x2_t svmul(svfloat16x2_t, svfloat16_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmul_single_f64_x4)))
+svfloat64x4_t svmul(svfloat64x4_t, svfloat64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmul_single_f32_x4)))
+svfloat32x4_t svmul(svfloat32x4_t, svfloat32_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmul_single_f16_x4)))
+svfloat16x4_t svmul(svfloat16x4_t, svfloat16_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmul_f64_x2)))
+svfloat64x2_t svmul(svfloat64x2_t, svfloat64x2_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmul_f32_x2)))
+svfloat32x2_t svmul(svfloat32x2_t, svfloat32x2_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmul_f16_x2)))
+svfloat16x2_t svmul(svfloat16x2_t, svfloat16x2_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmul_f64_x4)))
+svfloat64x4_t svmul(svfloat64x4_t, svfloat64x4_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmul_f32_x4)))
+svfloat32x4_t svmul(svfloat32x4_t, svfloat32x4_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmul_f16_x4)))
+svfloat16x4_t svmul(svfloat16x4_t, svfloat16x4_t);
 __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadda_f64)))
 float64_t svadda_f64(svbool_t, float64_t, svfloat64_t);
 __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadda_f32)))
@@ -4655,18 +4735,6 @@ __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadrw_u32base_s32in
 svuint32_t svadrw_u32base_s32index(svuint32_t, svint32_t);
 __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadrw_u64base_s64index)))
 svuint64_t svadrw_u64base_s64index(svuint64_t, svint64_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcompact_u32)))
-svuint32_t svcompact_u32(svbool_t, svuint32_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcompact_u64)))
-svuint64_t svcompact_u64(svbool_t, svuint64_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcompact_f64)))
-svfloat64_t svcompact_f64(svbool_t, svfloat64_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcompact_f32)))
-svfloat32_t svcompact_f32(svbool_t, svfloat32_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcompact_s32)))
-svint32_t svcompact_s32(svbool_t, svint32_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcompact_s64)))
-svint64_t svcompact_s64(svbool_t, svint64_t);
 __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_gather_u32base_index_u32)))
 svuint32_t svld1_gather_u32base_index_u32(svbool_t, svuint32_t, int64_t);
 __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_gather_u64base_index_u64)))
@@ -5923,18 +5991,6 @@ __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadrw_u32base_s32i
 svuint32_t svadrw_index(svuint32_t, svint32_t);
 __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadrw_u64base_s64index)))
 svuint64_t svadrw_index(svuint64_t, svint64_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcompact_u32)))
-svuint32_t svcompact(svbool_t, svuint32_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcompact_u64)))
-svuint64_t svcompact(svbool_t, svuint64_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcompact_f64)))
-svfloat64_t svcompact(svbool_t, svfloat64_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcompact_f32)))
-svfloat32_t svcompact(svbool_t, svfloat32_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcompact_s32)))
-svint32_t svcompact(svbool_t, svint32_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcompact_s64)))
-svint64_t svcompact(svbool_t, svint64_t);
 __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_gather_u32base_index_u32)))
 svuint32_t svld1_gather_index_u32(svbool_t, svuint32_t, int64_t);
 __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_gather_u64base_index_u64)))
@@ -8777,3764 +8833,222 @@ __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzipq2_mf8)))
 svmfloat8_t svzipq2(svmfloat8_t, svmfloat8_t);
 __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzipq2_s16)))
 svint16_t svzipq2(svint16_t, svint16_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbfmmla_f32)))
-svfloat32_t svbfmmla_f32(svfloat32_t, svbfloat16_t, svbfloat16_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbfmmla_f32)))
-svfloat32_t svbfmmla(svfloat32_t, svbfloat16_t, svbfloat16_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbfdot_n_f32)))
-svfloat32_t svbfdot_n_f32(svfloat32_t, svbfloat16_t, bfloat16_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbfdot_f32)))
-svfloat32_t svbfdot_f32(svfloat32_t, svbfloat16_t, svbfloat16_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbfdot_lane_f32)))
-svfloat32_t svbfdot_lane_f32(svfloat32_t, svbfloat16_t, svbfloat16_t, uint64_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbfmlalb_n_f32)))
-svfloat32_t svbfmlalb_n_f32(svfloat32_t, svbfloat16_t, bfloat16_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbfmlalb_f32)))
-svfloat32_t svbfmlalb_f32(svfloat32_t, svbfloat16_t, svbfloat16_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbfmlalb_lane_f32)))
-svfloat32_t svbfmlalb_lane_f32(svfloat32_t, svbfloat16_t, svbfloat16_t, uint64_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbfmlalt_n_f32)))
-svfloat32_t svbfmlalt_n_f32(svfloat32_t, svbfloat16_t, bfloat16_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbfmlalt_f32)))
-svfloat32_t svbfmlalt_f32(svfloat32_t, svbfloat16_t, svbfloat16_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbfmlalt_lane_f32)))
-svfloat32_t svbfmlalt_lane_f32(svfloat32_t, svbfloat16_t, svbfloat16_t, uint64_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt_bf16_f32_m)))
-svbfloat16_t svcvt_bf16_f32_m(svbfloat16_t, svbool_t, svfloat32_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt_bf16_f32_x)))
-svbfloat16_t svcvt_bf16_f32_x(svbool_t, svfloat32_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt_bf16_f32_z)))
-svbfloat16_t svcvt_bf16_f32_z(svbool_t, svfloat32_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvtnt_bf16_f32_m)))
-svbfloat16_t svcvtnt_bf16_f32_m(svbfloat16_t, svbool_t, svfloat32_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbfdot_n_f32)))
-svfloat32_t svbfdot(svfloat32_t, svbfloat16_t, bfloat16_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbfdot_f32)))
-svfloat32_t svbfdot(svfloat32_t, svbfloat16_t, svbfloat16_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbfdot_lane_f32)))
-svfloat32_t svbfdot_lane(svfloat32_t, svbfloat16_t, svbfloat16_t, uint64_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbfmlalb_n_f32)))
-svfloat32_t svbfmlalb(svfloat32_t, svbfloat16_t, bfloat16_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbfmlalb_f32)))
-svfloat32_t svbfmlalb(svfloat32_t, svbfloat16_t, svbfloat16_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbfmlalb_lane_f32)))
-svfloat32_t svbfmlalb_lane(svfloat32_t, svbfloat16_t, svbfloat16_t, uint64_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbfmlalt_n_f32)))
-svfloat32_t svbfmlalt(svfloat32_t, svbfloat16_t, bfloat16_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbfmlalt_f32)))
-svfloat32_t svbfmlalt(svfloat32_t, svbfloat16_t, svbfloat16_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbfmlalt_lane_f32)))
-svfloat32_t svbfmlalt_lane(svfloat32_t, svbfloat16_t, svbfloat16_t, uint64_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt_bf16_f32_m)))
-svbfloat16_t svcvt_bf16_m(svbfloat16_t, svbool_t, svfloat32_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt_bf16_f32_x)))
-svbfloat16_t svcvt_bf16_x(svbool_t, svfloat32_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt_bf16_f32_z)))
-svbfloat16_t svcvt_bf16_z(svbool_t, svfloat32_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvtnt_bf16_f32_m)))
-svbfloat16_t svcvtnt_bf16_m(svbfloat16_t, svbool_t, svfloat32_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmmla_f32)))
-svfloat32_t svmmla_f32(svfloat32_t, svfloat32_t, svfloat32_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmmla_f32)))
-svfloat32_t svmmla(svfloat32_t, svfloat32_t, svfloat32_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1ro_u8)))
-svuint8_t svld1ro_u8(svbool_t, uint8_t const *);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1ro_u32)))
-svuint32_t svld1ro_u32(svbool_t, uint32_t const *);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1ro_u64)))
-svuint64_t svld1ro_u64(svbool_t, uint64_t const *);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1ro_u16)))
-svuint16_t svld1ro_u16(svbool_t, uint16_t const *);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1ro_bf16)))
-svbfloat16_t svld1ro_bf16(svbool_t, bfloat16_t const *);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1ro_s8)))
-svint8_t svld1ro_s8(svbool_t, int8_t const *);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1ro_f64)))
-svfloat64_t svld1ro_f64(svbool_t, float64_t const *);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1ro_f32)))
-svfloat32_t svld1ro_f32(svbool_t, float32_t const *);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1ro_f16)))
-svfloat16_t svld1ro_f16(svbool_t, float16_t const *);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1ro_s32)))
-svint32_t svld1ro_s32(svbool_t, int32_t const *);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1ro_s64)))
-svint64_t svld1ro_s64(svbool_t, int64_t const *);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1ro_mf8)))
-svmfloat8_t svld1ro_mf8(svbool_t, mfloat8_t const *);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1ro_s16)))
-svint16_t svld1ro_s16(svbool_t, int16_t const *);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmmla_f64)))
-svfloat64_t svmmla_f64(svfloat64_t, svfloat64_t, svfloat64_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtrn1q_u8)))
-svuint8_t svtrn1q_u8(svuint8_t, svuint8_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtrn1q_u32)))
-svuint32_t svtrn1q_u32(svuint32_t, svuint32_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtrn1q_u64)))
-svuint64_t svtrn1q_u64(svuint64_t, svuint64_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtrn1q_u16)))
-svuint16_t svtrn1q_u16(svuint16_t, svuint16_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtrn1q_bf16)))
-svbfloat16_t svtrn1q_bf16(svbfloat16_t, svbfloat16_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtrn1q_s8)))
-svint8_t svtrn1q_s8(svint8_t, svint8_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtrn1q_f64)))
-svfloat64_t svtrn1q_f64(svfloat64_t, svfloat64_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtrn1q_f32)))
-svfloat32_t svtrn1q_f32(svfloat32_t, svfloat32_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtrn1q_f16)))
-svfloat16_t svtrn1q_f16(svfloat16_t, svfloat16_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtrn1q_s32)))
-svint32_t svtrn1q_s32(svint32_t, svint32_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtrn1q_s64)))
-svint64_t svtrn1q_s64(svint64_t, svint64_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtrn1q_s16)))
-svint16_t svtrn1q_s16(svint16_t, svint16_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtrn2q_u8)))
-svuint8_t svtrn2q_u8(svuint8_t, svuint8_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtrn2q_u32)))
-svuint32_t svtrn2q_u32(svuint32_t, svuint32_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtrn2q_u64)))
-svuint64_t svtrn2q_u64(svuint64_t, svuint64_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtrn2q_u16)))
-svuint16_t svtrn2q_u16(svuint16_t, svuint16_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtrn2q_bf16)))
-svbfloat16_t svtrn2q_bf16(svbfloat16_t, svbfloat16_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtrn2q_s8)))
-svint8_t svtrn2q_s8(svint8_t, svint8_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtrn2q_f64)))
-svfloat64_t svtrn2q_f64(svfloat64_t, svfloat64_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtrn2q_f32)))
-svfloat32_t svtrn2q_f32(svfloat32_t, svfloat32_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtrn2q_f16)))
-svfloat16_t svtrn2q_f16(svfloat16_t, svfloat16_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtrn2q_s32)))
-svint32_t svtrn2q_s32(svint32_t, svint32_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtrn2q_s64)))
-svint64_t svtrn2q_s64(svint64_t, svint64_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtrn2q_s16)))
-svint16_t svtrn2q_s16(svint16_t, svint16_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp1q_u8)))
-svuint8_t svuzp1q_u8(svuint8_t, svuint8_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp1q_u32)))
-svuint32_t svuzp1q_u32(svuint32_t, svuint32_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp1q_u64)))
-svuint64_t svuzp1q_u64(svuint64_t, svuint64_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp1q_u16)))
-svuint16_t svuzp1q_u16(svuint16_t, svuint16_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp1q_bf16)))
-svbfloat16_t svuzp1q_bf16(svbfloat16_t, svbfloat16_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp1q_s8)))
-svint8_t svuzp1q_s8(svint8_t, svint8_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp1q_f64)))
-svfloat64_t svuzp1q_f64(svfloat64_t, svfloat64_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp1q_f32)))
-svfloat32_t svuzp1q_f32(svfloat32_t, svfloat32_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp1q_f16)))
-svfloat16_t svuzp1q_f16(svfloat16_t, svfloat16_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp1q_s32)))
-svint32_t svuzp1q_s32(svint32_t, svint32_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp1q_s64)))
-svint64_t svuzp1q_s64(svint64_t, svint64_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp1q_s16)))
-svint16_t svuzp1q_s16(svint16_t, svint16_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp2q_u8)))
-svuint8_t svuzp2q_u8(svuint8_t, svuint8_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp2q_u32)))
-svuint32_t svuzp2q_u32(svuint32_t, svuint32_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp2q_u64)))
-svuint64_t svuzp2q_u64(svuint64_t, svuint64_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp2q_u16)))
-svuint16_t svuzp2q_u16(svuint16_t, svuint16_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp2q_bf16)))
-svbfloat16_t svuzp2q_bf16(svbfloat16_t, svbfloat16_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp2q_s8)))
-svint8_t svuzp2q_s8(svint8_t, svint8_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp2q_f64)))
-svfloat64_t svuzp2q_f64(svfloat64_t, svfloat64_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp2q_f32)))
-svfloat32_t svuzp2q_f32(svfloat32_t, svfloat32_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp2q_f16)))
-svfloat16_t svuzp2q_f16(svfloat16_t, svfloat16_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp2q_s32)))
-svint32_t svuzp2q_s32(svint32_t, svint32_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp2q_s64)))
-svint64_t svuzp2q_s64(svint64_t, svint64_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp2q_s16)))
-svint16_t svuzp2q_s16(svint16_t, svint16_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip1q_u8)))
-svuint8_t svzip1q_u8(svuint8_t, svuint8_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip1q_u32)))
-svuint32_t svzip1q_u32(svuint32_t, svuint32_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip1q_u64)))
-svuint64_t svzip1q_u64(svuint64_t, svuint64_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip1q_u16)))
-svuint16_t svzip1q_u16(svuint16_t, svuint16_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip1q_bf16)))
-svbfloat16_t svzip1q_bf16(svbfloat16_t, svbfloat16_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip1q_s8)))
-svint8_t svzip1q_s8(svint8_t, svint8_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip1q_f64)))
-svfloat64_t svzip1q_f64(svfloat64_t, svfloat64_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip1q_f32)))
-svfloat32_t svzip1q_f32(svfloat32_t, svfloat32_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip1q_f16)))
-svfloat16_t svzip1q_f16(svfloat16_t, svfloat16_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip1q_s32)))
-svint32_t svzip1q_s32(svint32_t, svint32_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip1q_s64)))
-svint64_t svzip1q_s64(svint64_t, svint64_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip1q_s16)))
-svint16_t svzip1q_s16(svint16_t, svint16_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip2q_u8)))
-svuint8_t svzip2q_u8(svuint8_t, svuint8_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip2q_u32)))
-svuint32_t svzip2q_u32(svuint32_t, svuint32_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip2q_u64)))
-svuint64_t svzip2q_u64(svuint64_t, svuint64_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip2q_u16)))
-svuint16_t svzip2q_u16(svuint16_t, svuint16_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip2q_bf16)))
-svbfloat16_t svzip2q_bf16(svbfloat16_t, svbfloat16_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip2q_s8)))
-svint8_t svzip2q_s8(svint8_t, svint8_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip2q_f64)))
-svfloat64_t svzip2q_f64(svfloat64_t, svfloat64_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip2q_f32)))
-svfloat32_t svzip2q_f32(svfloat32_t, svfloat32_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip2q_f16)))
-svfloat16_t svzip2q_f16(svfloat16_t, svfloat16_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip2q_s32)))
-svint32_t svzip2q_s32(svint32_t, svint32_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip2q_s64)))
-svint64_t svzip2q_s64(svint64_t, svint64_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip2q_s16)))
-svint16_t svzip2q_s16(svint16_t, svint16_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1ro_u8)))
-svuint8_t svld1ro(svbool_t, uint8_t const *);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1ro_u32)))
-svuint32_t svld1ro(svbool_t, uint32_t const *);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1ro_u64)))
-svuint64_t svld1ro(svbool_t, uint64_t const *);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1ro_u16)))
-svuint16_t svld1ro(svbool_t, uint16_t const *);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1ro_bf16)))
-svbfloat16_t svld1ro(svbool_t, bfloat16_t const *);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1ro_s8)))
-svint8_t svld1ro(svbool_t, int8_t const *);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1ro_f64)))
-svfloat64_t svld1ro(svbool_t, float64_t const *);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1ro_f32)))
-svfloat32_t svld1ro(svbool_t, float32_t const *);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1ro_f16)))
-svfloat16_t svld1ro(svbool_t, float16_t const *);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1ro_s32)))
-svint32_t svld1ro(svbool_t, int32_t const *);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1ro_s64)))
-svint64_t svld1ro(svbool_t, int64_t const *);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1ro_mf8)))
-svmfloat8_t svld1ro(svbool_t, mfloat8_t const *);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1ro_s16)))
-svint16_t svld1ro(svbool_t, int16_t const *);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmmla_f64)))
-svfloat64_t svmmla(svfloat64_t, svfloat64_t, svfloat64_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtrn1q_u8)))
-svuint8_t svtrn1q(svuint8_t, svuint8_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtrn1q_u32)))
-svuint32_t svtrn1q(svuint32_t, svuint32_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtrn1q_u64)))
-svuint64_t svtrn1q(svuint64_t, svuint64_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtrn1q_u16)))
-svuint16_t svtrn1q(svuint16_t, svuint16_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtrn1q_bf16)))
-svbfloat16_t svtrn1q(svbfloat16_t, svbfloat16_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtrn1q_s8)))
-svint8_t svtrn1q(svint8_t, svint8_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtrn1q_f64)))
-svfloat64_t svtrn1q(svfloat64_t, svfloat64_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtrn1q_f32)))
-svfloat32_t svtrn1q(svfloat32_t, svfloat32_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtrn1q_f16)))
-svfloat16_t svtrn1q(svfloat16_t, svfloat16_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtrn1q_s32)))
-svint32_t svtrn1q(svint32_t, svint32_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtrn1q_s64)))
-svint64_t svtrn1q(svint64_t, svint64_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtrn1q_s16)))
-svint16_t svtrn1q(svint16_t, svint16_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtrn2q_u8)))
-svuint8_t svtrn2q(svuint8_t, svuint8_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtrn2q_u32)))
-svuint32_t svtrn2q(svuint32_t, svuint32_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtrn2q_u64)))
-svuint64_t svtrn2q(svuint64_t, svuint64_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtrn2q_u16)))
-svuint16_t svtrn2q(svuint16_t, svuint16_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtrn2q_bf16)))
-svbfloat16_t svtrn2q(svbfloat16_t, svbfloat16_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtrn2q_s8)))
-svint8_t svtrn2q(svint8_t, svint8_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtrn2q_f64)))
-svfloat64_t svtrn2q(svfloat64_t, svfloat64_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtrn2q_f32)))
-svfloat32_t svtrn2q(svfloat32_t, svfloat32_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtrn2q_f16)))
-svfloat16_t svtrn2q(svfloat16_t, svfloat16_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtrn2q_s32)))
-svint32_t svtrn2q(svint32_t, svint32_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtrn2q_s64)))
-svint64_t svtrn2q(svint64_t, svint64_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtrn2q_s16)))
-svint16_t svtrn2q(svint16_t, svint16_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp1q_u8)))
-svuint8_t svuzp1q(svuint8_t, svuint8_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp1q_u32)))
-svuint32_t svuzp1q(svuint32_t, svuint32_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp1q_u64)))
-svuint64_t svuzp1q(svuint64_t, svuint64_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp1q_u16)))
-svuint16_t svuzp1q(svuint16_t, svuint16_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp1q_bf16)))
-svbfloat16_t svuzp1q(svbfloat16_t, svbfloat16_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp1q_s8)))
-svint8_t svuzp1q(svint8_t, svint8_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp1q_f64)))
-svfloat64_t svuzp1q(svfloat64_t, svfloat64_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp1q_f32)))
-svfloat32_t svuzp1q(svfloat32_t, svfloat32_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp1q_f16)))
-svfloat16_t svuzp1q(svfloat16_t, svfloat16_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp1q_s32)))
-svint32_t svuzp1q(svint32_t, svint32_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp1q_s64)))
-svint64_t svuzp1q(svint64_t, svint64_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp1q_s16)))
-svint16_t svuzp1q(svint16_t, svint16_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp2q_u8)))
-svuint8_t svuzp2q(svuint8_t, svuint8_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp2q_u32)))
-svuint32_t svuzp2q(svuint32_t, svuint32_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp2q_u64)))
-svuint64_t svuzp2q(svuint64_t, svuint64_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp2q_u16)))
-svuint16_t svuzp2q(svuint16_t, svuint16_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp2q_bf16)))
-svbfloat16_t svuzp2q(svbfloat16_t, svbfloat16_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp2q_s8)))
-svint8_t svuzp2q(svint8_t, svint8_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp2q_f64)))
-svfloat64_t svuzp2q(svfloat64_t, svfloat64_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp2q_f32)))
-svfloat32_t svuzp2q(svfloat32_t, svfloat32_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp2q_f16)))
-svfloat16_t svuzp2q(svfloat16_t, svfloat16_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp2q_s32)))
-svint32_t svuzp2q(svint32_t, svint32_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp2q_s64)))
-svint64_t svuzp2q(svint64_t, svint64_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp2q_s16)))
-svint16_t svuzp2q(svint16_t, svint16_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip1q_u8)))
-svuint8_t svzip1q(svuint8_t, svuint8_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip1q_u32)))
-svuint32_t svzip1q(svuint32_t, svuint32_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip1q_u64)))
-svuint64_t svzip1q(svuint64_t, svuint64_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip1q_u16)))
-svuint16_t svzip1q(svuint16_t, svuint16_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip1q_bf16)))
-svbfloat16_t svzip1q(svbfloat16_t, svbfloat16_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip1q_s8)))
-svint8_t svzip1q(svint8_t, svint8_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip1q_f64)))
-svfloat64_t svzip1q(svfloat64_t, svfloat64_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip1q_f32)))
-svfloat32_t svzip1q(svfloat32_t, svfloat32_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip1q_f16)))
-svfloat16_t svzip1q(svfloat16_t, svfloat16_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip1q_s32)))
-svint32_t svzip1q(svint32_t, svint32_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip1q_s64)))
-svint64_t svzip1q(svint64_t, svint64_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip1q_s16)))
-svint16_t svzip1q(svint16_t, svint16_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip2q_u8)))
-svuint8_t svzip2q(svuint8_t, svuint8_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip2q_u32)))
-svuint32_t svzip2q(svuint32_t, svuint32_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip2q_u64)))
-svuint64_t svzip2q(svuint64_t, svuint64_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip2q_u16)))
-svuint16_t svzip2q(svuint16_t, svuint16_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip2q_bf16)))
-svbfloat16_t svzip2q(svbfloat16_t, svbfloat16_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip2q_s8)))
-svint8_t svzip2q(svint8_t, svint8_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip2q_f64)))
-svfloat64_t svzip2q(svfloat64_t, svfloat64_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip2q_f32)))
-svfloat32_t svzip2q(svfloat32_t, svfloat32_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip2q_f16)))
-svfloat16_t svzip2q(svfloat16_t, svfloat16_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip2q_s32)))
-svint32_t svzip2q(svint32_t, svint32_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip2q_s64)))
-svint64_t svzip2q(svint64_t, svint64_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip2q_s16)))
-svint16_t svzip2q(svint16_t, svint16_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmmla_s32)))
-svint32_t svmmla_s32(svint32_t, svint8_t, svint8_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmmla_u32)))
-svuint32_t svmmla_u32(svuint32_t, svuint8_t, svuint8_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svusmmla_s32)))
-svint32_t svusmmla_s32(svint32_t, svuint8_t, svint8_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmmla_s32)))
-svint32_t svmmla(svint32_t, svint8_t, svint8_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmmla_u32)))
-svuint32_t svmmla(svuint32_t, svuint8_t, svuint8_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svusmmla_s32)))
-svint32_t svusmmla(svint32_t, svuint8_t, svint8_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsudot_n_s32)))
-svint32_t svsudot_n_s32(svint32_t, svint8_t, uint8_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsudot_s32)))
-svint32_t svsudot_s32(svint32_t, svint8_t, svuint8_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsudot_lane_s32)))
-svint32_t svsudot_lane_s32(svint32_t, svint8_t, svuint8_t, uint64_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svusdot_n_s32)))
-svint32_t svusdot_n_s32(svint32_t, svuint8_t, int8_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svusdot_s32)))
-svint32_t svusdot_s32(svint32_t, svuint8_t, svint8_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svusdot_lane_s32)))
-svint32_t svusdot_lane_s32(svint32_t, svuint8_t, svint8_t, uint64_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsudot_n_s32)))
-svint32_t svsudot(svint32_t, svint8_t, uint8_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsudot_s32)))
-svint32_t svsudot(svint32_t, svint8_t, svuint8_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsudot_lane_s32)))
-svint32_t svsudot_lane(svint32_t, svint8_t, svuint8_t, uint64_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svusdot_n_s32)))
-svint32_t svusdot(svint32_t, svuint8_t, int8_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svusdot_s32)))
-svint32_t svusdot(svint32_t, svuint8_t, svint8_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svusdot_lane_s32)))
-svint32_t svusdot_lane(svint32_t, svuint8_t, svint8_t, uint64_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrax1_u64)))
-svuint64_t svrax1_u64(svuint64_t, svuint64_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrax1_s64)))
-svint64_t svrax1_s64(svint64_t, svint64_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrax1_u64)))
-svuint64_t svrax1(svuint64_t, svuint64_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrax1_s64)))
-svint64_t svrax1(svint64_t, svint64_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhistcnt_u32_z)))
-svuint32_t svhistcnt_u32_z(svbool_t, svuint32_t, svuint32_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhistcnt_u64_z)))
-svuint64_t svhistcnt_u64_z(svbool_t, svuint64_t, svuint64_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhistcnt_s32_z)))
-svuint32_t svhistcnt_s32_z(svbool_t, svint32_t, svint32_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhistcnt_s64_z)))
-svuint64_t svhistcnt_s64_z(svbool_t, svint64_t, svint64_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhistseg_u8)))
-svuint8_t svhistseg_u8(svuint8_t, svuint8_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhistseg_s8)))
-svuint8_t svhistseg_s8(svint8_t, svint8_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_gather_u32base_index_u32)))
-svuint32_t svldnt1_gather_u32base_index_u32(svbool_t, svuint32_t, int64_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_gather_u64base_index_u64)))
-svuint64_t svldnt1_gather_u64base_index_u64(svbool_t, svuint64_t, int64_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_gather_u64base_index_f64)))
-svfloat64_t svldnt1_gather_u64base_index_f64(svbool_t, svuint64_t, int64_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_gather_u32base_index_f32)))
-svfloat32_t svldnt1_gather_u32base_index_f32(svbool_t, svuint32_t, int64_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_gather_u32base_index_s32)))
-svint32_t svldnt1_gather_u32base_index_s32(svbool_t, svuint32_t, int64_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_gather_u64base_index_s64)))
-svint64_t svldnt1_gather_u64base_index_s64(svbool_t, svuint64_t, int64_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_gather_u32base_offset_u32)))
-svuint32_t svldnt1_gather_u32base_offset_u32(svbool_t, svuint32_t, int64_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_gather_u64base_offset_u64)))
-svuint64_t svldnt1_gather_u64base_offset_u64(svbool_t, svuint64_t, int64_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_gather_u64base_offset_f64)))
-svfloat64_t svldnt1_gather_u64base_offset_f64(svbool_t, svuint64_t, int64_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_gather_u32base_offset_f32)))
-svfloat32_t svldnt1_gather_u32base_offset_f32(svbool_t, svuint32_t, int64_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_gather_u32base_offset_s32)))
-svint32_t svldnt1_gather_u32base_offset_s32(svbool_t, svuint32_t, int64_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_gather_u64base_offset_s64)))
-svint64_t svldnt1_gather_u64base_offset_s64(svbool_t, svuint64_t, int64_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_gather_u32base_u32)))
-svuint32_t svldnt1_gather_u32base_u32(svbool_t, svuint32_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_gather_u64base_u64)))
-svuint64_t svldnt1_gather_u64base_u64(svbool_t, svuint64_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_gather_u64base_f64)))
-svfloat64_t svldnt1_gather_u64base_f64(svbool_t, svuint64_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_gather_u32base_f32)))
-svfloat32_t svldnt1_gather_u32base_f32(svbool_t, svuint32_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_gather_u32base_s32)))
-svint32_t svldnt1_gather_u32base_s32(svbool_t, svuint32_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_gather_u64base_s64)))
-svint64_t svldnt1_gather_u64base_s64(svbool_t, svuint64_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_gather_s64index_u64)))
-svuint64_t svldnt1_gather_s64index_u64(svbool_t, uint64_t const *, svint64_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_gather_s64index_f64)))
-svfloat64_t svldnt1_gather_s64index_f64(svbool_t, float64_t const *, svint64_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_gather_s64index_s64)))
-svint64_t svldnt1_gather_s64index_s64(svbool_t, int64_t const *, svint64_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_gather_u64index_u64)))
-svuint64_t svldnt1_gather_u64index_u64(svbool_t, uint64_t const *, svuint64_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_gather_u64index_f64)))
-svfloat64_t svldnt1_gather_u64index_f64(svbool_t, float64_t const *, svuint64_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_gather_u64index_s64)))
-svint64_t svldnt1_gather_u64index_s64(svbool_t, int64_t const *, svuint64_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_gather_u32offset_u32)))
-svuint32_t svldnt1_gather_u32offset_u32(svbool_t, uint32_t const *, svuint32_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_gather_u32offset_f32)))
-svfloat32_t svldnt1_gather_u32offset_f32(svbool_t, float32_t const *, svuint32_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_gather_u32offset_s32)))
-svint32_t svldnt1_gather_u32offset_s32(svbool_t, int32_t const *, svuint32_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_gather_s64offset_u64)))
-svuint64_t svldnt1_gather_s64offset_u64(svbool_t, uint64_t const *, svint64_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_gather_s64offset_f64)))
-svfloat64_t svldnt1_gather_s64offset_f64(svbool_t, float64_t const *, svint64_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_gather_s64offset_s64)))
-svint64_t svldnt1_gather_s64offset_s64(svbool_t, int64_t const *, svint64_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_gather_u64offset_u64)))
-svuint64_t svldnt1_gather_u64offset_u64(svbool_t, uint64_t const *, svuint64_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_gather_u64offset_f64)))
-svfloat64_t svldnt1_gather_u64offset_f64(svbool_t, float64_t const *, svuint64_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_gather_u64offset_s64)))
-svint64_t svldnt1_gather_u64offset_s64(svbool_t, int64_t const *, svuint64_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sb_gather_u32base_offset_u32)))
-svuint32_t svldnt1sb_gather_u32base_offset_u32(svbool_t, svuint32_t, int64_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sb_gather_u64base_offset_u64)))
-svuint64_t svldnt1sb_gather_u64base_offset_u64(svbool_t, svuint64_t, int64_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sb_gather_u32base_offset_s32)))
-svint32_t svldnt1sb_gather_u32base_offset_s32(svbool_t, svuint32_t, int64_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sb_gather_u64base_offset_s64)))
-svint64_t svldnt1sb_gather_u64base_offset_s64(svbool_t, svuint64_t, int64_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sb_gather_u32base_u32)))
-svuint32_t svldnt1sb_gather_u32base_u32(svbool_t, svuint32_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sb_gather_u64base_u64)))
-svuint64_t svldnt1sb_gather_u64base_u64(svbool_t, svuint64_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sb_gather_u32base_s32)))
-svint32_t svldnt1sb_gather_u32base_s32(svbool_t, svuint32_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sb_gather_u64base_s64)))
-svint64_t svldnt1sb_gather_u64base_s64(svbool_t, svuint64_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sb_gather_u32offset_u32)))
-svuint32_t svldnt1sb_gather_u32offset_u32(svbool_t, int8_t const *, svuint32_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sb_gather_u32offset_s32)))
-svint32_t svldnt1sb_gather_u32offset_s32(svbool_t, int8_t const *, svuint32_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sb_gather_s64offset_u64)))
-svuint64_t svldnt1sb_gather_s64offset_u64(svbool_t, int8_t const *, svint64_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sb_gather_s64offset_s64)))
-svint64_t svldnt1sb_gather_s64offset_s64(svbool_t, int8_t const *, svint64_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sb_gather_u64offset_u64)))
-svuint64_t svldnt1sb_gather_u64offset_u64(svbool_t, int8_t const *, svuint64_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sb_gather_u64offset_s64)))
-svint64_t svldnt1sb_gather_u64offset_s64(svbool_t, int8_t const *, svuint64_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sh_gather_u32base_index_u32)))
-svuint32_t svldnt1sh_gather_u32base_index_u32(svbool_t, svuint32_t, int64_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sh_gather_u64base_index_u64)))
-svuint64_t svldnt1sh_gather_u64base_index_u64(svbool_t, svuint64_t, int64_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sh_gather_u32base_index_s32)))
-svint32_t svldnt1sh_gather_u32base_index_s32(svbool_t, svuint32_t, int64_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sh_gather_u64base_index_s64)))
-svint64_t svldnt1sh_gather_u64base_index_s64(svbool_t, svuint64_t, int64_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sh_gather_u32base_offset_u32)))
-svuint32_t svldnt1sh_gather_u32base_offset_u32(svbool_t, svuint32_t, int64_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sh_gather_u64base_offset_u64)))
-svuint64_t svldnt1sh_gather_u64base_offset_u64(svbool_t, svuint64_t, int64_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sh_gather_u32base_offset_s32)))
-svint32_t svldnt1sh_gather_u32base_offset_s32(svbool_t, svuint32_t, int64_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sh_gather_u64base_offset_s64)))
-svint64_t svldnt1sh_gather_u64base_offset_s64(svbool_t, svuint64_t, int64_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sh_gather_u32base_u32)))
-svuint32_t svldnt1sh_gather_u32base_u32(svbool_t, svuint32_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sh_gather_u64base_u64)))
-svuint64_t svldnt1sh_gather_u64base_u64(svbool_t, svuint64_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sh_gather_u32base_s32)))
-svint32_t svldnt1sh_gather_u32base_s32(svbool_t, svuint32_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sh_gather_u64base_s64)))
-svint64_t svldnt1sh_gather_u64base_s64(svbool_t, svuint64_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sh_gather_s64index_u64)))
-svuint64_t svldnt1sh_gather_s64index_u64(svbool_t, int16_t const *, svint64_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sh_gather_s64index_s64)))
-svint64_t svldnt1sh_gather_s64index_s64(svbool_t, int16_t const *, svint64_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sh_gather_u64index_u64)))
-svuint64_t svldnt1sh_gather_u64index_u64(svbool_t, int16_t const *, svuint64_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sh_gather_u64index_s64)))
-svint64_t svldnt1sh_gather_u64index_s64(svbool_t, int16_t const *, svuint64_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sh_gather_u32offset_u32)))
-svuint32_t svldnt1sh_gather_u32offset_u32(svbool_t, int16_t const *, svuint32_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sh_gather_u32offset_s32)))
-svint32_t svldnt1sh_gather_u32offset_s32(svbool_t, int16_t const *, svuint32_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sh_gather_s64offset_u64)))
-svuint64_t svldnt1sh_gather_s64offset_u64(svbool_t, int16_t const *, svint64_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sh_gather_s64offset_s64)))
-svint64_t svldnt1sh_gather_s64offset_s64(svbool_t, int16_t const *, svint64_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sh_gather_u64offset_u64)))
-svuint64_t svldnt1sh_gather_u64offset_u64(svbool_t, int16_t const *, svuint64_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sh_gather_u64offset_s64)))
-svint64_t svldnt1sh_gather_u64offset_s64(svbool_t, int16_t const *, svuint64_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sw_gather_u64base_index_u64)))
-svuint64_t svldnt1sw_gather_u64base_index_u64(svbool_t, svuint64_t, int64_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sw_gather_u64base_index_s64)))
-svint64_t svldnt1sw_gather_u64base_index_s64(svbool_t, svuint64_t, int64_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sw_gather_u64base_offset_u64)))
-svuint64_t svldnt1sw_gather_u64base_offset_u64(svbool_t, svuint64_t, int64_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sw_gather_u64base_offset_s64)))
-svint64_t svldnt1sw_gather_u64base_offset_s64(svbool_t, svuint64_t, int64_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sw_gather_u64base_u64)))
-svuint64_t svldnt1sw_gather_u64base_u64(svbool_t, svuint64_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sw_gather_u64base_s64)))
-svint64_t svldnt1sw_gather_u64base_s64(svbool_t, svuint64_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sw_gather_s64index_u64)))
-svuint64_t svldnt1sw_gather_s64index_u64(svbool_t, int32_t const *, svint64_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sw_gather_s64index_s64)))
-svint64_t svldnt1sw_gather_s64index_s64(svbool_t, int32_t const *, svint64_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sw_gather_u64index_u64)))
-svuint64_t svldnt1sw_gather_u64index_u64(svbool_t, int32_t const *, svuint64_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sw_gather_u64index_s64)))
-svint64_t svldnt1sw_gather_u64index_s64(svbool_t, int32_t const *, svuint64_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sw_gather_s64offset_u64)))
-svuint64_t svldnt1sw_gather_s64offset_u64(svbool_t, int32_t const *, svint64_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sw_gather_s64offset_s64)))
-svint64_t svldnt1sw_gather_s64offset_s64(svbool_t, int32_t const *, svint64_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sw_gather_u64offset_u64)))
-svuint64_t svldnt1sw_gather_u64offset_u64(svbool_t, int32_t const *, svuint64_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sw_gather_u64offset_s64)))
-svint64_t svldnt1sw_gather_u64offset_s64(svbool_t, int32_t const *, svuint64_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1ub_gather_u32base_offset_u32)))
-svuint32_t svldnt1ub_gather_u32base_offset_u32(svbool_t, svuint32_t, int64_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1ub_gather_u64base_offset_u64)))
-svuint64_t svldnt1ub_gather_u64base_offset_u64(svbool_t, svuint64_t, int64_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1ub_gather_u32base_offset_s32)))
-svint32_t svldnt1ub_gather_u32base_offset_s32(svbool_t, svuint32_t, int64_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1ub_gather_u64base_offset_s64)))
-svint64_t svldnt1ub_gather_u64base_offset_s64(svbool_t, svuint64_t, int64_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1ub_gather_u32base_u32)))
-svuint32_t svldnt1ub_gather_u32base_u32(svbool_t, svuint32_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1ub_gather_u64base_u64)))
-svuint64_t svldnt1ub_gather_u64base_u64(svbool_t, svuint64_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1ub_gather_u32base_s32)))
-svint32_t svldnt1ub_gather_u32base_s32(svbool_t, svuint32_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1ub_gather_u64base_s64)))
-svint64_t svldnt1ub_gather_u64base_s64(svbool_t, svuint64_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1ub_gather_u32offset_u32)))
-svuint32_t svldnt1ub_gather_u32offset_u32(svbool_t, uint8_t const *, svuint32_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1ub_gather_u32offset_s32)))
-svint32_t svldnt1ub_gather_u32offset_s32(svbool_t, uint8_t const *, svuint32_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1ub_gather_s64offset_u64)))
-svuint64_t svldnt1ub_gather_s64offset_u64(svbool_t, uint8_t const *, svint64_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1ub_gather_s64offset_s64)))
-svint64_t svldnt1ub_gather_s64offset_s64(svbool_t, uint8_t const *, svint64_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1ub_gather_u64offset_u64)))
-svuint64_t svldnt1ub_gather_u64offset_u64(svbool_t, uint8_t const *, svuint64_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1ub_gather_u64offset_s64)))
-svint64_t svldnt1ub_gather_u64offset_s64(svbool_t, uint8_t const *, svuint64_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1uh_gather_u32base_index_u32)))
-svuint32_t svldnt1uh_gather_u32base_index_u32(svbool_t, svuint32_t, int64_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1uh_gather_u64base_index_u64)))
-svuint64_t svldnt1uh_gather_u64base_index_u64(svbool_t, svuint64_t, int64_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1uh_gather_u32base_index_s32)))
-svint32_t svldnt1uh_gather_u32base_index_s32(svbool_t, svuint32_t, int64_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1uh_gather_u64base_index_s64)))
-svint64_t svldnt1uh_gather_u64base_index_s64(svbool_t, svuint64_t, int64_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1uh_gather_u32base_offset_u32)))
-svuint32_t svldnt1uh_gather_u32base_offset_u32(svbool_t, svuint32_t, int64_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1uh_gather_u64base_offset_u64)))
-svuint64_t svldnt1uh_gather_u64base_offset_u64(svbool_t, svuint64_t, int64_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1uh_gather_u32base_offset_s32)))
-svint32_t svldnt1uh_gather_u32base_offset_s32(svbool_t, svuint32_t, int64_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1uh_gather_u64base_offset_s64)))
-svint64_t svldnt1uh_gather_u64base_offset_s64(svbool_t, svuint64_t, int64_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1uh_gather_u32base_u32)))
-svuint32_t svldnt1uh_gather_u32base_u32(svbool_t, svuint32_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1uh_gather_u64base_u64)))
-svuint64_t svldnt1uh_gather_u64base_u64(svbool_t, svuint64_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1uh_gather_u32base_s32)))
-svint32_t svldnt1uh_gather_u32base_s32(svbool_t, svuint32_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1uh_gather_u64base_s64)))
-svint64_t svldnt1uh_gather_u64base_s64(svbool_t, svuint64_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1uh_gather_s64index_u64)))
-svuint64_t svldnt1uh_gather_s64index_u64(svbool_t, uint16_t const *, svint64_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1uh_gather_s64index_s64)))
-svint64_t svldnt1uh_gather_s64index_s64(svbool_t, uint16_t const *, svint64_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1uh_gather_u64index_u64)))
-svuint64_t svldnt1uh_gather_u64index_u64(svbool_t, uint16_t const *, svuint64_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1uh_gather_u64index_s64)))
-svint64_t svldnt1uh_gather_u64index_s64(svbool_t, uint16_t const *, svuint64_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1uh_gather_u32offset_u32)))
-svuint32_t svldnt1uh_gather_u32offset_u32(svbool_t, uint16_t const *, svuint32_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1uh_gather_u32offset_s32)))
-svint32_t svldnt1uh_gather_u32offset_s32(svbool_t, uint16_t const *, svuint32_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1uh_gather_s64offset_u64)))
-svuint64_t svldnt1uh_gather_s64offset_u64(svbool_t, uint16_t const *, svint64_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1uh_gather_s64offset_s64)))
-svint64_t svldnt1uh_gather_s64offset_s64(svbool_t, uint16_t const *, svint64_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1uh_gather_u64offset_u64)))
-svuint64_t svldnt1uh_gather_u64offset_u64(svbool_t, uint16_t const *, svuint64_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1uh_gather_u64offset_s64)))
-svint64_t svldnt1uh_gather_u64offset_s64(svbool_t, uint16_t const *, svuint64_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1uw_gather_u64base_index_u64)))
-svuint64_t svldnt1uw_gather_u64base_index_u64(svbool_t, svuint64_t, int64_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1uw_gather_u64base_index_s64)))
-svint64_t svldnt1uw_gather_u64base_index_s64(svbool_t, svuint64_t, int64_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1uw_gather_u64base_offset_u64)))
-svuint64_t svldnt1uw_gather_u64base_offset_u64(svbool_t, svuint64_t, int64_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1uw_gather_u64base_offset_s64)))
-svint64_t svldnt1uw_gather_u64base_offset_s64(svbool_t, svuint64_t, int64_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1uw_gather_u64base_u64)))
-svuint64_t svldnt1uw_gather_u64base_u64(svbool_t, svuint64_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1uw_gather_u64base_s64)))
-svint64_t svldnt1uw_gather_u64base_s64(svbool_t, svuint64_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1uw_gather_s64index_u64)))
-svuint64_t svldnt1uw_gather_s64index_u64(svbool_t, uint32_t const *, svint64_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1uw_gather_s64index_s64)))
-svint64_t svldnt1uw_gather_s64index_s64(svbool_t, uint32_t const *, svint64_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1uw_gather_u64index_u64)))
-svuint64_t svldnt1uw_gather_u64index_u64(svbool_t, uint32_t const *, svuint64_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1uw_gather_u64index_s64)))
-svint64_t svldnt1uw_gather_u64index_s64(svbool_t, uint32_t const *, svuint64_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1uw_gather_s64offset_u64)))
-svuint64_t svldnt1uw_gather_s64offset_u64(svbool_t, uint32_t const *, svint64_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1uw_gather_s64offset_s64)))
-svint64_t svldnt1uw_gather_s64offset_s64(svbool_t, uint32_t const *, svint64_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1uw_gather_u64offset_u64)))
-svuint64_t svldnt1uw_gather_u64offset_u64(svbool_t, uint32_t const *, svuint64_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1uw_gather_u64offset_s64)))
-svint64_t svldnt1uw_gather_u64offset_s64(svbool_t, uint32_t const *, svuint64_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmatch_u8)))
-svbool_t svmatch_u8(svbool_t, svuint8_t, svuint8_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmatch_u16)))
-svbool_t svmatch_u16(svbool_t, svuint16_t, svuint16_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmatch_s8)))
-svbool_t svmatch_s8(svbool_t, svint8_t, svint8_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmatch_s16)))
-svbool_t svmatch_s16(svbool_t, svint16_t, svint16_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svnmatch_u8)))
-svbool_t svnmatch_u8(svbool_t, svuint8_t, svuint8_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svnmatch_u16)))
-svbool_t svnmatch_u16(svbool_t, svuint16_t, svuint16_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svnmatch_s8)))
-svbool_t svnmatch_s8(svbool_t, svint8_t, svint8_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svnmatch_s16)))
-svbool_t svnmatch_s16(svbool_t, svint16_t, svint16_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_scatter_u32base_index_u32)))
-void svstnt1_scatter_u32base_index_u32(svbool_t, svuint32_t, int64_t, svuint32_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_scatter_u64base_index_u64)))
-void svstnt1_scatter_u64base_index_u64(svbool_t, svuint64_t, int64_t, svuint64_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_scatter_u64base_index_f64)))
-void svstnt1_scatter_u64base_index_f64(svbool_t, svuint64_t, int64_t, svfloat64_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_scatter_u32base_index_f32)))
-void svstnt1_scatter_u32base_index_f32(svbool_t, svuint32_t, int64_t, svfloat32_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_scatter_u32base_index_s32)))
-void svstnt1_scatter_u32base_index_s32(svbool_t, svuint32_t, int64_t, svint32_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_scatter_u64base_index_s64)))
-void svstnt1_scatter_u64base_index_s64(svbool_t, svuint64_t, int64_t, svint64_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_scatter_u32base_offset_u32)))
-void svstnt1_scatter_u32base_offset_u32(svbool_t, svuint32_t, int64_t, svuint32_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_scatter_u64base_offset_u64)))
-void svstnt1_scatter_u64base_offset_u64(svbool_t, svuint64_t, int64_t, svuint64_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_scatter_u64base_offset_f64)))
-void svstnt1_scatter_u64base_offset_f64(svbool_t, svuint64_t, int64_t, svfloat64_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_scatter_u32base_offset_f32)))
-void svstnt1_scatter_u32base_offset_f32(svbool_t, svuint32_t, int64_t, svfloat32_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_scatter_u32base_offset_s32)))
-void svstnt1_scatter_u32base_offset_s32(svbool_t, svuint32_t, int64_t, svint32_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_scatter_u64base_offset_s64)))
-void svstnt1_scatter_u64base_offset_s64(svbool_t, svuint64_t, int64_t, svint64_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_scatter_u32base_u32)))
-void svstnt1_scatter_u32base_u32(svbool_t, svuint32_t, svuint32_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_scatter_u64base_u64)))
-void svstnt1_scatter_u64base_u64(svbool_t, svuint64_t, svuint64_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_scatter_u64base_f64)))
-void svstnt1_scatter_u64base_f64(svbool_t, svuint64_t, svfloat64_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_scatter_u32base_f32)))
-void svstnt1_scatter_u32base_f32(svbool_t, svuint32_t, svfloat32_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_scatter_u32base_s32)))
-void svstnt1_scatter_u32base_s32(svbool_t, svuint32_t, svint32_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_scatter_u64base_s64)))
-void svstnt1_scatter_u64base_s64(svbool_t, svuint64_t, svint64_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_scatter_s64index_u64)))
-void svstnt1_scatter_s64index_u64(svbool_t, uint64_t *, svint64_t, svuint64_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_scatter_s64index_f64)))
-void svstnt1_scatter_s64index_f64(svbool_t, float64_t *, svint64_t, svfloat64_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_scatter_s64index_s64)))
-void svstnt1_scatter_s64index_s64(svbool_t, int64_t *, svint64_t, svint64_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_scatter_u64index_u64)))
-void svstnt1_scatter_u64index_u64(svbool_t, uint64_t *, svuint64_t, svuint64_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_scatter_u64index_f64)))
-void svstnt1_scatter_u64index_f64(svbool_t, float64_t *, svuint64_t, svfloat64_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_scatter_u64index_s64)))
-void svstnt1_scatter_u64index_s64(svbool_t, int64_t *, svuint64_t, svint64_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_scatter_u32offset_u32)))
-void svstnt1_scatter_u32offset_u32(svbool_t, uint32_t *, svuint32_t, svuint32_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_scatter_u32offset_f32)))
-void svstnt1_scatter_u32offset_f32(svbool_t, float32_t *, svuint32_t, svfloat32_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_scatter_u32offset_s32)))
-void svstnt1_scatter_u32offset_s32(svbool_t, int32_t *, svuint32_t, svint32_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_scatter_s64offset_u64)))
-void svstnt1_scatter_s64offset_u64(svbool_t, uint64_t *, svint64_t, svuint64_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_scatter_s64offset_f64)))
-void svstnt1_scatter_s64offset_f64(svbool_t, float64_t *, svint64_t, svfloat64_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_scatter_s64offset_s64)))
-void svstnt1_scatter_s64offset_s64(svbool_t, int64_t *, svint64_t, svint64_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_scatter_u64offset_u64)))
-void svstnt1_scatter_u64offset_u64(svbool_t, uint64_t *, svuint64_t, svuint64_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_scatter_u64offset_f64)))
-void svstnt1_scatter_u64offset_f64(svbool_t, float64_t *, svuint64_t, svfloat64_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_scatter_u64offset_s64)))
-void svstnt1_scatter_u64offset_s64(svbool_t, int64_t *, svuint64_t, svint64_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1b_scatter_u32base_offset_u32)))
-void svstnt1b_scatter_u32base_offset_u32(svbool_t, svuint32_t, int64_t, svuint32_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1b_scatter_u64base_offset_u64)))
-void svstnt1b_scatter_u64base_offset_u64(svbool_t, svuint64_t, int64_t, svuint64_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1b_scatter_u32base_offset_s32)))
-void svstnt1b_scatter_u32base_offset_s32(svbool_t, svuint32_t, int64_t, svint32_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1b_scatter_u64base_offset_s64)))
-void svstnt1b_scatter_u64base_offset_s64(svbool_t, svuint64_t, int64_t, svint64_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1b_scatter_u32base_u32)))
-void svstnt1b_scatter_u32base_u32(svbool_t, svuint32_t, svuint32_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1b_scatter_u64base_u64)))
-void svstnt1b_scatter_u64base_u64(svbool_t, svuint64_t, svuint64_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1b_scatter_u32base_s32)))
-void svstnt1b_scatter_u32base_s32(svbool_t, svuint32_t, svint32_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1b_scatter_u64base_s64)))
-void svstnt1b_scatter_u64base_s64(svbool_t, svuint64_t, svint64_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1b_scatter_u32offset_s32)))
-void svstnt1b_scatter_u32offset_s32(svbool_t, int8_t *, svuint32_t, svint32_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1b_scatter_u32offset_u32)))
-void svstnt1b_scatter_u32offset_u32(svbool_t, uint8_t *, svuint32_t, svuint32_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1b_scatter_s64offset_s64)))
-void svstnt1b_scatter_s64offset_s64(svbool_t, int8_t *, svint64_t, svint64_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1b_scatter_s64offset_u64)))
-void svstnt1b_scatter_s64offset_u64(svbool_t, uint8_t *, svint64_t, svuint64_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1b_scatter_u64offset_s64)))
-void svstnt1b_scatter_u64offset_s64(svbool_t, int8_t *, svuint64_t, svint64_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1b_scatter_u64offset_u64)))
-void svstnt1b_scatter_u64offset_u64(svbool_t, uint8_t *, svuint64_t, svuint64_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1h_scatter_u32base_index_u32)))
-void svstnt1h_scatter_u32base_index_u32(svbool_t, svuint32_t, int64_t, svuint32_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1h_scatter_u64base_index_u64)))
-void svstnt1h_scatter_u64base_index_u64(svbool_t, svuint64_t, int64_t, svuint64_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1h_scatter_u32base_index_s32)))
-void svstnt1h_scatter_u32base_index_s32(svbool_t, svuint32_t, int64_t, svint32_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1h_scatter_u64base_index_s64)))
-void svstnt1h_scatter_u64base_index_s64(svbool_t, svuint64_t, int64_t, svint64_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1h_scatter_u32base_offset_u32)))
-void svstnt1h_scatter_u32base_offset_u32(svbool_t, svuint32_t, int64_t, svuint32_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1h_scatter_u64base_offset_u64)))
-void svstnt1h_scatter_u64base_offset_u64(svbool_t, svuint64_t, int64_t, svuint64_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1h_scatter_u32base_offset_s32)))
-void svstnt1h_scatter_u32base_offset_s32(svbool_t, svuint32_t, int64_t, svint32_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1h_scatter_u64base_offset_s64)))
-void svstnt1h_scatter_u64base_offset_s64(svbool_t, svuint64_t, int64_t, svint64_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1h_scatter_u32base_u32)))
-void svstnt1h_scatter_u32base_u32(svbool_t, svuint32_t, svuint32_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1h_scatter_u64base_u64)))
-void svstnt1h_scatter_u64base_u64(svbool_t, svuint64_t, svuint64_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1h_scatter_u32base_s32)))
-void svstnt1h_scatter_u32base_s32(svbool_t, svuint32_t, svint32_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1h_scatter_u64base_s64)))
-void svstnt1h_scatter_u64base_s64(svbool_t, svuint64_t, svint64_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1h_scatter_s64index_s64)))
-void svstnt1h_scatter_s64index_s64(svbool_t, int16_t *, svint64_t, svint64_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1h_scatter_s64index_u64)))
-void svstnt1h_scatter_s64index_u64(svbool_t, uint16_t *, svint64_t, svuint64_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1h_scatter_u64index_s64)))
-void svstnt1h_scatter_u64index_s64(svbool_t, int16_t *, svuint64_t, svint64_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1h_scatter_u64index_u64)))
-void svstnt1h_scatter_u64index_u64(svbool_t, uint16_t *, svuint64_t, svuint64_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1h_scatter_u32offset_s32)))
-void svstnt1h_scatter_u32offset_s32(svbool_t, int16_t *, svuint32_t, svint32_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1h_scatter_u32offset_u32)))
-void svstnt1h_scatter_u32offset_u32(svbool_t, uint16_t *, svuint32_t, svuint32_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1h_scatter_s64offset_s64)))
-void svstnt1h_scatter_s64offset_s64(svbool_t, int16_t *, svint64_t, svint64_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1h_scatter_s64offset_u64)))
-void svstnt1h_scatter_s64offset_u64(svbool_t, uint16_t *, svint64_t, svuint64_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1h_scatter_u64offset_s64)))
-void svstnt1h_scatter_u64offset_s64(svbool_t, int16_t *, svuint64_t, svint64_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1h_scatter_u64offset_u64)))
-void svstnt1h_scatter_u64offset_u64(svbool_t, uint16_t *, svuint64_t, svuint64_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1w_scatter_u64base_index_u64)))
-void svstnt1w_scatter_u64base_index_u64(svbool_t, svuint64_t, int64_t, svuint64_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1w_scatter_u64base_index_s64)))
-void svstnt1w_scatter_u64base_index_s64(svbool_t, svuint64_t, int64_t, svint64_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1w_scatter_u64base_offset_u64)))
-void svstnt1w_scatter_u64base_offset_u64(svbool_t, svuint64_t, int64_t, svuint64_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1w_scatter_u64base_offset_s64)))
-void svstnt1w_scatter_u64base_offset_s64(svbool_t, svuint64_t, int64_t, svint64_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1w_scatter_u64base_u64)))
-void svstnt1w_scatter_u64base_u64(svbool_t, svuint64_t, svuint64_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1w_scatter_u64base_s64)))
-void svstnt1w_scatter_u64base_s64(svbool_t, svuint64_t, svint64_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1w_scatter_s64index_s64)))
-void svstnt1w_scatter_s64index_s64(svbool_t, int32_t *, svint64_t, svint64_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1w_scatter_s64index_u64)))
-void svstnt1w_scatter_s64index_u64(svbool_t, uint32_t *, svint64_t, svuint64_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1w_scatter_u64index_s64)))
-void svstnt1w_scatter_u64index_s64(svbool_t, int32_t *, svuint64_t, svint64_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1w_scatter_u64index_u64)))
-void svstnt1w_scatter_u64index_u64(svbool_t, uint32_t *, svuint64_t, svuint64_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1w_scatter_s64offset_s64)))
-void svstnt1w_scatter_s64offset_s64(svbool_t, int32_t *, svint64_t, svint64_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1w_scatter_s64offset_u64)))
-void svstnt1w_scatter_s64offset_u64(svbool_t, uint32_t *, svint64_t, svuint64_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1w_scatter_u64offset_s64)))
-void svstnt1w_scatter_u64offset_s64(svbool_t, int32_t *, svuint64_t, svint64_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1w_scatter_u64offset_u64)))
-void svstnt1w_scatter_u64offset_u64(svbool_t, uint32_t *, svuint64_t, svuint64_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhistcnt_u32_z)))
-svuint32_t svhistcnt_z(svbool_t, svuint32_t, svuint32_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhistcnt_u64_z)))
-svuint64_t svhistcnt_z(svbool_t, svuint64_t, svuint64_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhistcnt_s32_z)))
-svuint32_t svhistcnt_z(svbool_t, svint32_t, svint32_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhistcnt_s64_z)))
-svuint64_t svhistcnt_z(svbool_t, svint64_t, svint64_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhistseg_u8)))
-svuint8_t svhistseg(svuint8_t, svuint8_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhistseg_s8)))
-svuint8_t svhistseg(svint8_t, svint8_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_gather_u32base_index_u32)))
-svuint32_t svldnt1_gather_index_u32(svbool_t, svuint32_t, int64_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_gather_u64base_index_u64)))
-svuint64_t svldnt1_gather_index_u64(svbool_t, svuint64_t, int64_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_gather_u64base_index_f64)))
-svfloat64_t svldnt1_gather_index_f64(svbool_t, svuint64_t, int64_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_gather_u32base_index_f32)))
-svfloat32_t svldnt1_gather_index_f32(svbool_t, svuint32_t, int64_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_gather_u32base_index_s32)))
-svint32_t svldnt1_gather_index_s32(svbool_t, svuint32_t, int64_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_gather_u64base_index_s64)))
-svint64_t svldnt1_gather_index_s64(svbool_t, svuint64_t, int64_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_gather_u32base_offset_u32)))
-svuint32_t svldnt1_gather_offset_u32(svbool_t, svuint32_t, int64_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_gather_u64base_offset_u64)))
-svuint64_t svldnt1_gather_offset_u64(svbool_t, svuint64_t, int64_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_gather_u64base_offset_f64)))
-svfloat64_t svldnt1_gather_offset_f64(svbool_t, svuint64_t, int64_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_gather_u32base_offset_f32)))
-svfloat32_t svldnt1_gather_offset_f32(svbool_t, svuint32_t, int64_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_gather_u32base_offset_s32)))
-svint32_t svldnt1_gather_offset_s32(svbool_t, svuint32_t, int64_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_gather_u64base_offset_s64)))
-svint64_t svldnt1_gather_offset_s64(svbool_t, svuint64_t, int64_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_gather_u32base_u32)))
-svuint32_t svldnt1_gather_u32(svbool_t, svuint32_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_gather_u64base_u64)))
-svuint64_t svldnt1_gather_u64(svbool_t, svuint64_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_gather_u64base_f64)))
-svfloat64_t svldnt1_gather_f64(svbool_t, svuint64_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_gather_u32base_f32)))
-svfloat32_t svldnt1_gather_f32(svbool_t, svuint32_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_gather_u32base_s32)))
-svint32_t svldnt1_gather_s32(svbool_t, svuint32_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_gather_u64base_s64)))
-svint64_t svldnt1_gather_s64(svbool_t, svuint64_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_gather_s64index_u64)))
-svuint64_t svldnt1_gather_index(svbool_t, uint64_t const *, svint64_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_gather_s64index_f64)))
-svfloat64_t svldnt1_gather_index(svbool_t, float64_t const *, svint64_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_gather_s64index_s64)))
-svint64_t svldnt1_gather_index(svbool_t, int64_t const *, svint64_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_gather_u64index_u64)))
-svuint64_t svldnt1_gather_index(svbool_t, uint64_t const *, svuint64_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_gather_u64index_f64)))
-svfloat64_t svldnt1_gather_index(svbool_t, float64_t const *, svuint64_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_gather_u64index_s64)))
-svint64_t svldnt1_gather_index(svbool_t, int64_t const *, svuint64_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_gather_u32offset_u32)))
-svuint32_t svldnt1_gather_offset(svbool_t, uint32_t const *, svuint32_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_gather_u32offset_f32)))
-svfloat32_t svldnt1_gather_offset(svbool_t, float32_t const *, svuint32_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_gather_u32offset_s32)))
-svint32_t svldnt1_gather_offset(svbool_t, int32_t const *, svuint32_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_gather_s64offset_u64)))
-svuint64_t svldnt1_gather_offset(svbool_t, uint64_t const *, svint64_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_gather_s64offset_f64)))
-svfloat64_t svldnt1_gather_offset(svbool_t, float64_t const *, svint64_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_gather_s64offset_s64)))
-svint64_t svldnt1_gather_offset(svbool_t, int64_t const *, svint64_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_gather_u64offset_u64)))
-svuint64_t svldnt1_gather_offset(svbool_t, uint64_t const *, svuint64_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_gather_u64offset_f64)))
-svfloat64_t svldnt1_gather_offset(svbool_t, float64_t const *, svuint64_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_gather_u64offset_s64)))
-svint64_t svldnt1_gather_offset(svbool_t, int64_t const *, svuint64_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sb_gather_u32base_offset_u32)))
-svuint32_t svldnt1sb_gather_offset_u32(svbool_t, svuint32_t, int64_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sb_gather_u64base_offset_u64)))
-svuint64_t svldnt1sb_gather_offset_u64(svbool_t, svuint64_t, int64_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sb_gather_u32base_offset_s32)))
-svint32_t svldnt1sb_gather_offset_s32(svbool_t, svuint32_t, int64_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sb_gather_u64base_offset_s64)))
-svint64_t svldnt1sb_gather_offset_s64(svbool_t, svuint64_t, int64_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sb_gather_u32base_u32)))
-svuint32_t svldnt1sb_gather_u32(svbool_t, svuint32_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sb_gather_u64base_u64)))
-svuint64_t svldnt1sb_gather_u64(svbool_t, svuint64_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sb_gather_u32base_s32)))
-svint32_t svldnt1sb_gather_s32(svbool_t, svuint32_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sb_gather_u64base_s64)))
-svint64_t svldnt1sb_gather_s64(svbool_t, svuint64_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sb_gather_u32offset_u32)))
-svuint32_t svldnt1sb_gather_offset_u32(svbool_t, int8_t const *, svuint32_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sb_gather_u32offset_s32)))
-svint32_t svldnt1sb_gather_offset_s32(svbool_t, int8_t const *, svuint32_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sb_gather_s64offset_u64)))
-svuint64_t svldnt1sb_gather_offset_u64(svbool_t, int8_t const *, svint64_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sb_gather_s64offset_s64)))
-svint64_t svldnt1sb_gather_offset_s64(svbool_t, int8_t const *, svint64_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sb_gather_u64offset_u64)))
-svuint64_t svldnt1sb_gather_offset_u64(svbool_t, int8_t const *, svuint64_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sb_gather_u64offset_s64)))
-svint64_t svldnt1sb_gather_offset_s64(svbool_t, int8_t const *, svuint64_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sh_gather_u32base_index_u32)))
-svuint32_t svldnt1sh_gather_index_u32(svbool_t, svuint32_t, int64_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sh_gather_u64base_index_u64)))
-svuint64_t svldnt1sh_gather_index_u64(svbool_t, svuint64_t, int64_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sh_gather_u32base_index_s32)))
-svint32_t svldnt1sh_gather_index_s32(svbool_t, svuint32_t, int64_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sh_gather_u64base_index_s64)))
-svint64_t svldnt1sh_gather_index_s64(svbool_t, svuint64_t, int64_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sh_gather_u32base_offset_u32)))
-svuint32_t svldnt1sh_gather_offset_u32(svbool_t, svuint32_t, int64_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sh_gather_u64base_offset_u64)))
-svuint64_t svldnt1sh_gather_offset_u64(svbool_t, svuint64_t, int64_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sh_gather_u32base_offset_s32)))
-svint32_t svldnt1sh_gather_offset_s32(svbool_t, svuint32_t, int64_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sh_gather_u64base_offset_s64)))
-svint64_t svldnt1sh_gather_offset_s64(svbool_t, svuint64_t, int64_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sh_gather_u32base_u32)))
-svuint32_t svldnt1sh_gather_u32(svbool_t, svuint32_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sh_gather_u64base_u64)))
-svuint64_t svldnt1sh_gather_u64(svbool_t, svuint64_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sh_gather_u32base_s32)))
-svint32_t svldnt1sh_gather_s32(svbool_t, svuint32_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sh_gather_u64base_s64)))
-svint64_t svldnt1sh_gather_s64(svbool_t, svuint64_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sh_gather_s64index_u64)))
-svuint64_t svldnt1sh_gather_index_u64(svbool_t, int16_t const *, svint64_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sh_gather_s64index_s64)))
-svint64_t svldnt1sh_gather_index_s64(svbool_t, int16_t const *, svint64_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sh_gather_u64index_u64)))
-svuint64_t svldnt1sh_gather_index_u64(svbool_t, int16_t const *, svuint64_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sh_gather_u64index_s64)))
-svint64_t svldnt1sh_gather_index_s64(svbool_t, int16_t const *, svuint64_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sh_gather_u32offset_u32)))
-svuint32_t svldnt1sh_gather_offset_u32(svbool_t, int16_t const *, svuint32_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sh_gather_u32offset_s32)))
-svint32_t svldnt1sh_gather_offset_s32(svbool_t, int16_t const *, svuint32_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sh_gather_s64offset_u64)))
-svuint64_t svldnt1sh_gather_offset_u64(svbool_t, int16_t const *, svint64_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sh_gather_s64offset_s64)))
-svint64_t svldnt1sh_gather_offset_s64(svbool_t, int16_t const *, svint64_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sh_gather_u64offset_u64)))
-svuint64_t svldnt1sh_gather_offset_u64(svbool_t, int16_t const *, svuint64_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sh_gather_u64offset_s64)))
-svint64_t svldnt1sh_gather_offset_s64(svbool_t, int16_t const *, svuint64_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sw_gather_u64base_index_u64)))
-svuint64_t svldnt1sw_gather_index_u64(svbool_t, svuint64_t, int64_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sw_gather_u64base_index_s64)))
-svint64_t svldnt1sw_gather_index_s64(svbool_t, svuint64_t, int64_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sw_gather_u64base_offset_u64)))
-svuint64_t svldnt1sw_gather_offset_u64(svbool_t, svuint64_t, int64_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sw_gather_u64base_offset_s64)))
-svint64_t svldnt1sw_gather_offset_s64(svbool_t, svuint64_t, int64_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sw_gather_u64base_u64)))
-svuint64_t svldnt1sw_gather_u64(svbool_t, svuint64_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sw_gather_u64base_s64)))
-svint64_t svldnt1sw_gather_s64(svbool_t, svuint64_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sw_gather_s64index_u64)))
-svuint64_t svldnt1sw_gather_index_u64(svbool_t, int32_t const *, svint64_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sw_gather_s64index_s64)))
-svint64_t svldnt1sw_gather_index_s64(svbool_t, int32_t const *, svint64_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sw_gather_u64index_u64)))
-svuint64_t svldnt1sw_gather_index_u64(svbool_t, int32_t const *, svuint64_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sw_gather_u64index_s64)))
-svint64_t svldnt1sw_gather_index_s64(svbool_t, int32_t const *, svuint64_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sw_gather_s64offset_u64)))
-svuint64_t svldnt1sw_gather_offset_u64(svbool_t, int32_t const *, svint64_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sw_gather_s64offset_s64)))
-svint64_t svldnt1sw_gather_offset_s64(svbool_t, int32_t const *, svint64_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sw_gather_u64offset_u64)))
-svuint64_t svldnt1sw_gather_offset_u64(svbool_t, int32_t const *, svuint64_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sw_gather_u64offset_s64)))
-svint64_t svldnt1sw_gather_offset_s64(svbool_t, int32_t const *, svuint64_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1ub_gather_u32base_offset_u32)))
-svuint32_t svldnt1ub_gather_offset_u32(svbool_t, svuint32_t, int64_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1ub_gather_u64base_offset_u64)))
-svuint64_t svldnt1ub_gather_offset_u64(svbool_t, svuint64_t, int64_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1ub_gather_u32base_offset_s32)))
-svint32_t svldnt1ub_gather_offset_s32(svbool_t, svuint32_t, int64_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1ub_gather_u64base_offset_s64)))
-svint64_t svldnt1ub_gather_offset_s64(svbool_t, svuint64_t, int64_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1ub_gather_u32base_u32)))
-svuint32_t svldnt1ub_gather_u32(svbool_t, svuint32_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1ub_gather_u64base_u64)))
-svuint64_t svldnt1ub_gather_u64(svbool_t, svuint64_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1ub_gather_u32base_s32)))
-svint32_t svldnt1ub_gather_s32(svbool_t, svuint32_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1ub_gather_u64base_s64)))
-svint64_t svldnt1ub_gather_s64(svbool_t, svuint64_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1ub_gather_u32offset_u32)))
-svuint32_t svldnt1ub_gather_offset_u32(svbool_t, uint8_t const *, svuint32_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1ub_gather_u32offset_s32)))
-svint32_t svldnt1ub_gather_offset_s32(svbool_t, uint8_t const *, svuint32_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1ub_gather_s64offset_u64)))
-svuint64_t svldnt1ub_gather_offset_u64(svbool_t, uint8_t const *, svint64_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1ub_gather_s64offset_s64)))
-svint64_t svldnt1ub_gather_offset_s64(svbool_t, uint8_t const *, svint64_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1ub_gather_u64offset_u64)))
-svuint64_t svldnt1ub_gather_offset_u64(svbool_t, uint8_t const *, svuint64_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1ub_gather_u64offset_s64)))
-svint64_t svldnt1ub_gather_offset_s64(svbool_t, uint8_t const *, svuint64_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1uh_gather_u32base_index_u32)))
-svuint32_t svldnt1uh_gather_index_u32(svbool_t, svuint32_t, int64_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1uh_gather_u64base_index_u64)))
-svuint64_t svldnt1uh_gather_index_u64(svbool_t, svuint64_t, int64_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1uh_gather_u32base_index_s32)))
-svint32_t svldnt1uh_gather_index_s32(svbool_t, svuint32_t, int64_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1uh_gather_u64base_index_s64)))
-svint64_t svldnt1uh_gather_index_s64(svbool_t, svuint64_t, int64_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1uh_gather_u32base_offset_u32)))
-svuint32_t svldnt1uh_gather_offset_u32(svbool_t, svuint32_t, int64_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1uh_gather_u64base_offset_u64)))
-svuint64_t svldnt1uh_gather_offset_u64(svbool_t, svuint64_t, int64_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1uh_gather_u32base_offset_s32)))
-svint32_t svldnt1uh_gather_offset_s32(svbool_t, svuint32_t, int64_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1uh_gather_u64base_offset_s64)))
-svint64_t svldnt1uh_gather_offset_s64(svbool_t, svuint64_t, int64_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1uh_gather_u32base_u32)))
-svuint32_t svldnt1uh_gather_u32(svbool_t, svuint32_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1uh_gather_u64base_u64)))
-svuint64_t svldnt1uh_gather_u64(svbool_t, svuint64_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1uh_gather_u32base_s32)))
-svint32_t svldnt1uh_gather_s32(svbool_t, svuint32_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1uh_gather_u64base_s64)))
-svint64_t svldnt1uh_gather_s64(svbool_t, svuint64_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1uh_gather_s64index_u64)))
-svuint64_t svldnt1uh_gather_index_u64(svbool_t, uint16_t const *, svint64_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1uh_gather_s64index_s64)))
-svint64_t svldnt1uh_gather_index_s64(svbool_t, uint16_t const *, svint64_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1uh_gather_u64index_u64)))
-svuint64_t svldnt1uh_gather_index_u64(svbool_t, uint16_t const *, svuint64_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1uh_gather_u64index_s64)))
-svint64_t svldnt1uh_gather_index_s64(svbool_t, uint16_t const *, svuint64_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1uh_gather_u32offset_u32)))
-svuint32_t svldnt1uh_gather_offset_u32(svbool_t, uint16_t const *, svuint32_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1uh_gather_u32offset_s32)))
-svint32_t svldnt1uh_gather_offset_s32(svbool_t, uint16_t const *, svuint32_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1uh_gather_s64offset_u64)))
-svuint64_t svldnt1uh_gather_offset_u64(svbool_t, uint16_t const *, svint64_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1uh_gather_s64offset_s64)))
-svint64_t svldnt1uh_gather_offset_s64(svbool_t, uint16_t const *, svint64_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1uh_gather_u64offset_u64)))
-svuint64_t svldnt1uh_gather_offset_u64(svbool_t, uint16_t const *, svuint64_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1uh_gather_u64offset_s64)))
-svint64_t svldnt1uh_gather_offset_s64(svbool_t, uint16_t const *, svuint64_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1uw_gather_u64base_index_u64)))
-svuint64_t svldnt1uw_gather_index_u64(svbool_t, svuint64_t, int64_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1uw_gather_u64base_index_s64)))
-svint64_t svldnt1uw_gather_index_s64(svbool_t, svuint64_t, int64_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1uw_gather_u64base_offset_u64)))
-svuint64_t svldnt1uw_gather_offset_u64(svbool_t, svuint64_t, int64_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1uw_gather_u64base_offset_s64)))
-svint64_t svldnt1uw_gather_offset_s64(svbool_t, svuint64_t, int64_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1uw_gather_u64base_u64)))
-svuint64_t svldnt1uw_gather_u64(svbool_t, svuint64_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1uw_gather_u64base_s64)))
-svint64_t svldnt1uw_gather_s64(svbool_t, svuint64_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1uw_gather_s64index_u64)))
-svuint64_t svldnt1uw_gather_index_u64(svbool_t, uint32_t const *, svint64_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1uw_gather_s64index_s64)))
-svint64_t svldnt1uw_gather_index_s64(svbool_t, uint32_t const *, svint64_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1uw_gather_u64index_u64)))
-svuint64_t svldnt1uw_gather_index_u64(svbool_t, uint32_t const *, svuint64_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1uw_gather_u64index_s64)))
-svint64_t svldnt1uw_gather_index_s64(svbool_t, uint32_t const *, svuint64_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1uw_gather_s64offset_u64)))
-svuint64_t svldnt1uw_gather_offset_u64(svbool_t, uint32_t const *, svint64_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1uw_gather_s64offset_s64)))
-svint64_t svldnt1uw_gather_offset_s64(svbool_t, uint32_t const *, svint64_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1uw_gather_u64offset_u64)))
-svuint64_t svldnt1uw_gather_offset_u64(svbool_t, uint32_t const *, svuint64_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1uw_gather_u64offset_s64)))
-svint64_t svldnt1uw_gather_offset_s64(svbool_t, uint32_t const *, svuint64_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmatch_u8)))
-svbool_t svmatch(svbool_t, svuint8_t, svuint8_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmatch_u16)))
-svbool_t svmatch(svbool_t, svuint16_t, svuint16_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmatch_s8)))
-svbool_t svmatch(svbool_t, svint8_t, svint8_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmatch_s16)))
-svbool_t svmatch(svbool_t, svint16_t, svint16_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svnmatch_u8)))
-svbool_t svnmatch(svbool_t, svuint8_t, svuint8_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svnmatch_u16)))
-svbool_t svnmatch(svbool_t, svuint16_t, svuint16_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svnmatch_s8)))
-svbool_t svnmatch(svbool_t, svint8_t, svint8_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svnmatch_s16)))
-svbool_t svnmatch(svbool_t, svint16_t, svint16_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_scatter_u32base_index_u32)))
-void svstnt1_scatter_index(svbool_t, svuint32_t, int64_t, svuint32_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_scatter_u64base_index_u64)))
-void svstnt1_scatter_index(svbool_t, svuint64_t, int64_t, svuint64_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_scatter_u64base_index_f64)))
-void svstnt1_scatter_index(svbool_t, svuint64_t, int64_t, svfloat64_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_scatter_u32base_index_f32)))
-void svstnt1_scatter_index(svbool_t, svuint32_t, int64_t, svfloat32_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_scatter_u32base_index_s32)))
-void svstnt1_scatter_index(svbool_t, svuint32_t, int64_t, svint32_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_scatter_u64base_index_s64)))
-void svstnt1_scatter_index(svbool_t, svuint64_t, int64_t, svint64_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_scatter_u32base_offset_u32)))
-void svstnt1_scatter_offset(svbool_t, svuint32_t, int64_t, svuint32_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_scatter_u64base_offset_u64)))
-void svstnt1_scatter_offset(svbool_t, svuint64_t, int64_t, svuint64_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_scatter_u64base_offset_f64)))
-void svstnt1_scatter_offset(svbool_t, svuint64_t, int64_t, svfloat64_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_scatter_u32base_offset_f32)))
-void svstnt1_scatter_offset(svbool_t, svuint32_t, int64_t, svfloat32_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_scatter_u32base_offset_s32)))
-void svstnt1_scatter_offset(svbool_t, svuint32_t, int64_t, svint32_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_scatter_u64base_offset_s64)))
-void svstnt1_scatter_offset(svbool_t, svuint64_t, int64_t, svint64_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_scatter_u32base_u32)))
-void svstnt1_scatter(svbool_t, svuint32_t, svuint32_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_scatter_u64base_u64)))
-void svstnt1_scatter(svbool_t, svuint64_t, svuint64_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_scatter_u64base_f64)))
-void svstnt1_scatter(svbool_t, svuint64_t, svfloat64_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_scatter_u32base_f32)))
-void svstnt1_scatter(svbool_t, svuint32_t, svfloat32_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_scatter_u32base_s32)))
-void svstnt1_scatter(svbool_t, svuint32_t, svint32_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_scatter_u64base_s64)))
-void svstnt1_scatter(svbool_t, svuint64_t, svint64_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_scatter_s64index_u64)))
-void svstnt1_scatter_index(svbool_t, uint64_t *, svint64_t, svuint64_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_scatter_s64index_f64)))
-void svstnt1_scatter_index(svbool_t, float64_t *, svint64_t, svfloat64_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_scatter_s64index_s64)))
-void svstnt1_scatter_index(svbool_t, int64_t *, svint64_t, svint64_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_scatter_u64index_u64)))
-void svstnt1_scatter_index(svbool_t, uint64_t *, svuint64_t, svuint64_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_scatter_u64index_f64)))
-void svstnt1_scatter_index(svbool_t, float64_t *, svuint64_t, svfloat64_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_scatter_u64index_s64)))
-void svstnt1_scatter_index(svbool_t, int64_t *, svuint64_t, svint64_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_scatter_u32offset_u32)))
-void svstnt1_scatter_offset(svbool_t, uint32_t *, svuint32_t, svuint32_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_scatter_u32offset_f32)))
-void svstnt1_scatter_offset(svbool_t, float32_t *, svuint32_t, svfloat32_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_scatter_u32offset_s32)))
-void svstnt1_scatter_offset(svbool_t, int32_t *, svuint32_t, svint32_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_scatter_s64offset_u64)))
-void svstnt1_scatter_offset(svbool_t, uint64_t *, svint64_t, svuint64_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_scatter_s64offset_f64)))
-void svstnt1_scatter_offset(svbool_t, float64_t *, svint64_t, svfloat64_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_scatter_s64offset_s64)))
-void svstnt1_scatter_offset(svbool_t, int64_t *, svint64_t, svint64_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_scatter_u64offset_u64)))
-void svstnt1_scatter_offset(svbool_t, uint64_t *, svuint64_t, svuint64_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_scatter_u64offset_f64)))
-void svstnt1_scatter_offset(svbool_t, float64_t *, svuint64_t, svfloat64_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_scatter_u64offset_s64)))
-void svstnt1_scatter_offset(svbool_t, int64_t *, svuint64_t, svint64_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1b_scatter_u32base_offset_u32)))
-void svstnt1b_scatter_offset(svbool_t, svuint32_t, int64_t, svuint32_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1b_scatter_u64base_offset_u64)))
-void svstnt1b_scatter_offset(svbool_t, svuint64_t, int64_t, svuint64_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1b_scatter_u32base_offset_s32)))
-void svstnt1b_scatter_offset(svbool_t, svuint32_t, int64_t, svint32_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1b_scatter_u64base_offset_s64)))
-void svstnt1b_scatter_offset(svbool_t, svuint64_t, int64_t, svint64_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1b_scatter_u32base_u32)))
-void svstnt1b_scatter(svbool_t, svuint32_t, svuint32_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1b_scatter_u64base_u64)))
-void svstnt1b_scatter(svbool_t, svuint64_t, svuint64_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1b_scatter_u32base_s32)))
-void svstnt1b_scatter(svbool_t, svuint32_t, svint32_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1b_scatter_u64base_s64)))
-void svstnt1b_scatter(svbool_t, svuint64_t, svint64_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1b_scatter_u32offset_s32)))
-void svstnt1b_scatter_offset(svbool_t, int8_t *, svuint32_t, svint32_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1b_scatter_u32offset_u32)))
-void svstnt1b_scatter_offset(svbool_t, uint8_t *, svuint32_t, svuint32_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1b_scatter_s64offset_s64)))
-void svstnt1b_scatter_offset(svbool_t, int8_t *, svint64_t, svint64_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1b_scatter_s64offset_u64)))
-void svstnt1b_scatter_offset(svbool_t, uint8_t *, svint64_t, svuint64_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1b_scatter_u64offset_s64)))
-void svstnt1b_scatter_offset(svbool_t, int8_t *, svuint64_t, svint64_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1b_scatter_u64offset_u64)))
-void svstnt1b_scatter_offset(svbool_t, uint8_t *, svuint64_t, svuint64_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1h_scatter_u32base_index_u32)))
-void svstnt1h_scatter_index(svbool_t, svuint32_t, int64_t, svuint32_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1h_scatter_u64base_index_u64)))
-void svstnt1h_scatter_index(svbool_t, svuint64_t, int64_t, svuint64_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1h_scatter_u32base_index_s32)))
-void svstnt1h_scatter_index(svbool_t, svuint32_t, int64_t, svint32_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1h_scatter_u64base_index_s64)))
-void svstnt1h_scatter_index(svbool_t, svuint64_t, int64_t, svint64_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1h_scatter_u32base_offset_u32)))
-void svstnt1h_scatter_offset(svbool_t, svuint32_t, int64_t, svuint32_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1h_scatter_u64base_offset_u64)))
-void svstnt1h_scatter_offset(svbool_t, svuint64_t, int64_t, svuint64_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1h_scatter_u32base_offset_s32)))
-void svstnt1h_scatter_offset(svbool_t, svuint32_t, int64_t, svint32_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1h_scatter_u64base_offset_s64)))
-void svstnt1h_scatter_offset(svbool_t, svuint64_t, int64_t, svint64_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1h_scatter_u32base_u32)))
-void svstnt1h_scatter(svbool_t, svuint32_t, svuint32_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1h_scatter_u64base_u64)))
-void svstnt1h_scatter(svbool_t, svuint64_t, svuint64_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1h_scatter_u32base_s32)))
-void svstnt1h_scatter(svbool_t, svuint32_t, svint32_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1h_scatter_u64base_s64)))
-void svstnt1h_scatter(svbool_t, svuint64_t, svint64_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1h_scatter_s64index_s64)))
-void svstnt1h_scatter_index(svbool_t, int16_t *, svint64_t, svint64_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1h_scatter_s64index_u64)))
-void svstnt1h_scatter_index(svbool_t, uint16_t *, svint64_t, svuint64_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1h_scatter_u64index_s64)))
-void svstnt1h_scatter_index(svbool_t, int16_t *, svuint64_t, svint64_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1h_scatter_u64index_u64)))
-void svstnt1h_scatter_index(svbool_t, uint16_t *, svuint64_t, svuint64_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1h_scatter_u32offset_s32)))
-void svstnt1h_scatter_offset(svbool_t, int16_t *, svuint32_t, svint32_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1h_scatter_u32offset_u32)))
-void svstnt1h_scatter_offset(svbool_t, uint16_t *, svuint32_t, svuint32_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1h_scatter_s64offset_s64)))
-void svstnt1h_scatter_offset(svbool_t, int16_t *, svint64_t, svint64_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1h_scatter_s64offset_u64)))
-void svstnt1h_scatter_offset(svbool_t, uint16_t *, svint64_t, svuint64_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1h_scatter_u64offset_s64)))
-void svstnt1h_scatter_offset(svbool_t, int16_t *, svuint64_t, svint64_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1h_scatter_u64offset_u64)))
-void svstnt1h_scatter_offset(svbool_t, uint16_t *, svuint64_t, svuint64_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1w_scatter_u64base_index_u64)))
-void svstnt1w_scatter_index(svbool_t, svuint64_t, int64_t, svuint64_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1w_scatter_u64base_index_s64)))
-void svstnt1w_scatter_index(svbool_t, svuint64_t, int64_t, svint64_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1w_scatter_u64base_offset_u64)))
-void svstnt1w_scatter_offset(svbool_t, svuint64_t, int64_t, svuint64_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1w_scatter_u64base_offset_s64)))
-void svstnt1w_scatter_offset(svbool_t, svuint64_t, int64_t, svint64_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1w_scatter_u64base_u64)))
-void svstnt1w_scatter(svbool_t, svuint64_t, svuint64_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1w_scatter_u64base_s64)))
-void svstnt1w_scatter(svbool_t, svuint64_t, svint64_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1w_scatter_s64index_s64)))
-void svstnt1w_scatter_index(svbool_t, int32_t *, svint64_t, svint64_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1w_scatter_s64index_u64)))
-void svstnt1w_scatter_index(svbool_t, uint32_t *, svint64_t, svuint64_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1w_scatter_u64index_s64)))
-void svstnt1w_scatter_index(svbool_t, int32_t *, svuint64_t, svint64_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1w_scatter_u64index_u64)))
-void svstnt1w_scatter_index(svbool_t, uint32_t *, svuint64_t, svuint64_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1w_scatter_s64offset_s64)))
-void svstnt1w_scatter_offset(svbool_t, int32_t *, svint64_t, svint64_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1w_scatter_s64offset_u64)))
-void svstnt1w_scatter_offset(svbool_t, uint32_t *, svint64_t, svuint64_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1w_scatter_u64offset_s64)))
-void svstnt1w_scatter_offset(svbool_t, int32_t *, svuint64_t, svint64_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1w_scatter_u64offset_u64)))
-void svstnt1w_scatter_offset(svbool_t, uint32_t *, svuint64_t, svuint64_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svamax_n_f64_m)))
-svfloat64_t svamax_n_f64_m(svbool_t, svfloat64_t, float64_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svamax_n_f32_m)))
-svfloat32_t svamax_n_f32_m(svbool_t, svfloat32_t, float32_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svamax_n_f16_m)))
-svfloat16_t svamax_n_f16_m(svbool_t, svfloat16_t, float16_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svamax_n_f64_x)))
-svfloat64_t svamax_n_f64_x(svbool_t, svfloat64_t, float64_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svamax_n_f32_x)))
-svfloat32_t svamax_n_f32_x(svbool_t, svfloat32_t, float32_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svamax_n_f16_x)))
-svfloat16_t svamax_n_f16_x(svbool_t, svfloat16_t, float16_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svamax_n_f64_z)))
-svfloat64_t svamax_n_f64_z(svbool_t, svfloat64_t, float64_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svamax_n_f32_z)))
-svfloat32_t svamax_n_f32_z(svbool_t, svfloat32_t, float32_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svamax_n_f16_z)))
-svfloat16_t svamax_n_f16_z(svbool_t, svfloat16_t, float16_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svamax_f64_m)))
-svfloat64_t svamax_f64_m(svbool_t, svfloat64_t, svfloat64_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svamax_f32_m)))
-svfloat32_t svamax_f32_m(svbool_t, svfloat32_t, svfloat32_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svamax_f16_m)))
-svfloat16_t svamax_f16_m(svbool_t, svfloat16_t, svfloat16_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svamax_f64_x)))
-svfloat64_t svamax_f64_x(svbool_t, svfloat64_t, svfloat64_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svamax_f32_x)))
-svfloat32_t svamax_f32_x(svbool_t, svfloat32_t, svfloat32_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svamax_f16_x)))
-svfloat16_t svamax_f16_x(svbool_t, svfloat16_t, svfloat16_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svamax_f64_z)))
-svfloat64_t svamax_f64_z(svbool_t, svfloat64_t, svfloat64_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svamax_f32_z)))
-svfloat32_t svamax_f32_z(svbool_t, svfloat32_t, svfloat32_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svamax_f16_z)))
-svfloat16_t svamax_f16_z(svbool_t, svfloat16_t, svfloat16_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svamin_n_f64_m)))
-svfloat64_t svamin_n_f64_m(svbool_t, svfloat64_t, float64_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svamin_n_f32_m)))
-svfloat32_t svamin_n_f32_m(svbool_t, svfloat32_t, float32_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svamin_n_f16_m)))
-svfloat16_t svamin_n_f16_m(svbool_t, svfloat16_t, float16_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svamin_n_f64_x)))
-svfloat64_t svamin_n_f64_x(svbool_t, svfloat64_t, float64_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svamin_n_f32_x)))
-svfloat32_t svamin_n_f32_x(svbool_t, svfloat32_t, float32_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svamin_n_f16_x)))
-svfloat16_t svamin_n_f16_x(svbool_t, svfloat16_t, float16_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svamin_n_f64_z)))
-svfloat64_t svamin_n_f64_z(svbool_t, svfloat64_t, float64_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svamin_n_f32_z)))
-svfloat32_t svamin_n_f32_z(svbool_t, svfloat32_t, float32_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svamin_n_f16_z)))
-svfloat16_t svamin_n_f16_z(svbool_t, svfloat16_t, float16_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svamin_f64_m)))
-svfloat64_t svamin_f64_m(svbool_t, svfloat64_t, svfloat64_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svamin_f32_m)))
-svfloat32_t svamin_f32_m(svbool_t, svfloat32_t, svfloat32_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svamin_f16_m)))
-svfloat16_t svamin_f16_m(svbool_t, svfloat16_t, svfloat16_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svamin_f64_x)))
-svfloat64_t svamin_f64_x(svbool_t, svfloat64_t, svfloat64_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svamin_f32_x)))
-svfloat32_t svamin_f32_x(svbool_t, svfloat32_t, svfloat32_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svamin_f16_x)))
-svfloat16_t svamin_f16_x(svbool_t, svfloat16_t, svfloat16_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svamin_f64_z)))
-svfloat64_t svamin_f64_z(svbool_t, svfloat64_t, svfloat64_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svamin_f32_z)))
-svfloat32_t svamin_f32_z(svbool_t, svfloat32_t, svfloat32_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svamin_f16_z)))
-svfloat16_t svamin_f16_z(svbool_t, svfloat16_t, svfloat16_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svamax_n_f64_m)))
-svfloat64_t svamax_m(svbool_t, svfloat64_t, float64_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svamax_n_f32_m)))
-svfloat32_t svamax_m(svbool_t, svfloat32_t, float32_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svamax_n_f16_m)))
-svfloat16_t svamax_m(svbool_t, svfloat16_t, float16_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svamax_n_f64_x)))
-svfloat64_t svamax_x(svbool_t, svfloat64_t, float64_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svamax_n_f32_x)))
-svfloat32_t svamax_x(svbool_t, svfloat32_t, float32_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svamax_n_f16_x)))
-svfloat16_t svamax_x(svbool_t, svfloat16_t, float16_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svamax_n_f64_z)))
-svfloat64_t svamax_z(svbool_t, svfloat64_t, float64_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svamax_n_f32_z)))
-svfloat32_t svamax_z(svbool_t, svfloat32_t, float32_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svamax_n_f16_z)))
-svfloat16_t svamax_z(svbool_t, svfloat16_t, float16_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svamax_f64_m)))
-svfloat64_t svamax_m(svbool_t, svfloat64_t, svfloat64_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svamax_f32_m)))
-svfloat32_t svamax_m(svbool_t, svfloat32_t, svfloat32_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svamax_f16_m)))
-svfloat16_t svamax_m(svbool_t, svfloat16_t, svfloat16_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svamax_f64_x)))
-svfloat64_t svamax_x(svbool_t, svfloat64_t, svfloat64_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svamax_f32_x)))
-svfloat32_t svamax_x(svbool_t, svfloat32_t, svfloat32_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svamax_f16_x)))
-svfloat16_t svamax_x(svbool_t, svfloat16_t, svfloat16_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svamax_f64_z)))
-svfloat64_t svamax_z(svbool_t, svfloat64_t, svfloat64_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svamax_f32_z)))
-svfloat32_t svamax_z(svbool_t, svfloat32_t, svfloat32_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svamax_f16_z)))
-svfloat16_t svamax_z(svbool_t, svfloat16_t, svfloat16_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svamin_n_f64_m)))
-svfloat64_t svamin_m(svbool_t, svfloat64_t, float64_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svamin_n_f32_m)))
-svfloat32_t svamin_m(svbool_t, svfloat32_t, float32_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svamin_n_f16_m)))
-svfloat16_t svamin_m(svbool_t, svfloat16_t, float16_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svamin_n_f64_x)))
-svfloat64_t svamin_x(svbool_t, svfloat64_t, float64_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svamin_n_f32_x)))
-svfloat32_t svamin_x(svbool_t, svfloat32_t, float32_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svamin_n_f16_x)))
-svfloat16_t svamin_x(svbool_t, svfloat16_t, float16_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svamin_n_f64_z)))
-svfloat64_t svamin_z(svbool_t, svfloat64_t, float64_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svamin_n_f32_z)))
-svfloat32_t svamin_z(svbool_t, svfloat32_t, float32_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svamin_n_f16_z)))
-svfloat16_t svamin_z(svbool_t, svfloat16_t, float16_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svamin_f64_m)))
-svfloat64_t svamin_m(svbool_t, svfloat64_t, svfloat64_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svamin_f32_m)))
-svfloat32_t svamin_m(svbool_t, svfloat32_t, svfloat32_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svamin_f16_m)))
-svfloat16_t svamin_m(svbool_t, svfloat16_t, svfloat16_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svamin_f64_x)))
-svfloat64_t svamin_x(svbool_t, svfloat64_t, svfloat64_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svamin_f32_x)))
-svfloat32_t svamin_x(svbool_t, svfloat32_t, svfloat32_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svamin_f16_x)))
-svfloat16_t svamin_x(svbool_t, svfloat16_t, svfloat16_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svamin_f64_z)))
-svfloat64_t svamin_z(svbool_t, svfloat64_t, svfloat64_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svamin_f32_z)))
-svfloat32_t svamin_z(svbool_t, svfloat32_t, svfloat32_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svamin_f16_z)))
-svfloat16_t svamin_z(svbool_t, svfloat16_t, svfloat16_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdot_f16_mf8_fpm)))
-svfloat16_t svdot_f16_mf8_fpm(svfloat16_t, svmfloat8_t, svmfloat8_t, fpm_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdot_n_f16_mf8_fpm)))
-svfloat16_t svdot_n_f16_mf8_fpm(svfloat16_t, svmfloat8_t, mfloat8_t, fpm_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdot_lane_f16_mf8_fpm)))
-svfloat16_t svdot_lane_f16_mf8_fpm(svfloat16_t, svmfloat8_t, svmfloat8_t, uint64_t, fpm_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdot_f16_mf8_fpm)))
-svfloat16_t svdot_fpm(svfloat16_t, svmfloat8_t, svmfloat8_t, fpm_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdot_n_f16_mf8_fpm)))
-svfloat16_t svdot_fpm(svfloat16_t, svmfloat8_t, mfloat8_t, fpm_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdot_lane_f16_mf8_fpm)))
-svfloat16_t svdot_lane_fpm(svfloat16_t, svmfloat8_t, svmfloat8_t, uint64_t, fpm_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdot_f32_mf8_fpm)))
-svfloat32_t svdot_f32_mf8_fpm(svfloat32_t, svmfloat8_t, svmfloat8_t, fpm_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdot_n_f32_mf8_fpm)))
-svfloat32_t svdot_n_f32_mf8_fpm(svfloat32_t, svmfloat8_t, mfloat8_t, fpm_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdot_lane_f32_mf8_fpm)))
-svfloat32_t svdot_lane_f32_mf8_fpm(svfloat32_t, svmfloat8_t, svmfloat8_t, uint64_t, fpm_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdot_f32_mf8_fpm)))
-svfloat32_t svdot_fpm(svfloat32_t, svmfloat8_t, svmfloat8_t, fpm_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdot_n_f32_mf8_fpm)))
-svfloat32_t svdot_fpm(svfloat32_t, svmfloat8_t, mfloat8_t, fpm_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdot_lane_f32_mf8_fpm)))
-svfloat32_t svdot_lane_fpm(svfloat32_t, svmfloat8_t, svmfloat8_t, uint64_t, fpm_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmlalb_f16_mf8_fpm)))
-svfloat16_t svmlalb_f16_mf8_fpm(svfloat16_t, svmfloat8_t, svmfloat8_t, fpm_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmlalb_n_f16_mf8_fpm)))
-svfloat16_t svmlalb_n_f16_mf8_fpm(svfloat16_t, svmfloat8_t, mfloat8_t, fpm_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmlalb_lane_f16_mf8_fpm)))
-svfloat16_t svmlalb_lane_f16_mf8_fpm(svfloat16_t, svmfloat8_t, svmfloat8_t, uint64_t, fpm_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmlallbb_f32_mf8_fpm)))
-svfloat32_t svmlallbb_f32_mf8_fpm(svfloat32_t, svmfloat8_t, svmfloat8_t, fpm_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmlallbb_n_f32_mf8_fpm)))
-svfloat32_t svmlallbb_n_f32_mf8_fpm(svfloat32_t, svmfloat8_t, mfloat8_t, fpm_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmlallbb_lane_f32_mf8_fpm)))
-svfloat32_t svmlallbb_lane_f32_mf8_fpm(svfloat32_t, svmfloat8_t, svmfloat8_t, uint64_t, fpm_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmlallbt_f32_mf8_fpm)))
-svfloat32_t svmlallbt_f32_mf8_fpm(svfloat32_t, svmfloat8_t, svmfloat8_t, fpm_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmlallbt_n_f32_mf8_fpm)))
-svfloat32_t svmlallbt_n_f32_mf8_fpm(svfloat32_t, svmfloat8_t, mfloat8_t, fpm_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmlallbt_lane_f32_mf8_fpm)))
-svfloat32_t svmlallbt_lane_f32_mf8_fpm(svfloat32_t, svmfloat8_t, svmfloat8_t, uint64_t, fpm_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmlalltb_f32_mf8_fpm)))
-svfloat32_t svmlalltb_f32_mf8_fpm(svfloat32_t, svmfloat8_t, svmfloat8_t, fpm_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmlalltb_n_f32_mf8_fpm)))
-svfloat32_t svmlalltb_n_f32_mf8_fpm(svfloat32_t, svmfloat8_t, mfloat8_t, fpm_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmlalltb_lane_f32_mf8_fpm)))
-svfloat32_t svmlalltb_lane_f32_mf8_fpm(svfloat32_t, svmfloat8_t, svmfloat8_t, uint64_t, fpm_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmlalltt_f32_mf8_fpm)))
-svfloat32_t svmlalltt_f32_mf8_fpm(svfloat32_t, svmfloat8_t, svmfloat8_t, fpm_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmlalltt_n_f32_mf8_fpm)))
-svfloat32_t svmlalltt_n_f32_mf8_fpm(svfloat32_t, svmfloat8_t, mfloat8_t, fpm_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmlalltt_lane_f32_mf8_fpm)))
-svfloat32_t svmlalltt_lane_f32_mf8_fpm(svfloat32_t, svmfloat8_t, svmfloat8_t, uint64_t, fpm_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmlalt_f16_mf8_fpm)))
-svfloat16_t svmlalt_f16_mf8_fpm(svfloat16_t, svmfloat8_t, svmfloat8_t, fpm_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmlalt_n_f16_mf8_fpm)))
-svfloat16_t svmlalt_n_f16_mf8_fpm(svfloat16_t, svmfloat8_t, mfloat8_t, fpm_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmlalt_lane_f16_mf8_fpm)))
-svfloat16_t svmlalt_lane_f16_mf8_fpm(svfloat16_t, svmfloat8_t, svmfloat8_t, uint64_t, fpm_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmlalb_f16_mf8_fpm)))
-svfloat16_t svmlalb_fpm(svfloat16_t, svmfloat8_t, svmfloat8_t, fpm_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmlalb_n_f16_mf8_fpm)))
-svfloat16_t svmlalb_fpm(svfloat16_t, svmfloat8_t, mfloat8_t, fpm_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmlalb_lane_f16_mf8_fpm)))
-svfloat16_t svmlalb_lane_fpm(svfloat16_t, svmfloat8_t, svmfloat8_t, uint64_t, fpm_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmlallbb_f32_mf8_fpm)))
-svfloat32_t svmlallbb_fpm(svfloat32_t, svmfloat8_t, svmfloat8_t, fpm_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmlallbb_n_f32_mf8_fpm)))
-svfloat32_t svmlallbb_fpm(svfloat32_t, svmfloat8_t, mfloat8_t, fpm_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmlallbb_lane_f32_mf8_fpm)))
-svfloat32_t svmlallbb_lane_fpm(svfloat32_t, svmfloat8_t, svmfloat8_t, uint64_t, fpm_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmlallbt_f32_mf8_fpm)))
-svfloat32_t svmlallbt_fpm(svfloat32_t, svmfloat8_t, svmfloat8_t, fpm_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmlallbt_n_f32_mf8_fpm)))
-svfloat32_t svmlallbt_fpm(svfloat32_t, svmfloat8_t, mfloat8_t, fpm_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmlallbt_lane_f32_mf8_fpm)))
-svfloat32_t svmlallbt_lane_fpm(svfloat32_t, svmfloat8_t, svmfloat8_t, uint64_t, fpm_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmlalltb_f32_mf8_fpm)))
-svfloat32_t svmlalltb_fpm(svfloat32_t, svmfloat8_t, svmfloat8_t, fpm_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmlalltb_n_f32_mf8_fpm)))
-svfloat32_t svmlalltb_fpm(svfloat32_t, svmfloat8_t, mfloat8_t, fpm_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmlalltb_lane_f32_mf8_fpm)))
-svfloat32_t svmlalltb_lane_fpm(svfloat32_t, svmfloat8_t, svmfloat8_t, uint64_t, fpm_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmlalltt_f32_mf8_fpm)))
-svfloat32_t svmlalltt_fpm(svfloat32_t, svmfloat8_t, svmfloat8_t, fpm_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmlalltt_n_f32_mf8_fpm)))
-svfloat32_t svmlalltt_fpm(svfloat32_t, svmfloat8_t, mfloat8_t, fpm_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmlalltt_lane_f32_mf8_fpm)))
-svfloat32_t svmlalltt_lane_fpm(svfloat32_t, svmfloat8_t, svmfloat8_t, uint64_t, fpm_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmlalt_f16_mf8_fpm)))
-svfloat16_t svmlalt_fpm(svfloat16_t, svmfloat8_t, svmfloat8_t, fpm_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmlalt_n_f16_mf8_fpm)))
-svfloat16_t svmlalt_fpm(svfloat16_t, svmfloat8_t, mfloat8_t, fpm_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmlalt_lane_f16_mf8_fpm)))
-svfloat16_t svmlalt_lane_fpm(svfloat16_t, svmfloat8_t, svmfloat8_t, uint64_t, fpm_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt1_bf16_mf8_fpm)))
-svbfloat16_t svcvt1_bf16_mf8_fpm(svmfloat8_t, fpm_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt1_f16_mf8_fpm)))
-svfloat16_t svcvt1_f16_mf8_fpm(svmfloat8_t, fpm_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt2_bf16_mf8_fpm)))
-svbfloat16_t svcvt2_bf16_mf8_fpm(svmfloat8_t, fpm_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt2_f16_mf8_fpm)))
-svfloat16_t svcvt2_f16_mf8_fpm(svmfloat8_t, fpm_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvtlt1_bf16_mf8_fpm)))
-svbfloat16_t svcvtlt1_bf16_mf8_fpm(svmfloat8_t, fpm_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvtlt1_f16_mf8_fpm)))
-svfloat16_t svcvtlt1_f16_mf8_fpm(svmfloat8_t, fpm_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvtlt2_bf16_mf8_fpm)))
-svbfloat16_t svcvtlt2_bf16_mf8_fpm(svmfloat8_t, fpm_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvtlt2_f16_mf8_fpm)))
-svfloat16_t svcvtlt2_f16_mf8_fpm(svmfloat8_t, fpm_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvtn_mf8_bf16_x2_fpm)))
-svmfloat8_t svcvtn_mf8_bf16_x2_fpm(svbfloat16x2_t, fpm_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvtn_mf8_f16_x2_fpm)))
-svmfloat8_t svcvtn_mf8_f16_x2_fpm(svfloat16x2_t, fpm_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvtnb_mf8_f32_x2_fpm)))
-svmfloat8_t svcvtnb_mf8_f32_x2_fpm(svfloat32x2_t, fpm_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvtnt_mf8_f32_x2_fpm)))
-svmfloat8_t svcvtnt_mf8_f32_x2_fpm(svmfloat8_t, svfloat32x2_t, fpm_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt1_bf16_mf8_fpm)))
-svbfloat16_t svcvt1_bf16_fpm(svmfloat8_t, fpm_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt1_f16_mf8_fpm)))
-svfloat16_t svcvt1_f16_fpm(svmfloat8_t, fpm_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt2_bf16_mf8_fpm)))
-svbfloat16_t svcvt2_bf16_fpm(svmfloat8_t, fpm_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt2_f16_mf8_fpm)))
-svfloat16_t svcvt2_f16_fpm(svmfloat8_t, fpm_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvtlt1_bf16_mf8_fpm)))
-svbfloat16_t svcvtlt1_bf16_fpm(svmfloat8_t, fpm_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvtlt1_f16_mf8_fpm)))
-svfloat16_t svcvtlt1_f16_fpm(svmfloat8_t, fpm_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvtlt2_bf16_mf8_fpm)))
-svbfloat16_t svcvtlt2_bf16_fpm(svmfloat8_t, fpm_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvtlt2_f16_mf8_fpm)))
-svfloat16_t svcvtlt2_f16_fpm(svmfloat8_t, fpm_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvtn_mf8_bf16_x2_fpm)))
-svmfloat8_t svcvtn_mf8_fpm(svbfloat16x2_t, fpm_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvtn_mf8_f16_x2_fpm)))
-svmfloat8_t svcvtn_mf8_fpm(svfloat16x2_t, fpm_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvtnb_mf8_f32_x2_fpm)))
-svmfloat8_t svcvtnb_mf8_fpm(svfloat32x2_t, fpm_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvtnt_mf8_f32_x2_fpm)))
-svmfloat8_t svcvtnt_mf8_fpm(svmfloat8_t, svfloat32x2_t, fpm_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svluti2_lane_u8)))
-svuint8_t svluti2_lane_u8(svuint8_t, svuint8_t, uint64_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svluti2_lane_s8)))
-svint8_t svluti2_lane_s8(svint8_t, svuint8_t, uint64_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svluti2_lane_u16)))
-svuint16_t svluti2_lane_u16(svuint16_t, svuint8_t, uint64_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svluti2_lane_bf16)))
-svbfloat16_t svluti2_lane_bf16(svbfloat16_t, svuint8_t, uint64_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svluti2_lane_f16)))
-svfloat16_t svluti2_lane_f16(svfloat16_t, svuint8_t, uint64_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svluti2_lane_s16)))
-svint16_t svluti2_lane_s16(svint16_t, svuint8_t, uint64_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svluti4_lane_u8)))
-svuint8_t svluti4_lane_u8(svuint8_t, svuint8_t, uint64_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svluti4_lane_s8)))
-svint8_t svluti4_lane_s8(svint8_t, svuint8_t, uint64_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svluti4_lane_u16)))
-svuint16_t svluti4_lane_u16(svuint16_t, svuint8_t, uint64_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svluti4_lane_bf16)))
-svbfloat16_t svluti4_lane_bf16(svbfloat16_t, svuint8_t, uint64_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svluti4_lane_f16)))
-svfloat16_t svluti4_lane_f16(svfloat16_t, svuint8_t, uint64_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svluti4_lane_s16)))
-svint16_t svluti4_lane_s16(svint16_t, svuint8_t, uint64_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svluti4_lane_u16_x2)))
-svuint16_t svluti4_lane_u16_x2(svuint16x2_t, svuint8_t, uint64_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svluti4_lane_bf16_x2)))
-svbfloat16_t svluti4_lane_bf16_x2(svbfloat16x2_t, svuint8_t, uint64_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svluti4_lane_f16_x2)))
-svfloat16_t svluti4_lane_f16_x2(svfloat16x2_t, svuint8_t, uint64_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svluti4_lane_s16_x2)))
-svint16_t svluti4_lane_s16_x2(svint16x2_t, svuint8_t, uint64_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svluti2_lane_u8)))
-svuint8_t svluti2_lane(svuint8_t, svuint8_t, uint64_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svluti2_lane_s8)))
-svint8_t svluti2_lane(svint8_t, svuint8_t, uint64_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svluti2_lane_u16)))
-svuint16_t svluti2_lane(svuint16_t, svuint8_t, uint64_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svluti2_lane_bf16)))
-svbfloat16_t svluti2_lane(svbfloat16_t, svuint8_t, uint64_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svluti2_lane_f16)))
-svfloat16_t svluti2_lane(svfloat16_t, svuint8_t, uint64_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svluti2_lane_s16)))
-svint16_t svluti2_lane(svint16_t, svuint8_t, uint64_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svluti4_lane_u8)))
-svuint8_t svluti4_lane(svuint8_t, svuint8_t, uint64_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svluti4_lane_s8)))
-svint8_t svluti4_lane(svint8_t, svuint8_t, uint64_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svluti4_lane_u16)))
-svuint16_t svluti4_lane(svuint16_t, svuint8_t, uint64_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svluti4_lane_bf16)))
-svbfloat16_t svluti4_lane(svbfloat16_t, svuint8_t, uint64_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svluti4_lane_f16)))
-svfloat16_t svluti4_lane(svfloat16_t, svuint8_t, uint64_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svluti4_lane_s16)))
-svint16_t svluti4_lane(svint16_t, svuint8_t, uint64_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svluti4_lane_u16_x2)))
-svuint16_t svluti4_lane(svuint16x2_t, svuint8_t, uint64_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svluti4_lane_bf16_x2)))
-svbfloat16_t svluti4_lane(svbfloat16x2_t, svuint8_t, uint64_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svluti4_lane_f16_x2)))
-svfloat16_t svluti4_lane(svfloat16x2_t, svuint8_t, uint64_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svluti4_lane_s16_x2)))
-svint16_t svluti4_lane(svint16x2_t, svuint8_t, uint64_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaesd_u8)))
-svuint8_t svaesd_u8(svuint8_t, svuint8_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaese_u8)))
-svuint8_t svaese_u8(svuint8_t, svuint8_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaesimc_u8)))
-svuint8_t svaesimc_u8(svuint8_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaesmc_u8)))
-svuint8_t svaesmc_u8(svuint8_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svpmullb_pair_n_u64)))
-svuint64_t svpmullb_pair_n_u64(svuint64_t, uint64_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svpmullb_pair_u64)))
-svuint64_t svpmullb_pair_u64(svuint64_t, svuint64_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svpmullt_pair_n_u64)))
-svuint64_t svpmullt_pair_n_u64(svuint64_t, uint64_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svpmullt_pair_u64)))
-svuint64_t svpmullt_pair_u64(svuint64_t, svuint64_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaesd_u8)))
-svuint8_t svaesd(svuint8_t, svuint8_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaese_u8)))
-svuint8_t svaese(svuint8_t, svuint8_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaesimc_u8)))
-svuint8_t svaesimc(svuint8_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaesmc_u8)))
-svuint8_t svaesmc(svuint8_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svpmullb_pair_n_u64)))
-svuint64_t svpmullb_pair(svuint64_t, uint64_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svpmullb_pair_u64)))
-svuint64_t svpmullb_pair(svuint64_t, svuint64_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svpmullt_pair_n_u64)))
-svuint64_t svpmullt_pair(svuint64_t, uint64_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svpmullt_pair_u64)))
-svuint64_t svpmullt_pair(svuint64_t, svuint64_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadd_n_bf16_m)))
-svbfloat16_t svadd_n_bf16_m(svbool_t, svbfloat16_t, bfloat16_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadd_n_bf16_x)))
-svbfloat16_t svadd_n_bf16_x(svbool_t, svbfloat16_t, bfloat16_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadd_n_bf16_z)))
-svbfloat16_t svadd_n_bf16_z(svbool_t, svbfloat16_t, bfloat16_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadd_bf16_m)))
-svbfloat16_t svadd_bf16_m(svbool_t, svbfloat16_t, svbfloat16_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadd_bf16_x)))
-svbfloat16_t svadd_bf16_x(svbool_t, svbfloat16_t, svbfloat16_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadd_bf16_z)))
-svbfloat16_t svadd_bf16_z(svbool_t, svbfloat16_t, svbfloat16_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclamp_bf16)))
-svbfloat16_t svclamp_bf16(svbfloat16_t, svbfloat16_t, svbfloat16_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_n_bf16_m)))
-svbfloat16_t svmax_n_bf16_m(svbool_t, svbfloat16_t, bfloat16_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_n_bf16_x)))
-svbfloat16_t svmax_n_bf16_x(svbool_t, svbfloat16_t, bfloat16_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_n_bf16_z)))
-svbfloat16_t svmax_n_bf16_z(svbool_t, svbfloat16_t, bfloat16_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_bf16_m)))
-svbfloat16_t svmax_bf16_m(svbool_t, svbfloat16_t, svbfloat16_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_bf16_x)))
-svbfloat16_t svmax_bf16_x(svbool_t, svbfloat16_t, svbfloat16_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_bf16_z)))
-svbfloat16_t svmax_bf16_z(svbool_t, svbfloat16_t, svbfloat16_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmaxnm_n_bf16_m)))
-svbfloat16_t svmaxnm_n_bf16_m(svbool_t, svbfloat16_t, bfloat16_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmaxnm_n_bf16_x)))
-svbfloat16_t svmaxnm_n_bf16_x(svbool_t, svbfloat16_t, bfloat16_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmaxnm_n_bf16_z)))
-svbfloat16_t svmaxnm_n_bf16_z(svbool_t, svbfloat16_t, bfloat16_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmaxnm_bf16_m)))
-svbfloat16_t svmaxnm_bf16_m(svbool_t, svbfloat16_t, svbfloat16_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmaxnm_bf16_x)))
-svbfloat16_t svmaxnm_bf16_x(svbool_t, svbfloat16_t, svbfloat16_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmaxnm_bf16_z)))
-svbfloat16_t svmaxnm_bf16_z(svbool_t, svbfloat16_t, svbfloat16_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_n_bf16_m)))
-svbfloat16_t svmin_n_bf16_m(svbool_t, svbfloat16_t, bfloat16_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_n_bf16_x)))
-svbfloat16_t svmin_n_bf16_x(svbool_t, svbfloat16_t, bfloat16_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_n_bf16_z)))
-svbfloat16_t svmin_n_bf16_z(svbool_t, svbfloat16_t, bfloat16_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_bf16_m)))
-svbfloat16_t svmin_bf16_m(svbool_t, svbfloat16_t, svbfloat16_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_bf16_x)))
-svbfloat16_t svmin_bf16_x(svbool_t, svbfloat16_t, svbfloat16_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_bf16_z)))
-svbfloat16_t svmin_bf16_z(svbool_t, svbfloat16_t, svbfloat16_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svminnm_n_bf16_m)))
-svbfloat16_t svminnm_n_bf16_m(svbool_t, svbfloat16_t, bfloat16_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svminnm_n_bf16_x)))
-svbfloat16_t svminnm_n_bf16_x(svbool_t, svbfloat16_t, bfloat16_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svminnm_n_bf16_z)))
-svbfloat16_t svminnm_n_bf16_z(svbool_t, svbfloat16_t, bfloat16_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svminnm_bf16_m)))
-svbfloat16_t svminnm_bf16_m(svbool_t, svbfloat16_t, svbfloat16_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svminnm_bf16_x)))
-svbfloat16_t svminnm_bf16_x(svbool_t, svbfloat16_t, svbfloat16_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svminnm_bf16_z)))
-svbfloat16_t svminnm_bf16_z(svbool_t, svbfloat16_t, svbfloat16_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmla_n_bf16_m)))
-svbfloat16_t svmla_n_bf16_m(svbool_t, svbfloat16_t, svbfloat16_t, bfloat16_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmla_n_bf16_x)))
-svbfloat16_t svmla_n_bf16_x(svbool_t, svbfloat16_t, svbfloat16_t, bfloat16_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmla_n_bf16_z)))
-svbfloat16_t svmla_n_bf16_z(svbool_t, svbfloat16_t, svbfloat16_t, bfloat16_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmla_bf16_m)))
-svbfloat16_t svmla_bf16_m(svbool_t, svbfloat16_t, svbfloat16_t, svbfloat16_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmla_bf16_x)))
-svbfloat16_t svmla_bf16_x(svbool_t, svbfloat16_t, svbfloat16_t, svbfloat16_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmla_bf16_z)))
-svbfloat16_t svmla_bf16_z(svbool_t, svbfloat16_t, svbfloat16_t, svbfloat16_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmla_lane_bf16)))
-svbfloat16_t svmla_lane_bf16(svbfloat16_t, svbfloat16_t, svbfloat16_t, uint64_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmls_n_bf16_m)))
-svbfloat16_t svmls_n_bf16_m(svbool_t, svbfloat16_t, svbfloat16_t, bfloat16_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmls_n_bf16_x)))
-svbfloat16_t svmls_n_bf16_x(svbool_t, svbfloat16_t, svbfloat16_t, bfloat16_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmls_n_bf16_z)))
-svbfloat16_t svmls_n_bf16_z(svbool_t, svbfloat16_t, svbfloat16_t, bfloat16_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmls_bf16_m)))
-svbfloat16_t svmls_bf16_m(svbool_t, svbfloat16_t, svbfloat16_t, svbfloat16_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmls_bf16_x)))
-svbfloat16_t svmls_bf16_x(svbool_t, svbfloat16_t, svbfloat16_t, svbfloat16_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmls_bf16_z)))
-svbfloat16_t svmls_bf16_z(svbool_t, svbfloat16_t, svbfloat16_t, svbfloat16_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmls_lane_bf16)))
-svbfloat16_t svmls_lane_bf16(svbfloat16_t, svbfloat16_t, svbfloat16_t, uint64_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmul_n_bf16_m)))
-svbfloat16_t svmul_n_bf16_m(svbool_t, svbfloat16_t, bfloat16_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmul_n_bf16_x)))
-svbfloat16_t svmul_n_bf16_x(svbool_t, svbfloat16_t, bfloat16_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmul_n_bf16_z)))
-svbfloat16_t svmul_n_bf16_z(svbool_t, svbfloat16_t, bfloat16_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmul_bf16_m)))
-svbfloat16_t svmul_bf16_m(svbool_t, svbfloat16_t, svbfloat16_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmul_bf16_x)))
-svbfloat16_t svmul_bf16_x(svbool_t, svbfloat16_t, svbfloat16_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmul_bf16_z)))
-svbfloat16_t svmul_bf16_z(svbool_t, svbfloat16_t, svbfloat16_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmul_lane_bf16)))
-svbfloat16_t svmul_lane_bf16(svbfloat16_t, svbfloat16_t, uint64_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsub_n_bf16_m)))
-svbfloat16_t svsub_n_bf16_m(svbool_t, svbfloat16_t, bfloat16_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsub_n_bf16_x)))
-svbfloat16_t svsub_n_bf16_x(svbool_t, svbfloat16_t, bfloat16_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsub_n_bf16_z)))
-svbfloat16_t svsub_n_bf16_z(svbool_t, svbfloat16_t, bfloat16_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsub_bf16_m)))
-svbfloat16_t svsub_bf16_m(svbool_t, svbfloat16_t, svbfloat16_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsub_bf16_x)))
-svbfloat16_t svsub_bf16_x(svbool_t, svbfloat16_t, svbfloat16_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsub_bf16_z)))
-svbfloat16_t svsub_bf16_z(svbool_t, svbfloat16_t, svbfloat16_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadd_n_bf16_m)))
-svbfloat16_t svadd_m(svbool_t, svbfloat16_t, bfloat16_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadd_n_bf16_x)))
-svbfloat16_t svadd_x(svbool_t, svbfloat16_t, bfloat16_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadd_n_bf16_z)))
-svbfloat16_t svadd_z(svbool_t, svbfloat16_t, bfloat16_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadd_bf16_m)))
-svbfloat16_t svadd_m(svbool_t, svbfloat16_t, svbfloat16_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadd_bf16_x)))
-svbfloat16_t svadd_x(svbool_t, svbfloat16_t, svbfloat16_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadd_bf16_z)))
-svbfloat16_t svadd_z(svbool_t, svbfloat16_t, svbfloat16_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclamp_bf16)))
-svbfloat16_t svclamp(svbfloat16_t, svbfloat16_t, svbfloat16_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_n_bf16_m)))
-svbfloat16_t svmax_m(svbool_t, svbfloat16_t, bfloat16_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_n_bf16_x)))
-svbfloat16_t svmax_x(svbool_t, svbfloat16_t, bfloat16_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_n_bf16_z)))
-svbfloat16_t svmax_z(svbool_t, svbfloat16_t, bfloat16_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_bf16_m)))
-svbfloat16_t svmax_m(svbool_t, svbfloat16_t, svbfloat16_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_bf16_x)))
-svbfloat16_t svmax_x(svbool_t, svbfloat16_t, svbfloat16_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_bf16_z)))
-svbfloat16_t svmax_z(svbool_t, svbfloat16_t, svbfloat16_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmaxnm_n_bf16_m)))
-svbfloat16_t svmaxnm_m(svbool_t, svbfloat16_t, bfloat16_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmaxnm_n_bf16_x)))
-svbfloat16_t svmaxnm_x(svbool_t, svbfloat16_t, bfloat16_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmaxnm_n_bf16_z)))
-svbfloat16_t svmaxnm_z(svbool_t, svbfloat16_t, bfloat16_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmaxnm_bf16_m)))
-svbfloat16_t svmaxnm_m(svbool_t, svbfloat16_t, svbfloat16_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmaxnm_bf16_x)))
-svbfloat16_t svmaxnm_x(svbool_t, svbfloat16_t, svbfloat16_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmaxnm_bf16_z)))
-svbfloat16_t svmaxnm_z(svbool_t, svbfloat16_t, svbfloat16_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_n_bf16_m)))
-svbfloat16_t svmin_m(svbool_t, svbfloat16_t, bfloat16_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_n_bf16_x)))
-svbfloat16_t svmin_x(svbool_t, svbfloat16_t, bfloat16_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_n_bf16_z)))
-svbfloat16_t svmin_z(svbool_t, svbfloat16_t, bfloat16_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_bf16_m)))
-svbfloat16_t svmin_m(svbool_t, svbfloat16_t, svbfloat16_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_bf16_x)))
-svbfloat16_t svmin_x(svbool_t, svbfloat16_t, svbfloat16_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_bf16_z)))
-svbfloat16_t svmin_z(svbool_t, svbfloat16_t, svbfloat16_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svminnm_n_bf16_m)))
-svbfloat16_t svminnm_m(svbool_t, svbfloat16_t, bfloat16_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svminnm_n_bf16_x)))
-svbfloat16_t svminnm_x(svbool_t, svbfloat16_t, bfloat16_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svminnm_n_bf16_z)))
-svbfloat16_t svminnm_z(svbool_t, svbfloat16_t, bfloat16_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svminnm_bf16_m)))
-svbfloat16_t svminnm_m(svbool_t, svbfloat16_t, svbfloat16_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svminnm_bf16_x)))
-svbfloat16_t svminnm_x(svbool_t, svbfloat16_t, svbfloat16_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svminnm_bf16_z)))
-svbfloat16_t svminnm_z(svbool_t, svbfloat16_t, svbfloat16_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmla_n_bf16_m)))
-svbfloat16_t svmla_m(svbool_t, svbfloat16_t, svbfloat16_t, bfloat16_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmla_n_bf16_x)))
-svbfloat16_t svmla_x(svbool_t, svbfloat16_t, svbfloat16_t, bfloat16_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmla_n_bf16_z)))
-svbfloat16_t svmla_z(svbool_t, svbfloat16_t, svbfloat16_t, bfloat16_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmla_bf16_m)))
-svbfloat16_t svmla_m(svbool_t, svbfloat16_t, svbfloat16_t, svbfloat16_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmla_bf16_x)))
-svbfloat16_t svmla_x(svbool_t, svbfloat16_t, svbfloat16_t, svbfloat16_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmla_bf16_z)))
-svbfloat16_t svmla_z(svbool_t, svbfloat16_t, svbfloat16_t, svbfloat16_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmla_lane_bf16)))
-svbfloat16_t svmla_lane(svbfloat16_t, svbfloat16_t, svbfloat16_t, uint64_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmls_n_bf16_m)))
-svbfloat16_t svmls_m(svbool_t, svbfloat16_t, svbfloat16_t, bfloat16_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmls_n_bf16_x)))
-svbfloat16_t svmls_x(svbool_t, svbfloat16_t, svbfloat16_t, bfloat16_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmls_n_bf16_z)))
-svbfloat16_t svmls_z(svbool_t, svbfloat16_t, svbfloat16_t, bfloat16_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmls_bf16_m)))
-svbfloat16_t svmls_m(svbool_t, svbfloat16_t, svbfloat16_t, svbfloat16_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmls_bf16_x)))
-svbfloat16_t svmls_x(svbool_t, svbfloat16_t, svbfloat16_t, svbfloat16_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmls_bf16_z)))
-svbfloat16_t svmls_z(svbool_t, svbfloat16_t, svbfloat16_t, svbfloat16_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmls_lane_bf16)))
-svbfloat16_t svmls_lane(svbfloat16_t, svbfloat16_t, svbfloat16_t, uint64_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmul_n_bf16_m)))
-svbfloat16_t svmul_m(svbool_t, svbfloat16_t, bfloat16_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmul_n_bf16_x)))
-svbfloat16_t svmul_x(svbool_t, svbfloat16_t, bfloat16_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmul_n_bf16_z)))
-svbfloat16_t svmul_z(svbool_t, svbfloat16_t, bfloat16_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmul_bf16_m)))
-svbfloat16_t svmul_m(svbool_t, svbfloat16_t, svbfloat16_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmul_bf16_x)))
-svbfloat16_t svmul_x(svbool_t, svbfloat16_t, svbfloat16_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmul_bf16_z)))
-svbfloat16_t svmul_z(svbool_t, svbfloat16_t, svbfloat16_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmul_lane_bf16)))
-svbfloat16_t svmul_lane(svbfloat16_t, svbfloat16_t, uint64_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsub_n_bf16_m)))
-svbfloat16_t svsub_m(svbool_t, svbfloat16_t, bfloat16_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsub_n_bf16_x)))
-svbfloat16_t svsub_x(svbool_t, svbfloat16_t, bfloat16_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsub_n_bf16_z)))
-svbfloat16_t svsub_z(svbool_t, svbfloat16_t, bfloat16_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsub_bf16_m)))
-svbfloat16_t svsub_m(svbool_t, svbfloat16_t, svbfloat16_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsub_bf16_x)))
-svbfloat16_t svsub_x(svbool_t, svbfloat16_t, svbfloat16_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsub_bf16_z)))
-svbfloat16_t svsub_z(svbool_t, svbfloat16_t, svbfloat16_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbdep_n_u8)))
-svuint8_t svbdep_n_u8(svuint8_t, uint8_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbdep_n_u32)))
-svuint32_t svbdep_n_u32(svuint32_t, uint32_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbdep_n_u64)))
-svuint64_t svbdep_n_u64(svuint64_t, uint64_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbdep_n_u16)))
-svuint16_t svbdep_n_u16(svuint16_t, uint16_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbdep_u8)))
-svuint8_t svbdep_u8(svuint8_t, svuint8_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbdep_u32)))
-svuint32_t svbdep_u32(svuint32_t, svuint32_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbdep_u64)))
-svuint64_t svbdep_u64(svuint64_t, svuint64_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbdep_u16)))
-svuint16_t svbdep_u16(svuint16_t, svuint16_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbext_n_u8)))
-svuint8_t svbext_n_u8(svuint8_t, uint8_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbext_n_u32)))
-svuint32_t svbext_n_u32(svuint32_t, uint32_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbext_n_u64)))
-svuint64_t svbext_n_u64(svuint64_t, uint64_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbext_n_u16)))
-svuint16_t svbext_n_u16(svuint16_t, uint16_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbext_u8)))
-svuint8_t svbext_u8(svuint8_t, svuint8_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbext_u32)))
-svuint32_t svbext_u32(svuint32_t, svuint32_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbext_u64)))
-svuint64_t svbext_u64(svuint64_t, svuint64_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbext_u16)))
-svuint16_t svbext_u16(svuint16_t, svuint16_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbgrp_n_u8)))
-svuint8_t svbgrp_n_u8(svuint8_t, uint8_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbgrp_n_u32)))
-svuint32_t svbgrp_n_u32(svuint32_t, uint32_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbgrp_n_u64)))
-svuint64_t svbgrp_n_u64(svuint64_t, uint64_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbgrp_n_u16)))
-svuint16_t svbgrp_n_u16(svuint16_t, uint16_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbgrp_u8)))
-svuint8_t svbgrp_u8(svuint8_t, svuint8_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbgrp_u32)))
-svuint32_t svbgrp_u32(svuint32_t, svuint32_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbgrp_u64)))
-svuint64_t svbgrp_u64(svuint64_t, svuint64_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbgrp_u16)))
-svuint16_t svbgrp_u16(svuint16_t, svuint16_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbdep_n_u8)))
-svuint8_t svbdep(svuint8_t, uint8_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbdep_n_u32)))
-svuint32_t svbdep(svuint32_t, uint32_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbdep_n_u64)))
-svuint64_t svbdep(svuint64_t, uint64_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbdep_n_u16)))
-svuint16_t svbdep(svuint16_t, uint16_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbdep_u8)))
-svuint8_t svbdep(svuint8_t, svuint8_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbdep_u32)))
-svuint32_t svbdep(svuint32_t, svuint32_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbdep_u64)))
-svuint64_t svbdep(svuint64_t, svuint64_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbdep_u16)))
-svuint16_t svbdep(svuint16_t, svuint16_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbext_n_u8)))
-svuint8_t svbext(svuint8_t, uint8_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbext_n_u32)))
-svuint32_t svbext(svuint32_t, uint32_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbext_n_u64)))
-svuint64_t svbext(svuint64_t, uint64_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbext_n_u16)))
-svuint16_t svbext(svuint16_t, uint16_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbext_u8)))
-svuint8_t svbext(svuint8_t, svuint8_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbext_u32)))
-svuint32_t svbext(svuint32_t, svuint32_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbext_u64)))
-svuint64_t svbext(svuint64_t, svuint64_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbext_u16)))
-svuint16_t svbext(svuint16_t, svuint16_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbgrp_n_u8)))
-svuint8_t svbgrp(svuint8_t, uint8_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbgrp_n_u32)))
-svuint32_t svbgrp(svuint32_t, uint32_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbgrp_n_u64)))
-svuint64_t svbgrp(svuint64_t, uint64_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbgrp_n_u16)))
-svuint16_t svbgrp(svuint16_t, uint16_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbgrp_u8)))
-svuint8_t svbgrp(svuint8_t, svuint8_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbgrp_u32)))
-svuint32_t svbgrp(svuint32_t, svuint32_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbgrp_u64)))
-svuint64_t svbgrp(svuint64_t, svuint64_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbgrp_u16)))
-svuint16_t svbgrp(svuint16_t, svuint16_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsm4e_u32)))
-svuint32_t svsm4e_u32(svuint32_t, svuint32_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsm4ekey_u32)))
-svuint32_t svsm4ekey_u32(svuint32_t, svuint32_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsm4e_u32)))
-svuint32_t svsm4e(svuint32_t, svuint32_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsm4ekey_u32)))
-svuint32_t svsm4ekey(svuint32_t, svuint32_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64base_index_u32)))
-svuint32_t svld1q_gather_u64base_index_u32(svbool_t, svuint64_t, int64_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64base_index_u64)))
-svuint64_t svld1q_gather_u64base_index_u64(svbool_t, svuint64_t, int64_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64base_index_u16)))
-svuint16_t svld1q_gather_u64base_index_u16(svbool_t, svuint64_t, int64_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64base_index_bf16)))
-svbfloat16_t svld1q_gather_u64base_index_bf16(svbool_t, svuint64_t, int64_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64base_index_f64)))
-svfloat64_t svld1q_gather_u64base_index_f64(svbool_t, svuint64_t, int64_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64base_index_f32)))
-svfloat32_t svld1q_gather_u64base_index_f32(svbool_t, svuint64_t, int64_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64base_index_f16)))
-svfloat16_t svld1q_gather_u64base_index_f16(svbool_t, svuint64_t, int64_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64base_index_s32)))
-svint32_t svld1q_gather_u64base_index_s32(svbool_t, svuint64_t, int64_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64base_index_s64)))
-svint64_t svld1q_gather_u64base_index_s64(svbool_t, svuint64_t, int64_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64base_index_s16)))
-svint16_t svld1q_gather_u64base_index_s16(svbool_t, svuint64_t, int64_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64base_offset_u8)))
-svuint8_t svld1q_gather_u64base_offset_u8(svbool_t, svuint64_t, int64_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64base_offset_u32)))
-svuint32_t svld1q_gather_u64base_offset_u32(svbool_t, svuint64_t, int64_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64base_offset_u64)))
-svuint64_t svld1q_gather_u64base_offset_u64(svbool_t, svuint64_t, int64_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64base_offset_u16)))
-svuint16_t svld1q_gather_u64base_offset_u16(svbool_t, svuint64_t, int64_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64base_offset_bf16)))
-svbfloat16_t svld1q_gather_u64base_offset_bf16(svbool_t, svuint64_t, int64_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64base_offset_s8)))
-svint8_t svld1q_gather_u64base_offset_s8(svbool_t, svuint64_t, int64_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64base_offset_f64)))
-svfloat64_t svld1q_gather_u64base_offset_f64(svbool_t, svuint64_t, int64_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64base_offset_f32)))
-svfloat32_t svld1q_gather_u64base_offset_f32(svbool_t, svuint64_t, int64_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64base_offset_f16)))
-svfloat16_t svld1q_gather_u64base_offset_f16(svbool_t, svuint64_t, int64_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64base_offset_s32)))
-svint32_t svld1q_gather_u64base_offset_s32(svbool_t, svuint64_t, int64_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64base_offset_s64)))
-svint64_t svld1q_gather_u64base_offset_s64(svbool_t, svuint64_t, int64_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64base_offset_mf8)))
-svmfloat8_t svld1q_gather_u64base_offset_mf8(svbool_t, svuint64_t, int64_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64base_offset_s16)))
-svint16_t svld1q_gather_u64base_offset_s16(svbool_t, svuint64_t, int64_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64base_u8)))
-svuint8_t svld1q_gather_u64base_u8(svbool_t, svuint64_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64base_u32)))
-svuint32_t svld1q_gather_u64base_u32(svbool_t, svuint64_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64base_u64)))
-svuint64_t svld1q_gather_u64base_u64(svbool_t, svuint64_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64base_u16)))
-svuint16_t svld1q_gather_u64base_u16(svbool_t, svuint64_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64base_bf16)))
-svbfloat16_t svld1q_gather_u64base_bf16(svbool_t, svuint64_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64base_s8)))
-svint8_t svld1q_gather_u64base_s8(svbool_t, svuint64_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64base_f64)))
-svfloat64_t svld1q_gather_u64base_f64(svbool_t, svuint64_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64base_f32)))
-svfloat32_t svld1q_gather_u64base_f32(svbool_t, svuint64_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64base_f16)))
-svfloat16_t svld1q_gather_u64base_f16(svbool_t, svuint64_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64base_s32)))
-svint32_t svld1q_gather_u64base_s32(svbool_t, svuint64_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64base_s64)))
-svint64_t svld1q_gather_u64base_s64(svbool_t, svuint64_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64base_mf8)))
-svmfloat8_t svld1q_gather_u64base_mf8(svbool_t, svuint64_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64base_s16)))
-svint16_t svld1q_gather_u64base_s16(svbool_t, svuint64_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64index_u32)))
-svuint32_t svld1q_gather_u64index_u32(svbool_t, uint32_t const *, svuint64_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64index_u64)))
-svuint64_t svld1q_gather_u64index_u64(svbool_t, uint64_t const *, svuint64_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64index_u16)))
-svuint16_t svld1q_gather_u64index_u16(svbool_t, uint16_t const *, svuint64_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64index_bf16)))
-svbfloat16_t svld1q_gather_u64index_bf16(svbool_t, bfloat16_t const *, svuint64_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64index_f64)))
-svfloat64_t svld1q_gather_u64index_f64(svbool_t, float64_t const *, svuint64_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64index_f32)))
-svfloat32_t svld1q_gather_u64index_f32(svbool_t, float32_t const *, svuint64_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64index_f16)))
-svfloat16_t svld1q_gather_u64index_f16(svbool_t, float16_t const *, svuint64_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64index_s32)))
-svint32_t svld1q_gather_u64index_s32(svbool_t, int32_t const *, svuint64_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64index_s64)))
-svint64_t svld1q_gather_u64index_s64(svbool_t, int64_t const *, svuint64_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64index_s16)))
-svint16_t svld1q_gather_u64index_s16(svbool_t, int16_t const *, svuint64_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64offset_u8)))
-svuint8_t svld1q_gather_u64offset_u8(svbool_t, uint8_t const *, svuint64_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64offset_u32)))
-svuint32_t svld1q_gather_u64offset_u32(svbool_t, uint32_t const *, svuint64_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64offset_u64)))
-svuint64_t svld1q_gather_u64offset_u64(svbool_t, uint64_t const *, svuint64_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64offset_u16)))
-svuint16_t svld1q_gather_u64offset_u16(svbool_t, uint16_t const *, svuint64_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64offset_bf16)))
-svbfloat16_t svld1q_gather_u64offset_bf16(svbool_t, bfloat16_t const *, svuint64_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64offset_s8)))
-svint8_t svld1q_gather_u64offset_s8(svbool_t, int8_t const *, svuint64_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64offset_f64)))
-svfloat64_t svld1q_gather_u64offset_f64(svbool_t, float64_t const *, svuint64_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64offset_f32)))
-svfloat32_t svld1q_gather_u64offset_f32(svbool_t, float32_t const *, svuint64_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64offset_f16)))
-svfloat16_t svld1q_gather_u64offset_f16(svbool_t, float16_t const *, svuint64_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64offset_s32)))
-svint32_t svld1q_gather_u64offset_s32(svbool_t, int32_t const *, svuint64_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64offset_s64)))
-svint64_t svld1q_gather_u64offset_s64(svbool_t, int64_t const *, svuint64_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64offset_mf8)))
-svmfloat8_t svld1q_gather_u64offset_mf8(svbool_t, mfloat8_t const *, svuint64_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64offset_s16)))
-svint16_t svld1q_gather_u64offset_s16(svbool_t, int16_t const *, svuint64_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1udq_u64)))
-svuint64_t svld1udq_u64(svbool_t, uint64_t const *);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1udq_f64)))
-svfloat64_t svld1udq_f64(svbool_t, float64_t const *);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1udq_s64)))
-svint64_t svld1udq_s64(svbool_t, int64_t const *);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1udq_vnum_u64)))
-svuint64_t svld1udq_vnum_u64(svbool_t, uint64_t const *, int64_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1udq_vnum_f64)))
-svfloat64_t svld1udq_vnum_f64(svbool_t, float64_t const *, int64_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1udq_vnum_s64)))
-svint64_t svld1udq_vnum_s64(svbool_t, int64_t const *, int64_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1uwq_u32)))
-svuint32_t svld1uwq_u32(svbool_t, uint32_t const *);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1uwq_f32)))
-svfloat32_t svld1uwq_f32(svbool_t, float32_t const *);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1uwq_s32)))
-svint32_t svld1uwq_s32(svbool_t, int32_t const *);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1uwq_vnum_u32)))
-svuint32_t svld1uwq_vnum_u32(svbool_t, uint32_t const *, int64_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1uwq_vnum_f32)))
-svfloat32_t svld1uwq_vnum_f32(svbool_t, float32_t const *, int64_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1uwq_vnum_s32)))
-svint32_t svld1uwq_vnum_s32(svbool_t, int32_t const *, int64_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1dq_u64)))
-void svst1dq_u64(svbool_t, uint64_t *, svuint64_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1dq_f64)))
-void svst1dq_f64(svbool_t, float64_t *, svfloat64_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1dq_s64)))
-void svst1dq_s64(svbool_t, int64_t *, svint64_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1dq_vnum_u64)))
-void svst1dq_vnum_u64(svbool_t, uint64_t *, int64_t, svuint64_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1dq_vnum_f64)))
-void svst1dq_vnum_f64(svbool_t, float64_t *, int64_t, svfloat64_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1dq_vnum_s64)))
-void svst1dq_vnum_s64(svbool_t, int64_t *, int64_t, svint64_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64base_u8)))
-void svst1q_scatter_u64base_u8(svbool_t, svuint64_t, svuint8_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64base_u32)))
-void svst1q_scatter_u64base_u32(svbool_t, svuint64_t, svuint32_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64base_u64)))
-void svst1q_scatter_u64base_u64(svbool_t, svuint64_t, svuint64_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64base_u16)))
-void svst1q_scatter_u64base_u16(svbool_t, svuint64_t, svuint16_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64base_bf16)))
-void svst1q_scatter_u64base_bf16(svbool_t, svuint64_t, svbfloat16_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64base_s8)))
-void svst1q_scatter_u64base_s8(svbool_t, svuint64_t, svint8_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64base_f64)))
-void svst1q_scatter_u64base_f64(svbool_t, svuint64_t, svfloat64_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64base_f32)))
-void svst1q_scatter_u64base_f32(svbool_t, svuint64_t, svfloat32_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64base_f16)))
-void svst1q_scatter_u64base_f16(svbool_t, svuint64_t, svfloat16_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64base_s32)))
-void svst1q_scatter_u64base_s32(svbool_t, svuint64_t, svint32_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64base_s64)))
-void svst1q_scatter_u64base_s64(svbool_t, svuint64_t, svint64_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64base_mf8)))
-void svst1q_scatter_u64base_mf8(svbool_t, svuint64_t, svmfloat8_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64base_s16)))
-void svst1q_scatter_u64base_s16(svbool_t, svuint64_t, svint16_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64base_index_u32)))
-void svst1q_scatter_u64base_index_u32(svbool_t, svuint64_t, int64_t, svuint32_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64base_index_u64)))
-void svst1q_scatter_u64base_index_u64(svbool_t, svuint64_t, int64_t, svuint64_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64base_index_u16)))
-void svst1q_scatter_u64base_index_u16(svbool_t, svuint64_t, int64_t, svuint16_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64base_index_bf16)))
-void svst1q_scatter_u64base_index_bf16(svbool_t, svuint64_t, int64_t, svbfloat16_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64base_index_f64)))
-void svst1q_scatter_u64base_index_f64(svbool_t, svuint64_t, int64_t, svfloat64_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64base_index_f32)))
-void svst1q_scatter_u64base_index_f32(svbool_t, svuint64_t, int64_t, svfloat32_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64base_index_f16)))
-void svst1q_scatter_u64base_index_f16(svbool_t, svuint64_t, int64_t, svfloat16_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64base_index_s32)))
-void svst1q_scatter_u64base_index_s32(svbool_t, svuint64_t, int64_t, svint32_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64base_index_s64)))
-void svst1q_scatter_u64base_index_s64(svbool_t, svuint64_t, int64_t, svint64_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64base_index_s16)))
-void svst1q_scatter_u64base_index_s16(svbool_t, svuint64_t, int64_t, svint16_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64base_offset_u8)))
-void svst1q_scatter_u64base_offset_u8(svbool_t, svuint64_t, int64_t, svuint8_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64base_offset_u32)))
-void svst1q_scatter_u64base_offset_u32(svbool_t, svuint64_t, int64_t, svuint32_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64base_offset_u64)))
-void svst1q_scatter_u64base_offset_u64(svbool_t, svuint64_t, int64_t, svuint64_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64base_offset_u16)))
-void svst1q_scatter_u64base_offset_u16(svbool_t, svuint64_t, int64_t, svuint16_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64base_offset_bf16)))
-void svst1q_scatter_u64base_offset_bf16(svbool_t, svuint64_t, int64_t, svbfloat16_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64base_offset_s8)))
-void svst1q_scatter_u64base_offset_s8(svbool_t, svuint64_t, int64_t, svint8_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64base_offset_f64)))
-void svst1q_scatter_u64base_offset_f64(svbool_t, svuint64_t, int64_t, svfloat64_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64base_offset_f32)))
-void svst1q_scatter_u64base_offset_f32(svbool_t, svuint64_t, int64_t, svfloat32_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64base_offset_f16)))
-void svst1q_scatter_u64base_offset_f16(svbool_t, svuint64_t, int64_t, svfloat16_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64base_offset_s32)))
-void svst1q_scatter_u64base_offset_s32(svbool_t, svuint64_t, int64_t, svint32_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64base_offset_s64)))
-void svst1q_scatter_u64base_offset_s64(svbool_t, svuint64_t, int64_t, svint64_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64base_offset_mf8)))
-void svst1q_scatter_u64base_offset_mf8(svbool_t, svuint64_t, int64_t, svmfloat8_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64base_offset_s16)))
-void svst1q_scatter_u64base_offset_s16(svbool_t, svuint64_t, int64_t, svint16_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_s64index_u32)))
-void svst1q_scatter_s64index_u32(svbool_t, uint32_t *, svint64_t, svuint32_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_s64index_u64)))
-void svst1q_scatter_s64index_u64(svbool_t, uint64_t *, svint64_t, svuint64_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_s64index_u16)))
-void svst1q_scatter_s64index_u16(svbool_t, uint16_t *, svint64_t, svuint16_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_s64index_bf16)))
-void svst1q_scatter_s64index_bf16(svbool_t, bfloat16_t *, svint64_t, svbfloat16_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_s64index_f64)))
-void svst1q_scatter_s64index_f64(svbool_t, float64_t *, svint64_t, svfloat64_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_s64index_f32)))
-void svst1q_scatter_s64index_f32(svbool_t, float32_t *, svint64_t, svfloat32_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_s64index_f16)))
-void svst1q_scatter_s64index_f16(svbool_t, float16_t *, svint64_t, svfloat16_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_s64index_s32)))
-void svst1q_scatter_s64index_s32(svbool_t, int32_t *, svint64_t, svint32_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_s64index_s64)))
-void svst1q_scatter_s64index_s64(svbool_t, int64_t *, svint64_t, svint64_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_s64index_s16)))
-void svst1q_scatter_s64index_s16(svbool_t, int16_t *, svint64_t, svint16_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64index_u32)))
-void svst1q_scatter_u64index_u32(svbool_t, uint32_t *, svuint64_t, svuint32_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64index_u64)))
-void svst1q_scatter_u64index_u64(svbool_t, uint64_t *, svuint64_t, svuint64_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64index_u16)))
-void svst1q_scatter_u64index_u16(svbool_t, uint16_t *, svuint64_t, svuint16_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64index_bf16)))
-void svst1q_scatter_u64index_bf16(svbool_t, bfloat16_t *, svuint64_t, svbfloat16_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64index_f64)))
-void svst1q_scatter_u64index_f64(svbool_t, float64_t *, svuint64_t, svfloat64_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64index_f32)))
-void svst1q_scatter_u64index_f32(svbool_t, float32_t *, svuint64_t, svfloat32_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64index_f16)))
-void svst1q_scatter_u64index_f16(svbool_t, float16_t *, svuint64_t, svfloat16_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64index_s32)))
-void svst1q_scatter_u64index_s32(svbool_t, int32_t *, svuint64_t, svint32_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64index_s64)))
-void svst1q_scatter_u64index_s64(svbool_t, int64_t *, svuint64_t, svint64_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64index_s16)))
-void svst1q_scatter_u64index_s16(svbool_t, int16_t *, svuint64_t, svint16_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_s64offset_u8)))
-void svst1q_scatter_s64offset_u8(svbool_t, uint8_t *, svint64_t, svuint8_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_s64offset_u32)))
-void svst1q_scatter_s64offset_u32(svbool_t, uint32_t *, svint64_t, svuint32_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_s64offset_u64)))
-void svst1q_scatter_s64offset_u64(svbool_t, uint64_t *, svint64_t, svuint64_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_s64offset_u16)))
-void svst1q_scatter_s64offset_u16(svbool_t, uint16_t *, svint64_t, svuint16_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_s64offset_bf16)))
-void svst1q_scatter_s64offset_bf16(svbool_t, bfloat16_t *, svint64_t, svbfloat16_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_s64offset_s8)))
-void svst1q_scatter_s64offset_s8(svbool_t, int8_t *, svint64_t, svint8_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_s64offset_f64)))
-void svst1q_scatter_s64offset_f64(svbool_t, float64_t *, svint64_t, svfloat64_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_s64offset_f32)))
-void svst1q_scatter_s64offset_f32(svbool_t, float32_t *, svint64_t, svfloat32_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_s64offset_f16)))
-void svst1q_scatter_s64offset_f16(svbool_t, float16_t *, svint64_t, svfloat16_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_s64offset_s32)))
-void svst1q_scatter_s64offset_s32(svbool_t, int32_t *, svint64_t, svint32_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_s64offset_s64)))
-void svst1q_scatter_s64offset_s64(svbool_t, int64_t *, svint64_t, svint64_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_s64offset_mf8)))
-void svst1q_scatter_s64offset_mf8(svbool_t, mfloat8_t *, svint64_t, svmfloat8_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_s64offset_s16)))
-void svst1q_scatter_s64offset_s16(svbool_t, int16_t *, svint64_t, svint16_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64offset_u8)))
-void svst1q_scatter_u64offset_u8(svbool_t, uint8_t *, svuint64_t, svuint8_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64offset_u32)))
-void svst1q_scatter_u64offset_u32(svbool_t, uint32_t *, svuint64_t, svuint32_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64offset_u64)))
-void svst1q_scatter_u64offset_u64(svbool_t, uint64_t *, svuint64_t, svuint64_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64offset_u16)))
-void svst1q_scatter_u64offset_u16(svbool_t, uint16_t *, svuint64_t, svuint16_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64offset_bf16)))
-void svst1q_scatter_u64offset_bf16(svbool_t, bfloat16_t *, svuint64_t, svbfloat16_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64offset_s8)))
-void svst1q_scatter_u64offset_s8(svbool_t, int8_t *, svuint64_t, svint8_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64offset_f64)))
-void svst1q_scatter_u64offset_f64(svbool_t, float64_t *, svuint64_t, svfloat64_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64offset_f32)))
-void svst1q_scatter_u64offset_f32(svbool_t, float32_t *, svuint64_t, svfloat32_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64offset_f16)))
-void svst1q_scatter_u64offset_f16(svbool_t, float16_t *, svuint64_t, svfloat16_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64offset_s32)))
-void svst1q_scatter_u64offset_s32(svbool_t, int32_t *, svuint64_t, svint32_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64offset_s64)))
-void svst1q_scatter_u64offset_s64(svbool_t, int64_t *, svuint64_t, svint64_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64offset_mf8)))
-void svst1q_scatter_u64offset_mf8(svbool_t, mfloat8_t *, svuint64_t, svmfloat8_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64offset_s16)))
-void svst1q_scatter_u64offset_s16(svbool_t, int16_t *, svuint64_t, svint16_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1wq_u32)))
-void svst1wq_u32(svbool_t, uint32_t *, svuint32_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1wq_f32)))
-void svst1wq_f32(svbool_t, float32_t *, svfloat32_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1wq_s32)))
-void svst1wq_s32(svbool_t, int32_t *, svint32_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1wq_vnum_u32)))
-void svst1wq_vnum_u32(svbool_t, uint32_t *, int64_t, svuint32_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1wq_vnum_f32)))
-void svst1wq_vnum_f32(svbool_t, float32_t *, int64_t, svfloat32_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1wq_vnum_s32)))
-void svst1wq_vnum_s32(svbool_t, int32_t *, int64_t, svint32_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64base_index_u32)))
-svuint32_t svld1q_gather_index_u32(svbool_t, svuint64_t, int64_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64base_index_u64)))
-svuint64_t svld1q_gather_index_u64(svbool_t, svuint64_t, int64_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64base_index_u16)))
-svuint16_t svld1q_gather_index_u16(svbool_t, svuint64_t, int64_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64base_index_bf16)))
-svbfloat16_t svld1q_gather_index_bf16(svbool_t, svuint64_t, int64_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64base_index_f64)))
-svfloat64_t svld1q_gather_index_f64(svbool_t, svuint64_t, int64_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64base_index_f32)))
-svfloat32_t svld1q_gather_index_f32(svbool_t, svuint64_t, int64_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64base_index_f16)))
-svfloat16_t svld1q_gather_index_f16(svbool_t, svuint64_t, int64_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64base_index_s32)))
-svint32_t svld1q_gather_index_s32(svbool_t, svuint64_t, int64_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64base_index_s64)))
-svint64_t svld1q_gather_index_s64(svbool_t, svuint64_t, int64_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64base_index_s16)))
-svint16_t svld1q_gather_index_s16(svbool_t, svuint64_t, int64_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64base_offset_u8)))
-svuint8_t svld1q_gather_offset_u8(svbool_t, svuint64_t, int64_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64base_offset_u32)))
-svuint32_t svld1q_gather_offset_u32(svbool_t, svuint64_t, int64_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64base_offset_u64)))
-svuint64_t svld1q_gather_offset_u64(svbool_t, svuint64_t, int64_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64base_offset_u16)))
-svuint16_t svld1q_gather_offset_u16(svbool_t, svuint64_t, int64_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64base_offset_bf16)))
-svbfloat16_t svld1q_gather_offset_bf16(svbool_t, svuint64_t, int64_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64base_offset_s8)))
-svint8_t svld1q_gather_offset_s8(svbool_t, svuint64_t, int64_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64base_offset_f64)))
-svfloat64_t svld1q_gather_offset_f64(svbool_t, svuint64_t, int64_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64base_offset_f32)))
-svfloat32_t svld1q_gather_offset_f32(svbool_t, svuint64_t, int64_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64base_offset_f16)))
-svfloat16_t svld1q_gather_offset_f16(svbool_t, svuint64_t, int64_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64base_offset_s32)))
-svint32_t svld1q_gather_offset_s32(svbool_t, svuint64_t, int64_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64base_offset_s64)))
-svint64_t svld1q_gather_offset_s64(svbool_t, svuint64_t, int64_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64base_offset_mf8)))
-svmfloat8_t svld1q_gather_offset_mf8(svbool_t, svuint64_t, int64_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64base_offset_s16)))
-svint16_t svld1q_gather_offset_s16(svbool_t, svuint64_t, int64_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64base_u8)))
-svuint8_t svld1q_gather_u8(svbool_t, svuint64_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64base_u32)))
-svuint32_t svld1q_gather_u32(svbool_t, svuint64_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64base_u64)))
-svuint64_t svld1q_gather_u64(svbool_t, svuint64_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64base_u16)))
-svuint16_t svld1q_gather_u16(svbool_t, svuint64_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64base_bf16)))
-svbfloat16_t svld1q_gather_bf16(svbool_t, svuint64_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64base_s8)))
-svint8_t svld1q_gather_s8(svbool_t, svuint64_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64base_f64)))
-svfloat64_t svld1q_gather_f64(svbool_t, svuint64_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64base_f32)))
-svfloat32_t svld1q_gather_f32(svbool_t, svuint64_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64base_f16)))
-svfloat16_t svld1q_gather_f16(svbool_t, svuint64_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64base_s32)))
-svint32_t svld1q_gather_s32(svbool_t, svuint64_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64base_s64)))
-svint64_t svld1q_gather_s64(svbool_t, svuint64_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64base_mf8)))
-svmfloat8_t svld1q_gather_mf8(svbool_t, svuint64_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64base_s16)))
-svint16_t svld1q_gather_s16(svbool_t, svuint64_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64index_u32)))
-svuint32_t svld1q_gather_index(svbool_t, uint32_t const *, svuint64_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64index_u64)))
-svuint64_t svld1q_gather_index(svbool_t, uint64_t const *, svuint64_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64index_u16)))
-svuint16_t svld1q_gather_index(svbool_t, uint16_t const *, svuint64_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64index_bf16)))
-svbfloat16_t svld1q_gather_index(svbool_t, bfloat16_t const *, svuint64_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64index_f64)))
-svfloat64_t svld1q_gather_index(svbool_t, float64_t const *, svuint64_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64index_f32)))
-svfloat32_t svld1q_gather_index(svbool_t, float32_t const *, svuint64_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64index_f16)))
-svfloat16_t svld1q_gather_index(svbool_t, float16_t const *, svuint64_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64index_s32)))
-svint32_t svld1q_gather_index(svbool_t, int32_t const *, svuint64_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64index_s64)))
-svint64_t svld1q_gather_index(svbool_t, int64_t const *, svuint64_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64index_s16)))
-svint16_t svld1q_gather_index(svbool_t, int16_t const *, svuint64_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64offset_u8)))
-svuint8_t svld1q_gather_offset(svbool_t, uint8_t const *, svuint64_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64offset_u32)))
-svuint32_t svld1q_gather_offset(svbool_t, uint32_t const *, svuint64_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64offset_u64)))
-svuint64_t svld1q_gather_offset(svbool_t, uint64_t const *, svuint64_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64offset_u16)))
-svuint16_t svld1q_gather_offset(svbool_t, uint16_t const *, svuint64_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64offset_bf16)))
-svbfloat16_t svld1q_gather_offset(svbool_t, bfloat16_t const *, svuint64_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64offset_s8)))
-svint8_t svld1q_gather_offset(svbool_t, int8_t const *, svuint64_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64offset_f64)))
-svfloat64_t svld1q_gather_offset(svbool_t, float64_t const *, svuint64_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64offset_f32)))
-svfloat32_t svld1q_gather_offset(svbool_t, float32_t const *, svuint64_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64offset_f16)))
-svfloat16_t svld1q_gather_offset(svbool_t, float16_t const *, svuint64_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64offset_s32)))
-svint32_t svld1q_gather_offset(svbool_t, int32_t const *, svuint64_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64offset_s64)))
-svint64_t svld1q_gather_offset(svbool_t, int64_t const *, svuint64_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64offset_mf8)))
-svmfloat8_t svld1q_gather_offset(svbool_t, mfloat8_t const *, svuint64_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64offset_s16)))
-svint16_t svld1q_gather_offset(svbool_t, int16_t const *, svuint64_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1udq_u64)))
-svuint64_t svld1udq(svbool_t, uint64_t const *);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1udq_f64)))
-svfloat64_t svld1udq(svbool_t, float64_t const *);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1udq_s64)))
-svint64_t svld1udq(svbool_t, int64_t const *);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1udq_vnum_u64)))
-svuint64_t svld1udq_vnum(svbool_t, uint64_t const *, int64_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1udq_vnum_f64)))
-svfloat64_t svld1udq_vnum(svbool_t, float64_t const *, int64_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1udq_vnum_s64)))
-svint64_t svld1udq_vnum(svbool_t, int64_t const *, int64_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1uwq_u32)))
-svuint32_t svld1uwq(svbool_t, uint32_t const *);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1uwq_f32)))
-svfloat32_t svld1uwq(svbool_t, float32_t const *);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1uwq_s32)))
-svint32_t svld1uwq(svbool_t, int32_t const *);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1uwq_vnum_u32)))
-svuint32_t svld1uwq_vnum(svbool_t, uint32_t const *, int64_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1uwq_vnum_f32)))
-svfloat32_t svld1uwq_vnum(svbool_t, float32_t const *, int64_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1uwq_vnum_s32)))
-svint32_t svld1uwq_vnum(svbool_t, int32_t const *, int64_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1dq_u64)))
-void svst1dq(svbool_t, uint64_t *, svuint64_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1dq_f64)))
-void svst1dq(svbool_t, float64_t *, svfloat64_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1dq_s64)))
-void svst1dq(svbool_t, int64_t *, svint64_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1dq_vnum_u64)))
-void svst1dq_vnum(svbool_t, uint64_t *, int64_t, svuint64_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1dq_vnum_f64)))
-void svst1dq_vnum(svbool_t, float64_t *, int64_t, svfloat64_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1dq_vnum_s64)))
-void svst1dq_vnum(svbool_t, int64_t *, int64_t, svint64_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64base_u8)))
-void svst1q_scatter(svbool_t, svuint64_t, svuint8_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64base_u32)))
-void svst1q_scatter(svbool_t, svuint64_t, svuint32_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64base_u64)))
-void svst1q_scatter(svbool_t, svuint64_t, svuint64_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64base_u16)))
-void svst1q_scatter(svbool_t, svuint64_t, svuint16_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64base_bf16)))
-void svst1q_scatter(svbool_t, svuint64_t, svbfloat16_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64base_s8)))
-void svst1q_scatter(svbool_t, svuint64_t, svint8_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64base_f64)))
-void svst1q_scatter(svbool_t, svuint64_t, svfloat64_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64base_f32)))
-void svst1q_scatter(svbool_t, svuint64_t, svfloat32_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64base_f16)))
-void svst1q_scatter(svbool_t, svuint64_t, svfloat16_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64base_s32)))
-void svst1q_scatter(svbool_t, svuint64_t, svint32_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64base_s64)))
-void svst1q_scatter(svbool_t, svuint64_t, svint64_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64base_mf8)))
-void svst1q_scatter(svbool_t, svuint64_t, svmfloat8_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64base_s16)))
-void svst1q_scatter(svbool_t, svuint64_t, svint16_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64base_index_u32)))
-void svst1q_scatter_index(svbool_t, svuint64_t, int64_t, svuint32_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64base_index_u64)))
-void svst1q_scatter_index(svbool_t, svuint64_t, int64_t, svuint64_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64base_index_u16)))
-void svst1q_scatter_index(svbool_t, svuint64_t, int64_t, svuint16_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64base_index_bf16)))
-void svst1q_scatter_index(svbool_t, svuint64_t, int64_t, svbfloat16_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64base_index_f64)))
-void svst1q_scatter_index(svbool_t, svuint64_t, int64_t, svfloat64_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64base_index_f32)))
-void svst1q_scatter_index(svbool_t, svuint64_t, int64_t, svfloat32_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64base_index_f16)))
-void svst1q_scatter_index(svbool_t, svuint64_t, int64_t, svfloat16_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64base_index_s32)))
-void svst1q_scatter_index(svbool_t, svuint64_t, int64_t, svint32_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64base_index_s64)))
-void svst1q_scatter_index(svbool_t, svuint64_t, int64_t, svint64_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64base_index_s16)))
-void svst1q_scatter_index(svbool_t, svuint64_t, int64_t, svint16_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64base_offset_u8)))
-void svst1q_scatter_offset(svbool_t, svuint64_t, int64_t, svuint8_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64base_offset_u32)))
-void svst1q_scatter_offset(svbool_t, svuint64_t, int64_t, svuint32_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64base_offset_u64)))
-void svst1q_scatter_offset(svbool_t, svuint64_t, int64_t, svuint64_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64base_offset_u16)))
-void svst1q_scatter_offset(svbool_t, svuint64_t, int64_t, svuint16_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64base_offset_bf16)))
-void svst1q_scatter_offset(svbool_t, svuint64_t, int64_t, svbfloat16_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64base_offset_s8)))
-void svst1q_scatter_offset(svbool_t, svuint64_t, int64_t, svint8_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64base_offset_f64)))
-void svst1q_scatter_offset(svbool_t, svuint64_t, int64_t, svfloat64_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64base_offset_f32)))
-void svst1q_scatter_offset(svbool_t, svuint64_t, int64_t, svfloat32_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64base_offset_f16)))
-void svst1q_scatter_offset(svbool_t, svuint64_t, int64_t, svfloat16_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64base_offset_s32)))
-void svst1q_scatter_offset(svbool_t, svuint64_t, int64_t, svint32_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64base_offset_s64)))
-void svst1q_scatter_offset(svbool_t, svuint64_t, int64_t, svint64_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64base_offset_mf8)))
-void svst1q_scatter_offset(svbool_t, svuint64_t, int64_t, svmfloat8_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64base_offset_s16)))
-void svst1q_scatter_offset(svbool_t, svuint64_t, int64_t, svint16_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_s64index_u32)))
-void svst1q_scatter_index(svbool_t, uint32_t *, svint64_t, svuint32_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_s64index_u64)))
-void svst1q_scatter_index(svbool_t, uint64_t *, svint64_t, svuint64_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_s64index_u16)))
-void svst1q_scatter_index(svbool_t, uint16_t *, svint64_t, svuint16_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_s64index_bf16)))
-void svst1q_scatter_index(svbool_t, bfloat16_t *, svint64_t, svbfloat16_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_s64index_f64)))
-void svst1q_scatter_index(svbool_t, float64_t *, svint64_t, svfloat64_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_s64index_f32)))
-void svst1q_scatter_index(svbool_t, float32_t *, svint64_t, svfloat32_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_s64index_f16)))
-void svst1q_scatter_index(svbool_t, float16_t *, svint64_t, svfloat16_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_s64index_s32)))
-void svst1q_scatter_index(svbool_t, int32_t *, svint64_t, svint32_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_s64index_s64)))
-void svst1q_scatter_index(svbool_t, int64_t *, svint64_t, svint64_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_s64index_s16)))
-void svst1q_scatter_index(svbool_t, int16_t *, svint64_t, svint16_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64index_u32)))
-void svst1q_scatter_index(svbool_t, uint32_t *, svuint64_t, svuint32_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64index_u64)))
-void svst1q_scatter_index(svbool_t, uint64_t *, svuint64_t, svuint64_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64index_u16)))
-void svst1q_scatter_index(svbool_t, uint16_t *, svuint64_t, svuint16_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64index_bf16)))
-void svst1q_scatter_index(svbool_t, bfloat16_t *, svuint64_t, svbfloat16_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64index_f64)))
-void svst1q_scatter_index(svbool_t, float64_t *, svuint64_t, svfloat64_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64index_f32)))
-void svst1q_scatter_index(svbool_t, float32_t *, svuint64_t, svfloat32_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64index_f16)))
-void svst1q_scatter_index(svbool_t, float16_t *, svuint64_t, svfloat16_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64index_s32)))
-void svst1q_scatter_index(svbool_t, int32_t *, svuint64_t, svint32_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64index_s64)))
-void svst1q_scatter_index(svbool_t, int64_t *, svuint64_t, svint64_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64index_s16)))
-void svst1q_scatter_index(svbool_t, int16_t *, svuint64_t, svint16_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_s64offset_u8)))
-void svst1q_scatter_offset(svbool_t, uint8_t *, svint64_t, svuint8_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_s64offset_u32)))
-void svst1q_scatter_offset(svbool_t, uint32_t *, svint64_t, svuint32_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_s64offset_u64)))
-void svst1q_scatter_offset(svbool_t, uint64_t *, svint64_t, svuint64_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_s64offset_u16)))
-void svst1q_scatter_offset(svbool_t, uint16_t *, svint64_t, svuint16_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_s64offset_bf16)))
-void svst1q_scatter_offset(svbool_t, bfloat16_t *, svint64_t, svbfloat16_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_s64offset_s8)))
-void svst1q_scatter_offset(svbool_t, int8_t *, svint64_t, svint8_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_s64offset_f64)))
-void svst1q_scatter_offset(svbool_t, float64_t *, svint64_t, svfloat64_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_s64offset_f32)))
-void svst1q_scatter_offset(svbool_t, float32_t *, svint64_t, svfloat32_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_s64offset_f16)))
-void svst1q_scatter_offset(svbool_t, float16_t *, svint64_t, svfloat16_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_s64offset_s32)))
-void svst1q_scatter_offset(svbool_t, int32_t *, svint64_t, svint32_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_s64offset_s64)))
-void svst1q_scatter_offset(svbool_t, int64_t *, svint64_t, svint64_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_s64offset_mf8)))
-void svst1q_scatter_offset(svbool_t, mfloat8_t *, svint64_t, svmfloat8_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_s64offset_s16)))
-void svst1q_scatter_offset(svbool_t, int16_t *, svint64_t, svint16_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64offset_u8)))
-void svst1q_scatter_offset(svbool_t, uint8_t *, svuint64_t, svuint8_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64offset_u32)))
-void svst1q_scatter_offset(svbool_t, uint32_t *, svuint64_t, svuint32_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64offset_u64)))
-void svst1q_scatter_offset(svbool_t, uint64_t *, svuint64_t, svuint64_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64offset_u16)))
-void svst1q_scatter_offset(svbool_t, uint16_t *, svuint64_t, svuint16_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64offset_bf16)))
-void svst1q_scatter_offset(svbool_t, bfloat16_t *, svuint64_t, svbfloat16_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64offset_s8)))
-void svst1q_scatter_offset(svbool_t, int8_t *, svuint64_t, svint8_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64offset_f64)))
-void svst1q_scatter_offset(svbool_t, float64_t *, svuint64_t, svfloat64_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64offset_f32)))
-void svst1q_scatter_offset(svbool_t, float32_t *, svuint64_t, svfloat32_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64offset_f16)))
-void svst1q_scatter_offset(svbool_t, float16_t *, svuint64_t, svfloat16_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64offset_s32)))
-void svst1q_scatter_offset(svbool_t, int32_t *, svuint64_t, svint32_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64offset_s64)))
-void svst1q_scatter_offset(svbool_t, int64_t *, svuint64_t, svint64_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64offset_mf8)))
-void svst1q_scatter_offset(svbool_t, mfloat8_t *, svuint64_t, svmfloat8_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64offset_s16)))
-void svst1q_scatter_offset(svbool_t, int16_t *, svuint64_t, svint16_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1wq_u32)))
-void svst1wq(svbool_t, uint32_t *, svuint32_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1wq_f32)))
-void svst1wq(svbool_t, float32_t *, svfloat32_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1wq_s32)))
-void svst1wq(svbool_t, int32_t *, svint32_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1wq_vnum_u32)))
-void svst1wq_vnum(svbool_t, uint32_t *, int64_t, svuint32_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1wq_vnum_f32)))
-void svst1wq_vnum(svbool_t, float32_t *, int64_t, svfloat32_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1wq_vnum_s32)))
-void svst1wq_vnum(svbool_t, int32_t *, int64_t, svint32_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcntp_c8)))
-uint64_t svcntp_c8(svcount_t, uint64_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcntp_c32)))
-uint64_t svcntp_c32(svcount_t, uint64_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcntp_c64)))
-uint64_t svcntp_c64(svcount_t, uint64_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcntp_c16)))
-uint64_t svcntp_c16(svcount_t, uint64_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_u8_x2)))
-svuint8x2_t svld1_u8_x2(svcount_t, uint8_t const *);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_s8_x2)))
-svint8x2_t svld1_s8_x2(svcount_t, int8_t const *);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_mf8_x2)))
-svmfloat8x2_t svld1_mf8_x2(svcount_t, mfloat8_t const *);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_u64_x2)))
-svuint64x2_t svld1_u64_x2(svcount_t, uint64_t const *);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_f64_x2)))
-svfloat64x2_t svld1_f64_x2(svcount_t, float64_t const *);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_s64_x2)))
-svint64x2_t svld1_s64_x2(svcount_t, int64_t const *);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_u16_x2)))
-svuint16x2_t svld1_u16_x2(svcount_t, uint16_t const *);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_bf16_x2)))
-svbfloat16x2_t svld1_bf16_x2(svcount_t, bfloat16_t const *);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_f16_x2)))
-svfloat16x2_t svld1_f16_x2(svcount_t, float16_t const *);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_s16_x2)))
-svint16x2_t svld1_s16_x2(svcount_t, int16_t const *);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_u32_x2)))
-svuint32x2_t svld1_u32_x2(svcount_t, uint32_t const *);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_f32_x2)))
-svfloat32x2_t svld1_f32_x2(svcount_t, float32_t const *);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_s32_x2)))
-svint32x2_t svld1_s32_x2(svcount_t, int32_t const *);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_u8_x4)))
-svuint8x4_t svld1_u8_x4(svcount_t, uint8_t const *);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_s8_x4)))
-svint8x4_t svld1_s8_x4(svcount_t, int8_t const *);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_mf8_x4)))
-svmfloat8x4_t svld1_mf8_x4(svcount_t, mfloat8_t const *);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_u64_x4)))
-svuint64x4_t svld1_u64_x4(svcount_t, uint64_t const *);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_f64_x4)))
-svfloat64x4_t svld1_f64_x4(svcount_t, float64_t const *);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_s64_x4)))
-svint64x4_t svld1_s64_x4(svcount_t, int64_t const *);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_u16_x4)))
-svuint16x4_t svld1_u16_x4(svcount_t, uint16_t const *);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_bf16_x4)))
-svbfloat16x4_t svld1_bf16_x4(svcount_t, bfloat16_t const *);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_f16_x4)))
-svfloat16x4_t svld1_f16_x4(svcount_t, float16_t const *);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_s16_x4)))
-svint16x4_t svld1_s16_x4(svcount_t, int16_t const *);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_u32_x4)))
-svuint32x4_t svld1_u32_x4(svcount_t, uint32_t const *);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_f32_x4)))
-svfloat32x4_t svld1_f32_x4(svcount_t, float32_t const *);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_s32_x4)))
-svint32x4_t svld1_s32_x4(svcount_t, int32_t const *);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_vnum_u8_x2)))
-svuint8x2_t svld1_vnum_u8_x2(svcount_t, uint8_t const *, int64_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_vnum_s8_x2)))
-svint8x2_t svld1_vnum_s8_x2(svcount_t, int8_t const *, int64_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_vnum_mf8_x2)))
-svmfloat8x2_t svld1_vnum_mf8_x2(svcount_t, mfloat8_t const *, int64_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_vnum_u64_x2)))
-svuint64x2_t svld1_vnum_u64_x2(svcount_t, uint64_t const *, int64_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_vnum_f64_x2)))
-svfloat64x2_t svld1_vnum_f64_x2(svcount_t, float64_t const *, int64_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_vnum_s64_x2)))
-svint64x2_t svld1_vnum_s64_x2(svcount_t, int64_t const *, int64_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_vnum_u16_x2)))
-svuint16x2_t svld1_vnum_u16_x2(svcount_t, uint16_t const *, int64_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_vnum_bf16_x2)))
-svbfloat16x2_t svld1_vnum_bf16_x2(svcount_t, bfloat16_t const *, int64_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_vnum_f16_x2)))
-svfloat16x2_t svld1_vnum_f16_x2(svcount_t, float16_t const *, int64_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_vnum_s16_x2)))
-svint16x2_t svld1_vnum_s16_x2(svcount_t, int16_t const *, int64_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_vnum_u32_x2)))
-svuint32x2_t svld1_vnum_u32_x2(svcount_t, uint32_t const *, int64_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_vnum_f32_x2)))
-svfloat32x2_t svld1_vnum_f32_x2(svcount_t, float32_t const *, int64_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_vnum_s32_x2)))
-svint32x2_t svld1_vnum_s32_x2(svcount_t, int32_t const *, int64_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_vnum_u8_x4)))
-svuint8x4_t svld1_vnum_u8_x4(svcount_t, uint8_t const *, int64_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_vnum_s8_x4)))
-svint8x4_t svld1_vnum_s8_x4(svcount_t, int8_t const *, int64_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_vnum_mf8_x4)))
-svmfloat8x4_t svld1_vnum_mf8_x4(svcount_t, mfloat8_t const *, int64_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_vnum_u64_x4)))
-svuint64x4_t svld1_vnum_u64_x4(svcount_t, uint64_t const *, int64_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_vnum_f64_x4)))
-svfloat64x4_t svld1_vnum_f64_x4(svcount_t, float64_t const *, int64_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_vnum_s64_x4)))
-svint64x4_t svld1_vnum_s64_x4(svcount_t, int64_t const *, int64_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_vnum_u16_x4)))
-svuint16x4_t svld1_vnum_u16_x4(svcount_t, uint16_t const *, int64_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_vnum_bf16_x4)))
-svbfloat16x4_t svld1_vnum_bf16_x4(svcount_t, bfloat16_t const *, int64_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_vnum_f16_x4)))
-svfloat16x4_t svld1_vnum_f16_x4(svcount_t, float16_t const *, int64_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_vnum_s16_x4)))
-svint16x4_t svld1_vnum_s16_x4(svcount_t, int16_t const *, int64_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_vnum_u32_x4)))
-svuint32x4_t svld1_vnum_u32_x4(svcount_t, uint32_t const *, int64_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_vnum_f32_x4)))
-svfloat32x4_t svld1_vnum_f32_x4(svcount_t, float32_t const *, int64_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_vnum_s32_x4)))
-svint32x4_t svld1_vnum_s32_x4(svcount_t, int32_t const *, int64_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_u8_x2)))
-svuint8x2_t svldnt1_u8_x2(svcount_t, uint8_t const *);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_s8_x2)))
-svint8x2_t svldnt1_s8_x2(svcount_t, int8_t const *);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_mf8_x2)))
-svmfloat8x2_t svldnt1_mf8_x2(svcount_t, mfloat8_t const *);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_u64_x2)))
-svuint64x2_t svldnt1_u64_x2(svcount_t, uint64_t const *);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_f64_x2)))
-svfloat64x2_t svldnt1_f64_x2(svcount_t, float64_t const *);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_s64_x2)))
-svint64x2_t svldnt1_s64_x2(svcount_t, int64_t const *);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_u16_x2)))
-svuint16x2_t svldnt1_u16_x2(svcount_t, uint16_t const *);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_bf16_x2)))
-svbfloat16x2_t svldnt1_bf16_x2(svcount_t, bfloat16_t const *);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_f16_x2)))
-svfloat16x2_t svldnt1_f16_x2(svcount_t, float16_t const *);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_s16_x2)))
-svint16x2_t svldnt1_s16_x2(svcount_t, int16_t const *);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_u32_x2)))
-svuint32x2_t svldnt1_u32_x2(svcount_t, uint32_t const *);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_f32_x2)))
-svfloat32x2_t svldnt1_f32_x2(svcount_t, float32_t const *);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_s32_x2)))
-svint32x2_t svldnt1_s32_x2(svcount_t, int32_t const *);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_u8_x4)))
-svuint8x4_t svldnt1_u8_x4(svcount_t, uint8_t const *);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_s8_x4)))
-svint8x4_t svldnt1_s8_x4(svcount_t, int8_t const *);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_mf8_x4)))
-svmfloat8x4_t svldnt1_mf8_x4(svcount_t, mfloat8_t const *);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_u64_x4)))
-svuint64x4_t svldnt1_u64_x4(svcount_t, uint64_t const *);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_f64_x4)))
-svfloat64x4_t svldnt1_f64_x4(svcount_t, float64_t const *);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_s64_x4)))
-svint64x4_t svldnt1_s64_x4(svcount_t, int64_t const *);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_u16_x4)))
-svuint16x4_t svldnt1_u16_x4(svcount_t, uint16_t const *);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_bf16_x4)))
-svbfloat16x4_t svldnt1_bf16_x4(svcount_t, bfloat16_t const *);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_f16_x4)))
-svfloat16x4_t svldnt1_f16_x4(svcount_t, float16_t const *);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_s16_x4)))
-svint16x4_t svldnt1_s16_x4(svcount_t, int16_t const *);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_u32_x4)))
-svuint32x4_t svldnt1_u32_x4(svcount_t, uint32_t const *);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_f32_x4)))
-svfloat32x4_t svldnt1_f32_x4(svcount_t, float32_t const *);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_s32_x4)))
-svint32x4_t svldnt1_s32_x4(svcount_t, int32_t const *);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_vnum_u8_x2)))
-svuint8x2_t svldnt1_vnum_u8_x2(svcount_t, uint8_t const *, int64_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_vnum_s8_x2)))
-svint8x2_t svldnt1_vnum_s8_x2(svcount_t, int8_t const *, int64_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_vnum_mf8_x2)))
-svmfloat8x2_t svldnt1_vnum_mf8_x2(svcount_t, mfloat8_t const *, int64_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_vnum_u64_x2)))
-svuint64x2_t svldnt1_vnum_u64_x2(svcount_t, uint64_t const *, int64_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_vnum_f64_x2)))
-svfloat64x2_t svldnt1_vnum_f64_x2(svcount_t, float64_t const *, int64_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_vnum_s64_x2)))
-svint64x2_t svldnt1_vnum_s64_x2(svcount_t, int64_t const *, int64_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_vnum_u16_x2)))
-svuint16x2_t svldnt1_vnum_u16_x2(svcount_t, uint16_t const *, int64_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_vnum_bf16_x2)))
-svbfloat16x2_t svldnt1_vnum_bf16_x2(svcount_t, bfloat16_t const *, int64_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_vnum_f16_x2)))
-svfloat16x2_t svldnt1_vnum_f16_x2(svcount_t, float16_t const *, int64_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_vnum_s16_x2)))
-svint16x2_t svldnt1_vnum_s16_x2(svcount_t, int16_t const *, int64_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_vnum_u32_x2)))
-svuint32x2_t svldnt1_vnum_u32_x2(svcount_t, uint32_t const *, int64_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_vnum_f32_x2)))
-svfloat32x2_t svldnt1_vnum_f32_x2(svcount_t, float32_t const *, int64_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_vnum_s32_x2)))
-svint32x2_t svldnt1_vnum_s32_x2(svcount_t, int32_t const *, int64_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_vnum_u8_x4)))
-svuint8x4_t svldnt1_vnum_u8_x4(svcount_t, uint8_t const *, int64_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_vnum_s8_x4)))
-svint8x4_t svldnt1_vnum_s8_x4(svcount_t, int8_t const *, int64_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_vnum_mf8_x4)))
-svmfloat8x4_t svldnt1_vnum_mf8_x4(svcount_t, mfloat8_t const *, int64_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_vnum_u64_x4)))
-svuint64x4_t svldnt1_vnum_u64_x4(svcount_t, uint64_t const *, int64_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_vnum_f64_x4)))
-svfloat64x4_t svldnt1_vnum_f64_x4(svcount_t, float64_t const *, int64_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_vnum_s64_x4)))
-svint64x4_t svldnt1_vnum_s64_x4(svcount_t, int64_t const *, int64_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_vnum_u16_x4)))
-svuint16x4_t svldnt1_vnum_u16_x4(svcount_t, uint16_t const *, int64_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_vnum_bf16_x4)))
-svbfloat16x4_t svldnt1_vnum_bf16_x4(svcount_t, bfloat16_t const *, int64_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_vnum_f16_x4)))
-svfloat16x4_t svldnt1_vnum_f16_x4(svcount_t, float16_t const *, int64_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_vnum_s16_x4)))
-svint16x4_t svldnt1_vnum_s16_x4(svcount_t, int16_t const *, int64_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_vnum_u32_x4)))
-svuint32x4_t svldnt1_vnum_u32_x4(svcount_t, uint32_t const *, int64_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_vnum_f32_x4)))
-svfloat32x4_t svldnt1_vnum_f32_x4(svcount_t, float32_t const *, int64_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_vnum_s32_x4)))
-svint32x4_t svldnt1_vnum_s32_x4(svcount_t, int32_t const *, int64_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svpext_lane_c8)))
-svbool_t svpext_lane_c8(svcount_t, uint64_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svpext_lane_c32)))
-svbool_t svpext_lane_c32(svcount_t, uint64_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svpext_lane_c64)))
-svbool_t svpext_lane_c64(svcount_t, uint64_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svpext_lane_c16)))
-svbool_t svpext_lane_c16(svcount_t, uint64_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svpext_lane_c8_x2)))
-svboolx2_t svpext_lane_c8_x2(svcount_t, uint64_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svpext_lane_c32_x2)))
-svboolx2_t svpext_lane_c32_x2(svcount_t, uint64_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svpext_lane_c64_x2)))
-svboolx2_t svpext_lane_c64_x2(svcount_t, uint64_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svpext_lane_c16_x2)))
-svboolx2_t svpext_lane_c16_x2(svcount_t, uint64_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svpfalse_c)))
-svcount_t svpfalse_c(void);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svpsel_lane_c16)))
-svcount_t svpsel_lane_c16(svcount_t, svbool_t, uint32_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svpsel_lane_c32)))
-svcount_t svpsel_lane_c32(svcount_t, svbool_t, uint32_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svpsel_lane_c64)))
-svcount_t svpsel_lane_c64(svcount_t, svbool_t, uint32_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svpsel_lane_c8)))
-svcount_t svpsel_lane_c8(svcount_t, svbool_t, uint32_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svptrue_c8)))
-svcount_t svptrue_c8(void);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svptrue_c32)))
-svcount_t svptrue_c32(void);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svptrue_c64)))
-svcount_t svptrue_c64(void);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svptrue_c16)))
-svcount_t svptrue_c16(void);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svreinterpret_b)))
-svbool_t svreinterpret_b(svcount_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svreinterpret_c)))
-svcount_t svreinterpret_c(svbool_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_u8_x2)))
-void svst1_u8_x2(svcount_t, uint8_t *, svuint8x2_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_s8_x2)))
-void svst1_s8_x2(svcount_t, int8_t *, svint8x2_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_mf8_x2)))
-void svst1_mf8_x2(svcount_t, mfloat8_t *, svmfloat8x2_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_u64_x2)))
-void svst1_u64_x2(svcount_t, uint64_t *, svuint64x2_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_f64_x2)))
-void svst1_f64_x2(svcount_t, float64_t *, svfloat64x2_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_s64_x2)))
-void svst1_s64_x2(svcount_t, int64_t *, svint64x2_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_u16_x2)))
-void svst1_u16_x2(svcount_t, uint16_t *, svuint16x2_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_bf16_x2)))
-void svst1_bf16_x2(svcount_t, bfloat16_t *, svbfloat16x2_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_f16_x2)))
-void svst1_f16_x2(svcount_t, float16_t *, svfloat16x2_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_s16_x2)))
-void svst1_s16_x2(svcount_t, int16_t *, svint16x2_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_u32_x2)))
-void svst1_u32_x2(svcount_t, uint32_t *, svuint32x2_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_f32_x2)))
-void svst1_f32_x2(svcount_t, float32_t *, svfloat32x2_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_s32_x2)))
-void svst1_s32_x2(svcount_t, int32_t *, svint32x2_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_u8_x4)))
-void svst1_u8_x4(svcount_t, uint8_t *, svuint8x4_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_s8_x4)))
-void svst1_s8_x4(svcount_t, int8_t *, svint8x4_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_mf8_x4)))
-void svst1_mf8_x4(svcount_t, mfloat8_t *, svmfloat8x4_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_u64_x4)))
-void svst1_u64_x4(svcount_t, uint64_t *, svuint64x4_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_f64_x4)))
-void svst1_f64_x4(svcount_t, float64_t *, svfloat64x4_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_s64_x4)))
-void svst1_s64_x4(svcount_t, int64_t *, svint64x4_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_u16_x4)))
-void svst1_u16_x4(svcount_t, uint16_t *, svuint16x4_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_bf16_x4)))
-void svst1_bf16_x4(svcount_t, bfloat16_t *, svbfloat16x4_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_f16_x4)))
-void svst1_f16_x4(svcount_t, float16_t *, svfloat16x4_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_s16_x4)))
-void svst1_s16_x4(svcount_t, int16_t *, svint16x4_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_u32_x4)))
-void svst1_u32_x4(svcount_t, uint32_t *, svuint32x4_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_f32_x4)))
-void svst1_f32_x4(svcount_t, float32_t *, svfloat32x4_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_s32_x4)))
-void svst1_s32_x4(svcount_t, int32_t *, svint32x4_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_vnum_u8_x2)))
-void svst1_vnum_u8_x2(svcount_t, uint8_t *, int64_t, svuint8x2_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_vnum_s8_x2)))
-void svst1_vnum_s8_x2(svcount_t, int8_t *, int64_t, svint8x2_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_vnum_mf8_x2)))
-void svst1_vnum_mf8_x2(svcount_t, mfloat8_t *, int64_t, svmfloat8x2_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_vnum_u64_x2)))
-void svst1_vnum_u64_x2(svcount_t, uint64_t *, int64_t, svuint64x2_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_vnum_f64_x2)))
-void svst1_vnum_f64_x2(svcount_t, float64_t *, int64_t, svfloat64x2_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_vnum_s64_x2)))
-void svst1_vnum_s64_x2(svcount_t, int64_t *, int64_t, svint64x2_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_vnum_u16_x2)))
-void svst1_vnum_u16_x2(svcount_t, uint16_t *, int64_t, svuint16x2_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_vnum_bf16_x2)))
-void svst1_vnum_bf16_x2(svcount_t, bfloat16_t *, int64_t, svbfloat16x2_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_vnum_f16_x2)))
-void svst1_vnum_f16_x2(svcount_t, float16_t *, int64_t, svfloat16x2_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_vnum_s16_x2)))
-void svst1_vnum_s16_x2(svcount_t, int16_t *, int64_t, svint16x2_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_vnum_u32_x2)))
-void svst1_vnum_u32_x2(svcount_t, uint32_t *, int64_t, svuint32x2_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_vnum_f32_x2)))
-void svst1_vnum_f32_x2(svcount_t, float32_t *, int64_t, svfloat32x2_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_vnum_s32_x2)))
-void svst1_vnum_s32_x2(svcount_t, int32_t *, int64_t, svint32x2_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_vnum_u8_x4)))
-void svst1_vnum_u8_x4(svcount_t, uint8_t *, int64_t, svuint8x4_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_vnum_s8_x4)))
-void svst1_vnum_s8_x4(svcount_t, int8_t *, int64_t, svint8x4_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_vnum_mf8_x4)))
-void svst1_vnum_mf8_x4(svcount_t, mfloat8_t *, int64_t, svmfloat8x4_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_vnum_u64_x4)))
-void svst1_vnum_u64_x4(svcount_t, uint64_t *, int64_t, svuint64x4_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_vnum_f64_x4)))
-void svst1_vnum_f64_x4(svcount_t, float64_t *, int64_t, svfloat64x4_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_vnum_s64_x4)))
-void svst1_vnum_s64_x4(svcount_t, int64_t *, int64_t, svint64x4_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_vnum_u16_x4)))
-void svst1_vnum_u16_x4(svcount_t, uint16_t *, int64_t, svuint16x4_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_vnum_bf16_x4)))
-void svst1_vnum_bf16_x4(svcount_t, bfloat16_t *, int64_t, svbfloat16x4_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_vnum_f16_x4)))
-void svst1_vnum_f16_x4(svcount_t, float16_t *, int64_t, svfloat16x4_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_vnum_s16_x4)))
-void svst1_vnum_s16_x4(svcount_t, int16_t *, int64_t, svint16x4_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_vnum_u32_x4)))
-void svst1_vnum_u32_x4(svcount_t, uint32_t *, int64_t, svuint32x4_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_vnum_f32_x4)))
-void svst1_vnum_f32_x4(svcount_t, float32_t *, int64_t, svfloat32x4_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_vnum_s32_x4)))
-void svst1_vnum_s32_x4(svcount_t, int32_t *, int64_t, svint32x4_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_u8_x2)))
-void svstnt1_u8_x2(svcount_t, uint8_t *, svuint8x2_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_s8_x2)))
-void svstnt1_s8_x2(svcount_t, int8_t *, svint8x2_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_mf8_x2)))
-void svstnt1_mf8_x2(svcount_t, mfloat8_t *, svmfloat8x2_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_u64_x2)))
-void svstnt1_u64_x2(svcount_t, uint64_t *, svuint64x2_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_f64_x2)))
-void svstnt1_f64_x2(svcount_t, float64_t *, svfloat64x2_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_s64_x2)))
-void svstnt1_s64_x2(svcount_t, int64_t *, svint64x2_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_u16_x2)))
-void svstnt1_u16_x2(svcount_t, uint16_t *, svuint16x2_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_bf16_x2)))
-void svstnt1_bf16_x2(svcount_t, bfloat16_t *, svbfloat16x2_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_f16_x2)))
-void svstnt1_f16_x2(svcount_t, float16_t *, svfloat16x2_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_s16_x2)))
-void svstnt1_s16_x2(svcount_t, int16_t *, svint16x2_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_u32_x2)))
-void svstnt1_u32_x2(svcount_t, uint32_t *, svuint32x2_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_f32_x2)))
-void svstnt1_f32_x2(svcount_t, float32_t *, svfloat32x2_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_s32_x2)))
-void svstnt1_s32_x2(svcount_t, int32_t *, svint32x2_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_u8_x4)))
-void svstnt1_u8_x4(svcount_t, uint8_t *, svuint8x4_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_s8_x4)))
-void svstnt1_s8_x4(svcount_t, int8_t *, svint8x4_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_mf8_x4)))
-void svstnt1_mf8_x4(svcount_t, mfloat8_t *, svmfloat8x4_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_u64_x4)))
-void svstnt1_u64_x4(svcount_t, uint64_t *, svuint64x4_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_f64_x4)))
-void svstnt1_f64_x4(svcount_t, float64_t *, svfloat64x4_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_s64_x4)))
-void svstnt1_s64_x4(svcount_t, int64_t *, svint64x4_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_u16_x4)))
-void svstnt1_u16_x4(svcount_t, uint16_t *, svuint16x4_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_bf16_x4)))
-void svstnt1_bf16_x4(svcount_t, bfloat16_t *, svbfloat16x4_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_f16_x4)))
-void svstnt1_f16_x4(svcount_t, float16_t *, svfloat16x4_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_s16_x4)))
-void svstnt1_s16_x4(svcount_t, int16_t *, svint16x4_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_u32_x4)))
-void svstnt1_u32_x4(svcount_t, uint32_t *, svuint32x4_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_f32_x4)))
-void svstnt1_f32_x4(svcount_t, float32_t *, svfloat32x4_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_s32_x4)))
-void svstnt1_s32_x4(svcount_t, int32_t *, svint32x4_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_vnum_u8_x2)))
-void svstnt1_vnum_u8_x2(svcount_t, uint8_t *, int64_t, svuint8x2_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_vnum_s8_x2)))
-void svstnt1_vnum_s8_x2(svcount_t, int8_t *, int64_t, svint8x2_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_vnum_mf8_x2)))
-void svstnt1_vnum_mf8_x2(svcount_t, mfloat8_t *, int64_t, svmfloat8x2_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_vnum_u64_x2)))
-void svstnt1_vnum_u64_x2(svcount_t, uint64_t *, int64_t, svuint64x2_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_vnum_f64_x2)))
-void svstnt1_vnum_f64_x2(svcount_t, float64_t *, int64_t, svfloat64x2_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_vnum_s64_x2)))
-void svstnt1_vnum_s64_x2(svcount_t, int64_t *, int64_t, svint64x2_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_vnum_u16_x2)))
-void svstnt1_vnum_u16_x2(svcount_t, uint16_t *, int64_t, svuint16x2_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_vnum_bf16_x2)))
-void svstnt1_vnum_bf16_x2(svcount_t, bfloat16_t *, int64_t, svbfloat16x2_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_vnum_f16_x2)))
-void svstnt1_vnum_f16_x2(svcount_t, float16_t *, int64_t, svfloat16x2_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_vnum_s16_x2)))
-void svstnt1_vnum_s16_x2(svcount_t, int16_t *, int64_t, svint16x2_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_vnum_u32_x2)))
-void svstnt1_vnum_u32_x2(svcount_t, uint32_t *, int64_t, svuint32x2_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_vnum_f32_x2)))
-void svstnt1_vnum_f32_x2(svcount_t, float32_t *, int64_t, svfloat32x2_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_vnum_s32_x2)))
-void svstnt1_vnum_s32_x2(svcount_t, int32_t *, int64_t, svint32x2_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_vnum_u8_x4)))
-void svstnt1_vnum_u8_x4(svcount_t, uint8_t *, int64_t, svuint8x4_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_vnum_s8_x4)))
-void svstnt1_vnum_s8_x4(svcount_t, int8_t *, int64_t, svint8x4_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_vnum_mf8_x4)))
-void svstnt1_vnum_mf8_x4(svcount_t, mfloat8_t *, int64_t, svmfloat8x4_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_vnum_u64_x4)))
-void svstnt1_vnum_u64_x4(svcount_t, uint64_t *, int64_t, svuint64x4_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_vnum_f64_x4)))
-void svstnt1_vnum_f64_x4(svcount_t, float64_t *, int64_t, svfloat64x4_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_vnum_s64_x4)))
-void svstnt1_vnum_s64_x4(svcount_t, int64_t *, int64_t, svint64x4_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_vnum_u16_x4)))
-void svstnt1_vnum_u16_x4(svcount_t, uint16_t *, int64_t, svuint16x4_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_vnum_bf16_x4)))
-void svstnt1_vnum_bf16_x4(svcount_t, bfloat16_t *, int64_t, svbfloat16x4_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_vnum_f16_x4)))
-void svstnt1_vnum_f16_x4(svcount_t, float16_t *, int64_t, svfloat16x4_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_vnum_s16_x4)))
-void svstnt1_vnum_s16_x4(svcount_t, int16_t *, int64_t, svint16x4_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_vnum_u32_x4)))
-void svstnt1_vnum_u32_x4(svcount_t, uint32_t *, int64_t, svuint32x4_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_vnum_f32_x4)))
-void svstnt1_vnum_f32_x4(svcount_t, float32_t *, int64_t, svfloat32x4_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_vnum_s32_x4)))
-void svstnt1_vnum_s32_x4(svcount_t, int32_t *, int64_t, svint32x4_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilege_c8_s64)))
-svcount_t svwhilege_c8_s64(int64_t, int64_t, uint64_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilege_c32_s64)))
-svcount_t svwhilege_c32_s64(int64_t, int64_t, uint64_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilege_c64_s64)))
-svcount_t svwhilege_c64_s64(int64_t, int64_t, uint64_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilege_c16_s64)))
-svcount_t svwhilege_c16_s64(int64_t, int64_t, uint64_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilege_c8_u64)))
-svcount_t svwhilege_c8_u64(uint64_t, uint64_t, uint64_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilege_c32_u64)))
-svcount_t svwhilege_c32_u64(uint64_t, uint64_t, uint64_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilege_c64_u64)))
-svcount_t svwhilege_c64_u64(uint64_t, uint64_t, uint64_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilege_c16_u64)))
-svcount_t svwhilege_c16_u64(uint64_t, uint64_t, uint64_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilegt_c8_s64)))
-svcount_t svwhilegt_c8_s64(int64_t, int64_t, uint64_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilegt_c32_s64)))
-svcount_t svwhilegt_c32_s64(int64_t, int64_t, uint64_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilegt_c64_s64)))
-svcount_t svwhilegt_c64_s64(int64_t, int64_t, uint64_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilegt_c16_s64)))
-svcount_t svwhilegt_c16_s64(int64_t, int64_t, uint64_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilegt_c8_u64)))
-svcount_t svwhilegt_c8_u64(uint64_t, uint64_t, uint64_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilegt_c32_u64)))
-svcount_t svwhilegt_c32_u64(uint64_t, uint64_t, uint64_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilegt_c64_u64)))
-svcount_t svwhilegt_c64_u64(uint64_t, uint64_t, uint64_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilegt_c16_u64)))
-svcount_t svwhilegt_c16_u64(uint64_t, uint64_t, uint64_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilele_c8_s64)))
-svcount_t svwhilele_c8_s64(int64_t, int64_t, uint64_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilele_c32_s64)))
-svcount_t svwhilele_c32_s64(int64_t, int64_t, uint64_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilele_c64_s64)))
-svcount_t svwhilele_c64_s64(int64_t, int64_t, uint64_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilele_c16_s64)))
-svcount_t svwhilele_c16_s64(int64_t, int64_t, uint64_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilele_c8_u64)))
-svcount_t svwhilele_c8_u64(uint64_t, uint64_t, uint64_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilele_c32_u64)))
-svcount_t svwhilele_c32_u64(uint64_t, uint64_t, uint64_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilele_c64_u64)))
-svcount_t svwhilele_c64_u64(uint64_t, uint64_t, uint64_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilele_c16_u64)))
-svcount_t svwhilele_c16_u64(uint64_t, uint64_t, uint64_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilelt_c8_u64)))
-svcount_t svwhilelt_c8_u64(uint64_t, uint64_t, uint64_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilelt_c32_u64)))
-svcount_t svwhilelt_c32_u64(uint64_t, uint64_t, uint64_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilelt_c64_u64)))
-svcount_t svwhilelt_c64_u64(uint64_t, uint64_t, uint64_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilelt_c16_u64)))
-svcount_t svwhilelt_c16_u64(uint64_t, uint64_t, uint64_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilelt_c8_s64)))
-svcount_t svwhilelt_c8_s64(int64_t, int64_t, uint64_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilelt_c32_s64)))
-svcount_t svwhilelt_c32_s64(int64_t, int64_t, uint64_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilelt_c64_s64)))
-svcount_t svwhilelt_c64_s64(int64_t, int64_t, uint64_t);
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilelt_c16_s64)))
-svcount_t svwhilelt_c16_s64(int64_t, int64_t, uint64_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_u8_x2)))
-svuint8x2_t svld1_x2(svcount_t, uint8_t const *);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_s8_x2)))
-svint8x2_t svld1_x2(svcount_t, int8_t const *);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_mf8_x2)))
-svmfloat8x2_t svld1_x2(svcount_t, mfloat8_t const *);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_u64_x2)))
-svuint64x2_t svld1_x2(svcount_t, uint64_t const *);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_f64_x2)))
-svfloat64x2_t svld1_x2(svcount_t, float64_t const *);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_s64_x2)))
-svint64x2_t svld1_x2(svcount_t, int64_t const *);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_u16_x2)))
-svuint16x2_t svld1_x2(svcount_t, uint16_t const *);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_bf16_x2)))
-svbfloat16x2_t svld1_x2(svcount_t, bfloat16_t const *);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_f16_x2)))
-svfloat16x2_t svld1_x2(svcount_t, float16_t const *);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_s16_x2)))
-svint16x2_t svld1_x2(svcount_t, int16_t const *);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_u32_x2)))
-svuint32x2_t svld1_x2(svcount_t, uint32_t const *);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_f32_x2)))
-svfloat32x2_t svld1_x2(svcount_t, float32_t const *);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_s32_x2)))
-svint32x2_t svld1_x2(svcount_t, int32_t const *);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_u8_x4)))
-svuint8x4_t svld1_x4(svcount_t, uint8_t const *);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_s8_x4)))
-svint8x4_t svld1_x4(svcount_t, int8_t const *);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_mf8_x4)))
-svmfloat8x4_t svld1_x4(svcount_t, mfloat8_t const *);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_u64_x4)))
-svuint64x4_t svld1_x4(svcount_t, uint64_t const *);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_f64_x4)))
-svfloat64x4_t svld1_x4(svcount_t, float64_t const *);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_s64_x4)))
-svint64x4_t svld1_x4(svcount_t, int64_t const *);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_u16_x4)))
-svuint16x4_t svld1_x4(svcount_t, uint16_t const *);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_bf16_x4)))
-svbfloat16x4_t svld1_x4(svcount_t, bfloat16_t const *);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_f16_x4)))
-svfloat16x4_t svld1_x4(svcount_t, float16_t const *);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_s16_x4)))
-svint16x4_t svld1_x4(svcount_t, int16_t const *);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_u32_x4)))
-svuint32x4_t svld1_x4(svcount_t, uint32_t const *);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_f32_x4)))
-svfloat32x4_t svld1_x4(svcount_t, float32_t const *);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_s32_x4)))
-svint32x4_t svld1_x4(svcount_t, int32_t const *);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_vnum_u8_x2)))
-svuint8x2_t svld1_vnum_x2(svcount_t, uint8_t const *, int64_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_vnum_s8_x2)))
-svint8x2_t svld1_vnum_x2(svcount_t, int8_t const *, int64_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_vnum_mf8_x2)))
-svmfloat8x2_t svld1_vnum_x2(svcount_t, mfloat8_t const *, int64_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_vnum_u64_x2)))
-svuint64x2_t svld1_vnum_x2(svcount_t, uint64_t const *, int64_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_vnum_f64_x2)))
-svfloat64x2_t svld1_vnum_x2(svcount_t, float64_t const *, int64_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_vnum_s64_x2)))
-svint64x2_t svld1_vnum_x2(svcount_t, int64_t const *, int64_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_vnum_u16_x2)))
-svuint16x2_t svld1_vnum_x2(svcount_t, uint16_t const *, int64_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_vnum_bf16_x2)))
-svbfloat16x2_t svld1_vnum_x2(svcount_t, bfloat16_t const *, int64_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_vnum_f16_x2)))
-svfloat16x2_t svld1_vnum_x2(svcount_t, float16_t const *, int64_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_vnum_s16_x2)))
-svint16x2_t svld1_vnum_x2(svcount_t, int16_t const *, int64_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_vnum_u32_x2)))
-svuint32x2_t svld1_vnum_x2(svcount_t, uint32_t const *, int64_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_vnum_f32_x2)))
-svfloat32x2_t svld1_vnum_x2(svcount_t, float32_t const *, int64_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_vnum_s32_x2)))
-svint32x2_t svld1_vnum_x2(svcount_t, int32_t const *, int64_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_vnum_u8_x4)))
-svuint8x4_t svld1_vnum_x4(svcount_t, uint8_t const *, int64_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_vnum_s8_x4)))
-svint8x4_t svld1_vnum_x4(svcount_t, int8_t const *, int64_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_vnum_mf8_x4)))
-svmfloat8x4_t svld1_vnum_x4(svcount_t, mfloat8_t const *, int64_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_vnum_u64_x4)))
-svuint64x4_t svld1_vnum_x4(svcount_t, uint64_t const *, int64_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_vnum_f64_x4)))
-svfloat64x4_t svld1_vnum_x4(svcount_t, float64_t const *, int64_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_vnum_s64_x4)))
-svint64x4_t svld1_vnum_x4(svcount_t, int64_t const *, int64_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_vnum_u16_x4)))
-svuint16x4_t svld1_vnum_x4(svcount_t, uint16_t const *, int64_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_vnum_bf16_x4)))
-svbfloat16x4_t svld1_vnum_x4(svcount_t, bfloat16_t const *, int64_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_vnum_f16_x4)))
-svfloat16x4_t svld1_vnum_x4(svcount_t, float16_t const *, int64_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_vnum_s16_x4)))
-svint16x4_t svld1_vnum_x4(svcount_t, int16_t const *, int64_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_vnum_u32_x4)))
-svuint32x4_t svld1_vnum_x4(svcount_t, uint32_t const *, int64_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_vnum_f32_x4)))
-svfloat32x4_t svld1_vnum_x4(svcount_t, float32_t const *, int64_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_vnum_s32_x4)))
-svint32x4_t svld1_vnum_x4(svcount_t, int32_t const *, int64_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_u8_x2)))
-svuint8x2_t svldnt1_x2(svcount_t, uint8_t const *);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_s8_x2)))
-svint8x2_t svldnt1_x2(svcount_t, int8_t const *);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_mf8_x2)))
-svmfloat8x2_t svldnt1_x2(svcount_t, mfloat8_t const *);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_u64_x2)))
-svuint64x2_t svldnt1_x2(svcount_t, uint64_t const *);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_f64_x2)))
-svfloat64x2_t svldnt1_x2(svcount_t, float64_t const *);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_s64_x2)))
-svint64x2_t svldnt1_x2(svcount_t, int64_t const *);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_u16_x2)))
-svuint16x2_t svldnt1_x2(svcount_t, uint16_t const *);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_bf16_x2)))
-svbfloat16x2_t svldnt1_x2(svcount_t, bfloat16_t const *);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_f16_x2)))
-svfloat16x2_t svldnt1_x2(svcount_t, float16_t const *);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_s16_x2)))
-svint16x2_t svldnt1_x2(svcount_t, int16_t const *);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_u32_x2)))
-svuint32x2_t svldnt1_x2(svcount_t, uint32_t const *);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_f32_x2)))
-svfloat32x2_t svldnt1_x2(svcount_t, float32_t const *);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_s32_x2)))
-svint32x2_t svldnt1_x2(svcount_t, int32_t const *);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_u8_x4)))
-svuint8x4_t svldnt1_x4(svcount_t, uint8_t const *);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_s8_x4)))
-svint8x4_t svldnt1_x4(svcount_t, int8_t const *);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_mf8_x4)))
-svmfloat8x4_t svldnt1_x4(svcount_t, mfloat8_t const *);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_u64_x4)))
-svuint64x4_t svldnt1_x4(svcount_t, uint64_t const *);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_f64_x4)))
-svfloat64x4_t svldnt1_x4(svcount_t, float64_t const *);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_s64_x4)))
-svint64x4_t svldnt1_x4(svcount_t, int64_t const *);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_u16_x4)))
-svuint16x4_t svldnt1_x4(svcount_t, uint16_t const *);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_bf16_x4)))
-svbfloat16x4_t svldnt1_x4(svcount_t, bfloat16_t const *);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_f16_x4)))
-svfloat16x4_t svldnt1_x4(svcount_t, float16_t const *);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_s16_x4)))
-svint16x4_t svldnt1_x4(svcount_t, int16_t const *);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_u32_x4)))
-svuint32x4_t svldnt1_x4(svcount_t, uint32_t const *);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_f32_x4)))
-svfloat32x4_t svldnt1_x4(svcount_t, float32_t const *);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_s32_x4)))
-svint32x4_t svldnt1_x4(svcount_t, int32_t const *);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_vnum_u8_x2)))
-svuint8x2_t svldnt1_vnum_x2(svcount_t, uint8_t const *, int64_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_vnum_s8_x2)))
-svint8x2_t svldnt1_vnum_x2(svcount_t, int8_t const *, int64_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_vnum_mf8_x2)))
-svmfloat8x2_t svldnt1_vnum_x2(svcount_t, mfloat8_t const *, int64_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_vnum_u64_x2)))
-svuint64x2_t svldnt1_vnum_x2(svcount_t, uint64_t const *, int64_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_vnum_f64_x2)))
-svfloat64x2_t svldnt1_vnum_x2(svcount_t, float64_t const *, int64_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_vnum_s64_x2)))
-svint64x2_t svldnt1_vnum_x2(svcount_t, int64_t const *, int64_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_vnum_u16_x2)))
-svuint16x2_t svldnt1_vnum_x2(svcount_t, uint16_t const *, int64_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_vnum_bf16_x2)))
-svbfloat16x2_t svldnt1_vnum_x2(svcount_t, bfloat16_t const *, int64_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_vnum_f16_x2)))
-svfloat16x2_t svldnt1_vnum_x2(svcount_t, float16_t const *, int64_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_vnum_s16_x2)))
-svint16x2_t svldnt1_vnum_x2(svcount_t, int16_t const *, int64_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_vnum_u32_x2)))
-svuint32x2_t svldnt1_vnum_x2(svcount_t, uint32_t const *, int64_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_vnum_f32_x2)))
-svfloat32x2_t svldnt1_vnum_x2(svcount_t, float32_t const *, int64_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_vnum_s32_x2)))
-svint32x2_t svldnt1_vnum_x2(svcount_t, int32_t const *, int64_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_vnum_u8_x4)))
-svuint8x4_t svldnt1_vnum_x4(svcount_t, uint8_t const *, int64_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_vnum_s8_x4)))
-svint8x4_t svldnt1_vnum_x4(svcount_t, int8_t const *, int64_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_vnum_mf8_x4)))
-svmfloat8x4_t svldnt1_vnum_x4(svcount_t, mfloat8_t const *, int64_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_vnum_u64_x4)))
-svuint64x4_t svldnt1_vnum_x4(svcount_t, uint64_t const *, int64_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_vnum_f64_x4)))
-svfloat64x4_t svldnt1_vnum_x4(svcount_t, float64_t const *, int64_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_vnum_s64_x4)))
-svint64x4_t svldnt1_vnum_x4(svcount_t, int64_t const *, int64_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_vnum_u16_x4)))
-svuint16x4_t svldnt1_vnum_x4(svcount_t, uint16_t const *, int64_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_vnum_bf16_x4)))
-svbfloat16x4_t svldnt1_vnum_x4(svcount_t, bfloat16_t const *, int64_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_vnum_f16_x4)))
-svfloat16x4_t svldnt1_vnum_x4(svcount_t, float16_t const *, int64_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_vnum_s16_x4)))
-svint16x4_t svldnt1_vnum_x4(svcount_t, int16_t const *, int64_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_vnum_u32_x4)))
-svuint32x4_t svldnt1_vnum_x4(svcount_t, uint32_t const *, int64_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_vnum_f32_x4)))
-svfloat32x4_t svldnt1_vnum_x4(svcount_t, float32_t const *, int64_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_vnum_s32_x4)))
-svint32x4_t svldnt1_vnum_x4(svcount_t, int32_t const *, int64_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svreinterpret_b)))
-svbool_t svreinterpret(svcount_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svreinterpret_c)))
-svcount_t svreinterpret(svbool_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_u8_x2)))
-void svst1(svcount_t, uint8_t *, svuint8x2_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_s8_x2)))
-void svst1(svcount_t, int8_t *, svint8x2_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_mf8_x2)))
-void svst1(svcount_t, mfloat8_t *, svmfloat8x2_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_u64_x2)))
-void svst1(svcount_t, uint64_t *, svuint64x2_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_f64_x2)))
-void svst1(svcount_t, float64_t *, svfloat64x2_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_s64_x2)))
-void svst1(svcount_t, int64_t *, svint64x2_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_u16_x2)))
-void svst1(svcount_t, uint16_t *, svuint16x2_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_bf16_x2)))
-void svst1(svcount_t, bfloat16_t *, svbfloat16x2_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_f16_x2)))
-void svst1(svcount_t, float16_t *, svfloat16x2_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_s16_x2)))
-void svst1(svcount_t, int16_t *, svint16x2_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_u32_x2)))
-void svst1(svcount_t, uint32_t *, svuint32x2_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_f32_x2)))
-void svst1(svcount_t, float32_t *, svfloat32x2_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_s32_x2)))
-void svst1(svcount_t, int32_t *, svint32x2_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_u8_x4)))
-void svst1(svcount_t, uint8_t *, svuint8x4_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_s8_x4)))
-void svst1(svcount_t, int8_t *, svint8x4_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_mf8_x4)))
-void svst1(svcount_t, mfloat8_t *, svmfloat8x4_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_u64_x4)))
-void svst1(svcount_t, uint64_t *, svuint64x4_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_f64_x4)))
-void svst1(svcount_t, float64_t *, svfloat64x4_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_s64_x4)))
-void svst1(svcount_t, int64_t *, svint64x4_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_u16_x4)))
-void svst1(svcount_t, uint16_t *, svuint16x4_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_bf16_x4)))
-void svst1(svcount_t, bfloat16_t *, svbfloat16x4_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_f16_x4)))
-void svst1(svcount_t, float16_t *, svfloat16x4_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_s16_x4)))
-void svst1(svcount_t, int16_t *, svint16x4_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_u32_x4)))
-void svst1(svcount_t, uint32_t *, svuint32x4_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_f32_x4)))
-void svst1(svcount_t, float32_t *, svfloat32x4_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_s32_x4)))
-void svst1(svcount_t, int32_t *, svint32x4_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_vnum_u8_x2)))
-void svst1_vnum(svcount_t, uint8_t *, int64_t, svuint8x2_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_vnum_s8_x2)))
-void svst1_vnum(svcount_t, int8_t *, int64_t, svint8x2_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_vnum_mf8_x2)))
-void svst1_vnum(svcount_t, mfloat8_t *, int64_t, svmfloat8x2_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_vnum_u64_x2)))
-void svst1_vnum(svcount_t, uint64_t *, int64_t, svuint64x2_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_vnum_f64_x2)))
-void svst1_vnum(svcount_t, float64_t *, int64_t, svfloat64x2_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_vnum_s64_x2)))
-void svst1_vnum(svcount_t, int64_t *, int64_t, svint64x2_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_vnum_u16_x2)))
-void svst1_vnum(svcount_t, uint16_t *, int64_t, svuint16x2_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_vnum_bf16_x2)))
-void svst1_vnum(svcount_t, bfloat16_t *, int64_t, svbfloat16x2_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_vnum_f16_x2)))
-void svst1_vnum(svcount_t, float16_t *, int64_t, svfloat16x2_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_vnum_s16_x2)))
-void svst1_vnum(svcount_t, int16_t *, int64_t, svint16x2_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_vnum_u32_x2)))
-void svst1_vnum(svcount_t, uint32_t *, int64_t, svuint32x2_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_vnum_f32_x2)))
-void svst1_vnum(svcount_t, float32_t *, int64_t, svfloat32x2_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_vnum_s32_x2)))
-void svst1_vnum(svcount_t, int32_t *, int64_t, svint32x2_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_vnum_u8_x4)))
-void svst1_vnum(svcount_t, uint8_t *, int64_t, svuint8x4_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_vnum_s8_x4)))
-void svst1_vnum(svcount_t, int8_t *, int64_t, svint8x4_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_vnum_mf8_x4)))
-void svst1_vnum(svcount_t, mfloat8_t *, int64_t, svmfloat8x4_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_vnum_u64_x4)))
-void svst1_vnum(svcount_t, uint64_t *, int64_t, svuint64x4_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_vnum_f64_x4)))
-void svst1_vnum(svcount_t, float64_t *, int64_t, svfloat64x4_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_vnum_s64_x4)))
-void svst1_vnum(svcount_t, int64_t *, int64_t, svint64x4_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_vnum_u16_x4)))
-void svst1_vnum(svcount_t, uint16_t *, int64_t, svuint16x4_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_vnum_bf16_x4)))
-void svst1_vnum(svcount_t, bfloat16_t *, int64_t, svbfloat16x4_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_vnum_f16_x4)))
-void svst1_vnum(svcount_t, float16_t *, int64_t, svfloat16x4_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_vnum_s16_x4)))
-void svst1_vnum(svcount_t, int16_t *, int64_t, svint16x4_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_vnum_u32_x4)))
-void svst1_vnum(svcount_t, uint32_t *, int64_t, svuint32x4_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_vnum_f32_x4)))
-void svst1_vnum(svcount_t, float32_t *, int64_t, svfloat32x4_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_vnum_s32_x4)))
-void svst1_vnum(svcount_t, int32_t *, int64_t, svint32x4_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_u8_x2)))
-void svstnt1(svcount_t, uint8_t *, svuint8x2_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_s8_x2)))
-void svstnt1(svcount_t, int8_t *, svint8x2_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_mf8_x2)))
-void svstnt1(svcount_t, mfloat8_t *, svmfloat8x2_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_u64_x2)))
-void svstnt1(svcount_t, uint64_t *, svuint64x2_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_f64_x2)))
-void svstnt1(svcount_t, float64_t *, svfloat64x2_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_s64_x2)))
-void svstnt1(svcount_t, int64_t *, svint64x2_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_u16_x2)))
-void svstnt1(svcount_t, uint16_t *, svuint16x2_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_bf16_x2)))
-void svstnt1(svcount_t, bfloat16_t *, svbfloat16x2_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_f16_x2)))
-void svstnt1(svcount_t, float16_t *, svfloat16x2_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_s16_x2)))
-void svstnt1(svcount_t, int16_t *, svint16x2_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_u32_x2)))
-void svstnt1(svcount_t, uint32_t *, svuint32x2_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_f32_x2)))
-void svstnt1(svcount_t, float32_t *, svfloat32x2_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_s32_x2)))
-void svstnt1(svcount_t, int32_t *, svint32x2_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_u8_x4)))
-void svstnt1(svcount_t, uint8_t *, svuint8x4_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_s8_x4)))
-void svstnt1(svcount_t, int8_t *, svint8x4_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_mf8_x4)))
-void svstnt1(svcount_t, mfloat8_t *, svmfloat8x4_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_u64_x4)))
-void svstnt1(svcount_t, uint64_t *, svuint64x4_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_f64_x4)))
-void svstnt1(svcount_t, float64_t *, svfloat64x4_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_s64_x4)))
-void svstnt1(svcount_t, int64_t *, svint64x4_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_u16_x4)))
-void svstnt1(svcount_t, uint16_t *, svuint16x4_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_bf16_x4)))
-void svstnt1(svcount_t, bfloat16_t *, svbfloat16x4_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_f16_x4)))
-void svstnt1(svcount_t, float16_t *, svfloat16x4_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_s16_x4)))
-void svstnt1(svcount_t, int16_t *, svint16x4_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_u32_x4)))
-void svstnt1(svcount_t, uint32_t *, svuint32x4_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_f32_x4)))
-void svstnt1(svcount_t, float32_t *, svfloat32x4_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_s32_x4)))
-void svstnt1(svcount_t, int32_t *, svint32x4_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_vnum_u8_x2)))
-void svstnt1_vnum(svcount_t, uint8_t *, int64_t, svuint8x2_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_vnum_s8_x2)))
-void svstnt1_vnum(svcount_t, int8_t *, int64_t, svint8x2_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_vnum_mf8_x2)))
-void svstnt1_vnum(svcount_t, mfloat8_t *, int64_t, svmfloat8x2_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_vnum_u64_x2)))
-void svstnt1_vnum(svcount_t, uint64_t *, int64_t, svuint64x2_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_vnum_f64_x2)))
-void svstnt1_vnum(svcount_t, float64_t *, int64_t, svfloat64x2_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_vnum_s64_x2)))
-void svstnt1_vnum(svcount_t, int64_t *, int64_t, svint64x2_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_vnum_u16_x2)))
-void svstnt1_vnum(svcount_t, uint16_t *, int64_t, svuint16x2_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_vnum_bf16_x2)))
-void svstnt1_vnum(svcount_t, bfloat16_t *, int64_t, svbfloat16x2_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_vnum_f16_x2)))
-void svstnt1_vnum(svcount_t, float16_t *, int64_t, svfloat16x2_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_vnum_s16_x2)))
-void svstnt1_vnum(svcount_t, int16_t *, int64_t, svint16x2_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_vnum_u32_x2)))
-void svstnt1_vnum(svcount_t, uint32_t *, int64_t, svuint32x2_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_vnum_f32_x2)))
-void svstnt1_vnum(svcount_t, float32_t *, int64_t, svfloat32x2_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_vnum_s32_x2)))
-void svstnt1_vnum(svcount_t, int32_t *, int64_t, svint32x2_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_vnum_u8_x4)))
-void svstnt1_vnum(svcount_t, uint8_t *, int64_t, svuint8x4_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_vnum_s8_x4)))
-void svstnt1_vnum(svcount_t, int8_t *, int64_t, svint8x4_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_vnum_mf8_x4)))
-void svstnt1_vnum(svcount_t, mfloat8_t *, int64_t, svmfloat8x4_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_vnum_u64_x4)))
-void svstnt1_vnum(svcount_t, uint64_t *, int64_t, svuint64x4_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_vnum_f64_x4)))
-void svstnt1_vnum(svcount_t, float64_t *, int64_t, svfloat64x4_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_vnum_s64_x4)))
-void svstnt1_vnum(svcount_t, int64_t *, int64_t, svint64x4_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_vnum_u16_x4)))
-void svstnt1_vnum(svcount_t, uint16_t *, int64_t, svuint16x4_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_vnum_bf16_x4)))
-void svstnt1_vnum(svcount_t, bfloat16_t *, int64_t, svbfloat16x4_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_vnum_f16_x4)))
-void svstnt1_vnum(svcount_t, float16_t *, int64_t, svfloat16x4_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_vnum_s16_x4)))
-void svstnt1_vnum(svcount_t, int16_t *, int64_t, svint16x4_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_vnum_u32_x4)))
-void svstnt1_vnum(svcount_t, uint32_t *, int64_t, svuint32x4_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_vnum_f32_x4)))
-void svstnt1_vnum(svcount_t, float32_t *, int64_t, svfloat32x4_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_vnum_s32_x4)))
-void svstnt1_vnum(svcount_t, int32_t *, int64_t, svint32x4_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilege_c8_s64)))
-svcount_t svwhilege_c8(int64_t, int64_t, uint64_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilege_c32_s64)))
-svcount_t svwhilege_c32(int64_t, int64_t, uint64_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilege_c64_s64)))
-svcount_t svwhilege_c64(int64_t, int64_t, uint64_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilege_c16_s64)))
-svcount_t svwhilege_c16(int64_t, int64_t, uint64_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilege_c8_u64)))
-svcount_t svwhilege_c8(uint64_t, uint64_t, uint64_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilege_c32_u64)))
-svcount_t svwhilege_c32(uint64_t, uint64_t, uint64_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilege_c64_u64)))
-svcount_t svwhilege_c64(uint64_t, uint64_t, uint64_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilege_c16_u64)))
-svcount_t svwhilege_c16(uint64_t, uint64_t, uint64_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilegt_c8_s64)))
-svcount_t svwhilegt_c8(int64_t, int64_t, uint64_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilegt_c32_s64)))
-svcount_t svwhilegt_c32(int64_t, int64_t, uint64_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilegt_c64_s64)))
-svcount_t svwhilegt_c64(int64_t, int64_t, uint64_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilegt_c16_s64)))
-svcount_t svwhilegt_c16(int64_t, int64_t, uint64_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilegt_c8_u64)))
-svcount_t svwhilegt_c8(uint64_t, uint64_t, uint64_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilegt_c32_u64)))
-svcount_t svwhilegt_c32(uint64_t, uint64_t, uint64_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilegt_c64_u64)))
-svcount_t svwhilegt_c64(uint64_t, uint64_t, uint64_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilegt_c16_u64)))
-svcount_t svwhilegt_c16(uint64_t, uint64_t, uint64_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilele_c8_s64)))
-svcount_t svwhilele_c8(int64_t, int64_t, uint64_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilele_c32_s64)))
-svcount_t svwhilele_c32(int64_t, int64_t, uint64_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilele_c64_s64)))
-svcount_t svwhilele_c64(int64_t, int64_t, uint64_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilele_c16_s64)))
-svcount_t svwhilele_c16(int64_t, int64_t, uint64_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilele_c8_u64)))
-svcount_t svwhilele_c8(uint64_t, uint64_t, uint64_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilele_c32_u64)))
-svcount_t svwhilele_c32(uint64_t, uint64_t, uint64_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilele_c64_u64)))
-svcount_t svwhilele_c64(uint64_t, uint64_t, uint64_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilele_c16_u64)))
-svcount_t svwhilele_c16(uint64_t, uint64_t, uint64_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilelt_c8_u64)))
-svcount_t svwhilelt_c8(uint64_t, uint64_t, uint64_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilelt_c32_u64)))
-svcount_t svwhilelt_c32(uint64_t, uint64_t, uint64_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilelt_c64_u64)))
-svcount_t svwhilelt_c64(uint64_t, uint64_t, uint64_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilelt_c16_u64)))
-svcount_t svwhilelt_c16(uint64_t, uint64_t, uint64_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilelt_c8_s64)))
-svcount_t svwhilelt_c8(int64_t, int64_t, uint64_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilelt_c32_s64)))
-svcount_t svwhilelt_c32(int64_t, int64_t, uint64_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilelt_c64_s64)))
-svcount_t svwhilelt_c64(int64_t, int64_t, uint64_t);
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilelt_c16_s64)))
-svcount_t svwhilelt_c16(int64_t, int64_t, uint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvtlt_f32_f16_z)))
+svfloat32_t svcvtlt_f32_f16_z(svbool_t, svfloat16_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvtlt_f64_f32_z)))
+svfloat64_t svcvtlt_f64_f32_z(svbool_t, svfloat32_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvtnt_bf16_f32_z)))
+svbfloat16_t svcvtnt_bf16_f32_z(svbfloat16_t, svbool_t, svfloat32_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvtnt_f16_f32_z)))
+svfloat16_t svcvtnt_f16_f32_z(svfloat16_t, svbool_t, svfloat32_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvtnt_f32_f64_z)))
+svfloat32_t svcvtnt_f32_f64_z(svfloat32_t, svbool_t, svfloat64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvtxnt_f32_f64_z)))
+svfloat32_t svcvtxnt_f32_f64_z(svfloat32_t, svbool_t, svfloat64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svfirstp_b8)))
+int64_t svfirstp_b8(svbool_t, svbool_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svfirstp_b32)))
+int64_t svfirstp_b32(svbool_t, svbool_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svfirstp_b64)))
+int64_t svfirstp_b64(svbool_t, svbool_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svfirstp_b16)))
+int64_t svfirstp_b16(svbool_t, svbool_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlastp_b8)))
+int64_t svlastp_b8(svbool_t, svbool_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlastp_b32)))
+int64_t svlastp_b32(svbool_t, svbool_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlastp_b64)))
+int64_t svlastp_b64(svbool_t, svbool_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlastp_b16)))
+int64_t svlastp_b16(svbool_t, svbool_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrint32x_f64_m)))
+svfloat64_t svrint32x_f64_m(svfloat64_t, svbool_t, svfloat64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrint32x_f32_m)))
+svfloat32_t svrint32x_f32_m(svfloat32_t, svbool_t, svfloat32_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrint32x_f64_x)))
+svfloat64_t svrint32x_f64_x(svbool_t, svfloat64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrint32x_f32_x)))
+svfloat32_t svrint32x_f32_x(svbool_t, svfloat32_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrint32x_f64_z)))
+svfloat64_t svrint32x_f64_z(svbool_t, svfloat64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrint32x_f32_z)))
+svfloat32_t svrint32x_f32_z(svbool_t, svfloat32_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrint32z_f64_m)))
+svfloat64_t svrint32z_f64_m(svfloat64_t, svbool_t, svfloat64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrint32z_f32_m)))
+svfloat32_t svrint32z_f32_m(svfloat32_t, svbool_t, svfloat32_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrint32z_f64_x)))
+svfloat64_t svrint32z_f64_x(svbool_t, svfloat64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrint32z_f32_x)))
+svfloat32_t svrint32z_f32_x(svbool_t, svfloat32_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrint32z_f64_z)))
+svfloat64_t svrint32z_f64_z(svbool_t, svfloat64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrint32z_f32_z)))
+svfloat32_t svrint32z_f32_z(svbool_t, svfloat32_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrint64x_f64_m)))
+svfloat64_t svrint64x_f64_m(svfloat64_t, svbool_t, svfloat64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrint64x_f32_m)))
+svfloat32_t svrint64x_f32_m(svfloat32_t, svbool_t, svfloat32_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrint64x_f64_x)))
+svfloat64_t svrint64x_f64_x(svbool_t, svfloat64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrint64x_f32_x)))
+svfloat32_t svrint64x_f32_x(svbool_t, svfloat32_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrint64x_f64_z)))
+svfloat64_t svrint64x_f64_z(svbool_t, svfloat64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrint64x_f32_z)))
+svfloat32_t svrint64x_f32_z(svbool_t, svfloat32_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrint64z_f64_m)))
+svfloat64_t svrint64z_f64_m(svfloat64_t, svbool_t, svfloat64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrint64z_f32_m)))
+svfloat32_t svrint64z_f32_m(svfloat32_t, svbool_t, svfloat32_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrint64z_f64_x)))
+svfloat64_t svrint64z_f64_x(svbool_t, svfloat64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrint64z_f32_x)))
+svfloat32_t svrint64z_f32_x(svbool_t, svfloat32_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrint64z_f64_z)))
+svfloat64_t svrint64z_f64_z(svbool_t, svfloat64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrint64z_f32_z)))
+svfloat32_t svrint64z_f32_z(svbool_t, svfloat32_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvtlt_f32_f16_z)))
+svfloat32_t svcvtlt_f32_z(svbool_t, svfloat16_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvtlt_f64_f32_z)))
+svfloat64_t svcvtlt_f64_z(svbool_t, svfloat32_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvtnt_bf16_f32_z)))
+svbfloat16_t svcvtnt_bf16_z(svbfloat16_t, svbool_t, svfloat32_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvtnt_f16_f32_z)))
+svfloat16_t svcvtnt_f16_z(svfloat16_t, svbool_t, svfloat32_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvtnt_f32_f64_z)))
+svfloat32_t svcvtnt_f32_z(svfloat32_t, svbool_t, svfloat64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvtxnt_f32_f64_z)))
+svfloat32_t svcvtxnt_f32_z(svfloat32_t, svbool_t, svfloat64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrint32x_f64_m)))
+svfloat64_t svrint32x_m(svfloat64_t, svbool_t, svfloat64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrint32x_f32_m)))
+svfloat32_t svrint32x_m(svfloat32_t, svbool_t, svfloat32_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrint32x_f64_x)))
+svfloat64_t svrint32x_x(svbool_t, svfloat64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrint32x_f32_x)))
+svfloat32_t svrint32x_x(svbool_t, svfloat32_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrint32x_f64_z)))
+svfloat64_t svrint32x_z(svbool_t, svfloat64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrint32x_f32_z)))
+svfloat32_t svrint32x_z(svbool_t, svfloat32_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrint32z_f64_m)))
+svfloat64_t svrint32z_m(svfloat64_t, svbool_t, svfloat64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrint32z_f32_m)))
+svfloat32_t svrint32z_m(svfloat32_t, svbool_t, svfloat32_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrint32z_f64_x)))
+svfloat64_t svrint32z_x(svbool_t, svfloat64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrint32z_f32_x)))
+svfloat32_t svrint32z_x(svbool_t, svfloat32_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrint32z_f64_z)))
+svfloat64_t svrint32z_z(svbool_t, svfloat64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrint32z_f32_z)))
+svfloat32_t svrint32z_z(svbool_t, svfloat32_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrint64x_f64_m)))
+svfloat64_t svrint64x_m(svfloat64_t, svbool_t, svfloat64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrint64x_f32_m)))
+svfloat32_t svrint64x_m(svfloat32_t, svbool_t, svfloat32_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrint64x_f64_x)))
+svfloat64_t svrint64x_x(svbool_t, svfloat64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrint64x_f32_x)))
+svfloat32_t svrint64x_x(svbool_t, svfloat32_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrint64x_f64_z)))
+svfloat64_t svrint64x_z(svbool_t, svfloat64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrint64x_f32_z)))
+svfloat32_t svrint64x_z(svbool_t, svfloat32_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrint64z_f64_m)))
+svfloat64_t svrint64z_m(svfloat64_t, svbool_t, svfloat64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrint64z_f32_m)))
+svfloat32_t svrint64z_m(svfloat32_t, svbool_t, svfloat32_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrint64z_f64_x)))
+svfloat64_t svrint64z_x(svbool_t, svfloat64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrint64z_f32_x)))
+svfloat32_t svrint64z_x(svbool_t, svfloat32_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrint64z_f64_z)))
+svfloat64_t svrint64z_z(svbool_t, svfloat64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrint64z_f32_z)))
+svfloat32_t svrint64z_z(svbool_t, svfloat32_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcompact_u8)))
+svuint8_t svcompact_u8(svbool_t, svuint8_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcompact_u16)))
+svuint16_t svcompact_u16(svbool_t, svuint16_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcompact_bf16)))
+svbfloat16_t svcompact_bf16(svbool_t, svbfloat16_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcompact_s8)))
+svint8_t svcompact_s8(svbool_t, svint8_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcompact_f16)))
+svfloat16_t svcompact_f16(svbool_t, svfloat16_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcompact_mf8)))
+svmfloat8_t svcompact_mf8(svbool_t, svmfloat8_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcompact_s16)))
+svint16_t svcompact_s16(svbool_t, svint16_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svexpand_u8)))
+svuint8_t svexpand_u8(svbool_t, svuint8_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svexpand_u32)))
+svuint32_t svexpand_u32(svbool_t, svuint32_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svexpand_u64)))
+svuint64_t svexpand_u64(svbool_t, svuint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svexpand_u16)))
+svuint16_t svexpand_u16(svbool_t, svuint16_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svexpand_bf16)))
+svbfloat16_t svexpand_bf16(svbool_t, svbfloat16_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svexpand_s8)))
+svint8_t svexpand_s8(svbool_t, svint8_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svexpand_f64)))
+svfloat64_t svexpand_f64(svbool_t, svfloat64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svexpand_f32)))
+svfloat32_t svexpand_f32(svbool_t, svfloat32_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svexpand_f16)))
+svfloat16_t svexpand_f16(svbool_t, svfloat16_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svexpand_s32)))
+svint32_t svexpand_s32(svbool_t, svint32_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svexpand_s64)))
+svint64_t svexpand_s64(svbool_t, svint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svexpand_mf8)))
+svmfloat8_t svexpand_mf8(svbool_t, svmfloat8_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svexpand_s16)))
+svint16_t svexpand_s16(svbool_t, svint16_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcompact_u8)))
+svuint8_t svcompact(svbool_t, svuint8_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcompact_u16)))
+svuint16_t svcompact(svbool_t, svuint16_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcompact_bf16)))
+svbfloat16_t svcompact(svbool_t, svbfloat16_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcompact_s8)))
+svint8_t svcompact(svbool_t, svint8_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcompact_f16)))
+svfloat16_t svcompact(svbool_t, svfloat16_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcompact_mf8)))
+svmfloat8_t svcompact(svbool_t, svmfloat8_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcompact_s16)))
+svint16_t svcompact(svbool_t, svint16_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svexpand_u8)))
+svuint8_t svexpand(svbool_t, svuint8_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svexpand_u32)))
+svuint32_t svexpand(svbool_t, svuint32_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svexpand_u64)))
+svuint64_t svexpand(svbool_t, svuint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svexpand_u16)))
+svuint16_t svexpand(svbool_t, svuint16_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svexpand_bf16)))
+svbfloat16_t svexpand(svbool_t, svbfloat16_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svexpand_s8)))
+svint8_t svexpand(svbool_t, svint8_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svexpand_f64)))
+svfloat64_t svexpand(svbool_t, svfloat64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svexpand_f32)))
+svfloat32_t svexpand(svbool_t, svfloat32_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svexpand_f16)))
+svfloat16_t svexpand(svbool_t, svfloat16_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svexpand_s32)))
+svint32_t svexpand(svbool_t, svint32_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svexpand_s64)))
+svint64_t svexpand(svbool_t, svint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svexpand_mf8)))
+svmfloat8_t svexpand(svbool_t, svmfloat8_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svexpand_s16)))
+svint16_t svexpand(svbool_t, svint16_t);
 __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaba_n_s8)))
 svint8_t svaba_n_s8(svint8_t, svint8_t, int8_t);
 __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaba_n_s32)))
@@ -19059,6 +15573,3848 @@ __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svxar_n_s64)))
 svint64_t svxar(svint64_t, svint64_t, uint64_t);
 __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svxar_n_s16)))
 svint16_t svxar(svint16_t, svint16_t, uint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svamax_n_f64_m)))
+svfloat64_t svamax_n_f64_m(svbool_t, svfloat64_t, float64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svamax_n_f32_m)))
+svfloat32_t svamax_n_f32_m(svbool_t, svfloat32_t, float32_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svamax_n_f16_m)))
+svfloat16_t svamax_n_f16_m(svbool_t, svfloat16_t, float16_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svamax_n_f64_x)))
+svfloat64_t svamax_n_f64_x(svbool_t, svfloat64_t, float64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svamax_n_f32_x)))
+svfloat32_t svamax_n_f32_x(svbool_t, svfloat32_t, float32_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svamax_n_f16_x)))
+svfloat16_t svamax_n_f16_x(svbool_t, svfloat16_t, float16_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svamax_n_f64_z)))
+svfloat64_t svamax_n_f64_z(svbool_t, svfloat64_t, float64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svamax_n_f32_z)))
+svfloat32_t svamax_n_f32_z(svbool_t, svfloat32_t, float32_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svamax_n_f16_z)))
+svfloat16_t svamax_n_f16_z(svbool_t, svfloat16_t, float16_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svamax_f64_m)))
+svfloat64_t svamax_f64_m(svbool_t, svfloat64_t, svfloat64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svamax_f32_m)))
+svfloat32_t svamax_f32_m(svbool_t, svfloat32_t, svfloat32_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svamax_f16_m)))
+svfloat16_t svamax_f16_m(svbool_t, svfloat16_t, svfloat16_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svamax_f64_x)))
+svfloat64_t svamax_f64_x(svbool_t, svfloat64_t, svfloat64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svamax_f32_x)))
+svfloat32_t svamax_f32_x(svbool_t, svfloat32_t, svfloat32_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svamax_f16_x)))
+svfloat16_t svamax_f16_x(svbool_t, svfloat16_t, svfloat16_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svamax_f64_z)))
+svfloat64_t svamax_f64_z(svbool_t, svfloat64_t, svfloat64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svamax_f32_z)))
+svfloat32_t svamax_f32_z(svbool_t, svfloat32_t, svfloat32_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svamax_f16_z)))
+svfloat16_t svamax_f16_z(svbool_t, svfloat16_t, svfloat16_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svamin_n_f64_m)))
+svfloat64_t svamin_n_f64_m(svbool_t, svfloat64_t, float64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svamin_n_f32_m)))
+svfloat32_t svamin_n_f32_m(svbool_t, svfloat32_t, float32_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svamin_n_f16_m)))
+svfloat16_t svamin_n_f16_m(svbool_t, svfloat16_t, float16_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svamin_n_f64_x)))
+svfloat64_t svamin_n_f64_x(svbool_t, svfloat64_t, float64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svamin_n_f32_x)))
+svfloat32_t svamin_n_f32_x(svbool_t, svfloat32_t, float32_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svamin_n_f16_x)))
+svfloat16_t svamin_n_f16_x(svbool_t, svfloat16_t, float16_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svamin_n_f64_z)))
+svfloat64_t svamin_n_f64_z(svbool_t, svfloat64_t, float64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svamin_n_f32_z)))
+svfloat32_t svamin_n_f32_z(svbool_t, svfloat32_t, float32_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svamin_n_f16_z)))
+svfloat16_t svamin_n_f16_z(svbool_t, svfloat16_t, float16_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svamin_f64_m)))
+svfloat64_t svamin_f64_m(svbool_t, svfloat64_t, svfloat64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svamin_f32_m)))
+svfloat32_t svamin_f32_m(svbool_t, svfloat32_t, svfloat32_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svamin_f16_m)))
+svfloat16_t svamin_f16_m(svbool_t, svfloat16_t, svfloat16_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svamin_f64_x)))
+svfloat64_t svamin_f64_x(svbool_t, svfloat64_t, svfloat64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svamin_f32_x)))
+svfloat32_t svamin_f32_x(svbool_t, svfloat32_t, svfloat32_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svamin_f16_x)))
+svfloat16_t svamin_f16_x(svbool_t, svfloat16_t, svfloat16_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svamin_f64_z)))
+svfloat64_t svamin_f64_z(svbool_t, svfloat64_t, svfloat64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svamin_f32_z)))
+svfloat32_t svamin_f32_z(svbool_t, svfloat32_t, svfloat32_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svamin_f16_z)))
+svfloat16_t svamin_f16_z(svbool_t, svfloat16_t, svfloat16_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svamax_n_f64_m)))
+svfloat64_t svamax_m(svbool_t, svfloat64_t, float64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svamax_n_f32_m)))
+svfloat32_t svamax_m(svbool_t, svfloat32_t, float32_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svamax_n_f16_m)))
+svfloat16_t svamax_m(svbool_t, svfloat16_t, float16_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svamax_n_f64_x)))
+svfloat64_t svamax_x(svbool_t, svfloat64_t, float64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svamax_n_f32_x)))
+svfloat32_t svamax_x(svbool_t, svfloat32_t, float32_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svamax_n_f16_x)))
+svfloat16_t svamax_x(svbool_t, svfloat16_t, float16_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svamax_n_f64_z)))
+svfloat64_t svamax_z(svbool_t, svfloat64_t, float64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svamax_n_f32_z)))
+svfloat32_t svamax_z(svbool_t, svfloat32_t, float32_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svamax_n_f16_z)))
+svfloat16_t svamax_z(svbool_t, svfloat16_t, float16_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svamax_f64_m)))
+svfloat64_t svamax_m(svbool_t, svfloat64_t, svfloat64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svamax_f32_m)))
+svfloat32_t svamax_m(svbool_t, svfloat32_t, svfloat32_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svamax_f16_m)))
+svfloat16_t svamax_m(svbool_t, svfloat16_t, svfloat16_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svamax_f64_x)))
+svfloat64_t svamax_x(svbool_t, svfloat64_t, svfloat64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svamax_f32_x)))
+svfloat32_t svamax_x(svbool_t, svfloat32_t, svfloat32_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svamax_f16_x)))
+svfloat16_t svamax_x(svbool_t, svfloat16_t, svfloat16_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svamax_f64_z)))
+svfloat64_t svamax_z(svbool_t, svfloat64_t, svfloat64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svamax_f32_z)))
+svfloat32_t svamax_z(svbool_t, svfloat32_t, svfloat32_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svamax_f16_z)))
+svfloat16_t svamax_z(svbool_t, svfloat16_t, svfloat16_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svamin_n_f64_m)))
+svfloat64_t svamin_m(svbool_t, svfloat64_t, float64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svamin_n_f32_m)))
+svfloat32_t svamin_m(svbool_t, svfloat32_t, float32_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svamin_n_f16_m)))
+svfloat16_t svamin_m(svbool_t, svfloat16_t, float16_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svamin_n_f64_x)))
+svfloat64_t svamin_x(svbool_t, svfloat64_t, float64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svamin_n_f32_x)))
+svfloat32_t svamin_x(svbool_t, svfloat32_t, float32_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svamin_n_f16_x)))
+svfloat16_t svamin_x(svbool_t, svfloat16_t, float16_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svamin_n_f64_z)))
+svfloat64_t svamin_z(svbool_t, svfloat64_t, float64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svamin_n_f32_z)))
+svfloat32_t svamin_z(svbool_t, svfloat32_t, float32_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svamin_n_f16_z)))
+svfloat16_t svamin_z(svbool_t, svfloat16_t, float16_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svamin_f64_m)))
+svfloat64_t svamin_m(svbool_t, svfloat64_t, svfloat64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svamin_f32_m)))
+svfloat32_t svamin_m(svbool_t, svfloat32_t, svfloat32_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svamin_f16_m)))
+svfloat16_t svamin_m(svbool_t, svfloat16_t, svfloat16_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svamin_f64_x)))
+svfloat64_t svamin_x(svbool_t, svfloat64_t, svfloat64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svamin_f32_x)))
+svfloat32_t svamin_x(svbool_t, svfloat32_t, svfloat32_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svamin_f16_x)))
+svfloat16_t svamin_x(svbool_t, svfloat16_t, svfloat16_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svamin_f64_z)))
+svfloat64_t svamin_z(svbool_t, svfloat64_t, svfloat64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svamin_f32_z)))
+svfloat32_t svamin_z(svbool_t, svfloat32_t, svfloat32_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svamin_f16_z)))
+svfloat16_t svamin_z(svbool_t, svfloat16_t, svfloat16_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt1_bf16_mf8_fpm)))
+svbfloat16_t svcvt1_bf16_mf8_fpm(svmfloat8_t, fpm_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt1_f16_mf8_fpm)))
+svfloat16_t svcvt1_f16_mf8_fpm(svmfloat8_t, fpm_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt2_bf16_mf8_fpm)))
+svbfloat16_t svcvt2_bf16_mf8_fpm(svmfloat8_t, fpm_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt2_f16_mf8_fpm)))
+svfloat16_t svcvt2_f16_mf8_fpm(svmfloat8_t, fpm_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvtlt1_bf16_mf8_fpm)))
+svbfloat16_t svcvtlt1_bf16_mf8_fpm(svmfloat8_t, fpm_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvtlt1_f16_mf8_fpm)))
+svfloat16_t svcvtlt1_f16_mf8_fpm(svmfloat8_t, fpm_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvtlt2_bf16_mf8_fpm)))
+svbfloat16_t svcvtlt2_bf16_mf8_fpm(svmfloat8_t, fpm_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvtlt2_f16_mf8_fpm)))
+svfloat16_t svcvtlt2_f16_mf8_fpm(svmfloat8_t, fpm_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvtn_mf8_bf16_x2_fpm)))
+svmfloat8_t svcvtn_mf8_bf16_x2_fpm(svbfloat16x2_t, fpm_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvtn_mf8_f16_x2_fpm)))
+svmfloat8_t svcvtn_mf8_f16_x2_fpm(svfloat16x2_t, fpm_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvtnb_mf8_f32_x2_fpm)))
+svmfloat8_t svcvtnb_mf8_f32_x2_fpm(svfloat32x2_t, fpm_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvtnt_mf8_f32_x2_fpm)))
+svmfloat8_t svcvtnt_mf8_f32_x2_fpm(svmfloat8_t, svfloat32x2_t, fpm_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt1_bf16_mf8_fpm)))
+svbfloat16_t svcvt1_bf16_fpm(svmfloat8_t, fpm_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt1_f16_mf8_fpm)))
+svfloat16_t svcvt1_f16_fpm(svmfloat8_t, fpm_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt2_bf16_mf8_fpm)))
+svbfloat16_t svcvt2_bf16_fpm(svmfloat8_t, fpm_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt2_f16_mf8_fpm)))
+svfloat16_t svcvt2_f16_fpm(svmfloat8_t, fpm_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvtlt1_bf16_mf8_fpm)))
+svbfloat16_t svcvtlt1_bf16_fpm(svmfloat8_t, fpm_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvtlt1_f16_mf8_fpm)))
+svfloat16_t svcvtlt1_f16_fpm(svmfloat8_t, fpm_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvtlt2_bf16_mf8_fpm)))
+svbfloat16_t svcvtlt2_bf16_fpm(svmfloat8_t, fpm_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvtlt2_f16_mf8_fpm)))
+svfloat16_t svcvtlt2_f16_fpm(svmfloat8_t, fpm_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvtn_mf8_bf16_x2_fpm)))
+svmfloat8_t svcvtn_mf8_fpm(svbfloat16x2_t, fpm_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvtn_mf8_f16_x2_fpm)))
+svmfloat8_t svcvtn_mf8_fpm(svfloat16x2_t, fpm_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvtnb_mf8_f32_x2_fpm)))
+svmfloat8_t svcvtnb_mf8_fpm(svfloat32x2_t, fpm_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvtnt_mf8_f32_x2_fpm)))
+svmfloat8_t svcvtnt_mf8_fpm(svmfloat8_t, svfloat32x2_t, fpm_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svluti2_lane_u8)))
+svuint8_t svluti2_lane_u8(svuint8_t, svuint8_t, uint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svluti2_lane_s8)))
+svint8_t svluti2_lane_s8(svint8_t, svuint8_t, uint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svluti2_lane_u16)))
+svuint16_t svluti2_lane_u16(svuint16_t, svuint8_t, uint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svluti2_lane_bf16)))
+svbfloat16_t svluti2_lane_bf16(svbfloat16_t, svuint8_t, uint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svluti2_lane_f16)))
+svfloat16_t svluti2_lane_f16(svfloat16_t, svuint8_t, uint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svluti2_lane_s16)))
+svint16_t svluti2_lane_s16(svint16_t, svuint8_t, uint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svluti4_lane_u8)))
+svuint8_t svluti4_lane_u8(svuint8_t, svuint8_t, uint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svluti4_lane_s8)))
+svint8_t svluti4_lane_s8(svint8_t, svuint8_t, uint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svluti4_lane_u16)))
+svuint16_t svluti4_lane_u16(svuint16_t, svuint8_t, uint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svluti4_lane_bf16)))
+svbfloat16_t svluti4_lane_bf16(svbfloat16_t, svuint8_t, uint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svluti4_lane_f16)))
+svfloat16_t svluti4_lane_f16(svfloat16_t, svuint8_t, uint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svluti4_lane_s16)))
+svint16_t svluti4_lane_s16(svint16_t, svuint8_t, uint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svluti4_lane_u16_x2)))
+svuint16_t svluti4_lane_u16_x2(svuint16x2_t, svuint8_t, uint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svluti4_lane_bf16_x2)))
+svbfloat16_t svluti4_lane_bf16_x2(svbfloat16x2_t, svuint8_t, uint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svluti4_lane_f16_x2)))
+svfloat16_t svluti4_lane_f16_x2(svfloat16x2_t, svuint8_t, uint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svluti4_lane_s16_x2)))
+svint16_t svluti4_lane_s16_x2(svint16x2_t, svuint8_t, uint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svluti2_lane_u8)))
+svuint8_t svluti2_lane(svuint8_t, svuint8_t, uint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svluti2_lane_s8)))
+svint8_t svluti2_lane(svint8_t, svuint8_t, uint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svluti2_lane_u16)))
+svuint16_t svluti2_lane(svuint16_t, svuint8_t, uint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svluti2_lane_bf16)))
+svbfloat16_t svluti2_lane(svbfloat16_t, svuint8_t, uint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svluti2_lane_f16)))
+svfloat16_t svluti2_lane(svfloat16_t, svuint8_t, uint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svluti2_lane_s16)))
+svint16_t svluti2_lane(svint16_t, svuint8_t, uint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svluti4_lane_u8)))
+svuint8_t svluti4_lane(svuint8_t, svuint8_t, uint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svluti4_lane_s8)))
+svint8_t svluti4_lane(svint8_t, svuint8_t, uint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svluti4_lane_u16)))
+svuint16_t svluti4_lane(svuint16_t, svuint8_t, uint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svluti4_lane_bf16)))
+svbfloat16_t svluti4_lane(svbfloat16_t, svuint8_t, uint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svluti4_lane_f16)))
+svfloat16_t svluti4_lane(svfloat16_t, svuint8_t, uint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svluti4_lane_s16)))
+svint16_t svluti4_lane(svint16_t, svuint8_t, uint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svluti4_lane_u16_x2)))
+svuint16_t svluti4_lane(svuint16x2_t, svuint8_t, uint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svluti4_lane_bf16_x2)))
+svbfloat16_t svluti4_lane(svbfloat16x2_t, svuint8_t, uint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svluti4_lane_f16_x2)))
+svfloat16_t svluti4_lane(svfloat16x2_t, svuint8_t, uint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svluti4_lane_s16_x2)))
+svint16_t svluti4_lane(svint16x2_t, svuint8_t, uint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbfmmla_f32)))
+svfloat32_t svbfmmla_f32(svfloat32_t, svbfloat16_t, svbfloat16_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbfmmla_f32)))
+svfloat32_t svbfmmla(svfloat32_t, svbfloat16_t, svbfloat16_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbfdot_n_f32)))
+svfloat32_t svbfdot_n_f32(svfloat32_t, svbfloat16_t, bfloat16_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbfdot_f32)))
+svfloat32_t svbfdot_f32(svfloat32_t, svbfloat16_t, svbfloat16_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbfdot_lane_f32)))
+svfloat32_t svbfdot_lane_f32(svfloat32_t, svbfloat16_t, svbfloat16_t, uint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbfmlalb_n_f32)))
+svfloat32_t svbfmlalb_n_f32(svfloat32_t, svbfloat16_t, bfloat16_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbfmlalb_f32)))
+svfloat32_t svbfmlalb_f32(svfloat32_t, svbfloat16_t, svbfloat16_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbfmlalb_lane_f32)))
+svfloat32_t svbfmlalb_lane_f32(svfloat32_t, svbfloat16_t, svbfloat16_t, uint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbfmlalt_n_f32)))
+svfloat32_t svbfmlalt_n_f32(svfloat32_t, svbfloat16_t, bfloat16_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbfmlalt_f32)))
+svfloat32_t svbfmlalt_f32(svfloat32_t, svbfloat16_t, svbfloat16_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbfmlalt_lane_f32)))
+svfloat32_t svbfmlalt_lane_f32(svfloat32_t, svbfloat16_t, svbfloat16_t, uint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt_bf16_f32_m)))
+svbfloat16_t svcvt_bf16_f32_m(svbfloat16_t, svbool_t, svfloat32_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt_bf16_f32_x)))
+svbfloat16_t svcvt_bf16_f32_x(svbool_t, svfloat32_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt_bf16_f32_z)))
+svbfloat16_t svcvt_bf16_f32_z(svbool_t, svfloat32_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvtnt_bf16_f32_m)))
+svbfloat16_t svcvtnt_bf16_f32_m(svbfloat16_t, svbool_t, svfloat32_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbfdot_n_f32)))
+svfloat32_t svbfdot(svfloat32_t, svbfloat16_t, bfloat16_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbfdot_f32)))
+svfloat32_t svbfdot(svfloat32_t, svbfloat16_t, svbfloat16_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbfdot_lane_f32)))
+svfloat32_t svbfdot_lane(svfloat32_t, svbfloat16_t, svbfloat16_t, uint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbfmlalb_n_f32)))
+svfloat32_t svbfmlalb(svfloat32_t, svbfloat16_t, bfloat16_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbfmlalb_f32)))
+svfloat32_t svbfmlalb(svfloat32_t, svbfloat16_t, svbfloat16_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbfmlalb_lane_f32)))
+svfloat32_t svbfmlalb_lane(svfloat32_t, svbfloat16_t, svbfloat16_t, uint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbfmlalt_n_f32)))
+svfloat32_t svbfmlalt(svfloat32_t, svbfloat16_t, bfloat16_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbfmlalt_f32)))
+svfloat32_t svbfmlalt(svfloat32_t, svbfloat16_t, svbfloat16_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbfmlalt_lane_f32)))
+svfloat32_t svbfmlalt_lane(svfloat32_t, svbfloat16_t, svbfloat16_t, uint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt_bf16_f32_m)))
+svbfloat16_t svcvt_bf16_m(svbfloat16_t, svbool_t, svfloat32_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt_bf16_f32_x)))
+svbfloat16_t svcvt_bf16_x(svbool_t, svfloat32_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt_bf16_f32_z)))
+svbfloat16_t svcvt_bf16_z(svbool_t, svfloat32_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvtnt_bf16_f32_m)))
+svbfloat16_t svcvtnt_bf16_m(svbfloat16_t, svbool_t, svfloat32_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmmla_f32)))
+svfloat32_t svmmla_f32(svfloat32_t, svfloat32_t, svfloat32_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmmla_f32)))
+svfloat32_t svmmla(svfloat32_t, svfloat32_t, svfloat32_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1ro_u8)))
+svuint8_t svld1ro_u8(svbool_t, uint8_t const *);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1ro_u32)))
+svuint32_t svld1ro_u32(svbool_t, uint32_t const *);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1ro_u64)))
+svuint64_t svld1ro_u64(svbool_t, uint64_t const *);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1ro_u16)))
+svuint16_t svld1ro_u16(svbool_t, uint16_t const *);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1ro_bf16)))
+svbfloat16_t svld1ro_bf16(svbool_t, bfloat16_t const *);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1ro_s8)))
+svint8_t svld1ro_s8(svbool_t, int8_t const *);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1ro_f64)))
+svfloat64_t svld1ro_f64(svbool_t, float64_t const *);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1ro_f32)))
+svfloat32_t svld1ro_f32(svbool_t, float32_t const *);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1ro_f16)))
+svfloat16_t svld1ro_f16(svbool_t, float16_t const *);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1ro_s32)))
+svint32_t svld1ro_s32(svbool_t, int32_t const *);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1ro_s64)))
+svint64_t svld1ro_s64(svbool_t, int64_t const *);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1ro_mf8)))
+svmfloat8_t svld1ro_mf8(svbool_t, mfloat8_t const *);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1ro_s16)))
+svint16_t svld1ro_s16(svbool_t, int16_t const *);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmmla_f64)))
+svfloat64_t svmmla_f64(svfloat64_t, svfloat64_t, svfloat64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtrn1q_u8)))
+svuint8_t svtrn1q_u8(svuint8_t, svuint8_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtrn1q_u32)))
+svuint32_t svtrn1q_u32(svuint32_t, svuint32_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtrn1q_u64)))
+svuint64_t svtrn1q_u64(svuint64_t, svuint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtrn1q_u16)))
+svuint16_t svtrn1q_u16(svuint16_t, svuint16_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtrn1q_bf16)))
+svbfloat16_t svtrn1q_bf16(svbfloat16_t, svbfloat16_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtrn1q_s8)))
+svint8_t svtrn1q_s8(svint8_t, svint8_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtrn1q_f64)))
+svfloat64_t svtrn1q_f64(svfloat64_t, svfloat64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtrn1q_f32)))
+svfloat32_t svtrn1q_f32(svfloat32_t, svfloat32_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtrn1q_f16)))
+svfloat16_t svtrn1q_f16(svfloat16_t, svfloat16_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtrn1q_s32)))
+svint32_t svtrn1q_s32(svint32_t, svint32_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtrn1q_s64)))
+svint64_t svtrn1q_s64(svint64_t, svint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtrn1q_s16)))
+svint16_t svtrn1q_s16(svint16_t, svint16_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtrn2q_u8)))
+svuint8_t svtrn2q_u8(svuint8_t, svuint8_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtrn2q_u32)))
+svuint32_t svtrn2q_u32(svuint32_t, svuint32_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtrn2q_u64)))
+svuint64_t svtrn2q_u64(svuint64_t, svuint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtrn2q_u16)))
+svuint16_t svtrn2q_u16(svuint16_t, svuint16_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtrn2q_bf16)))
+svbfloat16_t svtrn2q_bf16(svbfloat16_t, svbfloat16_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtrn2q_s8)))
+svint8_t svtrn2q_s8(svint8_t, svint8_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtrn2q_f64)))
+svfloat64_t svtrn2q_f64(svfloat64_t, svfloat64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtrn2q_f32)))
+svfloat32_t svtrn2q_f32(svfloat32_t, svfloat32_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtrn2q_f16)))
+svfloat16_t svtrn2q_f16(svfloat16_t, svfloat16_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtrn2q_s32)))
+svint32_t svtrn2q_s32(svint32_t, svint32_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtrn2q_s64)))
+svint64_t svtrn2q_s64(svint64_t, svint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtrn2q_s16)))
+svint16_t svtrn2q_s16(svint16_t, svint16_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp1q_u8)))
+svuint8_t svuzp1q_u8(svuint8_t, svuint8_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp1q_u32)))
+svuint32_t svuzp1q_u32(svuint32_t, svuint32_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp1q_u64)))
+svuint64_t svuzp1q_u64(svuint64_t, svuint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp1q_u16)))
+svuint16_t svuzp1q_u16(svuint16_t, svuint16_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp1q_bf16)))
+svbfloat16_t svuzp1q_bf16(svbfloat16_t, svbfloat16_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp1q_s8)))
+svint8_t svuzp1q_s8(svint8_t, svint8_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp1q_f64)))
+svfloat64_t svuzp1q_f64(svfloat64_t, svfloat64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp1q_f32)))
+svfloat32_t svuzp1q_f32(svfloat32_t, svfloat32_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp1q_f16)))
+svfloat16_t svuzp1q_f16(svfloat16_t, svfloat16_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp1q_s32)))
+svint32_t svuzp1q_s32(svint32_t, svint32_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp1q_s64)))
+svint64_t svuzp1q_s64(svint64_t, svint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp1q_s16)))
+svint16_t svuzp1q_s16(svint16_t, svint16_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp2q_u8)))
+svuint8_t svuzp2q_u8(svuint8_t, svuint8_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp2q_u32)))
+svuint32_t svuzp2q_u32(svuint32_t, svuint32_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp2q_u64)))
+svuint64_t svuzp2q_u64(svuint64_t, svuint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp2q_u16)))
+svuint16_t svuzp2q_u16(svuint16_t, svuint16_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp2q_bf16)))
+svbfloat16_t svuzp2q_bf16(svbfloat16_t, svbfloat16_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp2q_s8)))
+svint8_t svuzp2q_s8(svint8_t, svint8_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp2q_f64)))
+svfloat64_t svuzp2q_f64(svfloat64_t, svfloat64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp2q_f32)))
+svfloat32_t svuzp2q_f32(svfloat32_t, svfloat32_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp2q_f16)))
+svfloat16_t svuzp2q_f16(svfloat16_t, svfloat16_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp2q_s32)))
+svint32_t svuzp2q_s32(svint32_t, svint32_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp2q_s64)))
+svint64_t svuzp2q_s64(svint64_t, svint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp2q_s16)))
+svint16_t svuzp2q_s16(svint16_t, svint16_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip1q_u8)))
+svuint8_t svzip1q_u8(svuint8_t, svuint8_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip1q_u32)))
+svuint32_t svzip1q_u32(svuint32_t, svuint32_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip1q_u64)))
+svuint64_t svzip1q_u64(svuint64_t, svuint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip1q_u16)))
+svuint16_t svzip1q_u16(svuint16_t, svuint16_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip1q_bf16)))
+svbfloat16_t svzip1q_bf16(svbfloat16_t, svbfloat16_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip1q_s8)))
+svint8_t svzip1q_s8(svint8_t, svint8_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip1q_f64)))
+svfloat64_t svzip1q_f64(svfloat64_t, svfloat64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip1q_f32)))
+svfloat32_t svzip1q_f32(svfloat32_t, svfloat32_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip1q_f16)))
+svfloat16_t svzip1q_f16(svfloat16_t, svfloat16_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip1q_s32)))
+svint32_t svzip1q_s32(svint32_t, svint32_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip1q_s64)))
+svint64_t svzip1q_s64(svint64_t, svint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip1q_s16)))
+svint16_t svzip1q_s16(svint16_t, svint16_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip2q_u8)))
+svuint8_t svzip2q_u8(svuint8_t, svuint8_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip2q_u32)))
+svuint32_t svzip2q_u32(svuint32_t, svuint32_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip2q_u64)))
+svuint64_t svzip2q_u64(svuint64_t, svuint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip2q_u16)))
+svuint16_t svzip2q_u16(svuint16_t, svuint16_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip2q_bf16)))
+svbfloat16_t svzip2q_bf16(svbfloat16_t, svbfloat16_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip2q_s8)))
+svint8_t svzip2q_s8(svint8_t, svint8_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip2q_f64)))
+svfloat64_t svzip2q_f64(svfloat64_t, svfloat64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip2q_f32)))
+svfloat32_t svzip2q_f32(svfloat32_t, svfloat32_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip2q_f16)))
+svfloat16_t svzip2q_f16(svfloat16_t, svfloat16_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip2q_s32)))
+svint32_t svzip2q_s32(svint32_t, svint32_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip2q_s64)))
+svint64_t svzip2q_s64(svint64_t, svint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip2q_s16)))
+svint16_t svzip2q_s16(svint16_t, svint16_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1ro_u8)))
+svuint8_t svld1ro(svbool_t, uint8_t const *);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1ro_u32)))
+svuint32_t svld1ro(svbool_t, uint32_t const *);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1ro_u64)))
+svuint64_t svld1ro(svbool_t, uint64_t const *);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1ro_u16)))
+svuint16_t svld1ro(svbool_t, uint16_t const *);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1ro_bf16)))
+svbfloat16_t svld1ro(svbool_t, bfloat16_t const *);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1ro_s8)))
+svint8_t svld1ro(svbool_t, int8_t const *);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1ro_f64)))
+svfloat64_t svld1ro(svbool_t, float64_t const *);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1ro_f32)))
+svfloat32_t svld1ro(svbool_t, float32_t const *);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1ro_f16)))
+svfloat16_t svld1ro(svbool_t, float16_t const *);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1ro_s32)))
+svint32_t svld1ro(svbool_t, int32_t const *);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1ro_s64)))
+svint64_t svld1ro(svbool_t, int64_t const *);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1ro_mf8)))
+svmfloat8_t svld1ro(svbool_t, mfloat8_t const *);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1ro_s16)))
+svint16_t svld1ro(svbool_t, int16_t const *);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmmla_f64)))
+svfloat64_t svmmla(svfloat64_t, svfloat64_t, svfloat64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtrn1q_u8)))
+svuint8_t svtrn1q(svuint8_t, svuint8_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtrn1q_u32)))
+svuint32_t svtrn1q(svuint32_t, svuint32_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtrn1q_u64)))
+svuint64_t svtrn1q(svuint64_t, svuint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtrn1q_u16)))
+svuint16_t svtrn1q(svuint16_t, svuint16_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtrn1q_bf16)))
+svbfloat16_t svtrn1q(svbfloat16_t, svbfloat16_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtrn1q_s8)))
+svint8_t svtrn1q(svint8_t, svint8_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtrn1q_f64)))
+svfloat64_t svtrn1q(svfloat64_t, svfloat64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtrn1q_f32)))
+svfloat32_t svtrn1q(svfloat32_t, svfloat32_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtrn1q_f16)))
+svfloat16_t svtrn1q(svfloat16_t, svfloat16_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtrn1q_s32)))
+svint32_t svtrn1q(svint32_t, svint32_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtrn1q_s64)))
+svint64_t svtrn1q(svint64_t, svint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtrn1q_s16)))
+svint16_t svtrn1q(svint16_t, svint16_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtrn2q_u8)))
+svuint8_t svtrn2q(svuint8_t, svuint8_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtrn2q_u32)))
+svuint32_t svtrn2q(svuint32_t, svuint32_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtrn2q_u64)))
+svuint64_t svtrn2q(svuint64_t, svuint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtrn2q_u16)))
+svuint16_t svtrn2q(svuint16_t, svuint16_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtrn2q_bf16)))
+svbfloat16_t svtrn2q(svbfloat16_t, svbfloat16_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtrn2q_s8)))
+svint8_t svtrn2q(svint8_t, svint8_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtrn2q_f64)))
+svfloat64_t svtrn2q(svfloat64_t, svfloat64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtrn2q_f32)))
+svfloat32_t svtrn2q(svfloat32_t, svfloat32_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtrn2q_f16)))
+svfloat16_t svtrn2q(svfloat16_t, svfloat16_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtrn2q_s32)))
+svint32_t svtrn2q(svint32_t, svint32_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtrn2q_s64)))
+svint64_t svtrn2q(svint64_t, svint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtrn2q_s16)))
+svint16_t svtrn2q(svint16_t, svint16_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp1q_u8)))
+svuint8_t svuzp1q(svuint8_t, svuint8_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp1q_u32)))
+svuint32_t svuzp1q(svuint32_t, svuint32_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp1q_u64)))
+svuint64_t svuzp1q(svuint64_t, svuint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp1q_u16)))
+svuint16_t svuzp1q(svuint16_t, svuint16_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp1q_bf16)))
+svbfloat16_t svuzp1q(svbfloat16_t, svbfloat16_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp1q_s8)))
+svint8_t svuzp1q(svint8_t, svint8_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp1q_f64)))
+svfloat64_t svuzp1q(svfloat64_t, svfloat64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp1q_f32)))
+svfloat32_t svuzp1q(svfloat32_t, svfloat32_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp1q_f16)))
+svfloat16_t svuzp1q(svfloat16_t, svfloat16_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp1q_s32)))
+svint32_t svuzp1q(svint32_t, svint32_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp1q_s64)))
+svint64_t svuzp1q(svint64_t, svint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp1q_s16)))
+svint16_t svuzp1q(svint16_t, svint16_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp2q_u8)))
+svuint8_t svuzp2q(svuint8_t, svuint8_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp2q_u32)))
+svuint32_t svuzp2q(svuint32_t, svuint32_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp2q_u64)))
+svuint64_t svuzp2q(svuint64_t, svuint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp2q_u16)))
+svuint16_t svuzp2q(svuint16_t, svuint16_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp2q_bf16)))
+svbfloat16_t svuzp2q(svbfloat16_t, svbfloat16_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp2q_s8)))
+svint8_t svuzp2q(svint8_t, svint8_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp2q_f64)))
+svfloat64_t svuzp2q(svfloat64_t, svfloat64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp2q_f32)))
+svfloat32_t svuzp2q(svfloat32_t, svfloat32_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp2q_f16)))
+svfloat16_t svuzp2q(svfloat16_t, svfloat16_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp2q_s32)))
+svint32_t svuzp2q(svint32_t, svint32_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp2q_s64)))
+svint64_t svuzp2q(svint64_t, svint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp2q_s16)))
+svint16_t svuzp2q(svint16_t, svint16_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip1q_u8)))
+svuint8_t svzip1q(svuint8_t, svuint8_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip1q_u32)))
+svuint32_t svzip1q(svuint32_t, svuint32_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip1q_u64)))
+svuint64_t svzip1q(svuint64_t, svuint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip1q_u16)))
+svuint16_t svzip1q(svuint16_t, svuint16_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip1q_bf16)))
+svbfloat16_t svzip1q(svbfloat16_t, svbfloat16_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip1q_s8)))
+svint8_t svzip1q(svint8_t, svint8_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip1q_f64)))
+svfloat64_t svzip1q(svfloat64_t, svfloat64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip1q_f32)))
+svfloat32_t svzip1q(svfloat32_t, svfloat32_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip1q_f16)))
+svfloat16_t svzip1q(svfloat16_t, svfloat16_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip1q_s32)))
+svint32_t svzip1q(svint32_t, svint32_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip1q_s64)))
+svint64_t svzip1q(svint64_t, svint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip1q_s16)))
+svint16_t svzip1q(svint16_t, svint16_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip2q_u8)))
+svuint8_t svzip2q(svuint8_t, svuint8_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip2q_u32)))
+svuint32_t svzip2q(svuint32_t, svuint32_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip2q_u64)))
+svuint64_t svzip2q(svuint64_t, svuint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip2q_u16)))
+svuint16_t svzip2q(svuint16_t, svuint16_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip2q_bf16)))
+svbfloat16_t svzip2q(svbfloat16_t, svbfloat16_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip2q_s8)))
+svint8_t svzip2q(svint8_t, svint8_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip2q_f64)))
+svfloat64_t svzip2q(svfloat64_t, svfloat64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip2q_f32)))
+svfloat32_t svzip2q(svfloat32_t, svfloat32_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip2q_f16)))
+svfloat16_t svzip2q(svfloat16_t, svfloat16_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip2q_s32)))
+svint32_t svzip2q(svint32_t, svint32_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip2q_s64)))
+svint64_t svzip2q(svint64_t, svint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip2q_s16)))
+svint16_t svzip2q(svint16_t, svint16_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmmla_s32)))
+svint32_t svmmla_s32(svint32_t, svint8_t, svint8_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmmla_u32)))
+svuint32_t svmmla_u32(svuint32_t, svuint8_t, svuint8_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svusmmla_s32)))
+svint32_t svusmmla_s32(svint32_t, svuint8_t, svint8_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmmla_s32)))
+svint32_t svmmla(svint32_t, svint8_t, svint8_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmmla_u32)))
+svuint32_t svmmla(svuint32_t, svuint8_t, svuint8_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svusmmla_s32)))
+svint32_t svusmmla(svint32_t, svuint8_t, svint8_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsudot_n_s32)))
+svint32_t svsudot_n_s32(svint32_t, svint8_t, uint8_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsudot_s32)))
+svint32_t svsudot_s32(svint32_t, svint8_t, svuint8_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsudot_lane_s32)))
+svint32_t svsudot_lane_s32(svint32_t, svint8_t, svuint8_t, uint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svusdot_n_s32)))
+svint32_t svusdot_n_s32(svint32_t, svuint8_t, int8_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svusdot_s32)))
+svint32_t svusdot_s32(svint32_t, svuint8_t, svint8_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svusdot_lane_s32)))
+svint32_t svusdot_lane_s32(svint32_t, svuint8_t, svint8_t, uint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsudot_n_s32)))
+svint32_t svsudot(svint32_t, svint8_t, uint8_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsudot_s32)))
+svint32_t svsudot(svint32_t, svint8_t, svuint8_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsudot_lane_s32)))
+svint32_t svsudot_lane(svint32_t, svint8_t, svuint8_t, uint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svusdot_n_s32)))
+svint32_t svusdot(svint32_t, svuint8_t, int8_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svusdot_s32)))
+svint32_t svusdot(svint32_t, svuint8_t, svint8_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svusdot_lane_s32)))
+svint32_t svusdot_lane(svint32_t, svuint8_t, svint8_t, uint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaesd_lane_u8_x2)))
+svuint8x2_t svaesd_lane_u8_x2(svuint8x2_t, svuint8_t, uint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaesd_lane_u8_x4)))
+svuint8x4_t svaesd_lane_u8_x4(svuint8x4_t, svuint8_t, uint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaesdimc_lane_u8_x2)))
+svuint8x2_t svaesdimc_lane_u8_x2(svuint8x2_t, svuint8_t, uint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaesdimc_lane_u8_x4)))
+svuint8x4_t svaesdimc_lane_u8_x4(svuint8x4_t, svuint8_t, uint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaese_lane_u8_x2)))
+svuint8x2_t svaese_lane_u8_x2(svuint8x2_t, svuint8_t, uint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaese_lane_u8_x4)))
+svuint8x4_t svaese_lane_u8_x4(svuint8x4_t, svuint8_t, uint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaesemc_lane_u8_x2)))
+svuint8x2_t svaesemc_lane_u8_x2(svuint8x2_t, svuint8_t, uint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaesemc_lane_u8_x4)))
+svuint8x4_t svaesemc_lane_u8_x4(svuint8x4_t, svuint8_t, uint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svpmlal_pair_n_u64_x2)))
+svuint64x2_t svpmlal_pair_n_u64_x2(svuint64x2_t, svuint64_t, uint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svpmlal_pair_u64_x2)))
+svuint64x2_t svpmlal_pair_u64_x2(svuint64x2_t, svuint64_t, svuint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svpmull_pair_n_u64_x2)))
+svuint64x2_t svpmull_pair_n_u64_x2(svuint64_t, uint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svpmull_pair_u64_x2)))
+svuint64x2_t svpmull_pair_u64_x2(svuint64_t, svuint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaesd_lane_u8_x2)))
+svuint8x2_t svaesd_lane(svuint8x2_t, svuint8_t, uint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaesd_lane_u8_x4)))
+svuint8x4_t svaesd_lane(svuint8x4_t, svuint8_t, uint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaesdimc_lane_u8_x2)))
+svuint8x2_t svaesdimc_lane(svuint8x2_t, svuint8_t, uint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaesdimc_lane_u8_x4)))
+svuint8x4_t svaesdimc_lane(svuint8x4_t, svuint8_t, uint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaese_lane_u8_x2)))
+svuint8x2_t svaese_lane(svuint8x2_t, svuint8_t, uint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaese_lane_u8_x4)))
+svuint8x4_t svaese_lane(svuint8x4_t, svuint8_t, uint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaesemc_lane_u8_x2)))
+svuint8x2_t svaesemc_lane(svuint8x2_t, svuint8_t, uint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaesemc_lane_u8_x4)))
+svuint8x4_t svaesemc_lane(svuint8x4_t, svuint8_t, uint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svpmlal_pair_n_u64_x2)))
+svuint64x2_t svpmlal_pair(svuint64x2_t, svuint64_t, uint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svpmlal_pair_u64_x2)))
+svuint64x2_t svpmlal_pair(svuint64x2_t, svuint64_t, svuint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svpmull_pair_n_u64_x2)))
+svuint64x2_t svpmull_pair(svuint64_t, uint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svpmull_pair_u64_x2)))
+svuint64x2_t svpmull_pair(svuint64_t, svuint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaesd_u8)))
+svuint8_t svaesd_u8(svuint8_t, svuint8_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaese_u8)))
+svuint8_t svaese_u8(svuint8_t, svuint8_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaesimc_u8)))
+svuint8_t svaesimc_u8(svuint8_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaesmc_u8)))
+svuint8_t svaesmc_u8(svuint8_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svpmullb_pair_n_u64)))
+svuint64_t svpmullb_pair_n_u64(svuint64_t, uint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svpmullb_pair_u64)))
+svuint64_t svpmullb_pair_u64(svuint64_t, svuint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svpmullt_pair_n_u64)))
+svuint64_t svpmullt_pair_n_u64(svuint64_t, uint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svpmullt_pair_u64)))
+svuint64_t svpmullt_pair_u64(svuint64_t, svuint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaesd_u8)))
+svuint8_t svaesd(svuint8_t, svuint8_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaese_u8)))
+svuint8_t svaese(svuint8_t, svuint8_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaesimc_u8)))
+svuint8_t svaesimc(svuint8_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaesmc_u8)))
+svuint8_t svaesmc(svuint8_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svpmullb_pair_n_u64)))
+svuint64_t svpmullb_pair(svuint64_t, uint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svpmullb_pair_u64)))
+svuint64_t svpmullb_pair(svuint64_t, svuint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svpmullt_pair_n_u64)))
+svuint64_t svpmullt_pair(svuint64_t, uint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svpmullt_pair_u64)))
+svuint64_t svpmullt_pair(svuint64_t, svuint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadd_n_bf16_m)))
+svbfloat16_t svadd_n_bf16_m(svbool_t, svbfloat16_t, bfloat16_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadd_n_bf16_x)))
+svbfloat16_t svadd_n_bf16_x(svbool_t, svbfloat16_t, bfloat16_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadd_n_bf16_z)))
+svbfloat16_t svadd_n_bf16_z(svbool_t, svbfloat16_t, bfloat16_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadd_bf16_m)))
+svbfloat16_t svadd_bf16_m(svbool_t, svbfloat16_t, svbfloat16_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadd_bf16_x)))
+svbfloat16_t svadd_bf16_x(svbool_t, svbfloat16_t, svbfloat16_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadd_bf16_z)))
+svbfloat16_t svadd_bf16_z(svbool_t, svbfloat16_t, svbfloat16_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclamp_bf16)))
+svbfloat16_t svclamp_bf16(svbfloat16_t, svbfloat16_t, svbfloat16_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_n_bf16_m)))
+svbfloat16_t svmax_n_bf16_m(svbool_t, svbfloat16_t, bfloat16_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_n_bf16_x)))
+svbfloat16_t svmax_n_bf16_x(svbool_t, svbfloat16_t, bfloat16_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_n_bf16_z)))
+svbfloat16_t svmax_n_bf16_z(svbool_t, svbfloat16_t, bfloat16_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_bf16_m)))
+svbfloat16_t svmax_bf16_m(svbool_t, svbfloat16_t, svbfloat16_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_bf16_x)))
+svbfloat16_t svmax_bf16_x(svbool_t, svbfloat16_t, svbfloat16_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_bf16_z)))
+svbfloat16_t svmax_bf16_z(svbool_t, svbfloat16_t, svbfloat16_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmaxnm_n_bf16_m)))
+svbfloat16_t svmaxnm_n_bf16_m(svbool_t, svbfloat16_t, bfloat16_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmaxnm_n_bf16_x)))
+svbfloat16_t svmaxnm_n_bf16_x(svbool_t, svbfloat16_t, bfloat16_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmaxnm_n_bf16_z)))
+svbfloat16_t svmaxnm_n_bf16_z(svbool_t, svbfloat16_t, bfloat16_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmaxnm_bf16_m)))
+svbfloat16_t svmaxnm_bf16_m(svbool_t, svbfloat16_t, svbfloat16_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmaxnm_bf16_x)))
+svbfloat16_t svmaxnm_bf16_x(svbool_t, svbfloat16_t, svbfloat16_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmaxnm_bf16_z)))
+svbfloat16_t svmaxnm_bf16_z(svbool_t, svbfloat16_t, svbfloat16_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_n_bf16_m)))
+svbfloat16_t svmin_n_bf16_m(svbool_t, svbfloat16_t, bfloat16_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_n_bf16_x)))
+svbfloat16_t svmin_n_bf16_x(svbool_t, svbfloat16_t, bfloat16_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_n_bf16_z)))
+svbfloat16_t svmin_n_bf16_z(svbool_t, svbfloat16_t, bfloat16_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_bf16_m)))
+svbfloat16_t svmin_bf16_m(svbool_t, svbfloat16_t, svbfloat16_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_bf16_x)))
+svbfloat16_t svmin_bf16_x(svbool_t, svbfloat16_t, svbfloat16_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_bf16_z)))
+svbfloat16_t svmin_bf16_z(svbool_t, svbfloat16_t, svbfloat16_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svminnm_n_bf16_m)))
+svbfloat16_t svminnm_n_bf16_m(svbool_t, svbfloat16_t, bfloat16_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svminnm_n_bf16_x)))
+svbfloat16_t svminnm_n_bf16_x(svbool_t, svbfloat16_t, bfloat16_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svminnm_n_bf16_z)))
+svbfloat16_t svminnm_n_bf16_z(svbool_t, svbfloat16_t, bfloat16_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svminnm_bf16_m)))
+svbfloat16_t svminnm_bf16_m(svbool_t, svbfloat16_t, svbfloat16_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svminnm_bf16_x)))
+svbfloat16_t svminnm_bf16_x(svbool_t, svbfloat16_t, svbfloat16_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svminnm_bf16_z)))
+svbfloat16_t svminnm_bf16_z(svbool_t, svbfloat16_t, svbfloat16_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmla_n_bf16_m)))
+svbfloat16_t svmla_n_bf16_m(svbool_t, svbfloat16_t, svbfloat16_t, bfloat16_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmla_n_bf16_x)))
+svbfloat16_t svmla_n_bf16_x(svbool_t, svbfloat16_t, svbfloat16_t, bfloat16_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmla_n_bf16_z)))
+svbfloat16_t svmla_n_bf16_z(svbool_t, svbfloat16_t, svbfloat16_t, bfloat16_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmla_bf16_m)))
+svbfloat16_t svmla_bf16_m(svbool_t, svbfloat16_t, svbfloat16_t, svbfloat16_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmla_bf16_x)))
+svbfloat16_t svmla_bf16_x(svbool_t, svbfloat16_t, svbfloat16_t, svbfloat16_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmla_bf16_z)))
+svbfloat16_t svmla_bf16_z(svbool_t, svbfloat16_t, svbfloat16_t, svbfloat16_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmla_lane_bf16)))
+svbfloat16_t svmla_lane_bf16(svbfloat16_t, svbfloat16_t, svbfloat16_t, uint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmls_n_bf16_m)))
+svbfloat16_t svmls_n_bf16_m(svbool_t, svbfloat16_t, svbfloat16_t, bfloat16_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmls_n_bf16_x)))
+svbfloat16_t svmls_n_bf16_x(svbool_t, svbfloat16_t, svbfloat16_t, bfloat16_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmls_n_bf16_z)))
+svbfloat16_t svmls_n_bf16_z(svbool_t, svbfloat16_t, svbfloat16_t, bfloat16_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmls_bf16_m)))
+svbfloat16_t svmls_bf16_m(svbool_t, svbfloat16_t, svbfloat16_t, svbfloat16_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmls_bf16_x)))
+svbfloat16_t svmls_bf16_x(svbool_t, svbfloat16_t, svbfloat16_t, svbfloat16_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmls_bf16_z)))
+svbfloat16_t svmls_bf16_z(svbool_t, svbfloat16_t, svbfloat16_t, svbfloat16_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmls_lane_bf16)))
+svbfloat16_t svmls_lane_bf16(svbfloat16_t, svbfloat16_t, svbfloat16_t, uint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmul_n_bf16_m)))
+svbfloat16_t svmul_n_bf16_m(svbool_t, svbfloat16_t, bfloat16_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmul_n_bf16_x)))
+svbfloat16_t svmul_n_bf16_x(svbool_t, svbfloat16_t, bfloat16_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmul_n_bf16_z)))
+svbfloat16_t svmul_n_bf16_z(svbool_t, svbfloat16_t, bfloat16_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmul_bf16_m)))
+svbfloat16_t svmul_bf16_m(svbool_t, svbfloat16_t, svbfloat16_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmul_bf16_x)))
+svbfloat16_t svmul_bf16_x(svbool_t, svbfloat16_t, svbfloat16_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmul_bf16_z)))
+svbfloat16_t svmul_bf16_z(svbool_t, svbfloat16_t, svbfloat16_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmul_lane_bf16)))
+svbfloat16_t svmul_lane_bf16(svbfloat16_t, svbfloat16_t, uint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsub_n_bf16_m)))
+svbfloat16_t svsub_n_bf16_m(svbool_t, svbfloat16_t, bfloat16_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsub_n_bf16_x)))
+svbfloat16_t svsub_n_bf16_x(svbool_t, svbfloat16_t, bfloat16_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsub_n_bf16_z)))
+svbfloat16_t svsub_n_bf16_z(svbool_t, svbfloat16_t, bfloat16_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsub_bf16_m)))
+svbfloat16_t svsub_bf16_m(svbool_t, svbfloat16_t, svbfloat16_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsub_bf16_x)))
+svbfloat16_t svsub_bf16_x(svbool_t, svbfloat16_t, svbfloat16_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsub_bf16_z)))
+svbfloat16_t svsub_bf16_z(svbool_t, svbfloat16_t, svbfloat16_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadd_n_bf16_m)))
+svbfloat16_t svadd_m(svbool_t, svbfloat16_t, bfloat16_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadd_n_bf16_x)))
+svbfloat16_t svadd_x(svbool_t, svbfloat16_t, bfloat16_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadd_n_bf16_z)))
+svbfloat16_t svadd_z(svbool_t, svbfloat16_t, bfloat16_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadd_bf16_m)))
+svbfloat16_t svadd_m(svbool_t, svbfloat16_t, svbfloat16_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadd_bf16_x)))
+svbfloat16_t svadd_x(svbool_t, svbfloat16_t, svbfloat16_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadd_bf16_z)))
+svbfloat16_t svadd_z(svbool_t, svbfloat16_t, svbfloat16_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclamp_bf16)))
+svbfloat16_t svclamp(svbfloat16_t, svbfloat16_t, svbfloat16_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_n_bf16_m)))
+svbfloat16_t svmax_m(svbool_t, svbfloat16_t, bfloat16_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_n_bf16_x)))
+svbfloat16_t svmax_x(svbool_t, svbfloat16_t, bfloat16_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_n_bf16_z)))
+svbfloat16_t svmax_z(svbool_t, svbfloat16_t, bfloat16_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_bf16_m)))
+svbfloat16_t svmax_m(svbool_t, svbfloat16_t, svbfloat16_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_bf16_x)))
+svbfloat16_t svmax_x(svbool_t, svbfloat16_t, svbfloat16_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmax_bf16_z)))
+svbfloat16_t svmax_z(svbool_t, svbfloat16_t, svbfloat16_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmaxnm_n_bf16_m)))
+svbfloat16_t svmaxnm_m(svbool_t, svbfloat16_t, bfloat16_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmaxnm_n_bf16_x)))
+svbfloat16_t svmaxnm_x(svbool_t, svbfloat16_t, bfloat16_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmaxnm_n_bf16_z)))
+svbfloat16_t svmaxnm_z(svbool_t, svbfloat16_t, bfloat16_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmaxnm_bf16_m)))
+svbfloat16_t svmaxnm_m(svbool_t, svbfloat16_t, svbfloat16_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmaxnm_bf16_x)))
+svbfloat16_t svmaxnm_x(svbool_t, svbfloat16_t, svbfloat16_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmaxnm_bf16_z)))
+svbfloat16_t svmaxnm_z(svbool_t, svbfloat16_t, svbfloat16_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_n_bf16_m)))
+svbfloat16_t svmin_m(svbool_t, svbfloat16_t, bfloat16_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_n_bf16_x)))
+svbfloat16_t svmin_x(svbool_t, svbfloat16_t, bfloat16_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_n_bf16_z)))
+svbfloat16_t svmin_z(svbool_t, svbfloat16_t, bfloat16_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_bf16_m)))
+svbfloat16_t svmin_m(svbool_t, svbfloat16_t, svbfloat16_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_bf16_x)))
+svbfloat16_t svmin_x(svbool_t, svbfloat16_t, svbfloat16_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmin_bf16_z)))
+svbfloat16_t svmin_z(svbool_t, svbfloat16_t, svbfloat16_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svminnm_n_bf16_m)))
+svbfloat16_t svminnm_m(svbool_t, svbfloat16_t, bfloat16_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svminnm_n_bf16_x)))
+svbfloat16_t svminnm_x(svbool_t, svbfloat16_t, bfloat16_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svminnm_n_bf16_z)))
+svbfloat16_t svminnm_z(svbool_t, svbfloat16_t, bfloat16_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svminnm_bf16_m)))
+svbfloat16_t svminnm_m(svbool_t, svbfloat16_t, svbfloat16_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svminnm_bf16_x)))
+svbfloat16_t svminnm_x(svbool_t, svbfloat16_t, svbfloat16_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svminnm_bf16_z)))
+svbfloat16_t svminnm_z(svbool_t, svbfloat16_t, svbfloat16_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmla_n_bf16_m)))
+svbfloat16_t svmla_m(svbool_t, svbfloat16_t, svbfloat16_t, bfloat16_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmla_n_bf16_x)))
+svbfloat16_t svmla_x(svbool_t, svbfloat16_t, svbfloat16_t, bfloat16_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmla_n_bf16_z)))
+svbfloat16_t svmla_z(svbool_t, svbfloat16_t, svbfloat16_t, bfloat16_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmla_bf16_m)))
+svbfloat16_t svmla_m(svbool_t, svbfloat16_t, svbfloat16_t, svbfloat16_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmla_bf16_x)))
+svbfloat16_t svmla_x(svbool_t, svbfloat16_t, svbfloat16_t, svbfloat16_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmla_bf16_z)))
+svbfloat16_t svmla_z(svbool_t, svbfloat16_t, svbfloat16_t, svbfloat16_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmla_lane_bf16)))
+svbfloat16_t svmla_lane(svbfloat16_t, svbfloat16_t, svbfloat16_t, uint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmls_n_bf16_m)))
+svbfloat16_t svmls_m(svbool_t, svbfloat16_t, svbfloat16_t, bfloat16_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmls_n_bf16_x)))
+svbfloat16_t svmls_x(svbool_t, svbfloat16_t, svbfloat16_t, bfloat16_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmls_n_bf16_z)))
+svbfloat16_t svmls_z(svbool_t, svbfloat16_t, svbfloat16_t, bfloat16_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmls_bf16_m)))
+svbfloat16_t svmls_m(svbool_t, svbfloat16_t, svbfloat16_t, svbfloat16_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmls_bf16_x)))
+svbfloat16_t svmls_x(svbool_t, svbfloat16_t, svbfloat16_t, svbfloat16_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmls_bf16_z)))
+svbfloat16_t svmls_z(svbool_t, svbfloat16_t, svbfloat16_t, svbfloat16_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmls_lane_bf16)))
+svbfloat16_t svmls_lane(svbfloat16_t, svbfloat16_t, svbfloat16_t, uint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmul_n_bf16_m)))
+svbfloat16_t svmul_m(svbool_t, svbfloat16_t, bfloat16_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmul_n_bf16_x)))
+svbfloat16_t svmul_x(svbool_t, svbfloat16_t, bfloat16_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmul_n_bf16_z)))
+svbfloat16_t svmul_z(svbool_t, svbfloat16_t, bfloat16_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmul_bf16_m)))
+svbfloat16_t svmul_m(svbool_t, svbfloat16_t, svbfloat16_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmul_bf16_x)))
+svbfloat16_t svmul_x(svbool_t, svbfloat16_t, svbfloat16_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmul_bf16_z)))
+svbfloat16_t svmul_z(svbool_t, svbfloat16_t, svbfloat16_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmul_lane_bf16)))
+svbfloat16_t svmul_lane(svbfloat16_t, svbfloat16_t, uint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsub_n_bf16_m)))
+svbfloat16_t svsub_m(svbool_t, svbfloat16_t, bfloat16_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsub_n_bf16_x)))
+svbfloat16_t svsub_x(svbool_t, svbfloat16_t, bfloat16_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsub_n_bf16_z)))
+svbfloat16_t svsub_z(svbool_t, svbfloat16_t, bfloat16_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsub_bf16_m)))
+svbfloat16_t svsub_m(svbool_t, svbfloat16_t, svbfloat16_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsub_bf16_x)))
+svbfloat16_t svsub_x(svbool_t, svbfloat16_t, svbfloat16_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsub_bf16_z)))
+svbfloat16_t svsub_z(svbool_t, svbfloat16_t, svbfloat16_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svscale_n_bf16_m)))
+svbfloat16_t svscale_n_bf16_m(svbool_t, svbfloat16_t, int16_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svscale_n_bf16_x)))
+svbfloat16_t svscale_n_bf16_x(svbool_t, svbfloat16_t, int16_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svscale_n_bf16_z)))
+svbfloat16_t svscale_n_bf16_z(svbool_t, svbfloat16_t, int16_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svscale_bf16_m)))
+svbfloat16_t svscale_bf16_m(svbool_t, svbfloat16_t, svint16_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svscale_bf16_x)))
+svbfloat16_t svscale_bf16_x(svbool_t, svbfloat16_t, svint16_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svscale_bf16_z)))
+svbfloat16_t svscale_bf16_z(svbool_t, svbfloat16_t, svint16_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svscale_n_bf16_m)))
+svbfloat16_t svscale_m(svbool_t, svbfloat16_t, int16_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svscale_n_bf16_x)))
+svbfloat16_t svscale_x(svbool_t, svbfloat16_t, int16_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svscale_n_bf16_z)))
+svbfloat16_t svscale_z(svbool_t, svbfloat16_t, int16_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svscale_bf16_m)))
+svbfloat16_t svscale_m(svbool_t, svbfloat16_t, svint16_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svscale_bf16_x)))
+svbfloat16_t svscale_x(svbool_t, svbfloat16_t, svint16_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svscale_bf16_z)))
+svbfloat16_t svscale_z(svbool_t, svbfloat16_t, svint16_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbdep_n_u8)))
+svuint8_t svbdep_n_u8(svuint8_t, uint8_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbdep_n_u32)))
+svuint32_t svbdep_n_u32(svuint32_t, uint32_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbdep_n_u64)))
+svuint64_t svbdep_n_u64(svuint64_t, uint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbdep_n_u16)))
+svuint16_t svbdep_n_u16(svuint16_t, uint16_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbdep_u8)))
+svuint8_t svbdep_u8(svuint8_t, svuint8_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbdep_u32)))
+svuint32_t svbdep_u32(svuint32_t, svuint32_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbdep_u64)))
+svuint64_t svbdep_u64(svuint64_t, svuint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbdep_u16)))
+svuint16_t svbdep_u16(svuint16_t, svuint16_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbext_n_u8)))
+svuint8_t svbext_n_u8(svuint8_t, uint8_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbext_n_u32)))
+svuint32_t svbext_n_u32(svuint32_t, uint32_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbext_n_u64)))
+svuint64_t svbext_n_u64(svuint64_t, uint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbext_n_u16)))
+svuint16_t svbext_n_u16(svuint16_t, uint16_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbext_u8)))
+svuint8_t svbext_u8(svuint8_t, svuint8_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbext_u32)))
+svuint32_t svbext_u32(svuint32_t, svuint32_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbext_u64)))
+svuint64_t svbext_u64(svuint64_t, svuint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbext_u16)))
+svuint16_t svbext_u16(svuint16_t, svuint16_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbgrp_n_u8)))
+svuint8_t svbgrp_n_u8(svuint8_t, uint8_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbgrp_n_u32)))
+svuint32_t svbgrp_n_u32(svuint32_t, uint32_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbgrp_n_u64)))
+svuint64_t svbgrp_n_u64(svuint64_t, uint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbgrp_n_u16)))
+svuint16_t svbgrp_n_u16(svuint16_t, uint16_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbgrp_u8)))
+svuint8_t svbgrp_u8(svuint8_t, svuint8_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbgrp_u32)))
+svuint32_t svbgrp_u32(svuint32_t, svuint32_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbgrp_u64)))
+svuint64_t svbgrp_u64(svuint64_t, svuint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbgrp_u16)))
+svuint16_t svbgrp_u16(svuint16_t, svuint16_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbdep_n_u8)))
+svuint8_t svbdep(svuint8_t, uint8_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbdep_n_u32)))
+svuint32_t svbdep(svuint32_t, uint32_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbdep_n_u64)))
+svuint64_t svbdep(svuint64_t, uint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbdep_n_u16)))
+svuint16_t svbdep(svuint16_t, uint16_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbdep_u8)))
+svuint8_t svbdep(svuint8_t, svuint8_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbdep_u32)))
+svuint32_t svbdep(svuint32_t, svuint32_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbdep_u64)))
+svuint64_t svbdep(svuint64_t, svuint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbdep_u16)))
+svuint16_t svbdep(svuint16_t, svuint16_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbext_n_u8)))
+svuint8_t svbext(svuint8_t, uint8_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbext_n_u32)))
+svuint32_t svbext(svuint32_t, uint32_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbext_n_u64)))
+svuint64_t svbext(svuint64_t, uint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbext_n_u16)))
+svuint16_t svbext(svuint16_t, uint16_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbext_u8)))
+svuint8_t svbext(svuint8_t, svuint8_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbext_u32)))
+svuint32_t svbext(svuint32_t, svuint32_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbext_u64)))
+svuint64_t svbext(svuint64_t, svuint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbext_u16)))
+svuint16_t svbext(svuint16_t, svuint16_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbgrp_n_u8)))
+svuint8_t svbgrp(svuint8_t, uint8_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbgrp_n_u32)))
+svuint32_t svbgrp(svuint32_t, uint32_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbgrp_n_u64)))
+svuint64_t svbgrp(svuint64_t, uint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbgrp_n_u16)))
+svuint16_t svbgrp(svuint16_t, uint16_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbgrp_u8)))
+svuint8_t svbgrp(svuint8_t, svuint8_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbgrp_u32)))
+svuint32_t svbgrp(svuint32_t, svuint32_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbgrp_u64)))
+svuint64_t svbgrp(svuint64_t, svuint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbgrp_u16)))
+svuint16_t svbgrp(svuint16_t, svuint16_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmmla_f32_f16)))
+svfloat32_t svmmla_f32_f16(svfloat32_t, svfloat16_t, svfloat16_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmmla_f32_f16)))
+svfloat32_t svmmla(svfloat32_t, svfloat16_t, svfloat16_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrax1_u64)))
+svuint64_t svrax1_u64(svuint64_t, svuint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrax1_s64)))
+svint64_t svrax1_s64(svint64_t, svint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrax1_u64)))
+svuint64_t svrax1(svuint64_t, svuint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrax1_s64)))
+svint64_t svrax1(svint64_t, svint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsm4e_u32)))
+svuint32_t svsm4e_u32(svuint32_t, svuint32_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsm4ekey_u32)))
+svuint32_t svsm4ekey_u32(svuint32_t, svuint32_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsm4e_u32)))
+svuint32_t svsm4e(svuint32_t, svuint32_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsm4ekey_u32)))
+svuint32_t svsm4ekey(svuint32_t, svuint32_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhistcnt_u32_z)))
+svuint32_t svhistcnt_u32_z(svbool_t, svuint32_t, svuint32_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhistcnt_u64_z)))
+svuint64_t svhistcnt_u64_z(svbool_t, svuint64_t, svuint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhistcnt_s32_z)))
+svuint32_t svhistcnt_s32_z(svbool_t, svint32_t, svint32_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhistcnt_s64_z)))
+svuint64_t svhistcnt_s64_z(svbool_t, svint64_t, svint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhistseg_u8)))
+svuint8_t svhistseg_u8(svuint8_t, svuint8_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhistseg_s8)))
+svuint8_t svhistseg_s8(svint8_t, svint8_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_gather_u32base_index_u32)))
+svuint32_t svldnt1_gather_u32base_index_u32(svbool_t, svuint32_t, int64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_gather_u64base_index_u64)))
+svuint64_t svldnt1_gather_u64base_index_u64(svbool_t, svuint64_t, int64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_gather_u64base_index_f64)))
+svfloat64_t svldnt1_gather_u64base_index_f64(svbool_t, svuint64_t, int64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_gather_u32base_index_f32)))
+svfloat32_t svldnt1_gather_u32base_index_f32(svbool_t, svuint32_t, int64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_gather_u32base_index_s32)))
+svint32_t svldnt1_gather_u32base_index_s32(svbool_t, svuint32_t, int64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_gather_u64base_index_s64)))
+svint64_t svldnt1_gather_u64base_index_s64(svbool_t, svuint64_t, int64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_gather_u32base_offset_u32)))
+svuint32_t svldnt1_gather_u32base_offset_u32(svbool_t, svuint32_t, int64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_gather_u64base_offset_u64)))
+svuint64_t svldnt1_gather_u64base_offset_u64(svbool_t, svuint64_t, int64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_gather_u64base_offset_f64)))
+svfloat64_t svldnt1_gather_u64base_offset_f64(svbool_t, svuint64_t, int64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_gather_u32base_offset_f32)))
+svfloat32_t svldnt1_gather_u32base_offset_f32(svbool_t, svuint32_t, int64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_gather_u32base_offset_s32)))
+svint32_t svldnt1_gather_u32base_offset_s32(svbool_t, svuint32_t, int64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_gather_u64base_offset_s64)))
+svint64_t svldnt1_gather_u64base_offset_s64(svbool_t, svuint64_t, int64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_gather_u32base_u32)))
+svuint32_t svldnt1_gather_u32base_u32(svbool_t, svuint32_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_gather_u64base_u64)))
+svuint64_t svldnt1_gather_u64base_u64(svbool_t, svuint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_gather_u64base_f64)))
+svfloat64_t svldnt1_gather_u64base_f64(svbool_t, svuint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_gather_u32base_f32)))
+svfloat32_t svldnt1_gather_u32base_f32(svbool_t, svuint32_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_gather_u32base_s32)))
+svint32_t svldnt1_gather_u32base_s32(svbool_t, svuint32_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_gather_u64base_s64)))
+svint64_t svldnt1_gather_u64base_s64(svbool_t, svuint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_gather_s64index_u64)))
+svuint64_t svldnt1_gather_s64index_u64(svbool_t, uint64_t const *, svint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_gather_s64index_f64)))
+svfloat64_t svldnt1_gather_s64index_f64(svbool_t, float64_t const *, svint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_gather_s64index_s64)))
+svint64_t svldnt1_gather_s64index_s64(svbool_t, int64_t const *, svint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_gather_u64index_u64)))
+svuint64_t svldnt1_gather_u64index_u64(svbool_t, uint64_t const *, svuint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_gather_u64index_f64)))
+svfloat64_t svldnt1_gather_u64index_f64(svbool_t, float64_t const *, svuint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_gather_u64index_s64)))
+svint64_t svldnt1_gather_u64index_s64(svbool_t, int64_t const *, svuint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_gather_u32offset_u32)))
+svuint32_t svldnt1_gather_u32offset_u32(svbool_t, uint32_t const *, svuint32_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_gather_u32offset_f32)))
+svfloat32_t svldnt1_gather_u32offset_f32(svbool_t, float32_t const *, svuint32_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_gather_u32offset_s32)))
+svint32_t svldnt1_gather_u32offset_s32(svbool_t, int32_t const *, svuint32_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_gather_s64offset_u64)))
+svuint64_t svldnt1_gather_s64offset_u64(svbool_t, uint64_t const *, svint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_gather_s64offset_f64)))
+svfloat64_t svldnt1_gather_s64offset_f64(svbool_t, float64_t const *, svint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_gather_s64offset_s64)))
+svint64_t svldnt1_gather_s64offset_s64(svbool_t, int64_t const *, svint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_gather_u64offset_u64)))
+svuint64_t svldnt1_gather_u64offset_u64(svbool_t, uint64_t const *, svuint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_gather_u64offset_f64)))
+svfloat64_t svldnt1_gather_u64offset_f64(svbool_t, float64_t const *, svuint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_gather_u64offset_s64)))
+svint64_t svldnt1_gather_u64offset_s64(svbool_t, int64_t const *, svuint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sb_gather_u32base_offset_u32)))
+svuint32_t svldnt1sb_gather_u32base_offset_u32(svbool_t, svuint32_t, int64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sb_gather_u64base_offset_u64)))
+svuint64_t svldnt1sb_gather_u64base_offset_u64(svbool_t, svuint64_t, int64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sb_gather_u32base_offset_s32)))
+svint32_t svldnt1sb_gather_u32base_offset_s32(svbool_t, svuint32_t, int64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sb_gather_u64base_offset_s64)))
+svint64_t svldnt1sb_gather_u64base_offset_s64(svbool_t, svuint64_t, int64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sb_gather_u32base_u32)))
+svuint32_t svldnt1sb_gather_u32base_u32(svbool_t, svuint32_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sb_gather_u64base_u64)))
+svuint64_t svldnt1sb_gather_u64base_u64(svbool_t, svuint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sb_gather_u32base_s32)))
+svint32_t svldnt1sb_gather_u32base_s32(svbool_t, svuint32_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sb_gather_u64base_s64)))
+svint64_t svldnt1sb_gather_u64base_s64(svbool_t, svuint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sb_gather_u32offset_u32)))
+svuint32_t svldnt1sb_gather_u32offset_u32(svbool_t, int8_t const *, svuint32_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sb_gather_u32offset_s32)))
+svint32_t svldnt1sb_gather_u32offset_s32(svbool_t, int8_t const *, svuint32_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sb_gather_s64offset_u64)))
+svuint64_t svldnt1sb_gather_s64offset_u64(svbool_t, int8_t const *, svint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sb_gather_s64offset_s64)))
+svint64_t svldnt1sb_gather_s64offset_s64(svbool_t, int8_t const *, svint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sb_gather_u64offset_u64)))
+svuint64_t svldnt1sb_gather_u64offset_u64(svbool_t, int8_t const *, svuint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sb_gather_u64offset_s64)))
+svint64_t svldnt1sb_gather_u64offset_s64(svbool_t, int8_t const *, svuint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sh_gather_u32base_index_u32)))
+svuint32_t svldnt1sh_gather_u32base_index_u32(svbool_t, svuint32_t, int64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sh_gather_u64base_index_u64)))
+svuint64_t svldnt1sh_gather_u64base_index_u64(svbool_t, svuint64_t, int64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sh_gather_u32base_index_s32)))
+svint32_t svldnt1sh_gather_u32base_index_s32(svbool_t, svuint32_t, int64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sh_gather_u64base_index_s64)))
+svint64_t svldnt1sh_gather_u64base_index_s64(svbool_t, svuint64_t, int64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sh_gather_u32base_offset_u32)))
+svuint32_t svldnt1sh_gather_u32base_offset_u32(svbool_t, svuint32_t, int64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sh_gather_u64base_offset_u64)))
+svuint64_t svldnt1sh_gather_u64base_offset_u64(svbool_t, svuint64_t, int64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sh_gather_u32base_offset_s32)))
+svint32_t svldnt1sh_gather_u32base_offset_s32(svbool_t, svuint32_t, int64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sh_gather_u64base_offset_s64)))
+svint64_t svldnt1sh_gather_u64base_offset_s64(svbool_t, svuint64_t, int64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sh_gather_u32base_u32)))
+svuint32_t svldnt1sh_gather_u32base_u32(svbool_t, svuint32_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sh_gather_u64base_u64)))
+svuint64_t svldnt1sh_gather_u64base_u64(svbool_t, svuint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sh_gather_u32base_s32)))
+svint32_t svldnt1sh_gather_u32base_s32(svbool_t, svuint32_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sh_gather_u64base_s64)))
+svint64_t svldnt1sh_gather_u64base_s64(svbool_t, svuint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sh_gather_s64index_u64)))
+svuint64_t svldnt1sh_gather_s64index_u64(svbool_t, int16_t const *, svint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sh_gather_s64index_s64)))
+svint64_t svldnt1sh_gather_s64index_s64(svbool_t, int16_t const *, svint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sh_gather_u64index_u64)))
+svuint64_t svldnt1sh_gather_u64index_u64(svbool_t, int16_t const *, svuint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sh_gather_u64index_s64)))
+svint64_t svldnt1sh_gather_u64index_s64(svbool_t, int16_t const *, svuint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sh_gather_u32offset_u32)))
+svuint32_t svldnt1sh_gather_u32offset_u32(svbool_t, int16_t const *, svuint32_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sh_gather_u32offset_s32)))
+svint32_t svldnt1sh_gather_u32offset_s32(svbool_t, int16_t const *, svuint32_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sh_gather_s64offset_u64)))
+svuint64_t svldnt1sh_gather_s64offset_u64(svbool_t, int16_t const *, svint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sh_gather_s64offset_s64)))
+svint64_t svldnt1sh_gather_s64offset_s64(svbool_t, int16_t const *, svint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sh_gather_u64offset_u64)))
+svuint64_t svldnt1sh_gather_u64offset_u64(svbool_t, int16_t const *, svuint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sh_gather_u64offset_s64)))
+svint64_t svldnt1sh_gather_u64offset_s64(svbool_t, int16_t const *, svuint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sw_gather_u64base_index_u64)))
+svuint64_t svldnt1sw_gather_u64base_index_u64(svbool_t, svuint64_t, int64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sw_gather_u64base_index_s64)))
+svint64_t svldnt1sw_gather_u64base_index_s64(svbool_t, svuint64_t, int64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sw_gather_u64base_offset_u64)))
+svuint64_t svldnt1sw_gather_u64base_offset_u64(svbool_t, svuint64_t, int64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sw_gather_u64base_offset_s64)))
+svint64_t svldnt1sw_gather_u64base_offset_s64(svbool_t, svuint64_t, int64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sw_gather_u64base_u64)))
+svuint64_t svldnt1sw_gather_u64base_u64(svbool_t, svuint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sw_gather_u64base_s64)))
+svint64_t svldnt1sw_gather_u64base_s64(svbool_t, svuint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sw_gather_s64index_u64)))
+svuint64_t svldnt1sw_gather_s64index_u64(svbool_t, int32_t const *, svint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sw_gather_s64index_s64)))
+svint64_t svldnt1sw_gather_s64index_s64(svbool_t, int32_t const *, svint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sw_gather_u64index_u64)))
+svuint64_t svldnt1sw_gather_u64index_u64(svbool_t, int32_t const *, svuint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sw_gather_u64index_s64)))
+svint64_t svldnt1sw_gather_u64index_s64(svbool_t, int32_t const *, svuint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sw_gather_s64offset_u64)))
+svuint64_t svldnt1sw_gather_s64offset_u64(svbool_t, int32_t const *, svint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sw_gather_s64offset_s64)))
+svint64_t svldnt1sw_gather_s64offset_s64(svbool_t, int32_t const *, svint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sw_gather_u64offset_u64)))
+svuint64_t svldnt1sw_gather_u64offset_u64(svbool_t, int32_t const *, svuint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sw_gather_u64offset_s64)))
+svint64_t svldnt1sw_gather_u64offset_s64(svbool_t, int32_t const *, svuint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1ub_gather_u32base_offset_u32)))
+svuint32_t svldnt1ub_gather_u32base_offset_u32(svbool_t, svuint32_t, int64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1ub_gather_u64base_offset_u64)))
+svuint64_t svldnt1ub_gather_u64base_offset_u64(svbool_t, svuint64_t, int64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1ub_gather_u32base_offset_s32)))
+svint32_t svldnt1ub_gather_u32base_offset_s32(svbool_t, svuint32_t, int64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1ub_gather_u64base_offset_s64)))
+svint64_t svldnt1ub_gather_u64base_offset_s64(svbool_t, svuint64_t, int64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1ub_gather_u32base_u32)))
+svuint32_t svldnt1ub_gather_u32base_u32(svbool_t, svuint32_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1ub_gather_u64base_u64)))
+svuint64_t svldnt1ub_gather_u64base_u64(svbool_t, svuint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1ub_gather_u32base_s32)))
+svint32_t svldnt1ub_gather_u32base_s32(svbool_t, svuint32_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1ub_gather_u64base_s64)))
+svint64_t svldnt1ub_gather_u64base_s64(svbool_t, svuint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1ub_gather_u32offset_u32)))
+svuint32_t svldnt1ub_gather_u32offset_u32(svbool_t, uint8_t const *, svuint32_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1ub_gather_u32offset_s32)))
+svint32_t svldnt1ub_gather_u32offset_s32(svbool_t, uint8_t const *, svuint32_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1ub_gather_s64offset_u64)))
+svuint64_t svldnt1ub_gather_s64offset_u64(svbool_t, uint8_t const *, svint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1ub_gather_s64offset_s64)))
+svint64_t svldnt1ub_gather_s64offset_s64(svbool_t, uint8_t const *, svint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1ub_gather_u64offset_u64)))
+svuint64_t svldnt1ub_gather_u64offset_u64(svbool_t, uint8_t const *, svuint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1ub_gather_u64offset_s64)))
+svint64_t svldnt1ub_gather_u64offset_s64(svbool_t, uint8_t const *, svuint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1uh_gather_u32base_index_u32)))
+svuint32_t svldnt1uh_gather_u32base_index_u32(svbool_t, svuint32_t, int64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1uh_gather_u64base_index_u64)))
+svuint64_t svldnt1uh_gather_u64base_index_u64(svbool_t, svuint64_t, int64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1uh_gather_u32base_index_s32)))
+svint32_t svldnt1uh_gather_u32base_index_s32(svbool_t, svuint32_t, int64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1uh_gather_u64base_index_s64)))
+svint64_t svldnt1uh_gather_u64base_index_s64(svbool_t, svuint64_t, int64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1uh_gather_u32base_offset_u32)))
+svuint32_t svldnt1uh_gather_u32base_offset_u32(svbool_t, svuint32_t, int64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1uh_gather_u64base_offset_u64)))
+svuint64_t svldnt1uh_gather_u64base_offset_u64(svbool_t, svuint64_t, int64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1uh_gather_u32base_offset_s32)))
+svint32_t svldnt1uh_gather_u32base_offset_s32(svbool_t, svuint32_t, int64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1uh_gather_u64base_offset_s64)))
+svint64_t svldnt1uh_gather_u64base_offset_s64(svbool_t, svuint64_t, int64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1uh_gather_u32base_u32)))
+svuint32_t svldnt1uh_gather_u32base_u32(svbool_t, svuint32_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1uh_gather_u64base_u64)))
+svuint64_t svldnt1uh_gather_u64base_u64(svbool_t, svuint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1uh_gather_u32base_s32)))
+svint32_t svldnt1uh_gather_u32base_s32(svbool_t, svuint32_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1uh_gather_u64base_s64)))
+svint64_t svldnt1uh_gather_u64base_s64(svbool_t, svuint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1uh_gather_s64index_u64)))
+svuint64_t svldnt1uh_gather_s64index_u64(svbool_t, uint16_t const *, svint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1uh_gather_s64index_s64)))
+svint64_t svldnt1uh_gather_s64index_s64(svbool_t, uint16_t const *, svint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1uh_gather_u64index_u64)))
+svuint64_t svldnt1uh_gather_u64index_u64(svbool_t, uint16_t const *, svuint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1uh_gather_u64index_s64)))
+svint64_t svldnt1uh_gather_u64index_s64(svbool_t, uint16_t const *, svuint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1uh_gather_u32offset_u32)))
+svuint32_t svldnt1uh_gather_u32offset_u32(svbool_t, uint16_t const *, svuint32_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1uh_gather_u32offset_s32)))
+svint32_t svldnt1uh_gather_u32offset_s32(svbool_t, uint16_t const *, svuint32_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1uh_gather_s64offset_u64)))
+svuint64_t svldnt1uh_gather_s64offset_u64(svbool_t, uint16_t const *, svint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1uh_gather_s64offset_s64)))
+svint64_t svldnt1uh_gather_s64offset_s64(svbool_t, uint16_t const *, svint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1uh_gather_u64offset_u64)))
+svuint64_t svldnt1uh_gather_u64offset_u64(svbool_t, uint16_t const *, svuint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1uh_gather_u64offset_s64)))
+svint64_t svldnt1uh_gather_u64offset_s64(svbool_t, uint16_t const *, svuint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1uw_gather_u64base_index_u64)))
+svuint64_t svldnt1uw_gather_u64base_index_u64(svbool_t, svuint64_t, int64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1uw_gather_u64base_index_s64)))
+svint64_t svldnt1uw_gather_u64base_index_s64(svbool_t, svuint64_t, int64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1uw_gather_u64base_offset_u64)))
+svuint64_t svldnt1uw_gather_u64base_offset_u64(svbool_t, svuint64_t, int64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1uw_gather_u64base_offset_s64)))
+svint64_t svldnt1uw_gather_u64base_offset_s64(svbool_t, svuint64_t, int64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1uw_gather_u64base_u64)))
+svuint64_t svldnt1uw_gather_u64base_u64(svbool_t, svuint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1uw_gather_u64base_s64)))
+svint64_t svldnt1uw_gather_u64base_s64(svbool_t, svuint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1uw_gather_s64index_u64)))
+svuint64_t svldnt1uw_gather_s64index_u64(svbool_t, uint32_t const *, svint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1uw_gather_s64index_s64)))
+svint64_t svldnt1uw_gather_s64index_s64(svbool_t, uint32_t const *, svint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1uw_gather_u64index_u64)))
+svuint64_t svldnt1uw_gather_u64index_u64(svbool_t, uint32_t const *, svuint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1uw_gather_u64index_s64)))
+svint64_t svldnt1uw_gather_u64index_s64(svbool_t, uint32_t const *, svuint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1uw_gather_s64offset_u64)))
+svuint64_t svldnt1uw_gather_s64offset_u64(svbool_t, uint32_t const *, svint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1uw_gather_s64offset_s64)))
+svint64_t svldnt1uw_gather_s64offset_s64(svbool_t, uint32_t const *, svint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1uw_gather_u64offset_u64)))
+svuint64_t svldnt1uw_gather_u64offset_u64(svbool_t, uint32_t const *, svuint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1uw_gather_u64offset_s64)))
+svint64_t svldnt1uw_gather_u64offset_s64(svbool_t, uint32_t const *, svuint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmatch_u8)))
+svbool_t svmatch_u8(svbool_t, svuint8_t, svuint8_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmatch_u16)))
+svbool_t svmatch_u16(svbool_t, svuint16_t, svuint16_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmatch_s8)))
+svbool_t svmatch_s8(svbool_t, svint8_t, svint8_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmatch_s16)))
+svbool_t svmatch_s16(svbool_t, svint16_t, svint16_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svnmatch_u8)))
+svbool_t svnmatch_u8(svbool_t, svuint8_t, svuint8_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svnmatch_u16)))
+svbool_t svnmatch_u16(svbool_t, svuint16_t, svuint16_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svnmatch_s8)))
+svbool_t svnmatch_s8(svbool_t, svint8_t, svint8_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svnmatch_s16)))
+svbool_t svnmatch_s16(svbool_t, svint16_t, svint16_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_scatter_u32base_index_u32)))
+void svstnt1_scatter_u32base_index_u32(svbool_t, svuint32_t, int64_t, svuint32_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_scatter_u64base_index_u64)))
+void svstnt1_scatter_u64base_index_u64(svbool_t, svuint64_t, int64_t, svuint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_scatter_u64base_index_f64)))
+void svstnt1_scatter_u64base_index_f64(svbool_t, svuint64_t, int64_t, svfloat64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_scatter_u32base_index_f32)))
+void svstnt1_scatter_u32base_index_f32(svbool_t, svuint32_t, int64_t, svfloat32_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_scatter_u32base_index_s32)))
+void svstnt1_scatter_u32base_index_s32(svbool_t, svuint32_t, int64_t, svint32_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_scatter_u64base_index_s64)))
+void svstnt1_scatter_u64base_index_s64(svbool_t, svuint64_t, int64_t, svint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_scatter_u32base_offset_u32)))
+void svstnt1_scatter_u32base_offset_u32(svbool_t, svuint32_t, int64_t, svuint32_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_scatter_u64base_offset_u64)))
+void svstnt1_scatter_u64base_offset_u64(svbool_t, svuint64_t, int64_t, svuint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_scatter_u64base_offset_f64)))
+void svstnt1_scatter_u64base_offset_f64(svbool_t, svuint64_t, int64_t, svfloat64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_scatter_u32base_offset_f32)))
+void svstnt1_scatter_u32base_offset_f32(svbool_t, svuint32_t, int64_t, svfloat32_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_scatter_u32base_offset_s32)))
+void svstnt1_scatter_u32base_offset_s32(svbool_t, svuint32_t, int64_t, svint32_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_scatter_u64base_offset_s64)))
+void svstnt1_scatter_u64base_offset_s64(svbool_t, svuint64_t, int64_t, svint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_scatter_u32base_u32)))
+void svstnt1_scatter_u32base_u32(svbool_t, svuint32_t, svuint32_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_scatter_u64base_u64)))
+void svstnt1_scatter_u64base_u64(svbool_t, svuint64_t, svuint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_scatter_u64base_f64)))
+void svstnt1_scatter_u64base_f64(svbool_t, svuint64_t, svfloat64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_scatter_u32base_f32)))
+void svstnt1_scatter_u32base_f32(svbool_t, svuint32_t, svfloat32_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_scatter_u32base_s32)))
+void svstnt1_scatter_u32base_s32(svbool_t, svuint32_t, svint32_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_scatter_u64base_s64)))
+void svstnt1_scatter_u64base_s64(svbool_t, svuint64_t, svint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_scatter_s64index_u64)))
+void svstnt1_scatter_s64index_u64(svbool_t, uint64_t *, svint64_t, svuint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_scatter_s64index_f64)))
+void svstnt1_scatter_s64index_f64(svbool_t, float64_t *, svint64_t, svfloat64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_scatter_s64index_s64)))
+void svstnt1_scatter_s64index_s64(svbool_t, int64_t *, svint64_t, svint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_scatter_u64index_u64)))
+void svstnt1_scatter_u64index_u64(svbool_t, uint64_t *, svuint64_t, svuint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_scatter_u64index_f64)))
+void svstnt1_scatter_u64index_f64(svbool_t, float64_t *, svuint64_t, svfloat64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_scatter_u64index_s64)))
+void svstnt1_scatter_u64index_s64(svbool_t, int64_t *, svuint64_t, svint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_scatter_u32offset_u32)))
+void svstnt1_scatter_u32offset_u32(svbool_t, uint32_t *, svuint32_t, svuint32_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_scatter_u32offset_f32)))
+void svstnt1_scatter_u32offset_f32(svbool_t, float32_t *, svuint32_t, svfloat32_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_scatter_u32offset_s32)))
+void svstnt1_scatter_u32offset_s32(svbool_t, int32_t *, svuint32_t, svint32_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_scatter_s64offset_u64)))
+void svstnt1_scatter_s64offset_u64(svbool_t, uint64_t *, svint64_t, svuint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_scatter_s64offset_f64)))
+void svstnt1_scatter_s64offset_f64(svbool_t, float64_t *, svint64_t, svfloat64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_scatter_s64offset_s64)))
+void svstnt1_scatter_s64offset_s64(svbool_t, int64_t *, svint64_t, svint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_scatter_u64offset_u64)))
+void svstnt1_scatter_u64offset_u64(svbool_t, uint64_t *, svuint64_t, svuint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_scatter_u64offset_f64)))
+void svstnt1_scatter_u64offset_f64(svbool_t, float64_t *, svuint64_t, svfloat64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_scatter_u64offset_s64)))
+void svstnt1_scatter_u64offset_s64(svbool_t, int64_t *, svuint64_t, svint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1b_scatter_u32base_offset_u32)))
+void svstnt1b_scatter_u32base_offset_u32(svbool_t, svuint32_t, int64_t, svuint32_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1b_scatter_u64base_offset_u64)))
+void svstnt1b_scatter_u64base_offset_u64(svbool_t, svuint64_t, int64_t, svuint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1b_scatter_u32base_offset_s32)))
+void svstnt1b_scatter_u32base_offset_s32(svbool_t, svuint32_t, int64_t, svint32_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1b_scatter_u64base_offset_s64)))
+void svstnt1b_scatter_u64base_offset_s64(svbool_t, svuint64_t, int64_t, svint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1b_scatter_u32base_u32)))
+void svstnt1b_scatter_u32base_u32(svbool_t, svuint32_t, svuint32_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1b_scatter_u64base_u64)))
+void svstnt1b_scatter_u64base_u64(svbool_t, svuint64_t, svuint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1b_scatter_u32base_s32)))
+void svstnt1b_scatter_u32base_s32(svbool_t, svuint32_t, svint32_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1b_scatter_u64base_s64)))
+void svstnt1b_scatter_u64base_s64(svbool_t, svuint64_t, svint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1b_scatter_u32offset_s32)))
+void svstnt1b_scatter_u32offset_s32(svbool_t, int8_t *, svuint32_t, svint32_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1b_scatter_u32offset_u32)))
+void svstnt1b_scatter_u32offset_u32(svbool_t, uint8_t *, svuint32_t, svuint32_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1b_scatter_s64offset_s64)))
+void svstnt1b_scatter_s64offset_s64(svbool_t, int8_t *, svint64_t, svint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1b_scatter_s64offset_u64)))
+void svstnt1b_scatter_s64offset_u64(svbool_t, uint8_t *, svint64_t, svuint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1b_scatter_u64offset_s64)))
+void svstnt1b_scatter_u64offset_s64(svbool_t, int8_t *, svuint64_t, svint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1b_scatter_u64offset_u64)))
+void svstnt1b_scatter_u64offset_u64(svbool_t, uint8_t *, svuint64_t, svuint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1h_scatter_u32base_index_u32)))
+void svstnt1h_scatter_u32base_index_u32(svbool_t, svuint32_t, int64_t, svuint32_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1h_scatter_u64base_index_u64)))
+void svstnt1h_scatter_u64base_index_u64(svbool_t, svuint64_t, int64_t, svuint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1h_scatter_u32base_index_s32)))
+void svstnt1h_scatter_u32base_index_s32(svbool_t, svuint32_t, int64_t, svint32_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1h_scatter_u64base_index_s64)))
+void svstnt1h_scatter_u64base_index_s64(svbool_t, svuint64_t, int64_t, svint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1h_scatter_u32base_offset_u32)))
+void svstnt1h_scatter_u32base_offset_u32(svbool_t, svuint32_t, int64_t, svuint32_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1h_scatter_u64base_offset_u64)))
+void svstnt1h_scatter_u64base_offset_u64(svbool_t, svuint64_t, int64_t, svuint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1h_scatter_u32base_offset_s32)))
+void svstnt1h_scatter_u32base_offset_s32(svbool_t, svuint32_t, int64_t, svint32_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1h_scatter_u64base_offset_s64)))
+void svstnt1h_scatter_u64base_offset_s64(svbool_t, svuint64_t, int64_t, svint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1h_scatter_u32base_u32)))
+void svstnt1h_scatter_u32base_u32(svbool_t, svuint32_t, svuint32_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1h_scatter_u64base_u64)))
+void svstnt1h_scatter_u64base_u64(svbool_t, svuint64_t, svuint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1h_scatter_u32base_s32)))
+void svstnt1h_scatter_u32base_s32(svbool_t, svuint32_t, svint32_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1h_scatter_u64base_s64)))
+void svstnt1h_scatter_u64base_s64(svbool_t, svuint64_t, svint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1h_scatter_s64index_s64)))
+void svstnt1h_scatter_s64index_s64(svbool_t, int16_t *, svint64_t, svint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1h_scatter_s64index_u64)))
+void svstnt1h_scatter_s64index_u64(svbool_t, uint16_t *, svint64_t, svuint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1h_scatter_u64index_s64)))
+void svstnt1h_scatter_u64index_s64(svbool_t, int16_t *, svuint64_t, svint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1h_scatter_u64index_u64)))
+void svstnt1h_scatter_u64index_u64(svbool_t, uint16_t *, svuint64_t, svuint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1h_scatter_u32offset_s32)))
+void svstnt1h_scatter_u32offset_s32(svbool_t, int16_t *, svuint32_t, svint32_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1h_scatter_u32offset_u32)))
+void svstnt1h_scatter_u32offset_u32(svbool_t, uint16_t *, svuint32_t, svuint32_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1h_scatter_s64offset_s64)))
+void svstnt1h_scatter_s64offset_s64(svbool_t, int16_t *, svint64_t, svint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1h_scatter_s64offset_u64)))
+void svstnt1h_scatter_s64offset_u64(svbool_t, uint16_t *, svint64_t, svuint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1h_scatter_u64offset_s64)))
+void svstnt1h_scatter_u64offset_s64(svbool_t, int16_t *, svuint64_t, svint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1h_scatter_u64offset_u64)))
+void svstnt1h_scatter_u64offset_u64(svbool_t, uint16_t *, svuint64_t, svuint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1w_scatter_u64base_index_u64)))
+void svstnt1w_scatter_u64base_index_u64(svbool_t, svuint64_t, int64_t, svuint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1w_scatter_u64base_index_s64)))
+void svstnt1w_scatter_u64base_index_s64(svbool_t, svuint64_t, int64_t, svint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1w_scatter_u64base_offset_u64)))
+void svstnt1w_scatter_u64base_offset_u64(svbool_t, svuint64_t, int64_t, svuint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1w_scatter_u64base_offset_s64)))
+void svstnt1w_scatter_u64base_offset_s64(svbool_t, svuint64_t, int64_t, svint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1w_scatter_u64base_u64)))
+void svstnt1w_scatter_u64base_u64(svbool_t, svuint64_t, svuint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1w_scatter_u64base_s64)))
+void svstnt1w_scatter_u64base_s64(svbool_t, svuint64_t, svint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1w_scatter_s64index_s64)))
+void svstnt1w_scatter_s64index_s64(svbool_t, int32_t *, svint64_t, svint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1w_scatter_s64index_u64)))
+void svstnt1w_scatter_s64index_u64(svbool_t, uint32_t *, svint64_t, svuint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1w_scatter_u64index_s64)))
+void svstnt1w_scatter_u64index_s64(svbool_t, int32_t *, svuint64_t, svint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1w_scatter_u64index_u64)))
+void svstnt1w_scatter_u64index_u64(svbool_t, uint32_t *, svuint64_t, svuint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1w_scatter_s64offset_s64)))
+void svstnt1w_scatter_s64offset_s64(svbool_t, int32_t *, svint64_t, svint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1w_scatter_s64offset_u64)))
+void svstnt1w_scatter_s64offset_u64(svbool_t, uint32_t *, svint64_t, svuint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1w_scatter_u64offset_s64)))
+void svstnt1w_scatter_u64offset_s64(svbool_t, int32_t *, svuint64_t, svint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1w_scatter_u64offset_u64)))
+void svstnt1w_scatter_u64offset_u64(svbool_t, uint32_t *, svuint64_t, svuint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhistcnt_u32_z)))
+svuint32_t svhistcnt_z(svbool_t, svuint32_t, svuint32_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhistcnt_u64_z)))
+svuint64_t svhistcnt_z(svbool_t, svuint64_t, svuint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhistcnt_s32_z)))
+svuint32_t svhistcnt_z(svbool_t, svint32_t, svint32_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhistcnt_s64_z)))
+svuint64_t svhistcnt_z(svbool_t, svint64_t, svint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhistseg_u8)))
+svuint8_t svhistseg(svuint8_t, svuint8_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhistseg_s8)))
+svuint8_t svhistseg(svint8_t, svint8_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_gather_u32base_index_u32)))
+svuint32_t svldnt1_gather_index_u32(svbool_t, svuint32_t, int64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_gather_u64base_index_u64)))
+svuint64_t svldnt1_gather_index_u64(svbool_t, svuint64_t, int64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_gather_u64base_index_f64)))
+svfloat64_t svldnt1_gather_index_f64(svbool_t, svuint64_t, int64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_gather_u32base_index_f32)))
+svfloat32_t svldnt1_gather_index_f32(svbool_t, svuint32_t, int64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_gather_u32base_index_s32)))
+svint32_t svldnt1_gather_index_s32(svbool_t, svuint32_t, int64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_gather_u64base_index_s64)))
+svint64_t svldnt1_gather_index_s64(svbool_t, svuint64_t, int64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_gather_u32base_offset_u32)))
+svuint32_t svldnt1_gather_offset_u32(svbool_t, svuint32_t, int64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_gather_u64base_offset_u64)))
+svuint64_t svldnt1_gather_offset_u64(svbool_t, svuint64_t, int64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_gather_u64base_offset_f64)))
+svfloat64_t svldnt1_gather_offset_f64(svbool_t, svuint64_t, int64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_gather_u32base_offset_f32)))
+svfloat32_t svldnt1_gather_offset_f32(svbool_t, svuint32_t, int64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_gather_u32base_offset_s32)))
+svint32_t svldnt1_gather_offset_s32(svbool_t, svuint32_t, int64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_gather_u64base_offset_s64)))
+svint64_t svldnt1_gather_offset_s64(svbool_t, svuint64_t, int64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_gather_u32base_u32)))
+svuint32_t svldnt1_gather_u32(svbool_t, svuint32_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_gather_u64base_u64)))
+svuint64_t svldnt1_gather_u64(svbool_t, svuint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_gather_u64base_f64)))
+svfloat64_t svldnt1_gather_f64(svbool_t, svuint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_gather_u32base_f32)))
+svfloat32_t svldnt1_gather_f32(svbool_t, svuint32_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_gather_u32base_s32)))
+svint32_t svldnt1_gather_s32(svbool_t, svuint32_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_gather_u64base_s64)))
+svint64_t svldnt1_gather_s64(svbool_t, svuint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_gather_s64index_u64)))
+svuint64_t svldnt1_gather_index(svbool_t, uint64_t const *, svint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_gather_s64index_f64)))
+svfloat64_t svldnt1_gather_index(svbool_t, float64_t const *, svint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_gather_s64index_s64)))
+svint64_t svldnt1_gather_index(svbool_t, int64_t const *, svint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_gather_u64index_u64)))
+svuint64_t svldnt1_gather_index(svbool_t, uint64_t const *, svuint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_gather_u64index_f64)))
+svfloat64_t svldnt1_gather_index(svbool_t, float64_t const *, svuint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_gather_u64index_s64)))
+svint64_t svldnt1_gather_index(svbool_t, int64_t const *, svuint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_gather_u32offset_u32)))
+svuint32_t svldnt1_gather_offset(svbool_t, uint32_t const *, svuint32_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_gather_u32offset_f32)))
+svfloat32_t svldnt1_gather_offset(svbool_t, float32_t const *, svuint32_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_gather_u32offset_s32)))
+svint32_t svldnt1_gather_offset(svbool_t, int32_t const *, svuint32_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_gather_s64offset_u64)))
+svuint64_t svldnt1_gather_offset(svbool_t, uint64_t const *, svint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_gather_s64offset_f64)))
+svfloat64_t svldnt1_gather_offset(svbool_t, float64_t const *, svint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_gather_s64offset_s64)))
+svint64_t svldnt1_gather_offset(svbool_t, int64_t const *, svint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_gather_u64offset_u64)))
+svuint64_t svldnt1_gather_offset(svbool_t, uint64_t const *, svuint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_gather_u64offset_f64)))
+svfloat64_t svldnt1_gather_offset(svbool_t, float64_t const *, svuint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_gather_u64offset_s64)))
+svint64_t svldnt1_gather_offset(svbool_t, int64_t const *, svuint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sb_gather_u32base_offset_u32)))
+svuint32_t svldnt1sb_gather_offset_u32(svbool_t, svuint32_t, int64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sb_gather_u64base_offset_u64)))
+svuint64_t svldnt1sb_gather_offset_u64(svbool_t, svuint64_t, int64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sb_gather_u32base_offset_s32)))
+svint32_t svldnt1sb_gather_offset_s32(svbool_t, svuint32_t, int64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sb_gather_u64base_offset_s64)))
+svint64_t svldnt1sb_gather_offset_s64(svbool_t, svuint64_t, int64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sb_gather_u32base_u32)))
+svuint32_t svldnt1sb_gather_u32(svbool_t, svuint32_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sb_gather_u64base_u64)))
+svuint64_t svldnt1sb_gather_u64(svbool_t, svuint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sb_gather_u32base_s32)))
+svint32_t svldnt1sb_gather_s32(svbool_t, svuint32_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sb_gather_u64base_s64)))
+svint64_t svldnt1sb_gather_s64(svbool_t, svuint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sb_gather_u32offset_u32)))
+svuint32_t svldnt1sb_gather_offset_u32(svbool_t, int8_t const *, svuint32_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sb_gather_u32offset_s32)))
+svint32_t svldnt1sb_gather_offset_s32(svbool_t, int8_t const *, svuint32_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sb_gather_s64offset_u64)))
+svuint64_t svldnt1sb_gather_offset_u64(svbool_t, int8_t const *, svint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sb_gather_s64offset_s64)))
+svint64_t svldnt1sb_gather_offset_s64(svbool_t, int8_t const *, svint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sb_gather_u64offset_u64)))
+svuint64_t svldnt1sb_gather_offset_u64(svbool_t, int8_t const *, svuint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sb_gather_u64offset_s64)))
+svint64_t svldnt1sb_gather_offset_s64(svbool_t, int8_t const *, svuint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sh_gather_u32base_index_u32)))
+svuint32_t svldnt1sh_gather_index_u32(svbool_t, svuint32_t, int64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sh_gather_u64base_index_u64)))
+svuint64_t svldnt1sh_gather_index_u64(svbool_t, svuint64_t, int64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sh_gather_u32base_index_s32)))
+svint32_t svldnt1sh_gather_index_s32(svbool_t, svuint32_t, int64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sh_gather_u64base_index_s64)))
+svint64_t svldnt1sh_gather_index_s64(svbool_t, svuint64_t, int64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sh_gather_u32base_offset_u32)))
+svuint32_t svldnt1sh_gather_offset_u32(svbool_t, svuint32_t, int64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sh_gather_u64base_offset_u64)))
+svuint64_t svldnt1sh_gather_offset_u64(svbool_t, svuint64_t, int64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sh_gather_u32base_offset_s32)))
+svint32_t svldnt1sh_gather_offset_s32(svbool_t, svuint32_t, int64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sh_gather_u64base_offset_s64)))
+svint64_t svldnt1sh_gather_offset_s64(svbool_t, svuint64_t, int64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sh_gather_u32base_u32)))
+svuint32_t svldnt1sh_gather_u32(svbool_t, svuint32_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sh_gather_u64base_u64)))
+svuint64_t svldnt1sh_gather_u64(svbool_t, svuint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sh_gather_u32base_s32)))
+svint32_t svldnt1sh_gather_s32(svbool_t, svuint32_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sh_gather_u64base_s64)))
+svint64_t svldnt1sh_gather_s64(svbool_t, svuint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sh_gather_s64index_u64)))
+svuint64_t svldnt1sh_gather_index_u64(svbool_t, int16_t const *, svint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sh_gather_s64index_s64)))
+svint64_t svldnt1sh_gather_index_s64(svbool_t, int16_t const *, svint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sh_gather_u64index_u64)))
+svuint64_t svldnt1sh_gather_index_u64(svbool_t, int16_t const *, svuint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sh_gather_u64index_s64)))
+svint64_t svldnt1sh_gather_index_s64(svbool_t, int16_t const *, svuint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sh_gather_u32offset_u32)))
+svuint32_t svldnt1sh_gather_offset_u32(svbool_t, int16_t const *, svuint32_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sh_gather_u32offset_s32)))
+svint32_t svldnt1sh_gather_offset_s32(svbool_t, int16_t const *, svuint32_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sh_gather_s64offset_u64)))
+svuint64_t svldnt1sh_gather_offset_u64(svbool_t, int16_t const *, svint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sh_gather_s64offset_s64)))
+svint64_t svldnt1sh_gather_offset_s64(svbool_t, int16_t const *, svint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sh_gather_u64offset_u64)))
+svuint64_t svldnt1sh_gather_offset_u64(svbool_t, int16_t const *, svuint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sh_gather_u64offset_s64)))
+svint64_t svldnt1sh_gather_offset_s64(svbool_t, int16_t const *, svuint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sw_gather_u64base_index_u64)))
+svuint64_t svldnt1sw_gather_index_u64(svbool_t, svuint64_t, int64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sw_gather_u64base_index_s64)))
+svint64_t svldnt1sw_gather_index_s64(svbool_t, svuint64_t, int64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sw_gather_u64base_offset_u64)))
+svuint64_t svldnt1sw_gather_offset_u64(svbool_t, svuint64_t, int64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sw_gather_u64base_offset_s64)))
+svint64_t svldnt1sw_gather_offset_s64(svbool_t, svuint64_t, int64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sw_gather_u64base_u64)))
+svuint64_t svldnt1sw_gather_u64(svbool_t, svuint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sw_gather_u64base_s64)))
+svint64_t svldnt1sw_gather_s64(svbool_t, svuint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sw_gather_s64index_u64)))
+svuint64_t svldnt1sw_gather_index_u64(svbool_t, int32_t const *, svint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sw_gather_s64index_s64)))
+svint64_t svldnt1sw_gather_index_s64(svbool_t, int32_t const *, svint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sw_gather_u64index_u64)))
+svuint64_t svldnt1sw_gather_index_u64(svbool_t, int32_t const *, svuint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sw_gather_u64index_s64)))
+svint64_t svldnt1sw_gather_index_s64(svbool_t, int32_t const *, svuint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sw_gather_s64offset_u64)))
+svuint64_t svldnt1sw_gather_offset_u64(svbool_t, int32_t const *, svint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sw_gather_s64offset_s64)))
+svint64_t svldnt1sw_gather_offset_s64(svbool_t, int32_t const *, svint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sw_gather_u64offset_u64)))
+svuint64_t svldnt1sw_gather_offset_u64(svbool_t, int32_t const *, svuint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1sw_gather_u64offset_s64)))
+svint64_t svldnt1sw_gather_offset_s64(svbool_t, int32_t const *, svuint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1ub_gather_u32base_offset_u32)))
+svuint32_t svldnt1ub_gather_offset_u32(svbool_t, svuint32_t, int64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1ub_gather_u64base_offset_u64)))
+svuint64_t svldnt1ub_gather_offset_u64(svbool_t, svuint64_t, int64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1ub_gather_u32base_offset_s32)))
+svint32_t svldnt1ub_gather_offset_s32(svbool_t, svuint32_t, int64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1ub_gather_u64base_offset_s64)))
+svint64_t svldnt1ub_gather_offset_s64(svbool_t, svuint64_t, int64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1ub_gather_u32base_u32)))
+svuint32_t svldnt1ub_gather_u32(svbool_t, svuint32_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1ub_gather_u64base_u64)))
+svuint64_t svldnt1ub_gather_u64(svbool_t, svuint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1ub_gather_u32base_s32)))
+svint32_t svldnt1ub_gather_s32(svbool_t, svuint32_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1ub_gather_u64base_s64)))
+svint64_t svldnt1ub_gather_s64(svbool_t, svuint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1ub_gather_u32offset_u32)))
+svuint32_t svldnt1ub_gather_offset_u32(svbool_t, uint8_t const *, svuint32_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1ub_gather_u32offset_s32)))
+svint32_t svldnt1ub_gather_offset_s32(svbool_t, uint8_t const *, svuint32_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1ub_gather_s64offset_u64)))
+svuint64_t svldnt1ub_gather_offset_u64(svbool_t, uint8_t const *, svint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1ub_gather_s64offset_s64)))
+svint64_t svldnt1ub_gather_offset_s64(svbool_t, uint8_t const *, svint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1ub_gather_u64offset_u64)))
+svuint64_t svldnt1ub_gather_offset_u64(svbool_t, uint8_t const *, svuint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1ub_gather_u64offset_s64)))
+svint64_t svldnt1ub_gather_offset_s64(svbool_t, uint8_t const *, svuint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1uh_gather_u32base_index_u32)))
+svuint32_t svldnt1uh_gather_index_u32(svbool_t, svuint32_t, int64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1uh_gather_u64base_index_u64)))
+svuint64_t svldnt1uh_gather_index_u64(svbool_t, svuint64_t, int64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1uh_gather_u32base_index_s32)))
+svint32_t svldnt1uh_gather_index_s32(svbool_t, svuint32_t, int64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1uh_gather_u64base_index_s64)))
+svint64_t svldnt1uh_gather_index_s64(svbool_t, svuint64_t, int64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1uh_gather_u32base_offset_u32)))
+svuint32_t svldnt1uh_gather_offset_u32(svbool_t, svuint32_t, int64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1uh_gather_u64base_offset_u64)))
+svuint64_t svldnt1uh_gather_offset_u64(svbool_t, svuint64_t, int64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1uh_gather_u32base_offset_s32)))
+svint32_t svldnt1uh_gather_offset_s32(svbool_t, svuint32_t, int64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1uh_gather_u64base_offset_s64)))
+svint64_t svldnt1uh_gather_offset_s64(svbool_t, svuint64_t, int64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1uh_gather_u32base_u32)))
+svuint32_t svldnt1uh_gather_u32(svbool_t, svuint32_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1uh_gather_u64base_u64)))
+svuint64_t svldnt1uh_gather_u64(svbool_t, svuint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1uh_gather_u32base_s32)))
+svint32_t svldnt1uh_gather_s32(svbool_t, svuint32_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1uh_gather_u64base_s64)))
+svint64_t svldnt1uh_gather_s64(svbool_t, svuint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1uh_gather_s64index_u64)))
+svuint64_t svldnt1uh_gather_index_u64(svbool_t, uint16_t const *, svint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1uh_gather_s64index_s64)))
+svint64_t svldnt1uh_gather_index_s64(svbool_t, uint16_t const *, svint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1uh_gather_u64index_u64)))
+svuint64_t svldnt1uh_gather_index_u64(svbool_t, uint16_t const *, svuint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1uh_gather_u64index_s64)))
+svint64_t svldnt1uh_gather_index_s64(svbool_t, uint16_t const *, svuint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1uh_gather_u32offset_u32)))
+svuint32_t svldnt1uh_gather_offset_u32(svbool_t, uint16_t const *, svuint32_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1uh_gather_u32offset_s32)))
+svint32_t svldnt1uh_gather_offset_s32(svbool_t, uint16_t const *, svuint32_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1uh_gather_s64offset_u64)))
+svuint64_t svldnt1uh_gather_offset_u64(svbool_t, uint16_t const *, svint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1uh_gather_s64offset_s64)))
+svint64_t svldnt1uh_gather_offset_s64(svbool_t, uint16_t const *, svint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1uh_gather_u64offset_u64)))
+svuint64_t svldnt1uh_gather_offset_u64(svbool_t, uint16_t const *, svuint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1uh_gather_u64offset_s64)))
+svint64_t svldnt1uh_gather_offset_s64(svbool_t, uint16_t const *, svuint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1uw_gather_u64base_index_u64)))
+svuint64_t svldnt1uw_gather_index_u64(svbool_t, svuint64_t, int64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1uw_gather_u64base_index_s64)))
+svint64_t svldnt1uw_gather_index_s64(svbool_t, svuint64_t, int64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1uw_gather_u64base_offset_u64)))
+svuint64_t svldnt1uw_gather_offset_u64(svbool_t, svuint64_t, int64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1uw_gather_u64base_offset_s64)))
+svint64_t svldnt1uw_gather_offset_s64(svbool_t, svuint64_t, int64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1uw_gather_u64base_u64)))
+svuint64_t svldnt1uw_gather_u64(svbool_t, svuint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1uw_gather_u64base_s64)))
+svint64_t svldnt1uw_gather_s64(svbool_t, svuint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1uw_gather_s64index_u64)))
+svuint64_t svldnt1uw_gather_index_u64(svbool_t, uint32_t const *, svint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1uw_gather_s64index_s64)))
+svint64_t svldnt1uw_gather_index_s64(svbool_t, uint32_t const *, svint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1uw_gather_u64index_u64)))
+svuint64_t svldnt1uw_gather_index_u64(svbool_t, uint32_t const *, svuint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1uw_gather_u64index_s64)))
+svint64_t svldnt1uw_gather_index_s64(svbool_t, uint32_t const *, svuint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1uw_gather_s64offset_u64)))
+svuint64_t svldnt1uw_gather_offset_u64(svbool_t, uint32_t const *, svint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1uw_gather_s64offset_s64)))
+svint64_t svldnt1uw_gather_offset_s64(svbool_t, uint32_t const *, svint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1uw_gather_u64offset_u64)))
+svuint64_t svldnt1uw_gather_offset_u64(svbool_t, uint32_t const *, svuint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1uw_gather_u64offset_s64)))
+svint64_t svldnt1uw_gather_offset_s64(svbool_t, uint32_t const *, svuint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmatch_u8)))
+svbool_t svmatch(svbool_t, svuint8_t, svuint8_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmatch_u16)))
+svbool_t svmatch(svbool_t, svuint16_t, svuint16_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmatch_s8)))
+svbool_t svmatch(svbool_t, svint8_t, svint8_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmatch_s16)))
+svbool_t svmatch(svbool_t, svint16_t, svint16_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svnmatch_u8)))
+svbool_t svnmatch(svbool_t, svuint8_t, svuint8_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svnmatch_u16)))
+svbool_t svnmatch(svbool_t, svuint16_t, svuint16_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svnmatch_s8)))
+svbool_t svnmatch(svbool_t, svint8_t, svint8_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svnmatch_s16)))
+svbool_t svnmatch(svbool_t, svint16_t, svint16_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_scatter_u32base_index_u32)))
+void svstnt1_scatter_index(svbool_t, svuint32_t, int64_t, svuint32_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_scatter_u64base_index_u64)))
+void svstnt1_scatter_index(svbool_t, svuint64_t, int64_t, svuint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_scatter_u64base_index_f64)))
+void svstnt1_scatter_index(svbool_t, svuint64_t, int64_t, svfloat64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_scatter_u32base_index_f32)))
+void svstnt1_scatter_index(svbool_t, svuint32_t, int64_t, svfloat32_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_scatter_u32base_index_s32)))
+void svstnt1_scatter_index(svbool_t, svuint32_t, int64_t, svint32_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_scatter_u64base_index_s64)))
+void svstnt1_scatter_index(svbool_t, svuint64_t, int64_t, svint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_scatter_u32base_offset_u32)))
+void svstnt1_scatter_offset(svbool_t, svuint32_t, int64_t, svuint32_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_scatter_u64base_offset_u64)))
+void svstnt1_scatter_offset(svbool_t, svuint64_t, int64_t, svuint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_scatter_u64base_offset_f64)))
+void svstnt1_scatter_offset(svbool_t, svuint64_t, int64_t, svfloat64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_scatter_u32base_offset_f32)))
+void svstnt1_scatter_offset(svbool_t, svuint32_t, int64_t, svfloat32_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_scatter_u32base_offset_s32)))
+void svstnt1_scatter_offset(svbool_t, svuint32_t, int64_t, svint32_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_scatter_u64base_offset_s64)))
+void svstnt1_scatter_offset(svbool_t, svuint64_t, int64_t, svint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_scatter_u32base_u32)))
+void svstnt1_scatter(svbool_t, svuint32_t, svuint32_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_scatter_u64base_u64)))
+void svstnt1_scatter(svbool_t, svuint64_t, svuint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_scatter_u64base_f64)))
+void svstnt1_scatter(svbool_t, svuint64_t, svfloat64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_scatter_u32base_f32)))
+void svstnt1_scatter(svbool_t, svuint32_t, svfloat32_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_scatter_u32base_s32)))
+void svstnt1_scatter(svbool_t, svuint32_t, svint32_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_scatter_u64base_s64)))
+void svstnt1_scatter(svbool_t, svuint64_t, svint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_scatter_s64index_u64)))
+void svstnt1_scatter_index(svbool_t, uint64_t *, svint64_t, svuint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_scatter_s64index_f64)))
+void svstnt1_scatter_index(svbool_t, float64_t *, svint64_t, svfloat64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_scatter_s64index_s64)))
+void svstnt1_scatter_index(svbool_t, int64_t *, svint64_t, svint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_scatter_u64index_u64)))
+void svstnt1_scatter_index(svbool_t, uint64_t *, svuint64_t, svuint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_scatter_u64index_f64)))
+void svstnt1_scatter_index(svbool_t, float64_t *, svuint64_t, svfloat64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_scatter_u64index_s64)))
+void svstnt1_scatter_index(svbool_t, int64_t *, svuint64_t, svint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_scatter_u32offset_u32)))
+void svstnt1_scatter_offset(svbool_t, uint32_t *, svuint32_t, svuint32_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_scatter_u32offset_f32)))
+void svstnt1_scatter_offset(svbool_t, float32_t *, svuint32_t, svfloat32_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_scatter_u32offset_s32)))
+void svstnt1_scatter_offset(svbool_t, int32_t *, svuint32_t, svint32_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_scatter_s64offset_u64)))
+void svstnt1_scatter_offset(svbool_t, uint64_t *, svint64_t, svuint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_scatter_s64offset_f64)))
+void svstnt1_scatter_offset(svbool_t, float64_t *, svint64_t, svfloat64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_scatter_s64offset_s64)))
+void svstnt1_scatter_offset(svbool_t, int64_t *, svint64_t, svint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_scatter_u64offset_u64)))
+void svstnt1_scatter_offset(svbool_t, uint64_t *, svuint64_t, svuint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_scatter_u64offset_f64)))
+void svstnt1_scatter_offset(svbool_t, float64_t *, svuint64_t, svfloat64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_scatter_u64offset_s64)))
+void svstnt1_scatter_offset(svbool_t, int64_t *, svuint64_t, svint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1b_scatter_u32base_offset_u32)))
+void svstnt1b_scatter_offset(svbool_t, svuint32_t, int64_t, svuint32_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1b_scatter_u64base_offset_u64)))
+void svstnt1b_scatter_offset(svbool_t, svuint64_t, int64_t, svuint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1b_scatter_u32base_offset_s32)))
+void svstnt1b_scatter_offset(svbool_t, svuint32_t, int64_t, svint32_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1b_scatter_u64base_offset_s64)))
+void svstnt1b_scatter_offset(svbool_t, svuint64_t, int64_t, svint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1b_scatter_u32base_u32)))
+void svstnt1b_scatter(svbool_t, svuint32_t, svuint32_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1b_scatter_u64base_u64)))
+void svstnt1b_scatter(svbool_t, svuint64_t, svuint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1b_scatter_u32base_s32)))
+void svstnt1b_scatter(svbool_t, svuint32_t, svint32_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1b_scatter_u64base_s64)))
+void svstnt1b_scatter(svbool_t, svuint64_t, svint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1b_scatter_u32offset_s32)))
+void svstnt1b_scatter_offset(svbool_t, int8_t *, svuint32_t, svint32_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1b_scatter_u32offset_u32)))
+void svstnt1b_scatter_offset(svbool_t, uint8_t *, svuint32_t, svuint32_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1b_scatter_s64offset_s64)))
+void svstnt1b_scatter_offset(svbool_t, int8_t *, svint64_t, svint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1b_scatter_s64offset_u64)))
+void svstnt1b_scatter_offset(svbool_t, uint8_t *, svint64_t, svuint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1b_scatter_u64offset_s64)))
+void svstnt1b_scatter_offset(svbool_t, int8_t *, svuint64_t, svint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1b_scatter_u64offset_u64)))
+void svstnt1b_scatter_offset(svbool_t, uint8_t *, svuint64_t, svuint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1h_scatter_u32base_index_u32))) /* svstnt1h_scatter[_index|_offset] overloads: every declaration aliases the type-suffixed builtin named in its attribute; the suffix mirrors the base/index/offset and data vector types of the prototype. */
+void svstnt1h_scatter_index(svbool_t, svuint32_t, int64_t, svuint32_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1h_scatter_u64base_index_u64)))
+void svstnt1h_scatter_index(svbool_t, svuint64_t, int64_t, svuint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1h_scatter_u32base_index_s32)))
+void svstnt1h_scatter_index(svbool_t, svuint32_t, int64_t, svint32_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1h_scatter_u64base_index_s64)))
+void svstnt1h_scatter_index(svbool_t, svuint64_t, int64_t, svint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1h_scatter_u32base_offset_u32)))
+void svstnt1h_scatter_offset(svbool_t, svuint32_t, int64_t, svuint32_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1h_scatter_u64base_offset_u64)))
+void svstnt1h_scatter_offset(svbool_t, svuint64_t, int64_t, svuint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1h_scatter_u32base_offset_s32)))
+void svstnt1h_scatter_offset(svbool_t, svuint32_t, int64_t, svint32_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1h_scatter_u64base_offset_s64)))
+void svstnt1h_scatter_offset(svbool_t, svuint64_t, int64_t, svint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1h_scatter_u32base_u32)))
+void svstnt1h_scatter(svbool_t, svuint32_t, svuint32_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1h_scatter_u64base_u64)))
+void svstnt1h_scatter(svbool_t, svuint64_t, svuint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1h_scatter_u32base_s32)))
+void svstnt1h_scatter(svbool_t, svuint32_t, svint32_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1h_scatter_u64base_s64)))
+void svstnt1h_scatter(svbool_t, svuint64_t, svint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1h_scatter_s64index_s64))) /* From here on the forms take an int16_t or uint16_t pointer plus an index/offset vector, instead of a vector of bases plus a scalar int64_t. */
+void svstnt1h_scatter_index(svbool_t, int16_t *, svint64_t, svint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1h_scatter_s64index_u64)))
+void svstnt1h_scatter_index(svbool_t, uint16_t *, svint64_t, svuint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1h_scatter_u64index_s64)))
+void svstnt1h_scatter_index(svbool_t, int16_t *, svuint64_t, svint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1h_scatter_u64index_u64)))
+void svstnt1h_scatter_index(svbool_t, uint16_t *, svuint64_t, svuint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1h_scatter_u32offset_s32)))
+void svstnt1h_scatter_offset(svbool_t, int16_t *, svuint32_t, svint32_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1h_scatter_u32offset_u32)))
+void svstnt1h_scatter_offset(svbool_t, uint16_t *, svuint32_t, svuint32_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1h_scatter_s64offset_s64)))
+void svstnt1h_scatter_offset(svbool_t, int16_t *, svint64_t, svint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1h_scatter_s64offset_u64)))
+void svstnt1h_scatter_offset(svbool_t, uint16_t *, svint64_t, svuint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1h_scatter_u64offset_s64)))
+void svstnt1h_scatter_offset(svbool_t, int16_t *, svuint64_t, svint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1h_scatter_u64offset_u64)))
+void svstnt1h_scatter_offset(svbool_t, uint16_t *, svuint64_t, svuint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1w_scatter_u64base_index_u64))) /* svstnt1w_scatter[_index|_offset] overloads: this run declares only forms whose data vector is svuint64_t or svint64_t; the pointer-base forms take an int32_t or uint32_t pointer. */
+void svstnt1w_scatter_index(svbool_t, svuint64_t, int64_t, svuint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1w_scatter_u64base_index_s64)))
+void svstnt1w_scatter_index(svbool_t, svuint64_t, int64_t, svint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1w_scatter_u64base_offset_u64)))
+void svstnt1w_scatter_offset(svbool_t, svuint64_t, int64_t, svuint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1w_scatter_u64base_offset_s64)))
+void svstnt1w_scatter_offset(svbool_t, svuint64_t, int64_t, svint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1w_scatter_u64base_u64)))
+void svstnt1w_scatter(svbool_t, svuint64_t, svuint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1w_scatter_u64base_s64)))
+void svstnt1w_scatter(svbool_t, svuint64_t, svint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1w_scatter_s64index_s64)))
+void svstnt1w_scatter_index(svbool_t, int32_t *, svint64_t, svint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1w_scatter_s64index_u64)))
+void svstnt1w_scatter_index(svbool_t, uint32_t *, svint64_t, svuint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1w_scatter_u64index_s64)))
+void svstnt1w_scatter_index(svbool_t, int32_t *, svuint64_t, svint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1w_scatter_u64index_u64)))
+void svstnt1w_scatter_index(svbool_t, uint32_t *, svuint64_t, svuint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1w_scatter_s64offset_s64)))
+void svstnt1w_scatter_offset(svbool_t, int32_t *, svint64_t, svint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1w_scatter_s64offset_u64)))
+void svstnt1w_scatter_offset(svbool_t, uint32_t *, svint64_t, svuint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1w_scatter_u64offset_s64)))
+void svstnt1w_scatter_offset(svbool_t, int32_t *, svuint64_t, svint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1w_scatter_u64offset_u64)))
+void svstnt1w_scatter_offset(svbool_t, uint32_t *, svuint64_t, svuint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmmla_f16_mf8_fpm))) /* svmmla with two svmfloat8_t operands and a trailing fpm_t: each fully suffixed name (__ai) is followed by a short svmmla_fpm overload (__aio) that aliases the same builtin; the accumulator/return type is svfloat16_t or svfloat32_t. */
+svfloat16_t svmmla_f16_mf8_fpm(svfloat16_t, svmfloat8_t, svmfloat8_t, fpm_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmmla_f16_mf8_fpm)))
+svfloat16_t svmmla_fpm(svfloat16_t, svmfloat8_t, svmfloat8_t, fpm_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmmla_f32_mf8_fpm)))
+svfloat32_t svmmla_f32_mf8_fpm(svfloat32_t, svmfloat8_t, svmfloat8_t, fpm_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmmla_f32_mf8_fpm)))
+svfloat32_t svmmla_fpm(svfloat32_t, svmfloat8_t, svmfloat8_t, fpm_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdot_f16_mf8_fpm))) /* svdot with svmfloat8_t inputs, for svfloat16_t and then svfloat32_t accumulators. Three shapes each: vector x vector, _n (third operand is a scalar mfloat8_t), and _lane (extra uint64_t before fpm_t; presumably the lane selector, confirm against ACLE). Fully suffixed names come first, then the short overloads aliasing the same builtins. */
+svfloat16_t svdot_f16_mf8_fpm(svfloat16_t, svmfloat8_t, svmfloat8_t, fpm_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdot_n_f16_mf8_fpm)))
+svfloat16_t svdot_n_f16_mf8_fpm(svfloat16_t, svmfloat8_t, mfloat8_t, fpm_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdot_lane_f16_mf8_fpm)))
+svfloat16_t svdot_lane_f16_mf8_fpm(svfloat16_t, svmfloat8_t, svmfloat8_t, uint64_t, fpm_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdot_f16_mf8_fpm)))
+svfloat16_t svdot_fpm(svfloat16_t, svmfloat8_t, svmfloat8_t, fpm_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdot_n_f16_mf8_fpm)))
+svfloat16_t svdot_fpm(svfloat16_t, svmfloat8_t, mfloat8_t, fpm_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdot_lane_f16_mf8_fpm)))
+svfloat16_t svdot_lane_fpm(svfloat16_t, svmfloat8_t, svmfloat8_t, uint64_t, fpm_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdot_f32_mf8_fpm)))
+svfloat32_t svdot_f32_mf8_fpm(svfloat32_t, svmfloat8_t, svmfloat8_t, fpm_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdot_n_f32_mf8_fpm)))
+svfloat32_t svdot_n_f32_mf8_fpm(svfloat32_t, svmfloat8_t, mfloat8_t, fpm_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdot_lane_f32_mf8_fpm)))
+svfloat32_t svdot_lane_f32_mf8_fpm(svfloat32_t, svmfloat8_t, svmfloat8_t, uint64_t, fpm_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdot_f32_mf8_fpm)))
+svfloat32_t svdot_fpm(svfloat32_t, svmfloat8_t, svmfloat8_t, fpm_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdot_n_f32_mf8_fpm)))
+svfloat32_t svdot_fpm(svfloat32_t, svmfloat8_t, mfloat8_t, fpm_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdot_lane_f32_mf8_fpm)))
+svfloat32_t svdot_lane_fpm(svfloat32_t, svmfloat8_t, svmfloat8_t, uint64_t, fpm_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmlalb_f16_mf8_fpm))) /* Fully suffixed svmlalb/svmlalt (svfloat16_t accumulator) and svmlallbb/bt/tb/tt (svfloat32_t accumulator) declarations with svmfloat8_t inputs. Each family has a vector, an _n (scalar mfloat8_t) and a _lane (extra uint64_t before fpm_t) form. */
+svfloat16_t svmlalb_f16_mf8_fpm(svfloat16_t, svmfloat8_t, svmfloat8_t, fpm_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmlalb_n_f16_mf8_fpm)))
+svfloat16_t svmlalb_n_f16_mf8_fpm(svfloat16_t, svmfloat8_t, mfloat8_t, fpm_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmlalb_lane_f16_mf8_fpm)))
+svfloat16_t svmlalb_lane_f16_mf8_fpm(svfloat16_t, svmfloat8_t, svmfloat8_t, uint64_t, fpm_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmlallbb_f32_mf8_fpm)))
+svfloat32_t svmlallbb_f32_mf8_fpm(svfloat32_t, svmfloat8_t, svmfloat8_t, fpm_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmlallbb_n_f32_mf8_fpm)))
+svfloat32_t svmlallbb_n_f32_mf8_fpm(svfloat32_t, svmfloat8_t, mfloat8_t, fpm_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmlallbb_lane_f32_mf8_fpm)))
+svfloat32_t svmlallbb_lane_f32_mf8_fpm(svfloat32_t, svmfloat8_t, svmfloat8_t, uint64_t, fpm_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmlallbt_f32_mf8_fpm)))
+svfloat32_t svmlallbt_f32_mf8_fpm(svfloat32_t, svmfloat8_t, svmfloat8_t, fpm_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmlallbt_n_f32_mf8_fpm)))
+svfloat32_t svmlallbt_n_f32_mf8_fpm(svfloat32_t, svmfloat8_t, mfloat8_t, fpm_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmlallbt_lane_f32_mf8_fpm)))
+svfloat32_t svmlallbt_lane_f32_mf8_fpm(svfloat32_t, svmfloat8_t, svmfloat8_t, uint64_t, fpm_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmlalltb_f32_mf8_fpm)))
+svfloat32_t svmlalltb_f32_mf8_fpm(svfloat32_t, svmfloat8_t, svmfloat8_t, fpm_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmlalltb_n_f32_mf8_fpm)))
+svfloat32_t svmlalltb_n_f32_mf8_fpm(svfloat32_t, svmfloat8_t, mfloat8_t, fpm_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmlalltb_lane_f32_mf8_fpm)))
+svfloat32_t svmlalltb_lane_f32_mf8_fpm(svfloat32_t, svmfloat8_t, svmfloat8_t, uint64_t, fpm_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmlalltt_f32_mf8_fpm)))
+svfloat32_t svmlalltt_f32_mf8_fpm(svfloat32_t, svmfloat8_t, svmfloat8_t, fpm_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmlalltt_n_f32_mf8_fpm)))
+svfloat32_t svmlalltt_n_f32_mf8_fpm(svfloat32_t, svmfloat8_t, mfloat8_t, fpm_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmlalltt_lane_f32_mf8_fpm)))
+svfloat32_t svmlalltt_lane_f32_mf8_fpm(svfloat32_t, svmfloat8_t, svmfloat8_t, uint64_t, fpm_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmlalt_f16_mf8_fpm)))
+svfloat16_t svmlalt_f16_mf8_fpm(svfloat16_t, svmfloat8_t, svmfloat8_t, fpm_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmlalt_n_f16_mf8_fpm)))
+svfloat16_t svmlalt_n_f16_mf8_fpm(svfloat16_t, svmfloat8_t, mfloat8_t, fpm_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmlalt_lane_f16_mf8_fpm)))
+svfloat16_t svmlalt_lane_f16_mf8_fpm(svfloat16_t, svmfloat8_t, svmfloat8_t, uint64_t, fpm_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmlalb_f16_mf8_fpm))) /* Short overloaded names (svmlalb_fpm, svmlallbb_fpm, ... and their _lane_fpm variants). The vector and _n forms share one name and differ only in the third parameter (svmfloat8_t vs. scalar mfloat8_t); each aliases the builtin named in its attribute. */
+svfloat16_t svmlalb_fpm(svfloat16_t, svmfloat8_t, svmfloat8_t, fpm_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmlalb_n_f16_mf8_fpm)))
+svfloat16_t svmlalb_fpm(svfloat16_t, svmfloat8_t, mfloat8_t, fpm_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmlalb_lane_f16_mf8_fpm)))
+svfloat16_t svmlalb_lane_fpm(svfloat16_t, svmfloat8_t, svmfloat8_t, uint64_t, fpm_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmlallbb_f32_mf8_fpm)))
+svfloat32_t svmlallbb_fpm(svfloat32_t, svmfloat8_t, svmfloat8_t, fpm_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmlallbb_n_f32_mf8_fpm)))
+svfloat32_t svmlallbb_fpm(svfloat32_t, svmfloat8_t, mfloat8_t, fpm_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmlallbb_lane_f32_mf8_fpm)))
+svfloat32_t svmlallbb_lane_fpm(svfloat32_t, svmfloat8_t, svmfloat8_t, uint64_t, fpm_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmlallbt_f32_mf8_fpm)))
+svfloat32_t svmlallbt_fpm(svfloat32_t, svmfloat8_t, svmfloat8_t, fpm_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmlallbt_n_f32_mf8_fpm)))
+svfloat32_t svmlallbt_fpm(svfloat32_t, svmfloat8_t, mfloat8_t, fpm_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmlallbt_lane_f32_mf8_fpm)))
+svfloat32_t svmlallbt_lane_fpm(svfloat32_t, svmfloat8_t, svmfloat8_t, uint64_t, fpm_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmlalltb_f32_mf8_fpm)))
+svfloat32_t svmlalltb_fpm(svfloat32_t, svmfloat8_t, svmfloat8_t, fpm_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmlalltb_n_f32_mf8_fpm)))
+svfloat32_t svmlalltb_fpm(svfloat32_t, svmfloat8_t, mfloat8_t, fpm_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmlalltb_lane_f32_mf8_fpm)))
+svfloat32_t svmlalltb_lane_fpm(svfloat32_t, svmfloat8_t, svmfloat8_t, uint64_t, fpm_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmlalltt_f32_mf8_fpm)))
+svfloat32_t svmlalltt_fpm(svfloat32_t, svmfloat8_t, svmfloat8_t, fpm_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmlalltt_n_f32_mf8_fpm)))
+svfloat32_t svmlalltt_fpm(svfloat32_t, svmfloat8_t, mfloat8_t, fpm_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmlalltt_lane_f32_mf8_fpm)))
+svfloat32_t svmlalltt_lane_fpm(svfloat32_t, svmfloat8_t, svmfloat8_t, uint64_t, fpm_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmlalt_f16_mf8_fpm)))
+svfloat16_t svmlalt_fpm(svfloat16_t, svmfloat8_t, svmfloat8_t, fpm_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmlalt_n_f16_mf8_fpm)))
+svfloat16_t svmlalt_fpm(svfloat16_t, svmfloat8_t, mfloat8_t, fpm_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmlalt_lane_f16_mf8_fpm)))
+svfloat16_t svmlalt_lane_fpm(svfloat16_t, svmfloat8_t, svmfloat8_t, uint64_t, fpm_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64base_index_u32))) /* svld1q_gather_u64base_index_<t>: takes (svbool_t, svuint64_t bases, int64_t index) and returns the vector type named by the suffix. This run declares 16/32/64-bit element types only; there is no u8, s8 or mf8 index form here. */
+svuint32_t svld1q_gather_u64base_index_u32(svbool_t, svuint64_t, int64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64base_index_u64)))
+svuint64_t svld1q_gather_u64base_index_u64(svbool_t, svuint64_t, int64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64base_index_u16)))
+svuint16_t svld1q_gather_u64base_index_u16(svbool_t, svuint64_t, int64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64base_index_bf16)))
+svbfloat16_t svld1q_gather_u64base_index_bf16(svbool_t, svuint64_t, int64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64base_index_f64)))
+svfloat64_t svld1q_gather_u64base_index_f64(svbool_t, svuint64_t, int64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64base_index_f32)))
+svfloat32_t svld1q_gather_u64base_index_f32(svbool_t, svuint64_t, int64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64base_index_f16)))
+svfloat16_t svld1q_gather_u64base_index_f16(svbool_t, svuint64_t, int64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64base_index_s32)))
+svint32_t svld1q_gather_u64base_index_s32(svbool_t, svuint64_t, int64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64base_index_s64)))
+svint64_t svld1q_gather_u64base_index_s64(svbool_t, svuint64_t, int64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64base_index_s16)))
+svint16_t svld1q_gather_u64base_index_s16(svbool_t, svuint64_t, int64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64base_offset_u8))) /* svld1q_gather_u64base_offset_<t>: takes (svbool_t, svuint64_t bases, int64_t offset) and returns the vector type named by the suffix; the offset forms also cover the 8-bit element types u8, s8 and mf8. */
+svuint8_t svld1q_gather_u64base_offset_u8(svbool_t, svuint64_t, int64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64base_offset_u32)))
+svuint32_t svld1q_gather_u64base_offset_u32(svbool_t, svuint64_t, int64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64base_offset_u64)))
+svuint64_t svld1q_gather_u64base_offset_u64(svbool_t, svuint64_t, int64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64base_offset_u16)))
+svuint16_t svld1q_gather_u64base_offset_u16(svbool_t, svuint64_t, int64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64base_offset_bf16)))
+svbfloat16_t svld1q_gather_u64base_offset_bf16(svbool_t, svuint64_t, int64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64base_offset_s8)))
+svint8_t svld1q_gather_u64base_offset_s8(svbool_t, svuint64_t, int64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64base_offset_f64)))
+svfloat64_t svld1q_gather_u64base_offset_f64(svbool_t, svuint64_t, int64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64base_offset_f32)))
+svfloat32_t svld1q_gather_u64base_offset_f32(svbool_t, svuint64_t, int64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64base_offset_f16)))
+svfloat16_t svld1q_gather_u64base_offset_f16(svbool_t, svuint64_t, int64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64base_offset_s32)))
+svint32_t svld1q_gather_u64base_offset_s32(svbool_t, svuint64_t, int64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64base_offset_s64)))
+svint64_t svld1q_gather_u64base_offset_s64(svbool_t, svuint64_t, int64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64base_offset_mf8)))
+svmfloat8_t svld1q_gather_u64base_offset_mf8(svbool_t, svuint64_t, int64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64base_offset_s16)))
+svint16_t svld1q_gather_u64base_offset_s16(svbool_t, svuint64_t, int64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64base_u8))) /* svld1q_gather_u64base_<t>: takes only (svbool_t, svuint64_t bases), with no scalar index or offset; the return vector type follows the suffix. */
+svuint8_t svld1q_gather_u64base_u8(svbool_t, svuint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64base_u32)))
+svuint32_t svld1q_gather_u64base_u32(svbool_t, svuint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64base_u64)))
+svuint64_t svld1q_gather_u64base_u64(svbool_t, svuint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64base_u16)))
+svuint16_t svld1q_gather_u64base_u16(svbool_t, svuint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64base_bf16)))
+svbfloat16_t svld1q_gather_u64base_bf16(svbool_t, svuint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64base_s8)))
+svint8_t svld1q_gather_u64base_s8(svbool_t, svuint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64base_f64)))
+svfloat64_t svld1q_gather_u64base_f64(svbool_t, svuint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64base_f32)))
+svfloat32_t svld1q_gather_u64base_f32(svbool_t, svuint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64base_f16)))
+svfloat16_t svld1q_gather_u64base_f16(svbool_t, svuint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64base_s32)))
+svint32_t svld1q_gather_u64base_s32(svbool_t, svuint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64base_s64)))
+svint64_t svld1q_gather_u64base_s64(svbool_t, svuint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64base_mf8)))
+svmfloat8_t svld1q_gather_u64base_mf8(svbool_t, svuint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64base_s16)))
+svint16_t svld1q_gather_u64base_s16(svbool_t, svuint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64index_u32))) /* svld1q_gather_u64index_<t>: takes (svbool_t, const pointer to the scalar element type, svuint64_t indices) and returns the matching vector type. As with the u64base index forms, no 8-bit element variants are declared in this run. */
+svuint32_t svld1q_gather_u64index_u32(svbool_t, uint32_t const *, svuint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64index_u64)))
+svuint64_t svld1q_gather_u64index_u64(svbool_t, uint64_t const *, svuint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64index_u16)))
+svuint16_t svld1q_gather_u64index_u16(svbool_t, uint16_t const *, svuint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64index_bf16)))
+svbfloat16_t svld1q_gather_u64index_bf16(svbool_t, bfloat16_t const *, svuint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64index_f64)))
+svfloat64_t svld1q_gather_u64index_f64(svbool_t, float64_t const *, svuint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64index_f32)))
+svfloat32_t svld1q_gather_u64index_f32(svbool_t, float32_t const *, svuint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64index_f16)))
+svfloat16_t svld1q_gather_u64index_f16(svbool_t, float16_t const *, svuint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64index_s32)))
+svint32_t svld1q_gather_u64index_s32(svbool_t, int32_t const *, svuint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64index_s64)))
+svint64_t svld1q_gather_u64index_s64(svbool_t, int64_t const *, svuint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64index_s16)))
+svint16_t svld1q_gather_u64index_s16(svbool_t, int16_t const *, svuint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64offset_u8))) /* svld1q_gather_u64offset_<t>: takes (svbool_t, const pointer to the scalar element type, svuint64_t offsets) and returns the matching vector type; includes the u8, s8 and mf8 variants. */
+svuint8_t svld1q_gather_u64offset_u8(svbool_t, uint8_t const *, svuint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64offset_u32)))
+svuint32_t svld1q_gather_u64offset_u32(svbool_t, uint32_t const *, svuint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64offset_u64)))
+svuint64_t svld1q_gather_u64offset_u64(svbool_t, uint64_t const *, svuint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64offset_u16)))
+svuint16_t svld1q_gather_u64offset_u16(svbool_t, uint16_t const *, svuint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64offset_bf16)))
+svbfloat16_t svld1q_gather_u64offset_bf16(svbool_t, bfloat16_t const *, svuint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64offset_s8)))
+svint8_t svld1q_gather_u64offset_s8(svbool_t, int8_t const *, svuint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64offset_f64)))
+svfloat64_t svld1q_gather_u64offset_f64(svbool_t, float64_t const *, svuint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64offset_f32)))
+svfloat32_t svld1q_gather_u64offset_f32(svbool_t, float32_t const *, svuint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64offset_f16)))
+svfloat16_t svld1q_gather_u64offset_f16(svbool_t, float16_t const *, svuint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64offset_s32)))
+svint32_t svld1q_gather_u64offset_s32(svbool_t, int32_t const *, svuint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64offset_s64)))
+svint64_t svld1q_gather_u64offset_s64(svbool_t, int64_t const *, svuint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64offset_mf8)))
+svmfloat8_t svld1q_gather_u64offset_mf8(svbool_t, mfloat8_t const *, svuint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64offset_s16)))
+svint16_t svld1q_gather_u64offset_s16(svbool_t, int16_t const *, svuint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1udq_u64))) /* svld1udq_<t> is declared for the 64-bit element types (u64, f64, s64) and svld1uwq_<t> for the 32-bit ones (u32, f32, s32). Each takes (svbool_t, const element pointer); the _vnum variants add a trailing int64_t. */
+svuint64_t svld1udq_u64(svbool_t, uint64_t const *);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1udq_f64)))
+svfloat64_t svld1udq_f64(svbool_t, float64_t const *);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1udq_s64)))
+svint64_t svld1udq_s64(svbool_t, int64_t const *);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1udq_vnum_u64)))
+svuint64_t svld1udq_vnum_u64(svbool_t, uint64_t const *, int64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1udq_vnum_f64)))
+svfloat64_t svld1udq_vnum_f64(svbool_t, float64_t const *, int64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1udq_vnum_s64)))
+svint64_t svld1udq_vnum_s64(svbool_t, int64_t const *, int64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1uwq_u32)))
+svuint32_t svld1uwq_u32(svbool_t, uint32_t const *);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1uwq_f32)))
+svfloat32_t svld1uwq_f32(svbool_t, float32_t const *);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1uwq_s32)))
+svint32_t svld1uwq_s32(svbool_t, int32_t const *);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1uwq_vnum_u32)))
+svuint32_t svld1uwq_vnum_u32(svbool_t, uint32_t const *, int64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1uwq_vnum_f32)))
+svfloat32_t svld1uwq_vnum_f32(svbool_t, float32_t const *, int64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1uwq_vnum_s32)))
+svint32_t svld1uwq_vnum_s32(svbool_t, int32_t const *, int64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1dq_u64))) /* svst1dq_<t> for u64, f64 and s64: takes (svbool_t, mutable element pointer, data vector) and returns void; the _vnum variants insert an int64_t between the pointer and the data. */
+void svst1dq_u64(svbool_t, uint64_t *, svuint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1dq_f64)))
+void svst1dq_f64(svbool_t, float64_t *, svfloat64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1dq_s64)))
+void svst1dq_s64(svbool_t, int64_t *, svint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1dq_vnum_u64)))
+void svst1dq_vnum_u64(svbool_t, uint64_t *, int64_t, svuint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1dq_vnum_f64)))
+void svst1dq_vnum_f64(svbool_t, float64_t *, int64_t, svfloat64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1dq_vnum_s64)))
+void svst1dq_vnum_s64(svbool_t, int64_t *, int64_t, svint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64base_u8))) /* svst1q_scatter_u64base_<t>: takes (svbool_t, svuint64_t bases, data vector of the suffixed type) and returns void; all element widths, including u8, s8 and mf8, are declared. */
+void svst1q_scatter_u64base_u8(svbool_t, svuint64_t, svuint8_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64base_u32)))
+void svst1q_scatter_u64base_u32(svbool_t, svuint64_t, svuint32_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64base_u64)))
+void svst1q_scatter_u64base_u64(svbool_t, svuint64_t, svuint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64base_u16)))
+void svst1q_scatter_u64base_u16(svbool_t, svuint64_t, svuint16_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64base_bf16)))
+void svst1q_scatter_u64base_bf16(svbool_t, svuint64_t, svbfloat16_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64base_s8)))
+void svst1q_scatter_u64base_s8(svbool_t, svuint64_t, svint8_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64base_f64)))
+void svst1q_scatter_u64base_f64(svbool_t, svuint64_t, svfloat64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64base_f32)))
+void svst1q_scatter_u64base_f32(svbool_t, svuint64_t, svfloat32_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64base_f16)))
+void svst1q_scatter_u64base_f16(svbool_t, svuint64_t, svfloat16_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64base_s32)))
+void svst1q_scatter_u64base_s32(svbool_t, svuint64_t, svint32_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64base_s64)))
+void svst1q_scatter_u64base_s64(svbool_t, svuint64_t, svint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64base_mf8)))
+void svst1q_scatter_u64base_mf8(svbool_t, svuint64_t, svmfloat8_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64base_s16)))
+void svst1q_scatter_u64base_s16(svbool_t, svuint64_t, svint16_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64base_index_u32))) /* svst1q_scatter_u64base_index_<t>: takes (svbool_t, svuint64_t bases, int64_t index, data vector). Only 16/32/64-bit element types are declared in this run; there is no u8, s8 or mf8 index form here. */
+void svst1q_scatter_u64base_index_u32(svbool_t, svuint64_t, int64_t, svuint32_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64base_index_u64)))
+void svst1q_scatter_u64base_index_u64(svbool_t, svuint64_t, int64_t, svuint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64base_index_u16)))
+void svst1q_scatter_u64base_index_u16(svbool_t, svuint64_t, int64_t, svuint16_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64base_index_bf16)))
+void svst1q_scatter_u64base_index_bf16(svbool_t, svuint64_t, int64_t, svbfloat16_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64base_index_f64)))
+void svst1q_scatter_u64base_index_f64(svbool_t, svuint64_t, int64_t, svfloat64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64base_index_f32)))
+void svst1q_scatter_u64base_index_f32(svbool_t, svuint64_t, int64_t, svfloat32_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64base_index_f16)))
+void svst1q_scatter_u64base_index_f16(svbool_t, svuint64_t, int64_t, svfloat16_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64base_index_s32)))
+void svst1q_scatter_u64base_index_s32(svbool_t, svuint64_t, int64_t, svint32_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64base_index_s64)))
+void svst1q_scatter_u64base_index_s64(svbool_t, svuint64_t, int64_t, svint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64base_index_s16)))
+void svst1q_scatter_u64base_index_s16(svbool_t, svuint64_t, int64_t, svint16_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64base_offset_u8))) /* svst1q_scatter_u64base_offset_<t>: takes (svbool_t, svuint64_t bases, int64_t offset, data vector); unlike the index forms, the u8, s8 and mf8 variants are declared as well. */
+void svst1q_scatter_u64base_offset_u8(svbool_t, svuint64_t, int64_t, svuint8_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64base_offset_u32)))
+void svst1q_scatter_u64base_offset_u32(svbool_t, svuint64_t, int64_t, svuint32_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64base_offset_u64)))
+void svst1q_scatter_u64base_offset_u64(svbool_t, svuint64_t, int64_t, svuint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64base_offset_u16)))
+void svst1q_scatter_u64base_offset_u16(svbool_t, svuint64_t, int64_t, svuint16_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64base_offset_bf16)))
+void svst1q_scatter_u64base_offset_bf16(svbool_t, svuint64_t, int64_t, svbfloat16_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64base_offset_s8)))
+void svst1q_scatter_u64base_offset_s8(svbool_t, svuint64_t, int64_t, svint8_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64base_offset_f64)))
+void svst1q_scatter_u64base_offset_f64(svbool_t, svuint64_t, int64_t, svfloat64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64base_offset_f32)))
+void svst1q_scatter_u64base_offset_f32(svbool_t, svuint64_t, int64_t, svfloat32_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64base_offset_f16)))
+void svst1q_scatter_u64base_offset_f16(svbool_t, svuint64_t, int64_t, svfloat16_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64base_offset_s32)))
+void svst1q_scatter_u64base_offset_s32(svbool_t, svuint64_t, int64_t, svint32_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64base_offset_s64)))
+void svst1q_scatter_u64base_offset_s64(svbool_t, svuint64_t, int64_t, svint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64base_offset_mf8)))
+void svst1q_scatter_u64base_offset_mf8(svbool_t, svuint64_t, int64_t, svmfloat8_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64base_offset_s16)))
+void svst1q_scatter_u64base_offset_s16(svbool_t, svuint64_t, int64_t, svint16_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_s64index_u32))) /* svst1q_scatter_s64index_<t>: takes (svbool_t, mutable pointer to the scalar element type, svint64_t indices, data vector) and returns void; 16/32/64-bit element types only. */
+void svst1q_scatter_s64index_u32(svbool_t, uint32_t *, svint64_t, svuint32_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_s64index_u64)))
+void svst1q_scatter_s64index_u64(svbool_t, uint64_t *, svint64_t, svuint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_s64index_u16)))
+void svst1q_scatter_s64index_u16(svbool_t, uint16_t *, svint64_t, svuint16_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_s64index_bf16)))
+void svst1q_scatter_s64index_bf16(svbool_t, bfloat16_t *, svint64_t, svbfloat16_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_s64index_f64)))
+void svst1q_scatter_s64index_f64(svbool_t, float64_t *, svint64_t, svfloat64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_s64index_f32)))
+void svst1q_scatter_s64index_f32(svbool_t, float32_t *, svint64_t, svfloat32_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_s64index_f16)))
+void svst1q_scatter_s64index_f16(svbool_t, float16_t *, svint64_t, svfloat16_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_s64index_s32)))
+void svst1q_scatter_s64index_s32(svbool_t, int32_t *, svint64_t, svint32_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_s64index_s64)))
+void svst1q_scatter_s64index_s64(svbool_t, int64_t *, svint64_t, svint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_s64index_s16)))
+void svst1q_scatter_s64index_s16(svbool_t, int16_t *, svint64_t, svint16_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64index_u32)))
+void svst1q_scatter_u64index_u32(svbool_t, uint32_t *, svuint64_t, svuint32_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64index_u64)))
+void svst1q_scatter_u64index_u64(svbool_t, uint64_t *, svuint64_t, svuint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64index_u16)))
+void svst1q_scatter_u64index_u16(svbool_t, uint16_t *, svuint64_t, svuint16_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64index_bf16)))
+void svst1q_scatter_u64index_bf16(svbool_t, bfloat16_t *, svuint64_t, svbfloat16_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64index_f64)))
+void svst1q_scatter_u64index_f64(svbool_t, float64_t *, svuint64_t, svfloat64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64index_f32)))
+void svst1q_scatter_u64index_f32(svbool_t, float32_t *, svuint64_t, svfloat32_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64index_f16)))
+void svst1q_scatter_u64index_f16(svbool_t, float16_t *, svuint64_t, svfloat16_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64index_s32)))
+void svst1q_scatter_u64index_s32(svbool_t, int32_t *, svuint64_t, svint32_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64index_s64)))
+void svst1q_scatter_u64index_s64(svbool_t, int64_t *, svuint64_t, svint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64index_s16)))
+void svst1q_scatter_u64index_s16(svbool_t, int16_t *, svuint64_t, svint16_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_s64offset_u8)))
+void svst1q_scatter_s64offset_u8(svbool_t, uint8_t *, svint64_t, svuint8_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_s64offset_u32)))
+void svst1q_scatter_s64offset_u32(svbool_t, uint32_t *, svint64_t, svuint32_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_s64offset_u64)))
+void svst1q_scatter_s64offset_u64(svbool_t, uint64_t *, svint64_t, svuint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_s64offset_u16)))
+void svst1q_scatter_s64offset_u16(svbool_t, uint16_t *, svint64_t, svuint16_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_s64offset_bf16)))
+void svst1q_scatter_s64offset_bf16(svbool_t, bfloat16_t *, svint64_t, svbfloat16_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_s64offset_s8)))
+void svst1q_scatter_s64offset_s8(svbool_t, int8_t *, svint64_t, svint8_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_s64offset_f64)))
+void svst1q_scatter_s64offset_f64(svbool_t, float64_t *, svint64_t, svfloat64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_s64offset_f32)))
+void svst1q_scatter_s64offset_f32(svbool_t, float32_t *, svint64_t, svfloat32_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_s64offset_f16)))
+void svst1q_scatter_s64offset_f16(svbool_t, float16_t *, svint64_t, svfloat16_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_s64offset_s32)))
+void svst1q_scatter_s64offset_s32(svbool_t, int32_t *, svint64_t, svint32_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_s64offset_s64)))
+void svst1q_scatter_s64offset_s64(svbool_t, int64_t *, svint64_t, svint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_s64offset_mf8)))
+void svst1q_scatter_s64offset_mf8(svbool_t, mfloat8_t *, svint64_t, svmfloat8_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_s64offset_s16)))
+void svst1q_scatter_s64offset_s16(svbool_t, int16_t *, svint64_t, svint16_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64offset_u8)))
+void svst1q_scatter_u64offset_u8(svbool_t, uint8_t *, svuint64_t, svuint8_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64offset_u32)))
+void svst1q_scatter_u64offset_u32(svbool_t, uint32_t *, svuint64_t, svuint32_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64offset_u64)))
+void svst1q_scatter_u64offset_u64(svbool_t, uint64_t *, svuint64_t, svuint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64offset_u16)))
+void svst1q_scatter_u64offset_u16(svbool_t, uint16_t *, svuint64_t, svuint16_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64offset_bf16)))
+void svst1q_scatter_u64offset_bf16(svbool_t, bfloat16_t *, svuint64_t, svbfloat16_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64offset_s8)))
+void svst1q_scatter_u64offset_s8(svbool_t, int8_t *, svuint64_t, svint8_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64offset_f64)))
+void svst1q_scatter_u64offset_f64(svbool_t, float64_t *, svuint64_t, svfloat64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64offset_f32)))
+void svst1q_scatter_u64offset_f32(svbool_t, float32_t *, svuint64_t, svfloat32_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64offset_f16)))
+void svst1q_scatter_u64offset_f16(svbool_t, float16_t *, svuint64_t, svfloat16_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64offset_s32)))
+void svst1q_scatter_u64offset_s32(svbool_t, int32_t *, svuint64_t, svint32_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64offset_s64)))
+void svst1q_scatter_u64offset_s64(svbool_t, int64_t *, svuint64_t, svint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64offset_mf8)))
+void svst1q_scatter_u64offset_mf8(svbool_t, mfloat8_t *, svuint64_t, svmfloat8_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64offset_s16)))
+void svst1q_scatter_u64offset_s16(svbool_t, int16_t *, svuint64_t, svint16_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1wq_u32)))
+void svst1wq_u32(svbool_t, uint32_t *, svuint32_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1wq_f32)))
+void svst1wq_f32(svbool_t, float32_t *, svfloat32_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1wq_s32)))
+void svst1wq_s32(svbool_t, int32_t *, svint32_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1wq_vnum_u32)))
+void svst1wq_vnum_u32(svbool_t, uint32_t *, int64_t, svuint32_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1wq_vnum_f32)))
+void svst1wq_vnum_f32(svbool_t, float32_t *, int64_t, svfloat32_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1wq_vnum_s32)))
+void svst1wq_vnum_s32(svbool_t, int32_t *, int64_t, svint32_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64base_index_u32)))
+svuint32_t svld1q_gather_index_u32(svbool_t, svuint64_t, int64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64base_index_u64)))
+svuint64_t svld1q_gather_index_u64(svbool_t, svuint64_t, int64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64base_index_u16)))
+svuint16_t svld1q_gather_index_u16(svbool_t, svuint64_t, int64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64base_index_bf16)))
+svbfloat16_t svld1q_gather_index_bf16(svbool_t, svuint64_t, int64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64base_index_f64)))
+svfloat64_t svld1q_gather_index_f64(svbool_t, svuint64_t, int64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64base_index_f32)))
+svfloat32_t svld1q_gather_index_f32(svbool_t, svuint64_t, int64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64base_index_f16)))
+svfloat16_t svld1q_gather_index_f16(svbool_t, svuint64_t, int64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64base_index_s32)))
+svint32_t svld1q_gather_index_s32(svbool_t, svuint64_t, int64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64base_index_s64)))
+svint64_t svld1q_gather_index_s64(svbool_t, svuint64_t, int64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64base_index_s16)))
+svint16_t svld1q_gather_index_s16(svbool_t, svuint64_t, int64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64base_offset_u8)))
+svuint8_t svld1q_gather_offset_u8(svbool_t, svuint64_t, int64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64base_offset_u32)))
+svuint32_t svld1q_gather_offset_u32(svbool_t, svuint64_t, int64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64base_offset_u64)))
+svuint64_t svld1q_gather_offset_u64(svbool_t, svuint64_t, int64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64base_offset_u16)))
+svuint16_t svld1q_gather_offset_u16(svbool_t, svuint64_t, int64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64base_offset_bf16)))
+svbfloat16_t svld1q_gather_offset_bf16(svbool_t, svuint64_t, int64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64base_offset_s8)))
+svint8_t svld1q_gather_offset_s8(svbool_t, svuint64_t, int64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64base_offset_f64)))
+svfloat64_t svld1q_gather_offset_f64(svbool_t, svuint64_t, int64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64base_offset_f32)))
+svfloat32_t svld1q_gather_offset_f32(svbool_t, svuint64_t, int64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64base_offset_f16)))
+svfloat16_t svld1q_gather_offset_f16(svbool_t, svuint64_t, int64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64base_offset_s32)))
+svint32_t svld1q_gather_offset_s32(svbool_t, svuint64_t, int64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64base_offset_s64)))
+svint64_t svld1q_gather_offset_s64(svbool_t, svuint64_t, int64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64base_offset_mf8)))
+svmfloat8_t svld1q_gather_offset_mf8(svbool_t, svuint64_t, int64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64base_offset_s16)))
+svint16_t svld1q_gather_offset_s16(svbool_t, svuint64_t, int64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64base_u8)))
+svuint8_t svld1q_gather_u8(svbool_t, svuint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64base_u32)))
+svuint32_t svld1q_gather_u32(svbool_t, svuint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64base_u64)))
+svuint64_t svld1q_gather_u64(svbool_t, svuint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64base_u16)))
+svuint16_t svld1q_gather_u16(svbool_t, svuint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64base_bf16)))
+svbfloat16_t svld1q_gather_bf16(svbool_t, svuint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64base_s8)))
+svint8_t svld1q_gather_s8(svbool_t, svuint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64base_f64)))
+svfloat64_t svld1q_gather_f64(svbool_t, svuint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64base_f32)))
+svfloat32_t svld1q_gather_f32(svbool_t, svuint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64base_f16)))
+svfloat16_t svld1q_gather_f16(svbool_t, svuint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64base_s32)))
+svint32_t svld1q_gather_s32(svbool_t, svuint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64base_s64)))
+svint64_t svld1q_gather_s64(svbool_t, svuint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64base_mf8)))
+svmfloat8_t svld1q_gather_mf8(svbool_t, svuint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64base_s16)))
+svint16_t svld1q_gather_s16(svbool_t, svuint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64index_u32)))
+svuint32_t svld1q_gather_index(svbool_t, uint32_t const *, svuint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64index_u64)))
+svuint64_t svld1q_gather_index(svbool_t, uint64_t const *, svuint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64index_u16)))
+svuint16_t svld1q_gather_index(svbool_t, uint16_t const *, svuint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64index_bf16)))
+svbfloat16_t svld1q_gather_index(svbool_t, bfloat16_t const *, svuint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64index_f64)))
+svfloat64_t svld1q_gather_index(svbool_t, float64_t const *, svuint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64index_f32)))
+svfloat32_t svld1q_gather_index(svbool_t, float32_t const *, svuint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64index_f16)))
+svfloat16_t svld1q_gather_index(svbool_t, float16_t const *, svuint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64index_s32)))
+svint32_t svld1q_gather_index(svbool_t, int32_t const *, svuint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64index_s64)))
+svint64_t svld1q_gather_index(svbool_t, int64_t const *, svuint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64index_s16)))
+svint16_t svld1q_gather_index(svbool_t, int16_t const *, svuint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64offset_u8)))
+svuint8_t svld1q_gather_offset(svbool_t, uint8_t const *, svuint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64offset_u32)))
+svuint32_t svld1q_gather_offset(svbool_t, uint32_t const *, svuint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64offset_u64)))
+svuint64_t svld1q_gather_offset(svbool_t, uint64_t const *, svuint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64offset_u16)))
+svuint16_t svld1q_gather_offset(svbool_t, uint16_t const *, svuint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64offset_bf16)))
+svbfloat16_t svld1q_gather_offset(svbool_t, bfloat16_t const *, svuint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64offset_s8)))
+svint8_t svld1q_gather_offset(svbool_t, int8_t const *, svuint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64offset_f64)))
+svfloat64_t svld1q_gather_offset(svbool_t, float64_t const *, svuint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64offset_f32)))
+svfloat32_t svld1q_gather_offset(svbool_t, float32_t const *, svuint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64offset_f16)))
+svfloat16_t svld1q_gather_offset(svbool_t, float16_t const *, svuint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64offset_s32)))
+svint32_t svld1q_gather_offset(svbool_t, int32_t const *, svuint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64offset_s64)))
+svint64_t svld1q_gather_offset(svbool_t, int64_t const *, svuint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64offset_mf8)))
+svmfloat8_t svld1q_gather_offset(svbool_t, mfloat8_t const *, svuint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64offset_s16)))
+svint16_t svld1q_gather_offset(svbool_t, int16_t const *, svuint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1udq_u64)))
+svuint64_t svld1udq(svbool_t, uint64_t const *);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1udq_f64)))
+svfloat64_t svld1udq(svbool_t, float64_t const *);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1udq_s64)))
+svint64_t svld1udq(svbool_t, int64_t const *);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1udq_vnum_u64)))
+svuint64_t svld1udq_vnum(svbool_t, uint64_t const *, int64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1udq_vnum_f64)))
+svfloat64_t svld1udq_vnum(svbool_t, float64_t const *, int64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1udq_vnum_s64)))
+svint64_t svld1udq_vnum(svbool_t, int64_t const *, int64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1uwq_u32)))
+svuint32_t svld1uwq(svbool_t, uint32_t const *);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1uwq_f32)))
+svfloat32_t svld1uwq(svbool_t, float32_t const *);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1uwq_s32)))
+svint32_t svld1uwq(svbool_t, int32_t const *);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1uwq_vnum_u32)))
+svuint32_t svld1uwq_vnum(svbool_t, uint32_t const *, int64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1uwq_vnum_f32)))
+svfloat32_t svld1uwq_vnum(svbool_t, float32_t const *, int64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1uwq_vnum_s32)))
+svint32_t svld1uwq_vnum(svbool_t, int32_t const *, int64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1dq_u64)))
+void svst1dq(svbool_t, uint64_t *, svuint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1dq_f64)))
+void svst1dq(svbool_t, float64_t *, svfloat64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1dq_s64)))
+void svst1dq(svbool_t, int64_t *, svint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1dq_vnum_u64)))
+void svst1dq_vnum(svbool_t, uint64_t *, int64_t, svuint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1dq_vnum_f64)))
+void svst1dq_vnum(svbool_t, float64_t *, int64_t, svfloat64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1dq_vnum_s64)))
+void svst1dq_vnum(svbool_t, int64_t *, int64_t, svint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64base_u8)))
+void svst1q_scatter(svbool_t, svuint64_t, svuint8_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64base_u32)))
+void svst1q_scatter(svbool_t, svuint64_t, svuint32_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64base_u64)))
+void svst1q_scatter(svbool_t, svuint64_t, svuint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64base_u16)))
+void svst1q_scatter(svbool_t, svuint64_t, svuint16_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64base_bf16)))
+void svst1q_scatter(svbool_t, svuint64_t, svbfloat16_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64base_s8)))
+void svst1q_scatter(svbool_t, svuint64_t, svint8_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64base_f64)))
+void svst1q_scatter(svbool_t, svuint64_t, svfloat64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64base_f32)))
+void svst1q_scatter(svbool_t, svuint64_t, svfloat32_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64base_f16)))
+void svst1q_scatter(svbool_t, svuint64_t, svfloat16_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64base_s32)))
+void svst1q_scatter(svbool_t, svuint64_t, svint32_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64base_s64)))
+void svst1q_scatter(svbool_t, svuint64_t, svint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64base_mf8)))
+void svst1q_scatter(svbool_t, svuint64_t, svmfloat8_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64base_s16)))
+void svst1q_scatter(svbool_t, svuint64_t, svint16_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64base_index_u32)))
+void svst1q_scatter_index(svbool_t, svuint64_t, int64_t, svuint32_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64base_index_u64)))
+void svst1q_scatter_index(svbool_t, svuint64_t, int64_t, svuint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64base_index_u16)))
+void svst1q_scatter_index(svbool_t, svuint64_t, int64_t, svuint16_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64base_index_bf16)))
+void svst1q_scatter_index(svbool_t, svuint64_t, int64_t, svbfloat16_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64base_index_f64)))
+void svst1q_scatter_index(svbool_t, svuint64_t, int64_t, svfloat64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64base_index_f32)))
+void svst1q_scatter_index(svbool_t, svuint64_t, int64_t, svfloat32_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64base_index_f16)))
+void svst1q_scatter_index(svbool_t, svuint64_t, int64_t, svfloat16_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64base_index_s32)))
+void svst1q_scatter_index(svbool_t, svuint64_t, int64_t, svint32_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64base_index_s64)))
+void svst1q_scatter_index(svbool_t, svuint64_t, int64_t, svint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64base_index_s16)))
+void svst1q_scatter_index(svbool_t, svuint64_t, int64_t, svint16_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64base_offset_u8)))
+void svst1q_scatter_offset(svbool_t, svuint64_t, int64_t, svuint8_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64base_offset_u32)))
+void svst1q_scatter_offset(svbool_t, svuint64_t, int64_t, svuint32_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64base_offset_u64)))
+void svst1q_scatter_offset(svbool_t, svuint64_t, int64_t, svuint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64base_offset_u16)))
+void svst1q_scatter_offset(svbool_t, svuint64_t, int64_t, svuint16_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64base_offset_bf16)))
+void svst1q_scatter_offset(svbool_t, svuint64_t, int64_t, svbfloat16_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64base_offset_s8)))
+void svst1q_scatter_offset(svbool_t, svuint64_t, int64_t, svint8_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64base_offset_f64)))
+void svst1q_scatter_offset(svbool_t, svuint64_t, int64_t, svfloat64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64base_offset_f32)))
+void svst1q_scatter_offset(svbool_t, svuint64_t, int64_t, svfloat32_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64base_offset_f16)))
+void svst1q_scatter_offset(svbool_t, svuint64_t, int64_t, svfloat16_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64base_offset_s32)))
+void svst1q_scatter_offset(svbool_t, svuint64_t, int64_t, svint32_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64base_offset_s64)))
+void svst1q_scatter_offset(svbool_t, svuint64_t, int64_t, svint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64base_offset_mf8)))
+void svst1q_scatter_offset(svbool_t, svuint64_t, int64_t, svmfloat8_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64base_offset_s16)))
+void svst1q_scatter_offset(svbool_t, svuint64_t, int64_t, svint16_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_s64index_u32)))
+void svst1q_scatter_index(svbool_t, uint32_t *, svint64_t, svuint32_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_s64index_u64)))
+void svst1q_scatter_index(svbool_t, uint64_t *, svint64_t, svuint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_s64index_u16)))
+void svst1q_scatter_index(svbool_t, uint16_t *, svint64_t, svuint16_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_s64index_bf16)))
+void svst1q_scatter_index(svbool_t, bfloat16_t *, svint64_t, svbfloat16_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_s64index_f64)))
+void svst1q_scatter_index(svbool_t, float64_t *, svint64_t, svfloat64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_s64index_f32)))
+void svst1q_scatter_index(svbool_t, float32_t *, svint64_t, svfloat32_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_s64index_f16)))
+void svst1q_scatter_index(svbool_t, float16_t *, svint64_t, svfloat16_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_s64index_s32)))
+void svst1q_scatter_index(svbool_t, int32_t *, svint64_t, svint32_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_s64index_s64)))
+void svst1q_scatter_index(svbool_t, int64_t *, svint64_t, svint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_s64index_s16)))
+void svst1q_scatter_index(svbool_t, int16_t *, svint64_t, svint16_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64index_u32)))
+void svst1q_scatter_index(svbool_t, uint32_t *, svuint64_t, svuint32_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64index_u64)))
+void svst1q_scatter_index(svbool_t, uint64_t *, svuint64_t, svuint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64index_u16)))
+void svst1q_scatter_index(svbool_t, uint16_t *, svuint64_t, svuint16_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64index_bf16)))
+void svst1q_scatter_index(svbool_t, bfloat16_t *, svuint64_t, svbfloat16_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64index_f64)))
+void svst1q_scatter_index(svbool_t, float64_t *, svuint64_t, svfloat64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64index_f32)))
+void svst1q_scatter_index(svbool_t, float32_t *, svuint64_t, svfloat32_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64index_f16)))
+void svst1q_scatter_index(svbool_t, float16_t *, svuint64_t, svfloat16_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64index_s32)))
+void svst1q_scatter_index(svbool_t, int32_t *, svuint64_t, svint32_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64index_s64)))
+void svst1q_scatter_index(svbool_t, int64_t *, svuint64_t, svint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64index_s16)))
+void svst1q_scatter_index(svbool_t, int16_t *, svuint64_t, svint16_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_s64offset_u8)))
+void svst1q_scatter_offset(svbool_t, uint8_t *, svint64_t, svuint8_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_s64offset_u32)))
+void svst1q_scatter_offset(svbool_t, uint32_t *, svint64_t, svuint32_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_s64offset_u64)))
+void svst1q_scatter_offset(svbool_t, uint64_t *, svint64_t, svuint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_s64offset_u16)))
+void svst1q_scatter_offset(svbool_t, uint16_t *, svint64_t, svuint16_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_s64offset_bf16)))
+void svst1q_scatter_offset(svbool_t, bfloat16_t *, svint64_t, svbfloat16_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_s64offset_s8))) /* Remaining svst1q_scatter_offset overloads: (svbool_t, element pointer, 64-bit offset vector, vector of the element type) -> void. Each is bound to the builtin named in its alias attribute. __aio is a macro defined outside this chunk. */
+void svst1q_scatter_offset(svbool_t, int8_t *, svint64_t, svint8_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_s64offset_f64)))
+void svst1q_scatter_offset(svbool_t, float64_t *, svint64_t, svfloat64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_s64offset_f32)))
+void svst1q_scatter_offset(svbool_t, float32_t *, svint64_t, svfloat32_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_s64offset_f16)))
+void svst1q_scatter_offset(svbool_t, float16_t *, svint64_t, svfloat16_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_s64offset_s32)))
+void svst1q_scatter_offset(svbool_t, int32_t *, svint64_t, svint32_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_s64offset_s64)))
+void svst1q_scatter_offset(svbool_t, int64_t *, svint64_t, svint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_s64offset_mf8)))
+void svst1q_scatter_offset(svbool_t, mfloat8_t *, svint64_t, svmfloat8_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_s64offset_s16)))
+void svst1q_scatter_offset(svbool_t, int16_t *, svint64_t, svint16_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64offset_u8))) /* From here on the third argument is svuint64_t and the builtins are the *_u64offset_* variants. */
+void svst1q_scatter_offset(svbool_t, uint8_t *, svuint64_t, svuint8_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64offset_u32)))
+void svst1q_scatter_offset(svbool_t, uint32_t *, svuint64_t, svuint32_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64offset_u64)))
+void svst1q_scatter_offset(svbool_t, uint64_t *, svuint64_t, svuint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64offset_u16)))
+void svst1q_scatter_offset(svbool_t, uint16_t *, svuint64_t, svuint16_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64offset_bf16)))
+void svst1q_scatter_offset(svbool_t, bfloat16_t *, svuint64_t, svbfloat16_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64offset_s8)))
+void svst1q_scatter_offset(svbool_t, int8_t *, svuint64_t, svint8_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64offset_f64)))
+void svst1q_scatter_offset(svbool_t, float64_t *, svuint64_t, svfloat64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64offset_f32)))
+void svst1q_scatter_offset(svbool_t, float32_t *, svuint64_t, svfloat32_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64offset_f16)))
+void svst1q_scatter_offset(svbool_t, float16_t *, svuint64_t, svfloat16_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64offset_s32)))
+void svst1q_scatter_offset(svbool_t, int32_t *, svuint64_t, svint32_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64offset_s64)))
+void svst1q_scatter_offset(svbool_t, int64_t *, svuint64_t, svint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64offset_mf8)))
+void svst1q_scatter_offset(svbool_t, mfloat8_t *, svuint64_t, svmfloat8_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64offset_s16)))
+void svst1q_scatter_offset(svbool_t, int16_t *, svuint64_t, svint16_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1wq_u32))) /* svst1wq overloads for the u32, f32 and s32 element types: (svbool_t, element pointer, vector) -> void, each bound to the builtin named in its alias attribute. */
+void svst1wq(svbool_t, uint32_t *, svuint32_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1wq_f32)))
+void svst1wq(svbool_t, float32_t *, svfloat32_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1wq_s32)))
+void svst1wq(svbool_t, int32_t *, svint32_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1wq_vnum_u32))) /* svst1wq_vnum overloads: same three element types as svst1wq, with an extra int64_t argument between the pointer and the vector (presumably the "vnum" operand the name refers to -- confirm against the ACLE). */
+void svst1wq_vnum(svbool_t, uint32_t *, int64_t, svuint32_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1wq_vnum_f32)))
+void svst1wq_vnum(svbool_t, float32_t *, int64_t, svfloat32_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1wq_vnum_s32)))
+void svst1wq_vnum(svbool_t, int32_t *, int64_t, svint32_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcntp_c8))) /* svcntp_c{8,32,64,16}: distinctly named functions, each (svcount_t, uint64_t) -> uint64_t and bound to the same-named builtin. These use __ai rather than __aio; both macros are defined outside this chunk. */
+uint64_t svcntp_c8(svcount_t, uint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcntp_c32)))
+uint64_t svcntp_c32(svcount_t, uint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcntp_c64)))
+uint64_t svcntp_c64(svcount_t, uint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcntp_c16)))
+uint64_t svcntp_c16(svcount_t, uint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_u8_x2))) /* svld1_<type>_x2: one declaration per element type, (svcount_t, const element pointer) -> sv<type>x2_t, each bound to the same-named builtin. */
+svuint8x2_t svld1_u8_x2(svcount_t, uint8_t const *);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_s8_x2)))
+svint8x2_t svld1_s8_x2(svcount_t, int8_t const *);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_mf8_x2)))
+svmfloat8x2_t svld1_mf8_x2(svcount_t, mfloat8_t const *);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_u64_x2)))
+svuint64x2_t svld1_u64_x2(svcount_t, uint64_t const *);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_f64_x2)))
+svfloat64x2_t svld1_f64_x2(svcount_t, float64_t const *);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_s64_x2)))
+svint64x2_t svld1_s64_x2(svcount_t, int64_t const *);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_u16_x2)))
+svuint16x2_t svld1_u16_x2(svcount_t, uint16_t const *);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_bf16_x2)))
+svbfloat16x2_t svld1_bf16_x2(svcount_t, bfloat16_t const *);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_f16_x2)))
+svfloat16x2_t svld1_f16_x2(svcount_t, float16_t const *);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_s16_x2)))
+svint16x2_t svld1_s16_x2(svcount_t, int16_t const *);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_u32_x2)))
+svuint32x2_t svld1_u32_x2(svcount_t, uint32_t const *);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_f32_x2)))
+svfloat32x2_t svld1_f32_x2(svcount_t, float32_t const *);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_s32_x2)))
+svint32x2_t svld1_s32_x2(svcount_t, int32_t const *);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_u8_x4))) /* svld1_<type>_x4: same argument list as the _x2 forms, (svcount_t, const element pointer), but returning sv<type>x4_t. */
+svuint8x4_t svld1_u8_x4(svcount_t, uint8_t const *);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_s8_x4)))
+svint8x4_t svld1_s8_x4(svcount_t, int8_t const *);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_mf8_x4)))
+svmfloat8x4_t svld1_mf8_x4(svcount_t, mfloat8_t const *);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_u64_x4)))
+svuint64x4_t svld1_u64_x4(svcount_t, uint64_t const *);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_f64_x4)))
+svfloat64x4_t svld1_f64_x4(svcount_t, float64_t const *);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_s64_x4)))
+svint64x4_t svld1_s64_x4(svcount_t, int64_t const *);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_u16_x4)))
+svuint16x4_t svld1_u16_x4(svcount_t, uint16_t const *);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_bf16_x4)))
+svbfloat16x4_t svld1_bf16_x4(svcount_t, bfloat16_t const *);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_f16_x4)))
+svfloat16x4_t svld1_f16_x4(svcount_t, float16_t const *);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_s16_x4)))
+svint16x4_t svld1_s16_x4(svcount_t, int16_t const *);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_u32_x4)))
+svuint32x4_t svld1_u32_x4(svcount_t, uint32_t const *);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_f32_x4)))
+svfloat32x4_t svld1_f32_x4(svcount_t, float32_t const *);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_s32_x4)))
+svint32x4_t svld1_s32_x4(svcount_t, int32_t const *);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_vnum_u8_x2))) /* svld1_vnum_<type>_x2: (svcount_t, const element pointer, int64_t) -> sv<type>x2_t; the signature differs from svld1_<type>_x2 only by the trailing int64_t argument. */
+svuint8x2_t svld1_vnum_u8_x2(svcount_t, uint8_t const *, int64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_vnum_s8_x2)))
+svint8x2_t svld1_vnum_s8_x2(svcount_t, int8_t const *, int64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_vnum_mf8_x2)))
+svmfloat8x2_t svld1_vnum_mf8_x2(svcount_t, mfloat8_t const *, int64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_vnum_u64_x2)))
+svuint64x2_t svld1_vnum_u64_x2(svcount_t, uint64_t const *, int64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_vnum_f64_x2)))
+svfloat64x2_t svld1_vnum_f64_x2(svcount_t, float64_t const *, int64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_vnum_s64_x2)))
+svint64x2_t svld1_vnum_s64_x2(svcount_t, int64_t const *, int64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_vnum_u16_x2)))
+svuint16x2_t svld1_vnum_u16_x2(svcount_t, uint16_t const *, int64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_vnum_bf16_x2)))
+svbfloat16x2_t svld1_vnum_bf16_x2(svcount_t, bfloat16_t const *, int64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_vnum_f16_x2)))
+svfloat16x2_t svld1_vnum_f16_x2(svcount_t, float16_t const *, int64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_vnum_s16_x2)))
+svint16x2_t svld1_vnum_s16_x2(svcount_t, int16_t const *, int64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_vnum_u32_x2)))
+svuint32x2_t svld1_vnum_u32_x2(svcount_t, uint32_t const *, int64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_vnum_f32_x2)))
+svfloat32x2_t svld1_vnum_f32_x2(svcount_t, float32_t const *, int64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_vnum_s32_x2)))
+svint32x2_t svld1_vnum_s32_x2(svcount_t, int32_t const *, int64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_vnum_u8_x4))) /* svld1_vnum_<type>_x4: (svcount_t, const element pointer, int64_t) -> sv<type>x4_t, one declaration per element type. */
+svuint8x4_t svld1_vnum_u8_x4(svcount_t, uint8_t const *, int64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_vnum_s8_x4)))
+svint8x4_t svld1_vnum_s8_x4(svcount_t, int8_t const *, int64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_vnum_mf8_x4)))
+svmfloat8x4_t svld1_vnum_mf8_x4(svcount_t, mfloat8_t const *, int64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_vnum_u64_x4)))
+svuint64x4_t svld1_vnum_u64_x4(svcount_t, uint64_t const *, int64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_vnum_f64_x4)))
+svfloat64x4_t svld1_vnum_f64_x4(svcount_t, float64_t const *, int64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_vnum_s64_x4)))
+svint64x4_t svld1_vnum_s64_x4(svcount_t, int64_t const *, int64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_vnum_u16_x4)))
+svuint16x4_t svld1_vnum_u16_x4(svcount_t, uint16_t const *, int64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_vnum_bf16_x4)))
+svbfloat16x4_t svld1_vnum_bf16_x4(svcount_t, bfloat16_t const *, int64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_vnum_f16_x4)))
+svfloat16x4_t svld1_vnum_f16_x4(svcount_t, float16_t const *, int64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_vnum_s16_x4)))
+svint16x4_t svld1_vnum_s16_x4(svcount_t, int16_t const *, int64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_vnum_u32_x4)))
+svuint32x4_t svld1_vnum_u32_x4(svcount_t, uint32_t const *, int64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_vnum_f32_x4)))
+svfloat32x4_t svld1_vnum_f32_x4(svcount_t, float32_t const *, int64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_vnum_s32_x4)))
+svint32x4_t svld1_vnum_s32_x4(svcount_t, int32_t const *, int64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_u8_x2))) /* svldnt1_<type>_x2: (svcount_t, const element pointer) -> sv<type>x2_t, one declaration per element type, each bound to the same-named builtin. NOTE(review): "nt" presumably means non-temporal -- confirm against the ACLE. */
+svuint8x2_t svldnt1_u8_x2(svcount_t, uint8_t const *);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_s8_x2)))
+svint8x2_t svldnt1_s8_x2(svcount_t, int8_t const *);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_mf8_x2)))
+svmfloat8x2_t svldnt1_mf8_x2(svcount_t, mfloat8_t const *);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_u64_x2)))
+svuint64x2_t svldnt1_u64_x2(svcount_t, uint64_t const *);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_f64_x2)))
+svfloat64x2_t svldnt1_f64_x2(svcount_t, float64_t const *);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_s64_x2)))
+svint64x2_t svldnt1_s64_x2(svcount_t, int64_t const *);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_u16_x2)))
+svuint16x2_t svldnt1_u16_x2(svcount_t, uint16_t const *);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_bf16_x2)))
+svbfloat16x2_t svldnt1_bf16_x2(svcount_t, bfloat16_t const *);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_f16_x2)))
+svfloat16x2_t svldnt1_f16_x2(svcount_t, float16_t const *);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_s16_x2)))
+svint16x2_t svldnt1_s16_x2(svcount_t, int16_t const *);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_u32_x2)))
+svuint32x2_t svldnt1_u32_x2(svcount_t, uint32_t const *);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_f32_x2)))
+svfloat32x2_t svldnt1_f32_x2(svcount_t, float32_t const *);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_s32_x2)))
+svint32x2_t svldnt1_s32_x2(svcount_t, int32_t const *);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_u8_x4))) /* svldnt1_<type>_x4: (svcount_t, const element pointer) -> sv<type>x4_t, one declaration per element type. */
+svuint8x4_t svldnt1_u8_x4(svcount_t, uint8_t const *);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_s8_x4)))
+svint8x4_t svldnt1_s8_x4(svcount_t, int8_t const *);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_mf8_x4)))
+svmfloat8x4_t svldnt1_mf8_x4(svcount_t, mfloat8_t const *);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_u64_x4)))
+svuint64x4_t svldnt1_u64_x4(svcount_t, uint64_t const *);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_f64_x4)))
+svfloat64x4_t svldnt1_f64_x4(svcount_t, float64_t const *);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_s64_x4)))
+svint64x4_t svldnt1_s64_x4(svcount_t, int64_t const *);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_u16_x4)))
+svuint16x4_t svldnt1_u16_x4(svcount_t, uint16_t const *);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_bf16_x4)))
+svbfloat16x4_t svldnt1_bf16_x4(svcount_t, bfloat16_t const *);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_f16_x4)))
+svfloat16x4_t svldnt1_f16_x4(svcount_t, float16_t const *);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_s16_x4)))
+svint16x4_t svldnt1_s16_x4(svcount_t, int16_t const *);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_u32_x4)))
+svuint32x4_t svldnt1_u32_x4(svcount_t, uint32_t const *);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_f32_x4)))
+svfloat32x4_t svldnt1_f32_x4(svcount_t, float32_t const *);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_s32_x4)))
+svint32x4_t svldnt1_s32_x4(svcount_t, int32_t const *);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_vnum_u8_x2))) /* svldnt1_vnum_<type>_x2: (svcount_t, const element pointer, int64_t) -> sv<type>x2_t; differs from svldnt1_<type>_x2 only by the trailing int64_t argument. */
+svuint8x2_t svldnt1_vnum_u8_x2(svcount_t, uint8_t const *, int64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_vnum_s8_x2)))
+svint8x2_t svldnt1_vnum_s8_x2(svcount_t, int8_t const *, int64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_vnum_mf8_x2)))
+svmfloat8x2_t svldnt1_vnum_mf8_x2(svcount_t, mfloat8_t const *, int64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_vnum_u64_x2)))
+svuint64x2_t svldnt1_vnum_u64_x2(svcount_t, uint64_t const *, int64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_vnum_f64_x2)))
+svfloat64x2_t svldnt1_vnum_f64_x2(svcount_t, float64_t const *, int64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_vnum_s64_x2)))
+svint64x2_t svldnt1_vnum_s64_x2(svcount_t, int64_t const *, int64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_vnum_u16_x2)))
+svuint16x2_t svldnt1_vnum_u16_x2(svcount_t, uint16_t const *, int64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_vnum_bf16_x2)))
+svbfloat16x2_t svldnt1_vnum_bf16_x2(svcount_t, bfloat16_t const *, int64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_vnum_f16_x2)))
+svfloat16x2_t svldnt1_vnum_f16_x2(svcount_t, float16_t const *, int64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_vnum_s16_x2)))
+svint16x2_t svldnt1_vnum_s16_x2(svcount_t, int16_t const *, int64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_vnum_u32_x2)))
+svuint32x2_t svldnt1_vnum_u32_x2(svcount_t, uint32_t const *, int64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_vnum_f32_x2)))
+svfloat32x2_t svldnt1_vnum_f32_x2(svcount_t, float32_t const *, int64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_vnum_s32_x2)))
+svint32x2_t svldnt1_vnum_s32_x2(svcount_t, int32_t const *, int64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_vnum_u8_x4))) /* svldnt1_vnum_<type>_x4: (svcount_t, const element pointer, int64_t) -> sv<type>x4_t, one declaration per element type. */
+svuint8x4_t svldnt1_vnum_u8_x4(svcount_t, uint8_t const *, int64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_vnum_s8_x4)))
+svint8x4_t svldnt1_vnum_s8_x4(svcount_t, int8_t const *, int64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_vnum_mf8_x4)))
+svmfloat8x4_t svldnt1_vnum_mf8_x4(svcount_t, mfloat8_t const *, int64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_vnum_u64_x4)))
+svuint64x4_t svldnt1_vnum_u64_x4(svcount_t, uint64_t const *, int64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_vnum_f64_x4)))
+svfloat64x4_t svldnt1_vnum_f64_x4(svcount_t, float64_t const *, int64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_vnum_s64_x4)))
+svint64x4_t svldnt1_vnum_s64_x4(svcount_t, int64_t const *, int64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_vnum_u16_x4)))
+svuint16x4_t svldnt1_vnum_u16_x4(svcount_t, uint16_t const *, int64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_vnum_bf16_x4)))
+svbfloat16x4_t svldnt1_vnum_bf16_x4(svcount_t, bfloat16_t const *, int64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_vnum_f16_x4)))
+svfloat16x4_t svldnt1_vnum_f16_x4(svcount_t, float16_t const *, int64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_vnum_s16_x4)))
+svint16x4_t svldnt1_vnum_s16_x4(svcount_t, int16_t const *, int64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_vnum_u32_x4)))
+svuint32x4_t svldnt1_vnum_u32_x4(svcount_t, uint32_t const *, int64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_vnum_f32_x4)))
+svfloat32x4_t svldnt1_vnum_f32_x4(svcount_t, float32_t const *, int64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_vnum_s32_x4)))
+svint32x4_t svldnt1_vnum_s32_x4(svcount_t, int32_t const *, int64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svpext_lane_c8))) /* svpext_lane_c{8,32,64,16}: each takes (svcount_t, uint64_t) and returns a single svbool_t; bound to the same-named builtin. */
+svbool_t svpext_lane_c8(svcount_t, uint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svpext_lane_c32)))
+svbool_t svpext_lane_c32(svcount_t, uint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svpext_lane_c64)))
+svbool_t svpext_lane_c64(svcount_t, uint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svpext_lane_c16)))
+svbool_t svpext_lane_c16(svcount_t, uint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svpext_lane_c8_x2))) /* svpext_lane_c{8,32,64,16}_x2: same (svcount_t, uint64_t) argument list as the non-_x2 forms, but returning svboolx2_t. */
+svboolx2_t svpext_lane_c8_x2(svcount_t, uint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svpext_lane_c32_x2)))
+svboolx2_t svpext_lane_c32_x2(svcount_t, uint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svpext_lane_c64_x2)))
+svboolx2_t svpext_lane_c64_x2(svcount_t, uint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svpext_lane_c16_x2)))
+svboolx2_t svpext_lane_c16_x2(svcount_t, uint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svpfalse_c))) /* svpfalse_c: takes no arguments and returns an svcount_t; bound to __builtin_sve_svpfalse_c. */
+svcount_t svpfalse_c(void);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svpsel_lane_c16))) /* svpsel_lane_c{16,32,64,8}: each takes (svcount_t, svbool_t, uint32_t) and returns svcount_t; bound to the same-named builtin. */
+svcount_t svpsel_lane_c16(svcount_t, svbool_t, uint32_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svpsel_lane_c32)))
+svcount_t svpsel_lane_c32(svcount_t, svbool_t, uint32_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svpsel_lane_c64)))
+svcount_t svpsel_lane_c64(svcount_t, svbool_t, uint32_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svpsel_lane_c8)))
+svcount_t svpsel_lane_c8(svcount_t, svbool_t, uint32_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svptrue_c8))) /* svptrue_c{8,32,64,16}: each takes no arguments and returns an svcount_t; bound to the same-named builtin. */
+svcount_t svptrue_c8(void);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svptrue_c32)))
+svcount_t svptrue_c32(void);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svptrue_c64)))
+svcount_t svptrue_c64(void);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svptrue_c16)))
+svcount_t svptrue_c16(void);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svreinterpret_b))) /* svreinterpret_b: takes an svcount_t and returns an svbool_t. */
+svbool_t svreinterpret_b(svcount_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svreinterpret_c))) /* svreinterpret_c: the opposite direction, taking an svbool_t and returning an svcount_t. */
+svcount_t svreinterpret_c(svbool_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_u8_x2))) /* svst1_<type>_x2: one declaration per element type, (svcount_t, non-const element pointer, sv<type>x2_t) -> void, each bound to the same-named builtin. */
+void svst1_u8_x2(svcount_t, uint8_t *, svuint8x2_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_s8_x2)))
+void svst1_s8_x2(svcount_t, int8_t *, svint8x2_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_mf8_x2)))
+void svst1_mf8_x2(svcount_t, mfloat8_t *, svmfloat8x2_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_u64_x2)))
+void svst1_u64_x2(svcount_t, uint64_t *, svuint64x2_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_f64_x2)))
+void svst1_f64_x2(svcount_t, float64_t *, svfloat64x2_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_s64_x2)))
+void svst1_s64_x2(svcount_t, int64_t *, svint64x2_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_u16_x2)))
+void svst1_u16_x2(svcount_t, uint16_t *, svuint16x2_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_bf16_x2)))
+void svst1_bf16_x2(svcount_t, bfloat16_t *, svbfloat16x2_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_f16_x2)))
+void svst1_f16_x2(svcount_t, float16_t *, svfloat16x2_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_s16_x2)))
+void svst1_s16_x2(svcount_t, int16_t *, svint16x2_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_u32_x2)))
+void svst1_u32_x2(svcount_t, uint32_t *, svuint32x2_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_f32_x2)))
+void svst1_f32_x2(svcount_t, float32_t *, svfloat32x2_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_s32_x2)))
+void svst1_s32_x2(svcount_t, int32_t *, svint32x2_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_u8_x4))) /* svst1_<type>_x4: (svcount_t, non-const element pointer, sv<type>x4_t) -> void, one declaration per element type. */
+void svst1_u8_x4(svcount_t, uint8_t *, svuint8x4_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_s8_x4)))
+void svst1_s8_x4(svcount_t, int8_t *, svint8x4_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_mf8_x4)))
+void svst1_mf8_x4(svcount_t, mfloat8_t *, svmfloat8x4_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_u64_x4)))
+void svst1_u64_x4(svcount_t, uint64_t *, svuint64x4_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_f64_x4)))
+void svst1_f64_x4(svcount_t, float64_t *, svfloat64x4_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_s64_x4)))
+void svst1_s64_x4(svcount_t, int64_t *, svint64x4_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_u16_x4)))
+void svst1_u16_x4(svcount_t, uint16_t *, svuint16x4_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_bf16_x4)))
+void svst1_bf16_x4(svcount_t, bfloat16_t *, svbfloat16x4_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_f16_x4)))
+void svst1_f16_x4(svcount_t, float16_t *, svfloat16x4_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_s16_x4)))
+void svst1_s16_x4(svcount_t, int16_t *, svint16x4_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_u32_x4)))
+void svst1_u32_x4(svcount_t, uint32_t *, svuint32x4_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_f32_x4)))
+void svst1_f32_x4(svcount_t, float32_t *, svfloat32x4_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_s32_x4)))
+void svst1_s32_x4(svcount_t, int32_t *, svint32x4_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_vnum_u8_x2))) /* svst1_vnum_<type>_x2: (svcount_t, element pointer, int64_t, sv<type>x2_t) -> void; the int64_t sits between the pointer and the data argument. */
+void svst1_vnum_u8_x2(svcount_t, uint8_t *, int64_t, svuint8x2_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_vnum_s8_x2)))
+void svst1_vnum_s8_x2(svcount_t, int8_t *, int64_t, svint8x2_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_vnum_mf8_x2)))
+void svst1_vnum_mf8_x2(svcount_t, mfloat8_t *, int64_t, svmfloat8x2_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_vnum_u64_x2)))
+void svst1_vnum_u64_x2(svcount_t, uint64_t *, int64_t, svuint64x2_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_vnum_f64_x2)))
+void svst1_vnum_f64_x2(svcount_t, float64_t *, int64_t, svfloat64x2_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_vnum_s64_x2)))
+void svst1_vnum_s64_x2(svcount_t, int64_t *, int64_t, svint64x2_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_vnum_u16_x2)))
+void svst1_vnum_u16_x2(svcount_t, uint16_t *, int64_t, svuint16x2_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_vnum_bf16_x2)))
+void svst1_vnum_bf16_x2(svcount_t, bfloat16_t *, int64_t, svbfloat16x2_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_vnum_f16_x2)))
+void svst1_vnum_f16_x2(svcount_t, float16_t *, int64_t, svfloat16x2_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_vnum_s16_x2)))
+void svst1_vnum_s16_x2(svcount_t, int16_t *, int64_t, svint16x2_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_vnum_u32_x2)))
+void svst1_vnum_u32_x2(svcount_t, uint32_t *, int64_t, svuint32x2_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_vnum_f32_x2)))
+void svst1_vnum_f32_x2(svcount_t, float32_t *, int64_t, svfloat32x2_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_vnum_s32_x2)))
+void svst1_vnum_s32_x2(svcount_t, int32_t *, int64_t, svint32x2_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_vnum_u8_x4))) /* svst1_vnum_<type>_x4: (svcount_t, element pointer, int64_t, sv<type>x4_t) -> void, one declaration per element type. */
+void svst1_vnum_u8_x4(svcount_t, uint8_t *, int64_t, svuint8x4_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_vnum_s8_x4)))
+void svst1_vnum_s8_x4(svcount_t, int8_t *, int64_t, svint8x4_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_vnum_mf8_x4)))
+void svst1_vnum_mf8_x4(svcount_t, mfloat8_t *, int64_t, svmfloat8x4_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_vnum_u64_x4)))
+void svst1_vnum_u64_x4(svcount_t, uint64_t *, int64_t, svuint64x4_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_vnum_f64_x4)))
+void svst1_vnum_f64_x4(svcount_t, float64_t *, int64_t, svfloat64x4_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_vnum_s64_x4)))
+void svst1_vnum_s64_x4(svcount_t, int64_t *, int64_t, svint64x4_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_vnum_u16_x4)))
+void svst1_vnum_u16_x4(svcount_t, uint16_t *, int64_t, svuint16x4_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_vnum_bf16_x4)))
+void svst1_vnum_bf16_x4(svcount_t, bfloat16_t *, int64_t, svbfloat16x4_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_vnum_f16_x4)))
+void svst1_vnum_f16_x4(svcount_t, float16_t *, int64_t, svfloat16x4_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_vnum_s16_x4)))
+void svst1_vnum_s16_x4(svcount_t, int16_t *, int64_t, svint16x4_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_vnum_u32_x4)))
+void svst1_vnum_u32_x4(svcount_t, uint32_t *, int64_t, svuint32x4_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_vnum_f32_x4)))
+void svst1_vnum_f32_x4(svcount_t, float32_t *, int64_t, svfloat32x4_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_vnum_s32_x4)))
+void svst1_vnum_s32_x4(svcount_t, int32_t *, int64_t, svint32x4_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_u8_x2))) /* svstnt1_<type>_x2: (svcount_t, non-const element pointer, sv<type>x2_t) -> void, one declaration per element type, each bound to the same-named builtin. NOTE(review): "nt" presumably means non-temporal -- confirm against the ACLE. */
+void svstnt1_u8_x2(svcount_t, uint8_t *, svuint8x2_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_s8_x2)))
+void svstnt1_s8_x2(svcount_t, int8_t *, svint8x2_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_mf8_x2)))
+void svstnt1_mf8_x2(svcount_t, mfloat8_t *, svmfloat8x2_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_u64_x2)))
+void svstnt1_u64_x2(svcount_t, uint64_t *, svuint64x2_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_f64_x2)))
+void svstnt1_f64_x2(svcount_t, float64_t *, svfloat64x2_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_s64_x2)))
+void svstnt1_s64_x2(svcount_t, int64_t *, svint64x2_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_u16_x2)))
+void svstnt1_u16_x2(svcount_t, uint16_t *, svuint16x2_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_bf16_x2)))
+void svstnt1_bf16_x2(svcount_t, bfloat16_t *, svbfloat16x2_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_f16_x2)))
+void svstnt1_f16_x2(svcount_t, float16_t *, svfloat16x2_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_s16_x2)))
+void svstnt1_s16_x2(svcount_t, int16_t *, svint16x2_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_u32_x2)))
+void svstnt1_u32_x2(svcount_t, uint32_t *, svuint32x2_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_f32_x2)))
+void svstnt1_f32_x2(svcount_t, float32_t *, svfloat32x2_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_s32_x2)))
+void svstnt1_s32_x2(svcount_t, int32_t *, svint32x2_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_u8_x4))) /* svstnt1_<type>_x4: (svcount_t, non-const element pointer, sv<type>x4_t) -> void, one declaration per element type. */
+void svstnt1_u8_x4(svcount_t, uint8_t *, svuint8x4_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_s8_x4)))
+void svstnt1_s8_x4(svcount_t, int8_t *, svint8x4_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_mf8_x4)))
+void svstnt1_mf8_x4(svcount_t, mfloat8_t *, svmfloat8x4_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_u64_x4)))
+void svstnt1_u64_x4(svcount_t, uint64_t *, svuint64x4_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_f64_x4)))
+void svstnt1_f64_x4(svcount_t, float64_t *, svfloat64x4_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_s64_x4)))
+void svstnt1_s64_x4(svcount_t, int64_t *, svint64x4_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_u16_x4)))
+void svstnt1_u16_x4(svcount_t, uint16_t *, svuint16x4_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_bf16_x4)))
+void svstnt1_bf16_x4(svcount_t, bfloat16_t *, svbfloat16x4_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_f16_x4)))
+void svstnt1_f16_x4(svcount_t, float16_t *, svfloat16x4_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_s16_x4)))
+void svstnt1_s16_x4(svcount_t, int16_t *, svint16x4_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_u32_x4)))
+void svstnt1_u32_x4(svcount_t, uint32_t *, svuint32x4_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_f32_x4)))
+void svstnt1_f32_x4(svcount_t, float32_t *, svfloat32x4_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_s32_x4)))
+void svstnt1_s32_x4(svcount_t, int32_t *, svint32x4_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_vnum_u8_x2))) /* svstnt1_vnum_<type>_x2: (svcount_t, element pointer, int64_t, sv<type>x2_t) -> void; differs from svstnt1_<type>_x2 only by the int64_t inserted before the data argument. */
+void svstnt1_vnum_u8_x2(svcount_t, uint8_t *, int64_t, svuint8x2_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_vnum_s8_x2)))
+void svstnt1_vnum_s8_x2(svcount_t, int8_t *, int64_t, svint8x2_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_vnum_mf8_x2)))
+void svstnt1_vnum_mf8_x2(svcount_t, mfloat8_t *, int64_t, svmfloat8x2_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_vnum_u64_x2)))
+void svstnt1_vnum_u64_x2(svcount_t, uint64_t *, int64_t, svuint64x2_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_vnum_f64_x2)))
+void svstnt1_vnum_f64_x2(svcount_t, float64_t *, int64_t, svfloat64x2_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_vnum_s64_x2)))
+void svstnt1_vnum_s64_x2(svcount_t, int64_t *, int64_t, svint64x2_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_vnum_u16_x2)))
+void svstnt1_vnum_u16_x2(svcount_t, uint16_t *, int64_t, svuint16x2_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_vnum_bf16_x2)))
+void svstnt1_vnum_bf16_x2(svcount_t, bfloat16_t *, int64_t, svbfloat16x2_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_vnum_f16_x2)))
+void svstnt1_vnum_f16_x2(svcount_t, float16_t *, int64_t, svfloat16x2_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_vnum_s16_x2)))
+void svstnt1_vnum_s16_x2(svcount_t, int16_t *, int64_t, svint16x2_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_vnum_u32_x2)))
+void svstnt1_vnum_u32_x2(svcount_t, uint32_t *, int64_t, svuint32x2_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_vnum_f32_x2)))
+void svstnt1_vnum_f32_x2(svcount_t, float32_t *, int64_t, svfloat32x2_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_vnum_s32_x2)))
+void svstnt1_vnum_s32_x2(svcount_t, int32_t *, int64_t, svint32x2_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_vnum_u8_x4)))
+void svstnt1_vnum_u8_x4(svcount_t, uint8_t *, int64_t, svuint8x4_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_vnum_s8_x4)))
+void svstnt1_vnum_s8_x4(svcount_t, int8_t *, int64_t, svint8x4_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_vnum_mf8_x4)))
+void svstnt1_vnum_mf8_x4(svcount_t, mfloat8_t *, int64_t, svmfloat8x4_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_vnum_u64_x4)))
+void svstnt1_vnum_u64_x4(svcount_t, uint64_t *, int64_t, svuint64x4_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_vnum_f64_x4)))
+void svstnt1_vnum_f64_x4(svcount_t, float64_t *, int64_t, svfloat64x4_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_vnum_s64_x4)))
+void svstnt1_vnum_s64_x4(svcount_t, int64_t *, int64_t, svint64x4_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_vnum_u16_x4)))
+void svstnt1_vnum_u16_x4(svcount_t, uint16_t *, int64_t, svuint16x4_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_vnum_bf16_x4)))
+void svstnt1_vnum_bf16_x4(svcount_t, bfloat16_t *, int64_t, svbfloat16x4_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_vnum_f16_x4)))
+void svstnt1_vnum_f16_x4(svcount_t, float16_t *, int64_t, svfloat16x4_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_vnum_s16_x4)))
+void svstnt1_vnum_s16_x4(svcount_t, int16_t *, int64_t, svint16x4_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_vnum_u32_x4)))
+void svstnt1_vnum_u32_x4(svcount_t, uint32_t *, int64_t, svuint32x4_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_vnum_f32_x4)))
+void svstnt1_vnum_f32_x4(svcount_t, float32_t *, int64_t, svfloat32x4_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_vnum_s32_x4)))
+void svstnt1_vnum_s32_x4(svcount_t, int32_t *, int64_t, svint32x4_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilege_c8_s64)))
+svcount_t svwhilege_c8_s64(int64_t, int64_t, uint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilege_c32_s64)))
+svcount_t svwhilege_c32_s64(int64_t, int64_t, uint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilege_c64_s64)))
+svcount_t svwhilege_c64_s64(int64_t, int64_t, uint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilege_c16_s64)))
+svcount_t svwhilege_c16_s64(int64_t, int64_t, uint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilege_c8_u64)))
+svcount_t svwhilege_c8_u64(uint64_t, uint64_t, uint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilege_c32_u64)))
+svcount_t svwhilege_c32_u64(uint64_t, uint64_t, uint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilege_c64_u64)))
+svcount_t svwhilege_c64_u64(uint64_t, uint64_t, uint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilege_c16_u64)))
+svcount_t svwhilege_c16_u64(uint64_t, uint64_t, uint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilegt_c8_s64)))
+svcount_t svwhilegt_c8_s64(int64_t, int64_t, uint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilegt_c32_s64)))
+svcount_t svwhilegt_c32_s64(int64_t, int64_t, uint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilegt_c64_s64)))
+svcount_t svwhilegt_c64_s64(int64_t, int64_t, uint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilegt_c16_s64)))
+svcount_t svwhilegt_c16_s64(int64_t, int64_t, uint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilegt_c8_u64)))
+svcount_t svwhilegt_c8_u64(uint64_t, uint64_t, uint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilegt_c32_u64)))
+svcount_t svwhilegt_c32_u64(uint64_t, uint64_t, uint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilegt_c64_u64)))
+svcount_t svwhilegt_c64_u64(uint64_t, uint64_t, uint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilegt_c16_u64)))
+svcount_t svwhilegt_c16_u64(uint64_t, uint64_t, uint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilele_c8_s64)))
+svcount_t svwhilele_c8_s64(int64_t, int64_t, uint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilele_c32_s64)))
+svcount_t svwhilele_c32_s64(int64_t, int64_t, uint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilele_c64_s64)))
+svcount_t svwhilele_c64_s64(int64_t, int64_t, uint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilele_c16_s64)))
+svcount_t svwhilele_c16_s64(int64_t, int64_t, uint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilele_c8_u64)))
+svcount_t svwhilele_c8_u64(uint64_t, uint64_t, uint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilele_c32_u64)))
+svcount_t svwhilele_c32_u64(uint64_t, uint64_t, uint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilele_c64_u64)))
+svcount_t svwhilele_c64_u64(uint64_t, uint64_t, uint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilele_c16_u64)))
+svcount_t svwhilele_c16_u64(uint64_t, uint64_t, uint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilelt_c8_u64)))
+svcount_t svwhilelt_c8_u64(uint64_t, uint64_t, uint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilelt_c32_u64)))
+svcount_t svwhilelt_c32_u64(uint64_t, uint64_t, uint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilelt_c64_u64)))
+svcount_t svwhilelt_c64_u64(uint64_t, uint64_t, uint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilelt_c16_u64)))
+svcount_t svwhilelt_c16_u64(uint64_t, uint64_t, uint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilelt_c8_s64)))
+svcount_t svwhilelt_c8_s64(int64_t, int64_t, uint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilelt_c32_s64)))
+svcount_t svwhilelt_c32_s64(int64_t, int64_t, uint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilelt_c64_s64)))
+svcount_t svwhilelt_c64_s64(int64_t, int64_t, uint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilelt_c16_s64)))
+svcount_t svwhilelt_c16_s64(int64_t, int64_t, uint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_u8_x2)))
+svuint8x2_t svld1_x2(svcount_t, uint8_t const *);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_s8_x2)))
+svint8x2_t svld1_x2(svcount_t, int8_t const *);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_mf8_x2)))
+svmfloat8x2_t svld1_x2(svcount_t, mfloat8_t const *);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_u64_x2)))
+svuint64x2_t svld1_x2(svcount_t, uint64_t const *);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_f64_x2)))
+svfloat64x2_t svld1_x2(svcount_t, float64_t const *);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_s64_x2)))
+svint64x2_t svld1_x2(svcount_t, int64_t const *);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_u16_x2)))
+svuint16x2_t svld1_x2(svcount_t, uint16_t const *);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_bf16_x2)))
+svbfloat16x2_t svld1_x2(svcount_t, bfloat16_t const *);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_f16_x2)))
+svfloat16x2_t svld1_x2(svcount_t, float16_t const *);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_s16_x2)))
+svint16x2_t svld1_x2(svcount_t, int16_t const *);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_u32_x2)))
+svuint32x2_t svld1_x2(svcount_t, uint32_t const *);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_f32_x2)))
+svfloat32x2_t svld1_x2(svcount_t, float32_t const *);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_s32_x2)))
+svint32x2_t svld1_x2(svcount_t, int32_t const *);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_u8_x4)))
+svuint8x4_t svld1_x4(svcount_t, uint8_t const *);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_s8_x4)))
+svint8x4_t svld1_x4(svcount_t, int8_t const *);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_mf8_x4)))
+svmfloat8x4_t svld1_x4(svcount_t, mfloat8_t const *);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_u64_x4)))
+svuint64x4_t svld1_x4(svcount_t, uint64_t const *);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_f64_x4)))
+svfloat64x4_t svld1_x4(svcount_t, float64_t const *);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_s64_x4)))
+svint64x4_t svld1_x4(svcount_t, int64_t const *);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_u16_x4)))
+svuint16x4_t svld1_x4(svcount_t, uint16_t const *);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_bf16_x4)))
+svbfloat16x4_t svld1_x4(svcount_t, bfloat16_t const *);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_f16_x4)))
+svfloat16x4_t svld1_x4(svcount_t, float16_t const *);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_s16_x4)))
+svint16x4_t svld1_x4(svcount_t, int16_t const *);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_u32_x4)))
+svuint32x4_t svld1_x4(svcount_t, uint32_t const *);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_f32_x4)))
+svfloat32x4_t svld1_x4(svcount_t, float32_t const *);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_s32_x4)))
+svint32x4_t svld1_x4(svcount_t, int32_t const *);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_vnum_u8_x2)))
+svuint8x2_t svld1_vnum_x2(svcount_t, uint8_t const *, int64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_vnum_s8_x2)))
+svint8x2_t svld1_vnum_x2(svcount_t, int8_t const *, int64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_vnum_mf8_x2)))
+svmfloat8x2_t svld1_vnum_x2(svcount_t, mfloat8_t const *, int64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_vnum_u64_x2)))
+svuint64x2_t svld1_vnum_x2(svcount_t, uint64_t const *, int64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_vnum_f64_x2)))
+svfloat64x2_t svld1_vnum_x2(svcount_t, float64_t const *, int64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_vnum_s64_x2)))
+svint64x2_t svld1_vnum_x2(svcount_t, int64_t const *, int64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_vnum_u16_x2)))
+svuint16x2_t svld1_vnum_x2(svcount_t, uint16_t const *, int64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_vnum_bf16_x2)))
+svbfloat16x2_t svld1_vnum_x2(svcount_t, bfloat16_t const *, int64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_vnum_f16_x2)))
+svfloat16x2_t svld1_vnum_x2(svcount_t, float16_t const *, int64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_vnum_s16_x2)))
+svint16x2_t svld1_vnum_x2(svcount_t, int16_t const *, int64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_vnum_u32_x2)))
+svuint32x2_t svld1_vnum_x2(svcount_t, uint32_t const *, int64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_vnum_f32_x2)))
+svfloat32x2_t svld1_vnum_x2(svcount_t, float32_t const *, int64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_vnum_s32_x2)))
+svint32x2_t svld1_vnum_x2(svcount_t, int32_t const *, int64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_vnum_u8_x4)))
+svuint8x4_t svld1_vnum_x4(svcount_t, uint8_t const *, int64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_vnum_s8_x4)))
+svint8x4_t svld1_vnum_x4(svcount_t, int8_t const *, int64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_vnum_mf8_x4)))
+svmfloat8x4_t svld1_vnum_x4(svcount_t, mfloat8_t const *, int64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_vnum_u64_x4)))
+svuint64x4_t svld1_vnum_x4(svcount_t, uint64_t const *, int64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_vnum_f64_x4)))
+svfloat64x4_t svld1_vnum_x4(svcount_t, float64_t const *, int64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_vnum_s64_x4)))
+svint64x4_t svld1_vnum_x4(svcount_t, int64_t const *, int64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_vnum_u16_x4)))
+svuint16x4_t svld1_vnum_x4(svcount_t, uint16_t const *, int64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_vnum_bf16_x4)))
+svbfloat16x4_t svld1_vnum_x4(svcount_t, bfloat16_t const *, int64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_vnum_f16_x4)))
+svfloat16x4_t svld1_vnum_x4(svcount_t, float16_t const *, int64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_vnum_s16_x4)))
+svint16x4_t svld1_vnum_x4(svcount_t, int16_t const *, int64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_vnum_u32_x4)))
+svuint32x4_t svld1_vnum_x4(svcount_t, uint32_t const *, int64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_vnum_f32_x4)))
+svfloat32x4_t svld1_vnum_x4(svcount_t, float32_t const *, int64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_vnum_s32_x4)))
+svint32x4_t svld1_vnum_x4(svcount_t, int32_t const *, int64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_u8_x2)))
+svuint8x2_t svldnt1_x2(svcount_t, uint8_t const *);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_s8_x2)))
+svint8x2_t svldnt1_x2(svcount_t, int8_t const *);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_mf8_x2)))
+svmfloat8x2_t svldnt1_x2(svcount_t, mfloat8_t const *);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_u64_x2)))
+svuint64x2_t svldnt1_x2(svcount_t, uint64_t const *);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_f64_x2)))
+svfloat64x2_t svldnt1_x2(svcount_t, float64_t const *);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_s64_x2)))
+svint64x2_t svldnt1_x2(svcount_t, int64_t const *);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_u16_x2)))
+svuint16x2_t svldnt1_x2(svcount_t, uint16_t const *);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_bf16_x2)))
+svbfloat16x2_t svldnt1_x2(svcount_t, bfloat16_t const *);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_f16_x2)))
+svfloat16x2_t svldnt1_x2(svcount_t, float16_t const *);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_s16_x2)))
+svint16x2_t svldnt1_x2(svcount_t, int16_t const *);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_u32_x2)))
+svuint32x2_t svldnt1_x2(svcount_t, uint32_t const *);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_f32_x2)))
+svfloat32x2_t svldnt1_x2(svcount_t, float32_t const *);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_s32_x2)))
+svint32x2_t svldnt1_x2(svcount_t, int32_t const *);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_u8_x4)))
+svuint8x4_t svldnt1_x4(svcount_t, uint8_t const *);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_s8_x4)))
+svint8x4_t svldnt1_x4(svcount_t, int8_t const *);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_mf8_x4)))
+svmfloat8x4_t svldnt1_x4(svcount_t, mfloat8_t const *);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_u64_x4)))
+svuint64x4_t svldnt1_x4(svcount_t, uint64_t const *);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_f64_x4)))
+svfloat64x4_t svldnt1_x4(svcount_t, float64_t const *);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_s64_x4)))
+svint64x4_t svldnt1_x4(svcount_t, int64_t const *);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_u16_x4)))
+svuint16x4_t svldnt1_x4(svcount_t, uint16_t const *);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_bf16_x4)))
+svbfloat16x4_t svldnt1_x4(svcount_t, bfloat16_t const *);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_f16_x4)))
+svfloat16x4_t svldnt1_x4(svcount_t, float16_t const *);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_s16_x4)))
+svint16x4_t svldnt1_x4(svcount_t, int16_t const *);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_u32_x4)))
+svuint32x4_t svldnt1_x4(svcount_t, uint32_t const *);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_f32_x4)))
+svfloat32x4_t svldnt1_x4(svcount_t, float32_t const *);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_s32_x4)))
+svint32x4_t svldnt1_x4(svcount_t, int32_t const *);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_vnum_u8_x2)))
+svuint8x2_t svldnt1_vnum_x2(svcount_t, uint8_t const *, int64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_vnum_s8_x2)))
+svint8x2_t svldnt1_vnum_x2(svcount_t, int8_t const *, int64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_vnum_mf8_x2)))
+svmfloat8x2_t svldnt1_vnum_x2(svcount_t, mfloat8_t const *, int64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_vnum_u64_x2)))
+svuint64x2_t svldnt1_vnum_x2(svcount_t, uint64_t const *, int64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_vnum_f64_x2)))
+svfloat64x2_t svldnt1_vnum_x2(svcount_t, float64_t const *, int64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_vnum_s64_x2)))
+svint64x2_t svldnt1_vnum_x2(svcount_t, int64_t const *, int64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_vnum_u16_x2)))
+svuint16x2_t svldnt1_vnum_x2(svcount_t, uint16_t const *, int64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_vnum_bf16_x2)))
+svbfloat16x2_t svldnt1_vnum_x2(svcount_t, bfloat16_t const *, int64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_vnum_f16_x2)))
+svfloat16x2_t svldnt1_vnum_x2(svcount_t, float16_t const *, int64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_vnum_s16_x2)))
+svint16x2_t svldnt1_vnum_x2(svcount_t, int16_t const *, int64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_vnum_u32_x2)))
+svuint32x2_t svldnt1_vnum_x2(svcount_t, uint32_t const *, int64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_vnum_f32_x2)))
+svfloat32x2_t svldnt1_vnum_x2(svcount_t, float32_t const *, int64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_vnum_s32_x2)))
+svint32x2_t svldnt1_vnum_x2(svcount_t, int32_t const *, int64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_vnum_u8_x4)))
+svuint8x4_t svldnt1_vnum_x4(svcount_t, uint8_t const *, int64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_vnum_s8_x4)))
+svint8x4_t svldnt1_vnum_x4(svcount_t, int8_t const *, int64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_vnum_mf8_x4)))
+svmfloat8x4_t svldnt1_vnum_x4(svcount_t, mfloat8_t const *, int64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_vnum_u64_x4)))
+svuint64x4_t svldnt1_vnum_x4(svcount_t, uint64_t const *, int64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_vnum_f64_x4)))
+svfloat64x4_t svldnt1_vnum_x4(svcount_t, float64_t const *, int64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_vnum_s64_x4)))
+svint64x4_t svldnt1_vnum_x4(svcount_t, int64_t const *, int64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_vnum_u16_x4)))
+svuint16x4_t svldnt1_vnum_x4(svcount_t, uint16_t const *, int64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_vnum_bf16_x4)))
+svbfloat16x4_t svldnt1_vnum_x4(svcount_t, bfloat16_t const *, int64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_vnum_f16_x4)))
+svfloat16x4_t svldnt1_vnum_x4(svcount_t, float16_t const *, int64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_vnum_s16_x4)))
+svint16x4_t svldnt1_vnum_x4(svcount_t, int16_t const *, int64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_vnum_u32_x4)))
+svuint32x4_t svldnt1_vnum_x4(svcount_t, uint32_t const *, int64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_vnum_f32_x4)))
+svfloat32x4_t svldnt1_vnum_x4(svcount_t, float32_t const *, int64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_vnum_s32_x4)))
+svint32x4_t svldnt1_vnum_x4(svcount_t, int32_t const *, int64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svreinterpret_b)))
+svbool_t svreinterpret(svcount_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svreinterpret_c)))
+svcount_t svreinterpret(svbool_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_u8_x2)))
+void svst1(svcount_t, uint8_t *, svuint8x2_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_s8_x2)))
+void svst1(svcount_t, int8_t *, svint8x2_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_mf8_x2)))
+void svst1(svcount_t, mfloat8_t *, svmfloat8x2_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_u64_x2)))
+void svst1(svcount_t, uint64_t *, svuint64x2_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_f64_x2)))
+void svst1(svcount_t, float64_t *, svfloat64x2_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_s64_x2)))
+void svst1(svcount_t, int64_t *, svint64x2_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_u16_x2)))
+void svst1(svcount_t, uint16_t *, svuint16x2_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_bf16_x2)))
+void svst1(svcount_t, bfloat16_t *, svbfloat16x2_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_f16_x2)))
+void svst1(svcount_t, float16_t *, svfloat16x2_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_s16_x2)))
+void svst1(svcount_t, int16_t *, svint16x2_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_u32_x2)))
+void svst1(svcount_t, uint32_t *, svuint32x2_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_f32_x2)))
+void svst1(svcount_t, float32_t *, svfloat32x2_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_s32_x2)))
+void svst1(svcount_t, int32_t *, svint32x2_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_u8_x4)))
+void svst1(svcount_t, uint8_t *, svuint8x4_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_s8_x4)))
+void svst1(svcount_t, int8_t *, svint8x4_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_mf8_x4)))
+void svst1(svcount_t, mfloat8_t *, svmfloat8x4_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_u64_x4)))
+void svst1(svcount_t, uint64_t *, svuint64x4_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_f64_x4)))
+void svst1(svcount_t, float64_t *, svfloat64x4_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_s64_x4)))
+void svst1(svcount_t, int64_t *, svint64x4_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_u16_x4)))
+void svst1(svcount_t, uint16_t *, svuint16x4_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_bf16_x4)))
+void svst1(svcount_t, bfloat16_t *, svbfloat16x4_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_f16_x4)))
+void svst1(svcount_t, float16_t *, svfloat16x4_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_s16_x4)))
+void svst1(svcount_t, int16_t *, svint16x4_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_u32_x4)))
+void svst1(svcount_t, uint32_t *, svuint32x4_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_f32_x4)))
+void svst1(svcount_t, float32_t *, svfloat32x4_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_s32_x4)))
+void svst1(svcount_t, int32_t *, svint32x4_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_vnum_u8_x2)))
+void svst1_vnum(svcount_t, uint8_t *, int64_t, svuint8x2_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_vnum_s8_x2)))
+void svst1_vnum(svcount_t, int8_t *, int64_t, svint8x2_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_vnum_mf8_x2)))
+void svst1_vnum(svcount_t, mfloat8_t *, int64_t, svmfloat8x2_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_vnum_u64_x2)))
+void svst1_vnum(svcount_t, uint64_t *, int64_t, svuint64x2_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_vnum_f64_x2)))
+void svst1_vnum(svcount_t, float64_t *, int64_t, svfloat64x2_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_vnum_s64_x2)))
+void svst1_vnum(svcount_t, int64_t *, int64_t, svint64x2_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_vnum_u16_x2)))
+void svst1_vnum(svcount_t, uint16_t *, int64_t, svuint16x2_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_vnum_bf16_x2)))
+void svst1_vnum(svcount_t, bfloat16_t *, int64_t, svbfloat16x2_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_vnum_f16_x2)))
+void svst1_vnum(svcount_t, float16_t *, int64_t, svfloat16x2_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_vnum_s16_x2)))
+void svst1_vnum(svcount_t, int16_t *, int64_t, svint16x2_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_vnum_u32_x2)))
+void svst1_vnum(svcount_t, uint32_t *, int64_t, svuint32x2_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_vnum_f32_x2)))
+void svst1_vnum(svcount_t, float32_t *, int64_t, svfloat32x2_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_vnum_s32_x2)))
+void svst1_vnum(svcount_t, int32_t *, int64_t, svint32x2_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_vnum_u8_x4)))
+void svst1_vnum(svcount_t, uint8_t *, int64_t, svuint8x4_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_vnum_s8_x4)))
+void svst1_vnum(svcount_t, int8_t *, int64_t, svint8x4_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_vnum_mf8_x4)))
+void svst1_vnum(svcount_t, mfloat8_t *, int64_t, svmfloat8x4_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_vnum_u64_x4)))
+void svst1_vnum(svcount_t, uint64_t *, int64_t, svuint64x4_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_vnum_f64_x4)))
+void svst1_vnum(svcount_t, float64_t *, int64_t, svfloat64x4_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_vnum_s64_x4)))
+void svst1_vnum(svcount_t, int64_t *, int64_t, svint64x4_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_vnum_u16_x4)))
+void svst1_vnum(svcount_t, uint16_t *, int64_t, svuint16x4_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_vnum_bf16_x4)))
+void svst1_vnum(svcount_t, bfloat16_t *, int64_t, svbfloat16x4_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_vnum_f16_x4)))
+void svst1_vnum(svcount_t, float16_t *, int64_t, svfloat16x4_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_vnum_s16_x4)))
+void svst1_vnum(svcount_t, int16_t *, int64_t, svint16x4_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_vnum_u32_x4)))
+void svst1_vnum(svcount_t, uint32_t *, int64_t, svuint32x4_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_vnum_f32_x4)))
+void svst1_vnum(svcount_t, float32_t *, int64_t, svfloat32x4_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_vnum_s32_x4)))
+void svst1_vnum(svcount_t, int32_t *, int64_t, svint32x4_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_u8_x2)))
+void svstnt1(svcount_t, uint8_t *, svuint8x2_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_s8_x2)))
+void svstnt1(svcount_t, int8_t *, svint8x2_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_mf8_x2)))
+void svstnt1(svcount_t, mfloat8_t *, svmfloat8x2_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_u64_x2)))
+void svstnt1(svcount_t, uint64_t *, svuint64x2_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_f64_x2)))
+void svstnt1(svcount_t, float64_t *, svfloat64x2_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_s64_x2)))
+void svstnt1(svcount_t, int64_t *, svint64x2_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_u16_x2)))
+void svstnt1(svcount_t, uint16_t *, svuint16x2_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_bf16_x2)))
+void svstnt1(svcount_t, bfloat16_t *, svbfloat16x2_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_f16_x2)))
+void svstnt1(svcount_t, float16_t *, svfloat16x2_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_s16_x2)))
+void svstnt1(svcount_t, int16_t *, svint16x2_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_u32_x2)))
+void svstnt1(svcount_t, uint32_t *, svuint32x2_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_f32_x2)))
+void svstnt1(svcount_t, float32_t *, svfloat32x2_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_s32_x2)))
+void svstnt1(svcount_t, int32_t *, svint32x2_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_u8_x4)))
+void svstnt1(svcount_t, uint8_t *, svuint8x4_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_s8_x4)))
+void svstnt1(svcount_t, int8_t *, svint8x4_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_mf8_x4)))
+void svstnt1(svcount_t, mfloat8_t *, svmfloat8x4_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_u64_x4)))
+void svstnt1(svcount_t, uint64_t *, svuint64x4_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_f64_x4)))
+void svstnt1(svcount_t, float64_t *, svfloat64x4_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_s64_x4)))
+void svstnt1(svcount_t, int64_t *, svint64x4_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_u16_x4)))
+void svstnt1(svcount_t, uint16_t *, svuint16x4_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_bf16_x4)))
+void svstnt1(svcount_t, bfloat16_t *, svbfloat16x4_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_f16_x4)))
+void svstnt1(svcount_t, float16_t *, svfloat16x4_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_s16_x4)))
+void svstnt1(svcount_t, int16_t *, svint16x4_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_u32_x4)))
+void svstnt1(svcount_t, uint32_t *, svuint32x4_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_f32_x4)))
+void svstnt1(svcount_t, float32_t *, svfloat32x4_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_s32_x4)))
+void svstnt1(svcount_t, int32_t *, svint32x4_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_vnum_u8_x2)))
+void svstnt1_vnum(svcount_t, uint8_t *, int64_t, svuint8x2_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_vnum_s8_x2)))
+void svstnt1_vnum(svcount_t, int8_t *, int64_t, svint8x2_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_vnum_mf8_x2)))
+void svstnt1_vnum(svcount_t, mfloat8_t *, int64_t, svmfloat8x2_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_vnum_u64_x2)))
+void svstnt1_vnum(svcount_t, uint64_t *, int64_t, svuint64x2_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_vnum_f64_x2)))
+void svstnt1_vnum(svcount_t, float64_t *, int64_t, svfloat64x2_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_vnum_s64_x2)))
+void svstnt1_vnum(svcount_t, int64_t *, int64_t, svint64x2_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_vnum_u16_x2)))
+void svstnt1_vnum(svcount_t, uint16_t *, int64_t, svuint16x2_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_vnum_bf16_x2)))
+void svstnt1_vnum(svcount_t, bfloat16_t *, int64_t, svbfloat16x2_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_vnum_f16_x2)))
+void svstnt1_vnum(svcount_t, float16_t *, int64_t, svfloat16x2_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_vnum_s16_x2)))
+void svstnt1_vnum(svcount_t, int16_t *, int64_t, svint16x2_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_vnum_u32_x2)))
+void svstnt1_vnum(svcount_t, uint32_t *, int64_t, svuint32x2_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_vnum_f32_x2)))
+void svstnt1_vnum(svcount_t, float32_t *, int64_t, svfloat32x2_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_vnum_s32_x2)))
+void svstnt1_vnum(svcount_t, int32_t *, int64_t, svint32x2_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_vnum_u8_x4)))
+void svstnt1_vnum(svcount_t, uint8_t *, int64_t, svuint8x4_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_vnum_s8_x4)))
+void svstnt1_vnum(svcount_t, int8_t *, int64_t, svint8x4_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_vnum_mf8_x4)))
+void svstnt1_vnum(svcount_t, mfloat8_t *, int64_t, svmfloat8x4_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_vnum_u64_x4)))
+void svstnt1_vnum(svcount_t, uint64_t *, int64_t, svuint64x4_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_vnum_f64_x4)))
+void svstnt1_vnum(svcount_t, float64_t *, int64_t, svfloat64x4_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_vnum_s64_x4)))
+void svstnt1_vnum(svcount_t, int64_t *, int64_t, svint64x4_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_vnum_u16_x4)))
+void svstnt1_vnum(svcount_t, uint16_t *, int64_t, svuint16x4_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_vnum_bf16_x4)))
+void svstnt1_vnum(svcount_t, bfloat16_t *, int64_t, svbfloat16x4_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_vnum_f16_x4)))
+void svstnt1_vnum(svcount_t, float16_t *, int64_t, svfloat16x4_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_vnum_s16_x4)))
+void svstnt1_vnum(svcount_t, int16_t *, int64_t, svint16x4_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_vnum_u32_x4)))
+void svstnt1_vnum(svcount_t, uint32_t *, int64_t, svuint32x4_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_vnum_f32_x4)))
+void svstnt1_vnum(svcount_t, float32_t *, int64_t, svfloat32x4_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_vnum_s32_x4)))
+void svstnt1_vnum(svcount_t, int32_t *, int64_t, svint32x4_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilege_c8_s64)))
+svcount_t svwhilege_c8(int64_t, int64_t, uint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilege_c32_s64)))
+svcount_t svwhilege_c32(int64_t, int64_t, uint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilege_c64_s64)))
+svcount_t svwhilege_c64(int64_t, int64_t, uint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilege_c16_s64)))
+svcount_t svwhilege_c16(int64_t, int64_t, uint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilege_c8_u64)))
+svcount_t svwhilege_c8(uint64_t, uint64_t, uint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilege_c32_u64)))
+svcount_t svwhilege_c32(uint64_t, uint64_t, uint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilege_c64_u64)))
+svcount_t svwhilege_c64(uint64_t, uint64_t, uint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilege_c16_u64)))
+svcount_t svwhilege_c16(uint64_t, uint64_t, uint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilegt_c8_s64)))
+svcount_t svwhilegt_c8(int64_t, int64_t, uint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilegt_c32_s64)))
+svcount_t svwhilegt_c32(int64_t, int64_t, uint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilegt_c64_s64)))
+svcount_t svwhilegt_c64(int64_t, int64_t, uint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilegt_c16_s64)))
+svcount_t svwhilegt_c16(int64_t, int64_t, uint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilegt_c8_u64)))
+svcount_t svwhilegt_c8(uint64_t, uint64_t, uint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilegt_c32_u64)))
+svcount_t svwhilegt_c32(uint64_t, uint64_t, uint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilegt_c64_u64)))
+svcount_t svwhilegt_c64(uint64_t, uint64_t, uint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilegt_c16_u64)))
+svcount_t svwhilegt_c16(uint64_t, uint64_t, uint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilele_c8_s64)))
+svcount_t svwhilele_c8(int64_t, int64_t, uint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilele_c32_s64)))
+svcount_t svwhilele_c32(int64_t, int64_t, uint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilele_c64_s64)))
+svcount_t svwhilele_c64(int64_t, int64_t, uint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilele_c16_s64)))
+svcount_t svwhilele_c16(int64_t, int64_t, uint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilele_c8_u64)))
+svcount_t svwhilele_c8(uint64_t, uint64_t, uint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilele_c32_u64)))
+svcount_t svwhilele_c32(uint64_t, uint64_t, uint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilele_c64_u64)))
+svcount_t svwhilele_c64(uint64_t, uint64_t, uint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilele_c16_u64)))
+svcount_t svwhilele_c16(uint64_t, uint64_t, uint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilelt_c8_u64)))
+svcount_t svwhilelt_c8(uint64_t, uint64_t, uint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilelt_c32_u64)))
+svcount_t svwhilelt_c32(uint64_t, uint64_t, uint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilelt_c64_u64)))
+svcount_t svwhilelt_c64(uint64_t, uint64_t, uint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilelt_c16_u64)))
+svcount_t svwhilelt_c16(uint64_t, uint64_t, uint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilelt_c8_s64)))
+svcount_t svwhilelt_c8(int64_t, int64_t, uint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilelt_c32_s64)))
+svcount_t svwhilelt_c32(int64_t, int64_t, uint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilelt_c64_s64)))
+svcount_t svwhilelt_c64(int64_t, int64_t, uint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilelt_c16_s64)))
+svcount_t svwhilelt_c16(int64_t, int64_t, uint64_t);
 __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svabd_n_f64_m)))
 svfloat64_t svabd_n_f64_m(svbool_t, svfloat64_t, float64_t);
 __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svabd_n_f32_m)))
@@ -31877,6 +32233,30 @@ __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svexpa_f32)))
 svfloat32_t svexpa(svuint32_t);
 __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svexpa_f16)))
 svfloat16_t svexpa(svuint16_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcompact_u32)))
+svuint32_t svcompact_u32(svbool_t, svuint32_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcompact_u64)))
+svuint64_t svcompact_u64(svbool_t, svuint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcompact_f64)))
+svfloat64_t svcompact_f64(svbool_t, svfloat64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcompact_f32)))
+svfloat32_t svcompact_f32(svbool_t, svfloat32_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcompact_s32)))
+svint32_t svcompact_s32(svbool_t, svint32_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcompact_s64)))
+svint64_t svcompact_s64(svbool_t, svint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcompact_u32)))
+svuint32_t svcompact(svbool_t, svuint32_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcompact_u64)))
+svuint64_t svcompact(svbool_t, svuint64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcompact_f64)))
+svfloat64_t svcompact(svbool_t, svfloat64_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcompact_f32)))
+svfloat32_t svcompact(svbool_t, svfloat32_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcompact_s32)))
+svint32_t svcompact(svbool_t, svint32_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcompact_s64)))
+svint64_t svcompact(svbool_t, svint64_t);
 #define svcvtnt_bf16_x      svcvtnt_bf16_m
 #define svcvtnt_bf16_f32_x  svcvtnt_bf16_f32_m
 #define svcvtnt_f16_x      svcvtnt_f16_m
diff --git a/lib/include/avx10_2_512bf16intrin.h b/lib/include/avx10_2_512bf16intrin.h
index 75290d22ef..3e9f27443e 100644
--- a/lib/include/avx10_2_512bf16intrin.h
+++ b/lib/include/avx10_2_512bf16intrin.h
@@ -21,9 +21,15 @@ typedef __bf16 __m512bh_u __attribute__((__vector_size__(64), __aligned__(1)));
 
 /* Define the default attributes for the functions in this file. */
 #define __DEFAULT_FN_ATTRS512                                                  \
-  __attribute__((__always_inline__, __nodebug__, __target__("avx10.2-512"),    \
+  __attribute__((__always_inline__, __nodebug__, __target__("avx10.2"),        \
                  __min_vector_width__(512)))
 
+#if defined(__cplusplus) && (__cplusplus >= 201103L)
+#define __DEFAULT_FN_ATTRS512_CONSTEXPR __DEFAULT_FN_ATTRS512 constexpr
+#else
+#define __DEFAULT_FN_ATTRS512_CONSTEXPR __DEFAULT_FN_ATTRS512
+#endif
+
 static __inline __m512bh __DEFAULT_FN_ATTRS512 _mm512_setzero_pbh(void) {
   return __builtin_bit_cast(__m512bh, _mm512_setzero_ps());
 }
@@ -167,13 +173,13 @@ _mm512_mask_blend_pbh(__mmask32 __U, __m512bh __A, __m512bh __W) {
                                                 (__v32bf)__A);
 }
 
-static __inline__ __m512bh __DEFAULT_FN_ATTRS512
+static __inline__ __m512bh __DEFAULT_FN_ATTRS512_CONSTEXPR
 _mm512_permutex2var_pbh(__m512bh __A, __m512i __I, __m512bh __B) {
   return (__m512bh)__builtin_ia32_vpermi2varhi512((__v32hi)__A, (__v32hi)__I,
                                                   (__v32hi)__B);
 }
 
-static __inline__ __m512bh __DEFAULT_FN_ATTRS512
+static __inline__ __m512bh __DEFAULT_FN_ATTRS512_CONSTEXPR
 _mm512_permutexvar_pbh(__m512i __A, __m512bh __B) {
   return (__m512bh)__builtin_ia32_permvarhi512((__v32hi)__B, (__v32hi)__A);
 }
@@ -423,7 +429,7 @@ _mm512_maskz_rsqrt_pbh(__mmask32 __U, __m512bh __A) {
       (__v32bf)_mm512_setzero_pbh(), (__mmask32)(__U)))
 
 static __inline__ __m512bh __DEFAULT_FN_ATTRS512 _mm512_sqrt_pbh(__m512bh __A) {
-  return (__m512bh)__builtin_ia32_vsqrtbf16512((__v32bf)__A);
+  return __builtin_elementwise_sqrt(__A);
 }
 
 static __inline__ __m512bh __DEFAULT_FN_ATTRS512
@@ -441,8 +447,8 @@ _mm512_maskz_sqrt_pbh(__mmask32 __U, __m512bh __A) {
 
 static __inline__ __m512bh __DEFAULT_FN_ATTRS512
 _mm512_fmadd_pbh(__m512bh __A, __m512bh __B, __m512bh __C) {
-  return (__m512bh)__builtin_ia32_vfmaddbf16512((__v32bf)__A, (__v32bf)__B,
-                                                (__v32bf)__C);
+  return (__m512bh)__builtin_elementwise_fma((__v32bf)__A, (__v32bf)__B,
+                                             (__v32bf)__C);
 }
 
 static __inline__ __m512bh __DEFAULT_FN_ATTRS512
@@ -469,8 +475,8 @@ static __inline__ __m512bh __DEFAULT_FN_ATTRS512 _mm512_maskz_fmadd_pbh(
 
 static __inline__ __m512bh __DEFAULT_FN_ATTRS512
 _mm512_fmsub_pbh(__m512bh __A, __m512bh __B, __m512bh __C) {
-  return (__m512bh)__builtin_ia32_vfmaddbf16512((__v32bf)__A, (__v32bf)__B,
-                                                -(__v32bf)__C);
+  return (__m512bh)__builtin_elementwise_fma((__v32bf)__A, (__v32bf)__B,
+                                             -(__v32bf)__C);
 }
 
 static __inline__ __m512bh __DEFAULT_FN_ATTRS512
@@ -497,8 +503,8 @@ static __inline__ __m512bh __DEFAULT_FN_ATTRS512 _mm512_maskz_fmsub_pbh(
 
 static __inline__ __m512bh __DEFAULT_FN_ATTRS512
 _mm512_fnmadd_pbh(__m512bh __A, __m512bh __B, __m512bh __C) {
-  return (__m512bh)__builtin_ia32_vfmaddbf16512((__v32bf)__A, -(__v32bf)__B,
-                                                (__v32bf)__C);
+  return (__m512bh)__builtin_elementwise_fma((__v32bf)__A, -(__v32bf)__B,
+                                             (__v32bf)__C);
 }
 
 static __inline__ __m512bh __DEFAULT_FN_ATTRS512 _mm512_mask_fnmadd_pbh(
@@ -527,8 +533,8 @@ static __inline__ __m512bh __DEFAULT_FN_ATTRS512 _mm512_maskz_fnmadd_pbh(
 
 static __inline__ __m512bh __DEFAULT_FN_ATTRS512
 _mm512_fnmsub_pbh(__m512bh __A, __m512bh __B, __m512bh __C) {
-  return (__m512bh)__builtin_ia32_vfmaddbf16512((__v32bf)__A, -(__v32bf)__B,
-                                                -(__v32bf)__C);
+  return (__m512bh)__builtin_elementwise_fma((__v32bf)__A, -(__v32bf)__B,
+                                             -(__v32bf)__C);
 }
 
 static __inline__ __m512bh __DEFAULT_FN_ATTRS512 _mm512_mask_fnmsub_pbh(
@@ -555,6 +561,7 @@ static __inline__ __m512bh __DEFAULT_FN_ATTRS512 _mm512_maskz_fnmsub_pbh(
       (__v32bf)_mm512_setzero_pbh());
 }
 
+#undef __DEFAULT_FN_ATTRS512_CONSTEXPR
 #undef __DEFAULT_FN_ATTRS512
 
 #endif
diff --git a/lib/include/avx10_2_512convertintrin.h b/lib/include/avx10_2_512convertintrin.h
index ee8cbf28ca..ffaed08cee 100644
--- a/lib/include/avx10_2_512convertintrin.h
+++ b/lib/include/avx10_2_512convertintrin.h
@@ -18,7 +18,7 @@
 
 /* Define the default attributes for the functions in this file. */
 #define __DEFAULT_FN_ATTRS512                                                  \
-  __attribute__((__always_inline__, __nodebug__, __target__("avx10.2-512"),    \
+  __attribute__((__always_inline__, __nodebug__, __target__("avx10.2"),        \
                  __min_vector_width__(512)))
 
 static __inline__ __m512h __DEFAULT_FN_ATTRS512 _mm512_cvtx2ps_ph(__m512 __A,
diff --git a/lib/include/avx10_2_512niintrin.h b/lib/include/avx10_2_512niintrin.h
index 7e614f7740..b2215b72c5 100644
--- a/lib/include/avx10_2_512niintrin.h
+++ b/lib/include/avx10_2_512niintrin.h
@@ -17,7 +17,7 @@
 #define __AVX10_2_512NIINTRIN_H
 
 #define __DEFAULT_FN_ATTRS                                                     \
-  __attribute__((__always_inline__, __nodebug__, __target__("avx10.2-512"),    \
+  __attribute__((__always_inline__, __nodebug__, __target__("avx10.2"),        \
                  __min_vector_width__(512)))
 
 /* VNNI FP16 */
@@ -64,8 +64,8 @@ static __inline__ __m512 __DEFAULT_FN_ATTRS _mm512_maskz_dpph_ps(__mmask16 __U,
 static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_dpbssd_epi32(__m512i __W,
                                                                  __m512i __A,
                                                                  __m512i __B) {
-  return (__m512i)__builtin_ia32_vpdpbssd512((__v16si)__W, (__v16si)__A,
-                                             (__v16si)__B);
+  return (__m512i)__builtin_ia32_vpdpbssd512((__v16si)__W, (__v64qi)__A,
+                                             (__v64qi)__B);
 }
 
 static __inline__ __m512i __DEFAULT_FN_ATTRS
@@ -84,8 +84,8 @@ static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_maskz_dpbssd_epi32(
 static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_dpbssds_epi32(__m512i __W,
                                                                   __m512i __A,
                                                                   __m512i __B) {
-  return (__m512i)__builtin_ia32_vpdpbssds512((__v16si)__W, (__v16si)__A,
-                                              (__v16si)__B);
+  return (__m512i)__builtin_ia32_vpdpbssds512((__v16si)__W, (__v64qi)__A,
+                                              (__v64qi)__B);
 }
 
 static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_mask_dpbssds_epi32(
@@ -104,8 +104,8 @@ static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_maskz_dpbssds_epi32(
 static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_dpbsud_epi32(__m512i __W,
                                                                  __m512i __A,
                                                                  __m512i __B) {
-  return (__m512i)__builtin_ia32_vpdpbsud512((__v16si)__W, (__v16si)__A,
-                                             (__v16si)__B);
+  return (__m512i)__builtin_ia32_vpdpbsud512((__v16si)__W, (__v64qi)__A,
+                                             (__v64qu)__B);
 }
 
 static __inline__ __m512i __DEFAULT_FN_ATTRS
@@ -124,8 +124,8 @@ static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_maskz_dpbsud_epi32(
 static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_dpbsuds_epi32(__m512i __W,
                                                                   __m512i __A,
                                                                   __m512i __B) {
-  return (__m512i)__builtin_ia32_vpdpbsuds512((__v16si)__W, (__v16si)__A,
-                                              (__v16si)__B);
+  return (__m512i)__builtin_ia32_vpdpbsuds512((__v16si)__W, (__v64qi)__A,
+                                              (__v64qu)__B);
 }
 
 static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_mask_dpbsuds_epi32(
@@ -144,8 +144,8 @@ static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_maskz_dpbsuds_epi32(
 static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_dpbuud_epi32(__m512i __W,
                                                                  __m512i __A,
                                                                  __m512i __B) {
-  return (__m512i)__builtin_ia32_vpdpbuud512((__v16si)__W, (__v16si)__A,
-                                             (__v16si)__B);
+  return (__m512i)__builtin_ia32_vpdpbuud512((__v16si)__W, (__v64qu)__A,
+                                             (__v64qu)__B);
 }
 
 static __inline__ __m512i __DEFAULT_FN_ATTRS
@@ -164,8 +164,8 @@ static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_maskz_dpbuud_epi32(
 static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_dpbuuds_epi32(__m512i __W,
                                                                   __m512i __A,
                                                                   __m512i __B) {
-  return (__m512i)__builtin_ia32_vpdpbuuds512((__v16si)__W, (__v16si)__A,
-                                              (__v16si)__B);
+  return (__m512i)__builtin_ia32_vpdpbuuds512((__v16si)__W, (__v64qu)__A,
+                                              (__v64qu)__B);
 }
 
 static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_mask_dpbuuds_epi32(
@@ -185,8 +185,8 @@ static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_maskz_dpbuuds_epi32(
 static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_dpwsud_epi32(__m512i __A,
                                                                  __m512i __B,
                                                                  __m512i __C) {
-  return (__m512i)__builtin_ia32_vpdpwsud512((__v16si)__A, (__v16si)__B,
-                                             (__v16si)__C);
+  return (__m512i)__builtin_ia32_vpdpwsud512((__v16si)__A, (__v32hi)__B,
+                                             (__v32hu)__C);
 }
 
 static __inline__ __m512i __DEFAULT_FN_ATTRS
@@ -197,7 +197,7 @@ _mm512_mask_dpwsud_epi32(__m512i __A, __mmask16 __U, __m512i __B, __m512i __C) {
 }
 
 static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_maskz_dpwsud_epi32(
-    __m512i __A, __mmask16 __U, __m512i __B, __m512i __C) {
+    __mmask16 __U, __m512i __A, __m512i __B, __m512i __C) {
   return (__m512i)__builtin_ia32_selectd_512(
       (__mmask16)__U, (__v16si)_mm512_dpwsud_epi32(__A, __B, __C),
       (__v16si)_mm512_setzero_si512());
@@ -206,8 +206,8 @@ static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_maskz_dpwsud_epi32(
 static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_dpwsuds_epi32(__m512i __A,
                                                                   __m512i __B,
                                                                   __m512i __C) {
-  return (__m512i)__builtin_ia32_vpdpwsuds512((__v16si)__A, (__v16si)__B,
-                                              (__v16si)__C);
+  return (__m512i)__builtin_ia32_vpdpwsuds512((__v16si)__A, (__v32hi)__B,
+                                              (__v32hu)__C);
 }
 
 static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_mask_dpwsuds_epi32(
@@ -218,7 +218,7 @@ static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_mask_dpwsuds_epi32(
 }
 
 static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_maskz_dpwsuds_epi32(
-    __m512i __A, __mmask16 __U, __m512i __B, __m512i __C) {
+    __mmask16 __U, __m512i __A, __m512i __B, __m512i __C) {
   return (__m512i)__builtin_ia32_selectd_512(
       (__mmask16)__U, (__v16si)_mm512_dpwsuds_epi32(__A, __B, __C),
       (__v16si)_mm512_setzero_si512());
@@ -227,8 +227,8 @@ static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_maskz_dpwsuds_epi32(
 static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_dpwusd_epi32(__m512i __A,
                                                                  __m512i __B,
                                                                  __m512i __C) {
-  return (__m512i)__builtin_ia32_vpdpwusd512((__v16si)__A, (__v16si)__B,
-                                             (__v16si)__C);
+  return (__m512i)__builtin_ia32_vpdpwusd512((__v16si)__A, (__v32hu)__B,
+                                             (__v32hi)__C);
 }
 
 static __inline__ __m512i __DEFAULT_FN_ATTRS
@@ -239,7 +239,7 @@ _mm512_mask_dpwusd_epi32(__m512i __A, __mmask16 __U, __m512i __B, __m512i __C) {
 }
 
 static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_maskz_dpwusd_epi32(
-    __m512i __A, __mmask16 __U, __m512i __B, __m512i __C) {
+    __mmask16 __U, __m512i __A, __m512i __B, __m512i __C) {
   return (__m512i)__builtin_ia32_selectd_512(
       (__mmask16)__U, (__v16si)_mm512_dpwusd_epi32(__A, __B, __C),
       (__v16si)_mm512_setzero_si512());
@@ -248,8 +248,8 @@ static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_maskz_dpwusd_epi32(
 static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_dpwusds_epi32(__m512i __A,
                                                                   __m512i __B,
                                                                   __m512i __C) {
-  return (__m512i)__builtin_ia32_vpdpwusds512((__v16si)__A, (__v16si)__B,
-                                              (__v16si)__C);
+  return (__m512i)__builtin_ia32_vpdpwusds512((__v16si)__A, (__v32hu)__B,
+                                              (__v32hi)__C);
 }
 
 static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_mask_dpwusds_epi32(
@@ -260,7 +260,7 @@ static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_mask_dpwusds_epi32(
 }
 
 static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_maskz_dpwusds_epi32(
-    __m512i __A, __mmask16 __U, __m512i __B, __m512i __C) {
+    __mmask16 __U, __m512i __A, __m512i __B, __m512i __C) {
   return (__m512i)__builtin_ia32_selectd_512(
       (__mmask16)__U, (__v16si)_mm512_dpwusds_epi32(__A, __B, __C),
       (__v16si)_mm512_setzero_si512());
@@ -269,8 +269,8 @@ static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_maskz_dpwusds_epi32(
 static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_dpwuud_epi32(__m512i __A,
                                                                  __m512i __B,
                                                                  __m512i __C) {
-  return (__m512i)__builtin_ia32_vpdpwuud512((__v16si)__A, (__v16si)__B,
-                                             (__v16si)__C);
+  return (__m512i)__builtin_ia32_vpdpwuud512((__v16si)__A, (__v32hu)__B,
+                                             (__v32hu)__C);
 }
 
 static __inline__ __m512i __DEFAULT_FN_ATTRS
@@ -281,7 +281,7 @@ _mm512_mask_dpwuud_epi32(__m512i __A, __mmask16 __U, __m512i __B, __m512i __C) {
 }
 
 static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_maskz_dpwuud_epi32(
-    __m512i __A, __mmask16 __U, __m512i __B, __m512i __C) {
+    __mmask16 __U, __m512i __A, __m512i __B, __m512i __C) {
   return (__m512i)__builtin_ia32_selectd_512(
       (__mmask16)__U, (__v16si)_mm512_dpwuud_epi32(__A, __B, __C),
       (__v16si)_mm512_setzero_si512());
@@ -290,8 +290,8 @@ static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_maskz_dpwuud_epi32(
 static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_dpwuuds_epi32(__m512i __A,
                                                                   __m512i __B,
                                                                   __m512i __C) {
-  return (__m512i)__builtin_ia32_vpdpwuuds512((__v16si)__A, (__v16si)__B,
-                                              (__v16si)__C);
+  return (__m512i)__builtin_ia32_vpdpwuuds512((__v16si)__A, (__v32hu)__B,
+                                              (__v32hu)__C);
 }
 
 static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_mask_dpwuuds_epi32(
@@ -302,7 +302,7 @@ static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_mask_dpwuuds_epi32(
 }
 
 static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_maskz_dpwuuds_epi32(
-    __m512i __A, __mmask16 __U, __m512i __B, __m512i __C) {
+    __mmask16 __U, __m512i __A, __m512i __B, __m512i __C) {
   return (__m512i)__builtin_ia32_selectd_512(
       (__mmask16)__U, (__v16si)_mm512_dpwuuds_epi32(__A, __B, __C),
       (__v16si)_mm512_setzero_si512());
diff --git a/lib/include/avx10_2_512satcvtdsintrin.h b/lib/include/avx10_2_512satcvtdsintrin.h
index 012a6282b5..3688f4c0df 100644
--- a/lib/include/avx10_2_512satcvtdsintrin.h
+++ b/lib/include/avx10_2_512satcvtdsintrin.h
@@ -16,7 +16,7 @@
 
 /* Define the default attributes for the functions in this file. */
 #define __DEFAULT_FN_ATTRS                                                     \
-  __attribute__((__always_inline__, __nodebug__, __target__("avx10.2-512"),    \
+  __attribute__((__always_inline__, __nodebug__, __target__("avx10.2"),        \
                  __min_vector_width__(512)))
 
 // 512 bit : Double -> Int
diff --git a/lib/include/avx10_2bf16intrin.h b/lib/include/avx10_2bf16intrin.h
index 66797ae00f..179ec53402 100644
--- a/lib/include/avx10_2bf16intrin.h
+++ b/lib/include/avx10_2bf16intrin.h
@@ -21,12 +21,20 @@ typedef __bf16 __m256bh_u __attribute__((__vector_size__(32), __aligned__(1)));
 
 /* Define the default attributes for the functions in this file. */
 #define __DEFAULT_FN_ATTRS256                                                  \
-  __attribute__((__always_inline__, __nodebug__, __target__("avx10.2-256"),    \
+  __attribute__((__always_inline__, __nodebug__, __target__("avx10.2"),        \
                  __min_vector_width__(256)))
 #define __DEFAULT_FN_ATTRS128                                                  \
-  __attribute__((__always_inline__, __nodebug__, __target__("avx10.2-256"),    \
+  __attribute__((__always_inline__, __nodebug__, __target__("avx10.2"),        \
                  __min_vector_width__(128)))
 
+#if defined(__cplusplus) && (__cplusplus >= 201103L)
+#define __DEFAULT_FN_ATTRS128_CONSTEXPR __DEFAULT_FN_ATTRS128 constexpr
+#define __DEFAULT_FN_ATTRS256_CONSTEXPR __DEFAULT_FN_ATTRS256 constexpr
+#else
+#define __DEFAULT_FN_ATTRS128_CONSTEXPR __DEFAULT_FN_ATTRS128
+#define __DEFAULT_FN_ATTRS256_CONSTEXPR __DEFAULT_FN_ATTRS256
+#endif
+
 static __inline __m256bh __DEFAULT_FN_ATTRS256 _mm256_setzero_pbh(void) {
   return __builtin_bit_cast(__m256bh, _mm256_setzero_ps());
 }
@@ -213,12 +221,12 @@ static __inline__ __m128bh __DEFAULT_FN_ATTRS128 _mm_move_sbh(__m128bh __a,
   return __a;
 }
 
-static __inline__ __m128bh __DEFAULT_FN_ATTRS128
+static __inline__ __m128bh __DEFAULT_FN_ATTRS128_CONSTEXPR
 _mm_mask_move_sbh(__m128bh __W, __mmask8 __U, __m128bh __A, __m128bh __B) {
   return __builtin_ia32_selectsbf_128(__U, _mm_move_sbh(__A, __B), __W);
 }
 
-static __inline__ __m128bh __DEFAULT_FN_ATTRS128
+static __inline__ __m128bh __DEFAULT_FN_ATTRS128_CONSTEXPR
 _mm_maskz_move_sbh(__mmask8 __U, __m128bh __A, __m128bh __B) {
   return __builtin_ia32_selectsbf_128(__U, _mm_move_sbh(__A, __B),
                                       _mm_setzero_pbh());
@@ -287,24 +295,24 @@ _mm256_mask_blend_pbh(__mmask16 __U, __m256bh __A, __m256bh __W) {
                                                 (__v16bf)__A);
 }
 
-static __inline__ __m128bh __DEFAULT_FN_ATTRS128
+static __inline__ __m128bh __DEFAULT_FN_ATTRS128_CONSTEXPR
 _mm_permutex2var_pbh(__m128bh __A, __m128i __I, __m128bh __B) {
   return (__m128bh)__builtin_ia32_vpermi2varhi128((__v8hi)__A, (__v8hi)__I,
                                                   (__v8hi)__B);
 }
 
-static __inline__ __m256bh __DEFAULT_FN_ATTRS256
+static __inline__ __m256bh __DEFAULT_FN_ATTRS256_CONSTEXPR
 _mm256_permutex2var_pbh(__m256bh __A, __m256i __I, __m256bh __B) {
   return (__m256bh)__builtin_ia32_vpermi2varhi256((__v16hi)__A, (__v16hi)__I,
                                                   (__v16hi)__B);
 }
 
-static __inline__ __m128bh __DEFAULT_FN_ATTRS128
+static __inline__ __m128bh __DEFAULT_FN_ATTRS128_CONSTEXPR
 _mm_permutexvar_pbh(__m128i __A, __m128bh __B) {
   return (__m128bh)__builtin_ia32_permvarhi128((__v8hi)__B, (__v8hi)__A);
 }
 
-static __inline__ __m256bh __DEFAULT_FN_ATTRS256
+static __inline__ __m256bh __DEFAULT_FN_ATTRS256_CONSTEXPR
 _mm256_permutexvar_pbh(__m256i __A, __m256bh __B) {
   return (__m256bh)__builtin_ia32_permvarhi256((__v16hi)__B, (__v16hi)__A);
 }
@@ -519,34 +527,34 @@ _mm_maskz_min_pbh(__mmask8 __U, __m128bh __A, __m128bh __B) {
       (__mmask8)__U, (__v8bf)_mm_min_pbh(__A, __B), (__v8bf)_mm_setzero_pbh());
 }
 
-static __inline__ int __DEFAULT_FN_ATTRS128 _mm_comieq_sbh(__m128bh A,
-                                                           __m128bh B) {
-  return __builtin_ia32_vcomisbf16eq((__v8bf)A, (__v8bf)B);
+static __inline__ int __DEFAULT_FN_ATTRS128 _mm_comieq_sbh(__m128bh __A,
+                                                           __m128bh __B) {
+  return __builtin_ia32_vcomisbf16eq((__v8bf)__A, (__v8bf)__B);
 }
 
-static __inline__ int __DEFAULT_FN_ATTRS128 _mm_comilt_sbh(__m128bh A,
-                                                           __m128bh B) {
-  return __builtin_ia32_vcomisbf16lt((__v8bf)A, (__v8bf)B);
+static __inline__ int __DEFAULT_FN_ATTRS128 _mm_comilt_sbh(__m128bh __A,
+                                                           __m128bh __B) {
+  return __builtin_ia32_vcomisbf16lt((__v8bf)__A, (__v8bf)__B);
 }
 
-static __inline__ int __DEFAULT_FN_ATTRS128 _mm_comile_sbh(__m128bh A,
-                                                           __m128bh B) {
-  return __builtin_ia32_vcomisbf16le((__v8bf)A, (__v8bf)B);
+static __inline__ int __DEFAULT_FN_ATTRS128 _mm_comile_sbh(__m128bh __A,
+                                                           __m128bh __B) {
+  return __builtin_ia32_vcomisbf16le((__v8bf)__A, (__v8bf)__B);
 }
 
-static __inline__ int __DEFAULT_FN_ATTRS128 _mm_comigt_sbh(__m128bh A,
-                                                           __m128bh B) {
-  return __builtin_ia32_vcomisbf16gt((__v8bf)A, (__v8bf)B);
+static __inline__ int __DEFAULT_FN_ATTRS128 _mm_comigt_sbh(__m128bh __A,
+                                                           __m128bh __B) {
+  return __builtin_ia32_vcomisbf16gt((__v8bf)__A, (__v8bf)__B);
 }
 
-static __inline__ int __DEFAULT_FN_ATTRS128 _mm_comige_sbh(__m128bh A,
-                                                           __m128bh B) {
-  return __builtin_ia32_vcomisbf16ge((__v8bf)A, (__v8bf)B);
+static __inline__ int __DEFAULT_FN_ATTRS128 _mm_comige_sbh(__m128bh __A,
+                                                           __m128bh __B) {
+  return __builtin_ia32_vcomisbf16ge((__v8bf)__A, (__v8bf)__B);
 }
 
-static __inline__ int __DEFAULT_FN_ATTRS128 _mm_comineq_sbh(__m128bh A,
-                                                            __m128bh B) {
-  return __builtin_ia32_vcomisbf16neq((__v8bf)A, (__v8bf)B);
+static __inline__ int __DEFAULT_FN_ATTRS128 _mm_comineq_sbh(__m128bh __A,
+                                                            __m128bh __B) {
+  return __builtin_ia32_vcomisbf16neq((__v8bf)__A, (__v8bf)__B);
 }
 
 #define _mm256_cmp_pbh_mask(__A, __B, __P)                                     \
@@ -818,7 +826,7 @@ _mm_maskz_rsqrt_pbh(__mmask8 __U, __m128bh __A) {
       (__v8bf)_mm_setzero_pbh(), (__mmask8)(__U)))
 
 static __inline__ __m256bh __DEFAULT_FN_ATTRS256 _mm256_sqrt_pbh(__m256bh __A) {
-  return (__m256bh)__builtin_ia32_vsqrtbf16256((__v16bf)__A);
+  return __builtin_elementwise_sqrt(__A);
 }
 
 static __inline__ __m256bh __DEFAULT_FN_ATTRS256
@@ -835,7 +843,7 @@ _mm256_maskz_sqrt_pbh(__mmask16 __U, __m256bh __A) {
 }
 
 static __inline__ __m128bh __DEFAULT_FN_ATTRS128 _mm_sqrt_pbh(__m128bh __A) {
-  return (__m128bh)__builtin_ia32_vsqrtbf16((__v8bf)__A);
+  return __builtin_elementwise_sqrt(__A);
 }
 
 static __inline__ __m128bh __DEFAULT_FN_ATTRS128
@@ -852,8 +860,8 @@ _mm_maskz_sqrt_pbh(__mmask8 __U, __m128bh __A) {
 
 static __inline__ __m256bh __DEFAULT_FN_ATTRS256
 _mm256_fmadd_pbh(__m256bh __A, __m256bh __B, __m256bh __C) {
-  return (__m256bh)__builtin_ia32_vfmaddbf16256((__v16bf)__A, (__v16bf)__B,
-                                                (__v16bf)__C);
+  return (__m256bh)__builtin_elementwise_fma((__v16bf)__A, (__v16bf)__B,
+                                             (__v16bf)__C);
 }
 
 static __inline__ __m256bh __DEFAULT_FN_ATTRS256
@@ -880,8 +888,8 @@ static __inline__ __m256bh __DEFAULT_FN_ATTRS256 _mm256_maskz_fmadd_pbh(
 
 static __inline__ __m256bh __DEFAULT_FN_ATTRS256
 _mm256_fmsub_pbh(__m256bh __A, __m256bh __B, __m256bh __C) {
-  return (__m256bh)__builtin_ia32_vfmaddbf16256((__v16bf)__A, (__v16bf)__B,
-                                                -(__v16bf)__C);
+  return (__m256bh)__builtin_elementwise_fma((__v16bf)__A, (__v16bf)__B,
+                                             -(__v16bf)__C);
 }
 
 static __inline__ __m256bh __DEFAULT_FN_ATTRS256
@@ -908,8 +916,8 @@ static __inline__ __m256bh __DEFAULT_FN_ATTRS256 _mm256_maskz_fmsub_pbh(
 
 static __inline__ __m256bh __DEFAULT_FN_ATTRS256
 _mm256_fnmadd_pbh(__m256bh __A, __m256bh __B, __m256bh __C) {
-  return (__m256bh)__builtin_ia32_vfmaddbf16256((__v16bf)__A, -(__v16bf)__B,
-                                                (__v16bf)__C);
+  return (__m256bh)__builtin_elementwise_fma((__v16bf)__A, -(__v16bf)__B,
+                                             (__v16bf)__C);
 }
 
 static __inline__ __m256bh __DEFAULT_FN_ATTRS256 _mm256_mask_fnmadd_pbh(
@@ -938,8 +946,8 @@ static __inline__ __m256bh __DEFAULT_FN_ATTRS256 _mm256_maskz_fnmadd_pbh(
 
 static __inline__ __m256bh __DEFAULT_FN_ATTRS256
 _mm256_fnmsub_pbh(__m256bh __A, __m256bh __B, __m256bh __C) {
-  return (__m256bh)__builtin_ia32_vfmaddbf16256((__v16bf)__A, -(__v16bf)__B,
-                                                -(__v16bf)__C);
+  return (__m256bh)__builtin_elementwise_fma((__v16bf)__A, -(__v16bf)__B,
+                                             -(__v16bf)__C);
 }
 
 static __inline__ __m256bh __DEFAULT_FN_ATTRS256 _mm256_mask_fnmsub_pbh(
@@ -969,8 +977,8 @@ static __inline__ __m256bh __DEFAULT_FN_ATTRS256 _mm256_maskz_fnmsub_pbh(
 static __inline__ __m128bh __DEFAULT_FN_ATTRS128 _mm_fmadd_pbh(__m128bh __A,
                                                                __m128bh __B,
                                                                __m128bh __C) {
-  return (__m128bh)__builtin_ia32_vfmaddbf16128((__v8bf)__A, (__v8bf)__B,
-                                                (__v8bf)__C);
+  return (__m128bh)__builtin_elementwise_fma((__v8bf)__A, (__v8bf)__B,
+                                             (__v8bf)__C);
 }
 
 static __inline__ __m128bh __DEFAULT_FN_ATTRS128
@@ -997,8 +1005,8 @@ _mm_maskz_fmadd_pbh(__mmask8 __U, __m128bh __A, __m128bh __B, __m128bh __C) {
 static __inline__ __m128bh __DEFAULT_FN_ATTRS128 _mm_fmsub_pbh(__m128bh __A,
                                                                __m128bh __B,
                                                                __m128bh __C) {
-  return (__m128bh)__builtin_ia32_vfmaddbf16128((__v8bf)__A, (__v8bf)__B,
-                                                -(__v8bf)__C);
+  return (__m128bh)__builtin_elementwise_fma((__v8bf)__A, (__v8bf)__B,
+                                             -(__v8bf)__C);
 }
 
 static __inline__ __m128bh __DEFAULT_FN_ATTRS128
@@ -1025,8 +1033,8 @@ _mm_maskz_fmsub_pbh(__mmask8 __U, __m128bh __A, __m128bh __B, __m128bh __C) {
 static __inline__ __m128bh __DEFAULT_FN_ATTRS128 _mm_fnmadd_pbh(__m128bh __A,
                                                                 __m128bh __B,
                                                                 __m128bh __C) {
-  return (__m128bh)__builtin_ia32_vfmaddbf16128((__v8bf)__A, -(__v8bf)__B,
-                                                (__v8bf)__C);
+  return (__m128bh)__builtin_elementwise_fma((__v8bf)__A, -(__v8bf)__B,
+                                             (__v8bf)__C);
 }
 
 static __inline__ __m128bh __DEFAULT_FN_ATTRS128
@@ -1053,8 +1061,8 @@ _mm_maskz_fnmadd_pbh(__mmask8 __U, __m128bh __A, __m128bh __B, __m128bh __C) {
 static __inline__ __m128bh __DEFAULT_FN_ATTRS128 _mm_fnmsub_pbh(__m128bh __A,
                                                                 __m128bh __B,
                                                                 __m128bh __C) {
-  return (__m128bh)__builtin_ia32_vfmaddbf16128((__v8bf)__A, -(__v8bf)__B,
-                                                -(__v8bf)__C);
+  return (__m128bh)__builtin_elementwise_fma((__v8bf)__A, -(__v8bf)__B,
+                                             -(__v8bf)__C);
 }
 
 static __inline__ __m128bh __DEFAULT_FN_ATTRS128
@@ -1080,6 +1088,7 @@ _mm_maskz_fnmsub_pbh(__mmask8 __U, __m128bh __A, __m128bh __B, __m128bh __C) {
 
 #undef __DEFAULT_FN_ATTRS128
 #undef __DEFAULT_FN_ATTRS256
-
+#undef __DEFAULT_FN_ATTRS128_CONSTEXPR
+#undef __DEFAULT_FN_ATTRS256_CONSTEXPR
 #endif
 #endif
diff --git a/lib/include/avx10_2convertintrin.h b/lib/include/avx10_2convertintrin.h
index 19d91d41f7..2800ee7311 100644
--- a/lib/include/avx10_2convertintrin.h
+++ b/lib/include/avx10_2convertintrin.h
@@ -18,10 +18,10 @@
 
 /* Define the default attributes for the functions in this file. */
 #define __DEFAULT_FN_ATTRS128                                                  \
-  __attribute__((__always_inline__, __nodebug__, __target__("avx10.2-256"),    \
+  __attribute__((__always_inline__, __nodebug__, __target__("avx10.2"),        \
                  __min_vector_width__(128)))
 #define __DEFAULT_FN_ATTRS256                                                  \
-  __attribute__((__always_inline__, __nodebug__, __target__("avx10.2-256"),    \
+  __attribute__((__always_inline__, __nodebug__, __target__("avx10.2"),        \
                  __min_vector_width__(256)))
 
 // clang-format off
diff --git a/lib/include/avx10_2copyintrin.h b/lib/include/avx10_2copyintrin.h
index 76b8f8ced5..37dc06ac9e 100644
--- a/lib/include/avx10_2copyintrin.h
+++ b/lib/include/avx10_2copyintrin.h
@@ -16,7 +16,7 @@
 
 /* Define the default attributes for the functions in this file. */
 #define __DEFAULT_FN_ATTRS128                                                  \
-  __attribute__((__always_inline__, __nodebug__, __target__("avx10.2-256"),    \
+  __attribute__((__always_inline__, __nodebug__, __target__("avx10.2"),        \
                  __min_vector_width__(128)))
 
 /// Constructs a 128-bit integer vector, setting the lower 32 bits to the
diff --git a/lib/include/avx10_2niintrin.h b/lib/include/avx10_2niintrin.h
index 992be18f77..9a772ec434 100644
--- a/lib/include/avx10_2niintrin.h
+++ b/lib/include/avx10_2niintrin.h
@@ -16,10 +16,10 @@
 #define __AVX10_2NIINTRIN_H
 
 #define __DEFAULT_FN_ATTRS128                                                  \
-  __attribute__((__always_inline__, __nodebug__, __target__("avx10.2-256"),    \
+  __attribute__((__always_inline__, __nodebug__, __target__("avx10.2"),        \
                  __min_vector_width__(128)))
 #define __DEFAULT_FN_ATTRS256                                                  \
-  __attribute__((__always_inline__, __nodebug__, __target__("avx10.2-256"),    \
+  __attribute__((__always_inline__, __nodebug__, __target__("avx10.2"),        \
                  __min_vector_width__(256)))
 
 /* VNNI FP16 */
@@ -253,7 +253,7 @@ _mm_mask_dpwsud_epi32(__m128i __A, __mmask8 __U, __m128i __B, __m128i __C) {
 }
 
 static __inline__ __m128i __DEFAULT_FN_ATTRS128
-_mm_maskz_dpwsud_epi32(__m128i __A, __mmask8 __U, __m128i __B, __m128i __C) {
+_mm_maskz_dpwsud_epi32(__mmask8 __U, __m128i __A, __m128i __B, __m128i __C) {
   return (__m128i)__builtin_ia32_selectd_128(
       (__mmask8)__U, (__v4si)_mm_dpwsud_epi32(__A, __B, __C),
       (__v4si)_mm_setzero_si128());
@@ -266,7 +266,7 @@ _mm256_mask_dpwsud_epi32(__m256i __A, __mmask8 __U, __m256i __B, __m256i __C) {
 }
 
 static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_maskz_dpwsud_epi32(__m256i __A, __mmask8 __U, __m256i __B, __m256i __C) {
+_mm256_maskz_dpwsud_epi32(__mmask8 __U, __m256i __A, __m256i __B, __m256i __C) {
   return (__m256i)__builtin_ia32_selectd_256(
       (__mmask8)__U, (__v8si)_mm256_dpwsud_epi32(__A, __B, __C),
       (__v8si)_mm256_setzero_si256());
@@ -279,7 +279,7 @@ _mm_mask_dpwsuds_epi32(__m128i __A, __mmask8 __U, __m128i __B, __m128i __C) {
 }
 
 static __inline__ __m128i __DEFAULT_FN_ATTRS128
-_mm_maskz_dpwsuds_epi32(__m128i __A, __mmask8 __U, __m128i __B, __m128i __C) {
+_mm_maskz_dpwsuds_epi32(__mmask8 __U, __m128i __A, __m128i __B, __m128i __C) {
   return (__m128i)__builtin_ia32_selectd_128(
       (__mmask8)__U, (__v4si)_mm_dpwsuds_epi32(__A, __B, __C),
       (__v4si)_mm_setzero_si128());
@@ -292,7 +292,7 @@ _mm256_mask_dpwsuds_epi32(__m256i __A, __mmask8 __U, __m256i __B, __m256i __C) {
 }
 
 static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_dpwsuds_epi32(
-    __m256i __A, __mmask8 __U, __m256i __B, __m256i __C) {
+    __mmask8 __U, __m256i __A, __m256i __B, __m256i __C) {
   return (__m256i)__builtin_ia32_selectd_256(
       (__mmask8)__U, (__v8si)_mm256_dpwsuds_epi32(__A, __B, __C),
       (__v8si)_mm256_setzero_si256());
@@ -305,7 +305,7 @@ _mm_mask_dpwusd_epi32(__m128i __A, __mmask8 __U, __m128i __B, __m128i __C) {
 }
 
 static __inline__ __m128i __DEFAULT_FN_ATTRS128
-_mm_maskz_dpwusd_epi32(__m128i __A, __mmask8 __U, __m128i __B, __m128i __C) {
+_mm_maskz_dpwusd_epi32(__mmask8 __U, __m128i __A, __m128i __B, __m128i __C) {
   return (__m128i)__builtin_ia32_selectd_128(
       (__mmask8)__U, (__v4si)_mm_dpwusd_epi32(__A, __B, __C),
       (__v4si)_mm_setzero_si128());
@@ -318,7 +318,7 @@ _mm256_mask_dpwusd_epi32(__m256i __A, __mmask8 __U, __m256i __B, __m256i __C) {
 }
 
 static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_maskz_dpwusd_epi32(__m256i __A, __mmask8 __U, __m256i __B, __m256i __C) {
+_mm256_maskz_dpwusd_epi32(__mmask8 __U, __m256i __A, __m256i __B, __m256i __C) {
   return (__m256i)__builtin_ia32_selectd_256(
       (__mmask8)__U, (__v8si)_mm256_dpwusd_epi32(__A, __B, __C),
       (__v8si)_mm256_setzero_si256());
@@ -331,7 +331,7 @@ _mm_mask_dpwusds_epi32(__m128i __A, __mmask8 __U, __m128i __B, __m128i __C) {
 }
 
 static __inline__ __m128i __DEFAULT_FN_ATTRS128
-_mm_maskz_dpwusds_epi32(__m128i __A, __mmask8 __U, __m128i __B, __m128i __C) {
+_mm_maskz_dpwusds_epi32(__mmask8 __U, __m128i __A, __m128i __B, __m128i __C) {
   return (__m128i)__builtin_ia32_selectd_128(
       (__mmask8)__U, (__v4si)_mm_dpwusds_epi32(__A, __B, __C),
       (__v4si)_mm_setzero_si128());
@@ -344,7 +344,7 @@ _mm256_mask_dpwusds_epi32(__m256i __A, __mmask8 __U, __m256i __B, __m256i __C) {
 }
 
 static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_dpwusds_epi32(
-    __m256i __A, __mmask8 __U, __m256i __B, __m256i __C) {
+    __mmask8 __U, __m256i __A, __m256i __B, __m256i __C) {
   return (__m256i)__builtin_ia32_selectd_256(
       (__mmask8)__U, (__v8si)_mm256_dpwusds_epi32(__A, __B, __C),
       (__v8si)_mm256_setzero_si256());
@@ -357,7 +357,7 @@ _mm_mask_dpwuud_epi32(__m128i __A, __mmask8 __U, __m128i __B, __m128i __C) {
 }
 
 static __inline__ __m128i __DEFAULT_FN_ATTRS128
-_mm_maskz_dpwuud_epi32(__m128i __A, __mmask8 __U, __m128i __B, __m128i __C) {
+_mm_maskz_dpwuud_epi32(__mmask8 __U, __m128i __A, __m128i __B, __m128i __C) {
   return (__m128i)__builtin_ia32_selectd_128(
       (__mmask8)__U, (__v4si)_mm_dpwuud_epi32(__A, __B, __C),
       (__v4si)_mm_setzero_si128());
@@ -370,7 +370,7 @@ _mm256_mask_dpwuud_epi32(__m256i __A, __mmask8 __U, __m256i __B, __m256i __C) {
 }
 
 static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_maskz_dpwuud_epi32(__m256i __A, __mmask8 __U, __m256i __B, __m256i __C) {
+_mm256_maskz_dpwuud_epi32(__mmask8 __U, __m256i __A, __m256i __B, __m256i __C) {
   return (__m256i)__builtin_ia32_selectd_256(
       (__mmask8)__U, (__v8si)_mm256_dpwuud_epi32(__A, __B, __C),
       (__v8si)_mm256_setzero_si256());
@@ -383,7 +383,7 @@ _mm_mask_dpwuuds_epi32(__m128i __A, __mmask8 __U, __m128i __B, __m128i __C) {
 }
 
 static __inline__ __m128i __DEFAULT_FN_ATTRS128
-_mm_maskz_dpwuuds_epi32(__m128i __A, __mmask8 __U, __m128i __B, __m128i __C) {
+_mm_maskz_dpwuuds_epi32(__mmask8 __U, __m128i __A, __m128i __B, __m128i __C) {
   return (__m128i)__builtin_ia32_selectd_128(
       (__mmask8)__U, (__v4si)_mm_dpwuuds_epi32(__A, __B, __C),
       (__v4si)_mm_setzero_si128());
@@ -396,7 +396,7 @@ _mm256_mask_dpwuuds_epi32(__m256i __A, __mmask8 __U, __m256i __B, __m256i __C) {
 }
 
 static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_dpwuuds_epi32(
-    __m256i __A, __mmask8 __U, __m256i __B, __m256i __C) {
+    __mmask8 __U, __m256i __A, __m256i __B, __m256i __C) {
   return (__m256i)__builtin_ia32_selectd_256(
       (__mmask8)__U, (__v8si)_mm256_dpwuuds_epi32(__A, __B, __C),
       (__v8si)_mm256_setzero_si256());
diff --git a/lib/include/avx10_2satcvtdsintrin.h b/lib/include/avx10_2satcvtdsintrin.h
index cc840368c3..57d299606a 100644
--- a/lib/include/avx10_2satcvtdsintrin.h
+++ b/lib/include/avx10_2satcvtdsintrin.h
@@ -17,11 +17,11 @@
 
 /* Define the default attributes for the functions in this file. */
 #define __DEFAULT_FN_ATTRS256                                                  \
-  __attribute__((__always_inline__, __nodebug__, __target__("avx10.2-256"),    \
+  __attribute__((__always_inline__, __nodebug__, __target__("avx10.2"),        \
                  __min_vector_width__(256)))
 
 #define __DEFAULT_FN_ATTRS128                                                  \
-  __attribute__((__always_inline__, __nodebug__, __target__("avx10.2-256"),    \
+  __attribute__((__always_inline__, __nodebug__, __target__("avx10.2"),        \
                  __min_vector_width__(128)))
 
 #define _mm_cvtts_roundsd_i32(__A, __R)                                        \
diff --git a/lib/include/avx2intrin.h b/lib/include/avx2intrin.h
index dc9fc07314..d3ceb2327a 100644
--- a/lib/include/avx2intrin.h
+++ b/lib/include/avx2intrin.h
@@ -15,20 +15,19 @@
 #define __AVX2INTRIN_H
 
 /* Define the default attributes for the functions in this file. */
-#if defined(__EVEX512__) && !defined(__AVX10_1_512__)
-#define __DEFAULT_FN_ATTRS256                                                  \
-  __attribute__((__always_inline__, __nodebug__,                               \
-                 __target__("avx2,no-evex512"), __min_vector_width__(256)))
-#define __DEFAULT_FN_ATTRS128                                                  \
-  __attribute__((__always_inline__, __nodebug__,                               \
-                 __target__("avx2,no-evex512"), __min_vector_width__(128)))
-#else
 #define __DEFAULT_FN_ATTRS256                                                  \
   __attribute__((__always_inline__, __nodebug__, __target__("avx2"),           \
                  __min_vector_width__(256)))
 #define __DEFAULT_FN_ATTRS128                                                  \
   __attribute__((__always_inline__, __nodebug__, __target__("avx2"),           \
                  __min_vector_width__(128)))
+
+#if defined(__cplusplus) && (__cplusplus >= 201103L)
+#define __DEFAULT_FN_ATTRS256_CONSTEXPR __DEFAULT_FN_ATTRS256 constexpr
+#define __DEFAULT_FN_ATTRS128_CONSTEXPR __DEFAULT_FN_ATTRS128 constexpr
+#else
+#define __DEFAULT_FN_ATTRS256_CONSTEXPR __DEFAULT_FN_ATTRS256
+#define __DEFAULT_FN_ATTRS128_CONSTEXPR __DEFAULT_FN_ATTRS128
 #endif
 
 /* SSE4 Multiple Packed Sums of Absolute Difference.  */
@@ -104,10 +103,9 @@
 /// \param __a
 ///    A 256-bit integer vector.
 /// \returns A 256-bit integer vector containing the result.
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_abs_epi8(__m256i __a)
-{
-    return (__m256i)__builtin_elementwise_abs((__v32qs)__a);
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_abs_epi8(__m256i __a) {
+  return (__m256i)__builtin_elementwise_abs((__v32qs)__a);
 }
 
 /// Computes the absolute value of each signed 16-bit element in the 256-bit
@@ -121,10 +119,9 @@ _mm256_abs_epi8(__m256i __a)
 /// \param __a
 ///    A 256-bit vector of [16 x i16].
 /// \returns A 256-bit vector of [16 x i16] containing the result.
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_abs_epi16(__m256i __a)
-{
-    return (__m256i)__builtin_elementwise_abs((__v16hi)__a);
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_abs_epi16(__m256i __a) {
+  return (__m256i)__builtin_elementwise_abs((__v16hi)__a);
 }
 
 /// Computes the absolute value of each signed 32-bit element in the 256-bit
@@ -138,10 +135,9 @@ _mm256_abs_epi16(__m256i __a)
 /// \param __a
 ///    A 256-bit vector of [8 x i32].
 /// \returns A 256-bit vector of [8 x i32] containing the result.
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_abs_epi32(__m256i __a)
-{
-    return (__m256i)__builtin_elementwise_abs((__v8si)__a);
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_abs_epi32(__m256i __a) {
+  return (__m256i)__builtin_elementwise_abs((__v8si)__a);
 }
 
 /// Converts the elements of two 256-bit vectors of [16 x i16] to 8-bit
@@ -169,9 +165,8 @@ _mm256_abs_epi32(__m256i __a)
 ///    A 256-bit vector of [16 x i16] used to generate result[127:64] and
 ///    result[255:192].
 /// \returns A 256-bit integer vector containing the result.
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_packs_epi16(__m256i __a, __m256i __b)
-{
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_packs_epi16(__m256i __a, __m256i __b) {
   return (__m256i)__builtin_ia32_packsswb256((__v16hi)__a, (__v16hi)__b);
 }
 
@@ -201,9 +196,8 @@ _mm256_packs_epi16(__m256i __a, __m256i __b)
 ///    A 256-bit vector of [8 x i32] used to generate result[127:64] and
 ///    result[255:192].
 /// \returns A 256-bit vector of [16 x i16] containing the result.
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_packs_epi32(__m256i __a, __m256i __b)
-{
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_packs_epi32(__m256i __a, __m256i __b) {
   return (__m256i)__builtin_ia32_packssdw256((__v8si)__a, (__v8si)__b);
 }
 
@@ -232,9 +226,8 @@ _mm256_packs_epi32(__m256i __a, __m256i __b)
 ///    A 256-bit vector of [16 x i16] used to generate result[127:64] and
 ///    result[255:192].
 /// \returns A 256-bit integer vector containing the result.
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_packus_epi16(__m256i __a, __m256i __b)
-{
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_packus_epi16(__m256i __a, __m256i __b) {
   return (__m256i)__builtin_ia32_packuswb256((__v16hi)__a, (__v16hi)__b);
 }
 
@@ -264,9 +257,8 @@ _mm256_packus_epi16(__m256i __a, __m256i __b)
 ///    A 256-bit vector of [8 x i32] used to generate result[127:64] and
 ///    result[255:192].
 /// \returns A 256-bit vector of [16 x i16] containing the result.
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_packus_epi32(__m256i __V1, __m256i __V2)
-{
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_packus_epi32(__m256i __V1, __m256i __V2) {
   return (__m256i) __builtin_ia32_packusdw256((__v8si)__V1, (__v8si)__V2);
 }
 
@@ -283,9 +275,8 @@ _mm256_packus_epi32(__m256i __V1, __m256i __V2)
 /// \param __b
 ///    A 256-bit integer vector containing one of the source operands.
 /// \returns A 256-bit integer vector containing the sums.
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_add_epi8(__m256i __a, __m256i __b)
-{
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_add_epi8(__m256i __a, __m256i __b) {
   return (__m256i)((__v32qu)__a + (__v32qu)__b);
 }
 
@@ -302,9 +293,8 @@ _mm256_add_epi8(__m256i __a, __m256i __b)
 /// \param __b
 ///    A 256-bit vector of [16 x i16] containing one of the source operands.
 /// \returns A 256-bit vector of [16 x i16] containing the sums.
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_add_epi16(__m256i __a, __m256i __b)
-{
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_add_epi16(__m256i __a, __m256i __b) {
   return (__m256i)((__v16hu)__a + (__v16hu)__b);
 }
 
@@ -321,9 +311,8 @@ _mm256_add_epi16(__m256i __a, __m256i __b)
 /// \param __b
 ///    A 256-bit vector of [8 x i32] containing one of the source operands.
 /// \returns A 256-bit vector of [8 x i32] containing the sums.
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_add_epi32(__m256i __a, __m256i __b)
-{
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_add_epi32(__m256i __a, __m256i __b) {
   return (__m256i)((__v8su)__a + (__v8su)__b);
 }
 
@@ -340,9 +329,8 @@ _mm256_add_epi32(__m256i __a, __m256i __b)
 /// \param __b
 ///    A 256-bit vector of [4 x i64] containing one of the source operands.
 /// \returns A 256-bit vector of [4 x i64] containing the sums.
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_add_epi64(__m256i __a, __m256i __b)
-{
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_add_epi64(__m256i __a, __m256i __b) {
   return (__m256i)((__v4du)__a + (__v4du)__b);
 }
 
@@ -359,9 +347,8 @@ _mm256_add_epi64(__m256i __a, __m256i __b)
 /// \param __b
 ///    A 256-bit integer vector containing one of the source operands.
 /// \returns A 256-bit integer vector containing the sums.
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_adds_epi8(__m256i __a, __m256i __b)
-{
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_adds_epi8(__m256i __a, __m256i __b) {
   return (__m256i)__builtin_elementwise_add_sat((__v32qs)__a, (__v32qs)__b);
 }
 
@@ -377,9 +364,8 @@ _mm256_adds_epi8(__m256i __a, __m256i __b)
 /// \param __b
 ///    A 256-bit vector of [16 x i16] containing one of the source operands.
 /// \returns A 256-bit vector of [16 x i16] containing the sums.
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_adds_epi16(__m256i __a, __m256i __b)
-{
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_adds_epi16(__m256i __a, __m256i __b) {
   return (__m256i)__builtin_elementwise_add_sat((__v16hi)__a, (__v16hi)__b);
 }
 
@@ -396,9 +382,8 @@ _mm256_adds_epi16(__m256i __a, __m256i __b)
 /// \param __b
 ///    A 256-bit integer vector containing one of the source operands.
 /// \returns A 256-bit integer vector containing the sums.
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_adds_epu8(__m256i __a, __m256i __b)
-{
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_adds_epu8(__m256i __a, __m256i __b) {
   return (__m256i)__builtin_elementwise_add_sat((__v32qu)__a, (__v32qu)__b);
 }
 
@@ -414,9 +399,8 @@ _mm256_adds_epu8(__m256i __a, __m256i __b)
 /// \param __b
 ///    A 256-bit vector of [16 x i16] containing one of the source operands.
 /// \returns A 256-bit vector of [16 x i16] containing the sums.
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_adds_epu16(__m256i __a, __m256i __b)
-{
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_adds_epu16(__m256i __a, __m256i __b) {
   return (__m256i)__builtin_elementwise_add_sat((__v16hu)__a, (__v16hu)__b);
 }
 
@@ -460,7 +444,7 @@ _mm256_adds_epu16(__m256i __a, __m256i __b)
 /// \param __b
 ///    A 256-bit integer vector.
 /// \returns A 256-bit integer vector containing the result.
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
 _mm256_and_si256(__m256i __a, __m256i __b)
 {
   return (__m256i)((__v4du)__a & (__v4du)__b);
@@ -478,7 +462,7 @@ _mm256_and_si256(__m256i __a, __m256i __b)
 /// \param __b
 ///    A 256-bit integer vector.
 /// \returns A 256-bit integer vector containing the result.
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
 _mm256_andnot_si256(__m256i __a, __m256i __b)
 {
   return (__m256i)(~(__v4du)__a & (__v4du)__b);
@@ -504,10 +488,9 @@ _mm256_andnot_si256(__m256i __a, __m256i __b)
 /// \param __b
 ///    A 256-bit integer vector.
 /// \returns A 256-bit integer vector containing the result.
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_avg_epu8(__m256i __a, __m256i __b)
-{
-  return (__m256i)__builtin_ia32_pavgb256((__v32qi)__a, (__v32qi)__b);
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_avg_epu8(__m256i __a, __m256i __b) {
+  return (__m256i)__builtin_ia32_pavgb256((__v32qu)__a, (__v32qu)__b);
 }
 
 /// Computes the averages of the corresponding unsigned 16-bit integers in
@@ -530,10 +513,9 @@ _mm256_avg_epu8(__m256i __a, __m256i __b)
 /// \param __b
 ///    A 256-bit vector of [16 x i16].
 /// \returns A 256-bit vector of [16 x i16] containing the result.
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_avg_epu16(__m256i __a, __m256i __b)
-{
-  return (__m256i)__builtin_ia32_pavgw256((__v16hi)__a, (__v16hi)__b);
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_avg_epu16(__m256i __a, __m256i __b) {
+  return (__m256i)__builtin_ia32_pavgw256((__v16hu)__a, (__v16hu)__b);
 }
 
 /// Merges 8-bit integer values from either of the two 256-bit vectors
@@ -565,9 +547,8 @@ _mm256_avg_epu16(__m256i __a, __m256i __b)
 ///    is 0, the byte is copied from \a __V1; otherwise, it is copied from
 ///    \a __V2.
 /// \returns A 256-bit integer vector containing the result.
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_blendv_epi8(__m256i __V1, __m256i __V2, __m256i __M)
-{
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_blendv_epi8(__m256i __V1, __m256i __V2, __m256i __M) {
   return (__m256i)__builtin_ia32_pblendvb256((__v32qi)__V1, (__v32qi)__V2,
                                               (__v32qi)__M);
 }
@@ -633,7 +614,7 @@ _mm256_blendv_epi8(__m256i __V1, __m256i __V2, __m256i __M)
 /// \param __b
 ///    A 256-bit integer vector containing one of the inputs.
 /// \returns A 256-bit integer vector containing the result.
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
 _mm256_cmpeq_epi8(__m256i __a, __m256i __b)
 {
   return (__m256i)((__v32qi)__a == (__v32qi)__b);
@@ -659,7 +640,7 @@ _mm256_cmpeq_epi8(__m256i __a, __m256i __b)
 /// \param __b
 ///    A 256-bit vector of [16 x i16] containing one of the inputs.
 /// \returns A 256-bit vector of [16 x i16] containing the result.
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
 _mm256_cmpeq_epi16(__m256i __a, __m256i __b)
 {
   return (__m256i)((__v16hi)__a == (__v16hi)__b);
@@ -685,7 +666,7 @@ _mm256_cmpeq_epi16(__m256i __a, __m256i __b)
 /// \param __b
 ///    A 256-bit vector of [8 x i32] containing one of the inputs.
 /// \returns A 256-bit vector of [8 x i32] containing the result.
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
 _mm256_cmpeq_epi32(__m256i __a, __m256i __b)
 {
   return (__m256i)((__v8si)__a == (__v8si)__b);
@@ -711,7 +692,7 @@ _mm256_cmpeq_epi32(__m256i __a, __m256i __b)
 /// \param __b
 ///    A 256-bit vector of [4 x i64] containing one of the inputs.
 /// \returns A 256-bit vector of [4 x i64] containing the result.
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
 _mm256_cmpeq_epi64(__m256i __a, __m256i __b)
 {
   return (__m256i)((__v4di)__a == (__v4di)__b);
@@ -737,7 +718,7 @@ _mm256_cmpeq_epi64(__m256i __a, __m256i __b)
 /// \param __b
 ///    A 256-bit integer vector containing one of the inputs.
 /// \returns A 256-bit integer vector containing the result.
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
 _mm256_cmpgt_epi8(__m256i __a, __m256i __b)
 {
   /* This function always performs a signed comparison, but __v32qi is a char
@@ -765,7 +746,7 @@ _mm256_cmpgt_epi8(__m256i __a, __m256i __b)
 /// \param __b
 ///    A 256-bit vector of [16 x i16] containing one of the inputs.
 /// \returns A 256-bit vector of [16 x i16] containing the result.
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
 _mm256_cmpgt_epi16(__m256i __a, __m256i __b)
 {
   return (__m256i)((__v16hi)__a > (__v16hi)__b);
@@ -791,7 +772,7 @@ _mm256_cmpgt_epi16(__m256i __a, __m256i __b)
 /// \param __b
 ///    A 256-bit vector of [8 x i32] containing one of the inputs.
 /// \returns A 256-bit vector of [8 x i32] containing the result.
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
 _mm256_cmpgt_epi32(__m256i __a, __m256i __b)
 {
   return (__m256i)((__v8si)__a > (__v8si)__b);
@@ -817,7 +798,7 @@ _mm256_cmpgt_epi32(__m256i __a, __m256i __b)
 /// \param __b
 ///    A 256-bit vector of [4 x i64] containing one of the inputs.
 /// \returns A 256-bit vector of [4 x i64] containing the result.
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
 _mm256_cmpgt_epi64(__m256i __a, __m256i __b)
 {
   return (__m256i)((__v4di)__a > (__v4di)__b);
@@ -853,10 +834,9 @@ _mm256_cmpgt_epi64(__m256i __a, __m256i __b)
 /// \param __b
 ///    A 256-bit vector of [16 x i16] containing one of the source operands.
 /// \returns A 256-bit vector of [16 x i16] containing the sums.
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_hadd_epi16(__m256i __a, __m256i __b)
-{
-    return (__m256i)__builtin_ia32_phaddw256((__v16hi)__a, (__v16hi)__b);
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_hadd_epi16(__m256i __a, __m256i __b) {
+  return (__m256i)__builtin_ia32_phaddw256((__v16hi)__a, (__v16hi)__b);
 }
 
 /// Horizontally adds the adjacent pairs of 32-bit integers from two 256-bit
@@ -885,10 +865,9 @@ _mm256_hadd_epi16(__m256i __a, __m256i __b)
 /// \param __b
 ///    A 256-bit vector of [8 x i32] containing one of the source operands.
 /// \returns A 256-bit vector of [8 x i32] containing the sums.
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_hadd_epi32(__m256i __a, __m256i __b)
-{
-    return (__m256i)__builtin_ia32_phaddd256((__v8si)__a, (__v8si)__b);
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_hadd_epi32(__m256i __a, __m256i __b) {
+  return (__m256i)__builtin_ia32_phaddd256((__v8si)__a, (__v8si)__b);
 }
 
 /// Horizontally adds the adjacent pairs of 16-bit integers from two 256-bit
@@ -920,10 +899,9 @@ _mm256_hadd_epi32(__m256i __a, __m256i __b)
 /// \param __b
 ///    A 256-bit vector of [16 x i16] containing one of the source operands.
 /// \returns A 256-bit vector of [16 x i16] containing the sums.
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_hadds_epi16(__m256i __a, __m256i __b)
-{
-    return (__m256i)__builtin_ia32_phaddsw256((__v16hi)__a, (__v16hi)__b);
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_hadds_epi16(__m256i __a, __m256i __b) {
+  return (__m256i)__builtin_ia32_phaddsw256((__v16hi)__a, (__v16hi)__b);
 }
 
 /// Horizontally subtracts adjacent pairs of 16-bit integers from two 256-bit
@@ -956,10 +934,9 @@ _mm256_hadds_epi16(__m256i __a, __m256i __b)
 /// \param __b
 ///    A 256-bit vector of [16 x i16] containing one of the source operands.
 /// \returns A 256-bit vector of [16 x i16] containing the differences.
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_hsub_epi16(__m256i __a, __m256i __b)
-{
-    return (__m256i)__builtin_ia32_phsubw256((__v16hi)__a, (__v16hi)__b);
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_hsub_epi16(__m256i __a, __m256i __b) {
+  return (__m256i)__builtin_ia32_phsubw256((__v16hi)__a, (__v16hi)__b);
 }
 
 /// Horizontally subtracts adjacent pairs of 32-bit integers from two 256-bit
@@ -988,10 +965,9 @@ _mm256_hsub_epi16(__m256i __a, __m256i __b)
 /// \param __b
 ///    A 256-bit vector of [8 x i32] containing one of the source operands.
 /// \returns A 256-bit vector of [8 x i32] containing the differences.
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_hsub_epi32(__m256i __a, __m256i __b)
-{
-    return (__m256i)__builtin_ia32_phsubd256((__v8si)__a, (__v8si)__b);
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_hsub_epi32(__m256i __a, __m256i __b) {
+  return (__m256i)__builtin_ia32_phsubd256((__v8si)__a, (__v8si)__b);
 }
 
 /// Horizontally subtracts adjacent pairs of 16-bit integers from two 256-bit
@@ -1024,10 +1000,9 @@ _mm256_hsub_epi32(__m256i __a, __m256i __b)
 /// \param __b
 ///    A 256-bit vector of [16 x i16] containing one of the source operands.
 /// \returns A 256-bit vector of [16 x i16] containing the differences.
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_hsubs_epi16(__m256i __a, __m256i __b)
-{
-    return (__m256i)__builtin_ia32_phsubsw256((__v16hi)__a, (__v16hi)__b);
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_hsubs_epi16(__m256i __a, __m256i __b) {
+  return (__m256i)__builtin_ia32_phsubsw256((__v16hi)__a, (__v16hi)__b);
 }
 
 /// Multiplies each unsigned byte from the 256-bit integer vector in \a __a
@@ -1054,10 +1029,9 @@ _mm256_hsubs_epi16(__m256i __a, __m256i __b)
 /// \param __b
 ///    A 256-bit vector containing one of the source operands.
 /// \returns A 256-bit vector of [16 x i16] containing the result.
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_maddubs_epi16(__m256i __a, __m256i __b)
-{
-    return (__m256i)__builtin_ia32_pmaddubsw256((__v32qi)__a, (__v32qi)__b);
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_maddubs_epi16(__m256i __a, __m256i __b) {
+  return (__m256i)__builtin_ia32_pmaddubsw256((__v32qi)__a, (__v32qi)__b);
 }
 
 /// Multiplies corresponding 16-bit elements of two 256-bit vectors of
@@ -1086,9 +1060,8 @@ _mm256_maddubs_epi16(__m256i __a, __m256i __b)
 /// \param __b
 ///    A 256-bit vector of [16 x i16] containing one of the source operands.
 /// \returns A 256-bit vector of [8 x i32] containing the result.
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_madd_epi16(__m256i __a, __m256i __b)
-{
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_madd_epi16(__m256i __a, __m256i __b) {
   return (__m256i)__builtin_ia32_pmaddwd256((__v16hi)__a, (__v16hi)__b);
 }
 
@@ -1105,9 +1078,8 @@ _mm256_madd_epi16(__m256i __a, __m256i __b)
 /// \param __b
 ///    A 256-bit integer vector.
 /// \returns A 256-bit integer vector containing the result.
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_max_epi8(__m256i __a, __m256i __b)
-{
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_max_epi8(__m256i __a, __m256i __b) {
   return (__m256i)__builtin_elementwise_max((__v32qs)__a, (__v32qs)__b);
 }
 
@@ -1124,9 +1096,8 @@ _mm256_max_epi8(__m256i __a, __m256i __b)
 /// \param __b
 ///    A 256-bit vector of [16 x i16].
 /// \returns A 256-bit vector of [16 x i16] containing the result.
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_max_epi16(__m256i __a, __m256i __b)
-{
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_max_epi16(__m256i __a, __m256i __b) {
   return (__m256i)__builtin_elementwise_max((__v16hi)__a, (__v16hi)__b);
 }
 
@@ -1143,9 +1114,8 @@ _mm256_max_epi16(__m256i __a, __m256i __b)
 /// \param __b
 ///    A 256-bit vector of [8 x i32].
 /// \returns A 256-bit vector of [8 x i32] containing the result.
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_max_epi32(__m256i __a, __m256i __b)
-{
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_max_epi32(__m256i __a, __m256i __b) {
   return (__m256i)__builtin_elementwise_max((__v8si)__a, (__v8si)__b);
 }
 
@@ -1162,9 +1132,8 @@ _mm256_max_epi32(__m256i __a, __m256i __b)
 /// \param __b
 ///    A 256-bit integer vector.
 /// \returns A 256-bit integer vector containing the result.
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_max_epu8(__m256i __a, __m256i __b)
-{
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_max_epu8(__m256i __a, __m256i __b) {
   return (__m256i)__builtin_elementwise_max((__v32qu)__a, (__v32qu)__b);
 }
 
@@ -1181,9 +1150,8 @@ _mm256_max_epu8(__m256i __a, __m256i __b)
 /// \param __b
 ///    A 256-bit vector of [16 x i16].
 /// \returns A 256-bit vector of [16 x i16] containing the result.
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_max_epu16(__m256i __a, __m256i __b)
-{
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_max_epu16(__m256i __a, __m256i __b) {
   return (__m256i)__builtin_elementwise_max((__v16hu)__a, (__v16hu)__b);
 }
 
@@ -1200,9 +1168,8 @@ _mm256_max_epu16(__m256i __a, __m256i __b)
 /// \param __b
 ///    A 256-bit vector of [8 x i32].
 /// \returns A 256-bit vector of [8 x i32] containing the result.
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_max_epu32(__m256i __a, __m256i __b)
-{
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_max_epu32(__m256i __a, __m256i __b) {
   return (__m256i)__builtin_elementwise_max((__v8su)__a, (__v8su)__b);
 }
 
@@ -1219,9 +1186,8 @@ _mm256_max_epu32(__m256i __a, __m256i __b)
 /// \param __b
 ///    A 256-bit integer vector.
 /// \returns A 256-bit integer vector containing the result.
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_min_epi8(__m256i __a, __m256i __b)
-{
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_min_epi8(__m256i __a, __m256i __b) {
   return (__m256i)__builtin_elementwise_min((__v32qs)__a, (__v32qs)__b);
 }
 
@@ -1238,9 +1204,8 @@ _mm256_min_epi8(__m256i __a, __m256i __b)
 /// \param __b
 ///    A 256-bit vector of [16 x i16].
 /// \returns A 256-bit vector of [16 x i16] containing the result.
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_min_epi16(__m256i __a, __m256i __b)
-{
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_min_epi16(__m256i __a, __m256i __b) {
   return (__m256i)__builtin_elementwise_min((__v16hi)__a, (__v16hi)__b);
 }
 
@@ -1257,9 +1222,8 @@ _mm256_min_epi16(__m256i __a, __m256i __b)
 /// \param __b
 ///    A 256-bit vector of [8 x i32].
 /// \returns A 256-bit vector of [8 x i32] containing the result.
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_min_epi32(__m256i __a, __m256i __b)
-{
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_min_epi32(__m256i __a, __m256i __b) {
   return (__m256i)__builtin_elementwise_min((__v8si)__a, (__v8si)__b);
 }
 
@@ -1276,9 +1240,8 @@ _mm256_min_epi32(__m256i __a, __m256i __b)
 /// \param __b
 ///    A 256-bit integer vector.
 /// \returns A 256-bit integer vector containing the result.
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_min_epu8(__m256i __a, __m256i __b)
-{
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_min_epu8(__m256i __a, __m256i __b) {
   return (__m256i)__builtin_elementwise_min((__v32qu)__a, (__v32qu)__b);
 }
 
@@ -1295,9 +1258,8 @@ _mm256_min_epu8(__m256i __a, __m256i __b)
 /// \param __b
 ///    A 256-bit vector of [16 x i16].
 /// \returns A 256-bit vector of [16 x i16] containing the result.
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_min_epu16(__m256i __a, __m256i __b)
-{
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_min_epu16(__m256i __a, __m256i __b) {
   return (__m256i)__builtin_elementwise_min((__v16hu)__a, (__v16hu)__b);
 }
 
@@ -1314,9 +1276,8 @@ _mm256_min_epu16(__m256i __a, __m256i __b)
 /// \param __b
 ///    A 256-bit vector of [8 x i32].
 /// \returns A 256-bit vector of [8 x i32] containing the result.
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_min_epu32(__m256i __a, __m256i __b)
-{
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_min_epu32(__m256i __a, __m256i __b) {
   return (__m256i)__builtin_elementwise_min((__v8su)__a, (__v8su)__b);
 }
 
@@ -1337,9 +1298,8 @@ _mm256_min_epu32(__m256i __a, __m256i __b)
 /// \param __a
 ///    A 256-bit integer vector containing the source bytes.
 /// \returns The 32-bit integer mask.
-static __inline__ int __DEFAULT_FN_ATTRS256
-_mm256_movemask_epi8(__m256i __a)
-{
+static __inline__ int __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_movemask_epi8(__m256i __a) {
   return __builtin_ia32_pmovmskb256((__v32qi)__a);
 }
 
@@ -1363,9 +1323,8 @@ _mm256_movemask_epi8(__m256i __a)
 ///    A 128-bit integer vector containing the source bytes.
 /// \returns A 256-bit vector of [16 x i16] containing the sign-extended
 ///    values.
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_cvtepi8_epi16(__m128i __V)
-{
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_cvtepi8_epi16(__m128i __V) {
   /* This function always performs a signed extension, but __v16qi is a char
      which may be signed or unsigned, so use __v16qs. */
   return (__m256i)__builtin_convertvector((__v16qs)__V, __v16hi);
@@ -1391,9 +1350,8 @@ _mm256_cvtepi8_epi16(__m128i __V)
 ///    A 128-bit integer vector containing the source bytes.
 /// \returns A 256-bit vector of [8 x i32] containing the sign-extended
 ///    values.
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_cvtepi8_epi32(__m128i __V)
-{
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_cvtepi8_epi32(__m128i __V) {
   /* This function always performs a signed extension, but __v16qi is a char
      which may be signed or unsigned, so use __v16qs. */
   return (__m256i)__builtin_convertvector(__builtin_shufflevector((__v16qs)__V, (__v16qs)__V, 0, 1, 2, 3, 4, 5, 6, 7), __v8si);
@@ -1418,9 +1376,8 @@ _mm256_cvtepi8_epi32(__m128i __V)
 ///    A 128-bit integer vector containing the source bytes.
 /// \returns A 256-bit vector of [4 x i64] containing the sign-extended
 ///    values.
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_cvtepi8_epi64(__m128i __V)
-{
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_cvtepi8_epi64(__m128i __V) {
   /* This function always performs a signed extension, but __v16qi is a char
      which may be signed or unsigned, so use __v16qs. */
   return (__m256i)__builtin_convertvector(__builtin_shufflevector((__v16qs)__V, (__v16qs)__V, 0, 1, 2, 3), __v4di);
@@ -1446,9 +1403,8 @@ _mm256_cvtepi8_epi64(__m128i __V)
 ///    A 128-bit vector of [8 x i16] containing the source values.
 /// \returns A 256-bit vector of [8 x i32] containing the sign-extended
 ///    values.
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_cvtepi16_epi32(__m128i __V)
-{
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_cvtepi16_epi32(__m128i __V) {
   return (__m256i)__builtin_convertvector((__v8hi)__V, __v8si);
 }
 
@@ -1471,9 +1427,8 @@ _mm256_cvtepi16_epi32(__m128i __V)
 ///    A 128-bit vector of [8 x i16] containing the source values.
 /// \returns A 256-bit vector of [4 x i64] containing the sign-extended
 ///    values.
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_cvtepi16_epi64(__m128i __V)
-{
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_cvtepi16_epi64(__m128i __V) {
   return (__m256i)__builtin_convertvector(__builtin_shufflevector((__v8hi)__V, (__v8hi)__V, 0, 1, 2, 3), __v4di);
 }
 
@@ -1496,9 +1451,8 @@ _mm256_cvtepi16_epi64(__m128i __V)
 ///    A 128-bit vector of [4 x i32] containing the source values.
 /// \returns A 256-bit vector of [4 x i64] containing the sign-extended
 ///    values.
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_cvtepi32_epi64(__m128i __V)
-{
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_cvtepi32_epi64(__m128i __V) {
   return (__m256i)__builtin_convertvector((__v4si)__V, __v4di);
 }
 
@@ -1522,9 +1476,8 @@ _mm256_cvtepi32_epi64(__m128i __V)
 ///    A 128-bit integer vector containing the source bytes.
 /// \returns A 256-bit vector of [16 x i16] containing the zero-extended
 ///    values.
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_cvtepu8_epi16(__m128i __V)
-{
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_cvtepu8_epi16(__m128i __V) {
   return (__m256i)__builtin_convertvector((__v16qu)__V, __v16hi);
 }
 
@@ -1548,9 +1501,8 @@ _mm256_cvtepu8_epi16(__m128i __V)
 ///    A 128-bit integer vector containing the source bytes.
 /// \returns A 256-bit vector of [8 x i32] containing the zero-extended
 ///    values.
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_cvtepu8_epi32(__m128i __V)
-{
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_cvtepu8_epi32(__m128i __V) {
   return (__m256i)__builtin_convertvector(__builtin_shufflevector((__v16qu)__V, (__v16qu)__V, 0, 1, 2, 3, 4, 5, 6, 7), __v8si);
 }
 
@@ -1573,9 +1525,8 @@ _mm256_cvtepu8_epi32(__m128i __V)
 ///    A 128-bit integer vector containing the source bytes.
 /// \returns A 256-bit vector of [4 x i64] containing the zero-extended
 ///    values.
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_cvtepu8_epi64(__m128i __V)
-{
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_cvtepu8_epi64(__m128i __V) {
   return (__m256i)__builtin_convertvector(__builtin_shufflevector((__v16qu)__V, (__v16qu)__V, 0, 1, 2, 3), __v4di);
 }
 
@@ -1599,9 +1550,8 @@ _mm256_cvtepu8_epi64(__m128i __V)
 ///    A 128-bit vector of [8 x i16] containing the source values.
 /// \returns A 256-bit vector of [8 x i32] containing the zero-extended
 ///    values.
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_cvtepu16_epi32(__m128i __V)
-{
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_cvtepu16_epi32(__m128i __V) {
   return (__m256i)__builtin_convertvector((__v8hu)__V, __v8si);
 }
 
@@ -1624,9 +1574,8 @@ _mm256_cvtepu16_epi32(__m128i __V)
 ///    A 128-bit vector of [8 x i16] containing the source values.
 /// \returns A 256-bit vector of [4 x i64] containing the zero-extended
 ///    values.
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_cvtepu16_epi64(__m128i __V)
-{
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_cvtepu16_epi64(__m128i __V) {
   return (__m256i)__builtin_convertvector(__builtin_shufflevector((__v8hu)__V, (__v8hu)__V, 0, 1, 2, 3), __v4di);
 }
 
@@ -1649,9 +1598,8 @@ _mm256_cvtepu16_epi64(__m128i __V)
 ///    A 128-bit vector of [4 x i32] containing the source values.
 /// \returns A 256-bit vector of [4 x i64] containing the zero-extended
 ///    values.
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_cvtepu32_epi64(__m128i __V)
-{
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_cvtepu32_epi64(__m128i __V) {
   return (__m256i)__builtin_convertvector((__v4su)__V, __v4di);
 }
 
@@ -1675,9 +1623,8 @@ _mm256_cvtepu32_epi64(__m128i __V)
 /// \param __b
 ///    A 256-bit vector of [8 x i32] containing one of the source operands.
 /// \returns A 256-bit vector of [4 x i64] containing the products.
-static __inline__  __m256i __DEFAULT_FN_ATTRS256
-_mm256_mul_epi32(__m256i __a, __m256i __b)
-{
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_mul_epi32(__m256i __a, __m256i __b) {
   return (__m256i)__builtin_ia32_pmuldq256((__v8si)__a, (__v8si)__b);
 }
 
@@ -1702,9 +1649,8 @@ _mm256_mul_epi32(__m256i __a, __m256i __b)
 /// \param __b
 ///    A 256-bit vector of [16 x i16] containing one of the source operands.
 /// \returns A 256-bit vector of [16 x i16] containing the rounded products.
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_mulhrs_epi16(__m256i __a, __m256i __b)
-{
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_mulhrs_epi16(__m256i __a, __m256i __b) {
   return (__m256i)__builtin_ia32_pmulhrsw256((__v16hi)__a, (__v16hi)__b);
 }
 
@@ -1721,10 +1667,9 @@ _mm256_mulhrs_epi16(__m256i __a, __m256i __b)
 /// \param __b
 ///    A 256-bit vector of [16 x i16] containing one of the source operands.
 /// \returns A 256-bit vector of [16 x i16] containing the products.
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_mulhi_epu16(__m256i __a, __m256i __b)
-{
-  return (__m256i)__builtin_ia32_pmulhuw256((__v16hi)__a, (__v16hi)__b);
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_mulhi_epu16(__m256i __a, __m256i __b) {
+  return (__m256i)__builtin_ia32_pmulhuw256((__v16hu)__a, (__v16hu)__b);
 }
 
 /// Multiplies signed 16-bit integer elements of two 256-bit vectors of
@@ -1740,7 +1685,7 @@ _mm256_mulhi_epu16(__m256i __a, __m256i __b)
 /// \param __b
 ///    A 256-bit vector of [16 x i16] containing one of the source operands.
 /// \returns A 256-bit vector of [16 x i16] containing the products.
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
 _mm256_mulhi_epi16(__m256i __a, __m256i __b)
 {
   return (__m256i)__builtin_ia32_pmulhw256((__v16hi)__a, (__v16hi)__b);
@@ -1759,7 +1704,7 @@ _mm256_mulhi_epi16(__m256i __a, __m256i __b)
 /// \param __b
 ///    A 256-bit vector of [16 x i16] containing one of the source operands.
 /// \returns A 256-bit vector of [16 x i16] containing the products.
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
 _mm256_mullo_epi16(__m256i __a, __m256i __b)
 {
   return (__m256i)((__v16hu)__a * (__v16hu)__b);
@@ -1778,9 +1723,8 @@ _mm256_mullo_epi16(__m256i __a, __m256i __b)
 /// \param __b
 ///    A 256-bit vector of [8 x i32] containing one of the source operands.
 /// \returns A 256-bit vector of [8 x i32] containing the products.
-static __inline__  __m256i __DEFAULT_FN_ATTRS256
-_mm256_mullo_epi32 (__m256i __a, __m256i __b)
-{
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_mullo_epi32(__m256i __a, __m256i __b) {
   return (__m256i)((__v8su)__a * (__v8su)__b);
 }
 
@@ -1804,9 +1748,8 @@ _mm256_mullo_epi32 (__m256i __a, __m256i __b)
 /// \param __b
 ///    A 256-bit vector of [8 x i32] containing one of the source operands.
 /// \returns A 256-bit vector of [4 x i64] containing the products.
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_mul_epu32(__m256i __a, __m256i __b)
-{
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_mul_epu32(__m256i __a, __m256i __b) {
   return __builtin_ia32_pmuludq256((__v8si)__a, (__v8si)__b);
 }
 
@@ -1822,7 +1765,7 @@ _mm256_mul_epu32(__m256i __a, __m256i __b)
 /// \param __b
 ///    A 256-bit integer vector.
 /// \returns A 256-bit integer vector containing the result.
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
 _mm256_or_si256(__m256i __a, __m256i __b)
 {
   return (__m256i)((__v4du)__a | (__v4du)__b);
@@ -1906,9 +1849,8 @@ _mm256_sad_epu8(__m256i __a, __m256i __b)
 ///    control byte specify the index (within the same 128-bit half) of \a __a
 ///    to copy to the result byte.
 /// \returns A 256-bit integer vector containing the result.
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_shuffle_epi8(__m256i __a, __m256i __b)
-{
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_shuffle_epi8(__m256i __a, __m256i __b) {
   return (__m256i)__builtin_ia32_pshufb256((__v32qi)__a, (__v32qi)__b);
 }
 
@@ -2033,10 +1975,9 @@ _mm256_shuffle_epi8(__m256i __a, __m256i __b)
 /// \param __b
 ///    A 256-bit integer vector].
 /// \returns A 256-bit integer vector containing the result.
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_sign_epi8(__m256i __a, __m256i __b)
-{
-    return (__m256i)__builtin_ia32_psignb256((__v32qi)__a, (__v32qi)__b);
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_sign_epi8(__m256i __a, __m256i __b) {
+  return (__m256i)__builtin_ia32_psignb256((__v32qi)__a, (__v32qi)__b);
 }
 
 /// Sets each element of the result to the corresponding element of the
@@ -2054,10 +1995,9 @@ _mm256_sign_epi8(__m256i __a, __m256i __b)
 /// \param __b
 ///    A 256-bit vector of [16 x i16].
 /// \returns A 256-bit vector of [16 x i16] containing the result.
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_sign_epi16(__m256i __a, __m256i __b)
-{
-    return (__m256i)__builtin_ia32_psignw256((__v16hi)__a, (__v16hi)__b);
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_sign_epi16(__m256i __a, __m256i __b) {
+  return (__m256i)__builtin_ia32_psignw256((__v16hi)__a, (__v16hi)__b);
 }
 
 /// Sets each element of the result to the corresponding element of the
@@ -2075,10 +2015,9 @@ _mm256_sign_epi16(__m256i __a, __m256i __b)
 /// \param __b
 ///    A 256-bit vector of [8 x i32].
 /// \returns A 256-bit vector of [8 x i32] containing the result.
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_sign_epi32(__m256i __a, __m256i __b)
-{
-    return (__m256i)__builtin_ia32_psignd256((__v8si)__a, (__v8si)__b);
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_sign_epi32(__m256i __a, __m256i __b) {
+  return (__m256i)__builtin_ia32_psignd256((__v8si)__a, (__v8si)__b);
 }
 
 /// Shifts each 128-bit half of the 256-bit integer vector \a a left by
@@ -2098,8 +2037,9 @@ _mm256_sign_epi32(__m256i __a, __m256i __b)
 /// \param imm
 ///     An unsigned immediate value specifying the shift count (in bytes).
 /// \returns A 256-bit integer vector containing the result.
-#define _mm256_slli_si256(a, imm) \
-  ((__m256i)__builtin_ia32_pslldqi256_byteshift((__v4di)(__m256i)(a), (int)(imm)))
+#define _mm256_slli_si256(a, imm)                                              \
+  ((__m256i)__builtin_ia32_pslldqi256_byteshift((__v32qi)(__m256i)(a),         \
+                                                (int)(imm)))
 
 /// Shifts each 128-bit half of the 256-bit integer vector \a a left by
 ///    \a imm bytes, shifting in zero bytes, and returns the result. If \a imm
@@ -2118,8 +2058,9 @@ _mm256_sign_epi32(__m256i __a, __m256i __b)
 /// \param imm
 ///    An unsigned immediate value specifying the shift count (in bytes).
 /// \returns A 256-bit integer vector containing the result.
-#define _mm256_bslli_epi128(a, imm) \
-  ((__m256i)__builtin_ia32_pslldqi256_byteshift((__v4di)(__m256i)(a), (int)(imm)))
+#define _mm256_bslli_epi128(a, imm)                                            \
+  ((__m256i)__builtin_ia32_pslldqi256_byteshift((__v32qi)(__m256i)(a),         \
+                                                (int)(imm)))
 
 /// Shifts each 16-bit element of the 256-bit vector of [16 x i16] in \a __a
 ///    left by \a __count bits, shifting in zero bits, and returns the result.
@@ -2134,9 +2075,8 @@ _mm256_sign_epi32(__m256i __a, __m256i __b)
 /// \param __count
 ///    An unsigned integer value specifying the shift count (in bits).
 /// \returns A 256-bit vector of [16 x i16] containing the result.
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_slli_epi16(__m256i __a, int __count)
-{
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_slli_epi16(__m256i __a, int __count) {
   return (__m256i)__builtin_ia32_psllwi256((__v16hi)__a, __count);
 }
 
@@ -2155,9 +2095,8 @@ _mm256_slli_epi16(__m256i __a, int __count)
 ///    A 128-bit vector of [2 x i64] whose lower element gives the unsigned
 ///    shift count (in bits). The upper element is ignored.
 /// \returns A 256-bit vector of [16 x i16] containing the result.
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_sll_epi16(__m256i __a, __m128i __count)
-{
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_sll_epi16(__m256i __a, __m128i __count) {
   return (__m256i)__builtin_ia32_psllw256((__v16hi)__a, (__v8hi)__count);
 }
 
@@ -2174,9 +2113,8 @@ _mm256_sll_epi16(__m256i __a, __m128i __count)
 /// \param __count
 ///    An unsigned integer value specifying the shift count (in bits).
 /// \returns A 256-bit vector of [8 x i32] containing the result.
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_slli_epi32(__m256i __a, int __count)
-{
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_slli_epi32(__m256i __a, int __count) {
   return (__m256i)__builtin_ia32_pslldi256((__v8si)__a, __count);
 }
 
@@ -2195,9 +2133,8 @@ _mm256_slli_epi32(__m256i __a, int __count)
 ///    A 128-bit vector of [2 x i64] whose lower element gives the unsigned
 ///    shift count (in bits). The upper element is ignored.
 /// \returns A 256-bit vector of [8 x i32] containing the result.
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_sll_epi32(__m256i __a, __m128i __count)
-{
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_sll_epi32(__m256i __a, __m128i __count) {
   return (__m256i)__builtin_ia32_pslld256((__v8si)__a, (__v4si)__count);
 }
 
@@ -2214,9 +2151,8 @@ _mm256_sll_epi32(__m256i __a, __m128i __count)
 /// \param __count
 ///    An unsigned integer value specifying the shift count (in bits).
 /// \returns A 256-bit vector of [4 x i64] containing the result.
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_slli_epi64(__m256i __a, int __count)
-{
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_slli_epi64(__m256i __a, int __count) {
   return __builtin_ia32_psllqi256((__v4di)__a, __count);
 }
 
@@ -2235,9 +2171,8 @@ _mm256_slli_epi64(__m256i __a, int __count)
 ///    A 128-bit vector of [2 x i64] whose lower element gives the unsigned
 ///    shift count (in bits). The upper element is ignored.
 /// \returns A 256-bit vector of [4 x i64] containing the result.
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_sll_epi64(__m256i __a, __m128i __count)
-{
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_sll_epi64(__m256i __a, __m128i __count) {
   return __builtin_ia32_psllq256((__v4di)__a, __count);
 }
 
@@ -2255,9 +2190,8 @@ _mm256_sll_epi64(__m256i __a, __m128i __count)
 /// \param __count
 ///    An unsigned integer value specifying the shift count (in bits).
 /// \returns A 256-bit vector of [16 x i16] containing the result.
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_srai_epi16(__m256i __a, int __count)
-{
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_srai_epi16(__m256i __a, int __count) {
   return (__m256i)__builtin_ia32_psrawi256((__v16hi)__a, __count);
 }
 
@@ -2277,9 +2211,8 @@ _mm256_srai_epi16(__m256i __a, int __count)
 ///    A 128-bit vector of [2 x i64] whose lower element gives the unsigned
 ///    shift count (in bits). The upper element is ignored.
 /// \returns A 256-bit vector of [16 x i16] containing the result.
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_sra_epi16(__m256i __a, __m128i __count)
-{
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_sra_epi16(__m256i __a, __m128i __count) {
   return (__m256i)__builtin_ia32_psraw256((__v16hi)__a, (__v8hi)__count);
 }
 
@@ -2297,9 +2230,8 @@ _mm256_sra_epi16(__m256i __a, __m128i __count)
 /// \param __count
 ///    An unsigned integer value specifying the shift count (in bits).
 /// \returns A 256-bit vector of [8 x i32] containing the result.
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_srai_epi32(__m256i __a, int __count)
-{
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_srai_epi32(__m256i __a, int __count) {
   return (__m256i)__builtin_ia32_psradi256((__v8si)__a, __count);
 }
 
@@ -2319,9 +2251,8 @@ _mm256_srai_epi32(__m256i __a, int __count)
 ///    A 128-bit vector of [2 x i64] whose lower element gives the unsigned
 ///    shift count (in bits). The upper element is ignored.
 /// \returns A 256-bit vector of [8 x i32] containing the result.
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_sra_epi32(__m256i __a, __m128i __count)
-{
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_sra_epi32(__m256i __a, __m128i __count) {
   return (__m256i)__builtin_ia32_psrad256((__v8si)__a, (__v4si)__count);
 }
 
@@ -2342,8 +2273,9 @@ _mm256_sra_epi32(__m256i __a, __m128i __count)
 /// \param imm
 ///    An unsigned immediate value specifying the shift count (in bytes).
 /// \returns A 256-bit integer vector containing the result.
-#define _mm256_srli_si256(a, imm) \
-  ((__m256i)__builtin_ia32_psrldqi256_byteshift((__m256i)(a), (int)(imm)))
+#define _mm256_srli_si256(a, imm)                                              \
+  ((__m256i)__builtin_ia32_psrldqi256_byteshift((__v32qi)(__m256i)(a),         \
+                                                (int)(imm)))
 
 /// Shifts each 128-bit half of the 256-bit integer vector in \a a right by
 ///    \a imm bytes, shifting in zero bytes, and returns the result. If
@@ -2362,8 +2294,9 @@ _mm256_sra_epi32(__m256i __a, __m128i __count)
 /// \param imm
 ///     An unsigned immediate value specifying the shift count (in bytes).
 /// \returns A 256-bit integer vector containing the result.
-#define _mm256_bsrli_epi128(a, imm) \
-  ((__m256i)__builtin_ia32_psrldqi256_byteshift((__m256i)(a), (int)(imm)))
+#define _mm256_bsrli_epi128(a, imm)                                            \
+  ((__m256i)__builtin_ia32_psrldqi256_byteshift((__v32qi)(__m256i)(a),         \
+                                                (int)(imm)))
 
 /// Shifts each 16-bit element of the 256-bit vector of [16 x i16] in \a __a
 ///    right by \a __count bits, shifting in zero bits, and returns the result.
@@ -2378,9 +2311,8 @@ _mm256_sra_epi32(__m256i __a, __m128i __count)
 /// \param __count
 ///    An unsigned integer value specifying the shift count (in bits).
 /// \returns A 256-bit vector of [16 x i16] containing the result.
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_srli_epi16(__m256i __a, int __count)
-{
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_srli_epi16(__m256i __a, int __count) {
   return (__m256i)__builtin_ia32_psrlwi256((__v16hi)__a, __count);
 }
 
@@ -2399,9 +2331,8 @@ _mm256_srli_epi16(__m256i __a, int __count)
 ///    A 128-bit vector of [2 x i64] whose lower element gives the unsigned
 ///    shift count (in bits). The upper element is ignored.
 /// \returns A 256-bit vector of [16 x i16] containing the result.
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_srl_epi16(__m256i __a, __m128i __count)
-{
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_srl_epi16(__m256i __a, __m128i __count) {
   return (__m256i)__builtin_ia32_psrlw256((__v16hi)__a, (__v8hi)__count);
 }
 
@@ -2418,9 +2349,8 @@ _mm256_srl_epi16(__m256i __a, __m128i __count)
 /// \param __count
 ///    An unsigned integer value specifying the shift count (in bits).
 /// \returns A 256-bit vector of [8 x i32] containing the result.
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_srli_epi32(__m256i __a, int __count)
-{
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_srli_epi32(__m256i __a, int __count) {
   return (__m256i)__builtin_ia32_psrldi256((__v8si)__a, __count);
 }
 
@@ -2439,9 +2369,8 @@ _mm256_srli_epi32(__m256i __a, int __count)
 ///    A 128-bit vector of [2 x i64] whose lower element gives the unsigned
 ///    shift count (in bits). The upper element is ignored.
 /// \returns A 256-bit vector of [8 x i32] containing the result.
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_srl_epi32(__m256i __a, __m128i __count)
-{
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_srl_epi32(__m256i __a, __m128i __count) {
   return (__m256i)__builtin_ia32_psrld256((__v8si)__a, (__v4si)__count);
 }
 
@@ -2458,9 +2387,8 @@ _mm256_srl_epi32(__m256i __a, __m128i __count)
 /// \param __count
 ///    An unsigned integer value specifying the shift count (in bits).
 /// \returns A 256-bit vector of [4 x i64] containing the result.
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_srli_epi64(__m256i __a, int __count)
-{
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_srli_epi64(__m256i __a, int __count) {
   return __builtin_ia32_psrlqi256((__v4di)__a, __count);
 }
 
@@ -2479,9 +2407,8 @@ _mm256_srli_epi64(__m256i __a, int __count)
 ///    A 128-bit vector of [2 x i64] whose lower element gives the unsigned
 ///    shift count (in bits). The upper element is ignored.
 /// \returns A 256-bit vector of [4 x i64] containing the result.
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_srl_epi64(__m256i __a, __m128i __count)
-{
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_srl_epi64(__m256i __a, __m128i __count) {
   return __builtin_ia32_psrlq256((__v4di)__a, __count);
 }
 
@@ -2506,9 +2433,8 @@ _mm256_srl_epi64(__m256i __a, __m128i __count)
 /// \param __b
 ///    A 256-bit integer vector containing the subtrahends.
 /// \returns A 256-bit integer vector containing the differences.
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_sub_epi8(__m256i __a, __m256i __b)
-{
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_sub_epi8(__m256i __a, __m256i __b) {
   return (__m256i)((__v32qu)__a - (__v32qu)__b);
 }
 
@@ -2533,9 +2459,8 @@ _mm256_sub_epi8(__m256i __a, __m256i __b)
 /// \param __b
 ///    A 256-bit vector of [16 x i16] containing the subtrahends.
 /// \returns A 256-bit vector of [16 x i16] containing the differences.
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_sub_epi16(__m256i __a, __m256i __b)
-{
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_sub_epi16(__m256i __a, __m256i __b) {
   return (__m256i)((__v16hu)__a - (__v16hu)__b);
 }
 
@@ -2559,9 +2484,8 @@ _mm256_sub_epi16(__m256i __a, __m256i __b)
 /// \param __b
 ///    A 256-bit vector of [8 x i32] containing the subtrahends.
 /// \returns A 256-bit vector of [8 x i32] containing the differences.
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_sub_epi32(__m256i __a, __m256i __b)
-{
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_sub_epi32(__m256i __a, __m256i __b) {
   return (__m256i)((__v8su)__a - (__v8su)__b);
 }
 
@@ -2585,9 +2509,8 @@ _mm256_sub_epi32(__m256i __a, __m256i __b)
 /// \param __b
 ///    A 256-bit vector of [4 x i64] containing the subtrahends.
 /// \returns A 256-bit vector of [4 x i64] containing the differences.
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_sub_epi64(__m256i __a, __m256i __b)
-{
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_sub_epi64(__m256i __a, __m256i __b) {
   return (__m256i)((__v4du)__a - (__v4du)__b);
 }
 
@@ -2611,9 +2534,8 @@ _mm256_sub_epi64(__m256i __a, __m256i __b)
 /// \param __b
 ///    A 256-bit integer vector containing the subtrahends.
 /// \returns A 256-bit integer vector containing the differences.
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_subs_epi8(__m256i __a, __m256i __b)
-{
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_subs_epi8(__m256i __a, __m256i __b) {
   return (__m256i)__builtin_elementwise_sub_sat((__v32qs)__a, (__v32qs)__b);
 }
 
@@ -2637,9 +2559,8 @@ _mm256_subs_epi8(__m256i __a, __m256i __b)
 /// \param __b
 ///    A 256-bit vector of [16 x i16] containing the subtrahends.
 /// \returns A 256-bit vector of [16 x i16] containing the differences.
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_subs_epi16(__m256i __a, __m256i __b)
-{
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_subs_epi16(__m256i __a, __m256i __b) {
   return (__m256i)__builtin_elementwise_sub_sat((__v16hi)__a, (__v16hi)__b);
 }
 
@@ -2664,9 +2585,8 @@ _mm256_subs_epi16(__m256i __a, __m256i __b)
 /// \param __b
 ///    A 256-bit integer vector containing the subtrahends.
 /// \returns A 256-bit integer vector containing the differences.
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_subs_epu8(__m256i __a, __m256i __b)
-{
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_subs_epu8(__m256i __a, __m256i __b) {
   return (__m256i)__builtin_elementwise_sub_sat((__v32qu)__a, (__v32qu)__b);
 }
 
@@ -2690,9 +2610,8 @@ _mm256_subs_epu8(__m256i __a, __m256i __b)
 /// \param __b
 ///    A 256-bit vector of [16 x i16] containing the subtrahends.
 /// \returns A 256-bit vector of [16 x i16] containing the differences.
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_subs_epu16(__m256i __a, __m256i __b)
-{
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_subs_epu16(__m256i __a, __m256i __b) {
   return (__m256i)__builtin_elementwise_sub_sat((__v16hu)__a, (__v16hu)__b);
 }
 
@@ -2724,9 +2643,8 @@ _mm256_subs_epu16(__m256i __a, __m256i __b)
 ///    A 256-bit integer vector used as the source for the odd-numbered bytes
 ///    of the result.
 /// \returns A 256-bit integer vector containing the result.
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_unpackhi_epi8(__m256i __a, __m256i __b)
-{
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_unpackhi_epi8(__m256i __a, __m256i __b) {
   return (__m256i)__builtin_shufflevector((__v32qi)__a, (__v32qi)__b, 8, 32+8, 9, 32+9, 10, 32+10, 11, 32+11, 12, 32+12, 13, 32+13, 14, 32+14, 15, 32+15, 24, 32+24, 25, 32+25, 26, 32+26, 27, 32+27, 28, 32+28, 29, 32+29, 30, 32+30, 31, 32+31);
 }
 
@@ -2759,9 +2677,8 @@ _mm256_unpackhi_epi8(__m256i __a, __m256i __b)
 ///    A 256-bit vector of [16 x i16] used as the source for the odd-numbered
 ///    elements of the result.
 /// \returns A 256-bit vector of [16 x i16] containing the result.
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_unpackhi_epi16(__m256i __a, __m256i __b)
-{
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_unpackhi_epi16(__m256i __a, __m256i __b) {
   return (__m256i)__builtin_shufflevector((__v16hi)__a, (__v16hi)__b, 4, 16+4, 5, 16+5, 6, 16+6, 7, 16+7, 12, 16+12, 13, 16+13, 14, 16+14, 15, 16+15);
 }
 
@@ -2793,9 +2710,8 @@ _mm256_unpackhi_epi16(__m256i __a, __m256i __b)
 ///    A 256-bit vector of [8 x i32] used as the source for the odd-numbered
 ///    elements of the result.
 /// \returns A 256-bit vector of [8 x i32] containing the result.
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_unpackhi_epi32(__m256i __a, __m256i __b)
-{
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_unpackhi_epi32(__m256i __a, __m256i __b) {
   return (__m256i)__builtin_shufflevector((__v8si)__a, (__v8si)__b, 2, 8+2, 3, 8+3, 6, 8+6, 7, 8+7);
 }
 
@@ -2823,9 +2739,8 @@ _mm256_unpackhi_epi32(__m256i __a, __m256i __b)
 ///    A 256-bit vector of [4 x i64] used as the source for the odd-numbered
 ///    elements of the result.
 /// \returns A 256-bit vector of [4 x i64] containing the result.
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_unpackhi_epi64(__m256i __a, __m256i __b)
-{
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_unpackhi_epi64(__m256i __a, __m256i __b) {
   return (__m256i)__builtin_shufflevector((__v4di)__a, (__v4di)__b, 1, 4+1, 3, 4+3);
 }
 
@@ -2857,9 +2772,8 @@ _mm256_unpackhi_epi64(__m256i __a, __m256i __b)
 ///    A 256-bit integer vector used as the source for the odd-numbered bytes
 ///    of the result.
 /// \returns A 256-bit integer vector containing the result.
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_unpacklo_epi8(__m256i __a, __m256i __b)
-{
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_unpacklo_epi8(__m256i __a, __m256i __b) {
   return (__m256i)__builtin_shufflevector((__v32qi)__a, (__v32qi)__b, 0, 32+0, 1, 32+1, 2, 32+2, 3, 32+3, 4, 32+4, 5, 32+5, 6, 32+6, 7, 32+7, 16, 32+16, 17, 32+17, 18, 32+18, 19, 32+19, 20, 32+20, 21, 32+21, 22, 32+22, 23, 32+23);
 }
 
@@ -2892,9 +2806,8 @@ _mm256_unpacklo_epi8(__m256i __a, __m256i __b)
 ///    A 256-bit vector of [16 x i16] used as the source for the odd-numbered
 ///    elements of the result.
 /// \returns A 256-bit vector of [16 x i16] containing the result.
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_unpacklo_epi16(__m256i __a, __m256i __b)
-{
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_unpacklo_epi16(__m256i __a, __m256i __b) {
   return (__m256i)__builtin_shufflevector((__v16hi)__a, (__v16hi)__b, 0, 16+0, 1, 16+1, 2, 16+2, 3, 16+3, 8, 16+8, 9, 16+9, 10, 16+10, 11, 16+11);
 }
 
@@ -2926,9 +2839,8 @@ _mm256_unpacklo_epi16(__m256i __a, __m256i __b)
 ///    A 256-bit vector of [8 x i32] used as the source for the odd-numbered
 ///    elements of the result.
 /// \returns A 256-bit vector of [8 x i32] containing the result.
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_unpacklo_epi32(__m256i __a, __m256i __b)
-{
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_unpacklo_epi32(__m256i __a, __m256i __b) {
   return (__m256i)__builtin_shufflevector((__v8si)__a, (__v8si)__b, 0, 8+0, 1, 8+1, 4, 8+4, 5, 8+5);
 }
 
@@ -2956,9 +2868,8 @@ _mm256_unpacklo_epi32(__m256i __a, __m256i __b)
 ///    A 256-bit vector of [4 x i64] used as the source for the odd-numbered
 ///    elements of the result.
 /// \returns A 256-bit vector of [4 x i64] containing the result.
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_unpacklo_epi64(__m256i __a, __m256i __b)
-{
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_unpacklo_epi64(__m256i __a, __m256i __b) {
   return (__m256i)__builtin_shufflevector((__v4di)__a, (__v4di)__b, 0, 4+0, 2, 4+2);
 }
 
@@ -2974,7 +2885,7 @@ _mm256_unpacklo_epi64(__m256i __a, __m256i __b)
 /// \param __b
 ///    A 256-bit integer vector.
 /// \returns A 256-bit integer vector containing the result.
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
 _mm256_xor_si256(__m256i __a, __m256i __b)
 {
   return (__m256i)((__v4du)__a ^ (__v4du)__b);
@@ -3009,9 +2920,8 @@ _mm256_stream_load_si256(const void *__V)
 /// \param __X
 ///    A 128-bit vector of [4 x float] whose low element will be broadcast.
 /// \returns A 128-bit vector of [4 x float] containing the result.
-static __inline__ __m128 __DEFAULT_FN_ATTRS128
-_mm_broadcastss_ps(__m128 __X)
-{
+static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR
+_mm_broadcastss_ps(__m128 __X) {
   return (__m128)__builtin_shufflevector((__v4sf)__X, (__v4sf)__X, 0, 0, 0, 0);
 }
 
@@ -3026,9 +2936,8 @@ _mm_broadcastss_ps(__m128 __X)
 /// \param __a
 ///    A 128-bit vector of [2 x double] whose low element will be broadcast.
 /// \returns A 128-bit vector of [2 x double] containing the result.
-static __inline__ __m128d __DEFAULT_FN_ATTRS128
-_mm_broadcastsd_pd(__m128d __a)
-{
+static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR
+_mm_broadcastsd_pd(__m128d __a) {
   return __builtin_shufflevector((__v2df)__a, (__v2df)__a, 0, 0);
 }
 
@@ -3043,9 +2952,8 @@ _mm_broadcastsd_pd(__m128d __a)
 /// \param __X
 ///    A 128-bit vector of [4 x float] whose low element will be broadcast.
 /// \returns A 256-bit vector of [8 x float] containing the result.
-static __inline__ __m256 __DEFAULT_FN_ATTRS256
-_mm256_broadcastss_ps(__m128 __X)
-{
+static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_broadcastss_ps(__m128 __X) {
   return (__m256)__builtin_shufflevector((__v4sf)__X, (__v4sf)__X, 0, 0, 0, 0, 0, 0, 0, 0);
 }
 
@@ -3060,9 +2968,8 @@ _mm256_broadcastss_ps(__m128 __X)
 /// \param __X
 ///    A 128-bit vector of [2 x double] whose low element will be broadcast.
 /// \returns A 256-bit vector of [4 x double] containing the result.
-static __inline__ __m256d __DEFAULT_FN_ATTRS256
-_mm256_broadcastsd_pd(__m128d __X)
-{
+static __inline__ __m256d __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_broadcastsd_pd(__m128d __X) {
   return (__m256d)__builtin_shufflevector((__v2df)__X, (__v2df)__X, 0, 0, 0, 0);
 }
 
@@ -3076,9 +2983,8 @@ _mm256_broadcastsd_pd(__m128d __X)
 /// \param __X
 ///    A 128-bit integer vector to be broadcast.
 /// \returns A 256-bit integer vector containing the result.
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_broadcastsi128_si256(__m128i __X)
-{
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_broadcastsi128_si256(__m128i __X) {
   return (__m256i)__builtin_shufflevector((__v2di)__X, (__v2di)__X, 0, 1, 0, 1);
 }
 
@@ -3168,9 +3074,8 @@ _mm256_broadcastsi128_si256(__m128i __X)
 /// \param __X
 ///    A 128-bit integer vector whose low byte will be broadcast.
 /// \returns A 256-bit integer vector containing the result.
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_broadcastb_epi8(__m128i __X)
-{
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_broadcastb_epi8(__m128i __X) {
   return (__m256i)__builtin_shufflevector((__v16qi)__X, (__v16qi)__X, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
 }
 
@@ -3184,9 +3089,8 @@ _mm256_broadcastb_epi8(__m128i __X)
 /// \param __X
 ///    A 128-bit vector of [8 x i16] whose low element will be broadcast.
 /// \returns A 256-bit vector of [16 x i16] containing the result.
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_broadcastw_epi16(__m128i __X)
-{
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_broadcastw_epi16(__m128i __X) {
   return (__m256i)__builtin_shufflevector((__v8hi)__X, (__v8hi)__X, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
 }
 
@@ -3200,9 +3104,8 @@ _mm256_broadcastw_epi16(__m128i __X)
 /// \param __X
 ///    A 128-bit vector of [4 x i32] whose low element will be broadcast.
 /// \returns A 256-bit vector of [8 x i32] containing the result.
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_broadcastd_epi32(__m128i __X)
-{
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_broadcastd_epi32(__m128i __X) {
   return (__m256i)__builtin_shufflevector((__v4si)__X, (__v4si)__X, 0, 0, 0, 0, 0, 0, 0, 0);
 }
 
@@ -3216,9 +3119,8 @@ _mm256_broadcastd_epi32(__m128i __X)
 /// \param __X
 ///    A 128-bit vector of [2 x i64] whose low element will be broadcast.
 /// \returns A 256-bit vector of [4 x i64] containing the result.
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_broadcastq_epi64(__m128i __X)
-{
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_broadcastq_epi64(__m128i __X) {
   return (__m256i)__builtin_shufflevector((__v2di)__X, (__v2di)__X, 0, 0, 0, 0);
 }
 
@@ -3232,9 +3134,8 @@ _mm256_broadcastq_epi64(__m128i __X)
 /// \param __X
 ///    A 128-bit integer vector whose low byte will be broadcast.
 /// \returns A 128-bit integer vector containing the result.
-static __inline__ __m128i __DEFAULT_FN_ATTRS128
-_mm_broadcastb_epi8(__m128i __X)
-{
+static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
+_mm_broadcastb_epi8(__m128i __X) {
   return (__m128i)__builtin_shufflevector((__v16qi)__X, (__v16qi)__X, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
 }
 
@@ -3248,9 +3149,8 @@ _mm_broadcastb_epi8(__m128i __X)
 /// \param __X
 ///    A 128-bit vector of [8 x i16] whose low element will be broadcast.
 /// \returns A 128-bit vector of [8 x i16] containing the result.
-static __inline__ __m128i __DEFAULT_FN_ATTRS128
-_mm_broadcastw_epi16(__m128i __X)
-{
+static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
+_mm_broadcastw_epi16(__m128i __X) {
   return (__m128i)__builtin_shufflevector((__v8hi)__X, (__v8hi)__X, 0, 0, 0, 0, 0, 0, 0, 0);
 }
 
@@ -3264,9 +3164,8 @@ _mm_broadcastw_epi16(__m128i __X)
 /// \param __X
 ///    A 128-bit vector of [4 x i32] whose low element will be broadcast.
 /// \returns A 128-bit vector of [4 x i32] containing the result.
-static __inline__ __m128i __DEFAULT_FN_ATTRS128
-_mm_broadcastd_epi32(__m128i __X)
-{
+static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
+_mm_broadcastd_epi32(__m128i __X) {
   return (__m128i)__builtin_shufflevector((__v4si)__X, (__v4si)__X, 0, 0, 0, 0);
 }
 
@@ -3280,9 +3179,8 @@ _mm_broadcastd_epi32(__m128i __X)
 /// \param __X
 ///    A 128-bit vector of [2 x i64] whose low element will be broadcast.
 /// \returns A 128-bit vector of [2 x i64] containing the result.
-static __inline__ __m128i __DEFAULT_FN_ATTRS128
-_mm_broadcastq_epi64(__m128i __X)
-{
+static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
+_mm_broadcastq_epi64(__m128i __X) {
   return (__m128i)__builtin_shufflevector((__v2di)__X, (__v2di)__X, 0, 0);
 }
 
@@ -3308,9 +3206,8 @@ _mm_broadcastq_epi64(__m128i __X)
 ///    A 256-bit vector of [8 x i32] containing indexes of values to use from
 ///    \a __a.
 /// \returns A 256-bit vector of [8 x i32] containing the result.
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_permutevar8x32_epi32(__m256i __a, __m256i __b)
-{
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_permutevar8x32_epi32(__m256i __a, __m256i __b) {
   return (__m256i)__builtin_ia32_permvarsi256((__v8si)__a, (__v8si)__b);
 }
 
@@ -3366,9 +3263,8 @@ _mm256_permutevar8x32_epi32(__m256i __a, __m256i __b)
 ///    A 256-bit vector of [8 x i32] containing indexes of values to use from
 ///    \a __a.
 /// \returns A 256-bit vector of [8 x float] containing the result.
-static __inline__ __m256 __DEFAULT_FN_ATTRS256
-_mm256_permutevar8x32_ps(__m256 __a, __m256i __b)
-{
+static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_permutevar8x32_ps(__m256 __a, __m256i __b) {
   return (__m256)__builtin_ia32_permvarsf256((__v8sf)__a, (__v8si)__b);
 }
 
@@ -3756,7 +3652,7 @@ _mm_maskstore_epi64(long long *__X, __m128i __M, __m128i __Y)
 ///    A 256-bit vector of [8 x i32] containing the unsigned shift counts (in
 ///    bits).
 /// \returns A 256-bit vector of [8 x i32] containing the result.
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
 _mm256_sllv_epi32(__m256i __X, __m256i __Y)
 {
   return (__m256i)__builtin_ia32_psllv8si((__v8si)__X, (__v8si)__Y);
@@ -3778,7 +3674,7 @@ _mm256_sllv_epi32(__m256i __X, __m256i __Y)
 ///    A 128-bit vector of [4 x i32] containing the unsigned shift counts (in
 ///    bits).
 /// \returns A 128-bit vector of [4 x i32] containing the result.
-static __inline__ __m128i __DEFAULT_FN_ATTRS128
+static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
 _mm_sllv_epi32(__m128i __X, __m128i __Y)
 {
   return (__m128i)__builtin_ia32_psllv4si((__v4si)__X, (__v4si)__Y);
@@ -3800,7 +3696,7 @@ _mm_sllv_epi32(__m128i __X, __m128i __Y)
 ///    A 256-bit vector of [4 x i64] containing the unsigned shift counts (in
 ///    bits).
 /// \returns A 256-bit vector of [4 x i64] containing the result.
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
 _mm256_sllv_epi64(__m256i __X, __m256i __Y)
 {
   return (__m256i)__builtin_ia32_psllv4di((__v4di)__X, (__v4di)__Y);
@@ -3822,7 +3718,7 @@ _mm256_sllv_epi64(__m256i __X, __m256i __Y)
 ///    A 128-bit vector of [2 x i64] containing the unsigned shift counts (in
 ///    bits).
 /// \returns A 128-bit vector of [2 x i64] containing the result.
-static __inline__ __m128i __DEFAULT_FN_ATTRS128
+static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
 _mm_sllv_epi64(__m128i __X, __m128i __Y)
 {
   return (__m128i)__builtin_ia32_psllv2di((__v2di)__X, (__v2di)__Y);
@@ -3845,7 +3741,7 @@ _mm_sllv_epi64(__m128i __X, __m128i __Y)
 ///    A 256-bit vector of [8 x i32] containing the unsigned shift counts (in
 ///    bits).
 /// \returns A 256-bit vector of [8 x i32] containing the result.
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
 _mm256_srav_epi32(__m256i __X, __m256i __Y)
 {
   return (__m256i)__builtin_ia32_psrav8si((__v8si)__X, (__v8si)__Y);
@@ -3868,7 +3764,7 @@ _mm256_srav_epi32(__m256i __X, __m256i __Y)
 ///    A 128-bit vector of [4 x i32] containing the unsigned shift counts (in
 ///    bits).
 /// \returns A 128-bit vector of [4 x i32] containing the result.
-static __inline__ __m128i __DEFAULT_FN_ATTRS128
+static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
 _mm_srav_epi32(__m128i __X, __m128i __Y)
 {
   return (__m128i)__builtin_ia32_psrav4si((__v4si)__X, (__v4si)__Y);
@@ -3890,7 +3786,7 @@ _mm_srav_epi32(__m128i __X, __m128i __Y)
 ///    A 256-bit vector of [8 x i32] containing the unsigned shift counts (in
 ///    bits).
 /// \returns A 256-bit vector of [8 x i32] containing the result.
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
 _mm256_srlv_epi32(__m256i __X, __m256i __Y)
 {
   return (__m256i)__builtin_ia32_psrlv8si((__v8si)__X, (__v8si)__Y);
@@ -3912,7 +3808,7 @@ _mm256_srlv_epi32(__m256i __X, __m256i __Y)
 ///    A 128-bit vector of [4 x i32] containing the unsigned shift counts (in
 ///    bits).
 /// \returns A 128-bit vector of [4 x i32] containing the result.
-static __inline__ __m128i __DEFAULT_FN_ATTRS128
+static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
 _mm_srlv_epi32(__m128i __X, __m128i __Y)
 {
   return (__m128i)__builtin_ia32_psrlv4si((__v4si)__X, (__v4si)__Y);
@@ -3934,7 +3830,7 @@ _mm_srlv_epi32(__m128i __X, __m128i __Y)
 ///    A 256-bit vector of [4 x i64] containing the unsigned shift counts (in
 ///    bits).
 /// \returns A 256-bit vector of [4 x i64] containing the result.
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
 _mm256_srlv_epi64(__m256i __X, __m256i __Y)
 {
   return (__m256i)__builtin_ia32_psrlv4di((__v4di)__X, (__v4di)__Y);
@@ -3956,7 +3852,7 @@ _mm256_srlv_epi64(__m256i __X, __m256i __Y)
 ///    A 128-bit vector of [2 x i64] containing the unsigned shift counts (in
 ///    bits).
 /// \returns A 128-bit vector of [2 x i64] containing the result.
-static __inline__ __m128i __DEFAULT_FN_ATTRS128
+static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
 _mm_srlv_epi64(__m128i __X, __m128i __Y)
 {
   return (__m128i)__builtin_ia32_psrlv2di((__v2di)__X, (__v2di)__Y);
@@ -5289,5 +5185,7 @@ _mm_srlv_epi64(__m128i __X, __m128i __Y)
 
 #undef __DEFAULT_FN_ATTRS256
 #undef __DEFAULT_FN_ATTRS128
+#undef __DEFAULT_FN_ATTRS256_CONSTEXPR
+#undef __DEFAULT_FN_ATTRS128_CONSTEXPR
 
 #endif /* __AVX2INTRIN_H */
diff --git a/lib/include/avx512bf16intrin.h b/lib/include/avx512bf16intrin.h
index b28d2e243f..458d1f8b99 100644
--- a/lib/include/avx512bf16intrin.h
+++ b/lib/include/avx512bf16intrin.h
@@ -19,12 +19,19 @@ typedef __bf16 __v32bf __attribute__((__vector_size__(64), __aligned__(64)));
 typedef __bf16 __m512bh __attribute__((__vector_size__(64), __aligned__(64)));
 typedef __bf16 __bfloat16 __attribute__((deprecated("use __bf16 instead")));
 
-#define __DEFAULT_FN_ATTRS512 \
-  __attribute__((__always_inline__, __nodebug__, __target__("avx512bf16,evex512"), \
+#define __DEFAULT_FN_ATTRS512                                                  \
+  __attribute__((__always_inline__, __nodebug__, __target__("avx512bf16"),     \
                  __min_vector_width__(512)))
 #define __DEFAULT_FN_ATTRS                                                     \
-  __attribute__((__always_inline__, __nodebug__,                               \
-                 __target__("avx512bf16,no-evex512")))
+  __attribute__((__always_inline__, __nodebug__, __target__("avx512bf16")))
+
+#if defined(__cplusplus) && (__cplusplus >= 201103L)
+#define __DEFAULT_FN_ATTRS512_CONSTEXPR __DEFAULT_FN_ATTRS512 constexpr
+#define __DEFAULT_FN_ATTRS_CONSTEXPR __DEFAULT_FN_ATTRS constexpr
+#else
+#define __DEFAULT_FN_ATTRS512_CONSTEXPR __DEFAULT_FN_ATTRS512
+#define __DEFAULT_FN_ATTRS_CONSTEXPR __DEFAULT_FN_ATTRS
+#endif
 
 /// Convert One BF16 Data to One Single Float Data.
 ///
@@ -36,8 +43,8 @@ typedef __bf16 __bfloat16 __attribute__((deprecated("use __bf16 instead")));
 ///    A bfloat data.
 /// \returns A float data whose sign field and exponent field keep unchanged,
 ///    and fraction field is extended to 23 bits.
-static __inline__ float __DEFAULT_FN_ATTRS _mm_cvtsbh_ss(__bf16 __A) {
-  return __builtin_ia32_cvtsbf162ss_32(__A);
+static __inline__ float __DEFAULT_FN_ATTRS_CONSTEXPR _mm_cvtsbh_ss(__bf16 __A) {
+  return (float)(__A);
 }
 
 /// Convert Two Packed Single Data to One Packed BF16 Data.
@@ -236,9 +243,9 @@ _mm512_maskz_dpbf16_ps(__mmask16 __U, __m512 __D, __m512bh __A, __m512bh __B) {
 /// \param __A
 ///    A 256-bit vector of [16 x bfloat].
 /// \returns A 512-bit vector of [16 x float] come from conversion of __A
-static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_cvtpbh_ps(__m256bh __A) {
-  return _mm512_castsi512_ps((__m512i)_mm512_slli_epi32(
-      (__m512i)_mm512_cvtepi16_epi32((__m256i)__A), 16));
+static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_cvtpbh_ps(__m256bh __A) {
+  return (__m512) __builtin_convertvector(__A, __v16sf);
 }
 
 /// Convert Packed BF16 Data to Packed float Data using zeroing mask.
@@ -251,10 +258,11 @@ static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_cvtpbh_ps(__m256bh __A) {
 /// \param __A
 ///    A 256-bit vector of [16 x bfloat].
 /// \returns A 512-bit vector of [16 x float] come from conversion of __A
-static __inline__ __m512 __DEFAULT_FN_ATTRS512
+static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
 _mm512_maskz_cvtpbh_ps(__mmask16 __U, __m256bh __A) {
-  return _mm512_castsi512_ps((__m512i)_mm512_slli_epi32(
-      (__m512i)_mm512_maskz_cvtepi16_epi32((__mmask16)__U, (__m256i)__A), 16));
+  return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
+                                             (__v16sf)_mm512_cvtpbh_ps(__A),
+                                             (__v16sf)_mm512_setzero_ps());
 }
 
 /// Convert Packed BF16 Data to Packed float Data using merging mask.
@@ -269,15 +277,16 @@ _mm512_maskz_cvtpbh_ps(__mmask16 __U, __m256bh __A) {
 /// \param __A
 ///    A 256-bit vector of [16 x bfloat].
 /// \returns A 512-bit vector of [16 x float] come from conversion of __A
-static __inline__ __m512 __DEFAULT_FN_ATTRS512
+static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
 _mm512_mask_cvtpbh_ps(__m512 __S, __mmask16 __U, __m256bh __A) {
-  return _mm512_castsi512_ps((__m512i)_mm512_mask_slli_epi32(
-      (__m512i)__S, (__mmask16)__U,
-      (__m512i)_mm512_cvtepi16_epi32((__m256i)__A), 16));
+  return (__m512)__builtin_ia32_selectps_512(
+      (__mmask16)__U, (__v16sf)_mm512_cvtpbh_ps(__A), (__v16sf)__S);
 }
 
 #undef __DEFAULT_FN_ATTRS
+#undef __DEFAULT_FN_ATTRS_CONSTEXPR
 #undef __DEFAULT_FN_ATTRS512
+#undef __DEFAULT_FN_ATTRS512_CONSTEXPR
 
 #endif
 #endif
diff --git a/lib/include/avx512bitalgintrin.h b/lib/include/avx512bitalgintrin.h
index 3c446b34e7..f5e9b1a84f 100644
--- a/lib/include/avx512bitalgintrin.h
+++ b/lib/include/avx512bitalgintrin.h
@@ -15,53 +15,44 @@
 #define __AVX512BITALGINTRIN_H
 
 /* Define the default attributes for the functions in this file. */
+#if defined(__cplusplus) && (__cplusplus >= 201103L)
 #define __DEFAULT_FN_ATTRS                                                     \
-  __attribute__((__always_inline__, __nodebug__,                               \
-                 __target__("avx512bitalg,evex512"),                           \
+  __attribute__((__always_inline__, __nodebug__, __target__("avx512bitalg"),   \
+                 __min_vector_width__(512))) constexpr
+#else
+#define __DEFAULT_FN_ATTRS                                                     \
+  __attribute__((__always_inline__, __nodebug__, __target__("avx512bitalg"),   \
                  __min_vector_width__(512)))
+#endif
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS
-_mm512_popcnt_epi16(__m512i __A)
-{
+static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_popcnt_epi16(__m512i __A) {
   return (__m512i)__builtin_elementwise_popcount((__v32hu)__A);
 }
 
 static __inline__ __m512i __DEFAULT_FN_ATTRS
-_mm512_mask_popcnt_epi16(__m512i __A, __mmask32 __U, __m512i __B)
-{
-  return (__m512i) __builtin_ia32_selectw_512((__mmask32) __U,
-              (__v32hi) _mm512_popcnt_epi16(__B),
-              (__v32hi) __A);
+_mm512_mask_popcnt_epi16(__m512i __A, __mmask32 __U, __m512i __B) {
+  return (__m512i)__builtin_ia32_selectw_512(
+      (__mmask32)__U, (__v32hi)_mm512_popcnt_epi16(__B), (__v32hi)__A);
 }
 
 static __inline__ __m512i __DEFAULT_FN_ATTRS
-_mm512_maskz_popcnt_epi16(__mmask32 __U, __m512i __B)
-{
-  return _mm512_mask_popcnt_epi16((__m512i) _mm512_setzero_si512(),
-              __U,
-              __B);
+_mm512_maskz_popcnt_epi16(__mmask32 __U, __m512i __B) {
+  return _mm512_mask_popcnt_epi16((__m512i)_mm512_setzero_si512(), __U, __B);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS
-_mm512_popcnt_epi8(__m512i __A)
-{
+static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_popcnt_epi8(__m512i __A) {
   return (__m512i)__builtin_elementwise_popcount((__v64qu)__A);
 }
 
 static __inline__ __m512i __DEFAULT_FN_ATTRS
-_mm512_mask_popcnt_epi8(__m512i __A, __mmask64 __U, __m512i __B)
-{
-  return (__m512i) __builtin_ia32_selectb_512((__mmask64) __U,
-              (__v64qi) _mm512_popcnt_epi8(__B),
-              (__v64qi) __A);
+_mm512_mask_popcnt_epi8(__m512i __A, __mmask64 __U, __m512i __B) {
+  return (__m512i)__builtin_ia32_selectb_512(
+      (__mmask64)__U, (__v64qi)_mm512_popcnt_epi8(__B), (__v64qi)__A);
 }
 
 static __inline__ __m512i __DEFAULT_FN_ATTRS
-_mm512_maskz_popcnt_epi8(__mmask64 __U, __m512i __B)
-{
-  return _mm512_mask_popcnt_epi8((__m512i) _mm512_setzero_si512(),
-              __U,
-              __B);
+_mm512_maskz_popcnt_epi8(__mmask64 __U, __m512i __B) {
+  return _mm512_mask_popcnt_epi8((__m512i)_mm512_setzero_si512(), __U, __B);
 }
 
 static __inline__ __mmask64 __DEFAULT_FN_ATTRS
@@ -80,7 +71,5 @@ _mm512_bitshuffle_epi64_mask(__m512i __A, __m512i __B)
               __B);
 }
 
-
 #undef __DEFAULT_FN_ATTRS
-
 #endif
diff --git a/lib/include/avx512bwintrin.h b/lib/include/avx512bwintrin.h
index c854720de6..cd4663abe7 100644
--- a/lib/include/avx512bwintrin.h
+++ b/lib/include/avx512bwintrin.h
@@ -19,153 +19,150 @@ typedef unsigned long long __mmask64;
 
 /* Define the default attributes for the functions in this file. */
 #define __DEFAULT_FN_ATTRS512                                                  \
-  __attribute__((__always_inline__, __nodebug__,                               \
-                 __target__("avx512bw,evex512"), __min_vector_width__(512)))
+  __attribute__((__always_inline__, __nodebug__, __target__("avx512bw"),       \
+                 __min_vector_width__(512)))
 #define __DEFAULT_FN_ATTRS                                                     \
-  __attribute__((__always_inline__, __nodebug__,                               \
-                 __target__("avx512bw,no-evex512")))
+  __attribute__((__always_inline__, __nodebug__, __target__("avx512bw")))
 
-static __inline __mmask32 __DEFAULT_FN_ATTRS
-_knot_mask32(__mmask32 __M)
-{
+#if defined(__cplusplus) && (__cplusplus >= 201103L)
+#define __DEFAULT_FN_ATTRS512_CONSTEXPR __DEFAULT_FN_ATTRS512 constexpr
+#define __DEFAULT_FN_ATTRS_CONSTEXPR __DEFAULT_FN_ATTRS constexpr
+#else
+#define __DEFAULT_FN_ATTRS512_CONSTEXPR __DEFAULT_FN_ATTRS512
+#define __DEFAULT_FN_ATTRS_CONSTEXPR __DEFAULT_FN_ATTRS
+#endif
+
+static __inline __mmask32
+    __DEFAULT_FN_ATTRS_CONSTEXPR _knot_mask32(__mmask32 __M) {
   return __builtin_ia32_knotsi(__M);
 }
 
-static __inline __mmask64 __DEFAULT_FN_ATTRS _knot_mask64(__mmask64 __M) {
+static __inline __mmask64 __DEFAULT_FN_ATTRS_CONSTEXPR
+_knot_mask64(__mmask64 __M) {
   return __builtin_ia32_knotdi(__M);
 }
 
-static __inline__ __mmask32 __DEFAULT_FN_ATTRS
-_kand_mask32(__mmask32 __A, __mmask32 __B)
-{
+static __inline__ __mmask32 __DEFAULT_FN_ATTRS_CONSTEXPR
+_kand_mask32(__mmask32 __A, __mmask32 __B) {
   return (__mmask32)__builtin_ia32_kandsi((__mmask32)__A, (__mmask32)__B);
 }
 
-static __inline__ __mmask64 __DEFAULT_FN_ATTRS _kand_mask64(__mmask64 __A,
-                                                            __mmask64 __B) {
+static __inline__ __mmask64 __DEFAULT_FN_ATTRS_CONSTEXPR
+_kand_mask64(__mmask64 __A, __mmask64 __B) {
   return (__mmask64)__builtin_ia32_kanddi((__mmask64)__A, (__mmask64)__B);
 }
 
-static __inline__ __mmask32 __DEFAULT_FN_ATTRS
-_kandn_mask32(__mmask32 __A, __mmask32 __B)
-{
+static __inline__ __mmask32 __DEFAULT_FN_ATTRS_CONSTEXPR
+_kandn_mask32(__mmask32 __A, __mmask32 __B) {
   return (__mmask32)__builtin_ia32_kandnsi((__mmask32)__A, (__mmask32)__B);
 }
 
-static __inline__ __mmask64 __DEFAULT_FN_ATTRS _kandn_mask64(__mmask64 __A,
-                                                             __mmask64 __B) {
+static __inline__ __mmask64 __DEFAULT_FN_ATTRS_CONSTEXPR
+_kandn_mask64(__mmask64 __A, __mmask64 __B) {
   return (__mmask64)__builtin_ia32_kandndi((__mmask64)__A, (__mmask64)__B);
 }
 
-static __inline__ __mmask32 __DEFAULT_FN_ATTRS
-_kor_mask32(__mmask32 __A, __mmask32 __B)
-{
+static __inline__ __mmask32 __DEFAULT_FN_ATTRS_CONSTEXPR
+_kor_mask32(__mmask32 __A, __mmask32 __B) {
   return (__mmask32)__builtin_ia32_korsi((__mmask32)__A, (__mmask32)__B);
 }
 
-static __inline__ __mmask64 __DEFAULT_FN_ATTRS _kor_mask64(__mmask64 __A,
-                                                           __mmask64 __B) {
+static __inline__ __mmask64 __DEFAULT_FN_ATTRS_CONSTEXPR
+_kor_mask64(__mmask64 __A, __mmask64 __B) {
   return (__mmask64)__builtin_ia32_kordi((__mmask64)__A, (__mmask64)__B);
 }
 
-static __inline__ __mmask32 __DEFAULT_FN_ATTRS
-_kxnor_mask32(__mmask32 __A, __mmask32 __B)
-{
+static __inline__ __mmask32 __DEFAULT_FN_ATTRS_CONSTEXPR
+_kxnor_mask32(__mmask32 __A, __mmask32 __B) {
   return (__mmask32)__builtin_ia32_kxnorsi((__mmask32)__A, (__mmask32)__B);
 }
 
-static __inline__ __mmask64 __DEFAULT_FN_ATTRS _kxnor_mask64(__mmask64 __A,
-                                                             __mmask64 __B) {
+static __inline__ __mmask64 __DEFAULT_FN_ATTRS_CONSTEXPR
+_kxnor_mask64(__mmask64 __A, __mmask64 __B) {
   return (__mmask64)__builtin_ia32_kxnordi((__mmask64)__A, (__mmask64)__B);
 }
 
-static __inline__ __mmask32 __DEFAULT_FN_ATTRS
-_kxor_mask32(__mmask32 __A, __mmask32 __B)
-{
+static __inline__ __mmask32 __DEFAULT_FN_ATTRS_CONSTEXPR
+_kxor_mask32(__mmask32 __A, __mmask32 __B) {
   return (__mmask32)__builtin_ia32_kxorsi((__mmask32)__A, (__mmask32)__B);
 }
 
-static __inline__ __mmask64 __DEFAULT_FN_ATTRS _kxor_mask64(__mmask64 __A,
-                                                            __mmask64 __B) {
+static __inline__ __mmask64 __DEFAULT_FN_ATTRS_CONSTEXPR
+_kxor_mask64(__mmask64 __A, __mmask64 __B) {
   return (__mmask64)__builtin_ia32_kxordi((__mmask64)__A, (__mmask64)__B);
 }
 
-static __inline__ unsigned char __DEFAULT_FN_ATTRS
-_kortestc_mask32_u8(__mmask32 __A, __mmask32 __B)
-{
+static __inline__ unsigned char __DEFAULT_FN_ATTRS_CONSTEXPR
+_kortestc_mask32_u8(__mmask32 __A, __mmask32 __B) {
   return (unsigned char)__builtin_ia32_kortestcsi(__A, __B);
 }
 
-static __inline__ unsigned char __DEFAULT_FN_ATTRS
-_kortestz_mask32_u8(__mmask32 __A, __mmask32 __B)
-{
+static __inline__ unsigned char __DEFAULT_FN_ATTRS_CONSTEXPR
+_kortestz_mask32_u8(__mmask32 __A, __mmask32 __B) {
   return (unsigned char)__builtin_ia32_kortestzsi(__A, __B);
 }
 
-static __inline__ unsigned char __DEFAULT_FN_ATTRS
+static __inline__ unsigned char __DEFAULT_FN_ATTRS_CONSTEXPR
 _kortest_mask32_u8(__mmask32 __A, __mmask32 __B, unsigned char *__C) {
   *__C = (unsigned char)__builtin_ia32_kortestcsi(__A, __B);
   return (unsigned char)__builtin_ia32_kortestzsi(__A, __B);
 }
 
-static __inline__ unsigned char __DEFAULT_FN_ATTRS
+static __inline__ unsigned char __DEFAULT_FN_ATTRS_CONSTEXPR
 _kortestc_mask64_u8(__mmask64 __A, __mmask64 __B) {
   return (unsigned char)__builtin_ia32_kortestcdi(__A, __B);
 }
 
-static __inline__ unsigned char __DEFAULT_FN_ATTRS
+static __inline__ unsigned char __DEFAULT_FN_ATTRS_CONSTEXPR
 _kortestz_mask64_u8(__mmask64 __A, __mmask64 __B) {
   return (unsigned char)__builtin_ia32_kortestzdi(__A, __B);
 }
 
-static __inline__ unsigned char __DEFAULT_FN_ATTRS
+static __inline__ unsigned char __DEFAULT_FN_ATTRS_CONSTEXPR
 _kortest_mask64_u8(__mmask64 __A, __mmask64 __B, unsigned char *__C) {
   *__C = (unsigned char)__builtin_ia32_kortestcdi(__A, __B);
   return (unsigned char)__builtin_ia32_kortestzdi(__A, __B);
 }
 
-static __inline__ unsigned char __DEFAULT_FN_ATTRS
-_ktestc_mask32_u8(__mmask32 __A, __mmask32 __B)
-{
+static __inline__ unsigned char __DEFAULT_FN_ATTRS_CONSTEXPR
+_ktestc_mask32_u8(__mmask32 __A, __mmask32 __B) {
   return (unsigned char)__builtin_ia32_ktestcsi(__A, __B);
 }
 
-static __inline__ unsigned char __DEFAULT_FN_ATTRS
-_ktestz_mask32_u8(__mmask32 __A, __mmask32 __B)
-{
+static __inline__ unsigned char __DEFAULT_FN_ATTRS_CONSTEXPR
+_ktestz_mask32_u8(__mmask32 __A, __mmask32 __B) {
   return (unsigned char)__builtin_ia32_ktestzsi(__A, __B);
 }
 
-static __inline__ unsigned char __DEFAULT_FN_ATTRS
+static __inline__ unsigned char __DEFAULT_FN_ATTRS_CONSTEXPR
 _ktest_mask32_u8(__mmask32 __A, __mmask32 __B, unsigned char *__C) {
   *__C = (unsigned char)__builtin_ia32_ktestcsi(__A, __B);
   return (unsigned char)__builtin_ia32_ktestzsi(__A, __B);
 }
 
-static __inline__ unsigned char __DEFAULT_FN_ATTRS
+static __inline__ unsigned char __DEFAULT_FN_ATTRS_CONSTEXPR
 _ktestc_mask64_u8(__mmask64 __A, __mmask64 __B) {
   return (unsigned char)__builtin_ia32_ktestcdi(__A, __B);
 }
 
-static __inline__ unsigned char __DEFAULT_FN_ATTRS
+static __inline__ unsigned char __DEFAULT_FN_ATTRS_CONSTEXPR
 _ktestz_mask64_u8(__mmask64 __A, __mmask64 __B) {
   return (unsigned char)__builtin_ia32_ktestzdi(__A, __B);
 }
 
-static __inline__ unsigned char __DEFAULT_FN_ATTRS
+static __inline__ unsigned char __DEFAULT_FN_ATTRS_CONSTEXPR
 _ktest_mask64_u8(__mmask64 __A, __mmask64 __B, unsigned char *__C) {
   *__C = (unsigned char)__builtin_ia32_ktestcdi(__A, __B);
   return (unsigned char)__builtin_ia32_ktestzdi(__A, __B);
 }
 
-static __inline__ __mmask32 __DEFAULT_FN_ATTRS
-_kadd_mask32(__mmask32 __A, __mmask32 __B)
-{
+static __inline__ __mmask32 __DEFAULT_FN_ATTRS_CONSTEXPR
+_kadd_mask32(__mmask32 __A, __mmask32 __B) {
   return (__mmask32)__builtin_ia32_kaddsi((__mmask32)__A, (__mmask32)__B);
 }
 
-static __inline__ __mmask64 __DEFAULT_FN_ATTRS _kadd_mask64(__mmask64 __A,
-                                                            __mmask64 __B) {
+static __inline__ __mmask64 __DEFAULT_FN_ATTRS_CONSTEXPR
+_kadd_mask64(__mmask64 __A, __mmask64 __B) {
   return (__mmask64)__builtin_ia32_kadddi((__mmask64)__A, (__mmask64)__B);
 }
 
@@ -181,22 +178,22 @@ static __inline__ __mmask64 __DEFAULT_FN_ATTRS _kadd_mask64(__mmask64 __A,
 #define _kshiftri_mask64(A, I) \
   ((__mmask64)__builtin_ia32_kshiftridi((__mmask64)(A), (unsigned int)(I)))
 
-static __inline__ unsigned int __DEFAULT_FN_ATTRS
-_cvtmask32_u32(__mmask32 __A) {
+static __inline__ unsigned int
+    __DEFAULT_FN_ATTRS_CONSTEXPR _cvtmask32_u32(__mmask32 __A) {
   return (unsigned int)__builtin_ia32_kmovd((__mmask32)__A);
 }
 
-static __inline__ unsigned long long __DEFAULT_FN_ATTRS
+static __inline__ unsigned long long __DEFAULT_FN_ATTRS_CONSTEXPR
 _cvtmask64_u64(__mmask64 __A) {
   return (unsigned long long)__builtin_ia32_kmovq((__mmask64)__A);
 }
 
-static __inline__ __mmask32 __DEFAULT_FN_ATTRS
+static __inline__ __mmask32 __DEFAULT_FN_ATTRS_CONSTEXPR
 _cvtu32_mask32(unsigned int __A) {
   return (__mmask32)__builtin_ia32_kmovd((__mmask32)__A);
 }
 
-static __inline__ __mmask64 __DEFAULT_FN_ATTRS
+static __inline__ __mmask64 __DEFAULT_FN_ATTRS_CONSTEXPR
 _cvtu64_mask64(unsigned long long __A) {
   return (__mmask64)__builtin_ia32_kmovq((__mmask64)__A);
 }
@@ -362,168 +359,159 @@ static __inline__ void __DEFAULT_FN_ATTRS _store_mask64(__mmask64 *__A,
 #define _mm512_mask_cmpneq_epu16_mask(k, A, B) \
     _mm512_mask_cmp_epu16_mask((k), (A), (B), _MM_CMPINT_NE)
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
-_mm512_add_epi8 (__m512i __A, __m512i __B) {
+static __inline__ __m512i
+    __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_add_epi8(__m512i __A, __m512i __B) {
   return (__m512i) ((__v64qu) __A + (__v64qu) __B);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
 _mm512_mask_add_epi8(__m512i __W, __mmask64 __U, __m512i __A, __m512i __B) {
   return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U,
                                              (__v64qi)_mm512_add_epi8(__A, __B),
                                              (__v64qi)__W);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
 _mm512_maskz_add_epi8(__mmask64 __U, __m512i __A, __m512i __B) {
   return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U,
                                              (__v64qi)_mm512_add_epi8(__A, __B),
                                              (__v64qi)_mm512_setzero_si512());
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
-_mm512_sub_epi8 (__m512i __A, __m512i __B) {
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_sub_epi8(__m512i __A, __m512i __B) {
   return (__m512i) ((__v64qu) __A - (__v64qu) __B);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
 _mm512_mask_sub_epi8(__m512i __W, __mmask64 __U, __m512i __A, __m512i __B) {
   return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U,
                                              (__v64qi)_mm512_sub_epi8(__A, __B),
                                              (__v64qi)__W);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
 _mm512_maskz_sub_epi8(__mmask64 __U, __m512i __A, __m512i __B) {
   return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U,
                                              (__v64qi)_mm512_sub_epi8(__A, __B),
                                              (__v64qi)_mm512_setzero_si512());
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
-_mm512_add_epi16 (__m512i __A, __m512i __B) {
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_add_epi16(__m512i __A, __m512i __B) {
   return (__m512i) ((__v32hu) __A + (__v32hu) __B);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
 _mm512_mask_add_epi16(__m512i __W, __mmask32 __U, __m512i __A, __m512i __B) {
   return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
                                              (__v32hi)_mm512_add_epi16(__A, __B),
                                              (__v32hi)__W);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
 _mm512_maskz_add_epi16(__mmask32 __U, __m512i __A, __m512i __B) {
   return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
                                              (__v32hi)_mm512_add_epi16(__A, __B),
                                              (__v32hi)_mm512_setzero_si512());
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
-_mm512_sub_epi16 (__m512i __A, __m512i __B) {
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_sub_epi16(__m512i __A, __m512i __B) {
   return (__m512i) ((__v32hu) __A - (__v32hu) __B);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
 _mm512_mask_sub_epi16(__m512i __W, __mmask32 __U, __m512i __A, __m512i __B) {
   return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
                                              (__v32hi)_mm512_sub_epi16(__A, __B),
                                              (__v32hi)__W);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
 _mm512_maskz_sub_epi16(__mmask32 __U, __m512i __A, __m512i __B) {
   return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
                                              (__v32hi)_mm512_sub_epi16(__A, __B),
                                              (__v32hi)_mm512_setzero_si512());
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
 _mm512_mullo_epi16 (__m512i __A, __m512i __B) {
   return (__m512i) ((__v32hu) __A * (__v32hu) __B);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
 _mm512_mask_mullo_epi16(__m512i __W, __mmask32 __U, __m512i __A, __m512i __B) {
   return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
                                              (__v32hi)_mm512_mullo_epi16(__A, __B),
                                              (__v32hi)__W);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
 _mm512_maskz_mullo_epi16(__mmask32 __U, __m512i __A, __m512i __B) {
   return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
                                              (__v32hi)_mm512_mullo_epi16(__A, __B),
                                              (__v32hi)_mm512_setzero_si512());
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
-_mm512_mask_blend_epi8 (__mmask64 __U, __m512i __A, __m512i __W)
-{
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_mask_blend_epi8(__mmask64 __U, __m512i __A, __m512i __W) {
   return (__m512i) __builtin_ia32_selectb_512 ((__mmask64) __U,
               (__v64qi) __W,
               (__v64qi) __A);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
-_mm512_mask_blend_epi16 (__mmask32 __U, __m512i __A, __m512i __W)
-{
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_mask_blend_epi16(__mmask32 __U, __m512i __A, __m512i __W) {
   return (__m512i) __builtin_ia32_selectw_512 ((__mmask32) __U,
               (__v32hi) __W,
               (__v32hi) __A);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
-_mm512_abs_epi8 (__m512i __A)
-{
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_abs_epi8(__m512i __A) {
   return (__m512i)__builtin_elementwise_abs((__v64qs)__A);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
-_mm512_mask_abs_epi8 (__m512i __W, __mmask64 __U, __m512i __A)
-{
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_mask_abs_epi8(__m512i __W, __mmask64 __U, __m512i __A) {
   return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U,
                                              (__v64qi)_mm512_abs_epi8(__A),
                                              (__v64qi)__W);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
-_mm512_maskz_abs_epi8 (__mmask64 __U, __m512i __A)
-{
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_maskz_abs_epi8(__mmask64 __U, __m512i __A) {
   return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U,
                                              (__v64qi)_mm512_abs_epi8(__A),
                                              (__v64qi)_mm512_setzero_si512());
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
-_mm512_abs_epi16 (__m512i __A)
-{
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_abs_epi16(__m512i __A) {
   return (__m512i)__builtin_elementwise_abs((__v32hi)__A);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
-_mm512_mask_abs_epi16 (__m512i __W, __mmask32 __U, __m512i __A)
-{
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_mask_abs_epi16(__m512i __W, __mmask32 __U, __m512i __A) {
   return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
                                              (__v32hi)_mm512_abs_epi16(__A),
                                              (__v32hi)__W);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
-_mm512_maskz_abs_epi16 (__mmask32 __U, __m512i __A)
-{
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_maskz_abs_epi16(__mmask32 __U, __m512i __A) {
   return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
                                              (__v32hi)_mm512_abs_epi16(__A),
                                              (__v32hi)_mm512_setzero_si512());
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
-_mm512_packs_epi32(__m512i __A, __m512i __B)
-{
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_packs_epi32(__m512i __A, __m512i __B) {
   return (__m512i)__builtin_ia32_packssdw512((__v16si)__A, (__v16si)__B);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
 _mm512_maskz_packs_epi32(__mmask32 __M, __m512i __A, __m512i __B)
 {
   return (__m512i)__builtin_ia32_selectw_512((__mmask32)__M,
@@ -531,21 +519,19 @@ _mm512_maskz_packs_epi32(__mmask32 __M, __m512i __A, __m512i __B)
                                        (__v32hi)_mm512_setzero_si512());
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
-_mm512_mask_packs_epi32(__m512i __W, __mmask32 __M, __m512i __A, __m512i __B)
-{
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_mask_packs_epi32(__m512i __W, __mmask32 __M, __m512i __A, __m512i __B) {
   return (__m512i)__builtin_ia32_selectw_512((__mmask32)__M,
                                        (__v32hi)_mm512_packs_epi32(__A, __B),
                                        (__v32hi)__W);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
-_mm512_packs_epi16(__m512i __A, __m512i __B)
-{
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_packs_epi16(__m512i __A, __m512i __B) {
   return (__m512i)__builtin_ia32_packsswb512((__v32hi)__A, (__v32hi) __B);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
 _mm512_mask_packs_epi16(__m512i __W, __mmask64 __M, __m512i __A, __m512i __B)
 {
   return (__m512i)__builtin_ia32_selectb_512((__mmask64)__M,
@@ -553,7 +539,7 @@ _mm512_mask_packs_epi16(__m512i __W, __mmask64 __M, __m512i __A, __m512i __B)
                                         (__v64qi)__W);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
 _mm512_maskz_packs_epi16(__mmask64 __M, __m512i __A, __m512i __B)
 {
   return (__m512i)__builtin_ia32_selectb_512((__mmask64)__M,
@@ -561,13 +547,12 @@ _mm512_maskz_packs_epi16(__mmask64 __M, __m512i __A, __m512i __B)
                                         (__v64qi)_mm512_setzero_si512());
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
-_mm512_packus_epi32(__m512i __A, __m512i __B)
-{
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_packus_epi32(__m512i __A, __m512i __B) {
   return (__m512i)__builtin_ia32_packusdw512((__v16si) __A, (__v16si) __B);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
 _mm512_maskz_packus_epi32(__mmask32 __M, __m512i __A, __m512i __B)
 {
   return (__m512i)__builtin_ia32_selectw_512((__mmask32)__M,
@@ -575,7 +560,7 @@ _mm512_maskz_packus_epi32(__mmask32 __M, __m512i __A, __m512i __B)
                                        (__v32hi)_mm512_setzero_si512());
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
 _mm512_mask_packus_epi32(__m512i __W, __mmask32 __M, __m512i __A, __m512i __B)
 {
   return (__m512i)__builtin_ia32_selectw_512((__mmask32)__M,
@@ -583,13 +568,12 @@ _mm512_mask_packus_epi32(__m512i __W, __mmask32 __M, __m512i __A, __m512i __B)
                                        (__v32hi)__W);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
-_mm512_packus_epi16(__m512i __A, __m512i __B)
-{
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_packus_epi16(__m512i __A, __m512i __B) {
   return (__m512i)__builtin_ia32_packuswb512((__v32hi) __A, (__v32hi) __B);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
 _mm512_mask_packus_epi16(__m512i __W, __mmask64 __M, __m512i __A, __m512i __B)
 {
   return (__m512i)__builtin_ia32_selectb_512((__mmask64)__M,
@@ -597,7 +581,7 @@ _mm512_mask_packus_epi16(__m512i __W, __mmask64 __M, __m512i __A, __m512i __B)
                                         (__v64qi)__W);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
 _mm512_maskz_packus_epi16(__mmask64 __M, __m512i __A, __m512i __B)
 {
   return (__m512i)__builtin_ia32_selectb_512((__mmask64)__M,
@@ -605,35 +589,31 @@ _mm512_maskz_packus_epi16(__mmask64 __M, __m512i __A, __m512i __B)
                                         (__v64qi)_mm512_setzero_si512());
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
-_mm512_adds_epi8 (__m512i __A, __m512i __B)
-{
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_adds_epi8(__m512i __A, __m512i __B) {
   return (__m512i)__builtin_elementwise_add_sat((__v64qs)__A, (__v64qs)__B);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
-_mm512_mask_adds_epi8 (__m512i __W, __mmask64 __U, __m512i __A, __m512i __B)
-{
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_mask_adds_epi8(__m512i __W, __mmask64 __U, __m512i __A, __m512i __B) {
   return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U,
                                         (__v64qi)_mm512_adds_epi8(__A, __B),
                                         (__v64qi)__W);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
-_mm512_maskz_adds_epi8 (__mmask64 __U, __m512i __A, __m512i __B)
-{
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_maskz_adds_epi8(__mmask64 __U, __m512i __A, __m512i __B) {
   return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U,
                                         (__v64qi)_mm512_adds_epi8(__A, __B),
                                         (__v64qi)_mm512_setzero_si512());
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
-_mm512_adds_epi16 (__m512i __A, __m512i __B)
-{
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_adds_epi16(__m512i __A, __m512i __B) {
   return (__m512i)__builtin_elementwise_add_sat((__v32hi)__A, (__v32hi)__B);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
 _mm512_mask_adds_epi16 (__m512i __W, __mmask32 __U, __m512i __A, __m512i __B)
 {
   return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
@@ -641,7 +621,7 @@ _mm512_mask_adds_epi16 (__m512i __W, __mmask32 __U, __m512i __A, __m512i __B)
                                         (__v32hi)__W);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
 _mm512_maskz_adds_epi16 (__mmask32 __U, __m512i __A, __m512i __B)
 {
   return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
@@ -649,13 +629,12 @@ _mm512_maskz_adds_epi16 (__mmask32 __U, __m512i __A, __m512i __B)
                                         (__v32hi)_mm512_setzero_si512());
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
-_mm512_adds_epu8 (__m512i __A, __m512i __B)
-{
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_adds_epu8(__m512i __A, __m512i __B) {
   return (__m512i)__builtin_elementwise_add_sat((__v64qu) __A, (__v64qu) __B);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
 _mm512_mask_adds_epu8 (__m512i __W, __mmask64 __U, __m512i __A, __m512i __B)
 {
   return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U,
@@ -663,7 +642,7 @@ _mm512_mask_adds_epu8 (__m512i __W, __mmask64 __U, __m512i __A, __m512i __B)
                                         (__v64qi)__W);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
 _mm512_maskz_adds_epu8 (__mmask64 __U, __m512i __A, __m512i __B)
 {
   return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U,
@@ -671,280 +650,238 @@ _mm512_maskz_adds_epu8 (__mmask64 __U, __m512i __A, __m512i __B)
                                         (__v64qi)_mm512_setzero_si512());
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
-_mm512_adds_epu16 (__m512i __A, __m512i __B)
-{
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_adds_epu16(__m512i __A, __m512i __B) {
   return (__m512i)__builtin_elementwise_add_sat((__v32hu) __A, (__v32hu) __B);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
-_mm512_mask_adds_epu16 (__m512i __W, __mmask32 __U, __m512i __A, __m512i __B)
-{
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_mask_adds_epu16(__m512i __W, __mmask32 __U, __m512i __A, __m512i __B) {
   return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
                                         (__v32hi)_mm512_adds_epu16(__A, __B),
                                         (__v32hi)__W);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
-_mm512_maskz_adds_epu16 (__mmask32 __U, __m512i __A, __m512i __B)
-{
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_maskz_adds_epu16(__mmask32 __U, __m512i __A, __m512i __B) {
   return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
                                         (__v32hi)_mm512_adds_epu16(__A, __B),
                                         (__v32hi)_mm512_setzero_si512());
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
-_mm512_avg_epu8 (__m512i __A, __m512i __B)
-{
-  return (__m512i)__builtin_ia32_pavgb512((__v64qi)__A, (__v64qi)__B);
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_avg_epu8(__m512i __A, __m512i __B) {
+  return (__m512i)__builtin_ia32_pavgb512((__v64qu)__A, (__v64qu)__B);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
-_mm512_mask_avg_epu8 (__m512i __W, __mmask64 __U, __m512i __A,
-          __m512i __B)
-{
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_mask_avg_epu8(__m512i __W, __mmask64 __U, __m512i __A, __m512i __B) {
+  return (__m512i)__builtin_ia32_selectb_512(
+      (__mmask64)__U, (__v64qi)_mm512_avg_epu8(__A, __B), (__v64qi)__W);
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_maskz_avg_epu8(__mmask64 __U, __m512i __A, __m512i __B) {
   return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U,
-              (__v64qi)_mm512_avg_epu8(__A, __B),
-              (__v64qi)__W);
+                                             (__v64qi)_mm512_avg_epu8(__A, __B),
+                                             (__v64qi)_mm512_setzero_si512());
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
-_mm512_maskz_avg_epu8 (__mmask64 __U, __m512i __A, __m512i __B)
-{
-  return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U,
-              (__v64qi)_mm512_avg_epu8(__A, __B),
-              (__v64qi)_mm512_setzero_si512());
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_avg_epu16(__m512i __A, __m512i __B) {
+  return (__m512i)__builtin_ia32_pavgw512((__v32hu)__A, (__v32hu)__B);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
-_mm512_avg_epu16 (__m512i __A, __m512i __B)
-{
-  return (__m512i)__builtin_ia32_pavgw512((__v32hi)__A, (__v32hi)__B);
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_mask_avg_epu16(__m512i __W, __mmask32 __U, __m512i __A, __m512i __B) {
+  return (__m512i)__builtin_ia32_selectw_512(
+      (__mmask32)__U, (__v32hi)_mm512_avg_epu16(__A, __B), (__v32hi)__W);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
-_mm512_mask_avg_epu16 (__m512i __W, __mmask32 __U, __m512i __A,
-           __m512i __B)
-{
-  return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
-              (__v32hi)_mm512_avg_epu16(__A, __B),
-              (__v32hi)__W);
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_maskz_avg_epu16(__mmask32 __U, __m512i __A, __m512i __B) {
+  return (__m512i)__builtin_ia32_selectw_512(
+      (__mmask32)__U, (__v32hi)_mm512_avg_epu16(__A, __B),
+      (__v32hi)_mm512_setzero_si512());
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
-_mm512_maskz_avg_epu16 (__mmask32 __U, __m512i __A, __m512i __B)
-{
-  return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
-              (__v32hi)_mm512_avg_epu16(__A, __B),
-              (__v32hi) _mm512_setzero_si512());
-}
-
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
-_mm512_max_epi8 (__m512i __A, __m512i __B)
-{
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_max_epi8(__m512i __A, __m512i __B) {
   return (__m512i)__builtin_elementwise_max((__v64qs) __A, (__v64qs) __B);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
-_mm512_maskz_max_epi8 (__mmask64 __M, __m512i __A, __m512i __B)
-{
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_maskz_max_epi8(__mmask64 __M, __m512i __A, __m512i __B) {
   return (__m512i)__builtin_ia32_selectb_512((__mmask64)__M,
                                              (__v64qi)_mm512_max_epi8(__A, __B),
                                              (__v64qi)_mm512_setzero_si512());
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
-_mm512_mask_max_epi8 (__m512i __W, __mmask64 __M, __m512i __A, __m512i __B)
-{
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_mask_max_epi8(__m512i __W, __mmask64 __M, __m512i __A, __m512i __B) {
   return (__m512i)__builtin_ia32_selectb_512((__mmask64)__M,
                                              (__v64qi)_mm512_max_epi8(__A, __B),
                                              (__v64qi)__W);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
-_mm512_max_epi16 (__m512i __A, __m512i __B)
-{
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_max_epi16(__m512i __A, __m512i __B) {
   return (__m512i)__builtin_elementwise_max((__v32hi) __A, (__v32hi) __B);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
-_mm512_maskz_max_epi16 (__mmask32 __M, __m512i __A, __m512i __B)
-{
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_maskz_max_epi16(__mmask32 __M, __m512i __A, __m512i __B) {
   return (__m512i)__builtin_ia32_selectw_512((__mmask32)__M,
                                             (__v32hi)_mm512_max_epi16(__A, __B),
                                             (__v32hi)_mm512_setzero_si512());
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
-_mm512_mask_max_epi16 (__m512i __W, __mmask32 __M, __m512i __A,
-           __m512i __B)
-{
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_mask_max_epi16(__m512i __W, __mmask32 __M, __m512i __A, __m512i __B) {
   return (__m512i)__builtin_ia32_selectw_512((__mmask32)__M,
                                             (__v32hi)_mm512_max_epi16(__A, __B),
                                             (__v32hi)__W);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
-_mm512_max_epu8 (__m512i __A, __m512i __B)
-{
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_max_epu8(__m512i __A, __m512i __B) {
   return (__m512i)__builtin_elementwise_max((__v64qu)__A, (__v64qu)__B);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
-_mm512_maskz_max_epu8 (__mmask64 __M, __m512i __A, __m512i __B)
-{
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_maskz_max_epu8(__mmask64 __M, __m512i __A, __m512i __B) {
   return (__m512i)__builtin_ia32_selectb_512((__mmask64)__M,
                                              (__v64qi)_mm512_max_epu8(__A, __B),
                                              (__v64qi)_mm512_setzero_si512());
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
-_mm512_mask_max_epu8 (__m512i __W, __mmask64 __M, __m512i __A, __m512i __B)
-{
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_mask_max_epu8(__m512i __W, __mmask64 __M, __m512i __A, __m512i __B) {
   return (__m512i)__builtin_ia32_selectb_512((__mmask64)__M,
                                              (__v64qi)_mm512_max_epu8(__A, __B),
                                              (__v64qi)__W);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
-_mm512_max_epu16 (__m512i __A, __m512i __B)
-{
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_max_epu16(__m512i __A, __m512i __B) {
   return (__m512i)__builtin_elementwise_max((__v32hu)__A, (__v32hu)__B);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
-_mm512_maskz_max_epu16 (__mmask32 __M, __m512i __A, __m512i __B)
-{
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_maskz_max_epu16(__mmask32 __M, __m512i __A, __m512i __B) {
   return (__m512i)__builtin_ia32_selectw_512((__mmask32)__M,
                                             (__v32hi)_mm512_max_epu16(__A, __B),
                                             (__v32hi)_mm512_setzero_si512());
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
-_mm512_mask_max_epu16 (__m512i __W, __mmask32 __M, __m512i __A, __m512i __B)
-{
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_mask_max_epu16(__m512i __W, __mmask32 __M, __m512i __A, __m512i __B) {
   return (__m512i)__builtin_ia32_selectw_512((__mmask32)__M,
                                             (__v32hi)_mm512_max_epu16(__A, __B),
                                             (__v32hi)__W);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
-_mm512_min_epi8 (__m512i __A, __m512i __B)
-{
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_min_epi8(__m512i __A, __m512i __B) {
   return (__m512i)__builtin_elementwise_min((__v64qs) __A, (__v64qs) __B);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
-_mm512_maskz_min_epi8 (__mmask64 __M, __m512i __A, __m512i __B)
-{
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_maskz_min_epi8(__mmask64 __M, __m512i __A, __m512i __B) {
   return (__m512i)__builtin_ia32_selectb_512((__mmask64)__M,
                                              (__v64qi)_mm512_min_epi8(__A, __B),
                                              (__v64qi)_mm512_setzero_si512());
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
-_mm512_mask_min_epi8 (__m512i __W, __mmask64 __M, __m512i __A, __m512i __B)
-{
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_mask_min_epi8(__m512i __W, __mmask64 __M, __m512i __A, __m512i __B) {
   return (__m512i)__builtin_ia32_selectb_512((__mmask64)__M,
                                              (__v64qi)_mm512_min_epi8(__A, __B),
                                              (__v64qi)__W);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
-_mm512_min_epi16 (__m512i __A, __m512i __B)
-{
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_min_epi16(__m512i __A, __m512i __B) {
   return (__m512i)__builtin_elementwise_min((__v32hi) __A, (__v32hi) __B);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
-_mm512_maskz_min_epi16 (__mmask32 __M, __m512i __A, __m512i __B)
-{
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_maskz_min_epi16(__mmask32 __M, __m512i __A, __m512i __B) {
   return (__m512i)__builtin_ia32_selectw_512((__mmask32)__M,
                                             (__v32hi)_mm512_min_epi16(__A, __B),
                                             (__v32hi)_mm512_setzero_si512());
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
-_mm512_mask_min_epi16 (__m512i __W, __mmask32 __M, __m512i __A, __m512i __B)
-{
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_mask_min_epi16(__m512i __W, __mmask32 __M, __m512i __A, __m512i __B) {
   return (__m512i)__builtin_ia32_selectw_512((__mmask32)__M,
                                             (__v32hi)_mm512_min_epi16(__A, __B),
                                             (__v32hi)__W);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
-_mm512_min_epu8 (__m512i __A, __m512i __B)
-{
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_min_epu8(__m512i __A, __m512i __B) {
   return (__m512i)__builtin_elementwise_min((__v64qu)__A, (__v64qu)__B);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
-_mm512_maskz_min_epu8 (__mmask64 __M, __m512i __A, __m512i __B)
-{
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_maskz_min_epu8(__mmask64 __M, __m512i __A, __m512i __B) {
   return (__m512i)__builtin_ia32_selectb_512((__mmask64)__M,
                                              (__v64qi)_mm512_min_epu8(__A, __B),
                                              (__v64qi)_mm512_setzero_si512());
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
-_mm512_mask_min_epu8 (__m512i __W, __mmask64 __M, __m512i __A, __m512i __B)
-{
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_mask_min_epu8(__m512i __W, __mmask64 __M, __m512i __A, __m512i __B) {
   return (__m512i)__builtin_ia32_selectb_512((__mmask64)__M,
                                              (__v64qi)_mm512_min_epu8(__A, __B),
                                              (__v64qi)__W);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
-_mm512_min_epu16 (__m512i __A, __m512i __B)
-{
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_min_epu16(__m512i __A, __m512i __B) {
   return (__m512i)__builtin_elementwise_min((__v32hu)__A, (__v32hu)__B);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
-_mm512_maskz_min_epu16 (__mmask32 __M, __m512i __A, __m512i __B)
-{
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_maskz_min_epu16(__mmask32 __M, __m512i __A, __m512i __B) {
   return (__m512i)__builtin_ia32_selectw_512((__mmask32)__M,
                                             (__v32hi)_mm512_min_epu16(__A, __B),
                                             (__v32hi)_mm512_setzero_si512());
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
-_mm512_mask_min_epu16 (__m512i __W, __mmask32 __M, __m512i __A, __m512i __B)
-{
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_mask_min_epu16(__m512i __W, __mmask32 __M, __m512i __A, __m512i __B) {
   return (__m512i)__builtin_ia32_selectw_512((__mmask32)__M,
                                             (__v32hi)_mm512_min_epu16(__A, __B),
                                             (__v32hi)__W);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
-_mm512_shuffle_epi8(__m512i __A, __m512i __B)
-{
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_shuffle_epi8(__m512i __A, __m512i __B) {
   return (__m512i)__builtin_ia32_pshufb512((__v64qi)__A,(__v64qi)__B);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
-_mm512_mask_shuffle_epi8(__m512i __W, __mmask64 __U, __m512i __A, __m512i __B)
-{
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_mask_shuffle_epi8(__m512i __W, __mmask64 __U, __m512i __A, __m512i __B) {
   return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U,
                                          (__v64qi)_mm512_shuffle_epi8(__A, __B),
                                          (__v64qi)__W);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
-_mm512_maskz_shuffle_epi8(__mmask64 __U, __m512i __A, __m512i __B)
-{
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_maskz_shuffle_epi8(__mmask64 __U, __m512i __A, __m512i __B) {
   return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U,
                                          (__v64qi)_mm512_shuffle_epi8(__A, __B),
                                          (__v64qi)_mm512_setzero_si512());
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
-_mm512_subs_epi8 (__m512i __A, __m512i __B)
-{
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_subs_epi8(__m512i __A, __m512i __B) {
   return (__m512i)__builtin_elementwise_sub_sat((__v64qs)__A, (__v64qs)__B);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
 _mm512_mask_subs_epi8 (__m512i __W, __mmask64 __U, __m512i __A, __m512i __B)
 {
   return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U,
@@ -952,7 +889,7 @@ _mm512_mask_subs_epi8 (__m512i __W, __mmask64 __U, __m512i __A, __m512i __B)
                                         (__v64qi)__W);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
 _mm512_maskz_subs_epi8 (__mmask64 __U, __m512i __A, __m512i __B)
 {
   return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U,
@@ -960,13 +897,12 @@ _mm512_maskz_subs_epi8 (__mmask64 __U, __m512i __A, __m512i __B)
                                         (__v64qi)_mm512_setzero_si512());
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
-_mm512_subs_epi16 (__m512i __A, __m512i __B)
-{
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_subs_epi16(__m512i __A, __m512i __B) {
   return (__m512i)__builtin_elementwise_sub_sat((__v32hi)__A, (__v32hi)__B);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
 _mm512_mask_subs_epi16 (__m512i __W, __mmask32 __U, __m512i __A, __m512i __B)
 {
   return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
@@ -974,7 +910,7 @@ _mm512_mask_subs_epi16 (__m512i __W, __mmask32 __U, __m512i __A, __m512i __B)
                                         (__v32hi)__W);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
 _mm512_maskz_subs_epi16 (__mmask32 __U, __m512i __A, __m512i __B)
 {
   return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
@@ -982,13 +918,12 @@ _mm512_maskz_subs_epi16 (__mmask32 __U, __m512i __A, __m512i __B)
                                         (__v32hi)_mm512_setzero_si512());
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
-_mm512_subs_epu8 (__m512i __A, __m512i __B)
-{
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_subs_epu8(__m512i __A, __m512i __B) {
   return (__m512i)__builtin_elementwise_sub_sat((__v64qu) __A, (__v64qu) __B);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
 _mm512_mask_subs_epu8 (__m512i __W, __mmask64 __U, __m512i __A, __m512i __B)
 {
   return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U,
@@ -996,7 +931,7 @@ _mm512_mask_subs_epu8 (__m512i __W, __mmask64 __U, __m512i __A, __m512i __B)
                                         (__v64qi)__W);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
 _mm512_maskz_subs_epu8 (__mmask64 __U, __m512i __A, __m512i __B)
 {
   return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U,
@@ -1004,13 +939,12 @@ _mm512_maskz_subs_epu8 (__mmask64 __U, __m512i __A, __m512i __B)
                                         (__v64qi)_mm512_setzero_si512());
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
-_mm512_subs_epu16 (__m512i __A, __m512i __B)
-{
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_subs_epu16(__m512i __A, __m512i __B) {
   return (__m512i)__builtin_elementwise_sub_sat((__v32hu) __A, (__v32hu) __B);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
 _mm512_mask_subs_epu16 (__m512i __W, __mmask32 __U, __m512i __A, __m512i __B)
 {
   return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
@@ -1018,7 +952,7 @@ _mm512_mask_subs_epu16 (__m512i __W, __mmask32 __U, __m512i __A, __m512i __B)
                                         (__v32hi)__W);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
 _mm512_maskz_subs_epu16 (__mmask32 __U, __m512i __A, __m512i __B)
 {
   return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
@@ -1026,113 +960,97 @@ _mm512_maskz_subs_epu16 (__mmask32 __U, __m512i __A, __m512i __B)
                                         (__v32hi)_mm512_setzero_si512());
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
-_mm512_permutex2var_epi16(__m512i __A, __m512i __I, __m512i __B)
-{
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_permutex2var_epi16(__m512i __A, __m512i __I, __m512i __B) {
   return (__m512i)__builtin_ia32_vpermi2varhi512((__v32hi)__A, (__v32hi)__I,
                                                  (__v32hi)__B);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
 _mm512_mask_permutex2var_epi16(__m512i __A, __mmask32 __U, __m512i __I,
-                               __m512i __B)
-{
+                               __m512i __B) {
   return (__m512i)__builtin_ia32_selectw_512(__U,
                               (__v32hi)_mm512_permutex2var_epi16(__A, __I, __B),
                               (__v32hi)__A);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
 _mm512_mask2_permutex2var_epi16(__m512i __A, __m512i __I, __mmask32 __U,
-                                __m512i __B)
-{
+                                __m512i __B) {
   return (__m512i)__builtin_ia32_selectw_512(__U,
                               (__v32hi)_mm512_permutex2var_epi16(__A, __I, __B),
                               (__v32hi)__I);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
 _mm512_maskz_permutex2var_epi16(__mmask32 __U, __m512i __A, __m512i __I,
-                                __m512i __B)
-{
+                                __m512i __B) {
   return (__m512i)__builtin_ia32_selectw_512(__U,
                               (__v32hi)_mm512_permutex2var_epi16(__A, __I, __B),
                               (__v32hi)_mm512_setzero_si512());
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
-_mm512_mulhrs_epi16(__m512i __A, __m512i __B)
-{
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_mulhrs_epi16(__m512i __A, __m512i __B) {
   return (__m512i)__builtin_ia32_pmulhrsw512((__v32hi)__A, (__v32hi)__B);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
-_mm512_mask_mulhrs_epi16(__m512i __W, __mmask32 __U, __m512i __A, __m512i __B)
-{
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_mask_mulhrs_epi16(__m512i __W, __mmask32 __U, __m512i __A, __m512i __B) {
   return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
                                          (__v32hi)_mm512_mulhrs_epi16(__A, __B),
                                          (__v32hi)__W);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
-_mm512_maskz_mulhrs_epi16(__mmask32 __U, __m512i __A, __m512i __B)
-{
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_maskz_mulhrs_epi16(__mmask32 __U, __m512i __A, __m512i __B) {
   return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
                                          (__v32hi)_mm512_mulhrs_epi16(__A, __B),
                                          (__v32hi)_mm512_setzero_si512());
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
-_mm512_mulhi_epi16(__m512i __A, __m512i __B)
-{
-  return (__m512i)__builtin_ia32_pmulhw512((__v32hi) __A, (__v32hi) __B);
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_mulhi_epi16(__m512i __A, __m512i __B) {
+  return (__m512i)__builtin_ia32_pmulhw512((__v32hi)__A, (__v32hi)__B);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
-_mm512_mask_mulhi_epi16(__m512i __W, __mmask32 __U, __m512i __A,
-       __m512i __B)
-{
-  return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
-                                          (__v32hi)_mm512_mulhi_epi16(__A, __B),
-                                          (__v32hi)__W);
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_mask_mulhi_epi16(__m512i __W, __mmask32 __U, __m512i __A, __m512i __B) {
+  return (__m512i)__builtin_ia32_selectw_512(
+      (__mmask32)__U, (__v32hi)_mm512_mulhi_epi16(__A, __B), (__v32hi)__W);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
-_mm512_maskz_mulhi_epi16(__mmask32 __U, __m512i __A, __m512i __B)
-{
-  return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
-                                          (__v32hi)_mm512_mulhi_epi16(__A, __B),
-                                          (__v32hi)_mm512_setzero_si512());
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_maskz_mulhi_epi16(__mmask32 __U, __m512i __A, __m512i __B) {
+  return (__m512i)__builtin_ia32_selectw_512(
+      (__mmask32)__U, (__v32hi)_mm512_mulhi_epi16(__A, __B),
+      (__v32hi)_mm512_setzero_si512());
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
-_mm512_mulhi_epu16(__m512i __A, __m512i __B)
-{
-  return (__m512i)__builtin_ia32_pmulhuw512((__v32hi) __A, (__v32hi) __B);
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_mulhi_epu16(__m512i __A, __m512i __B) {
+  return (__m512i)__builtin_ia32_pmulhuw512((__v32hu)__A, (__v32hu)__B);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
-_mm512_mask_mulhi_epu16(__m512i __W, __mmask32 __U, __m512i __A, __m512i __B)
-{
-  return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
-                                          (__v32hi)_mm512_mulhi_epu16(__A, __B),
-                                          (__v32hi)__W);
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_mask_mulhi_epu16(__m512i __W, __mmask32 __U, __m512i __A, __m512i __B) {
+  return (__m512i)__builtin_ia32_selectw_512(
+      (__mmask32)__U, (__v32hi)_mm512_mulhi_epu16(__A, __B), (__v32hi)__W);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
-_mm512_maskz_mulhi_epu16 (__mmask32 __U, __m512i __A, __m512i __B)
-{
-  return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
-                                          (__v32hi)_mm512_mulhi_epu16(__A, __B),
-                                          (__v32hi)_mm512_setzero_si512());
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_maskz_mulhi_epu16(__mmask32 __U, __m512i __A, __m512i __B) {
+  return (__m512i)__builtin_ia32_selectw_512(
+      (__mmask32)__U, (__v32hi)_mm512_mulhi_epu16(__A, __B),
+      (__v32hi)_mm512_setzero_si512());
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
 _mm512_maddubs_epi16(__m512i __X, __m512i __Y) {
   return (__m512i)__builtin_ia32_pmaddubsw512((__v64qi)__X, (__v64qi)__Y);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
 _mm512_mask_maddubs_epi16(__m512i __W, __mmask32 __U, __m512i __X,
                           __m512i __Y) {
   return (__m512i)__builtin_ia32_selectw_512((__mmask32) __U,
@@ -1140,26 +1058,26 @@ _mm512_mask_maddubs_epi16(__m512i __W, __mmask32 __U, __m512i __X,
                                         (__v32hi)__W);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
 _mm512_maskz_maddubs_epi16(__mmask32 __U, __m512i __X, __m512i __Y) {
   return (__m512i)__builtin_ia32_selectw_512((__mmask32) __U,
                                         (__v32hi)_mm512_maddubs_epi16(__X, __Y),
                                         (__v32hi)_mm512_setzero_si512());
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
 _mm512_madd_epi16(__m512i __A, __m512i __B) {
   return (__m512i)__builtin_ia32_pmaddwd512((__v32hi)__A, (__v32hi)__B);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
 _mm512_mask_madd_epi16(__m512i __W, __mmask16 __U, __m512i __A, __m512i __B) {
   return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
                                            (__v16si)_mm512_madd_epi16(__A, __B),
                                            (__v16si)__W);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
 _mm512_maskz_madd_epi16(__mmask16 __U, __m512i __A, __m512i __B) {
   return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
                                            (__v16si)_mm512_madd_epi16(__A, __B),
@@ -1247,7 +1165,7 @@ _mm512_mask_cvtusepi16_storeu_epi8 (void * __P, __mmask32 __M, __m512i __A)
   __builtin_ia32_pmovuswb512mem_mask ((__v32qi *) __P, (__v32hi) __A, __M);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
 _mm512_unpackhi_epi8(__m512i __A, __m512i __B) {
   return (__m512i)__builtin_shufflevector((__v64qi)__A, (__v64qi)__B,
                                           8,  64+8,   9, 64+9,
@@ -1268,21 +1186,21 @@ _mm512_unpackhi_epi8(__m512i __A, __m512i __B) {
                                           62, 64+62, 63, 64+63);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
 _mm512_mask_unpackhi_epi8(__m512i __W, __mmask64 __U, __m512i __A, __m512i __B) {
   return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U,
                                         (__v64qi)_mm512_unpackhi_epi8(__A, __B),
                                         (__v64qi)__W);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
 _mm512_maskz_unpackhi_epi8(__mmask64 __U, __m512i __A, __m512i __B) {
   return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U,
                                         (__v64qi)_mm512_unpackhi_epi8(__A, __B),
                                         (__v64qi)_mm512_setzero_si512());
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
 _mm512_unpackhi_epi16(__m512i __A, __m512i __B) {
   return (__m512i)__builtin_shufflevector((__v32hi)__A, (__v32hi)__B,
                                           4,  32+4,   5, 32+5,
@@ -1295,21 +1213,21 @@ _mm512_unpackhi_epi16(__m512i __A, __m512i __B) {
                                           30, 32+30, 31, 32+31);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
 _mm512_mask_unpackhi_epi16(__m512i __W, __mmask32 __U, __m512i __A, __m512i __B) {
   return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
                                        (__v32hi)_mm512_unpackhi_epi16(__A, __B),
                                        (__v32hi)__W);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
 _mm512_maskz_unpackhi_epi16(__mmask32 __U, __m512i __A, __m512i __B) {
   return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
                                        (__v32hi)_mm512_unpackhi_epi16(__A, __B),
                                        (__v32hi)_mm512_setzero_si512());
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
 _mm512_unpacklo_epi8(__m512i __A, __m512i __B) {
   return (__m512i)__builtin_shufflevector((__v64qi)__A, (__v64qi)__B,
                                           0,  64+0,   1, 64+1,
@@ -1330,21 +1248,21 @@ _mm512_unpacklo_epi8(__m512i __A, __m512i __B) {
                                           54, 64+54, 55, 64+55);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
 _mm512_mask_unpacklo_epi8(__m512i __W, __mmask64 __U, __m512i __A, __m512i __B) {
   return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U,
                                         (__v64qi)_mm512_unpacklo_epi8(__A, __B),
                                         (__v64qi)__W);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
 _mm512_maskz_unpacklo_epi8(__mmask64 __U, __m512i __A, __m512i __B) {
   return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U,
                                         (__v64qi)_mm512_unpacklo_epi8(__A, __B),
                                         (__v64qi)_mm512_setzero_si512());
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
 _mm512_unpacklo_epi16(__m512i __A, __m512i __B) {
   return (__m512i)__builtin_shufflevector((__v32hi)__A, (__v32hi)__B,
                                           0,  32+0,   1, 32+1,
@@ -1357,67 +1275,60 @@ _mm512_unpacklo_epi16(__m512i __A, __m512i __B) {
                                           26, 32+26, 27, 32+27);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
 _mm512_mask_unpacklo_epi16(__m512i __W, __mmask32 __U, __m512i __A, __m512i __B) {
   return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
                                        (__v32hi)_mm512_unpacklo_epi16(__A, __B),
                                        (__v32hi)__W);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
 _mm512_maskz_unpacklo_epi16(__mmask32 __U, __m512i __A, __m512i __B) {
   return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
                                        (__v32hi)_mm512_unpacklo_epi16(__A, __B),
                                        (__v32hi)_mm512_setzero_si512());
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
-_mm512_cvtepi8_epi16(__m256i __A)
-{
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_cvtepi8_epi16(__m256i __A) {
   /* This function always performs a signed extension, but __v32qi is a char
      which may be signed or unsigned, so use __v32qs. */
   return (__m512i)__builtin_convertvector((__v32qs)__A, __v32hi);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
-_mm512_mask_cvtepi8_epi16(__m512i __W, __mmask32 __U, __m256i __A)
-{
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_mask_cvtepi8_epi16(__m512i __W, __mmask32 __U, __m256i __A) {
   return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
                                              (__v32hi)_mm512_cvtepi8_epi16(__A),
                                              (__v32hi)__W);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
-_mm512_maskz_cvtepi8_epi16(__mmask32 __U, __m256i __A)
-{
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_maskz_cvtepi8_epi16(__mmask32 __U, __m256i __A) {
   return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
                                              (__v32hi)_mm512_cvtepi8_epi16(__A),
                                              (__v32hi)_mm512_setzero_si512());
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
-_mm512_cvtepu8_epi16(__m256i __A)
-{
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_cvtepu8_epi16(__m256i __A) {
   return (__m512i)__builtin_convertvector((__v32qu)__A, __v32hi);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
-_mm512_mask_cvtepu8_epi16(__m512i __W, __mmask32 __U, __m256i __A)
-{
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_mask_cvtepu8_epi16(__m512i __W, __mmask32 __U, __m256i __A) {
   return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
                                              (__v32hi)_mm512_cvtepu8_epi16(__A),
                                              (__v32hi)__W);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
-_mm512_maskz_cvtepu8_epi16(__mmask32 __U, __m256i __A)
-{
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_maskz_cvtepu8_epi16(__mmask32 __U, __m256i __A) {
   return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
                                              (__v32hi)_mm512_cvtepu8_epi16(__A),
                                              (__v32hi)_mm512_setzero_si512());
 }
 
-
 #define _mm512_shufflehi_epi16(A, imm) \
   ((__m512i)__builtin_ia32_pshufhw512((__v32hi)(__m512i)(A), (int)(imm)))
 
@@ -1450,13 +1361,13 @@ _mm512_maskz_cvtepu8_epi16(__mmask32 __U, __m256i __A)
                                                                        (imm)), \
                                        (__v32hi)_mm512_setzero_si512()))
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
 _mm512_sllv_epi16(__m512i __A, __m512i __B)
 {
   return (__m512i)__builtin_ia32_psllv32hi((__v32hi) __A, (__v32hi) __B);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
 _mm512_mask_sllv_epi16 (__m512i __W, __mmask32 __U, __m512i __A, __m512i __B)
 {
   return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
@@ -1464,7 +1375,7 @@ _mm512_mask_sllv_epi16 (__m512i __W, __mmask32 __U, __m512i __A, __m512i __B)
                                            (__v32hi)__W);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
 _mm512_maskz_sllv_epi16(__mmask32 __U, __m512i __A, __m512i __B)
 {
   return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
@@ -1472,61 +1383,56 @@ _mm512_maskz_sllv_epi16(__mmask32 __U, __m512i __A, __m512i __B)
                                            (__v32hi)_mm512_setzero_si512());
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
-_mm512_sll_epi16(__m512i __A, __m128i __B)
-{
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_sll_epi16(__m512i __A, __m128i __B) {
   return (__m512i)__builtin_ia32_psllw512((__v32hi) __A, (__v8hi) __B);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
-_mm512_mask_sll_epi16(__m512i __W, __mmask32 __U, __m512i __A, __m128i __B)
-{
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_mask_sll_epi16(__m512i __W, __mmask32 __U, __m512i __A, __m128i __B) {
   return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
                                           (__v32hi)_mm512_sll_epi16(__A, __B),
                                           (__v32hi)__W);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
-_mm512_maskz_sll_epi16(__mmask32 __U, __m512i __A, __m128i __B)
-{
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_maskz_sll_epi16(__mmask32 __U, __m512i __A, __m128i __B) {
   return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
                                           (__v32hi)_mm512_sll_epi16(__A, __B),
                                           (__v32hi)_mm512_setzero_si512());
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
-_mm512_slli_epi16(__m512i __A, unsigned int __B)
-{
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_slli_epi16(__m512i __A, unsigned int __B) {
   return (__m512i)__builtin_ia32_psllwi512((__v32hi)__A, (int)__B);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
 _mm512_mask_slli_epi16(__m512i __W, __mmask32 __U, __m512i __A,
-                       unsigned int __B)
-{
+                       unsigned int __B) {
   return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
                                          (__v32hi)_mm512_slli_epi16(__A, __B),
                                          (__v32hi)__W);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
-_mm512_maskz_slli_epi16(__mmask32 __U, __m512i __A, unsigned int __B)
-{
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_maskz_slli_epi16(__mmask32 __U, __m512i __A, unsigned int __B) {
   return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
                                          (__v32hi)_mm512_slli_epi16(__A, __B),
                                          (__v32hi)_mm512_setzero_si512());
 }
 
-#define _mm512_bslli_epi128(a, imm) \
-  ((__m512i)__builtin_ia32_pslldqi512_byteshift((__v8di)(__m512i)(a), (int)(imm)))
+#define _mm512_bslli_epi128(a, imm)                                            \
+  ((__m512i)__builtin_ia32_pslldqi512_byteshift((__v64qi)(__m512i)(a),         \
+                                                (int)(imm)))
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
 _mm512_srlv_epi16(__m512i __A, __m512i __B)
 {
   return (__m512i)__builtin_ia32_psrlv32hi((__v32hi)__A, (__v32hi)__B);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
 _mm512_mask_srlv_epi16(__m512i __W, __mmask32 __U, __m512i __A, __m512i __B)
 {
   return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
@@ -1534,7 +1440,7 @@ _mm512_mask_srlv_epi16(__m512i __W, __mmask32 __U, __m512i __A, __m512i __B)
                                            (__v32hi)__W);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
 _mm512_maskz_srlv_epi16(__mmask32 __U, __m512i __A, __m512i __B)
 {
   return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
@@ -1542,13 +1448,13 @@ _mm512_maskz_srlv_epi16(__mmask32 __U, __m512i __A, __m512i __B)
                                            (__v32hi)_mm512_setzero_si512());
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
 _mm512_srav_epi16(__m512i __A, __m512i __B)
 {
   return (__m512i)__builtin_ia32_psrav32hi((__v32hi)__A, (__v32hi)__B);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
 _mm512_mask_srav_epi16(__m512i __W, __mmask32 __U, __m512i __A, __m512i __B)
 {
   return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
@@ -1556,7 +1462,7 @@ _mm512_mask_srav_epi16(__m512i __W, __mmask32 __U, __m512i __A, __m512i __B)
                                            (__v32hi)__W);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
 _mm512_maskz_srav_epi16(__mmask32 __U, __m512i __A, __m512i __B)
 {
   return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
@@ -1564,100 +1470,89 @@ _mm512_maskz_srav_epi16(__mmask32 __U, __m512i __A, __m512i __B)
                                            (__v32hi)_mm512_setzero_si512());
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
-_mm512_sra_epi16(__m512i __A, __m128i __B)
-{
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_sra_epi16(__m512i __A, __m128i __B) {
   return (__m512i)__builtin_ia32_psraw512((__v32hi) __A, (__v8hi) __B);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
-_mm512_mask_sra_epi16(__m512i __W, __mmask32 __U, __m512i __A, __m128i __B)
-{
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_mask_sra_epi16(__m512i __W, __mmask32 __U, __m512i __A, __m128i __B) {
   return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
                                           (__v32hi)_mm512_sra_epi16(__A, __B),
                                           (__v32hi)__W);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
-_mm512_maskz_sra_epi16(__mmask32 __U, __m512i __A, __m128i __B)
-{
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_maskz_sra_epi16(__mmask32 __U, __m512i __A, __m128i __B) {
   return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
                                           (__v32hi)_mm512_sra_epi16(__A, __B),
                                           (__v32hi)_mm512_setzero_si512());
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
-_mm512_srai_epi16(__m512i __A, unsigned int __B)
-{
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_srai_epi16(__m512i __A, unsigned int __B) {
   return (__m512i)__builtin_ia32_psrawi512((__v32hi)__A, (int)__B);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
 _mm512_mask_srai_epi16(__m512i __W, __mmask32 __U, __m512i __A,
-                       unsigned int __B)
-{
+                       unsigned int __B) {
   return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
                                          (__v32hi)_mm512_srai_epi16(__A, __B),
                                          (__v32hi)__W);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
-_mm512_maskz_srai_epi16(__mmask32 __U, __m512i __A, unsigned int __B)
-{
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_maskz_srai_epi16(__mmask32 __U, __m512i __A, unsigned int __B) {
   return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
                                          (__v32hi)_mm512_srai_epi16(__A, __B),
                                          (__v32hi)_mm512_setzero_si512());
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
-_mm512_srl_epi16(__m512i __A, __m128i __B)
-{
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_srl_epi16(__m512i __A, __m128i __B) {
   return (__m512i)__builtin_ia32_psrlw512((__v32hi) __A, (__v8hi) __B);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
-_mm512_mask_srl_epi16(__m512i __W, __mmask32 __U, __m512i __A, __m128i __B)
-{
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_mask_srl_epi16(__m512i __W, __mmask32 __U, __m512i __A, __m128i __B) {
   return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
                                           (__v32hi)_mm512_srl_epi16(__A, __B),
                                           (__v32hi)__W);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
-_mm512_maskz_srl_epi16(__mmask32 __U, __m512i __A, __m128i __B)
-{
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_maskz_srl_epi16(__mmask32 __U, __m512i __A, __m128i __B) {
   return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
                                           (__v32hi)_mm512_srl_epi16(__A, __B),
                                           (__v32hi)_mm512_setzero_si512());
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
-_mm512_srli_epi16(__m512i __A, unsigned int __B)
-{
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_srli_epi16(__m512i __A, unsigned int __B) {
   return (__m512i)__builtin_ia32_psrlwi512((__v32hi)__A, (int)__B);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
 _mm512_mask_srli_epi16(__m512i __W, __mmask32 __U, __m512i __A,
-                       unsigned int __B)
-{
+                       unsigned int __B) {
   return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
                                          (__v32hi)_mm512_srli_epi16(__A, __B),
                                          (__v32hi)__W);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
-_mm512_maskz_srli_epi16(__mmask32 __U, __m512i __A, int __B)
-{
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_maskz_srli_epi16(__mmask32 __U, __m512i __A, int __B) {
   return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
                                          (__v32hi)_mm512_srli_epi16(__A, (unsigned int)__B),
                                          (__v32hi)_mm512_setzero_si512());
 }
 
-#define _mm512_bsrli_epi128(a, imm) \
-  ((__m512i)__builtin_ia32_psrldqi512_byteshift((__v8di)(__m512i)(a), (int)(imm)))
+#define _mm512_bsrli_epi128(a, imm)                                            \
+  ((__m512i)__builtin_ia32_psrldqi512_byteshift((__v64qi)(__m512i)(a),         \
+                                                (int)(imm)))
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
 _mm512_mask_mov_epi16 (__m512i __W, __mmask32 __U, __m512i __A)
 {
   return (__m512i) __builtin_ia32_selectw_512 ((__mmask32) __U,
@@ -1665,23 +1560,21 @@ _mm512_mask_mov_epi16 (__m512i __W, __mmask32 __U, __m512i __A)
                 (__v32hi) __W);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
-_mm512_maskz_mov_epi16 (__mmask32 __U, __m512i __A)
-{
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_maskz_mov_epi16(__mmask32 __U, __m512i __A) {
   return (__m512i) __builtin_ia32_selectw_512 ((__mmask32) __U,
                 (__v32hi) __A,
                 (__v32hi) _mm512_setzero_si512 ());
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
-_mm512_mask_mov_epi8 (__m512i __W, __mmask64 __U, __m512i __A)
-{
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_mask_mov_epi8(__m512i __W, __mmask64 __U, __m512i __A) {
   return (__m512i) __builtin_ia32_selectb_512 ((__mmask64) __U,
                 (__v64qi) __A,
                 (__v64qi) __W);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
 _mm512_maskz_mov_epi8 (__mmask64 __U, __m512i __A)
 {
   return (__m512i) __builtin_ia32_selectb_512 ((__mmask64) __U,
@@ -1689,7 +1582,7 @@ _mm512_maskz_mov_epi8 (__mmask64 __U, __m512i __A)
                 (__v64qi) _mm512_setzero_si512 ());
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
 _mm512_mask_set1_epi8 (__m512i __O, __mmask64 __M, char __A)
 {
   return (__m512i) __builtin_ia32_selectb_512(__M,
@@ -1697,23 +1590,21 @@ _mm512_mask_set1_epi8 (__m512i __O, __mmask64 __M, char __A)
                                               (__v64qi) __O);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
-_mm512_maskz_set1_epi8 (__mmask64 __M, char __A)
-{
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_maskz_set1_epi8(__mmask64 __M, char __A) {
   return (__m512i) __builtin_ia32_selectb_512(__M,
                                               (__v64qi) _mm512_set1_epi8(__A),
                                               (__v64qi) _mm512_setzero_si512());
 }
 
-static __inline__ __mmask64 __DEFAULT_FN_ATTRS _mm512_kunpackd(__mmask64 __A,
-                                                               __mmask64 __B) {
+static __inline__ __mmask64 __DEFAULT_FN_ATTRS_CONSTEXPR
+_mm512_kunpackd(__mmask64 __A, __mmask64 __B) {
   return (__mmask64) __builtin_ia32_kunpckdi ((__mmask64) __A,
                 (__mmask64) __B);
 }
 
-static __inline__ __mmask32 __DEFAULT_FN_ATTRS
-_mm512_kunpackw (__mmask32 __A, __mmask32 __B)
-{
+static __inline__ __mmask32 __DEFAULT_FN_ATTRS_CONSTEXPR
+_mm512_kunpackw(__mmask32 __A, __mmask32 __B) {
   return (__mmask32) __builtin_ia32_kunpcksi ((__mmask32) __A,
                 (__mmask32) __B);
 }
@@ -1859,33 +1750,28 @@ _mm512_mask_testn_epi16_mask (__mmask32 __U, __m512i __A, __m512i __B)
                                        _mm512_setzero_si512());
 }
 
-static __inline__ __mmask64 __DEFAULT_FN_ATTRS512
-_mm512_movepi8_mask (__m512i __A)
-{
+static __inline__ __mmask64 __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_movepi8_mask(__m512i __A) {
   return (__mmask64) __builtin_ia32_cvtb2mask512 ((__v64qi) __A);
 }
 
-static __inline__ __mmask32 __DEFAULT_FN_ATTRS512
-_mm512_movepi16_mask (__m512i __A)
-{
+static __inline__ __mmask32 __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_movepi16_mask(__m512i __A) {
   return (__mmask32) __builtin_ia32_cvtw2mask512 ((__v32hi) __A);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
-_mm512_movm_epi8 (__mmask64 __A)
-{
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_movm_epi8(__mmask64 __A) {
   return (__m512i) __builtin_ia32_cvtmask2b512 (__A);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
-_mm512_movm_epi16 (__mmask32 __A)
-{
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_movm_epi16(__mmask32 __A) {
   return (__m512i) __builtin_ia32_cvtmask2w512 (__A);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
-_mm512_broadcastb_epi8 (__m128i __A)
-{
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_broadcastb_epi8(__m128i __A) {
   return (__m512i)__builtin_shufflevector((__v16qi) __A, (__v16qi) __A,
                                           0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
                                           0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
@@ -1893,7 +1779,7 @@ _mm512_broadcastb_epi8 (__m128i __A)
                                           0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
 _mm512_mask_broadcastb_epi8 (__m512i __O, __mmask64 __M, __m128i __A)
 {
   return (__m512i)__builtin_ia32_selectb_512(__M,
@@ -1901,15 +1787,14 @@ _mm512_mask_broadcastb_epi8 (__m512i __O, __mmask64 __M, __m128i __A)
                                              (__v64qi) __O);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
-_mm512_maskz_broadcastb_epi8 (__mmask64 __M, __m128i __A)
-{
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_maskz_broadcastb_epi8(__mmask64 __M, __m128i __A) {
   return (__m512i)__builtin_ia32_selectb_512(__M,
                                              (__v64qi) _mm512_broadcastb_epi8(__A),
                                              (__v64qi) _mm512_setzero_si512());
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
 _mm512_mask_set1_epi16 (__m512i __O, __mmask32 __M, short __A)
 {
   return (__m512i) __builtin_ia32_selectw_512(__M,
@@ -1917,23 +1802,21 @@ _mm512_mask_set1_epi16 (__m512i __O, __mmask32 __M, short __A)
                                               (__v32hi) __O);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
-_mm512_maskz_set1_epi16 (__mmask32 __M, short __A)
-{
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_maskz_set1_epi16(__mmask32 __M, short __A) {
   return (__m512i) __builtin_ia32_selectw_512(__M,
                                               (__v32hi) _mm512_set1_epi16(__A),
                                               (__v32hi) _mm512_setzero_si512());
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
-_mm512_broadcastw_epi16 (__m128i __A)
-{
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_broadcastw_epi16(__m128i __A) {
   return (__m512i)__builtin_shufflevector((__v8hi) __A, (__v8hi) __A,
                                           0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
                                           0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
 _mm512_mask_broadcastw_epi16 (__m512i __O, __mmask32 __M, __m128i __A)
 {
   return (__m512i)__builtin_ia32_selectw_512(__M,
@@ -1941,7 +1824,7 @@ _mm512_mask_broadcastw_epi16 (__m512i __O, __mmask32 __M, __m128i __A)
                                              (__v32hi) __O);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
 _mm512_maskz_broadcastw_epi16 (__mmask32 __M, __m128i __A)
 {
   return (__m512i)__builtin_ia32_selectw_512(__M,
@@ -1949,25 +1832,21 @@ _mm512_maskz_broadcastw_epi16 (__mmask32 __M, __m128i __A)
                                              (__v32hi) _mm512_setzero_si512());
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
-_mm512_permutexvar_epi16 (__m512i __A, __m512i __B)
-{
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_permutexvar_epi16(__m512i __A, __m512i __B) {
   return (__m512i)__builtin_ia32_permvarhi512((__v32hi)__B, (__v32hi)__A);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
-_mm512_maskz_permutexvar_epi16 (__mmask32 __M, __m512i __A,
-        __m512i __B)
-{
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_maskz_permutexvar_epi16(__mmask32 __M, __m512i __A, __m512i __B) {
   return (__m512i)__builtin_ia32_selectw_512((__mmask32)__M,
                                     (__v32hi)_mm512_permutexvar_epi16(__A, __B),
                                     (__v32hi)_mm512_setzero_si512());
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
-_mm512_mask_permutexvar_epi16 (__m512i __W, __mmask32 __M, __m512i __A,
-             __m512i __B)
-{
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_mask_permutexvar_epi16(__m512i __W, __mmask32 __M, __m512i __A,
+                              __m512i __B) {
   return (__m512i)__builtin_ia32_selectw_512((__mmask32)__M,
                                     (__v32hi)_mm512_permutexvar_epi16(__A, __B),
                                     (__v32hi)__W);
@@ -2010,5 +1889,7 @@ _mm512_sad_epu8 (__m512i __A, __m512i __B)
 
 #undef __DEFAULT_FN_ATTRS512
 #undef __DEFAULT_FN_ATTRS
+#undef __DEFAULT_FN_ATTRS512_CONSTEXPR
+#undef __DEFAULT_FN_ATTRS_CONSTEXPR
 
 #endif
diff --git a/lib/include/avx512cdintrin.h b/lib/include/avx512cdintrin.h
index 33b552f6fe..f9de207b76 100644
--- a/lib/include/avx512cdintrin.h
+++ b/lib/include/avx512cdintrin.h
@@ -15,109 +15,96 @@
 #define __AVX512CDINTRIN_H
 
 /* Define the default attributes for the functions in this file. */
+#if defined(__cplusplus) && (__cplusplus >= 201103L)
 #define __DEFAULT_FN_ATTRS                                                     \
-  __attribute__((__always_inline__, __nodebug__,                               \
-                 __target__("avx512cd,evex512"), __min_vector_width__(512)))
+  __attribute__((__always_inline__, __nodebug__, __target__("avx512cd"),       \
+                 __min_vector_width__(512))) constexpr
+#else
+#define __DEFAULT_FN_ATTRS                                                     \
+  __attribute__((__always_inline__, __nodebug__, __target__("avx512cd"),       \
+                 __min_vector_width__(512)))
+#endif
 
 static __inline__ __m512i __DEFAULT_FN_ATTRS
-_mm512_conflict_epi64 (__m512i __A)
-{
-  return (__m512i) __builtin_ia32_vpconflictdi_512 ((__v8di) __A);
+_mm512_conflict_epi64(__m512i __A) {
+  return (__m512i)__builtin_ia32_vpconflictdi_512((__v8di)__A);
 }
 
 static __inline__ __m512i __DEFAULT_FN_ATTRS
-_mm512_mask_conflict_epi64 (__m512i __W, __mmask8 __U, __m512i __A)
-{
+_mm512_mask_conflict_epi64(__m512i __W, __mmask8 __U, __m512i __A) {
+  return (__m512i)__builtin_ia32_selectq_512(
+      (__mmask8)__U, (__v8di)_mm512_conflict_epi64(__A), (__v8di)__W);
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_maskz_conflict_epi64(__mmask8 __U, __m512i __A) {
   return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
                                              (__v8di)_mm512_conflict_epi64(__A),
-                                             (__v8di)__W);
+                                             (__v8di)_mm512_setzero_si512());
 }
 
 static __inline__ __m512i __DEFAULT_FN_ATTRS
-_mm512_maskz_conflict_epi64 (__mmask8 __U, __m512i __A)
-{
-  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
-                                             (__v8di)_mm512_conflict_epi64(__A),
-                                             (__v8di)_mm512_setzero_si512 ());
+_mm512_conflict_epi32(__m512i __A) {
+  return (__m512i)__builtin_ia32_vpconflictsi_512((__v16si)__A);
 }
 
 static __inline__ __m512i __DEFAULT_FN_ATTRS
-_mm512_conflict_epi32 (__m512i __A)
-{
-  return (__m512i) __builtin_ia32_vpconflictsi_512 ((__v16si) __A);
+_mm512_mask_conflict_epi32(__m512i __W, __mmask16 __U, __m512i __A) {
+  return (__m512i)__builtin_ia32_selectd_512(
+      (__mmask16)__U, (__v16si)_mm512_conflict_epi32(__A), (__v16si)__W);
 }
 
 static __inline__ __m512i __DEFAULT_FN_ATTRS
-_mm512_mask_conflict_epi32 (__m512i __W, __mmask16 __U, __m512i __A)
-{
-  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
-                                            (__v16si)_mm512_conflict_epi32(__A),
-                                            (__v16si)__W);
+_mm512_maskz_conflict_epi32(__mmask16 __U, __m512i __A) {
+  return (__m512i)__builtin_ia32_selectd_512(
+      (__mmask16)__U, (__v16si)_mm512_conflict_epi32(__A),
+      (__v16si)_mm512_setzero_si512());
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_lzcnt_epi32(__m512i __A) {
+  return (__m512i)__builtin_elementwise_clzg((__v16si)__A,
+                                             (__v16si)_mm512_set1_epi32(32));
 }
 
 static __inline__ __m512i __DEFAULT_FN_ATTRS
-_mm512_maskz_conflict_epi32 (__mmask16 __U, __m512i __A)
-{
-  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
-                                            (__v16si)_mm512_conflict_epi32(__A),
-                                            (__v16si)_mm512_setzero_si512());
+_mm512_mask_lzcnt_epi32(__m512i __W, __mmask16 __U, __m512i __A) {
+  return (__m512i)__builtin_ia32_selectd_512(
+      (__mmask16)__U, (__v16si)_mm512_lzcnt_epi32(__A), (__v16si)__W);
 }
 
 static __inline__ __m512i __DEFAULT_FN_ATTRS
-_mm512_lzcnt_epi32 (__m512i __A)
-{
-  return (__m512i) __builtin_ia32_vplzcntd_512 ((__v16si) __A);
-}
-
-static __inline__ __m512i __DEFAULT_FN_ATTRS
-_mm512_mask_lzcnt_epi32 (__m512i __W, __mmask16 __U, __m512i __A)
-{
-  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
-                                             (__v16si)_mm512_lzcnt_epi32(__A),
-                                             (__v16si)__W);
-}
-
-static __inline__ __m512i __DEFAULT_FN_ATTRS
-_mm512_maskz_lzcnt_epi32 (__mmask16 __U, __m512i __A)
-{
+_mm512_maskz_lzcnt_epi32(__mmask16 __U, __m512i __A) {
   return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
                                              (__v16si)_mm512_lzcnt_epi32(__A),
                                              (__v16si)_mm512_setzero_si512());
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS
-_mm512_lzcnt_epi64 (__m512i __A)
-{
-  return (__m512i) __builtin_ia32_vplzcntq_512 ((__v8di) __A);
+static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_lzcnt_epi64(__m512i __A) {
+  return (__m512i)__builtin_elementwise_clzg(
+      (__v8di)__A, (__v8di)_mm512_set1_epi64((long long)64));
 }
 
 static __inline__ __m512i __DEFAULT_FN_ATTRS
-_mm512_mask_lzcnt_epi64 (__m512i __W, __mmask8 __U, __m512i __A)
-{
-  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
-                                             (__v8di)_mm512_lzcnt_epi64(__A),
-                                             (__v8di)__W);
+_mm512_mask_lzcnt_epi64(__m512i __W, __mmask8 __U, __m512i __A) {
+  return (__m512i)__builtin_ia32_selectq_512(
+      (__mmask8)__U, (__v8di)_mm512_lzcnt_epi64(__A), (__v8di)__W);
 }
 
 static __inline__ __m512i __DEFAULT_FN_ATTRS
-_mm512_maskz_lzcnt_epi64 (__mmask8 __U, __m512i __A)
-{
+_mm512_maskz_lzcnt_epi64(__mmask8 __U, __m512i __A) {
   return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
                                              (__v8di)_mm512_lzcnt_epi64(__A),
                                              (__v8di)_mm512_setzero_si512());
 }
 
 static __inline__ __m512i __DEFAULT_FN_ATTRS
-_mm512_broadcastmb_epi64 (__mmask8 __A)
-{
-  return (__m512i) _mm512_set1_epi64((long long) __A);
+_mm512_broadcastmb_epi64(__mmask8 __A) {
+  return (__m512i)_mm512_set1_epi64((long long)__A);
 }
 
 static __inline__ __m512i __DEFAULT_FN_ATTRS
-_mm512_broadcastmw_epi32 (__mmask16 __A)
-{
-  return (__m512i) _mm512_set1_epi32((int) __A);
-
+_mm512_broadcastmw_epi32(__mmask16 __A) {
+  return (__m512i)_mm512_set1_epi32((int)__A);
 }
 
 #undef __DEFAULT_FN_ATTRS
diff --git a/lib/include/avx512dqintrin.h b/lib/include/avx512dqintrin.h
index 88b48e3a32..084ac89182 100644
--- a/lib/include/avx512dqintrin.h
+++ b/lib/include/avx512dqintrin.h
@@ -15,110 +15,105 @@
 #define __AVX512DQINTRIN_H
 
 /* Define the default attributes for the functions in this file. */
-#define __DEFAULT_FN_ATTRS512 __attribute__((__always_inline__, __nodebug__, __target__("avx512dq,evex512"), __min_vector_width__(512)))
+#define __DEFAULT_FN_ATTRS512                                                  \
+  __attribute__((__always_inline__, __nodebug__, __target__("avx512dq"),       \
+                 __min_vector_width__(512)))
 #define __DEFAULT_FN_ATTRS                                                     \
-  __attribute__((__always_inline__, __nodebug__,                               \
-                 __target__("avx512dq,no-evex512")))
+  __attribute__((__always_inline__, __nodebug__, __target__("avx512dq")))
 
-static __inline __mmask8 __DEFAULT_FN_ATTRS
-_knot_mask8(__mmask8 __M)
-{
+#if defined(__cplusplus) && (__cplusplus >= 201103L)
+#define __DEFAULT_FN_ATTRS512_CONSTEXPR __DEFAULT_FN_ATTRS512 constexpr
+#define __DEFAULT_FN_ATTRS_CONSTEXPR __DEFAULT_FN_ATTRS constexpr
+#else
+#define __DEFAULT_FN_ATTRS512_CONSTEXPR __DEFAULT_FN_ATTRS512
+#define __DEFAULT_FN_ATTRS_CONSTEXPR __DEFAULT_FN_ATTRS
+#endif
+
+static __inline __mmask8 __DEFAULT_FN_ATTRS_CONSTEXPR
+_knot_mask8(__mmask8 __M) {
   return __builtin_ia32_knotqi(__M);
 }
 
-static __inline__ __mmask8 __DEFAULT_FN_ATTRS
-_kand_mask8(__mmask8 __A, __mmask8 __B)
-{
+static __inline__ __mmask8 __DEFAULT_FN_ATTRS_CONSTEXPR
+_kand_mask8(__mmask8 __A, __mmask8 __B) {
   return (__mmask8)__builtin_ia32_kandqi((__mmask8)__A, (__mmask8)__B);
 }
 
-static __inline__ __mmask8 __DEFAULT_FN_ATTRS
-_kandn_mask8(__mmask8 __A, __mmask8 __B)
-{
+static __inline__ __mmask8 __DEFAULT_FN_ATTRS_CONSTEXPR
+_kandn_mask8(__mmask8 __A, __mmask8 __B) {
   return (__mmask8)__builtin_ia32_kandnqi((__mmask8)__A, (__mmask8)__B);
 }
 
-static __inline__ __mmask8 __DEFAULT_FN_ATTRS
-_kor_mask8(__mmask8 __A, __mmask8 __B)
-{
+static __inline__ __mmask8 __DEFAULT_FN_ATTRS_CONSTEXPR
+_kor_mask8(__mmask8 __A, __mmask8 __B) {
   return (__mmask8)__builtin_ia32_korqi((__mmask8)__A, (__mmask8)__B);
 }
 
-static __inline__ __mmask8 __DEFAULT_FN_ATTRS
-_kxnor_mask8(__mmask8 __A, __mmask8 __B)
-{
+static __inline__ __mmask8 __DEFAULT_FN_ATTRS_CONSTEXPR
+_kxnor_mask8(__mmask8 __A, __mmask8 __B) {
   return (__mmask8)__builtin_ia32_kxnorqi((__mmask8)__A, (__mmask8)__B);
 }
 
-static __inline__ __mmask8 __DEFAULT_FN_ATTRS
-_kxor_mask8(__mmask8 __A, __mmask8 __B)
-{
+static __inline__ __mmask8 __DEFAULT_FN_ATTRS_CONSTEXPR
+_kxor_mask8(__mmask8 __A, __mmask8 __B) {
   return (__mmask8)__builtin_ia32_kxorqi((__mmask8)__A, (__mmask8)__B);
 }
 
-static __inline__ unsigned char __DEFAULT_FN_ATTRS
-_kortestc_mask8_u8(__mmask8 __A, __mmask8 __B)
-{
+static __inline__ unsigned char __DEFAULT_FN_ATTRS_CONSTEXPR
+_kortestc_mask8_u8(__mmask8 __A, __mmask8 __B) {
   return (unsigned char)__builtin_ia32_kortestcqi(__A, __B);
 }
 
-static __inline__ unsigned char __DEFAULT_FN_ATTRS
-_kortestz_mask8_u8(__mmask8 __A, __mmask8 __B)
-{
+static __inline__ unsigned char __DEFAULT_FN_ATTRS_CONSTEXPR
+_kortestz_mask8_u8(__mmask8 __A, __mmask8 __B) {
   return (unsigned char)__builtin_ia32_kortestzqi(__A, __B);
 }
 
-static __inline__ unsigned char __DEFAULT_FN_ATTRS
+static __inline__ unsigned char __DEFAULT_FN_ATTRS_CONSTEXPR
 _kortest_mask8_u8(__mmask8 __A, __mmask8 __B, unsigned char *__C) {
   *__C = (unsigned char)__builtin_ia32_kortestcqi(__A, __B);
   return (unsigned char)__builtin_ia32_kortestzqi(__A, __B);
 }
 
-static __inline__ unsigned char __DEFAULT_FN_ATTRS
-_ktestc_mask8_u8(__mmask8 __A, __mmask8 __B)
-{
+static __inline__ unsigned char __DEFAULT_FN_ATTRS_CONSTEXPR
+_ktestc_mask8_u8(__mmask8 __A, __mmask8 __B) {
   return (unsigned char)__builtin_ia32_ktestcqi(__A, __B);
 }
 
-static __inline__ unsigned char __DEFAULT_FN_ATTRS
-_ktestz_mask8_u8(__mmask8 __A, __mmask8 __B)
-{
+static __inline__ unsigned char __DEFAULT_FN_ATTRS_CONSTEXPR
+_ktestz_mask8_u8(__mmask8 __A, __mmask8 __B) {
   return (unsigned char)__builtin_ia32_ktestzqi(__A, __B);
 }
 
-static __inline__ unsigned char __DEFAULT_FN_ATTRS
+static __inline__ unsigned char __DEFAULT_FN_ATTRS_CONSTEXPR
 _ktest_mask8_u8(__mmask8 __A, __mmask8 __B, unsigned char *__C) {
   *__C = (unsigned char)__builtin_ia32_ktestcqi(__A, __B);
   return (unsigned char)__builtin_ia32_ktestzqi(__A, __B);
 }
 
-static __inline__ unsigned char __DEFAULT_FN_ATTRS
-_ktestc_mask16_u8(__mmask16 __A, __mmask16 __B)
-{
+static __inline__ unsigned char __DEFAULT_FN_ATTRS_CONSTEXPR
+_ktestc_mask16_u8(__mmask16 __A, __mmask16 __B) {
   return (unsigned char)__builtin_ia32_ktestchi(__A, __B);
 }
 
-static __inline__ unsigned char __DEFAULT_FN_ATTRS
-_ktestz_mask16_u8(__mmask16 __A, __mmask16 __B)
-{
+static __inline__ unsigned char __DEFAULT_FN_ATTRS_CONSTEXPR
+_ktestz_mask16_u8(__mmask16 __A, __mmask16 __B) {
   return (unsigned char)__builtin_ia32_ktestzhi(__A, __B);
 }
 
-static __inline__ unsigned char __DEFAULT_FN_ATTRS
+static __inline__ unsigned char __DEFAULT_FN_ATTRS_CONSTEXPR
 _ktest_mask16_u8(__mmask16 __A, __mmask16 __B, unsigned char *__C) {
   *__C = (unsigned char)__builtin_ia32_ktestchi(__A, __B);
   return (unsigned char)__builtin_ia32_ktestzhi(__A, __B);
 }
 
-static __inline__ __mmask8 __DEFAULT_FN_ATTRS
-_kadd_mask8(__mmask8 __A, __mmask8 __B)
-{
+static __inline__ __mmask8 __DEFAULT_FN_ATTRS_CONSTEXPR
+_kadd_mask8(__mmask8 __A, __mmask8 __B) {
   return (__mmask8)__builtin_ia32_kaddqi((__mmask8)__A, (__mmask8)__B);
 }
 
-static __inline__ __mmask16 __DEFAULT_FN_ATTRS
-_kadd_mask16(__mmask16 __A, __mmask16 __B)
-{
+static __inline__ __mmask16 __DEFAULT_FN_ATTRS_CONSTEXPR
+_kadd_mask16(__mmask16 __A, __mmask16 __B) {
   return (__mmask16)__builtin_ia32_kaddhi((__mmask16)__A, (__mmask16)__B);
 }
 
@@ -128,12 +123,12 @@ _kadd_mask16(__mmask16 __A, __mmask16 __B)
 #define _kshiftri_mask8(A, I) \
   ((__mmask8)__builtin_ia32_kshiftriqi((__mmask8)(A), (unsigned int)(I)))
 
-static __inline__ unsigned int __DEFAULT_FN_ATTRS
-_cvtmask8_u32(__mmask8 __A) {
+static __inline__ unsigned int
+    __DEFAULT_FN_ATTRS_CONSTEXPR _cvtmask8_u32(__mmask8 __A) {
   return (unsigned int)__builtin_ia32_kmovb((__mmask8)__A);
 }
 
-static __inline__ __mmask8 __DEFAULT_FN_ATTRS
+static __inline__ __mmask8 __DEFAULT_FN_ATTRS_CONSTEXPR
 _cvtu32_mask8(unsigned int __A) {
   return (__mmask8)__builtin_ia32_kmovb((__mmask8)__A);
 }
@@ -148,26 +143,26 @@ _store_mask8(__mmask8 *__A, __mmask8 __B) {
   *(__mmask8 *)__A = __builtin_ia32_kmovb((__mmask8)__B);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
-_mm512_mullo_epi64 (__m512i __A, __m512i __B) {
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_mullo_epi64(__m512i __A, __m512i __B) {
   return (__m512i) ((__v8du) __A * (__v8du) __B);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
 _mm512_mask_mullo_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m512i __B) {
   return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
                                              (__v8di)_mm512_mullo_epi64(__A, __B),
                                              (__v8di)__W);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
 _mm512_maskz_mullo_epi64(__mmask8 __U, __m512i __A, __m512i __B) {
   return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
                                              (__v8di)_mm512_mullo_epi64(__A, __B),
                                              (__v8di)_mm512_setzero_si512());
 }
 
-static __inline__ __m512d __DEFAULT_FN_ATTRS512
+static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
 _mm512_xor_pd(__m512d __A, __m512d __B) {
   return (__m512d)((__v8du)__A ^ (__v8du)__B);
 }
@@ -186,7 +181,7 @@ _mm512_maskz_xor_pd(__mmask8 __U, __m512d __A, __m512d __B) {
                                               (__v8df)_mm512_setzero_pd());
 }
 
-static __inline__ __m512 __DEFAULT_FN_ATTRS512
+static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
 _mm512_xor_ps (__m512 __A, __m512 __B) {
   return (__m512)((__v16su)__A ^ (__v16su)__B);
 }
@@ -205,7 +200,7 @@ _mm512_maskz_xor_ps(__mmask16 __U, __m512 __A, __m512 __B) {
                                              (__v16sf)_mm512_setzero_ps());
 }
 
-static __inline__ __m512d __DEFAULT_FN_ATTRS512
+static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
 _mm512_or_pd(__m512d __A, __m512d __B) {
   return (__m512d)((__v8du)__A | (__v8du)__B);
 }
@@ -224,7 +219,7 @@ _mm512_maskz_or_pd(__mmask8 __U, __m512d __A, __m512d __B) {
                                               (__v8df)_mm512_setzero_pd());
 }
 
-static __inline__ __m512 __DEFAULT_FN_ATTRS512
+static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
 _mm512_or_ps(__m512 __A, __m512 __B) {
   return (__m512)((__v16su)__A | (__v16su)__B);
 }
@@ -243,7 +238,7 @@ _mm512_maskz_or_ps(__mmask16 __U, __m512 __A, __m512 __B) {
                                              (__v16sf)_mm512_setzero_ps());
 }
 
-static __inline__ __m512d __DEFAULT_FN_ATTRS512
+static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
 _mm512_and_pd(__m512d __A, __m512d __B) {
   return (__m512d)((__v8du)__A & (__v8du)__B);
 }
@@ -262,7 +257,7 @@ _mm512_maskz_and_pd(__mmask8 __U, __m512d __A, __m512d __B) {
                                               (__v8df)_mm512_setzero_pd());
 }
 
-static __inline__ __m512 __DEFAULT_FN_ATTRS512
+static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
 _mm512_and_ps(__m512 __A, __m512 __B) {
   return (__m512)((__v16su)__A & (__v16su)__B);
 }
@@ -281,7 +276,7 @@ _mm512_maskz_and_ps(__mmask16 __U, __m512 __A, __m512 __B) {
                                              (__v16sf)_mm512_setzero_ps());
 }
 
-static __inline__ __m512d __DEFAULT_FN_ATTRS512
+static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
 _mm512_andnot_pd(__m512d __A, __m512d __B) {
   return (__m512d)(~(__v8du)__A & (__v8du)__B);
 }
@@ -300,7 +295,7 @@ _mm512_maskz_andnot_pd(__mmask8 __U, __m512d __A, __m512d __B) {
                                               (__v8df)_mm512_setzero_pd());
 }
 
-static __inline__ __m512 __DEFAULT_FN_ATTRS512
+static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
 _mm512_andnot_ps(__m512 __A, __m512 __B) {
   return (__m512)(~(__v16su)__A & (__v16su)__B);
 }
@@ -475,21 +470,20 @@ _mm512_maskz_cvtps_epu64 (__mmask8 __U, __m256 __A) {
                                              (__v8di)_mm512_setzero_si512(), \
                                              (__mmask8)(U), (int)(R)))
 
-
-static __inline__ __m512d __DEFAULT_FN_ATTRS512
-_mm512_cvtepi64_pd (__m512i __A) {
+static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_cvtepi64_pd(__m512i __A) {
   return (__m512d)__builtin_convertvector((__v8di)__A, __v8df);
 }
 
-static __inline__ __m512d __DEFAULT_FN_ATTRS512
-_mm512_mask_cvtepi64_pd (__m512d __W, __mmask8 __U, __m512i __A) {
+static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_mask_cvtepi64_pd(__m512d __W, __mmask8 __U, __m512i __A) {
   return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
                                               (__v8df)_mm512_cvtepi64_pd(__A),
                                               (__v8df)__W);
 }
 
-static __inline__ __m512d __DEFAULT_FN_ATTRS512
-_mm512_maskz_cvtepi64_pd (__mmask8 __U, __m512i __A) {
+static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_maskz_cvtepi64_pd(__mmask8 __U, __m512i __A) {
   return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
                                               (__v8df)_mm512_cvtepi64_pd(__A),
                                               (__v8df)_mm512_setzero_pd());
@@ -706,20 +700,20 @@ _mm512_maskz_cvttps_epu64 (__mmask8 __U, __m256 __A) {
                                               (__v8di)_mm512_setzero_si512(), \
                                               (__mmask8)(U), (int)(R)))
 
-static __inline__ __m512d __DEFAULT_FN_ATTRS512
-_mm512_cvtepu64_pd (__m512i __A) {
+static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_cvtepu64_pd(__m512i __A) {
   return (__m512d)__builtin_convertvector((__v8du)__A, __v8df);
 }
 
-static __inline__ __m512d __DEFAULT_FN_ATTRS512
-_mm512_mask_cvtepu64_pd (__m512d __W, __mmask8 __U, __m512i __A) {
+static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_mask_cvtepu64_pd(__m512d __W, __mmask8 __U, __m512i __A) {
   return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
                                               (__v8df)_mm512_cvtepu64_pd(__A),
                                               (__v8df)__W);
 }
 
-static __inline__ __m512d __DEFAULT_FN_ATTRS512
-_mm512_maskz_cvtepu64_pd (__mmask8 __U, __m512i __A) {
+static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_maskz_cvtepu64_pd(__mmask8 __U, __m512i __A) {
   return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
                                               (__v8df)_mm512_cvtepu64_pd(__A),
                                               (__v8df)_mm512_setzero_pd());
@@ -1052,177 +1046,154 @@ _mm512_maskz_cvtepu64_ps (__mmask8 __U, __m512i __A) {
                                          (__v2df)_mm_setzero_pd(), \
                                          (__mmask8)(U), (int)(C), (int)(R)))
 
-static __inline__ __mmask16 __DEFAULT_FN_ATTRS512
-_mm512_movepi32_mask (__m512i __A)
-{
+static __inline__ __mmask16
+    __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_movepi32_mask(__m512i __A) {
   return (__mmask16) __builtin_ia32_cvtd2mask512 ((__v16si) __A);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
-_mm512_movm_epi32 (__mmask16 __A)
-{
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_movm_epi32(__mmask16 __A) {
   return (__m512i) __builtin_ia32_cvtmask2d512 (__A);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
-_mm512_movm_epi64 (__mmask8 __A)
-{
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_movm_epi64(__mmask8 __A) {
   return (__m512i) __builtin_ia32_cvtmask2q512 (__A);
 }
 
-static __inline__ __mmask8 __DEFAULT_FN_ATTRS512
-_mm512_movepi64_mask (__m512i __A)
-{
+static __inline__ __mmask8 __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_movepi64_mask(__m512i __A) {
   return (__mmask8) __builtin_ia32_cvtq2mask512 ((__v8di) __A);
 }
 
-
-static __inline__ __m512 __DEFAULT_FN_ATTRS512
-_mm512_broadcast_f32x2 (__m128 __A)
-{
+static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_broadcast_f32x2(__m128 __A) {
   return (__m512)__builtin_shufflevector((__v4sf)__A, (__v4sf)__A,
                                          0, 1, 0, 1, 0, 1, 0, 1,
                                          0, 1, 0, 1, 0, 1, 0, 1);
 }
 
-static __inline__ __m512 __DEFAULT_FN_ATTRS512
-_mm512_mask_broadcast_f32x2 (__m512 __O, __mmask16 __M, __m128 __A)
-{
+static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_mask_broadcast_f32x2(__m512 __O, __mmask16 __M, __m128 __A) {
   return (__m512)__builtin_ia32_selectps_512((__mmask16)__M,
                                              (__v16sf)_mm512_broadcast_f32x2(__A),
                                              (__v16sf)__O);
 }
 
-static __inline__ __m512 __DEFAULT_FN_ATTRS512
-_mm512_maskz_broadcast_f32x2 (__mmask16 __M, __m128 __A)
-{
+static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_maskz_broadcast_f32x2(__mmask16 __M, __m128 __A) {
   return (__m512)__builtin_ia32_selectps_512((__mmask16)__M,
                                              (__v16sf)_mm512_broadcast_f32x2(__A),
                                              (__v16sf)_mm512_setzero_ps());
 }
 
-static __inline__ __m512 __DEFAULT_FN_ATTRS512
-_mm512_broadcast_f32x8(__m256 __A)
-{
+static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_broadcast_f32x8(__m256 __A) {
   return (__m512)__builtin_shufflevector((__v8sf)__A, (__v8sf)__A,
                                          0, 1, 2, 3, 4, 5, 6, 7,
                                          0, 1, 2, 3, 4, 5, 6, 7);
 }
 
-static __inline__ __m512 __DEFAULT_FN_ATTRS512
-_mm512_mask_broadcast_f32x8(__m512 __O, __mmask16 __M, __m256 __A)
-{
+static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_mask_broadcast_f32x8(__m512 __O, __mmask16 __M, __m256 __A) {
   return (__m512)__builtin_ia32_selectps_512((__mmask16)__M,
                                            (__v16sf)_mm512_broadcast_f32x8(__A),
                                            (__v16sf)__O);
 }
 
-static __inline__ __m512 __DEFAULT_FN_ATTRS512
-_mm512_maskz_broadcast_f32x8(__mmask16 __M, __m256 __A)
-{
+static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_maskz_broadcast_f32x8(__mmask16 __M, __m256 __A) {
   return (__m512)__builtin_ia32_selectps_512((__mmask16)__M,
                                            (__v16sf)_mm512_broadcast_f32x8(__A),
                                            (__v16sf)_mm512_setzero_ps());
 }
 
-static __inline__ __m512d __DEFAULT_FN_ATTRS512
-_mm512_broadcast_f64x2(__m128d __A)
-{
+static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_broadcast_f64x2(__m128d __A) {
   return (__m512d)__builtin_shufflevector((__v2df)__A, (__v2df)__A,
                                           0, 1, 0, 1, 0, 1, 0, 1);
 }
 
-static __inline__ __m512d __DEFAULT_FN_ATTRS512
-_mm512_mask_broadcast_f64x2(__m512d __O, __mmask8 __M, __m128d __A)
-{
+static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_mask_broadcast_f64x2(__m512d __O, __mmask8 __M, __m128d __A) {
   return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__M,
                                             (__v8df)_mm512_broadcast_f64x2(__A),
                                             (__v8df)__O);
 }
 
-static __inline__ __m512d __DEFAULT_FN_ATTRS512
-_mm512_maskz_broadcast_f64x2(__mmask8 __M, __m128d __A)
-{
+static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_maskz_broadcast_f64x2(__mmask8 __M, __m128d __A) {
   return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__M,
                                             (__v8df)_mm512_broadcast_f64x2(__A),
                                             (__v8df)_mm512_setzero_pd());
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
-_mm512_broadcast_i32x2 (__m128i __A)
-{
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_broadcast_i32x2(__m128i __A) {
   return (__m512i)__builtin_shufflevector((__v4si)__A, (__v4si)__A,
                                           0, 1, 0, 1, 0, 1, 0, 1,
                                           0, 1, 0, 1, 0, 1, 0, 1);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
-_mm512_mask_broadcast_i32x2 (__m512i __O, __mmask16 __M, __m128i __A)
-{
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_mask_broadcast_i32x2(__m512i __O, __mmask16 __M, __m128i __A) {
   return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
                                              (__v16si)_mm512_broadcast_i32x2(__A),
                                              (__v16si)__O);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
-_mm512_maskz_broadcast_i32x2 (__mmask16 __M, __m128i __A)
-{
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_maskz_broadcast_i32x2(__mmask16 __M, __m128i __A) {
   return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
                                              (__v16si)_mm512_broadcast_i32x2(__A),
                                              (__v16si)_mm512_setzero_si512());
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
-_mm512_broadcast_i32x8(__m256i __A)
-{
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_broadcast_i32x8(__m256i __A) {
   return (__m512i)__builtin_shufflevector((__v8si)__A, (__v8si)__A,
                                           0, 1, 2, 3, 4, 5, 6, 7,
                                           0, 1, 2, 3, 4, 5, 6, 7);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
-_mm512_mask_broadcast_i32x8(__m512i __O, __mmask16 __M, __m256i __A)
-{
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_mask_broadcast_i32x8(__m512i __O, __mmask16 __M, __m256i __A) {
   return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
                                            (__v16si)_mm512_broadcast_i32x8(__A),
                                            (__v16si)__O);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
-_mm512_maskz_broadcast_i32x8(__mmask16 __M, __m256i __A)
-{
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_maskz_broadcast_i32x8(__mmask16 __M, __m256i __A) {
   return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
                                            (__v16si)_mm512_broadcast_i32x8(__A),
                                            (__v16si)_mm512_setzero_si512());
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
-_mm512_broadcast_i64x2(__m128i __A)
-{
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_broadcast_i64x2(__m128i __A) {
   return (__m512i)__builtin_shufflevector((__v2di)__A, (__v2di)__A,
                                           0, 1, 0, 1, 0, 1, 0, 1);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
-_mm512_mask_broadcast_i64x2(__m512i __O, __mmask8 __M, __m128i __A)
-{
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_mask_broadcast_i64x2(__m512i __O, __mmask8 __M, __m128i __A) {
   return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
                                             (__v8di)_mm512_broadcast_i64x2(__A),
                                             (__v8di)__O);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
-_mm512_maskz_broadcast_i64x2(__mmask8 __M, __m128i __A)
-{
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_maskz_broadcast_i64x2(__mmask8 __M, __m128i __A) {
   return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
                                             (__v8di)_mm512_broadcast_i64x2(__A),
                                             (__v8di)_mm512_setzero_si512());
 }
 
-#define _mm512_extractf32x8_ps(A, imm) \
-  ((__m256)__builtin_ia32_extractf32x8_mask((__v16sf)(__m512)(A), (int)(imm), \
-                                            (__v8sf)_mm256_undefined_ps(), \
-                                            (__mmask8)-1))
+#define _mm512_extractf32x8_ps(A, imm)                                         \
+  ((__m256)__builtin_ia32_extractf32x8_mask((__v16sf)(__m512)(A), (int)(imm),  \
+                                            (__v8sf)_mm256_setzero_ps(),       \
+                                            (__mmask8) - 1))
 
 #define _mm512_mask_extractf32x8_ps(W, U, A, imm) \
   ((__m256)__builtin_ia32_extractf32x8_mask((__v16sf)(__m512)(A), (int)(imm), \
@@ -1234,11 +1205,10 @@ _mm512_maskz_broadcast_i64x2(__mmask8 __M, __m128i __A)
                                             (__v8sf)_mm256_setzero_ps(), \
                                             (__mmask8)(U)))
 
-#define _mm512_extractf64x2_pd(A, imm) \
-  ((__m128d)__builtin_ia32_extractf64x2_512_mask((__v8df)(__m512d)(A), \
-                                                 (int)(imm), \
-                                                 (__v2df)_mm_undefined_pd(), \
-                                                 (__mmask8)-1))
+#define _mm512_extractf64x2_pd(A, imm)                                         \
+  ((__m128d)__builtin_ia32_extractf64x2_512_mask(                              \
+      (__v8df)(__m512d)(A), (int)(imm), (__v2df)_mm_setzero_pd(),              \
+      (__mmask8) - 1))
 
 #define _mm512_mask_extractf64x2_pd(W, U, A, imm) \
   ((__m128d)__builtin_ia32_extractf64x2_512_mask((__v8df)(__m512d)(A), \
@@ -1252,10 +1222,10 @@ _mm512_maskz_broadcast_i64x2(__mmask8 __M, __m128i __A)
                                                  (__v2df)_mm_setzero_pd(), \
                                                  (__mmask8)(U)))
 
-#define _mm512_extracti32x8_epi32(A, imm) \
-  ((__m256i)__builtin_ia32_extracti32x8_mask((__v16si)(__m512i)(A), (int)(imm), \
-                                             (__v8si)_mm256_undefined_si256(), \
-                                             (__mmask8)-1))
+#define _mm512_extracti32x8_epi32(A, imm)                                      \
+  ((__m256i)__builtin_ia32_extracti32x8_mask(                                  \
+      (__v16si)(__m512i)(A), (int)(imm), (__v8si)_mm256_setzero_si256(),       \
+      (__mmask8) - 1))
 
 #define _mm512_mask_extracti32x8_epi32(W, U, A, imm) \
   ((__m256i)__builtin_ia32_extracti32x8_mask((__v16si)(__m512i)(A), (int)(imm), \
@@ -1267,11 +1237,10 @@ _mm512_maskz_broadcast_i64x2(__mmask8 __M, __m128i __A)
                                              (__v8si)_mm256_setzero_si256(), \
                                              (__mmask8)(U)))
 
-#define _mm512_extracti64x2_epi64(A, imm) \
-  ((__m128i)__builtin_ia32_extracti64x2_512_mask((__v8di)(__m512i)(A), \
-                                                (int)(imm), \
-                                                (__v2di)_mm_undefined_si128(), \
-                                                (__mmask8)-1))
+#define _mm512_extracti64x2_epi64(A, imm)                                      \
+  ((__m128i)__builtin_ia32_extracti64x2_512_mask(                              \
+      (__v8di)(__m512i)(A), (int)(imm), (__v2di)_mm_setzero_si128(),           \
+      (__mmask8) - 1))
 
 #define _mm512_mask_extracti64x2_epi64(W, U, A, imm) \
   ((__m128i)__builtin_ia32_extracti64x2_512_mask((__v8di)(__m512i)(A), \
@@ -1375,5 +1344,7 @@ _mm512_maskz_broadcast_i64x2(__mmask8 __M, __m128i __A)
 
 #undef __DEFAULT_FN_ATTRS512
 #undef __DEFAULT_FN_ATTRS
+#undef __DEFAULT_FN_ATTRS512_CONSTEXPR
+#undef __DEFAULT_FN_ATTRS_CONSTEXPR
 
 #endif
diff --git a/lib/include/avx512fintrin.h b/lib/include/avx512fintrin.h
index 45e7eeb532..942ed72686 100644
--- a/lib/include/avx512fintrin.h
+++ b/lib/include/avx512fintrin.h
@@ -167,22 +167,23 @@ typedef enum
 } _MM_MANTISSA_SIGN_ENUM;
 
 /* Define the default attributes for the functions in this file. */
-#define __DEFAULT_FN_ATTRS512 __attribute__((__always_inline__, __nodebug__, __target__("avx512f,evex512"), __min_vector_width__(512)))
+#define __DEFAULT_FN_ATTRS512                                                  \
+  __attribute__((__always_inline__, __nodebug__, __target__("avx512f"),        \
+                 __min_vector_width__(512)))
 #define __DEFAULT_FN_ATTRS128                                                  \
-  __attribute__((__always_inline__, __nodebug__,                               \
-                 __target__("avx512f,no-evex512"), __min_vector_width__(128)))
+  __attribute__((__always_inline__, __nodebug__, __target__("avx512f"),        \
+                 __min_vector_width__(128)))
 #define __DEFAULT_FN_ATTRS                                                     \
-  __attribute__((__always_inline__, __nodebug__,                               \
-                 __target__("avx512f,no-evex512")))
+  __attribute__((__always_inline__, __nodebug__, __target__("avx512f")))
 
 #if defined(__cplusplus) && (__cplusplus >= 201103L)
 #define __DEFAULT_FN_ATTRS_CONSTEXPR __DEFAULT_FN_ATTRS constexpr
 #define __DEFAULT_FN_ATTRS512_CONSTEXPR __DEFAULT_FN_ATTRS512 constexpr
 #define __DEFAULT_FN_ATTRS128_CONSTEXPR __DEFAULT_FN_ATTRS128 constexpr
 #else
-#define __DEFAULT_FN_ATTRS_CONSTEXPR __DEFAULT_FN_ATTRS128
+#define __DEFAULT_FN_ATTRS_CONSTEXPR __DEFAULT_FN_ATTRS
 #define __DEFAULT_FN_ATTRS512_CONSTEXPR __DEFAULT_FN_ATTRS512
-#define __DEFAULT_FN_ATTRS128_CONSTEXPR __DEFAULT_FN_ATTRS
+#define __DEFAULT_FN_ATTRS128_CONSTEXPR __DEFAULT_FN_ATTRS128
 #endif
 
 /* Create vectors with repeated elements */
@@ -206,9 +207,7 @@ _mm512_undefined(void)
   return (__m512)__builtin_ia32_undef512();
 }
 
-static __inline__ __m512 __DEFAULT_FN_ATTRS512
-_mm512_undefined_ps(void)
-{
+static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_undefined_ps(void) {
   return (__m512)__builtin_ia32_undef512();
 }
 
@@ -218,48 +217,40 @@ _mm512_undefined_epi32(void)
   return (__m512i)__builtin_ia32_undef512();
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
-_mm512_broadcastd_epi32 (__m128i __A)
-{
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_broadcastd_epi32(__m128i __A) {
   return (__m512i)__builtin_shufflevector((__v4si) __A, (__v4si) __A,
                                           0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
-_mm512_mask_broadcastd_epi32 (__m512i __O, __mmask16 __M, __m128i __A)
-{
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_mask_broadcastd_epi32(__m512i __O, __mmask16 __M, __m128i __A) {
   return (__m512i)__builtin_ia32_selectd_512(__M,
                                              (__v16si) _mm512_broadcastd_epi32(__A),
                                              (__v16si) __O);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
-_mm512_maskz_broadcastd_epi32 (__mmask16 __M, __m128i __A)
-{
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_maskz_broadcastd_epi32(__mmask16 __M, __m128i __A) {
   return (__m512i)__builtin_ia32_selectd_512(__M,
                                              (__v16si) _mm512_broadcastd_epi32(__A),
                                              (__v16si) _mm512_setzero_si512());
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
-_mm512_broadcastq_epi64 (__m128i __A)
-{
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_broadcastq_epi64(__m128i __A) {
   return (__m512i)__builtin_shufflevector((__v2di) __A, (__v2di) __A,
                                           0, 0, 0, 0, 0, 0, 0, 0);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
-_mm512_mask_broadcastq_epi64 (__m512i __O, __mmask8 __M, __m128i __A)
-{
-  return (__m512i)__builtin_ia32_selectq_512(__M,
-                                             (__v8di) _mm512_broadcastq_epi64(__A),
-                                             (__v8di) __O);
-
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_mask_broadcastq_epi64(__m512i __O, __mmask8 __M, __m128i __A) {
+  return (__m512i)__builtin_ia32_selectq_512(
+      __M, (__v8di)_mm512_broadcastq_epi64(__A), (__v8di)__O);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
-_mm512_maskz_broadcastq_epi64 (__mmask8 __M, __m128i __A)
-{
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_maskz_broadcastq_epi64(__mmask8 __M, __m128i __A) {
   return (__m512i)__builtin_ia32_selectq_512(__M,
                                              (__v8di) _mm512_broadcastq_epi64(__A),
                                              (__v8di) _mm512_setzero_si512());
@@ -277,20 +268,20 @@ _mm512_setzero_pd(void) {
   return __extension__(__m512d){0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0};
 }
 
-static __inline __m512 __DEFAULT_FN_ATTRS512
+static __inline __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
 _mm512_set1_ps(float __w)
 {
   return __extension__ (__m512){ __w, __w, __w, __w, __w, __w, __w, __w,
                                  __w, __w, __w, __w, __w, __w, __w, __w  };
 }
 
-static __inline __m512d __DEFAULT_FN_ATTRS512
+static __inline __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
 _mm512_set1_pd(double __w)
 {
   return __extension__ (__m512d){ __w, __w, __w, __w, __w, __w, __w, __w };
 }
 
-static __inline __m512i __DEFAULT_FN_ATTRS512
+static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
 _mm512_set1_epi8(char __w)
 {
   return __extension__ (__m512i)(__v64qi){
@@ -304,7 +295,7 @@ _mm512_set1_epi8(char __w)
     __w, __w, __w, __w, __w, __w, __w, __w  };
 }
 
-static __inline __m512i __DEFAULT_FN_ATTRS512
+static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
 _mm512_set1_epi16(short __w)
 {
   return __extension__ (__m512i)(__v32hi){
@@ -314,7 +305,7 @@ _mm512_set1_epi16(short __w)
     __w, __w, __w, __w, __w, __w, __w, __w };
 }
 
-static __inline __m512i __DEFAULT_FN_ATTRS512
+static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
 _mm512_set1_epi32(int __s)
 {
   return __extension__ (__m512i)(__v16si){
@@ -322,81 +313,80 @@ _mm512_set1_epi32(int __s)
     __s, __s, __s, __s, __s, __s, __s, __s };
 }
 
-static __inline __m512i __DEFAULT_FN_ATTRS512
-_mm512_maskz_set1_epi32(__mmask16 __M, int __A)
-{
+static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_maskz_set1_epi32(__mmask16 __M, int __A) {
   return (__m512i)__builtin_ia32_selectd_512(__M,
                                              (__v16si)_mm512_set1_epi32(__A),
                                              (__v16si)_mm512_setzero_si512());
 }
 
-static __inline __m512i __DEFAULT_FN_ATTRS512
+static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
 _mm512_set1_epi64(long long __d)
 {
   return __extension__(__m512i)(__v8di){ __d, __d, __d, __d, __d, __d, __d, __d };
 }
 
-static __inline __m512i __DEFAULT_FN_ATTRS512
-_mm512_maskz_set1_epi64(__mmask8 __M, long long __A)
-{
+static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_maskz_set1_epi64(__mmask8 __M, long long __A) {
   return (__m512i)__builtin_ia32_selectq_512(__M,
                                              (__v8di)_mm512_set1_epi64(__A),
                                              (__v8di)_mm512_setzero_si512());
 }
 
-static __inline__ __m512 __DEFAULT_FN_ATTRS512
-_mm512_broadcastss_ps(__m128 __A)
-{
+static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_broadcastss_ps(__m128 __A) {
   return (__m512)__builtin_shufflevector((__v4sf) __A, (__v4sf) __A,
                                          0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
 }
 
-static __inline __m512i __DEFAULT_FN_ATTRS512
-_mm512_set4_epi32 (int __A, int __B, int __C, int __D)
-{
+static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_set4_epi32(int __A, int __B, int __C, int __D) {
   return __extension__ (__m512i)(__v16si)
    { __D, __C, __B, __A, __D, __C, __B, __A,
      __D, __C, __B, __A, __D, __C, __B, __A };
 }
 
-static __inline __m512i __DEFAULT_FN_ATTRS512
-_mm512_set4_epi64 (long long __A, long long __B, long long __C,
-       long long __D)
-{
+static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_set4_epi64(long long __A, long long __B, long long __C, long long __D) {
   return __extension__ (__m512i) (__v8di)
    { __D, __C, __B, __A, __D, __C, __B, __A };
 }
 
-static __inline __m512d __DEFAULT_FN_ATTRS512
-_mm512_set4_pd (double __A, double __B, double __C, double __D)
-{
+static __inline __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_set4_pd(double __A, double __B, double __C, double __D) {
   return __extension__ (__m512d)
    { __D, __C, __B, __A, __D, __C, __B, __A };
 }
 
-static __inline __m512 __DEFAULT_FN_ATTRS512
-_mm512_set4_ps (float __A, float __B, float __C, float __D)
-{
+static __inline __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_set4_ps(float __A, float __B, float __C, float __D) {
   return __extension__ (__m512)
    { __D, __C, __B, __A, __D, __C, __B, __A,
      __D, __C, __B, __A, __D, __C, __B, __A };
 }
 
-#define _mm512_setr4_epi32(e0,e1,e2,e3)               \
-  _mm512_set4_epi32((e3),(e2),(e1),(e0))
+static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_setr4_epi32(int __e0, int __e1, int __e2, int __e3) {
+  return _mm512_set4_epi32(__e3, __e2, __e1, __e0); // __e0 -> element 0
+}
 
-#define _mm512_setr4_epi64(e0,e1,e2,e3)               \
-  _mm512_set4_epi64((e3),(e2),(e1),(e0))
+static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_setr4_epi64(
+    long long __e0, long long __e1, long long __e2, long long __e3) {
+  return _mm512_set4_epi64(__e3, __e2, __e1, __e0); // __e0 -> element 0
+}
 
-#define _mm512_setr4_pd(e0,e1,e2,e3)                \
-  _mm512_set4_pd((e3),(e2),(e1),(e0))
+static __inline __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_setr4_pd(double __e0, double __e1, double __e2, double __e3) {
+  return _mm512_set4_pd(__e3, __e2, __e1, __e0); // __e0 -> element 0
+}
 
-#define _mm512_setr4_ps(e0,e1,e2,e3)                \
-  _mm512_set4_ps((e3),(e2),(e1),(e0))
+static __inline __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_setr4_ps(float __e0, float __e1, float __e2, float __e3) {
+  return _mm512_set4_ps(__e3, __e2, __e1, __e0); // __e0 -> element 0
+}
 
-static __inline__ __m512d __DEFAULT_FN_ATTRS512
-_mm512_broadcastsd_pd(__m128d __A)
-{
+static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_broadcastsd_pd(__m128d __A) {
   return (__m512d)__builtin_shufflevector((__v2df) __A, (__v2df) __A,
                                           0, 0, 0, 0, 0, 0, 0, 0);
 }
@@ -417,37 +407,36 @@ _mm512_castps256_ps512(__m256 __a)
                                  1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
 }
 
-static __inline __m128d __DEFAULT_FN_ATTRS512
+static __inline __m128d __DEFAULT_FN_ATTRS512_CONSTEXPR
 _mm512_castpd512_pd128(__m512d __a)
 {
   return __builtin_shufflevector(__a, __a, 0, 1);
 }
 
-static __inline __m256d __DEFAULT_FN_ATTRS512
+static __inline __m256d __DEFAULT_FN_ATTRS512_CONSTEXPR
 _mm512_castpd512_pd256 (__m512d __A)
 {
   return __builtin_shufflevector(__A, __A, 0, 1, 2, 3);
 }
 
-static __inline __m128 __DEFAULT_FN_ATTRS512
+static __inline __m128 __DEFAULT_FN_ATTRS512_CONSTEXPR
 _mm512_castps512_ps128(__m512 __a)
 {
   return __builtin_shufflevector(__a, __a, 0, 1, 2, 3);
 }
 
-static __inline __m256 __DEFAULT_FN_ATTRS512
-_mm512_castps512_ps256 (__m512 __A)
-{
+static __inline __m256 __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_castps512_ps256(__m512 __A) {
   return __builtin_shufflevector(__A, __A, 0, 1, 2, 3, 4, 5, 6, 7);
 }
 
-static __inline __m512 __DEFAULT_FN_ATTRS512
+static __inline __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
 _mm512_castpd_ps (__m512d __A)
 {
   return (__m512) (__A);
 }
 
-static __inline __m512i __DEFAULT_FN_ATTRS512
+static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
 _mm512_castpd_si512 (__m512d __A)
 {
   return (__m512i) (__A);
@@ -462,13 +451,13 @@ _mm512_castpd128_pd512 (__m128d __A)
       __B, 0, 1, 2, 3, 4, 5, 6, 7);
 }
 
-static __inline __m512d __DEFAULT_FN_ATTRS512
+static __inline __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
 _mm512_castps_pd (__m512 __A)
 {
   return (__m512d) (__A);
 }
 
-static __inline __m512i __DEFAULT_FN_ATTRS512
+static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
 _mm512_castps_si512 (__m512 __A)
 {
   return (__m512i) (__A);
@@ -498,39 +487,36 @@ _mm512_castsi256_si512 (__m256i __A)
    return  __builtin_shufflevector( __A, __builtin_nondeterministic_value(__A), 0, 1, 2, 3, 4, 5, 6, 7);
 }
 
-static __inline __m512 __DEFAULT_FN_ATTRS512
+static __inline __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
 _mm512_castsi512_ps (__m512i __A)
 {
   return (__m512) (__A);
 }
 
-static __inline __m512d __DEFAULT_FN_ATTRS512
+static __inline __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
 _mm512_castsi512_pd (__m512i __A)
 {
   return (__m512d) (__A);
 }
 
-static __inline __m128i __DEFAULT_FN_ATTRS512
+static __inline __m128i __DEFAULT_FN_ATTRS512_CONSTEXPR
 _mm512_castsi512_si128 (__m512i __A)
 {
   return (__m128i)__builtin_shufflevector(__A, __A , 0, 1);
 }
 
-static __inline __m256i __DEFAULT_FN_ATTRS512
-_mm512_castsi512_si256 (__m512i __A)
-{
+static __inline __m256i __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_castsi512_si256(__m512i __A) {
   return (__m256i)__builtin_shufflevector(__A, __A , 0, 1, 2, 3);
 }
 
-static __inline__ __mmask16 __DEFAULT_FN_ATTRS
-_mm512_int2mask(int __a)
-{
+static __inline__ __mmask16 __DEFAULT_FN_ATTRS_CONSTEXPR
+_mm512_int2mask(int __a) {
   return (__mmask16)__a;
 }
 
-static __inline__ int __DEFAULT_FN_ATTRS
-_mm512_mask2int(__mmask16 __a)
-{
+static __inline__ int __DEFAULT_FN_ATTRS_CONSTEXPR
+_mm512_mask2int(__mmask16 __a) {
   return (int)__a;
 }
 
@@ -547,9 +533,8 @@ _mm512_mask2int(__mmask16 __a)
 ///    A 128-bit vector of [2 x double].
 /// \returns A 512-bit floating-point vector of [8 x double]. The lower 128 bits
 ///    contain the value of the parameter. The upper 384 bits are set to zero.
-static __inline __m512d __DEFAULT_FN_ATTRS512
-_mm512_zextpd128_pd512(__m128d __a)
-{
+static __inline __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_zextpd128_pd512(__m128d __a) {
   return __builtin_shufflevector((__v2df)__a, (__v2df)_mm_setzero_pd(), 0, 1, 2, 3, 2, 3, 2, 3);
 }
 
@@ -566,9 +551,8 @@ _mm512_zextpd128_pd512(__m128d __a)
 ///    A 256-bit vector of [4 x double].
 /// \returns A 512-bit floating-point vector of [8 x double]. The lower 256 bits
 ///    contain the value of the parameter. The upper 256 bits are set to zero.
-static __inline __m512d __DEFAULT_FN_ATTRS512
-_mm512_zextpd256_pd512(__m256d __a)
-{
+static __inline __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_zextpd256_pd512(__m256d __a) {
   return __builtin_shufflevector((__v4df)__a, (__v4df)_mm256_setzero_pd(), 0, 1, 2, 3, 4, 5, 6, 7);
 }
 
@@ -584,9 +568,8 @@ _mm512_zextpd256_pd512(__m256d __a)
 ///    A 128-bit vector of [4 x float].
 /// \returns A 512-bit floating-point vector of [16 x float]. The lower 128 bits
 ///    contain the value of the parameter. The upper 384 bits are set to zero.
-static __inline __m512 __DEFAULT_FN_ATTRS512
-_mm512_zextps128_ps512(__m128 __a)
-{
+static __inline __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_zextps128_ps512(__m128 __a) {
   return __builtin_shufflevector((__v4sf)__a, (__v4sf)_mm_setzero_ps(), 0, 1, 2, 3, 4, 5, 6, 7, 4, 5, 6, 7, 4, 5, 6, 7);
 }
 
@@ -602,9 +585,8 @@ _mm512_zextps128_ps512(__m128 __a)
 ///    A 256-bit vector of [8 x float].
 /// \returns A 512-bit floating-point vector of [16 x float]. The lower 256 bits
 ///    contain the value of the parameter. The upper 256 bits are set to zero.
-static __inline __m512 __DEFAULT_FN_ATTRS512
-_mm512_zextps256_ps512(__m256 __a)
-{
+static __inline __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_zextps256_ps512(__m256 __a) {
   return __builtin_shufflevector((__v8sf)__a, (__v8sf)_mm256_setzero_ps(), 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
 }
 
@@ -620,9 +602,8 @@ _mm512_zextps256_ps512(__m256 __a)
 ///    A 128-bit integer vector.
 /// \returns A 512-bit integer vector. The lower 128 bits contain the value of
 ///    the parameter. The upper 384 bits are set to zero.
-static __inline __m512i __DEFAULT_FN_ATTRS512
-_mm512_zextsi128_si512(__m128i __a)
-{
+static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_zextsi128_si512(__m128i __a) {
   return __builtin_shufflevector((__v2di)__a, (__v2di)_mm_setzero_si128(), 0, 1, 2, 3, 2, 3, 2, 3);
 }
 
@@ -638,22 +619,20 @@ _mm512_zextsi128_si512(__m128i __a)
 ///    A 256-bit integer vector.
 /// \returns A 512-bit integer vector. The lower 256 bits contain the value of
 ///    the parameter. The upper 256 bits are set to zero.
-static __inline __m512i __DEFAULT_FN_ATTRS512
-_mm512_zextsi256_si512(__m256i __a)
-{
+static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_zextsi256_si512(__m256i __a) {
   return __builtin_shufflevector((__v4di)__a, (__v4di)_mm256_setzero_si256(), 0, 1, 2, 3, 4, 5, 6, 7);
 }
 
 /* Bitwise operators */
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
 _mm512_and_epi32(__m512i __a, __m512i __b)
 {
   return (__m512i)((__v16su)__a & (__v16su)__b);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
-_mm512_mask_and_epi32(__m512i __src, __mmask16 __k, __m512i __a, __m512i __b)
-{
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_mask_and_epi32(__m512i __src, __mmask16 __k, __m512i __a, __m512i __b) {
   return (__m512i)__builtin_ia32_selectd_512((__mmask16)__k,
                 (__v16si) _mm512_and_epi32(__a, __b),
                 (__v16si) __src);
@@ -666,18 +645,16 @@ _mm512_maskz_and_epi32(__mmask16 __k, __m512i __a, __m512i __b)
                                          __k, __a, __b);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
 _mm512_and_epi64(__m512i __a, __m512i __b)
 {
   return (__m512i)((__v8du)__a & (__v8du)__b);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
-_mm512_mask_and_epi64(__m512i __src, __mmask8 __k, __m512i __a, __m512i __b)
-{
-    return (__m512i) __builtin_ia32_selectq_512 ((__mmask8) __k,
-                (__v8di) _mm512_and_epi64(__a, __b),
-                (__v8di) __src);
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_mask_and_epi64(__m512i __src, __mmask8 __k, __m512i __a, __m512i __b) {
+  return (__m512i)__builtin_ia32_selectq_512(
+      (__mmask8)__k, (__v8di)_mm512_and_epi64(__a, __b), (__v8di)__src);
 }
 
 static __inline__ __m512i __DEFAULT_FN_ATTRS512
@@ -687,13 +664,13 @@ _mm512_maskz_and_epi64(__mmask8 __k, __m512i __a, __m512i __b)
                                          __k, __a, __b);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
 _mm512_andnot_si512 (__m512i __A, __m512i __B)
 {
   return (__m512i)(~(__v8du)__A & (__v8du)__B);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
 _mm512_andnot_epi32 (__m512i __A, __m512i __B)
 {
   return (__m512i)(~(__v16su)__A & (__v16su)__B);
@@ -714,7 +691,7 @@ _mm512_maskz_andnot_epi32(__mmask16 __U, __m512i __A, __m512i __B)
                                            __U, __A, __B);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
 _mm512_andnot_epi64(__m512i __A, __m512i __B)
 {
   return (__m512i)(~(__v8du)__A & (__v8du)__B);
@@ -735,7 +712,7 @@ _mm512_maskz_andnot_epi64(__mmask8 __U, __m512i __A, __m512i __B)
                                            __U, __A, __B);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
 _mm512_or_epi32(__m512i __a, __m512i __b)
 {
   return (__m512i)((__v16su)__a | (__v16su)__b);
@@ -755,7 +732,7 @@ _mm512_maskz_or_epi32(__mmask16 __k, __m512i __a, __m512i __b)
   return (__m512i)_mm512_mask_or_epi32(_mm512_setzero_si512(), __k, __a, __b);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
 _mm512_or_epi64(__m512i __a, __m512i __b)
 {
   return (__m512i)((__v8du)__a | (__v8du)__b);
@@ -775,7 +752,7 @@ _mm512_maskz_or_epi64(__mmask8 __k, __m512i __a, __m512i __b)
   return (__m512i)_mm512_mask_or_epi64(_mm512_setzero_si512(), __k, __a, __b);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
 _mm512_xor_epi32(__m512i __a, __m512i __b)
 {
   return (__m512i)((__v16su)__a ^ (__v16su)__b);
@@ -795,7 +772,7 @@ _mm512_maskz_xor_epi32(__mmask16 __k, __m512i __a, __m512i __b)
   return (__m512i)_mm512_mask_xor_epi32(_mm512_setzero_si512(), __k, __a, __b);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
 _mm512_xor_epi64(__m512i __a, __m512i __b)
 {
   return (__m512i)((__v8du)__a ^ (__v8du)__b);
@@ -815,19 +792,19 @@ _mm512_maskz_xor_epi64(__mmask8 __k, __m512i __a, __m512i __b)
   return (__m512i)_mm512_mask_xor_epi64(_mm512_setzero_si512(), __k, __a, __b);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
 _mm512_and_si512(__m512i __a, __m512i __b)
 {
   return (__m512i)((__v8du)__a & (__v8du)__b);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
 _mm512_or_si512(__m512i __a, __m512i __b)
 {
   return (__m512i)((__v8du)__a | (__v8du)__b);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
 _mm512_xor_si512(__m512i __a, __m512i __b)
 {
   return (__m512i)((__v8du)__a ^ (__v8du)__b);
@@ -835,125 +812,107 @@ _mm512_xor_si512(__m512i __a, __m512i __b)
 
 /* Arithmetic */
 
-static __inline __m512d __DEFAULT_FN_ATTRS512
-_mm512_add_pd(__m512d __a, __m512d __b)
-{
+static __inline __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_add_pd(__m512d __a, __m512d __b) {
   return (__m512d)((__v8df)__a + (__v8df)__b);
 }
 
-static __inline __m512 __DEFAULT_FN_ATTRS512
-_mm512_add_ps(__m512 __a, __m512 __b)
-{
+static __inline __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_add_ps(__m512 __a, __m512 __b) {
   return (__m512)((__v16sf)__a + (__v16sf)__b);
 }
 
-static __inline __m512d __DEFAULT_FN_ATTRS512
-_mm512_mul_pd(__m512d __a, __m512d __b)
-{
+static __inline __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_mul_pd(__m512d __a, __m512d __b) {
   return (__m512d)((__v8df)__a * (__v8df)__b);
 }
 
-static __inline __m512 __DEFAULT_FN_ATTRS512
-_mm512_mul_ps(__m512 __a, __m512 __b)
-{
+static __inline __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_mul_ps(__m512 __a, __m512 __b) {
   return (__m512)((__v16sf)__a * (__v16sf)__b);
 }
 
-static __inline __m512d __DEFAULT_FN_ATTRS512
-_mm512_sub_pd(__m512d __a, __m512d __b)
-{
+static __inline __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_sub_pd(__m512d __a, __m512d __b) {
   return (__m512d)((__v8df)__a - (__v8df)__b);
 }
 
-static __inline __m512 __DEFAULT_FN_ATTRS512
-_mm512_sub_ps(__m512 __a, __m512 __b)
-{
+static __inline __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_sub_ps(__m512 __a, __m512 __b) {
   return (__m512)((__v16sf)__a - (__v16sf)__b);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
-_mm512_add_epi64 (__m512i __A, __m512i __B)
-{
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_add_epi64(__m512i __A, __m512i __B) {
   return (__m512i) ((__v8du) __A + (__v8du) __B);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
-_mm512_mask_add_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
-{
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_mask_add_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m512i __B) {
   return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
                                              (__v8di)_mm512_add_epi64(__A, __B),
                                              (__v8di)__W);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
-_mm512_maskz_add_epi64(__mmask8 __U, __m512i __A, __m512i __B)
-{
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_maskz_add_epi64(__mmask8 __U, __m512i __A, __m512i __B) {
   return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
                                              (__v8di)_mm512_add_epi64(__A, __B),
                                              (__v8di)_mm512_setzero_si512());
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
-_mm512_sub_epi64 (__m512i __A, __m512i __B)
-{
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_sub_epi64(__m512i __A, __m512i __B) {
   return (__m512i) ((__v8du) __A - (__v8du) __B);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
-_mm512_mask_sub_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
-{
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_mask_sub_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m512i __B) {
   return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
                                              (__v8di)_mm512_sub_epi64(__A, __B),
                                              (__v8di)__W);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
-_mm512_maskz_sub_epi64(__mmask8 __U, __m512i __A, __m512i __B)
-{
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_maskz_sub_epi64(__mmask8 __U, __m512i __A, __m512i __B) {
   return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
                                              (__v8di)_mm512_sub_epi64(__A, __B),
                                              (__v8di)_mm512_setzero_si512());
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
-_mm512_add_epi32 (__m512i __A, __m512i __B)
-{
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_add_epi32(__m512i __A, __m512i __B) {
   return (__m512i) ((__v16su) __A + (__v16su) __B);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
-_mm512_mask_add_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
-{
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_mask_add_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m512i __B) {
   return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
                                              (__v16si)_mm512_add_epi32(__A, __B),
                                              (__v16si)__W);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
-_mm512_maskz_add_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
-{
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_maskz_add_epi32(__mmask16 __U, __m512i __A, __m512i __B) {
   return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
                                              (__v16si)_mm512_add_epi32(__A, __B),
                                              (__v16si)_mm512_setzero_si512());
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
-_mm512_sub_epi32 (__m512i __A, __m512i __B)
-{
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_sub_epi32(__m512i __A, __m512i __B) {
   return (__m512i) ((__v16su) __A - (__v16su) __B);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
-_mm512_mask_sub_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
-{
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_mask_sub_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m512i __B) {
   return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
                                              (__v16si)_mm512_sub_epi32(__A, __B),
                                              (__v16si)__W);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
-_mm512_maskz_sub_epi32(__mmask16 __U, __m512i __A, __m512i __B)
-{
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_maskz_sub_epi32(__mmask16 __U, __m512i __A, __m512i __B) {
   return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
                                              (__v16si)_mm512_sub_epi32(__A, __B),
                                              (__v16si)_mm512_setzero_si512());
@@ -973,24 +932,21 @@ _mm512_maskz_sub_epi32(__mmask16 __U, __m512i __A, __m512i __B)
                                    (__v8df)_mm512_max_round_pd((A), (B), (R)), \
                                    (__v8df)_mm512_setzero_pd()))
 
-static  __inline__ __m512d __DEFAULT_FN_ATTRS512
-_mm512_max_pd(__m512d __A, __m512d __B)
-{
+static __inline__ __m512d
+    __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_max_pd(__m512d __A, __m512d __B) {
   return (__m512d) __builtin_ia32_maxpd512((__v8df) __A, (__v8df) __B,
                                            _MM_FROUND_CUR_DIRECTION);
 }
 
-static __inline__ __m512d __DEFAULT_FN_ATTRS512
-_mm512_mask_max_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
-{
+static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_mask_max_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) {
   return (__m512d)__builtin_ia32_selectpd_512(__U,
                                               (__v8df)_mm512_max_pd(__A, __B),
                                               (__v8df)__W);
 }
 
-static __inline__ __m512d __DEFAULT_FN_ATTRS512
-_mm512_maskz_max_pd (__mmask8 __U, __m512d __A, __m512d __B)
-{
+static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_maskz_max_pd(__mmask8 __U, __m512d __A, __m512d __B) {
   return (__m512d)__builtin_ia32_selectpd_512(__U,
                                               (__v8df)_mm512_max_pd(__A, __B),
                                               (__v8df)_mm512_setzero_pd());
@@ -1010,31 +966,30 @@ _mm512_maskz_max_pd (__mmask8 __U, __m512d __A, __m512d __B)
                                   (__v16sf)_mm512_max_round_ps((A), (B), (R)), \
                                   (__v16sf)_mm512_setzero_ps()))
 
-static  __inline__ __m512 __DEFAULT_FN_ATTRS512
-_mm512_max_ps(__m512 __A, __m512 __B)
-{
+static __inline__ __m512
+    __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_max_ps(__m512 __A, __m512 __B) {
   return (__m512) __builtin_ia32_maxps512((__v16sf) __A, (__v16sf) __B,
                                           _MM_FROUND_CUR_DIRECTION);
 }
 
-static __inline__ __m512 __DEFAULT_FN_ATTRS512
-_mm512_mask_max_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
-{
+static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_mask_max_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) {
   return (__m512)__builtin_ia32_selectps_512(__U,
                                              (__v16sf)_mm512_max_ps(__A, __B),
                                              (__v16sf)__W);
 }
 
-static __inline__ __m512 __DEFAULT_FN_ATTRS512
-_mm512_maskz_max_ps (__mmask16 __U, __m512 __A, __m512 __B)
-{
+static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_maskz_max_ps(__mmask16 __U, __m512 __A, __m512 __B) {
   return (__m512)__builtin_ia32_selectps_512(__U,
                                              (__v16sf)_mm512_max_ps(__A, __B),
                                              (__v16sf)_mm512_setzero_ps());
 }
 
-static __inline__ __m128 __DEFAULT_FN_ATTRS128
-_mm_mask_max_ss(__m128 __W, __mmask8 __U,__m128 __A, __m128 __B) {
+static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_max_ss(__m128 __W,
+                                                               __mmask8 __U,
+                                                               __m128 __A,
+                                                               __m128 __B) {
   return (__m128) __builtin_ia32_maxss_round_mask ((__v4sf) __A,
                 (__v4sf) __B,
                 (__v4sf) __W,
@@ -1042,8 +997,9 @@ _mm_mask_max_ss(__m128 __W, __mmask8 __U,__m128 __A, __m128 __B) {
                 _MM_FROUND_CUR_DIRECTION);
 }
 
-static __inline__ __m128 __DEFAULT_FN_ATTRS128
-_mm_maskz_max_ss(__mmask8 __U,__m128 __A, __m128 __B) {
+static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_max_ss(__mmask8 __U,
+                                                                __m128 __A,
+                                                                __m128 __B) {
   return (__m128) __builtin_ia32_maxss_round_mask ((__v4sf) __A,
                 (__v4sf) __B,
                 (__v4sf)  _mm_setzero_ps (),
@@ -1069,8 +1025,10 @@ _mm_maskz_max_ss(__mmask8 __U,__m128 __A, __m128 __B) {
                                            (__v4sf)_mm_setzero_ps(), \
                                            (__mmask8)(U), (int)(R)))
 
-static __inline__ __m128d __DEFAULT_FN_ATTRS128
-_mm_mask_max_sd(__m128d __W, __mmask8 __U,__m128d __A, __m128d __B) {
+static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_max_sd(__m128d __W,
+                                                                __mmask8 __U,
+                                                                __m128d __A,
+                                                                __m128d __B) {
   return (__m128d) __builtin_ia32_maxsd_round_mask ((__v2df) __A,
                 (__v2df) __B,
                 (__v2df) __W,
@@ -1078,8 +1036,9 @@ _mm_mask_max_sd(__m128d __W, __mmask8 __U,__m128d __A, __m128d __B) {
                 _MM_FROUND_CUR_DIRECTION);
 }
 
-static __inline__ __m128d __DEFAULT_FN_ATTRS128
-_mm_maskz_max_sd(__mmask8 __U,__m128d __A, __m128d __B) {
+static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_max_sd(__mmask8 __U,
+                                                                 __m128d __A,
+                                                                 __m128d __B) {
   return (__m128d) __builtin_ia32_maxsd_round_mask ((__v2df) __A,
                 (__v2df) __B,
                 (__v2df)  _mm_setzero_pd (),
@@ -1106,89 +1065,76 @@ _mm_maskz_max_sd(__mmask8 __U,__m128d __A, __m128d __B) {
                                             (__mmask8)(U), (int)(R)))
 
 static __inline __m512i
-__DEFAULT_FN_ATTRS512
-_mm512_max_epi32(__m512i __A, __m512i __B)
-{
+    __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_max_epi32(__m512i __A, __m512i __B) {
   return (__m512i)__builtin_elementwise_max((__v16si)__A, (__v16si)__B);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
-_mm512_mask_max_epi32 (__m512i __W, __mmask16 __M, __m512i __A, __m512i __B)
-{
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_mask_max_epi32(__m512i __W, __mmask16 __M, __m512i __A, __m512i __B) {
   return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
                                             (__v16si)_mm512_max_epi32(__A, __B),
                                             (__v16si)__W);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
-_mm512_maskz_max_epi32 (__mmask16 __M, __m512i __A, __m512i __B)
-{
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_maskz_max_epi32(__mmask16 __M, __m512i __A, __m512i __B) {
   return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
                                             (__v16si)_mm512_max_epi32(__A, __B),
                                             (__v16si)_mm512_setzero_si512());
 }
 
-static __inline __m512i __DEFAULT_FN_ATTRS512
-_mm512_max_epu32(__m512i __A, __m512i __B)
-{
+static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_max_epu32(__m512i __A, __m512i __B) {
   return (__m512i)__builtin_elementwise_max((__v16su)__A, (__v16su)__B);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
-_mm512_mask_max_epu32 (__m512i __W, __mmask16 __M, __m512i __A, __m512i __B)
-{
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_mask_max_epu32(__m512i __W, __mmask16 __M, __m512i __A, __m512i __B) {
   return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
                                             (__v16si)_mm512_max_epu32(__A, __B),
                                             (__v16si)__W);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
-_mm512_maskz_max_epu32 (__mmask16 __M, __m512i __A, __m512i __B)
-{
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_maskz_max_epu32(__mmask16 __M, __m512i __A, __m512i __B) {
   return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
                                             (__v16si)_mm512_max_epu32(__A, __B),
                                             (__v16si)_mm512_setzero_si512());
 }
 
-static __inline __m512i __DEFAULT_FN_ATTRS512
-_mm512_max_epi64(__m512i __A, __m512i __B)
-{
+static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_max_epi64(__m512i __A, __m512i __B) {
   return (__m512i)__builtin_elementwise_max((__v8di)__A, (__v8di)__B);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
-_mm512_mask_max_epi64 (__m512i __W, __mmask8 __M, __m512i __A, __m512i __B)
-{
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_mask_max_epi64(__m512i __W, __mmask8 __M, __m512i __A, __m512i __B) {
   return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
                                              (__v8di)_mm512_max_epi64(__A, __B),
                                              (__v8di)__W);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
-_mm512_maskz_max_epi64 (__mmask8 __M, __m512i __A, __m512i __B)
-{
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_maskz_max_epi64(__mmask8 __M, __m512i __A, __m512i __B) {
   return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
                                              (__v8di)_mm512_max_epi64(__A, __B),
                                              (__v8di)_mm512_setzero_si512());
 }
 
-static __inline __m512i __DEFAULT_FN_ATTRS512
-_mm512_max_epu64(__m512i __A, __m512i __B)
-{
+static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_max_epu64(__m512i __A, __m512i __B) {
   return (__m512i)__builtin_elementwise_max((__v8du)__A, (__v8du)__B);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
-_mm512_mask_max_epu64 (__m512i __W, __mmask8 __M, __m512i __A, __m512i __B)
-{
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_mask_max_epu64(__m512i __W, __mmask8 __M, __m512i __A, __m512i __B) {
   return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
                                              (__v8di)_mm512_max_epu64(__A, __B),
                                              (__v8di)__W);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
-_mm512_maskz_max_epu64 (__mmask8 __M, __m512i __A, __m512i __B)
-{
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_maskz_max_epu64(__mmask8 __M, __m512i __A, __m512i __B) {
   return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
                                              (__v8di)_mm512_max_epu64(__A, __B),
                                              (__v8di)_mm512_setzero_si512());
@@ -1208,24 +1154,21 @@ _mm512_maskz_max_epu64 (__mmask8 __M, __m512i __A, __m512i __B)
                                    (__v8df)_mm512_min_round_pd((A), (B), (R)), \
                                    (__v8df)_mm512_setzero_pd()))
 
-static  __inline__ __m512d __DEFAULT_FN_ATTRS512
-_mm512_min_pd(__m512d __A, __m512d __B)
-{
+static __inline__ __m512d
+    __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_min_pd(__m512d __A, __m512d __B) {
   return (__m512d) __builtin_ia32_minpd512((__v8df) __A, (__v8df) __B,
                                            _MM_FROUND_CUR_DIRECTION);
 }
 
-static __inline__ __m512d __DEFAULT_FN_ATTRS512
-_mm512_mask_min_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
-{
+static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_mask_min_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) {
   return (__m512d)__builtin_ia32_selectpd_512(__U,
                                               (__v8df)_mm512_min_pd(__A, __B),
                                               (__v8df)__W);
 }
 
-static __inline__ __m512d __DEFAULT_FN_ATTRS512
-_mm512_maskz_min_pd (__mmask8 __U, __m512d __A, __m512d __B)
-{
+static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_maskz_min_pd(__mmask8 __U, __m512d __A, __m512d __B) {
   return (__m512d)__builtin_ia32_selectpd_512(__U,
                                               (__v8df)_mm512_min_pd(__A, __B),
                                               (__v8df)_mm512_setzero_pd());
@@ -1245,31 +1188,30 @@ _mm512_maskz_min_pd (__mmask8 __U, __m512d __A, __m512d __B)
                                   (__v16sf)_mm512_min_round_ps((A), (B), (R)), \
                                   (__v16sf)_mm512_setzero_ps()))
 
-static  __inline__ __m512 __DEFAULT_FN_ATTRS512
-_mm512_min_ps(__m512 __A, __m512 __B)
-{
+static __inline__ __m512
+    __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_min_ps(__m512 __A, __m512 __B) {
   return (__m512) __builtin_ia32_minps512((__v16sf) __A, (__v16sf) __B,
                                           _MM_FROUND_CUR_DIRECTION);
 }
 
-static __inline__ __m512 __DEFAULT_FN_ATTRS512
-_mm512_mask_min_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
-{
+static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_mask_min_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) {
   return (__m512)__builtin_ia32_selectps_512(__U,
                                              (__v16sf)_mm512_min_ps(__A, __B),
                                              (__v16sf)__W);
 }
 
-static __inline__ __m512 __DEFAULT_FN_ATTRS512
-_mm512_maskz_min_ps (__mmask16 __U, __m512 __A, __m512 __B)
-{
+static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_maskz_min_ps(__mmask16 __U, __m512 __A, __m512 __B) {
   return (__m512)__builtin_ia32_selectps_512(__U,
                                              (__v16sf)_mm512_min_ps(__A, __B),
                                              (__v16sf)_mm512_setzero_ps());
 }
 
-static __inline__ __m128 __DEFAULT_FN_ATTRS128
-_mm_mask_min_ss(__m128 __W, __mmask8 __U,__m128 __A, __m128 __B) {
+static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_min_ss(__m128 __W,
+                                                               __mmask8 __U,
+                                                               __m128 __A,
+                                                               __m128 __B) {
   return (__m128) __builtin_ia32_minss_round_mask ((__v4sf) __A,
                 (__v4sf) __B,
                 (__v4sf) __W,
@@ -1277,8 +1219,9 @@ _mm_mask_min_ss(__m128 __W, __mmask8 __U,__m128 __A, __m128 __B) {
                 _MM_FROUND_CUR_DIRECTION);
 }
 
-static __inline__ __m128 __DEFAULT_FN_ATTRS128
-_mm_maskz_min_ss(__mmask8 __U,__m128 __A, __m128 __B) {
+static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_min_ss(__mmask8 __U,
+                                                                __m128 __A,
+                                                                __m128 __B) {
   return (__m128) __builtin_ia32_minss_round_mask ((__v4sf) __A,
                 (__v4sf) __B,
                 (__v4sf)  _mm_setzero_ps (),
@@ -1304,8 +1247,10 @@ _mm_maskz_min_ss(__mmask8 __U,__m128 __A, __m128 __B) {
                                            (__v4sf)_mm_setzero_ps(), \
                                            (__mmask8)(U), (int)(R)))
 
-static __inline__ __m128d __DEFAULT_FN_ATTRS128
-_mm_mask_min_sd(__m128d __W, __mmask8 __U,__m128d __A, __m128d __B) {
+static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_min_sd(__m128d __W,
+                                                                __mmask8 __U,
+                                                                __m128d __A,
+                                                                __m128d __B) {
   return (__m128d) __builtin_ia32_minsd_round_mask ((__v2df) __A,
                 (__v2df) __B,
                 (__v2df) __W,
@@ -1313,8 +1258,9 @@ _mm_mask_min_sd(__m128d __W, __mmask8 __U,__m128d __A, __m128d __B) {
                 _MM_FROUND_CUR_DIRECTION);
 }
 
-static __inline__ __m128d __DEFAULT_FN_ATTRS128
-_mm_maskz_min_sd(__mmask8 __U,__m128d __A, __m128d __B) {
+static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_min_sd(__mmask8 __U,
+                                                                 __m128d __A,
+                                                                 __m128d __B) {
   return (__m128d) __builtin_ia32_minsd_round_mask ((__v2df) __A,
                 (__v2df) __B,
                 (__v2df)  _mm_setzero_pd (),
@@ -1341,166 +1287,144 @@ _mm_maskz_min_sd(__mmask8 __U,__m128d __A, __m128d __B) {
                                             (__mmask8)(U), (int)(R)))
 
 static __inline __m512i
-__DEFAULT_FN_ATTRS512
-_mm512_min_epi32(__m512i __A, __m512i __B)
-{
+    __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_min_epi32(__m512i __A, __m512i __B) {
   return (__m512i)__builtin_elementwise_min((__v16si)__A, (__v16si)__B);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
-_mm512_mask_min_epi32 (__m512i __W, __mmask16 __M, __m512i __A, __m512i __B)
-{
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_mask_min_epi32(__m512i __W, __mmask16 __M, __m512i __A, __m512i __B) {
   return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
                                             (__v16si)_mm512_min_epi32(__A, __B),
                                             (__v16si)__W);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
-_mm512_maskz_min_epi32 (__mmask16 __M, __m512i __A, __m512i __B)
-{
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_maskz_min_epi32(__mmask16 __M, __m512i __A, __m512i __B) {
   return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
                                             (__v16si)_mm512_min_epi32(__A, __B),
                                             (__v16si)_mm512_setzero_si512());
 }
 
-static __inline __m512i __DEFAULT_FN_ATTRS512
-_mm512_min_epu32(__m512i __A, __m512i __B)
-{
+static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_min_epu32(__m512i __A, __m512i __B) {
   return (__m512i)__builtin_elementwise_min((__v16su)__A, (__v16su)__B);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
-_mm512_mask_min_epu32 (__m512i __W, __mmask16 __M, __m512i __A, __m512i __B)
-{
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_mask_min_epu32(__m512i __W, __mmask16 __M, __m512i __A, __m512i __B) {
   return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
                                             (__v16si)_mm512_min_epu32(__A, __B),
                                             (__v16si)__W);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
-_mm512_maskz_min_epu32 (__mmask16 __M, __m512i __A, __m512i __B)
-{
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_maskz_min_epu32(__mmask16 __M, __m512i __A, __m512i __B) {
   return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
                                             (__v16si)_mm512_min_epu32(__A, __B),
                                             (__v16si)_mm512_setzero_si512());
 }
 
-static __inline __m512i __DEFAULT_FN_ATTRS512
-_mm512_min_epi64(__m512i __A, __m512i __B)
-{
+static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_min_epi64(__m512i __A, __m512i __B) {
   return (__m512i)__builtin_elementwise_min((__v8di)__A, (__v8di)__B);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
-_mm512_mask_min_epi64 (__m512i __W, __mmask8 __M, __m512i __A, __m512i __B)
-{
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_mask_min_epi64(__m512i __W, __mmask8 __M, __m512i __A, __m512i __B) {
   return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
                                              (__v8di)_mm512_min_epi64(__A, __B),
                                              (__v8di)__W);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
-_mm512_maskz_min_epi64 (__mmask8 __M, __m512i __A, __m512i __B)
-{
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_maskz_min_epi64(__mmask8 __M, __m512i __A, __m512i __B) {
   return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
                                              (__v8di)_mm512_min_epi64(__A, __B),
                                              (__v8di)_mm512_setzero_si512());
 }
 
-static __inline __m512i __DEFAULT_FN_ATTRS512
-_mm512_min_epu64(__m512i __A, __m512i __B)
-{
+static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_min_epu64(__m512i __A, __m512i __B) {
   return (__m512i)__builtin_elementwise_min((__v8du)__A, (__v8du)__B);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
-_mm512_mask_min_epu64 (__m512i __W, __mmask8 __M, __m512i __A, __m512i __B)
-{
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_mask_min_epu64(__m512i __W, __mmask8 __M, __m512i __A, __m512i __B) {
   return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
                                              (__v8di)_mm512_min_epu64(__A, __B),
                                              (__v8di)__W);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
-_mm512_maskz_min_epu64 (__mmask8 __M, __m512i __A, __m512i __B)
-{
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_maskz_min_epu64(__mmask8 __M, __m512i __A, __m512i __B) {
   return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
                                              (__v8di)_mm512_min_epu64(__A, __B),
                                              (__v8di)_mm512_setzero_si512());
 }
 
-static __inline __m512i __DEFAULT_FN_ATTRS512
-_mm512_mul_epi32(__m512i __X, __m512i __Y)
-{
+static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_mul_epi32(__m512i __X, __m512i __Y) {
   return (__m512i)__builtin_ia32_pmuldq512((__v16si)__X, (__v16si) __Y);
 }
 
-static __inline __m512i __DEFAULT_FN_ATTRS512
-_mm512_mask_mul_epi32(__m512i __W, __mmask8 __M, __m512i __X, __m512i __Y)
-{
+static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_mask_mul_epi32(__m512i __W, __mmask8 __M, __m512i __X, __m512i __Y) {
   return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
                                              (__v8di)_mm512_mul_epi32(__X, __Y),
                                              (__v8di)__W);
 }
 
-static __inline __m512i __DEFAULT_FN_ATTRS512
-_mm512_maskz_mul_epi32(__mmask8 __M, __m512i __X, __m512i __Y)
-{
+static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_maskz_mul_epi32(__mmask8 __M, __m512i __X, __m512i __Y) {
   return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
                                              (__v8di)_mm512_mul_epi32(__X, __Y),
                                              (__v8di)_mm512_setzero_si512 ());
 }
 
-static __inline __m512i __DEFAULT_FN_ATTRS512
-_mm512_mul_epu32(__m512i __X, __m512i __Y)
-{
+static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_mul_epu32(__m512i __X, __m512i __Y) {
   return (__m512i)__builtin_ia32_pmuludq512((__v16si)__X, (__v16si)__Y);
 }
 
-static __inline __m512i __DEFAULT_FN_ATTRS512
-_mm512_mask_mul_epu32(__m512i __W, __mmask8 __M, __m512i __X, __m512i __Y)
-{
+static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_mask_mul_epu32(__m512i __W, __mmask8 __M, __m512i __X, __m512i __Y) {
   return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
                                              (__v8di)_mm512_mul_epu32(__X, __Y),
                                              (__v8di)__W);
 }
 
-static __inline __m512i __DEFAULT_FN_ATTRS512
-_mm512_maskz_mul_epu32(__mmask8 __M, __m512i __X, __m512i __Y)
-{
+static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_maskz_mul_epu32(__mmask8 __M, __m512i __X, __m512i __Y) {
   return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
                                              (__v8di)_mm512_mul_epu32(__X, __Y),
                                              (__v8di)_mm512_setzero_si512 ());
 }
 
-static __inline __m512i __DEFAULT_FN_ATTRS512
-_mm512_mullo_epi32 (__m512i __A, __m512i __B)
-{
+static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_mullo_epi32(__m512i __A, __m512i __B) {
   return (__m512i) ((__v16su) __A * (__v16su) __B);
 }
 
-static __inline __m512i __DEFAULT_FN_ATTRS512
-_mm512_maskz_mullo_epi32(__mmask16 __M, __m512i __A, __m512i __B)
-{
+static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_maskz_mullo_epi32(__mmask16 __M, __m512i __A, __m512i __B) {
   return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
                                              (__v16si)_mm512_mullo_epi32(__A, __B),
                                              (__v16si)_mm512_setzero_si512());
 }
 
-static __inline __m512i __DEFAULT_FN_ATTRS512
-_mm512_mask_mullo_epi32(__m512i __W, __mmask16 __M, __m512i __A, __m512i __B)
-{
+static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_mask_mullo_epi32(__m512i __W, __mmask16 __M, __m512i __A, __m512i __B) {
   return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
                                              (__v16si)_mm512_mullo_epi32(__A, __B),
                                              (__v16si)__W);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
-_mm512_mullox_epi64 (__m512i __A, __m512i __B) {
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_mullox_epi64(__m512i __A, __m512i __B) {
   return (__m512i) ((__v8du) __A * (__v8du) __B);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
 _mm512_mask_mullox_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m512i __B) {
   return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
                                              (__v8di)_mm512_mullox_epi64(__A, __B),
@@ -1520,26 +1444,19 @@ _mm512_mask_mullox_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m512i __B) {
                                        (__v8df)_mm512_sqrt_round_pd((A), (R)), \
                                        (__v8df)_mm512_setzero_pd()))
 
-static  __inline__ __m512d __DEFAULT_FN_ATTRS512
-_mm512_sqrt_pd(__m512d __A)
-{
-  return (__m512d)__builtin_ia32_sqrtpd512((__v8df)__A,
-                                           _MM_FROUND_CUR_DIRECTION);
+static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_sqrt_pd(__m512d __A) {
+  return (__m512d)__builtin_elementwise_sqrt((__v8df)__A);
 }
 
 static __inline__ __m512d __DEFAULT_FN_ATTRS512
-_mm512_mask_sqrt_pd (__m512d __W, __mmask8 __U, __m512d __A)
-{
-  return (__m512d)__builtin_ia32_selectpd_512(__U,
-                                              (__v8df)_mm512_sqrt_pd(__A),
+_mm512_mask_sqrt_pd(__m512d __W, __mmask8 __U, __m512d __A) {
+  return (__m512d)__builtin_ia32_selectpd_512(__U, (__v8df)_mm512_sqrt_pd(__A),
                                               (__v8df)__W);
 }
 
 static __inline__ __m512d __DEFAULT_FN_ATTRS512
-_mm512_maskz_sqrt_pd (__mmask8 __U, __m512d __A)
-{
-  return (__m512d)__builtin_ia32_selectpd_512(__U,
-                                              (__v8df)_mm512_sqrt_pd(__A),
+_mm512_maskz_sqrt_pd(__mmask8 __U, __m512d __A) {
+  return (__m512d)__builtin_ia32_selectpd_512(__U, (__v8df)_mm512_sqrt_pd(__A),
                                               (__v8df)_mm512_setzero_pd());
 }
 
@@ -1556,26 +1473,19 @@ _mm512_maskz_sqrt_pd (__mmask8 __U, __m512d __A)
                                       (__v16sf)_mm512_sqrt_round_ps((A), (R)), \
                                       (__v16sf)_mm512_setzero_ps()))
 
-static  __inline__ __m512 __DEFAULT_FN_ATTRS512
-_mm512_sqrt_ps(__m512 __A)
-{
-  return (__m512)__builtin_ia32_sqrtps512((__v16sf)__A,
-                                          _MM_FROUND_CUR_DIRECTION);
+static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_sqrt_ps(__m512 __A) {
+  return (__m512)__builtin_elementwise_sqrt((__v16sf)__A);
 }
 
-static  __inline__ __m512 __DEFAULT_FN_ATTRS512
-_mm512_mask_sqrt_ps(__m512 __W, __mmask16 __U, __m512 __A)
-{
-  return (__m512)__builtin_ia32_selectps_512(__U,
-                                             (__v16sf)_mm512_sqrt_ps(__A),
+static __inline__ __m512 __DEFAULT_FN_ATTRS512
+_mm512_mask_sqrt_ps(__m512 __W, __mmask16 __U, __m512 __A) {
+  return (__m512)__builtin_ia32_selectps_512(__U, (__v16sf)_mm512_sqrt_ps(__A),
                                              (__v16sf)__W);
 }
 
-static  __inline__ __m512 __DEFAULT_FN_ATTRS512
-_mm512_maskz_sqrt_ps( __mmask16 __U, __m512 __A)
-{
-  return (__m512)__builtin_ia32_selectps_512(__U,
-                                             (__v16sf)_mm512_sqrt_ps(__A),
+static __inline__ __m512 __DEFAULT_FN_ATTRS512
+_mm512_maskz_sqrt_ps(__mmask16 __U, __m512 __A) {
+  return (__m512)__builtin_ia32_selectps_512(__U, (__v16sf)_mm512_sqrt_ps(__A),
                                              (__v16sf)_mm512_setzero_ps());
 }
 
@@ -1866,58 +1776,52 @@ _mm512_mask_ceil_pd (__m512d __W, __mmask8 __U, __m512d __A)
                 _MM_FROUND_CUR_DIRECTION);
 }
 
-static __inline __m512i __DEFAULT_FN_ATTRS512
-_mm512_abs_epi64(__m512i __A)
-{
+static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_abs_epi64(__m512i __A) {
   return (__m512i)__builtin_elementwise_abs((__v8di)__A);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
-_mm512_mask_abs_epi64 (__m512i __W, __mmask8 __U, __m512i __A)
-{
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_mask_abs_epi64(__m512i __W, __mmask8 __U, __m512i __A) {
   return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
                                              (__v8di)_mm512_abs_epi64(__A),
                                              (__v8di)__W);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
-_mm512_maskz_abs_epi64 (__mmask8 __U, __m512i __A)
-{
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_maskz_abs_epi64(__mmask8 __U, __m512i __A) {
   return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
                                              (__v8di)_mm512_abs_epi64(__A),
                                              (__v8di)_mm512_setzero_si512());
 }
 
-static __inline __m512i __DEFAULT_FN_ATTRS512
-_mm512_abs_epi32(__m512i __A)
-{
+static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_abs_epi32(__m512i __A) {
   return (__m512i)__builtin_elementwise_abs((__v16si) __A);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
-_mm512_mask_abs_epi32 (__m512i __W, __mmask16 __U, __m512i __A)
-{
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_mask_abs_epi32(__m512i __W, __mmask16 __U, __m512i __A) {
   return (__m512i)__builtin_ia32_selectd_512(__U,
                                              (__v16si)_mm512_abs_epi32(__A),
                                              (__v16si)__W);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
-_mm512_maskz_abs_epi32 (__mmask16 __U, __m512i __A)
-{
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_maskz_abs_epi32(__mmask16 __U, __m512i __A) {
   return (__m512i)__builtin_ia32_selectd_512(__U,
                                              (__v16si)_mm512_abs_epi32(__A),
                                              (__v16si)_mm512_setzero_si512());
 }
 
-static __inline__ __m128 __DEFAULT_FN_ATTRS128
-_mm_mask_add_ss(__m128 __W, __mmask8 __U,__m128 __A, __m128 __B) {
+static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR
+_mm_mask_add_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) {
   __A = _mm_add_ss(__A, __B);
   return __builtin_ia32_selectss_128(__U, __A, __W);
 }
 
-static __inline__ __m128 __DEFAULT_FN_ATTRS128
-_mm_maskz_add_ss(__mmask8 __U,__m128 __A, __m128 __B) {
+static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR
+_mm_maskz_add_ss(__mmask8 __U, __m128 __A, __m128 __B) {
   __A = _mm_add_ss(__A, __B);
   return __builtin_ia32_selectss_128(__U, __A, _mm_setzero_ps());
 }
@@ -1940,14 +1844,14 @@ _mm_maskz_add_ss(__mmask8 __U,__m128 __A, __m128 __B) {
                                            (__v4sf)_mm_setzero_ps(), \
                                            (__mmask8)(U), (int)(R)))
 
-static __inline__ __m128d __DEFAULT_FN_ATTRS128
-_mm_mask_add_sd(__m128d __W, __mmask8 __U,__m128d __A, __m128d __B) {
+static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR
+_mm_mask_add_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) {
   __A = _mm_add_sd(__A, __B);
   return __builtin_ia32_selectsd_128(__U, __A, __W);
 }
 
-static __inline__ __m128d __DEFAULT_FN_ATTRS128
-_mm_maskz_add_sd(__mmask8 __U,__m128d __A, __m128d __B) {
+static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR
+_mm_maskz_add_sd(__mmask8 __U, __m128d __A, __m128d __B) {
   __A = _mm_add_sd(__A, __B);
   return __builtin_ia32_selectsd_128(__U, __A, _mm_setzero_pd());
 }
@@ -1969,28 +1873,28 @@ _mm_maskz_add_sd(__mmask8 __U,__m128d __A, __m128d __B) {
                                             (__v2df)_mm_setzero_pd(), \
                                             (__mmask8)(U), (int)(R)))
 
-static __inline__ __m512d __DEFAULT_FN_ATTRS512
+static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
 _mm512_mask_add_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) {
   return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
                                               (__v8df)_mm512_add_pd(__A, __B),
                                               (__v8df)__W);
 }
 
-static __inline__ __m512d __DEFAULT_FN_ATTRS512
+static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
 _mm512_maskz_add_pd(__mmask8 __U, __m512d __A, __m512d __B) {
   return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
                                               (__v8df)_mm512_add_pd(__A, __B),
                                               (__v8df)_mm512_setzero_pd());
 }
 
-static __inline__ __m512 __DEFAULT_FN_ATTRS512
+static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
 _mm512_mask_add_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) {
   return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
                                              (__v16sf)_mm512_add_ps(__A, __B),
                                              (__v16sf)__W);
 }
 
-static __inline__ __m512 __DEFAULT_FN_ATTRS512
+static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
 _mm512_maskz_add_ps(__mmask16 __U, __m512 __A, __m512 __B) {
   return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
                                              (__v16sf)_mm512_add_ps(__A, __B),
@@ -2025,14 +1929,14 @@ _mm512_maskz_add_ps(__mmask16 __U, __m512 __A, __m512 __B) {
                                   (__v16sf)_mm512_add_round_ps((A), (B), (R)), \
                                   (__v16sf)_mm512_setzero_ps()))
 
-static __inline__ __m128 __DEFAULT_FN_ATTRS128
-_mm_mask_sub_ss(__m128 __W, __mmask8 __U,__m128 __A, __m128 __B) {
+static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR
+_mm_mask_sub_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) {
   __A = _mm_sub_ss(__A, __B);
   return __builtin_ia32_selectss_128(__U, __A, __W);
 }
 
-static __inline__ __m128 __DEFAULT_FN_ATTRS128
-_mm_maskz_sub_ss(__mmask8 __U,__m128 __A, __m128 __B) {
+static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR
+_mm_maskz_sub_ss(__mmask8 __U, __m128 __A, __m128 __B) {
   __A = _mm_sub_ss(__A, __B);
   return __builtin_ia32_selectss_128(__U, __A, _mm_setzero_ps());
 }
@@ -2054,14 +1958,14 @@ _mm_maskz_sub_ss(__mmask8 __U,__m128 __A, __m128 __B) {
                                            (__v4sf)_mm_setzero_ps(), \
                                            (__mmask8)(U), (int)(R)))
 
-static __inline__ __m128d __DEFAULT_FN_ATTRS128
-_mm_mask_sub_sd(__m128d __W, __mmask8 __U,__m128d __A, __m128d __B) {
+static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR
+_mm_mask_sub_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) {
   __A = _mm_sub_sd(__A, __B);
   return __builtin_ia32_selectsd_128(__U, __A, __W);
 }
 
-static __inline__ __m128d __DEFAULT_FN_ATTRS128
-_mm_maskz_sub_sd(__mmask8 __U,__m128d __A, __m128d __B) {
+static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR
+_mm_maskz_sub_sd(__mmask8 __U, __m128d __A, __m128d __B) {
   __A = _mm_sub_sd(__A, __B);
   return __builtin_ia32_selectsd_128(__U, __A, _mm_setzero_pd());
 }
@@ -2084,28 +1988,28 @@ _mm_maskz_sub_sd(__mmask8 __U,__m128d __A, __m128d __B) {
                                             (__v2df)_mm_setzero_pd(), \
                                             (__mmask8)(U), (int)(R)))
 
-static __inline__ __m512d __DEFAULT_FN_ATTRS512
+static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
 _mm512_mask_sub_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) {
   return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
                                               (__v8df)_mm512_sub_pd(__A, __B),
                                               (__v8df)__W);
 }
 
-static __inline__ __m512d __DEFAULT_FN_ATTRS512
+static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
 _mm512_maskz_sub_pd(__mmask8 __U, __m512d __A, __m512d __B) {
   return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
                                               (__v8df)_mm512_sub_pd(__A, __B),
                                               (__v8df)_mm512_setzero_pd());
 }
 
-static __inline__ __m512 __DEFAULT_FN_ATTRS512
+static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
 _mm512_mask_sub_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) {
   return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
                                              (__v16sf)_mm512_sub_ps(__A, __B),
                                              (__v16sf)__W);
 }
 
-static __inline__ __m512 __DEFAULT_FN_ATTRS512
+static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
 _mm512_maskz_sub_ps(__mmask16 __U, __m512 __A, __m512 __B) {
   return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
                                              (__v16sf)_mm512_sub_ps(__A, __B),
@@ -2140,14 +2044,14 @@ _mm512_maskz_sub_ps(__mmask16 __U, __m512 __A, __m512 __B) {
                                   (__v16sf)_mm512_sub_round_ps((A), (B), (R)), \
                                   (__v16sf)_mm512_setzero_ps()))
 
-static __inline__ __m128 __DEFAULT_FN_ATTRS128
-_mm_mask_mul_ss(__m128 __W, __mmask8 __U,__m128 __A, __m128 __B) {
+static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR
+_mm_mask_mul_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) {
   __A = _mm_mul_ss(__A, __B);
   return __builtin_ia32_selectss_128(__U, __A, __W);
 }
 
-static __inline__ __m128 __DEFAULT_FN_ATTRS128
-_mm_maskz_mul_ss(__mmask8 __U,__m128 __A, __m128 __B) {
+static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR
+_mm_maskz_mul_ss(__mmask8 __U, __m128 __A, __m128 __B) {
   __A = _mm_mul_ss(__A, __B);
   return __builtin_ia32_selectss_128(__U, __A, _mm_setzero_ps());
 }
@@ -2169,14 +2073,14 @@ _mm_maskz_mul_ss(__mmask8 __U,__m128 __A, __m128 __B) {
                                            (__v4sf)_mm_setzero_ps(), \
                                            (__mmask8)(U), (int)(R)))
 
-static __inline__ __m128d __DEFAULT_FN_ATTRS128
-_mm_mask_mul_sd(__m128d __W, __mmask8 __U,__m128d __A, __m128d __B) {
+static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR
+_mm_mask_mul_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) {
   __A = _mm_mul_sd(__A, __B);
   return __builtin_ia32_selectsd_128(__U, __A, __W);
 }
 
-static __inline__ __m128d __DEFAULT_FN_ATTRS128
-_mm_maskz_mul_sd(__mmask8 __U,__m128d __A, __m128d __B) {
+static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR
+_mm_maskz_mul_sd(__mmask8 __U, __m128d __A, __m128d __B) {
   __A = _mm_mul_sd(__A, __B);
   return __builtin_ia32_selectsd_128(__U, __A, _mm_setzero_pd());
 }
@@ -2199,28 +2103,28 @@ _mm_maskz_mul_sd(__mmask8 __U,__m128d __A, __m128d __B) {
                                             (__v2df)_mm_setzero_pd(), \
                                             (__mmask8)(U), (int)(R)))
 
-static __inline__ __m512d __DEFAULT_FN_ATTRS512
+static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
 _mm512_mask_mul_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) {
   return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
                                               (__v8df)_mm512_mul_pd(__A, __B),
                                               (__v8df)__W);
 }
 
-static __inline__ __m512d __DEFAULT_FN_ATTRS512
+static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
 _mm512_maskz_mul_pd(__mmask8 __U, __m512d __A, __m512d __B) {
   return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
                                               (__v8df)_mm512_mul_pd(__A, __B),
                                               (__v8df)_mm512_setzero_pd());
 }
 
-static __inline__ __m512 __DEFAULT_FN_ATTRS512
+static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
 _mm512_mask_mul_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) {
   return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
                                              (__v16sf)_mm512_mul_ps(__A, __B),
                                              (__v16sf)__W);
 }
 
-static __inline__ __m512 __DEFAULT_FN_ATTRS512
+static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
 _mm512_maskz_mul_ps(__mmask16 __U, __m512 __A, __m512 __B) {
   return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
                                              (__v16sf)_mm512_mul_ps(__A, __B),
@@ -2255,14 +2159,14 @@ _mm512_maskz_mul_ps(__mmask16 __U, __m512 __A, __m512 __B) {
                                   (__v16sf)_mm512_mul_round_ps((A), (B), (R)), \
                                   (__v16sf)_mm512_setzero_ps()))
 
-static __inline__ __m128 __DEFAULT_FN_ATTRS128
-_mm_mask_div_ss(__m128 __W, __mmask8 __U,__m128 __A, __m128 __B) {
+static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR
+_mm_mask_div_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) {
   __A = _mm_div_ss(__A, __B);
   return __builtin_ia32_selectss_128(__U, __A, __W);
 }
 
-static __inline__ __m128 __DEFAULT_FN_ATTRS128
-_mm_maskz_div_ss(__mmask8 __U,__m128 __A, __m128 __B) {
+static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR
+_mm_maskz_div_ss(__mmask8 __U, __m128 __A, __m128 __B) {
   __A = _mm_div_ss(__A, __B);
   return __builtin_ia32_selectss_128(__U, __A, _mm_setzero_ps());
 }
@@ -2285,14 +2189,14 @@ _mm_maskz_div_ss(__mmask8 __U,__m128 __A, __m128 __B) {
                                            (__v4sf)_mm_setzero_ps(), \
                                            (__mmask8)(U), (int)(R)))
 
-static __inline__ __m128d __DEFAULT_FN_ATTRS128
-_mm_mask_div_sd(__m128d __W, __mmask8 __U,__m128d __A, __m128d __B) {
+static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR
+_mm_mask_div_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) {
   __A = _mm_div_sd(__A, __B);
   return __builtin_ia32_selectsd_128(__U, __A, __W);
 }
 
-static __inline__ __m128d __DEFAULT_FN_ATTRS128
-_mm_maskz_div_sd(__mmask8 __U,__m128d __A, __m128d __B) {
+static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR
+_mm_maskz_div_sd(__mmask8 __U, __m128d __A, __m128d __B) {
   __A = _mm_div_sd(__A, __B);
   return __builtin_ia32_selectsd_128(__U, __A, _mm_setzero_pd());
 }
@@ -2315,40 +2219,38 @@ _mm_maskz_div_sd(__mmask8 __U,__m128d __A, __m128d __B) {
                                             (__v2df)_mm_setzero_pd(), \
                                             (__mmask8)(U), (int)(R)))
 
-static __inline __m512d __DEFAULT_FN_ATTRS512
-_mm512_div_pd(__m512d __a, __m512d __b)
-{
+static __inline __m512d
+    __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_div_pd(__m512d __a, __m512d __b) {
   return (__m512d)((__v8df)__a/(__v8df)__b);
 }
 
-static __inline__ __m512d __DEFAULT_FN_ATTRS512
+static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
 _mm512_mask_div_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) {
   return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
                                               (__v8df)_mm512_div_pd(__A, __B),
                                               (__v8df)__W);
 }
 
-static __inline__ __m512d __DEFAULT_FN_ATTRS512
+static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
 _mm512_maskz_div_pd(__mmask8 __U, __m512d __A, __m512d __B) {
   return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
                                               (__v8df)_mm512_div_pd(__A, __B),
                                               (__v8df)_mm512_setzero_pd());
 }
 
-static __inline __m512 __DEFAULT_FN_ATTRS512
-_mm512_div_ps(__m512 __a, __m512 __b)
-{
+static __inline __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_div_ps(__m512 __a, __m512 __b) {
   return (__m512)((__v16sf)__a/(__v16sf)__b);
 }
 
-static __inline__ __m512 __DEFAULT_FN_ATTRS512
+static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
 _mm512_mask_div_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) {
   return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
                                              (__v16sf)_mm512_div_ps(__A, __B),
                                              (__v16sf)__W);
 }
 
-static __inline__ __m512 __DEFAULT_FN_ATTRS512
+static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
 _mm512_maskz_div_ps(__mmask16 __U, __m512 __A, __m512 __B) {
   return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
                                              (__v16sf)_mm512_div_ps(__A, __B),
@@ -2530,125 +2432,104 @@ _mm512_maskz_div_ps(__mmask16 __U, __m512 __A, __m512 __B) {
                                              -(__v8df)(__m512d)(C), \
                                              (__mmask8)(U), (int)(R)))
 
-
-static __inline__ __m512d __DEFAULT_FN_ATTRS512
-_mm512_fmadd_pd(__m512d __A, __m512d __B, __m512d __C)
-{
-  return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
-                                                    (__v8df) __B,
-                                                    (__v8df) __C,
-                                                    (__mmask8) -1,
-                                                    _MM_FROUND_CUR_DIRECTION);
+static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_fmadd_pd(__m512d __A, __m512d __B, __m512d __C) {
+  return (__m512d)__builtin_elementwise_fma((__v8df)__A, (__v8df)__B,
+                                            (__v8df)__C);
 }
 
-static __inline__ __m512d __DEFAULT_FN_ATTRS512
-_mm512_mask_fmadd_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
-{
-  return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
-                                                    (__v8df) __B,
-                                                    (__v8df) __C,
-                                                    (__mmask8) __U,
-                                                    _MM_FROUND_CUR_DIRECTION);
+static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_mask_fmadd_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C) {
+  return (__m512d)__builtin_ia32_selectpd_512(
+      (__mmask8)__U, (__v8df)_mm512_fmadd_pd(__A, __B, __C), (__v8df)__A);
 }
 
-static __inline__ __m512d __DEFAULT_FN_ATTRS512
-_mm512_mask3_fmadd_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
-{
-  return (__m512d) __builtin_ia32_vfmaddpd512_mask3 ((__v8df) __A,
-                                                     (__v8df) __B,
-                                                     (__v8df) __C,
-                                                     (__mmask8) __U,
-                                                     _MM_FROUND_CUR_DIRECTION);
+static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_mask3_fmadd_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U) {
+  return (__m512d)__builtin_ia32_selectpd_512(
+      (__mmask8)__U, (__v8df)_mm512_fmadd_pd(__A, __B, __C), (__v8df)__C);
 }
 
-static __inline__ __m512d __DEFAULT_FN_ATTRS512
-_mm512_maskz_fmadd_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
-{
-  return (__m512d) __builtin_ia32_vfmaddpd512_maskz ((__v8df) __A,
-                                                     (__v8df) __B,
-                                                     (__v8df) __C,
-                                                     (__mmask8) __U,
-                                                     _MM_FROUND_CUR_DIRECTION);
+static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_maskz_fmadd_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C) {
+  return (__m512d)__builtin_ia32_selectpd_512(
+      (__mmask8)__U, (__v8df)_mm512_fmadd_pd(__A, __B, __C),
+      (__v8df)_mm512_setzero_pd());
 }
 
-static __inline__ __m512d __DEFAULT_FN_ATTRS512
-_mm512_fmsub_pd(__m512d __A, __m512d __B, __m512d __C)
-{
-  return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
-                                                    (__v8df) __B,
-                                                    -(__v8df) __C,
-                                                    (__mmask8) -1,
-                                                    _MM_FROUND_CUR_DIRECTION);
+static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_fmsub_pd(__m512d __A, __m512d __B, __m512d __C) {
+  return (__m512d)__builtin_elementwise_fma((__v8df)__A, (__v8df)__B,
+                                            -(__v8df)__C);
 }
 
-static __inline__ __m512d __DEFAULT_FN_ATTRS512
-_mm512_mask_fmsub_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
-{
-  return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
-                                                    (__v8df) __B,
-                                                    -(__v8df) __C,
-                                                    (__mmask8) __U,
-                                                    _MM_FROUND_CUR_DIRECTION);
+static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_mask_fmsub_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C) {
+  return (__m512d)__builtin_ia32_selectpd_512(
+      (__mmask8)__U, (__v8df)_mm512_fmsub_pd(__A, __B, __C), (__v8df)__A);
 }
 
-static __inline__ __m512d __DEFAULT_FN_ATTRS512
-_mm512_maskz_fmsub_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
-{
-  return (__m512d) __builtin_ia32_vfmaddpd512_maskz ((__v8df) __A,
-                                                     (__v8df) __B,
-                                                     -(__v8df) __C,
-                                                     (__mmask8) __U,
-                                                     _MM_FROUND_CUR_DIRECTION);
+static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_mask3_fmsub_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U) {
+  return (__m512d)__builtin_ia32_selectpd_512(
+      (__mmask8)__U, (__v8df)_mm512_fmsub_pd(__A, __B, __C), (__v8df)__C);
 }
 
-static __inline__ __m512d __DEFAULT_FN_ATTRS512
-_mm512_fnmadd_pd(__m512d __A, __m512d __B, __m512d __C)
-{
-  return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
-                                                    -(__v8df) __B,
-                                                    (__v8df) __C,
-                                                    (__mmask8) -1,
-                                                    _MM_FROUND_CUR_DIRECTION);
+static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_maskz_fmsub_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C) {
+  return (__m512d)__builtin_ia32_selectpd_512(
+      (__mmask8)__U, (__v8df)_mm512_fmsub_pd(__A, __B, __C),
+      (__v8df)_mm512_setzero_pd());
 }
 
-static __inline__ __m512d __DEFAULT_FN_ATTRS512
-_mm512_mask3_fnmadd_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
-{
-  return (__m512d) __builtin_ia32_vfmaddpd512_mask3 (-(__v8df) __A,
-                                                     (__v8df) __B,
-                                                     (__v8df) __C,
-                                                     (__mmask8) __U,
-                                                     _MM_FROUND_CUR_DIRECTION);
+static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_fnmadd_pd(__m512d __A, __m512d __B, __m512d __C) {
+  return (__m512d)__builtin_elementwise_fma(-(__v8df)__A, (__v8df)__B,
+                                            (__v8df)__C);
 }
 
-static __inline__ __m512d __DEFAULT_FN_ATTRS512
-_mm512_maskz_fnmadd_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
-{
-  return (__m512d) __builtin_ia32_vfmaddpd512_maskz (-(__v8df) __A,
-                                                     (__v8df) __B,
-                                                     (__v8df) __C,
-                                                     (__mmask8) __U,
-                                                     _MM_FROUND_CUR_DIRECTION);
+static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_mask_fnmadd_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C) {
+  return (__m512d)__builtin_ia32_selectpd_512(
+      (__mmask8)__U, (__v8df)_mm512_fnmadd_pd(__A, __B, __C), (__v8df)__A);
 }
 
-static __inline__ __m512d __DEFAULT_FN_ATTRS512
-_mm512_fnmsub_pd(__m512d __A, __m512d __B, __m512d __C)
-{
-  return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
-                                                    -(__v8df) __B,
-                                                    -(__v8df) __C,
-                                                    (__mmask8) -1,
-                                                    _MM_FROUND_CUR_DIRECTION);
+static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_mask3_fnmadd_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U) {
+  return (__m512d)__builtin_ia32_selectpd_512(
+      (__mmask8)__U, (__v8df)_mm512_fnmadd_pd(__A, __B, __C), (__v8df)__C);
 }
 
-static __inline__ __m512d __DEFAULT_FN_ATTRS512
-_mm512_maskz_fnmsub_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
-{
-  return (__m512d) __builtin_ia32_vfmaddpd512_maskz (-(__v8df) __A,
-                                                     (__v8df) __B,
-                                                     -(__v8df) __C,
-                                                     (__mmask8) __U,
-                                                     _MM_FROUND_CUR_DIRECTION);
+static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_maskz_fnmadd_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C) {
+  return (__m512d)__builtin_ia32_selectpd_512(
+      (__mmask8)__U, (__v8df)_mm512_fnmadd_pd(__A, __B, __C),
+      (__v8df)_mm512_setzero_pd());
+}
+
+static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_fnmsub_pd(__m512d __A, __m512d __B, __m512d __C) {
+  return (__m512d)__builtin_elementwise_fma(-(__v8df)__A, (__v8df)__B,
+                                            -(__v8df)__C);
+}
+
+static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_mask_fnmsub_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C) {
+  return (__m512d)__builtin_ia32_selectpd_512(
+      (__mmask8)__U, (__v8df)_mm512_fnmsub_pd(__A, __B, __C), (__v8df)__A);
+}
+
+static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_mask3_fnmsub_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U) {
+  return (__m512d)__builtin_ia32_selectpd_512(
+      (__mmask8)__U, (__v8df)_mm512_fnmsub_pd(__A, __B, __C), (__v8df)__C);
+}
+
+static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_maskz_fnmsub_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C) {
+  return (__m512d)__builtin_ia32_selectpd_512(
+      (__mmask8)__U, (__v8df)_mm512_fnmsub_pd(__A, __B, __C),
+      (__v8df)_mm512_setzero_pd());
 }
 
 #define _mm512_fmadd_round_ps(A, B, C, R) \
@@ -2734,125 +2615,104 @@ _mm512_maskz_fnmsub_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
                                             -(__v16sf)(__m512)(C), \
                                             (__mmask16)(U), (int)(R)))
 
-
-static __inline__ __m512 __DEFAULT_FN_ATTRS512
-_mm512_fmadd_ps(__m512 __A, __m512 __B, __m512 __C)
-{
-  return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
-                                                   (__v16sf) __B,
-                                                   (__v16sf) __C,
-                                                   (__mmask16) -1,
-                                                   _MM_FROUND_CUR_DIRECTION);
+static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_fmadd_ps(__m512 __A, __m512 __B, __m512 __C) {
+  return (__m512)__builtin_elementwise_fma((__v16sf)__A, (__v16sf)__B,
+                                           (__v16sf)__C);
 }
 
-static __inline__ __m512 __DEFAULT_FN_ATTRS512
-_mm512_mask_fmadd_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
-{
-  return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
-                                                   (__v16sf) __B,
-                                                   (__v16sf) __C,
-                                                   (__mmask16) __U,
-                                                   _MM_FROUND_CUR_DIRECTION);
+static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_mask_fmadd_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C) {
+  return (__m512)__builtin_ia32_selectps_512(
+      (__mmask16)__U, (__v16sf)_mm512_fmadd_ps(__A, __B, __C), (__v16sf)__A);
 }
 
-static __inline__ __m512 __DEFAULT_FN_ATTRS512
-_mm512_mask3_fmadd_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
-{
-  return (__m512) __builtin_ia32_vfmaddps512_mask3 ((__v16sf) __A,
-                                                    (__v16sf) __B,
-                                                    (__v16sf) __C,
-                                                    (__mmask16) __U,
-                                                    _MM_FROUND_CUR_DIRECTION);
+static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_mask3_fmadd_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U) {
+  return (__m512)__builtin_ia32_selectps_512(
+      (__mmask16)__U, (__v16sf)_mm512_fmadd_ps(__A, __B, __C), (__v16sf)__C);
 }
 
-static __inline__ __m512 __DEFAULT_FN_ATTRS512
-_mm512_maskz_fmadd_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
-{
-  return (__m512) __builtin_ia32_vfmaddps512_maskz ((__v16sf) __A,
-                                                    (__v16sf) __B,
-                                                    (__v16sf) __C,
-                                                    (__mmask16) __U,
-                                                    _MM_FROUND_CUR_DIRECTION);
+static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_maskz_fmadd_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C) {
+  return (__m512)__builtin_ia32_selectps_512(
+      (__mmask16)__U, (__v16sf)_mm512_fmadd_ps(__A, __B, __C),
+      (__v16sf)_mm512_setzero_ps());
 }
 
-static __inline__ __m512 __DEFAULT_FN_ATTRS512
-_mm512_fmsub_ps(__m512 __A, __m512 __B, __m512 __C)
-{
-  return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
-                                                   (__v16sf) __B,
-                                                   -(__v16sf) __C,
-                                                   (__mmask16) -1,
-                                                   _MM_FROUND_CUR_DIRECTION);
+static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_fmsub_ps(__m512 __A, __m512 __B, __m512 __C) {
+  return (__m512)__builtin_elementwise_fma((__v16sf)__A, (__v16sf)__B,
+                                           -(__v16sf)__C);
 }
 
-static __inline__ __m512 __DEFAULT_FN_ATTRS512
-_mm512_mask_fmsub_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
-{
-  return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
-                                                   (__v16sf) __B,
-                                                   -(__v16sf) __C,
-                                                   (__mmask16) __U,
-                                                   _MM_FROUND_CUR_DIRECTION);
+static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_mask_fmsub_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C) {
+  return (__m512)__builtin_ia32_selectps_512(
+      (__mmask16)__U, (__v16sf)_mm512_fmsub_ps(__A, __B, __C), (__v16sf)__A);
 }
 
-static __inline__ __m512 __DEFAULT_FN_ATTRS512
-_mm512_maskz_fmsub_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
-{
-  return (__m512) __builtin_ia32_vfmaddps512_maskz ((__v16sf) __A,
-                                                    (__v16sf) __B,
-                                                    -(__v16sf) __C,
-                                                    (__mmask16) __U,
-                                                    _MM_FROUND_CUR_DIRECTION);
+static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_mask3_fmsub_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U) {
+  return (__m512)__builtin_ia32_selectps_512(
+      (__mmask16)__U, (__v16sf)_mm512_fmsub_ps(__A, __B, __C), (__v16sf)__C);
 }
 
-static __inline__ __m512 __DEFAULT_FN_ATTRS512
-_mm512_fnmadd_ps(__m512 __A, __m512 __B, __m512 __C)
-{
-  return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
-                                                   -(__v16sf) __B,
-                                                   (__v16sf) __C,
-                                                   (__mmask16) -1,
-                                                   _MM_FROUND_CUR_DIRECTION);
+static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_maskz_fmsub_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C) {
+  return (__m512)__builtin_ia32_selectps_512(
+      (__mmask16)__U, (__v16sf)_mm512_fmsub_ps(__A, __B, __C),
+      (__v16sf)_mm512_setzero_ps());
 }
 
-static __inline__ __m512 __DEFAULT_FN_ATTRS512
-_mm512_mask3_fnmadd_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
-{
-  return (__m512) __builtin_ia32_vfmaddps512_mask3 (-(__v16sf) __A,
-                                                    (__v16sf) __B,
-                                                    (__v16sf) __C,
-                                                    (__mmask16) __U,
-                                                    _MM_FROUND_CUR_DIRECTION);
+static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_fnmadd_ps(__m512 __A, __m512 __B, __m512 __C) {
+  return (__m512)__builtin_elementwise_fma(-(__v16sf)__A, (__v16sf)__B,
+                                           (__v16sf)__C);
 }
 
-static __inline__ __m512 __DEFAULT_FN_ATTRS512
-_mm512_maskz_fnmadd_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
-{
-  return (__m512) __builtin_ia32_vfmaddps512_maskz (-(__v16sf) __A,
-                                                    (__v16sf) __B,
-                                                    (__v16sf) __C,
-                                                    (__mmask16) __U,
-                                                    _MM_FROUND_CUR_DIRECTION);
+static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_mask_fnmadd_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C) {
+  return (__m512)__builtin_ia32_selectps_512(
+      (__mmask16)__U, (__v16sf)_mm512_fnmadd_ps(__A, __B, __C), (__v16sf)__A);
 }
 
-static __inline__ __m512 __DEFAULT_FN_ATTRS512
-_mm512_fnmsub_ps(__m512 __A, __m512 __B, __m512 __C)
-{
-  return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
-                                                   -(__v16sf) __B,
-                                                   -(__v16sf) __C,
-                                                   (__mmask16) -1,
-                                                   _MM_FROUND_CUR_DIRECTION);
+static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_mask3_fnmadd_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U) {
+  return (__m512)__builtin_ia32_selectps_512(
+      (__mmask16)__U, (__v16sf)_mm512_fnmadd_ps(__A, __B, __C), (__v16sf)__C);
 }
 
-static __inline__ __m512 __DEFAULT_FN_ATTRS512
-_mm512_maskz_fnmsub_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
-{
-  return (__m512) __builtin_ia32_vfmaddps512_maskz (-(__v16sf) __A,
-                                                    (__v16sf) __B,
-                                                    -(__v16sf) __C,
-                                                    (__mmask16) __U,
-                                                    _MM_FROUND_CUR_DIRECTION);
+static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_maskz_fnmadd_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C) {
+  return (__m512)__builtin_ia32_selectps_512(
+      (__mmask16)__U, (__v16sf)_mm512_fnmadd_ps(__A, __B, __C),
+      (__v16sf)_mm512_setzero_ps());
+}
+
+static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_fnmsub_ps(__m512 __A, __m512 __B, __m512 __C) {
+  return (__m512)__builtin_elementwise_fma(-(__v16sf)__A, (__v16sf)__B,
+                                           -(__v16sf)__C);
+}
+
+static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_mask_fnmsub_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C) {
+  return (__m512)__builtin_ia32_selectps_512(
+      (__mmask16)__U, (__v16sf)_mm512_fnmsub_ps(__A, __B, __C), (__v16sf)__A);
+}
+
+static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_mask3_fnmsub_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U) {
+  return (__m512)__builtin_ia32_selectps_512(
+      (__mmask16)__U, (__v16sf)_mm512_fnmsub_ps(__A, __B, __C), (__v16sf)__C);
+}
+
+static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_maskz_fnmsub_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C) {
+  return (__m512)__builtin_ia32_selectps_512(
+      (__mmask16)__U, (__v16sf)_mm512_fnmsub_ps(__A, __B, __C),
+      (__v16sf)_mm512_setzero_ps());
 }
 
 #define _mm512_fmaddsub_round_pd(A, B, C, R) \
@@ -3099,33 +2959,12 @@ _mm512_maskz_fmsubadd_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
                                              (__v8df)(__m512d)(C), \
                                              (__mmask8)(U), (int)(R)))
 
-
-static __inline__ __m512d __DEFAULT_FN_ATTRS512
-_mm512_mask3_fmsub_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
-{
-  return (__m512d)__builtin_ia32_vfmsubpd512_mask3 ((__v8df) __A,
-                                                    (__v8df) __B,
-                                                    (__v8df) __C,
-                                                    (__mmask8) __U,
-                                                    _MM_FROUND_CUR_DIRECTION);
-}
-
 #define _mm512_mask3_fmsub_round_ps(A, B, C, U, R) \
   ((__m512)__builtin_ia32_vfmsubps512_mask3((__v16sf)(__m512)(A), \
                                             (__v16sf)(__m512)(B), \
                                             (__v16sf)(__m512)(C), \
                                             (__mmask16)(U), (int)(R)))
 
-static __inline__ __m512 __DEFAULT_FN_ATTRS512
-_mm512_mask3_fmsub_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
-{
-  return (__m512)__builtin_ia32_vfmsubps512_mask3 ((__v16sf) __A,
-                                                   (__v16sf) __B,
-                                                   (__v16sf) __C,
-                                                   (__mmask16) __U,
-                                                   _MM_FROUND_CUR_DIRECTION);
-}
-
 #define _mm512_mask3_fmsubadd_round_pd(A, B, C, U, R) \
   ((__m512d)__builtin_ia32_vfmsubaddpd512_mask3((__v8df)(__m512d)(A), \
                                                 (__v8df)(__m512d)(B), \
@@ -3166,34 +3005,12 @@ _mm512_mask3_fmsubadd_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
                                             (__v8df)(__m512d)(C), \
                                             (__mmask8)(U), (int)(R)))
 
-
-static __inline__ __m512d __DEFAULT_FN_ATTRS512
-_mm512_mask_fnmadd_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
-{
-  return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
-                                                    -(__v8df) __B,
-                                                    (__v8df) __C,
-                                                    (__mmask8) __U,
-                                                    _MM_FROUND_CUR_DIRECTION);
-}
-
 #define _mm512_mask_fnmadd_round_ps(A, U, B, C, R) \
   ((__m512)__builtin_ia32_vfmaddps512_mask((__v16sf)(__m512)(A), \
                                            -(__v16sf)(__m512)(B), \
                                            (__v16sf)(__m512)(C), \
                                            (__mmask16)(U), (int)(R)))
 
-
-static __inline__ __m512 __DEFAULT_FN_ATTRS512
-_mm512_mask_fnmadd_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
-{
-  return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
-                                                   -(__v16sf) __B,
-                                                   (__v16sf) __C,
-                                                   (__mmask16) __U,
-                                                   _MM_FROUND_CUR_DIRECTION);
-}
-
 #define _mm512_mask_fnmsub_round_pd(A, U, B, C, R) \
   ((__m512d)__builtin_ia32_vfmaddpd512_mask((__v8df)(__m512d)(A), \
                                             -(__v8df)(__m512d)(B), \
@@ -3207,27 +3024,6 @@ _mm512_mask_fnmadd_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
                                              (__v8df)(__m512d)(C), \
                                              (__mmask8)(U), (int)(R)))
 
-
-static __inline__ __m512d __DEFAULT_FN_ATTRS512
-_mm512_mask_fnmsub_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
-{
-  return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
-                                                    -(__v8df) __B,
-                                                    -(__v8df) __C,
-                                                    (__mmask8) __U,
-                                                    _MM_FROUND_CUR_DIRECTION);
-}
-
-static __inline__ __m512d __DEFAULT_FN_ATTRS512
-_mm512_mask3_fnmsub_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
-{
-  return (__m512d) __builtin_ia32_vfmsubpd512_mask3 (-(__v8df) __A,
-                                                     (__v8df) __B,
-                                                     (__v8df) __C,
-                                                     (__mmask8) __U,
-                                                     _MM_FROUND_CUR_DIRECTION);
-}
-
 #define _mm512_mask_fnmsub_round_ps(A, U, B, C, R) \
   ((__m512)__builtin_ia32_vfmaddps512_mask((__v16sf)(__m512)(A), \
                                            -(__v16sf)(__m512)(B), \
@@ -3241,94 +3037,63 @@ _mm512_mask3_fnmsub_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
                                             (__v16sf)(__m512)(C), \
                                             (__mmask16)(U), (int)(R)))
 
-
-static __inline__ __m512 __DEFAULT_FN_ATTRS512
-_mm512_mask_fnmsub_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
-{
-  return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
-                                                   -(__v16sf) __B,
-                                                   -(__v16sf) __C,
-                                                   (__mmask16) __U,
-                                                   _MM_FROUND_CUR_DIRECTION);
-}
-
-static __inline__ __m512 __DEFAULT_FN_ATTRS512
-_mm512_mask3_fnmsub_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
-{
-  return (__m512) __builtin_ia32_vfmsubps512_mask3 (-(__v16sf) __A,
-                                                    (__v16sf) __B,
-                                                    (__v16sf) __C,
-                                                    (__mmask16) __U,
-                                                    _MM_FROUND_CUR_DIRECTION);
-}
-
-
-
 /* Vector permutations */
 
-static __inline __m512i __DEFAULT_FN_ATTRS512
-_mm512_permutex2var_epi32(__m512i __A, __m512i __I, __m512i __B)
-{
+static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_permutex2var_epi32(__m512i __A, __m512i __I, __m512i __B) {
   return (__m512i)__builtin_ia32_vpermi2vard512((__v16si)__A, (__v16si) __I,
                                                 (__v16si) __B);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
 _mm512_mask_permutex2var_epi32(__m512i __A, __mmask16 __U, __m512i __I,
-                               __m512i __B)
-{
+                               __m512i __B) {
   return (__m512i)__builtin_ia32_selectd_512(__U,
                               (__v16si)_mm512_permutex2var_epi32(__A, __I, __B),
                               (__v16si)__A);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
 _mm512_mask2_permutex2var_epi32(__m512i __A, __m512i __I, __mmask16 __U,
-                                __m512i __B)
-{
+                                __m512i __B) {
   return (__m512i)__builtin_ia32_selectd_512(__U,
                               (__v16si)_mm512_permutex2var_epi32(__A, __I, __B),
                               (__v16si)__I);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
 _mm512_maskz_permutex2var_epi32(__mmask16 __U, __m512i __A, __m512i __I,
-                                __m512i __B)
-{
+                                __m512i __B) {
   return (__m512i)__builtin_ia32_selectd_512(__U,
                               (__v16si)_mm512_permutex2var_epi32(__A, __I, __B),
                               (__v16si)_mm512_setzero_si512());
 }
 
-static __inline __m512i __DEFAULT_FN_ATTRS512
-_mm512_permutex2var_epi64(__m512i __A, __m512i __I, __m512i __B)
-{
+static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_permutex2var_epi64(__m512i __A, __m512i __I, __m512i __B) {
   return (__m512i)__builtin_ia32_vpermi2varq512((__v8di)__A, (__v8di) __I,
                                                 (__v8di) __B);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
 _mm512_mask_permutex2var_epi64(__m512i __A, __mmask8 __U, __m512i __I,
-                               __m512i __B)
-{
+                               __m512i __B) {
   return (__m512i)__builtin_ia32_selectq_512(__U,
                                (__v8di)_mm512_permutex2var_epi64(__A, __I, __B),
                                (__v8di)__A);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
 _mm512_mask2_permutex2var_epi64(__m512i __A, __m512i __I, __mmask8 __U,
-                                __m512i __B)
-{
+                                __m512i __B) {
   return (__m512i)__builtin_ia32_selectq_512(__U,
                                (__v8di)_mm512_permutex2var_epi64(__A, __I, __B),
                                (__v8di)__I);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
 _mm512_maskz_permutex2var_epi64(__mmask8 __U, __m512i __A, __m512i __I,
-                                __m512i __B)
-{
+                                __m512i __B) {
   return (__m512i)__builtin_ia32_selectq_512(__U,
                                (__v8di)_mm512_permutex2var_epi64(__A, __I, __B),
                                (__v8di)_mm512_setzero_si512());
@@ -3363,10 +3128,10 @@ _mm512_maskz_permutex2var_epi64(__mmask8 __U, __m512i __A, __m512i __I,
                                  (__v16si)_mm512_setzero_si512()))
 /* Vector Extract */
 
-#define _mm512_extractf64x4_pd(A, I) \
-  ((__m256d)__builtin_ia32_extractf64x4_mask((__v8df)(__m512d)(A), (int)(I), \
-                                             (__v4df)_mm256_undefined_pd(), \
-                                             (__mmask8)-1))
+#define _mm512_extractf64x4_pd(A, I)                                           \
+  ((__m256d)__builtin_ia32_extractf64x4_mask((__v8df)(__m512d)(A), (int)(I),   \
+                                             (__v4df)_mm256_setzero_pd(),      \
+                                             (__mmask8) - 1))
 
 #define _mm512_mask_extractf64x4_pd(W, U, A, imm) \
   ((__m256d)__builtin_ia32_extractf64x4_mask((__v8df)(__m512d)(A), (int)(imm), \
@@ -3378,10 +3143,10 @@ _mm512_maskz_permutex2var_epi64(__mmask8 __U, __m512i __A, __m512i __I,
                                              (__v4df)_mm256_setzero_pd(), \
                                              (__mmask8)(U)))
 
-#define _mm512_extractf32x4_ps(A, I) \
-  ((__m128)__builtin_ia32_extractf32x4_mask((__v16sf)(__m512)(A), (int)(I), \
-                                            (__v4sf)_mm_undefined_ps(), \
-                                            (__mmask8)-1))
+#define _mm512_extractf32x4_ps(A, I)                                           \
+  ((__m128)__builtin_ia32_extractf32x4_mask((__v16sf)(__m512)(A), (int)(I),    \
+                                            (__v4sf)_mm_setzero_ps(),          \
+                                            (__mmask8) - 1))
 
 #define _mm512_mask_extractf32x4_ps(W, U, A, imm) \
   ((__m128)__builtin_ia32_extractf32x4_mask((__v16sf)(__m512)(A), (int)(imm), \
@@ -3395,33 +3160,29 @@ _mm512_maskz_permutex2var_epi64(__mmask8 __U, __m512i __A, __m512i __I,
 
 /* Vector Blend */
 
-static __inline __m512d __DEFAULT_FN_ATTRS512
-_mm512_mask_blend_pd(__mmask8 __U, __m512d __A, __m512d __W)
-{
+static __inline __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_mask_blend_pd(__mmask8 __U, __m512d __A, __m512d __W) {
   return (__m512d) __builtin_ia32_selectpd_512 ((__mmask8) __U,
                  (__v8df) __W,
                  (__v8df) __A);
 }
 
-static __inline __m512 __DEFAULT_FN_ATTRS512
-_mm512_mask_blend_ps(__mmask16 __U, __m512 __A, __m512 __W)
-{
+static __inline __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_mask_blend_ps(__mmask16 __U, __m512 __A, __m512 __W) {
   return (__m512) __builtin_ia32_selectps_512 ((__mmask16) __U,
                 (__v16sf) __W,
                 (__v16sf) __A);
 }
 
-static __inline __m512i __DEFAULT_FN_ATTRS512
-_mm512_mask_blend_epi64(__mmask8 __U, __m512i __A, __m512i __W)
-{
+static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_mask_blend_epi64(__mmask8 __U, __m512i __A, __m512i __W) {
   return (__m512i) __builtin_ia32_selectq_512 ((__mmask8) __U,
                 (__v8di) __W,
                 (__v8di) __A);
 }
 
-static __inline __m512i __DEFAULT_FN_ATTRS512
-_mm512_mask_blend_epi32(__mmask16 __U, __m512i __A, __m512i __W)
-{
+static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_mask_blend_epi32(__mmask16 __U, __m512i __A, __m512i __W) {
   return (__m512i) __builtin_ia32_selectd_512 ((__mmask16) __U,
                 (__v16si) __W,
                 (__v16si) __A);
@@ -3615,115 +3376,99 @@ _mm512_maskz_cvttps_epu32 (__mmask16 __U, __m512 __A)
                                             (__v16sf)_mm512_setzero_ps(), \
                                             (__mmask16)(U), (int)(R)))
 
-static __inline__ __m512 __DEFAULT_FN_ATTRS512
-_mm512_cvtepu32_ps (__m512i __A)
-{
+static __inline__ __m512
+    __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_cvtepu32_ps(__m512i __A) {
   return (__m512)__builtin_convertvector((__v16su)__A, __v16sf);
 }
 
-static __inline__ __m512 __DEFAULT_FN_ATTRS512
-_mm512_mask_cvtepu32_ps (__m512 __W, __mmask16 __U, __m512i __A)
-{
+static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_mask_cvtepu32_ps(__m512 __W, __mmask16 __U, __m512i __A) {
   return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
                                              (__v16sf)_mm512_cvtepu32_ps(__A),
                                              (__v16sf)__W);
 }
 
-static __inline__ __m512 __DEFAULT_FN_ATTRS512
-_mm512_maskz_cvtepu32_ps (__mmask16 __U, __m512i __A)
-{
+static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_maskz_cvtepu32_ps(__mmask16 __U, __m512i __A) {
   return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
                                              (__v16sf)_mm512_cvtepu32_ps(__A),
                                              (__v16sf)_mm512_setzero_ps());
 }
 
-static __inline __m512d __DEFAULT_FN_ATTRS512
-_mm512_cvtepi32_pd(__m256i __A)
-{
+static __inline __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_cvtepi32_pd(__m256i __A) {
   return (__m512d)__builtin_convertvector((__v8si)__A, __v8df);
 }
 
-static __inline__ __m512d __DEFAULT_FN_ATTRS512
-_mm512_mask_cvtepi32_pd (__m512d __W, __mmask8 __U, __m256i __A)
-{
+static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_mask_cvtepi32_pd(__m512d __W, __mmask8 __U, __m256i __A) {
   return (__m512d)__builtin_ia32_selectpd_512((__mmask8) __U,
                                               (__v8df)_mm512_cvtepi32_pd(__A),
                                               (__v8df)__W);
 }
 
-static __inline__ __m512d __DEFAULT_FN_ATTRS512
-_mm512_maskz_cvtepi32_pd (__mmask8 __U, __m256i __A)
-{
+static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_maskz_cvtepi32_pd(__mmask8 __U, __m256i __A) {
   return (__m512d)__builtin_ia32_selectpd_512((__mmask8) __U,
                                               (__v8df)_mm512_cvtepi32_pd(__A),
                                               (__v8df)_mm512_setzero_pd());
 }
 
-static __inline__ __m512d __DEFAULT_FN_ATTRS512
-_mm512_cvtepi32lo_pd(__m512i __A)
-{
+static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_cvtepi32lo_pd(__m512i __A) {
   return (__m512d) _mm512_cvtepi32_pd(_mm512_castsi512_si256(__A));
 }
 
-static __inline__ __m512d __DEFAULT_FN_ATTRS512
-_mm512_mask_cvtepi32lo_pd(__m512d __W, __mmask8 __U,__m512i __A)
-{
+static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_mask_cvtepi32lo_pd(__m512d __W, __mmask8 __U, __m512i __A) {
   return (__m512d) _mm512_mask_cvtepi32_pd(__W, __U, _mm512_castsi512_si256(__A));
 }
 
-static __inline__ __m512 __DEFAULT_FN_ATTRS512
-_mm512_cvtepi32_ps (__m512i __A)
-{
+static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_cvtepi32_ps(__m512i __A) {
   return (__m512)__builtin_convertvector((__v16si)__A, __v16sf);
 }
 
-static __inline__ __m512 __DEFAULT_FN_ATTRS512
-_mm512_mask_cvtepi32_ps (__m512 __W, __mmask16 __U, __m512i __A)
-{
+static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_mask_cvtepi32_ps(__m512 __W, __mmask16 __U, __m512i __A) {
   return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
                                              (__v16sf)_mm512_cvtepi32_ps(__A),
                                              (__v16sf)__W);
 }
 
-static __inline__ __m512 __DEFAULT_FN_ATTRS512
-_mm512_maskz_cvtepi32_ps (__mmask16 __U, __m512i __A)
-{
+static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_maskz_cvtepi32_ps(__mmask16 __U, __m512i __A) {
   return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
                                              (__v16sf)_mm512_cvtepi32_ps(__A),
                                              (__v16sf)_mm512_setzero_ps());
 }
 
-static __inline __m512d __DEFAULT_FN_ATTRS512
-_mm512_cvtepu32_pd(__m256i __A)
-{
+static __inline __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_cvtepu32_pd(__m256i __A) {
   return (__m512d)__builtin_convertvector((__v8su)__A, __v8df);
 }
 
-static __inline__ __m512d __DEFAULT_FN_ATTRS512
-_mm512_mask_cvtepu32_pd (__m512d __W, __mmask8 __U, __m256i __A)
-{
+static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_mask_cvtepu32_pd(__m512d __W, __mmask8 __U, __m256i __A) {
   return (__m512d)__builtin_ia32_selectpd_512((__mmask8) __U,
                                               (__v8df)_mm512_cvtepu32_pd(__A),
                                               (__v8df)__W);
 }
 
-static __inline__ __m512d __DEFAULT_FN_ATTRS512
-_mm512_maskz_cvtepu32_pd (__mmask8 __U, __m256i __A)
-{
+static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_maskz_cvtepu32_pd(__mmask8 __U, __m256i __A) {
   return (__m512d)__builtin_ia32_selectpd_512((__mmask8) __U,
                                               (__v8df)_mm512_cvtepu32_pd(__A),
                                               (__v8df)_mm512_setzero_pd());
 }
 
-static __inline__ __m512d __DEFAULT_FN_ATTRS512
-_mm512_cvtepu32lo_pd(__m512i __A)
-{
+static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_cvtepu32lo_pd(__m512i __A) {
   return (__m512d) _mm512_cvtepu32_pd(_mm512_castsi512_si256(__A));
 }
 
-static __inline__ __m512d __DEFAULT_FN_ATTRS512
-_mm512_mask_cvtepu32lo_pd(__m512d __W, __mmask8 __U,__m512i __A)
-{
+static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_mask_cvtepu32lo_pd(__m512d __W, __mmask8 __U, __m512i __A) {
   return (__m512d) _mm512_mask_cvtepu32_pd(__W, __U, _mm512_castsi512_si256(__A));
 }
 
@@ -3742,44 +3487,38 @@ _mm512_mask_cvtepu32lo_pd(__m512d __W, __mmask8 __U,__m512i __A)
                                            (__v8sf)_mm256_setzero_ps(), \
                                            (__mmask8)(U), (int)(R)))
 
-static __inline__ __m256 __DEFAULT_FN_ATTRS512
-_mm512_cvtpd_ps (__m512d __A)
-{
-  return (__m256) __builtin_ia32_cvtpd2ps512_mask ((__v8df) __A,
-                (__v8sf) _mm256_undefined_ps (),
-                (__mmask8) -1,
-                _MM_FROUND_CUR_DIRECTION);
+static __inline__ __m256
+    __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_cvtpd_ps(__m512d __A) {
+  return (__m256)__builtin_ia32_cvtpd2ps512_mask(
+      (__v8df)__A, (__v8sf)_mm256_setzero_ps(), (__mmask8)-1,
+      _MM_FROUND_CUR_DIRECTION);
 }
 
-static __inline__ __m256 __DEFAULT_FN_ATTRS512
-_mm512_mask_cvtpd_ps (__m256 __W, __mmask8 __U, __m512d __A)
-{
+static __inline__ __m256 __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_mask_cvtpd_ps(__m256 __W, __mmask8 __U, __m512d __A) {
   return (__m256) __builtin_ia32_cvtpd2ps512_mask ((__v8df) __A,
                 (__v8sf) __W,
                 (__mmask8) __U,
                 _MM_FROUND_CUR_DIRECTION);
 }
 
-static __inline__ __m256 __DEFAULT_FN_ATTRS512
-_mm512_maskz_cvtpd_ps (__mmask8 __U, __m512d __A)
-{
+static __inline__ __m256 __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_maskz_cvtpd_ps(__mmask8 __U, __m512d __A) {
   return (__m256) __builtin_ia32_cvtpd2ps512_mask ((__v8df) __A,
                 (__v8sf) _mm256_setzero_ps (),
                 (__mmask8) __U,
                 _MM_FROUND_CUR_DIRECTION);
 }
 
-static __inline__ __m512 __DEFAULT_FN_ATTRS512
-_mm512_cvtpd_pslo (__m512d __A)
-{
+static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_cvtpd_pslo(__m512d __A) {
   return (__m512) __builtin_shufflevector((__v8sf) _mm512_cvtpd_ps(__A),
                 (__v8sf) _mm256_setzero_ps (),
                 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
 }
 
-static __inline__ __m512 __DEFAULT_FN_ATTRS512
-_mm512_mask_cvtpd_pslo (__m512 __W, __mmask8 __U,__m512d __A)
-{
+static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_mask_cvtpd_pslo(__m512 __W, __mmask8 __U, __m512d __A) {
   return (__m512) __builtin_shufflevector (
                 (__v8sf) _mm512_mask_cvtpd_ps (_mm512_castps512_ps256(__W),
                                                __U, __A),
@@ -4123,9 +3862,8 @@ _mm512_cvtss_f32(__m512 __a)
 
 /* Unpack and Interleave */
 
-static __inline __m512d __DEFAULT_FN_ATTRS512
-_mm512_unpackhi_pd(__m512d __a, __m512d __b)
-{
+static __inline __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_unpackhi_pd(__m512d __a, __m512d __b) {
   return (__m512d)__builtin_shufflevector((__v8df)__a, (__v8df)__b,
                                           1, 9, 1+2, 9+2, 1+4, 9+4, 1+6, 9+6);
 }
@@ -4146,9 +3884,8 @@ _mm512_maskz_unpackhi_pd(__mmask8 __U, __m512d __A, __m512d __B)
                                            (__v8df)_mm512_setzero_pd());
 }
 
-static __inline __m512d __DEFAULT_FN_ATTRS512
-_mm512_unpacklo_pd(__m512d __a, __m512d __b)
-{
+static __inline __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_unpacklo_pd(__m512d __a, __m512d __b) {
   return (__m512d)__builtin_shufflevector((__v8df)__a, (__v8df)__b,
                                           0, 8, 0+2, 8+2, 0+4, 8+4, 0+6, 8+6);
 }
@@ -4169,9 +3906,8 @@ _mm512_maskz_unpacklo_pd (__mmask8 __U, __m512d __A, __m512d __B)
                                            (__v8df)_mm512_setzero_pd());
 }
 
-static __inline __m512 __DEFAULT_FN_ATTRS512
-_mm512_unpackhi_ps(__m512 __a, __m512 __b)
-{
+static __inline __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_unpackhi_ps(__m512 __a, __m512 __b) {
   return (__m512)__builtin_shufflevector((__v16sf)__a, (__v16sf)__b,
                                          2,    18,    3,    19,
                                          2+4,  18+4,  3+4,  19+4,
@@ -4195,9 +3931,8 @@ _mm512_maskz_unpackhi_ps (__mmask16 __U, __m512 __A, __m512 __B)
                                           (__v16sf)_mm512_setzero_ps());
 }
 
-static __inline __m512 __DEFAULT_FN_ATTRS512
-_mm512_unpacklo_ps(__m512 __a, __m512 __b)
-{
+static __inline __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_unpacklo_ps(__m512 __a, __m512 __b) {
   return (__m512)__builtin_shufflevector((__v16sf)__a, (__v16sf)__b,
                                          0,    16,    1,    17,
                                          0+4,  16+4,  1+4,  17+4,
@@ -4221,9 +3956,8 @@ _mm512_maskz_unpacklo_ps (__mmask16 __U, __m512 __A, __m512 __B)
                                           (__v16sf)_mm512_setzero_ps());
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
-_mm512_unpackhi_epi32(__m512i __A, __m512i __B)
-{
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_unpackhi_epi32(__m512i __A, __m512i __B) {
   return (__m512i)__builtin_shufflevector((__v16si)__A, (__v16si)__B,
                                           2,    18,    3,    19,
                                           2+4,  18+4,  3+4,  19+4,
@@ -4247,9 +3981,8 @@ _mm512_maskz_unpackhi_epi32(__mmask16 __U, __m512i __A, __m512i __B)
                                        (__v16si)_mm512_setzero_si512());
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
-_mm512_unpacklo_epi32(__m512i __A, __m512i __B)
-{
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_unpacklo_epi32(__m512i __A, __m512i __B) {
   return (__m512i)__builtin_shufflevector((__v16si)__A, (__v16si)__B,
                                           0,    16,    1,    17,
                                           0+4,  16+4,  1+4,  17+4,
@@ -4273,9 +4006,8 @@ _mm512_maskz_unpacklo_epi32(__mmask16 __U, __m512i __A, __m512i __B)
                                        (__v16si)_mm512_setzero_si512());
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
-_mm512_unpackhi_epi64(__m512i __A, __m512i __B)
-{
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_unpackhi_epi64(__m512i __A, __m512i __B) {
   return (__m512i)__builtin_shufflevector((__v8di)__A, (__v8di)__B,
                                           1, 9, 1+2, 9+2, 1+4, 9+4, 1+6, 9+6);
 }
@@ -4296,9 +4028,8 @@ _mm512_maskz_unpackhi_epi64(__mmask8 __U, __m512i __A, __m512i __B)
                                         (__v8di)_mm512_setzero_si512());
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
-_mm512_unpacklo_epi64 (__m512i __A, __m512i __B)
-{
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_unpacklo_epi64(__m512i __A, __m512i __B) {
   return (__m512i)__builtin_shufflevector((__v8di)__A, (__v8di)__B,
                                           0, 8, 0+2, 8+2, 0+4, 8+4, 0+6, 8+6);
 }
@@ -4619,9 +4350,8 @@ _mm512_store_epi64 (void *__P, __m512i __A)
 
 /* Mask ops */
 
-static __inline __mmask16 __DEFAULT_FN_ATTRS
-_mm512_knot(__mmask16 __M)
-{
+static __inline __mmask16 __DEFAULT_FN_ATTRS_CONSTEXPR
+_mm512_knot(__mmask16 __M) {
   return __builtin_ia32_knothi(__M);
 }
 
@@ -4727,237 +4457,207 @@ _mm512_knot(__mmask16 __M)
 #define _mm512_mask_cmpneq_epu64_mask(k, A, B) \
     _mm512_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_NE)
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
-_mm512_cvtepi8_epi32(__m128i __A)
-{
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_cvtepi8_epi32(__m128i __A) {
   /* This function always performs a signed extension, but __v16qi is a char
      which may be signed or unsigned, so use __v16qs. */
   return (__m512i)__builtin_convertvector((__v16qs)__A, __v16si);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
-_mm512_mask_cvtepi8_epi32(__m512i __W, __mmask16 __U, __m128i __A)
-{
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_mask_cvtepi8_epi32(__m512i __W, __mmask16 __U, __m128i __A) {
   return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
                                              (__v16si)_mm512_cvtepi8_epi32(__A),
                                              (__v16si)__W);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
-_mm512_maskz_cvtepi8_epi32(__mmask16 __U, __m128i __A)
-{
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_maskz_cvtepi8_epi32(__mmask16 __U, __m128i __A) {
   return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
                                              (__v16si)_mm512_cvtepi8_epi32(__A),
                                              (__v16si)_mm512_setzero_si512());
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
-_mm512_cvtepi8_epi64(__m128i __A)
-{
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_cvtepi8_epi64(__m128i __A) {
   /* This function always performs a signed extension, but __v16qi is a char
      which may be signed or unsigned, so use __v16qs. */
   return (__m512i)__builtin_convertvector(__builtin_shufflevector((__v16qs)__A, (__v16qs)__A, 0, 1, 2, 3, 4, 5, 6, 7), __v8di);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
-_mm512_mask_cvtepi8_epi64(__m512i __W, __mmask8 __U, __m128i __A)
-{
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_mask_cvtepi8_epi64(__m512i __W, __mmask8 __U, __m128i __A) {
   return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
                                              (__v8di)_mm512_cvtepi8_epi64(__A),
                                              (__v8di)__W);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
-_mm512_maskz_cvtepi8_epi64(__mmask8 __U, __m128i __A)
-{
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_maskz_cvtepi8_epi64(__mmask8 __U, __m128i __A) {
   return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
                                              (__v8di)_mm512_cvtepi8_epi64(__A),
                                              (__v8di)_mm512_setzero_si512 ());
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
-_mm512_cvtepi32_epi64(__m256i __X)
-{
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_cvtepi32_epi64(__m256i __X) {
   return (__m512i)__builtin_convertvector((__v8si)__X, __v8di);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
-_mm512_mask_cvtepi32_epi64(__m512i __W, __mmask8 __U, __m256i __X)
-{
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_mask_cvtepi32_epi64(__m512i __W, __mmask8 __U, __m256i __X) {
   return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
                                              (__v8di)_mm512_cvtepi32_epi64(__X),
                                              (__v8di)__W);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
-_mm512_maskz_cvtepi32_epi64(__mmask8 __U, __m256i __X)
-{
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_maskz_cvtepi32_epi64(__mmask8 __U, __m256i __X) {
   return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
                                              (__v8di)_mm512_cvtepi32_epi64(__X),
                                              (__v8di)_mm512_setzero_si512());
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
-_mm512_cvtepi16_epi32(__m256i __A)
-{
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_cvtepi16_epi32(__m256i __A) {
   return (__m512i)__builtin_convertvector((__v16hi)__A, __v16si);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
-_mm512_mask_cvtepi16_epi32(__m512i __W, __mmask16 __U, __m256i __A)
-{
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_mask_cvtepi16_epi32(__m512i __W, __mmask16 __U, __m256i __A) {
   return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
                                             (__v16si)_mm512_cvtepi16_epi32(__A),
                                             (__v16si)__W);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
-_mm512_maskz_cvtepi16_epi32(__mmask16 __U, __m256i __A)
-{
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_maskz_cvtepi16_epi32(__mmask16 __U, __m256i __A) {
   return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
                                             (__v16si)_mm512_cvtepi16_epi32(__A),
                                             (__v16si)_mm512_setzero_si512 ());
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
-_mm512_cvtepi16_epi64(__m128i __A)
-{
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_cvtepi16_epi64(__m128i __A) {
   return (__m512i)__builtin_convertvector((__v8hi)__A, __v8di);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
-_mm512_mask_cvtepi16_epi64(__m512i __W, __mmask8 __U, __m128i __A)
-{
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_mask_cvtepi16_epi64(__m512i __W, __mmask8 __U, __m128i __A) {
   return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
                                              (__v8di)_mm512_cvtepi16_epi64(__A),
                                              (__v8di)__W);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
-_mm512_maskz_cvtepi16_epi64(__mmask8 __U, __m128i __A)
-{
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_maskz_cvtepi16_epi64(__mmask8 __U, __m128i __A) {
   return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
                                              (__v8di)_mm512_cvtepi16_epi64(__A),
                                              (__v8di)_mm512_setzero_si512());
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
-_mm512_cvtepu8_epi32(__m128i __A)
-{
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_cvtepu8_epi32(__m128i __A) {
   return (__m512i)__builtin_convertvector((__v16qu)__A, __v16si);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
-_mm512_mask_cvtepu8_epi32(__m512i __W, __mmask16 __U, __m128i __A)
-{
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_mask_cvtepu8_epi32(__m512i __W, __mmask16 __U, __m128i __A) {
   return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
                                              (__v16si)_mm512_cvtepu8_epi32(__A),
                                              (__v16si)__W);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
-_mm512_maskz_cvtepu8_epi32(__mmask16 __U, __m128i __A)
-{
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_maskz_cvtepu8_epi32(__mmask16 __U, __m128i __A) {
   return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
                                              (__v16si)_mm512_cvtepu8_epi32(__A),
                                              (__v16si)_mm512_setzero_si512());
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
-_mm512_cvtepu8_epi64(__m128i __A)
-{
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_cvtepu8_epi64(__m128i __A) {
   return (__m512i)__builtin_convertvector(__builtin_shufflevector((__v16qu)__A, (__v16qu)__A, 0, 1, 2, 3, 4, 5, 6, 7), __v8di);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
-_mm512_mask_cvtepu8_epi64(__m512i __W, __mmask8 __U, __m128i __A)
-{
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_mask_cvtepu8_epi64(__m512i __W, __mmask8 __U, __m128i __A) {
   return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
                                              (__v8di)_mm512_cvtepu8_epi64(__A),
                                              (__v8di)__W);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
-_mm512_maskz_cvtepu8_epi64(__mmask8 __U, __m128i __A)
-{
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_maskz_cvtepu8_epi64(__mmask8 __U, __m128i __A) {
   return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
                                              (__v8di)_mm512_cvtepu8_epi64(__A),
                                              (__v8di)_mm512_setzero_si512());
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
-_mm512_cvtepu32_epi64(__m256i __X)
-{
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_cvtepu32_epi64(__m256i __X) {
   return (__m512i)__builtin_convertvector((__v8su)__X, __v8di);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
-_mm512_mask_cvtepu32_epi64(__m512i __W, __mmask8 __U, __m256i __X)
-{
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_mask_cvtepu32_epi64(__m512i __W, __mmask8 __U, __m256i __X) {
   return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
                                              (__v8di)_mm512_cvtepu32_epi64(__X),
                                              (__v8di)__W);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
-_mm512_maskz_cvtepu32_epi64(__mmask8 __U, __m256i __X)
-{
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_maskz_cvtepu32_epi64(__mmask8 __U, __m256i __X) {
   return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
                                              (__v8di)_mm512_cvtepu32_epi64(__X),
                                              (__v8di)_mm512_setzero_si512());
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
-_mm512_cvtepu16_epi32(__m256i __A)
-{
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_cvtepu16_epi32(__m256i __A) {
   return (__m512i)__builtin_convertvector((__v16hu)__A, __v16si);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
-_mm512_mask_cvtepu16_epi32(__m512i __W, __mmask16 __U, __m256i __A)
-{
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_mask_cvtepu16_epi32(__m512i __W, __mmask16 __U, __m256i __A) {
   return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
                                             (__v16si)_mm512_cvtepu16_epi32(__A),
                                             (__v16si)__W);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
-_mm512_maskz_cvtepu16_epi32(__mmask16 __U, __m256i __A)
-{
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_maskz_cvtepu16_epi32(__mmask16 __U, __m256i __A) {
   return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
                                             (__v16si)_mm512_cvtepu16_epi32(__A),
                                             (__v16si)_mm512_setzero_si512());
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
-_mm512_cvtepu16_epi64(__m128i __A)
-{
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_cvtepu16_epi64(__m128i __A) {
   return (__m512i)__builtin_convertvector((__v8hu)__A, __v8di);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
-_mm512_mask_cvtepu16_epi64(__m512i __W, __mmask8 __U, __m128i __A)
-{
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_mask_cvtepu16_epi64(__m512i __W, __mmask8 __U, __m128i __A) {
   return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
                                              (__v8di)_mm512_cvtepu16_epi64(__A),
                                              (__v8di)__W);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
-_mm512_maskz_cvtepu16_epi64(__mmask8 __U, __m128i __A)
-{
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_maskz_cvtepu16_epi64(__mmask8 __U, __m128i __A) {
   return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
                                              (__v8di)_mm512_cvtepu16_epi64(__A),
                                              (__v8di)_mm512_setzero_si512());
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
 _mm512_rorv_epi32 (__m512i __A, __m512i __B)
 {
-  return (__m512i)__builtin_ia32_prorvd512((__v16si)__A, (__v16si)__B);
+  return (__m512i)__builtin_elementwise_fshr((__v16su)__A,(__v16su)__A, (__v16su)__B);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
 _mm512_mask_rorv_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
 {
   return (__m512i)__builtin_ia32_selectd_512(__U,
@@ -4965,7 +4665,7 @@ _mm512_mask_rorv_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
                                            (__v16si)__W);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
 _mm512_maskz_rorv_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
 {
   return (__m512i)__builtin_ia32_selectd_512(__U,
@@ -4973,13 +4673,13 @@ _mm512_maskz_rorv_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
                                            (__v16si)_mm512_setzero_si512());
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
 _mm512_rorv_epi64 (__m512i __A, __m512i __B)
 {
-  return (__m512i)__builtin_ia32_prorvq512((__v8di)__A, (__v8di)__B);
+  return (__m512i)__builtin_elementwise_fshr((__v8du)__A, (__v8du)__A, (__v8du)__B);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
 _mm512_mask_rorv_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
 {
   return (__m512i)__builtin_ia32_selectq_512(__U,
@@ -4987,7 +4687,7 @@ _mm512_mask_rorv_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
                                             (__v8di)__W);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
 _mm512_maskz_rorv_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
 {
   return (__m512i)__builtin_ia32_selectq_512(__U,
@@ -5063,13 +4763,13 @@ _mm512_maskz_rorv_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
                                        (__v8di)_mm512_rol_epi64((a), (b)), \
                                        (__v8di)_mm512_setzero_si512()))
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
 _mm512_rolv_epi32 (__m512i __A, __m512i __B)
 {
-  return (__m512i)__builtin_ia32_prolvd512((__v16si)__A, (__v16si)__B);
+  return (__m512i)__builtin_elementwise_fshl((__v16su)__A, (__v16su)__A, (__v16su)__B);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
 _mm512_mask_rolv_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
 {
   return (__m512i)__builtin_ia32_selectd_512(__U,
@@ -5077,7 +4777,7 @@ _mm512_mask_rolv_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
                                            (__v16si)__W);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
 _mm512_maskz_rolv_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
 {
   return (__m512i)__builtin_ia32_selectd_512(__U,
@@ -5085,13 +4785,13 @@ _mm512_maskz_rolv_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
                                            (__v16si)_mm512_setzero_si512());
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
 _mm512_rolv_epi64 (__m512i __A, __m512i __B)
 {
-  return (__m512i)__builtin_ia32_prolvq512((__v8di)__A, (__v8di)__B);
+  return (__m512i)__builtin_elementwise_fshl((__v8du)__A, (__v8du)__A, (__v8du)__B);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
 _mm512_mask_rolv_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
 {
   return (__m512i)__builtin_ia32_selectq_512(__U,
@@ -5099,7 +4799,7 @@ _mm512_mask_rolv_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
                                             (__v8di)__W);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
 _mm512_maskz_rolv_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
 {
   return (__m512i)__builtin_ia32_selectq_512(__U,
@@ -5133,91 +4833,81 @@ _mm512_maskz_rolv_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
                                        (__v8di)_mm512_ror_epi64((A), (B)), \
                                        (__v8di)_mm512_setzero_si512()))
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
-_mm512_slli_epi32(__m512i __A, unsigned int __B)
-{
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_slli_epi32(__m512i __A, unsigned int __B) {
   return (__m512i)__builtin_ia32_pslldi512((__v16si)__A, (int)__B);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
 _mm512_mask_slli_epi32(__m512i __W, __mmask16 __U, __m512i __A,
-                       unsigned int __B)
-{
+                       unsigned int __B) {
   return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
                                          (__v16si)_mm512_slli_epi32(__A, __B),
                                          (__v16si)__W);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
 _mm512_maskz_slli_epi32(__mmask16 __U, __m512i __A, unsigned int __B) {
   return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
                                          (__v16si)_mm512_slli_epi32(__A, __B),
                                          (__v16si)_mm512_setzero_si512());
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
-_mm512_slli_epi64(__m512i __A, unsigned int __B)
-{
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_slli_epi64(__m512i __A, unsigned int __B) {
   return (__m512i)__builtin_ia32_psllqi512((__v8di)__A, (int)__B);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
-_mm512_mask_slli_epi64(__m512i __W, __mmask8 __U, __m512i __A, unsigned int __B)
-{
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_mask_slli_epi64(__m512i __W, __mmask8 __U, __m512i __A,
+                       unsigned int __B) {
   return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
                                           (__v8di)_mm512_slli_epi64(__A, __B),
                                           (__v8di)__W);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
-_mm512_maskz_slli_epi64(__mmask8 __U, __m512i __A, unsigned int __B)
-{
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_maskz_slli_epi64(__mmask8 __U, __m512i __A, unsigned int __B) {
   return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
                                           (__v8di)_mm512_slli_epi64(__A, __B),
                                           (__v8di)_mm512_setzero_si512());
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
-_mm512_srli_epi32(__m512i __A, unsigned int __B)
-{
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_srli_epi32(__m512i __A, unsigned int __B) {
   return (__m512i)__builtin_ia32_psrldi512((__v16si)__A, (int)__B);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
 _mm512_mask_srli_epi32(__m512i __W, __mmask16 __U, __m512i __A,
-                       unsigned int __B)
-{
+                       unsigned int __B) {
   return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
                                          (__v16si)_mm512_srli_epi32(__A, __B),
                                          (__v16si)__W);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
 _mm512_maskz_srli_epi32(__mmask16 __U, __m512i __A, unsigned int __B) {
   return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
                                          (__v16si)_mm512_srli_epi32(__A, __B),
                                          (__v16si)_mm512_setzero_si512());
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
-_mm512_srli_epi64(__m512i __A, unsigned int __B)
-{
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_srli_epi64(__m512i __A, unsigned int __B) {
   return (__m512i)__builtin_ia32_psrlqi512((__v8di)__A, (int)__B);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
 _mm512_mask_srli_epi64(__m512i __W, __mmask8 __U, __m512i __A,
-                       unsigned int __B)
-{
+                       unsigned int __B) {
   return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
                                           (__v8di)_mm512_srli_epi64(__A, __B),
                                           (__v8di)__W);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
-_mm512_maskz_srli_epi64(__mmask8 __U, __m512i __A,
-                        unsigned int __B)
-{
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_maskz_srli_epi64(__mmask8 __U, __m512i __A, unsigned int __B) {
   return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
                                           (__v8di)_mm512_srli_epi64(__A, __B),
                                           (__v8di)_mm512_setzero_si512());
@@ -5303,7 +4993,7 @@ _mm512_mask_store_epi64 (void *__P, __mmask8 __U, __m512i __A)
           (__mmask8) __U);
 }
 
-static __inline__ __m512d __DEFAULT_FN_ATTRS512
+static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
 _mm512_movedup_pd (__m512d __A)
 {
   return (__m512d)__builtin_shufflevector((__v8df)__A, (__v8df)__A,
@@ -5665,9 +5355,8 @@ _mm_maskz_getexp_ss (__mmask8 __U, __m128 __A, __m128 __B)
                                                (__v4sf)_mm_setzero_ps(), \
                                                (__mmask8)(U), (int)(R)))
 
-static __inline__ __mmask16 __DEFAULT_FN_ATTRS
-_mm512_kmov (__mmask16 __A)
-{
+static __inline__ __mmask16
+    __DEFAULT_FN_ATTRS_CONSTEXPR _mm512_kmov(__mmask16 __A) {
   return  __A;
 }
 
@@ -5684,79 +5373,70 @@ _mm512_kmov (__mmask16 __A)
   ((long long)__builtin_ia32_vcvtsd2si64((__v2df)(__m128d)(A), (int)(R)))
 #endif
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
-_mm512_sll_epi32(__m512i __A, __m128i __B)
-{
+static __inline__ __m512i
+    __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_sll_epi32(__m512i __A, __m128i __B) {
   return (__m512i)__builtin_ia32_pslld512((__v16si) __A, (__v4si)__B);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
-_mm512_mask_sll_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m128i __B)
-{
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_mask_sll_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m128i __B) {
   return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
                                           (__v16si)_mm512_sll_epi32(__A, __B),
                                           (__v16si)__W);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
-_mm512_maskz_sll_epi32(__mmask16 __U, __m512i __A, __m128i __B)
-{
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_maskz_sll_epi32(__mmask16 __U, __m512i __A, __m128i __B) {
   return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
                                           (__v16si)_mm512_sll_epi32(__A, __B),
                                           (__v16si)_mm512_setzero_si512());
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
-_mm512_sll_epi64(__m512i __A, __m128i __B)
-{
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_sll_epi64(__m512i __A, __m128i __B) {
   return (__m512i)__builtin_ia32_psllq512((__v8di)__A, (__v2di)__B);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
-_mm512_mask_sll_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m128i __B)
-{
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_mask_sll_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m128i __B) {
   return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
                                              (__v8di)_mm512_sll_epi64(__A, __B),
                                              (__v8di)__W);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
-_mm512_maskz_sll_epi64(__mmask8 __U, __m512i __A, __m128i __B)
-{
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_maskz_sll_epi64(__mmask8 __U, __m512i __A, __m128i __B) {
   return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
                                            (__v8di)_mm512_sll_epi64(__A, __B),
                                            (__v8di)_mm512_setzero_si512());
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
-_mm512_sllv_epi32(__m512i __X, __m512i __Y)
-{
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_sllv_epi32(__m512i __X, __m512i __Y) {
   return (__m512i)__builtin_ia32_psllv16si((__v16si)__X, (__v16si)__Y);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
-_mm512_mask_sllv_epi32(__m512i __W, __mmask16 __U, __m512i __X, __m512i __Y)
-{
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_mask_sllv_epi32(__m512i __W, __mmask16 __U, __m512i __X, __m512i __Y) {
   return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
                                            (__v16si)_mm512_sllv_epi32(__X, __Y),
                                            (__v16si)__W);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
-_mm512_maskz_sllv_epi32(__mmask16 __U, __m512i __X, __m512i __Y)
-{
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_maskz_sllv_epi32(__mmask16 __U, __m512i __X, __m512i __Y) {
   return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
                                            (__v16si)_mm512_sllv_epi32(__X, __Y),
                                            (__v16si)_mm512_setzero_si512());
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
 _mm512_sllv_epi64(__m512i __X, __m512i __Y)
 {
   return (__m512i)__builtin_ia32_psllv8di((__v8di)__X, (__v8di)__Y);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
 _mm512_mask_sllv_epi64(__m512i __W, __mmask8 __U, __m512i __X, __m512i __Y)
 {
   return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
@@ -5764,7 +5444,7 @@ _mm512_mask_sllv_epi64(__m512i __W, __mmask8 __U, __m512i __X, __m512i __Y)
                                             (__v8di)__W);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
 _mm512_maskz_sllv_epi64(__mmask8 __U, __m512i __X, __m512i __Y)
 {
   return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
@@ -5772,79 +5452,70 @@ _mm512_maskz_sllv_epi64(__mmask8 __U, __m512i __X, __m512i __Y)
                                             (__v8di)_mm512_setzero_si512());
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
-_mm512_sra_epi32(__m512i __A, __m128i __B)
-{
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_sra_epi32(__m512i __A, __m128i __B) {
   return (__m512i)__builtin_ia32_psrad512((__v16si) __A, (__v4si)__B);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
-_mm512_mask_sra_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m128i __B)
-{
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_mask_sra_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m128i __B) {
   return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
                                           (__v16si)_mm512_sra_epi32(__A, __B),
                                           (__v16si)__W);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
-_mm512_maskz_sra_epi32(__mmask16 __U, __m512i __A, __m128i __B)
-{
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_maskz_sra_epi32(__mmask16 __U, __m512i __A, __m128i __B) {
   return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
                                           (__v16si)_mm512_sra_epi32(__A, __B),
                                           (__v16si)_mm512_setzero_si512());
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
-_mm512_sra_epi64(__m512i __A, __m128i __B)
-{
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_sra_epi64(__m512i __A, __m128i __B) {
   return (__m512i)__builtin_ia32_psraq512((__v8di)__A, (__v2di)__B);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
-_mm512_mask_sra_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m128i __B)
-{
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_mask_sra_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m128i __B) {
   return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
                                            (__v8di)_mm512_sra_epi64(__A, __B),
                                            (__v8di)__W);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
-_mm512_maskz_sra_epi64(__mmask8 __U, __m512i __A, __m128i __B)
-{
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_maskz_sra_epi64(__mmask8 __U, __m512i __A, __m128i __B) {
   return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
                                            (__v8di)_mm512_sra_epi64(__A, __B),
                                            (__v8di)_mm512_setzero_si512());
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
-_mm512_srav_epi32(__m512i __X, __m512i __Y)
-{
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_srav_epi32(__m512i __X, __m512i __Y) {
   return (__m512i)__builtin_ia32_psrav16si((__v16si)__X, (__v16si)__Y);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
-_mm512_mask_srav_epi32(__m512i __W, __mmask16 __U, __m512i __X, __m512i __Y)
-{
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_mask_srav_epi32(__m512i __W, __mmask16 __U, __m512i __X, __m512i __Y) {
   return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
                                            (__v16si)_mm512_srav_epi32(__X, __Y),
                                            (__v16si)__W);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
-_mm512_maskz_srav_epi32(__mmask16 __U, __m512i __X, __m512i __Y)
-{
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_maskz_srav_epi32(__mmask16 __U, __m512i __X, __m512i __Y) {
   return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
                                            (__v16si)_mm512_srav_epi32(__X, __Y),
                                            (__v16si)_mm512_setzero_si512());
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
 _mm512_srav_epi64(__m512i __X, __m512i __Y)
 {
   return (__m512i)__builtin_ia32_psrav8di((__v8di)__X, (__v8di)__Y);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
 _mm512_mask_srav_epi64(__m512i __W, __mmask8 __U, __m512i __X, __m512i __Y)
 {
   return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
@@ -5852,7 +5523,7 @@ _mm512_mask_srav_epi64(__m512i __W, __mmask8 __U, __m512i __X, __m512i __Y)
                                             (__v8di)__W);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
 _mm512_maskz_srav_epi64(__mmask8 __U, __m512i __X, __m512i __Y)
 {
   return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
@@ -5860,79 +5531,70 @@ _mm512_maskz_srav_epi64(__mmask8 __U, __m512i __X, __m512i __Y)
                                             (__v8di)_mm512_setzero_si512());
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
-_mm512_srl_epi32(__m512i __A, __m128i __B)
-{
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_srl_epi32(__m512i __A, __m128i __B) {
   return (__m512i)__builtin_ia32_psrld512((__v16si) __A, (__v4si)__B);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
-_mm512_mask_srl_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m128i __B)
-{
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_mask_srl_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m128i __B) {
   return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
                                           (__v16si)_mm512_srl_epi32(__A, __B),
                                           (__v16si)__W);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
-_mm512_maskz_srl_epi32(__mmask16 __U, __m512i __A, __m128i __B)
-{
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_maskz_srl_epi32(__mmask16 __U, __m512i __A, __m128i __B) {
   return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
                                           (__v16si)_mm512_srl_epi32(__A, __B),
                                           (__v16si)_mm512_setzero_si512());
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
-_mm512_srl_epi64(__m512i __A, __m128i __B)
-{
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_srl_epi64(__m512i __A, __m128i __B) {
   return (__m512i)__builtin_ia32_psrlq512((__v8di)__A, (__v2di)__B);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
-_mm512_mask_srl_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m128i __B)
-{
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_mask_srl_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m128i __B) {
   return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
                                            (__v8di)_mm512_srl_epi64(__A, __B),
                                            (__v8di)__W);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
-_mm512_maskz_srl_epi64(__mmask8 __U, __m512i __A, __m128i __B)
-{
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_maskz_srl_epi64(__mmask8 __U, __m512i __A, __m128i __B) {
   return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
                                            (__v8di)_mm512_srl_epi64(__A, __B),
                                            (__v8di)_mm512_setzero_si512());
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
-_mm512_srlv_epi32(__m512i __X, __m512i __Y)
-{
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_srlv_epi32(__m512i __X, __m512i __Y) {
   return (__m512i)__builtin_ia32_psrlv16si((__v16si)__X, (__v16si)__Y);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
-_mm512_mask_srlv_epi32(__m512i __W, __mmask16 __U, __m512i __X, __m512i __Y)
-{
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_mask_srlv_epi32(__m512i __W, __mmask16 __U, __m512i __X, __m512i __Y) {
   return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
                                            (__v16si)_mm512_srlv_epi32(__X, __Y),
                                            (__v16si)__W);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
-_mm512_maskz_srlv_epi32(__mmask16 __U, __m512i __X, __m512i __Y)
-{
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_maskz_srlv_epi32(__mmask16 __U, __m512i __X, __m512i __Y) {
   return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
                                            (__v16si)_mm512_srlv_epi32(__X, __Y),
                                            (__v16si)_mm512_setzero_si512());
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
 _mm512_srlv_epi64 (__m512i __X, __m512i __Y)
 {
   return (__m512i)__builtin_ia32_psrlv8di((__v8di)__X, (__v8di)__Y);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
 _mm512_mask_srlv_epi64(__m512i __W, __mmask8 __U, __m512i __X, __m512i __Y)
 {
   return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
@@ -5940,7 +5602,7 @@ _mm512_mask_srlv_epi64(__m512i __W, __mmask8 __U, __m512i __X, __m512i __Y)
                                             (__v8di)__W);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
 _mm512_maskz_srlv_epi64(__mmask8 __U, __m512i __X, __m512i __Y)
 {
   return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
@@ -6190,115 +5852,104 @@ _mm_cvttss_u64 (__m128 __A)
                                        (__v16sf)_mm512_permute_ps((X), (C)), \
                                        (__v16sf)_mm512_setzero_ps()))
 
-static __inline__ __m512d __DEFAULT_FN_ATTRS512
-_mm512_permutevar_pd(__m512d __A, __m512i __C)
-{
+static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_permutevar_pd(__m512d __A, __m512i __C) {
   return (__m512d)__builtin_ia32_vpermilvarpd512((__v8df)__A, (__v8di)__C);
 }
 
-static __inline__ __m512d __DEFAULT_FN_ATTRS512
-_mm512_mask_permutevar_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512i __C)
-{
+static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_mask_permutevar_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512i __C) {
   return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
                                          (__v8df)_mm512_permutevar_pd(__A, __C),
                                          (__v8df)__W);
 }
 
-static __inline__ __m512d __DEFAULT_FN_ATTRS512
-_mm512_maskz_permutevar_pd(__mmask8 __U, __m512d __A, __m512i __C)
-{
+static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_maskz_permutevar_pd(__mmask8 __U, __m512d __A, __m512i __C) {
   return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
                                          (__v8df)_mm512_permutevar_pd(__A, __C),
                                          (__v8df)_mm512_setzero_pd());
 }
 
-static __inline__ __m512 __DEFAULT_FN_ATTRS512
-_mm512_permutevar_ps(__m512 __A, __m512i __C)
-{
+static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_permutevar_ps(__m512 __A, __m512i __C) {
   return (__m512)__builtin_ia32_vpermilvarps512((__v16sf)__A, (__v16si)__C);
 }
 
-static __inline__ __m512 __DEFAULT_FN_ATTRS512
-_mm512_mask_permutevar_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512i __C)
-{
+static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_mask_permutevar_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512i __C) {
   return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
                                         (__v16sf)_mm512_permutevar_ps(__A, __C),
                                         (__v16sf)__W);
 }
 
-static __inline__ __m512 __DEFAULT_FN_ATTRS512
-_mm512_maskz_permutevar_ps(__mmask16 __U, __m512 __A, __m512i __C)
-{
+static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_maskz_permutevar_ps(__mmask16 __U, __m512 __A, __m512i __C) {
   return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
                                         (__v16sf)_mm512_permutevar_ps(__A, __C),
                                         (__v16sf)_mm512_setzero_ps());
 }
 
-static __inline __m512d __DEFAULT_FN_ATTRS512
-_mm512_permutex2var_pd(__m512d __A, __m512i __I, __m512d __B)
-{
+static __inline __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_permutex2var_pd(__m512d __A, __m512i __I, __m512d __B) {
   return (__m512d)__builtin_ia32_vpermi2varpd512((__v8df)__A, (__v8di)__I,
                                                  (__v8df)__B);
 }
 
-static __inline__ __m512d __DEFAULT_FN_ATTRS512
-_mm512_mask_permutex2var_pd(__m512d __A, __mmask8 __U, __m512i __I, __m512d __B)
-{
+static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_mask_permutex2var_pd(__m512d __A, __mmask8 __U, __m512i __I,
+                            __m512d __B) {
   return (__m512d)__builtin_ia32_selectpd_512(__U,
                                   (__v8df)_mm512_permutex2var_pd(__A, __I, __B),
                                   (__v8df)__A);
 }
 
-static __inline__ __m512d __DEFAULT_FN_ATTRS512
+static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
 _mm512_mask2_permutex2var_pd(__m512d __A, __m512i __I, __mmask8 __U,
-                             __m512d __B)
-{
+                             __m512d __B) {
   return (__m512d)__builtin_ia32_selectpd_512(__U,
                                   (__v8df)_mm512_permutex2var_pd(__A, __I, __B),
                                   (__v8df)(__m512d)__I);
 }
 
-static __inline__ __m512d __DEFAULT_FN_ATTRS512
+static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
 _mm512_maskz_permutex2var_pd(__mmask8 __U, __m512d __A, __m512i __I,
-                             __m512d __B)
-{
+                             __m512d __B) {
   return (__m512d)__builtin_ia32_selectpd_512(__U,
                                   (__v8df)_mm512_permutex2var_pd(__A, __I, __B),
                                   (__v8df)_mm512_setzero_pd());
 }
 
-static __inline __m512 __DEFAULT_FN_ATTRS512
-_mm512_permutex2var_ps(__m512 __A, __m512i __I, __m512 __B)
-{
+static __inline __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_permutex2var_ps(__m512 __A, __m512i __I, __m512 __B) {
   return (__m512)__builtin_ia32_vpermi2varps512((__v16sf)__A, (__v16si)__I,
                                                 (__v16sf) __B);
 }
 
-static __inline__ __m512 __DEFAULT_FN_ATTRS512
-_mm512_mask_permutex2var_ps(__m512 __A, __mmask16 __U, __m512i __I, __m512 __B)
-{
+static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_mask_permutex2var_ps(__m512 __A, __mmask16 __U, __m512i __I,
+                            __m512 __B) {
   return (__m512)__builtin_ia32_selectps_512(__U,
                                  (__v16sf)_mm512_permutex2var_ps(__A, __I, __B),
                                  (__v16sf)__A);
 }
 
-static __inline__ __m512 __DEFAULT_FN_ATTRS512
-_mm512_mask2_permutex2var_ps(__m512 __A, __m512i __I, __mmask16 __U, __m512 __B)
-{
+static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_mask2_permutex2var_ps(__m512 __A, __m512i __I, __mmask16 __U,
+                             __m512 __B) {
   return (__m512)__builtin_ia32_selectps_512(__U,
                                  (__v16sf)_mm512_permutex2var_ps(__A, __I, __B),
                                  (__v16sf)(__m512)__I);
 }
 
-static __inline__ __m512 __DEFAULT_FN_ATTRS512
-_mm512_maskz_permutex2var_ps(__mmask16 __U, __m512 __A, __m512i __I, __m512 __B)
-{
+static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_maskz_permutex2var_ps(__mmask16 __U, __m512 __A, __m512i __I,
+                             __m512 __B) {
   return (__m512)__builtin_ia32_selectps_512(__U,
                                  (__v16sf)_mm512_permutex2var_ps(__A, __I, __B),
                                  (__v16sf)_mm512_setzero_ps());
 }
 
-
 #define _mm512_cvtt_roundpd_epu32(A, R) \
   ((__m256i)__builtin_ia32_cvttpd2udq512_mask((__v8df)(__m512d)(A), \
                                               (__v8si)_mm256_undefined_si256(), \
@@ -6622,46 +6273,41 @@ _mm_maskz_scalef_ss (__mmask8 __U, __m128 __A, __m128 __B)
                                               (__mmask8)(U), \
                                               (int)(R)))
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
-_mm512_srai_epi32(__m512i __A, unsigned int __B)
-{
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_srai_epi32(__m512i __A, unsigned int __B) {
   return (__m512i)__builtin_ia32_psradi512((__v16si)__A, (int)__B);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
 _mm512_mask_srai_epi32(__m512i __W, __mmask16 __U, __m512i __A,
-                       unsigned int __B)
-{
+                       unsigned int __B) {
   return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
                                          (__v16si)_mm512_srai_epi32(__A, __B),
                                          (__v16si)__W);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
-_mm512_maskz_srai_epi32(__mmask16 __U, __m512i __A,
-                        unsigned int __B) {
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_maskz_srai_epi32(__mmask16 __U, __m512i __A, unsigned int __B) {
   return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
                                          (__v16si)_mm512_srai_epi32(__A, __B),
                                          (__v16si)_mm512_setzero_si512());
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
-_mm512_srai_epi64(__m512i __A, unsigned int __B)
-{
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_srai_epi64(__m512i __A, unsigned int __B) {
   return (__m512i)__builtin_ia32_psraqi512((__v8di)__A, (int)__B);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
-_mm512_mask_srai_epi64(__m512i __W, __mmask8 __U, __m512i __A, unsigned int __B)
-{
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_mask_srai_epi64(__m512i __W, __mmask8 __U, __m512i __A,
+                       unsigned int __B) {
   return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
                                           (__v8di)_mm512_srai_epi64(__A, __B),
                                           (__v8di)__W);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
-_mm512_maskz_srai_epi64(__mmask8 __U, __m512i __A, unsigned int __B)
-{
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_maskz_srai_epi64(__mmask8 __U, __m512i __A, unsigned int __B) {
   return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
                                           (__v8di)_mm512_srai_epi64(__A, __B),
                                           (__v8di)_mm512_setzero_si512());
@@ -6827,33 +6473,29 @@ _mm_maskz_sqrt_ss (__mmask8 __U, __m128 __A, __m128 __B)
                                             (__v4sf)_mm_setzero_ps(), \
                                             (__mmask8)(U), (int)(R)))
 
-static __inline__ __m512 __DEFAULT_FN_ATTRS512
-_mm512_broadcast_f32x4(__m128 __A)
-{
+static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_broadcast_f32x4(__m128 __A) {
   return (__m512)__builtin_shufflevector((__v4sf)__A, (__v4sf)__A,
                                          0, 1, 2, 3, 0, 1, 2, 3,
                                          0, 1, 2, 3, 0, 1, 2, 3);
 }
 
-static __inline__ __m512 __DEFAULT_FN_ATTRS512
-_mm512_mask_broadcast_f32x4(__m512 __O, __mmask16 __M, __m128 __A)
-{
+static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_mask_broadcast_f32x4(__m512 __O, __mmask16 __M, __m128 __A) {
   return (__m512)__builtin_ia32_selectps_512((__mmask16)__M,
                                            (__v16sf)_mm512_broadcast_f32x4(__A),
                                            (__v16sf)__O);
 }
 
-static __inline__ __m512 __DEFAULT_FN_ATTRS512
-_mm512_maskz_broadcast_f32x4(__mmask16 __M, __m128 __A)
-{
+static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_maskz_broadcast_f32x4(__mmask16 __M, __m128 __A) {
   return (__m512)__builtin_ia32_selectps_512((__mmask16)__M,
                                            (__v16sf)_mm512_broadcast_f32x4(__A),
                                            (__v16sf)_mm512_setzero_ps());
 }
 
-static __inline__ __m512d __DEFAULT_FN_ATTRS512
-_mm512_broadcast_f64x4(__m256d __A)
-{
+static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_broadcast_f64x4(__m256d __A) {
   return (__m512d)__builtin_shufflevector((__v4df)__A, (__v4df)__A,
                                           0, 1, 2, 3, 0, 1, 2, 3);
 }
@@ -6874,33 +6516,29 @@ _mm512_maskz_broadcast_f64x4(__mmask8 __M, __m256d __A)
                                             (__v8df)_mm512_setzero_pd());
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
-_mm512_broadcast_i32x4(__m128i __A)
-{
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_broadcast_i32x4(__m128i __A) {
   return (__m512i)__builtin_shufflevector((__v4si)__A, (__v4si)__A,
                                           0, 1, 2, 3, 0, 1, 2, 3,
                                           0, 1, 2, 3, 0, 1, 2, 3);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
-_mm512_mask_broadcast_i32x4(__m512i __O, __mmask16 __M, __m128i __A)
-{
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_mask_broadcast_i32x4(__m512i __O, __mmask16 __M, __m128i __A) {
   return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
                                            (__v16si)_mm512_broadcast_i32x4(__A),
                                            (__v16si)__O);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
-_mm512_maskz_broadcast_i32x4(__mmask16 __M, __m128i __A)
-{
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_maskz_broadcast_i32x4(__mmask16 __M, __m128i __A) {
   return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
                                            (__v16si)_mm512_broadcast_i32x4(__A),
                                            (__v16si)_mm512_setzero_si512());
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
-_mm512_broadcast_i64x4(__m256i __A)
-{
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_broadcast_i64x4(__m256i __A) {
   return (__m512i)__builtin_shufflevector((__v4di)__A, (__v4di)__A,
                                           0, 1, 2, 3, 0, 1, 2, 3);
 }
@@ -6921,33 +6559,29 @@ _mm512_maskz_broadcast_i64x4(__mmask8 __M, __m256i __A)
                                             (__v8di)_mm512_setzero_si512());
 }
 
-static __inline__ __m512d __DEFAULT_FN_ATTRS512
-_mm512_mask_broadcastsd_pd (__m512d __O, __mmask8 __M, __m128d __A)
-{
+static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_mask_broadcastsd_pd(__m512d __O, __mmask8 __M, __m128d __A) {
   return (__m512d)__builtin_ia32_selectpd_512(__M,
                                               (__v8df) _mm512_broadcastsd_pd(__A),
                                               (__v8df) __O);
 }
 
-static __inline__ __m512d __DEFAULT_FN_ATTRS512
-_mm512_maskz_broadcastsd_pd (__mmask8 __M, __m128d __A)
-{
+static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_maskz_broadcastsd_pd(__mmask8 __M, __m128d __A) {
   return (__m512d)__builtin_ia32_selectpd_512(__M,
                                               (__v8df) _mm512_broadcastsd_pd(__A),
                                               (__v8df) _mm512_setzero_pd());
 }
 
-static __inline__ __m512 __DEFAULT_FN_ATTRS512
-_mm512_mask_broadcastss_ps (__m512 __O, __mmask16 __M, __m128 __A)
-{
+static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_mask_broadcastss_ps(__m512 __O, __mmask16 __M, __m128 __A) {
   return (__m512)__builtin_ia32_selectps_512(__M,
                                              (__v16sf) _mm512_broadcastss_ps(__A),
                                              (__v16sf) __O);
 }
 
-static __inline__ __m512 __DEFAULT_FN_ATTRS512
-_mm512_maskz_broadcastss_ps (__mmask16 __M, __m128 __A)
-{
+static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_maskz_broadcastss_ps(__mmask16 __M, __m128 __A) {
   return (__m512)__builtin_ia32_selectps_512(__M,
                                              (__v16sf) _mm512_broadcastss_ps(__A),
                                              (__v16sf) _mm512_setzero_ps());
@@ -7391,10 +7025,10 @@ _mm512_mask_cvtepi64_storeu_epi16 (void *__P, __mmask8 __M, __m512i __A)
   __builtin_ia32_pmovqw512mem_mask ((__v8hi *) __P, (__v8di) __A, __M);
 }
 
-#define _mm512_extracti32x4_epi32(A, imm) \
-  ((__m128i)__builtin_ia32_extracti32x4_mask((__v16si)(__m512i)(A), (int)(imm), \
-                                             (__v4si)_mm_undefined_si128(), \
-                                             (__mmask8)-1))
+#define _mm512_extracti32x4_epi32(A, imm)                                      \
+  ((__m128i)__builtin_ia32_extracti32x4_mask(                                  \
+      (__v16si)(__m512i)(A), (int)(imm), (__v4si)_mm_setzero_si128(),          \
+      (__mmask8) - 1))
 
 #define _mm512_mask_extracti32x4_epi32(W, U, A, imm) \
   ((__m128i)__builtin_ia32_extracti32x4_mask((__v16si)(__m512i)(A), (int)(imm), \
@@ -7406,10 +7040,10 @@ _mm512_mask_cvtepi64_storeu_epi16 (void *__P, __mmask8 __M, __m512i __A)
                                              (__v4si)_mm_setzero_si128(), \
                                              (__mmask8)(U)))
 
-#define _mm512_extracti64x4_epi64(A, imm) \
+#define _mm512_extracti64x4_epi64(A, imm)                                      \
   ((__m256i)__builtin_ia32_extracti64x4_mask((__v8di)(__m512i)(A), (int)(imm), \
-                                             (__v4di)_mm256_undefined_si256(), \
-                                             (__mmask8)-1))
+                                             (__v4di)_mm256_setzero_si256(),   \
+                                             (__mmask8) - 1))
 
 #define _mm512_mask_extracti64x4_epi64(W, U, A, imm) \
   ((__m256i)__builtin_ia32_extracti64x4_mask((__v8di)(__m512i)(A), (int)(imm), \
@@ -8274,93 +7908,82 @@ _mm_mask3_fnmsub_sd (__m128d __W, __m128d __X, __m128d __Y, __mmask8 __U)
                                        (__v8di)_mm512_permutex_epi64((X), (C)), \
                                        (__v8di)_mm512_setzero_si512()))
 
-static __inline__ __m512d __DEFAULT_FN_ATTRS512
-_mm512_permutexvar_pd (__m512i __X, __m512d __Y)
-{
+static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_permutexvar_pd(__m512i __X, __m512d __Y) {
   return (__m512d)__builtin_ia32_permvardf512((__v8df) __Y, (__v8di) __X);
 }
 
-static __inline__ __m512d __DEFAULT_FN_ATTRS512
-_mm512_mask_permutexvar_pd (__m512d __W, __mmask8 __U, __m512i __X, __m512d __Y)
-{
+static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_mask_permutexvar_pd(__m512d __W, __mmask8 __U, __m512i __X,
+                           __m512d __Y) {
   return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
                                         (__v8df)_mm512_permutexvar_pd(__X, __Y),
                                         (__v8df)__W);
 }
 
-static __inline__ __m512d __DEFAULT_FN_ATTRS512
-_mm512_maskz_permutexvar_pd (__mmask8 __U, __m512i __X, __m512d __Y)
-{
+static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_maskz_permutexvar_pd(__mmask8 __U, __m512i __X, __m512d __Y) {
   return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
                                         (__v8df)_mm512_permutexvar_pd(__X, __Y),
                                         (__v8df)_mm512_setzero_pd());
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
-_mm512_permutexvar_epi64 (__m512i __X, __m512i __Y)
-{
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_permutexvar_epi64(__m512i __X, __m512i __Y) {
   return (__m512i)__builtin_ia32_permvardi512((__v8di)__Y, (__v8di)__X);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
-_mm512_maskz_permutexvar_epi64 (__mmask8 __M, __m512i __X, __m512i __Y)
-{
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_maskz_permutexvar_epi64(__mmask8 __M, __m512i __X, __m512i __Y) {
   return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
                                      (__v8di)_mm512_permutexvar_epi64(__X, __Y),
                                      (__v8di)_mm512_setzero_si512());
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
-_mm512_mask_permutexvar_epi64 (__m512i __W, __mmask8 __M, __m512i __X,
-             __m512i __Y)
-{
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_mask_permutexvar_epi64(__m512i __W, __mmask8 __M, __m512i __X,
+                              __m512i __Y) {
   return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
                                      (__v8di)_mm512_permutexvar_epi64(__X, __Y),
                                      (__v8di)__W);
 }
 
-static __inline__ __m512 __DEFAULT_FN_ATTRS512
-_mm512_permutexvar_ps (__m512i __X, __m512 __Y)
-{
+static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_permutexvar_ps(__m512i __X, __m512 __Y) {
   return (__m512)__builtin_ia32_permvarsf512((__v16sf)__Y, (__v16si)__X);
 }
 
-static __inline__ __m512 __DEFAULT_FN_ATTRS512
-_mm512_mask_permutexvar_ps (__m512 __W, __mmask16 __U, __m512i __X, __m512 __Y)
-{
+static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_mask_permutexvar_ps(__m512 __W, __mmask16 __U, __m512i __X, __m512 __Y) {
   return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
                                        (__v16sf)_mm512_permutexvar_ps(__X, __Y),
                                        (__v16sf)__W);
 }
 
-static __inline__ __m512 __DEFAULT_FN_ATTRS512
-_mm512_maskz_permutexvar_ps (__mmask16 __U, __m512i __X, __m512 __Y)
-{
+static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_maskz_permutexvar_ps(__mmask16 __U, __m512i __X, __m512 __Y) {
   return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
                                        (__v16sf)_mm512_permutexvar_ps(__X, __Y),
                                        (__v16sf)_mm512_setzero_ps());
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
-_mm512_permutexvar_epi32 (__m512i __X, __m512i __Y)
-{
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_permutexvar_epi32(__m512i __X, __m512i __Y) {
   return (__m512i)__builtin_ia32_permvarsi512((__v16si)__Y, (__v16si)__X);
 }
 
 #define _mm512_permutevar_epi32 _mm512_permutexvar_epi32
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
-_mm512_maskz_permutexvar_epi32 (__mmask16 __M, __m512i __X, __m512i __Y)
-{
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_maskz_permutexvar_epi32(__mmask16 __M, __m512i __X, __m512i __Y) {
   return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
                                     (__v16si)_mm512_permutexvar_epi32(__X, __Y),
                                     (__v16si)_mm512_setzero_si512());
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
-_mm512_mask_permutexvar_epi32 (__m512i __W, __mmask16 __M, __m512i __X,
-             __m512i __Y)
-{
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_mask_permutexvar_epi32(__m512i __W, __mmask16 __M, __m512i __X,
+                              __m512i __Y) {
   return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
                                     (__v16si)_mm512_permutexvar_epi32(__X, __Y),
                                     (__v16si)__W);
@@ -8368,69 +7991,59 @@ _mm512_mask_permutexvar_epi32 (__m512i __W, __mmask16 __M, __m512i __X,
 
 #define _mm512_mask_permutevar_epi32 _mm512_mask_permutexvar_epi32
 
-static __inline__ __mmask16 __DEFAULT_FN_ATTRS
-_mm512_kand (__mmask16 __A, __mmask16 __B)
-{
+static __inline__ __mmask16
+    __DEFAULT_FN_ATTRS_CONSTEXPR _mm512_kand(__mmask16 __A, __mmask16 __B) {
   return (__mmask16) __builtin_ia32_kandhi ((__mmask16) __A, (__mmask16) __B);
 }
 
-static __inline__ __mmask16 __DEFAULT_FN_ATTRS
-_mm512_kandn (__mmask16 __A, __mmask16 __B)
-{
+static __inline__ __mmask16 __DEFAULT_FN_ATTRS_CONSTEXPR
+_mm512_kandn(__mmask16 __A, __mmask16 __B) {
   return (__mmask16) __builtin_ia32_kandnhi ((__mmask16) __A, (__mmask16) __B);
 }
 
-static __inline__ __mmask16 __DEFAULT_FN_ATTRS
-_mm512_kor (__mmask16 __A, __mmask16 __B)
-{
+static __inline__ __mmask16 __DEFAULT_FN_ATTRS_CONSTEXPR
+_mm512_kor(__mmask16 __A, __mmask16 __B) {
   return (__mmask16) __builtin_ia32_korhi ((__mmask16) __A, (__mmask16) __B);
 }
 
-static __inline__ int __DEFAULT_FN_ATTRS
-_mm512_kortestc (__mmask16 __A, __mmask16 __B)
-{
+static __inline__ int __DEFAULT_FN_ATTRS_CONSTEXPR
+_mm512_kortestc(__mmask16 __A, __mmask16 __B) {
   return __builtin_ia32_kortestchi ((__mmask16) __A, (__mmask16) __B);
 }
 
-static __inline__ int __DEFAULT_FN_ATTRS
-_mm512_kortestz (__mmask16 __A, __mmask16 __B)
-{
+static __inline__ int __DEFAULT_FN_ATTRS_CONSTEXPR
+_mm512_kortestz(__mmask16 __A, __mmask16 __B) {
   return __builtin_ia32_kortestzhi ((__mmask16) __A, (__mmask16) __B);
 }
 
-static __inline__ unsigned char __DEFAULT_FN_ATTRS
-_kortestc_mask16_u8(__mmask16 __A, __mmask16 __B)
-{
+static __inline__ unsigned char __DEFAULT_FN_ATTRS_CONSTEXPR
+_kortestc_mask16_u8(__mmask16 __A, __mmask16 __B) {
   return (unsigned char)__builtin_ia32_kortestchi(__A, __B);
 }
 
-static __inline__ unsigned char __DEFAULT_FN_ATTRS
-_kortestz_mask16_u8(__mmask16 __A, __mmask16 __B)
-{
+static __inline__ unsigned char __DEFAULT_FN_ATTRS_CONSTEXPR
+_kortestz_mask16_u8(__mmask16 __A, __mmask16 __B) {
   return (unsigned char)__builtin_ia32_kortestzhi(__A, __B);
 }
 
-static __inline__ unsigned char __DEFAULT_FN_ATTRS
+static __inline__ unsigned char __DEFAULT_FN_ATTRS_CONSTEXPR
 _kortest_mask16_u8(__mmask16 __A, __mmask16 __B, unsigned char *__C) {
   *__C = (unsigned char)__builtin_ia32_kortestchi(__A, __B);
   return (unsigned char)__builtin_ia32_kortestzhi(__A, __B);
 }
 
-static __inline__ __mmask16 __DEFAULT_FN_ATTRS
-_mm512_kunpackb (__mmask16 __A, __mmask16 __B)
-{
+static __inline__ __mmask16 __DEFAULT_FN_ATTRS_CONSTEXPR
+_mm512_kunpackb(__mmask16 __A, __mmask16 __B) {
   return (__mmask16) __builtin_ia32_kunpckhi ((__mmask16) __A, (__mmask16) __B);
 }
 
-static __inline__ __mmask16 __DEFAULT_FN_ATTRS
-_mm512_kxnor (__mmask16 __A, __mmask16 __B)
-{
+static __inline__ __mmask16 __DEFAULT_FN_ATTRS_CONSTEXPR
+_mm512_kxnor(__mmask16 __A, __mmask16 __B) {
   return (__mmask16) __builtin_ia32_kxnorhi ((__mmask16) __A, (__mmask16) __B);
 }
 
-static __inline__ __mmask16 __DEFAULT_FN_ATTRS
-_mm512_kxor (__mmask16 __A, __mmask16 __B)
-{
+static __inline__ __mmask16 __DEFAULT_FN_ATTRS_CONSTEXPR
+_mm512_kxor(__mmask16 __A, __mmask16 __B) {
   return (__mmask16) __builtin_ia32_kxorhi ((__mmask16) __A, (__mmask16) __B);
 }
 
@@ -8447,12 +8060,12 @@ _mm512_kxor (__mmask16 __A, __mmask16 __B)
 #define _kshiftri_mask16(A, I) \
   ((__mmask16)__builtin_ia32_kshiftrihi((__mmask16)(A), (unsigned int)(I)))
 
-static __inline__ unsigned int __DEFAULT_FN_ATTRS
-_cvtmask16_u32(__mmask16 __A) {
+static __inline__ unsigned int
+    __DEFAULT_FN_ATTRS_CONSTEXPR _cvtmask16_u32(__mmask16 __A) {
   return (unsigned int)__builtin_ia32_kmovw((__mmask16)__A);
 }
 
-static __inline__ __mmask16 __DEFAULT_FN_ATTRS
+static __inline__ __mmask16 __DEFAULT_FN_ATTRS_CONSTEXPR
 _cvtu32_mask16(unsigned int __A) {
   return (__mmask16)__builtin_ia32_kmovw((__mmask16)__A);
 }
@@ -8665,74 +8278,66 @@ _mm512_mask_testn_epi64_mask (__mmask8 __U, __m512i __A, __m512i __B)
                                        _mm512_setzero_si512());
 }
 
-static __inline__ __m512 __DEFAULT_FN_ATTRS512
+static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
 _mm512_movehdup_ps (__m512 __A)
 {
   return (__m512)__builtin_shufflevector((__v16sf)__A, (__v16sf)__A,
                          1, 1, 3, 3, 5, 5, 7, 7, 9, 9, 11, 11, 13, 13, 15, 15);
 }
 
-static __inline__ __m512 __DEFAULT_FN_ATTRS512
-_mm512_mask_movehdup_ps (__m512 __W, __mmask16 __U, __m512 __A)
-{
+static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_mask_movehdup_ps(__m512 __W, __mmask16 __U, __m512 __A) {
   return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
                                              (__v16sf)_mm512_movehdup_ps(__A),
                                              (__v16sf)__W);
 }
 
-static __inline__ __m512 __DEFAULT_FN_ATTRS512
-_mm512_maskz_movehdup_ps (__mmask16 __U, __m512 __A)
-{
+static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_maskz_movehdup_ps(__mmask16 __U, __m512 __A) {
   return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
                                              (__v16sf)_mm512_movehdup_ps(__A),
                                              (__v16sf)_mm512_setzero_ps());
 }
 
-static __inline__ __m512 __DEFAULT_FN_ATTRS512
+static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
 _mm512_moveldup_ps (__m512 __A)
 {
   return (__m512)__builtin_shufflevector((__v16sf)__A, (__v16sf)__A,
                          0, 0, 2, 2, 4, 4, 6, 6, 8, 8, 10, 10, 12, 12, 14, 14);
 }
 
-static __inline__ __m512 __DEFAULT_FN_ATTRS512
-_mm512_mask_moveldup_ps (__m512 __W, __mmask16 __U, __m512 __A)
-{
+static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_mask_moveldup_ps(__m512 __W, __mmask16 __U, __m512 __A) {
   return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
                                              (__v16sf)_mm512_moveldup_ps(__A),
                                              (__v16sf)__W);
 }
 
-static __inline__ __m512 __DEFAULT_FN_ATTRS512
-_mm512_maskz_moveldup_ps (__mmask16 __U, __m512 __A)
-{
+static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_maskz_moveldup_ps(__mmask16 __U, __m512 __A) {
   return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
                                              (__v16sf)_mm512_moveldup_ps(__A),
                                              (__v16sf)_mm512_setzero_ps());
 }
 
-static __inline__ __m128 __DEFAULT_FN_ATTRS128
-_mm_mask_move_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
-{
+static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR
+_mm_mask_move_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) {
   return __builtin_ia32_selectss_128(__U, _mm_move_ss(__A, __B), __W);
 }
 
-static __inline__ __m128 __DEFAULT_FN_ATTRS128
-_mm_maskz_move_ss (__mmask8 __U, __m128 __A, __m128 __B)
-{
+static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR
+_mm_maskz_move_ss(__mmask8 __U, __m128 __A, __m128 __B) {
   return __builtin_ia32_selectss_128(__U, _mm_move_ss(__A, __B),
                                      _mm_setzero_ps());
 }
 
-static __inline__ __m128d __DEFAULT_FN_ATTRS128
-_mm_mask_move_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
-{
+static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR
+_mm_mask_move_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) {
   return __builtin_ia32_selectsd_128(__U, _mm_move_sd(__A, __B), __W);
 }
 
-static __inline__ __m128d __DEFAULT_FN_ATTRS128
-_mm_maskz_move_sd (__mmask8 __U, __m128d __A, __m128d __B)
-{
+static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR
+_mm_maskz_move_sd(__mmask8 __U, __m128d __A, __m128d __B) {
   return __builtin_ia32_selectsd_128(__U, _mm_move_sd(__A, __B),
                                      _mm_setzero_pd());
 }
@@ -8941,70 +8546,57 @@ _mm512_maskz_expand_epi32 (__mmask16 __U, __m512i __A)
                                             (__v8df)_mm512_setzero_pd(), \
                                             (__mmask8)(U), (int)(R)))
 
-static __inline__ __m512d __DEFAULT_FN_ATTRS512
-_mm512_cvtps_pd (__m256 __A)
-{
+static __inline__ __m512d
+    __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_cvtps_pd(__m256 __A) {
   return (__m512d) __builtin_convertvector((__v8sf)__A, __v8df);
 }
 
-static __inline__ __m512d __DEFAULT_FN_ATTRS512
-_mm512_mask_cvtps_pd (__m512d __W, __mmask8 __U, __m256 __A)
-{
+static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_mask_cvtps_pd(__m512d __W, __mmask8 __U, __m256 __A) {
   return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
                                               (__v8df)_mm512_cvtps_pd(__A),
                                               (__v8df)__W);
 }
 
-static __inline__ __m512d __DEFAULT_FN_ATTRS512
-_mm512_maskz_cvtps_pd (__mmask8 __U, __m256 __A)
-{
+static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_maskz_cvtps_pd(__mmask8 __U, __m256 __A) {
   return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
                                               (__v8df)_mm512_cvtps_pd(__A),
                                               (__v8df)_mm512_setzero_pd());
 }
 
-static __inline__ __m512d __DEFAULT_FN_ATTRS512
-_mm512_cvtpslo_pd (__m512 __A)
-{
+static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_cvtpslo_pd(__m512 __A) {
   return (__m512d) _mm512_cvtps_pd(_mm512_castps512_ps256(__A));
 }
 
-static __inline__ __m512d __DEFAULT_FN_ATTRS512
-_mm512_mask_cvtpslo_pd (__m512d __W, __mmask8 __U, __m512 __A)
-{
+static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_mask_cvtpslo_pd(__m512d __W, __mmask8 __U, __m512 __A) {
   return (__m512d) _mm512_mask_cvtps_pd(__W, __U, _mm512_castps512_ps256(__A));
 }
 
-static __inline__ __m512d __DEFAULT_FN_ATTRS512
-_mm512_mask_mov_pd (__m512d __W, __mmask8 __U, __m512d __A)
-{
-  return (__m512d) __builtin_ia32_selectpd_512 ((__mmask8) __U,
-              (__v8df) __A,
-              (__v8df) __W);
+static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_mask_mov_pd(__m512d __W, __mmask8 __U, __m512d __A) {
+  return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U, (__v8df)__A,
+                                              (__v8df)__W);
 }
 
-static __inline__ __m512d __DEFAULT_FN_ATTRS512
-_mm512_maskz_mov_pd (__mmask8 __U, __m512d __A)
-{
-  return (__m512d) __builtin_ia32_selectpd_512 ((__mmask8) __U,
-              (__v8df) __A,
-              (__v8df) _mm512_setzero_pd ());
+static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_maskz_mov_pd(__mmask8 __U, __m512d __A) {
+  return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U, (__v8df)__A,
+                                              (__v8df)_mm512_setzero_pd());
 }
 
-static __inline__ __m512 __DEFAULT_FN_ATTRS512
-_mm512_mask_mov_ps (__m512 __W, __mmask16 __U, __m512 __A)
-{
-  return (__m512) __builtin_ia32_selectps_512 ((__mmask16) __U,
-             (__v16sf) __A,
-             (__v16sf) __W);
+static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_mask_mov_ps(__m512 __W, __mmask16 __U, __m512 __A) {
+  return (__m512)__builtin_ia32_selectps_512((__mmask16)__U, (__v16sf)__A,
+                                             (__v16sf)__W);
 }
 
-static __inline__ __m512 __DEFAULT_FN_ATTRS512
-_mm512_maskz_mov_ps (__mmask16 __U, __m512 __A)
-{
-  return (__m512) __builtin_ia32_selectps_512 ((__mmask16) __U,
-             (__v16sf) __A,
-             (__v16sf) _mm512_setzero_ps ());
+static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_maskz_mov_ps(__mmask16 __U, __m512 __A) {
+  return (__m512)__builtin_ia32_selectps_512((__mmask16)__U, (__v16sf)__A,
+                                             (__v16sf)_mm512_setzero_ps());
 }
 
 static __inline__ void __DEFAULT_FN_ATTRS512
@@ -9053,18 +8645,16 @@ _mm512_mask_compressstoreu_epi32 (void *__P, __mmask16 __U, __m512i __A)
                                               (__v4sf)_mm_setzero_ps(), \
                                               (__mmask8)(U), (int)(R)))
 
-static __inline__ __m128 __DEFAULT_FN_ATTRS128
-_mm_mask_cvtsd_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128d __B)
-{
+static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR
+_mm_mask_cvtsd_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128d __B) {
   return __builtin_ia32_cvtsd2ss_round_mask ((__v4sf)__A,
                                              (__v2df)__B,
                                              (__v4sf)__W,
                                              (__mmask8)__U, _MM_FROUND_CUR_DIRECTION);
 }
 
-static __inline__ __m128 __DEFAULT_FN_ATTRS128
-_mm_maskz_cvtsd_ss (__mmask8 __U, __m128 __A, __m128d __B)
-{
+static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR
+_mm_maskz_cvtsd_ss(__mmask8 __U, __m128 __A, __m128d __B) {
   return __builtin_ia32_cvtsd2ss_round_mask ((__v4sf)__A,
                                              (__v2df)__B,
                                              (__v4sf)_mm_setzero_ps(),
@@ -9188,34 +8778,32 @@ _mm_cvtu64_ss (__m128 __A, unsigned long long __B)
 }
 #endif
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
-_mm512_mask_set1_epi32 (__m512i __O, __mmask16 __M, int __A)
-{
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_mask_set1_epi32(__m512i __O, __mmask16 __M, int __A) {
   return (__m512i) __builtin_ia32_selectd_512(__M,
                                               (__v16si) _mm512_set1_epi32(__A),
                                               (__v16si) __O);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
-_mm512_mask_set1_epi64 (__m512i __O, __mmask8 __M, long long __A)
-{
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_mask_set1_epi64(__m512i __O, __mmask8 __M, long long __A) {
   return (__m512i) __builtin_ia32_selectq_512(__M,
                                               (__v8di) _mm512_set1_epi64(__A),
                                               (__v8di) __O);
 }
 
-static  __inline __m512i __DEFAULT_FN_ATTRS512
-_mm512_set_epi8 (char __e63, char __e62, char __e61, char __e60, char __e59,
-    char __e58, char __e57, char __e56, char __e55, char __e54, char __e53,
-    char __e52, char __e51, char __e50, char __e49, char __e48, char __e47,
-    char __e46, char __e45, char __e44, char __e43, char __e42, char __e41,
-    char __e40, char __e39, char __e38, char __e37, char __e36, char __e35,
-    char __e34, char __e33, char __e32, char __e31, char __e30, char __e29,
-    char __e28, char __e27, char __e26, char __e25, char __e24, char __e23,
-    char __e22, char __e21, char __e20, char __e19, char __e18, char __e17,
-    char __e16, char __e15, char __e14, char __e13, char __e12, char __e11,
-    char __e10, char __e9, char __e8, char __e7, char __e6, char __e5,
-    char __e4, char __e3, char __e2, char __e1, char __e0) {
+static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_set_epi8(
+    char __e63, char __e62, char __e61, char __e60, char __e59, char __e58,
+    char __e57, char __e56, char __e55, char __e54, char __e53, char __e52,
+    char __e51, char __e50, char __e49, char __e48, char __e47, char __e46,
+    char __e45, char __e44, char __e43, char __e42, char __e41, char __e40,
+    char __e39, char __e38, char __e37, char __e36, char __e35, char __e34,
+    char __e33, char __e32, char __e31, char __e30, char __e29, char __e28,
+    char __e27, char __e26, char __e25, char __e24, char __e23, char __e22,
+    char __e21, char __e20, char __e19, char __e18, char __e17, char __e16,
+    char __e15, char __e14, char __e13, char __e12, char __e11, char __e10,
+    char __e9, char __e8, char __e7, char __e6, char __e5, char __e4, char __e3,
+    char __e2, char __e1, char __e0) {
 
   return __extension__ (__m512i)(__v64qi)
     {__e0, __e1, __e2, __e3, __e4, __e5, __e6, __e7,
@@ -9228,14 +8816,13 @@ _mm512_set_epi8 (char __e63, char __e62, char __e61, char __e60, char __e59,
      __e56, __e57, __e58, __e59, __e60, __e61, __e62, __e63};
 }
 
-static  __inline __m512i __DEFAULT_FN_ATTRS512
-_mm512_set_epi16(short __e31, short __e30, short __e29, short __e28,
-    short __e27, short __e26, short __e25, short __e24, short __e23,
-    short __e22, short __e21, short __e20, short __e19, short __e18,
-    short __e17, short __e16, short __e15, short __e14, short __e13,
-    short __e12, short __e11, short __e10, short __e9, short __e8,
-    short __e7, short __e6, short __e5, short __e4, short __e3,
-    short __e2, short __e1, short __e0) {
+static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_set_epi16(
+    short __e31, short __e30, short __e29, short __e28, short __e27,
+    short __e26, short __e25, short __e24, short __e23, short __e22,
+    short __e21, short __e20, short __e19, short __e18, short __e17,
+    short __e16, short __e15, short __e14, short __e13, short __e12,
+    short __e11, short __e10, short __e9, short __e8, short __e7, short __e6,
+    short __e5, short __e4, short __e3, short __e2, short __e1, short __e0) {
   return __extension__ (__m512i)(__v32hi)
     {__e0, __e1, __e2, __e3, __e4, __e5, __e6, __e7,
      __e8, __e9, __e10, __e11, __e12, __e13, __e14, __e15,
@@ -9243,81 +8830,81 @@ _mm512_set_epi16(short __e31, short __e30, short __e29, short __e28,
      __e24, __e25, __e26, __e27, __e28, __e29, __e30, __e31 };
 }
 
-static __inline __m512i __DEFAULT_FN_ATTRS512
-_mm512_set_epi32 (int __A, int __B, int __C, int __D,
-     int __E, int __F, int __G, int __H,
-     int __I, int __J, int __K, int __L,
-     int __M, int __N, int __O, int __P)
-{
+static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_set_epi32(
+    int __A, int __B, int __C, int __D, int __E, int __F, int __G, int __H,
+    int __I, int __J, int __K, int __L, int __M, int __N, int __O, int __P) {
   return __extension__ (__m512i)(__v16si)
   { __P, __O, __N, __M, __L, __K, __J, __I,
     __H, __G, __F, __E, __D, __C, __B, __A };
 }
 
-#define _mm512_setr_epi32(e0,e1,e2,e3,e4,e5,e6,e7,           \
-       e8,e9,e10,e11,e12,e13,e14,e15)          \
-  _mm512_set_epi32((e15),(e14),(e13),(e12),(e11),(e10),(e9),(e8),(e7),(e6), \
-                   (e5),(e4),(e3),(e2),(e1),(e0))
+static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_setr_epi32(
+    int e0, int e1, int e2, int e3, int e4, int e5, int e6, int e7, int e8,
+    int e9, int e10, int e11, int e12, int e13, int e14, int e15) {
+  return _mm512_set_epi32(e15, e14, e13, e12, e11, e10, e9, e8, e7, e6, e5, e4,
+                          e3, e2, e1, e0);
+}
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
-_mm512_set_epi64 (long long __A, long long __B, long long __C,
-     long long __D, long long __E, long long __F,
-     long long __G, long long __H)
-{
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_set_epi64(long long __A, long long __B, long long __C, long long __D,
+                 long long __E, long long __F, long long __G, long long __H) {
   return __extension__ (__m512i) (__v8di)
   { __H, __G, __F, __E, __D, __C, __B, __A };
 }
 
-#define _mm512_setr_epi64(e0,e1,e2,e3,e4,e5,e6,e7)           \
-  _mm512_set_epi64((e7),(e6),(e5),(e4),(e3),(e2),(e1),(e0))
+static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_setr_epi64(long long e0, long long e1, long long e2, long long e3,
+                  long long e4, long long e5, long long e6, long long e7) {
+  return _mm512_set_epi64(e7, e6, e5, e4, e3, e2, e1, e0);
+}
 
-static __inline__ __m512d __DEFAULT_FN_ATTRS512
-_mm512_set_pd (double __A, double __B, double __C, double __D,
-        double __E, double __F, double __G, double __H)
-{
+static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_set_pd(double __A, double __B, double __C, double __D, double __E,
+              double __F, double __G, double __H) {
   return __extension__ (__m512d)
   { __H, __G, __F, __E, __D, __C, __B, __A };
 }
 
-#define _mm512_setr_pd(e0,e1,e2,e3,e4,e5,e6,e7)              \
-  _mm512_set_pd((e7),(e6),(e5),(e4),(e3),(e2),(e1),(e0))
+static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_setr_pd(double e0, double e1, double e2, double e3, double e4, double e5,
+               double e6, double e7) {
+  return _mm512_set_pd(e7, e6, e5, e4, e3, e2, e1, e0);
+}
 
-static __inline__ __m512 __DEFAULT_FN_ATTRS512
-_mm512_set_ps (float __A, float __B, float __C, float __D,
-        float __E, float __F, float __G, float __H,
-        float __I, float __J, float __K, float __L,
-        float __M, float __N, float __O, float __P)
-{
+static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_set_ps(float __A, float __B, float __C, float __D, float __E, float __F,
+              float __G, float __H, float __I, float __J, float __K, float __L,
+              float __M, float __N, float __O, float __P) {
   return __extension__ (__m512)
   { __P, __O, __N, __M, __L, __K, __J, __I,
     __H, __G, __F, __E, __D, __C, __B, __A };
 }
 
-#define _mm512_setr_ps(e0,e1,e2,e3,e4,e5,e6,e7,e8,e9,e10,e11,e12,e13,e14,e15) \
-  _mm512_set_ps((e15),(e14),(e13),(e12),(e11),(e10),(e9),(e8),(e7),(e6),(e5), \
-                (e4),(e3),(e2),(e1),(e0))
+static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_setr_ps(float e0, float e1, float e2, float e3, float e4, float e5,
+               float e6, float e7, float e8, float e9, float e10, float e11,
+               float e12, float e13, float e14, float e15) {
+  return _mm512_set_ps(e15, e14, e13, e12, e11, e10, e9, e8, e7, e6, e5, e4, e3,
+                       e2, e1, e0);
+}
 
-static __inline__ __m512 __DEFAULT_FN_ATTRS512
-_mm512_abs_ps(__m512 __A)
-{
+static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_abs_ps(__m512 __A) {
   return (__m512)_mm512_and_epi32(_mm512_set1_epi32(0x7FFFFFFF),(__m512i)__A) ;
 }
 
-static __inline__ __m512 __DEFAULT_FN_ATTRS512
-_mm512_mask_abs_ps(__m512 __W, __mmask16 __K, __m512 __A)
-{
+static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_mask_abs_ps(__m512 __W, __mmask16 __K, __m512 __A) {
   return (__m512)_mm512_mask_and_epi32((__m512i)__W, __K, _mm512_set1_epi32(0x7FFFFFFF),(__m512i)__A) ;
 }
 
-static __inline__ __m512d __DEFAULT_FN_ATTRS512
-_mm512_abs_pd(__m512d __A)
-{
+static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_abs_pd(__m512d __A) {
   return (__m512d)_mm512_and_epi64(_mm512_set1_epi64(0x7FFFFFFFFFFFFFFF),(__v8di)__A) ;
 }
 
-static __inline__ __m512d __DEFAULT_FN_ATTRS512
-_mm512_mask_abs_pd(__m512d __W, __mmask8 __K, __m512d __A)
-{
+static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_mask_abs_pd(__m512d __W, __mmask8 __K, __m512d __A) {
   return (__m512d)_mm512_mask_and_epi64((__v8di)__W, __K, _mm512_set1_epi64(0x7FFFFFFFFFFFFFFF),(__v8di)__A);
 }
 
@@ -9337,19 +8924,23 @@ _mm512_mask_abs_pd(__m512d __W, __mmask8 __K, __m512d __A)
  * This takes log2(n) steps where n is the number of elements in the vector.
  */
 
-static __inline__ long long __DEFAULT_FN_ATTRS512 _mm512_reduce_add_epi64(__m512i __W) {
+static __inline__ long long __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_reduce_add_epi64(__m512i __W) {
   return __builtin_reduce_add((__v8di)__W);
 }
 
-static __inline__ long long __DEFAULT_FN_ATTRS512 _mm512_reduce_mul_epi64(__m512i __W) {
+static __inline__ long long __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_reduce_mul_epi64(__m512i __W) {
   return __builtin_reduce_mul((__v8di)__W);
 }
 
-static __inline__ long long __DEFAULT_FN_ATTRS512 _mm512_reduce_and_epi64(__m512i __W) {
+static __inline__ long long __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_reduce_and_epi64(__m512i __W) {
   return __builtin_reduce_and((__v8di)__W);
 }
 
-static __inline__ long long __DEFAULT_FN_ATTRS512 _mm512_reduce_or_epi64(__m512i __W) {
+static __inline__ long long __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_reduce_or_epi64(__m512i __W) {
   return __builtin_reduce_or((__v8di)__W);
 }
 
@@ -9400,22 +8991,22 @@ _mm512_mask_reduce_mul_pd(__mmask8 __M, __m512d __W) {
   return __builtin_ia32_reduce_fmul_pd512(1.0, __W);
 }
 
-static __inline__ int __DEFAULT_FN_ATTRS512
+static __inline__ int __DEFAULT_FN_ATTRS512_CONSTEXPR
 _mm512_reduce_add_epi32(__m512i __W) {
   return __builtin_reduce_add((__v16si)__W);
 }
 
-static __inline__ int __DEFAULT_FN_ATTRS512
+static __inline__ int __DEFAULT_FN_ATTRS512_CONSTEXPR
 _mm512_reduce_mul_epi32(__m512i __W) {
   return __builtin_reduce_mul((__v16si)__W);
 }
 
-static __inline__ int __DEFAULT_FN_ATTRS512
+static __inline__ int __DEFAULT_FN_ATTRS512_CONSTEXPR
 _mm512_reduce_and_epi32(__m512i __W) {
   return __builtin_reduce_and((__v16si)__W);
 }
 
-static __inline__ int __DEFAULT_FN_ATTRS512
+static __inline__ int __DEFAULT_FN_ATTRS512_CONSTEXPR
 _mm512_reduce_or_epi32(__m512i __W) {
   return __builtin_reduce_or((__v16si)__W);
 }
@@ -9466,22 +9057,22 @@ _mm512_mask_reduce_mul_ps(__mmask16 __M, __m512 __W) {
   return __builtin_ia32_reduce_fmul_ps512(1.0f, __W);
 }
 
-static __inline__ long long __DEFAULT_FN_ATTRS512
+static __inline__ long long __DEFAULT_FN_ATTRS512_CONSTEXPR
 _mm512_reduce_max_epi64(__m512i __V) {
   return __builtin_reduce_max((__v8di)__V);
 }
 
-static __inline__ unsigned long long __DEFAULT_FN_ATTRS512
+static __inline__ unsigned long long __DEFAULT_FN_ATTRS512_CONSTEXPR
 _mm512_reduce_max_epu64(__m512i __V) {
   return __builtin_reduce_max((__v8du)__V);
 }
 
-static __inline__ long long __DEFAULT_FN_ATTRS512
+static __inline__ long long __DEFAULT_FN_ATTRS512_CONSTEXPR
 _mm512_reduce_min_epi64(__m512i __V) {
   return __builtin_reduce_min((__v8di)__V);
 }
 
-static __inline__ unsigned long long __DEFAULT_FN_ATTRS512
+static __inline__ unsigned long long __DEFAULT_FN_ATTRS512_CONSTEXPR
 _mm512_reduce_min_epu64(__m512i __V) {
   return __builtin_reduce_min((__v8du)__V);
 }
@@ -9509,22 +9100,22 @@ _mm512_mask_reduce_min_epu64(__mmask8 __M, __m512i __V) {
   __V = _mm512_mask_mov_epi64(_mm512_set1_epi64(-1LL), __M, __V);
   return __builtin_reduce_min((__v8du)__V);
 }
-static __inline__ int __DEFAULT_FN_ATTRS512
+static __inline__ int __DEFAULT_FN_ATTRS512_CONSTEXPR
 _mm512_reduce_max_epi32(__m512i __V) {
   return __builtin_reduce_max((__v16si)__V);
 }
 
-static __inline__ unsigned int __DEFAULT_FN_ATTRS512
+static __inline__ unsigned int __DEFAULT_FN_ATTRS512_CONSTEXPR
 _mm512_reduce_max_epu32(__m512i __V) {
   return __builtin_reduce_max((__v16su)__V);
 }
 
-static __inline__ int __DEFAULT_FN_ATTRS512
+static __inline__ int __DEFAULT_FN_ATTRS512_CONSTEXPR
 _mm512_reduce_min_epi32(__m512i __V) {
   return __builtin_reduce_min((__v16si)__V);
 }
 
-static __inline__ unsigned int __DEFAULT_FN_ATTRS512
+static __inline__ unsigned int __DEFAULT_FN_ATTRS512_CONSTEXPR
 _mm512_reduce_min_epu32(__m512i __V) {
   return __builtin_reduce_min((__v16su)__V);
 }
diff --git a/lib/include/avx512fp16intrin.h b/lib/include/avx512fp16intrin.h
index 92df320b45..9a1d1930f6 100644
--- a/lib/include/avx512fp16intrin.h
+++ b/lib/include/avx512fp16intrin.h
@@ -22,26 +22,36 @@ typedef _Float16 __m512h_u __attribute__((__vector_size__(64), __aligned__(1)));
 
 /* Define the default attributes for the functions in this file. */
 #define __DEFAULT_FN_ATTRS512                                                  \
-  __attribute__((__always_inline__, __nodebug__,                               \
-                 __target__("avx512fp16,evex512"), __min_vector_width__(512)))
+  __attribute__((__always_inline__, __nodebug__, __target__("avx512fp16"),     \
+                 __min_vector_width__(512)))
 #define __DEFAULT_FN_ATTRS256                                                  \
-  __attribute__((__always_inline__, __nodebug__,                               \
-                 __target__("avx512fp16,no-evex512"),                          \
+  __attribute__((__always_inline__, __nodebug__, __target__("avx512fp16"),     \
                  __min_vector_width__(256)))
 #define __DEFAULT_FN_ATTRS128                                                  \
-  __attribute__((__always_inline__, __nodebug__,                               \
-                 __target__("avx512fp16,no-evex512"),                          \
+  __attribute__((__always_inline__, __nodebug__, __target__("avx512fp16"),     \
                  __min_vector_width__(128)))
 
-static __inline__ _Float16 __DEFAULT_FN_ATTRS512 _mm512_cvtsh_h(__m512h __a) {
+#if defined(__cplusplus) && (__cplusplus >= 201103L)
+#define __DEFAULT_FN_ATTRS512_CONSTEXPR __DEFAULT_FN_ATTRS512 constexpr
+#define __DEFAULT_FN_ATTRS256_CONSTEXPR __DEFAULT_FN_ATTRS256 constexpr
+#define __DEFAULT_FN_ATTRS128_CONSTEXPR __DEFAULT_FN_ATTRS128 constexpr
+#else
+#define __DEFAULT_FN_ATTRS512_CONSTEXPR __DEFAULT_FN_ATTRS512
+#define __DEFAULT_FN_ATTRS256_CONSTEXPR __DEFAULT_FN_ATTRS256
+#define __DEFAULT_FN_ATTRS128_CONSTEXPR __DEFAULT_FN_ATTRS128
+#endif
+
+static __inline__ _Float16 __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_cvtsh_h(__m512h __a) {
   return __a[0];
 }
 
-static __inline __m128h __DEFAULT_FN_ATTRS128 _mm_setzero_ph(void) {
+static __inline __m128h __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_setzero_ph(void) {
   return (__m128h){0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0};
 }
 
-static __inline __m256h __DEFAULT_FN_ATTRS256 _mm256_setzero_ph(void) {
+static __inline __m256h __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_setzero_ph(void) {
   return (__m256h){0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,
                    0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0};
 }
@@ -50,7 +60,8 @@ static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_undefined_ph(void) {
   return (__m256h)__builtin_ia32_undef256();
 }
 
-static __inline __m512h __DEFAULT_FN_ATTRS512 _mm512_setzero_ph(void) {
+static __inline __m512h __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_setzero_ph(void) {
   return (__m512h){0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,
                    0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,
                    0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0};
@@ -64,14 +75,15 @@ static __inline__ __m512h __DEFAULT_FN_ATTRS512 _mm512_undefined_ph(void) {
   return (__m512h)__builtin_ia32_undef512();
 }
 
-static __inline __m512h __DEFAULT_FN_ATTRS512 _mm512_set1_ph(_Float16 __h) {
+static __inline __m512h __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_set1_ph(_Float16 __h) {
   return (__m512h)(__v32hf){__h, __h, __h, __h, __h, __h, __h, __h,
                             __h, __h, __h, __h, __h, __h, __h, __h,
                             __h, __h, __h, __h, __h, __h, __h, __h,
                             __h, __h, __h, __h, __h, __h, __h, __h};
 }
 
-static __inline __m512h __DEFAULT_FN_ATTRS512
+static __inline __m512h __DEFAULT_FN_ATTRS512_CONSTEXPR
 _mm512_set_ph(_Float16 __h1, _Float16 __h2, _Float16 __h3, _Float16 __h4,
               _Float16 __h5, _Float16 __h6, _Float16 __h7, _Float16 __h8,
               _Float16 __h9, _Float16 __h10, _Float16 __h11, _Float16 __h12,
@@ -87,106 +99,111 @@ _mm512_set_ph(_Float16 __h1, _Float16 __h2, _Float16 __h3, _Float16 __h4,
                             __h4,  __h3,  __h2,  __h1};
 }
 
-#define _mm512_setr_ph(h1, h2, h3, h4, h5, h6, h7, h8, h9, h10, h11, h12, h13, \
-                       h14, h15, h16, h17, h18, h19, h20, h21, h22, h23, h24,  \
-                       h25, h26, h27, h28, h29, h30, h31, h32)                 \
-  _mm512_set_ph((h32), (h31), (h30), (h29), (h28), (h27), (h26), (h25), (h24), \
-                (h23), (h22), (h21), (h20), (h19), (h18), (h17), (h16), (h15), \
-                (h14), (h13), (h12), (h11), (h10), (h9), (h8), (h7), (h6),     \
-                (h5), (h4), (h3), (h2), (h1))
+static __inline__ __m512h __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_setr_ph(
+    _Float16 e0, _Float16 e1, _Float16 e2, _Float16 e3, _Float16 e4,
+    _Float16 e5, _Float16 e6, _Float16 e7, _Float16 e8, _Float16 e9,
+    _Float16 e10, _Float16 e11, _Float16 e12, _Float16 e13, _Float16 e14,
+    _Float16 e15, _Float16 e16, _Float16 e17, _Float16 e18, _Float16 e19,
+    _Float16 e20, _Float16 e21, _Float16 e22, _Float16 e23, _Float16 e24,
+    _Float16 e25, _Float16 e26, _Float16 e27, _Float16 e28, _Float16 e29,
+    _Float16 e30, _Float16 e31) {
+  return _mm512_set_ph(e31, e30, e29, e28, e27, e26, e25, e24, e23, e22, e21,
+                       e20, e19, e18, e17, e16, e15, e14, e13, e12, e11, e10,
+                       e9, e8, e7, e6, e5, e4, e3, e2, e1, e0);
+}
 
-static __inline __m512h __DEFAULT_FN_ATTRS512
+static __inline __m512h __DEFAULT_FN_ATTRS512_CONSTEXPR
 _mm512_set1_pch(_Float16 _Complex __h) {
   return (__m512h)_mm512_set1_ps(__builtin_bit_cast(float, __h));
 }
 
-static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_castph_ps(__m128h __a) {
+static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_castph_ps(__m128h __a) {
   return (__m128)__a;
 }
 
-static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_castph_ps(__m256h __a) {
+static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_castph_ps(__m256h __a) {
   return (__m256)__a;
 }
 
-static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_castph_ps(__m512h __a) {
+static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_castph_ps(__m512h __a) {
   return (__m512)__a;
 }
 
-static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_castph_pd(__m128h __a) {
+static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_castph_pd(__m128h __a) {
   return (__m128d)__a;
 }
 
-static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_castph_pd(__m256h __a) {
+static __inline__ __m256d __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_castph_pd(__m256h __a) {
   return (__m256d)__a;
 }
 
-static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_castph_pd(__m512h __a) {
+static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_castph_pd(__m512h __a) {
   return (__m512d)__a;
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_castph_si128(__m128h __a) {
+static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_castph_si128(__m128h __a) {
   return (__m128i)__a;
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
 _mm256_castph_si256(__m256h __a) {
   return (__m256i)__a;
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
 _mm512_castph_si512(__m512h __a) {
   return (__m512i)__a;
 }
 
-static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_castps_ph(__m128 __a) {
+static __inline__ __m128h __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_castps_ph(__m128 __a) {
   return (__m128h)__a;
 }
 
-static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_castps_ph(__m256 __a) {
+static __inline__ __m256h __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_castps_ph(__m256 __a) {
   return (__m256h)__a;
 }
 
-static __inline__ __m512h __DEFAULT_FN_ATTRS512 _mm512_castps_ph(__m512 __a) {
+static __inline__ __m512h __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_castps_ph(__m512 __a) {
   return (__m512h)__a;
 }
 
-static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_castpd_ph(__m128d __a) {
+static __inline__ __m128h __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_castpd_ph(__m128d __a) {
   return (__m128h)__a;
 }
 
-static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_castpd_ph(__m256d __a) {
+static __inline__ __m256h __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_castpd_ph(__m256d __a) {
   return (__m256h)__a;
 }
 
-static __inline__ __m512h __DEFAULT_FN_ATTRS512 _mm512_castpd_ph(__m512d __a) {
+static __inline__ __m512h __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_castpd_ph(__m512d __a) {
   return (__m512h)__a;
 }
 
-static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_castsi128_ph(__m128i __a) {
+static __inline__ __m128h __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_castsi128_ph(__m128i __a) {
   return (__m128h)__a;
 }
 
-static __inline__ __m256h __DEFAULT_FN_ATTRS256
+static __inline__ __m256h __DEFAULT_FN_ATTRS256_CONSTEXPR
 _mm256_castsi256_ph(__m256i __a) {
   return (__m256h)__a;
 }
 
-static __inline__ __m512h __DEFAULT_FN_ATTRS512
+static __inline__ __m512h __DEFAULT_FN_ATTRS512_CONSTEXPR
 _mm512_castsi512_ph(__m512i __a) {
   return (__m512h)__a;
 }
 
-static __inline__ __m128h __DEFAULT_FN_ATTRS256
+static __inline__ __m128h __DEFAULT_FN_ATTRS256_CONSTEXPR
 _mm256_castph256_ph128(__m256h __a) {
   return __builtin_shufflevector(__a, __a, 0, 1, 2, 3, 4, 5, 6, 7);
 }
 
-static __inline__ __m128h __DEFAULT_FN_ATTRS512
+static __inline__ __m128h __DEFAULT_FN_ATTRS512_CONSTEXPR
 _mm512_castph512_ph128(__m512h __a) {
   return __builtin_shufflevector(__a, __a, 0, 1, 2, 3, 4, 5, 6, 7);
 }
 
-static __inline__ __m256h __DEFAULT_FN_ATTRS512
+static __inline__ __m256h __DEFAULT_FN_ATTRS512_CONSTEXPR
 _mm512_castph512_ph256(__m512h __a) {
   return __builtin_shufflevector(__a, __a, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11,
                                  12, 13, 14, 15);
@@ -229,7 +246,7 @@ _mm512_castph256_ph512(__m256h __a) {
 ///    A 128-bit vector of [8 x half].
 /// \returns A 512-bit floating-point vector of [16 x half]. The lower 128 bits
 ///    contain the value of the parameter. The upper 384 bits are set to zero.
-static __inline__ __m256h __DEFAULT_FN_ATTRS256
+static __inline__ __m256h __DEFAULT_FN_ATTRS256_CONSTEXPR
 _mm256_zextph128_ph256(__m128h __a) {
   return __builtin_shufflevector(__a, (__v8hf)_mm_setzero_ph(), 0, 1, 2, 3, 4,
                                  5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
@@ -248,7 +265,7 @@ _mm256_zextph128_ph256(__m128h __a) {
 ///    A 128-bit vector of [8 x half].
 /// \returns A 512-bit floating-point vector of [32 x half]. The lower 128 bits
 ///    contain the value of the parameter. The upper 384 bits are set to zero.
-static __inline__ __m512h __DEFAULT_FN_ATTRS512
+static __inline__ __m512h __DEFAULT_FN_ATTRS512_CONSTEXPR
 _mm512_zextph128_ph512(__m128h __a) {
   return __builtin_shufflevector(
       __a, (__v8hf)_mm_setzero_ph(), 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12,
@@ -268,7 +285,7 @@ _mm512_zextph128_ph512(__m128h __a) {
 ///    A 256-bit vector of [16 x half].
 /// \returns A 512-bit floating-point vector of [32 x half]. The lower 256 bits
 ///    contain the value of the parameter. The upper 256 bits are set to zero.
-static __inline__ __m512h __DEFAULT_FN_ATTRS512
+static __inline__ __m512h __DEFAULT_FN_ATTRS512_CONSTEXPR
 _mm512_zextph256_ph512(__m256h __a) {
   return __builtin_shufflevector(__a, (__v16hf)_mm256_setzero_ph(), 0, 1, 2, 3,
                                  4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
@@ -482,19 +499,19 @@ _mm512_maskz_div_ph(__mmask32 __U, __m512h __A, __m512h __B) {
       (__mmask32)(U), (__v32hf)_mm512_div_round_ph((A), (B), (R)),             \
       (__v32hf)_mm512_setzero_ph()))
 
-static __inline__ __m512h __DEFAULT_FN_ATTRS512 _mm512_min_ph(__m512h __A,
-                                                              __m512h __B) {
+static __inline__ __m512h
+    __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_min_ph(__m512h __A, __m512h __B) {
   return (__m512h)__builtin_ia32_minph512((__v32hf)__A, (__v32hf)__B,
                                           _MM_FROUND_CUR_DIRECTION);
 }
 
-static __inline__ __m512h __DEFAULT_FN_ATTRS512
+static __inline__ __m512h __DEFAULT_FN_ATTRS512_CONSTEXPR
 _mm512_mask_min_ph(__m512h __W, __mmask32 __U, __m512h __A, __m512h __B) {
   return (__m512h)__builtin_ia32_selectph_512(
       (__mmask32)__U, (__v32hf)_mm512_min_ph(__A, __B), (__v32hf)__W);
 }
 
-static __inline__ __m512h __DEFAULT_FN_ATTRS512
+static __inline__ __m512h __DEFAULT_FN_ATTRS512_CONSTEXPR
 _mm512_maskz_min_ph(__mmask32 __U, __m512h __A, __m512h __B) {
   return (__m512h)__builtin_ia32_selectph_512((__mmask32)__U,
                                               (__v32hf)_mm512_min_ph(__A, __B),
@@ -515,19 +532,19 @@ _mm512_maskz_min_ph(__mmask32 __U, __m512h __A, __m512h __B) {
       (__mmask32)(U), (__v32hf)_mm512_min_round_ph((A), (B), (R)),             \
       (__v32hf)_mm512_setzero_ph()))
 
-static __inline__ __m512h __DEFAULT_FN_ATTRS512 _mm512_max_ph(__m512h __A,
-                                                              __m512h __B) {
+static __inline__ __m512h
+    __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_max_ph(__m512h __A, __m512h __B) {
   return (__m512h)__builtin_ia32_maxph512((__v32hf)__A, (__v32hf)__B,
                                           _MM_FROUND_CUR_DIRECTION);
 }
 
-static __inline__ __m512h __DEFAULT_FN_ATTRS512
+static __inline__ __m512h __DEFAULT_FN_ATTRS512_CONSTEXPR
 _mm512_mask_max_ph(__m512h __W, __mmask32 __U, __m512h __A, __m512h __B) {
   return (__m512h)__builtin_ia32_selectph_512(
       (__mmask32)__U, (__v32hf)_mm512_max_ph(__A, __B), (__v32hf)__W);
 }
 
-static __inline__ __m512h __DEFAULT_FN_ATTRS512
+static __inline__ __m512h __DEFAULT_FN_ATTRS512_CONSTEXPR
 _mm512_maskz_max_ph(__mmask32 __U, __m512h __A, __m512h __B) {
   return (__m512h)__builtin_ia32_selectph_512((__mmask32)__U,
                                               (__v32hf)_mm512_max_ph(__A, __B),
@@ -548,7 +565,8 @@ _mm512_maskz_max_ph(__mmask32 __U, __m512h __A, __m512h __B) {
       (__mmask32)(U), (__v32hf)_mm512_max_round_ph((A), (B), (R)),             \
       (__v32hf)_mm512_setzero_ph()))
 
-static __inline__ __m512h __DEFAULT_FN_ATTRS512 _mm512_abs_ph(__m512h __A) {
+static __inline__ __m512h __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_abs_ph(__m512h __A) {
   return (__m512h)_mm512_and_epi32(_mm512_set1_epi32(0x7FFF7FFF), (__m512i)__A);
 }
 
@@ -570,23 +588,20 @@ _mm512_maskz_conj_pch(__mmask16 __U, __m512h __A) {
                                               (__v16sf)_mm512_setzero_ps());
 }
 
-static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_add_sh(__m128h __A,
-                                                           __m128h __B) {
+static __inline__ __m128h __DEFAULT_FN_ATTRS128_CONSTEXPR
+_mm_add_sh(__m128h __A, __m128h __B) {
   __A[0] += __B[0];
   return __A;
 }
 
-static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_mask_add_sh(__m128h __W,
-                                                                __mmask8 __U,
-                                                                __m128h __A,
-                                                                __m128h __B) {
+static __inline__ __m128h __DEFAULT_FN_ATTRS128_CONSTEXPR
+_mm_mask_add_sh(__m128h __W, __mmask8 __U, __m128h __A, __m128h __B) {
   __A = _mm_add_sh(__A, __B);
   return __builtin_ia32_selectsh_128(__U, __A, __W);
 }
 
-static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_maskz_add_sh(__mmask8 __U,
-                                                                 __m128h __A,
-                                                                 __m128h __B) {
+static __inline__ __m128h __DEFAULT_FN_ATTRS128_CONSTEXPR
+_mm_maskz_add_sh(__mmask8 __U, __m128h __A, __m128h __B) {
   __A = _mm_add_sh(__A, __B);
   return __builtin_ia32_selectsh_128(__U, __A, _mm_setzero_ph());
 }
@@ -606,23 +621,20 @@ static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_maskz_add_sh(__mmask8 __U,
       (__v8hf)(__m128h)(A), (__v8hf)(__m128h)(B), (__v8hf)_mm_setzero_ph(),    \
       (__mmask8)(U), (int)(R)))
 
-static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_sub_sh(__m128h __A,
-                                                           __m128h __B) {
+static __inline__ __m128h
+    __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_sub_sh(__m128h __A, __m128h __B) {
   __A[0] -= __B[0];
   return __A;
 }
 
-static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_mask_sub_sh(__m128h __W,
-                                                                __mmask8 __U,
-                                                                __m128h __A,
-                                                                __m128h __B) {
+static __inline__ __m128h __DEFAULT_FN_ATTRS128_CONSTEXPR
+_mm_mask_sub_sh(__m128h __W, __mmask8 __U, __m128h __A, __m128h __B) {
   __A = _mm_sub_sh(__A, __B);
   return __builtin_ia32_selectsh_128(__U, __A, __W);
 }
 
-static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_maskz_sub_sh(__mmask8 __U,
-                                                                 __m128h __A,
-                                                                 __m128h __B) {
+static __inline__ __m128h __DEFAULT_FN_ATTRS128_CONSTEXPR
+_mm_maskz_sub_sh(__mmask8 __U, __m128h __A, __m128h __B) {
   __A = _mm_sub_sh(__A, __B);
   return __builtin_ia32_selectsh_128(__U, __A, _mm_setzero_ph());
 }
@@ -642,23 +654,20 @@ static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_maskz_sub_sh(__mmask8 __U,
       (__v8hf)(__m128h)(A), (__v8hf)(__m128h)(B), (__v8hf)_mm_setzero_ph(),    \
       (__mmask8)(U), (int)(R)))
 
-static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_mul_sh(__m128h __A,
-                                                           __m128h __B) {
+static __inline__ __m128h
+    __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mul_sh(__m128h __A, __m128h __B) {
   __A[0] *= __B[0];
   return __A;
 }
 
-static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_mask_mul_sh(__m128h __W,
-                                                                __mmask8 __U,
-                                                                __m128h __A,
-                                                                __m128h __B) {
+static __inline__ __m128h __DEFAULT_FN_ATTRS128_CONSTEXPR
+_mm_mask_mul_sh(__m128h __W, __mmask8 __U, __m128h __A, __m128h __B) {
   __A = _mm_mul_sh(__A, __B);
   return __builtin_ia32_selectsh_128(__U, __A, __W);
 }
 
-static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_maskz_mul_sh(__mmask8 __U,
-                                                                 __m128h __A,
-                                                                 __m128h __B) {
+static __inline__ __m128h __DEFAULT_FN_ATTRS128_CONSTEXPR
+_mm_maskz_mul_sh(__mmask8 __U, __m128h __A, __m128h __B) {
   __A = _mm_mul_sh(__A, __B);
   return __builtin_ia32_selectsh_128(__U, __A, _mm_setzero_ph());
 }
@@ -678,23 +687,20 @@ static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_maskz_mul_sh(__mmask8 __U,
       (__v8hf)(__m128h)(A), (__v8hf)(__m128h)(B), (__v8hf)_mm_setzero_ph(),    \
       (__mmask8)(U), (int)(R)))
 
-static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_div_sh(__m128h __A,
-                                                           __m128h __B) {
+static __inline__ __m128h
+    __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_div_sh(__m128h __A, __m128h __B) {
   __A[0] /= __B[0];
   return __A;
 }
 
-static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_mask_div_sh(__m128h __W,
-                                                                __mmask8 __U,
-                                                                __m128h __A,
-                                                                __m128h __B) {
+static __inline__ __m128h __DEFAULT_FN_ATTRS128_CONSTEXPR
+_mm_mask_div_sh(__m128h __W, __mmask8 __U, __m128h __A, __m128h __B) {
   __A = _mm_div_sh(__A, __B);
   return __builtin_ia32_selectsh_128(__U, __A, __W);
 }
 
-static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_maskz_div_sh(__mmask8 __U,
-                                                                 __m128h __A,
-                                                                 __m128h __B) {
+static __inline__ __m128h __DEFAULT_FN_ATTRS128_CONSTEXPR
+_mm_maskz_div_sh(__mmask8 __U, __m128h __A, __m128h __B) {
   __A = _mm_div_sh(__A, __B);
   return __builtin_ia32_selectsh_128(__U, __A, _mm_setzero_ph());
 }
@@ -942,22 +948,19 @@ static __inline__ void __DEFAULT_FN_ATTRS128 _mm_storeu_ph(void *__P,
 }
 
 // moves with vmovsh:
-static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_move_sh(__m128h __a,
-                                                            __m128h __b) {
+static __inline__ __m128h __DEFAULT_FN_ATTRS128_CONSTEXPR
+_mm_move_sh(__m128h __a, __m128h __b) {
   __a[0] = __b[0];
   return __a;
 }
 
-static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_mask_move_sh(__m128h __W,
-                                                                 __mmask8 __U,
-                                                                 __m128h __A,
-                                                                 __m128h __B) {
+static __inline__ __m128h __DEFAULT_FN_ATTRS128_CONSTEXPR
+_mm_mask_move_sh(__m128h __W, __mmask8 __U, __m128h __A, __m128h __B) {
   return __builtin_ia32_selectsh_128(__U, _mm_move_sh(__A, __B), __W);
 }
 
-static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_maskz_move_sh(__mmask8 __U,
-                                                                  __m128h __A,
-                                                                  __m128h __B) {
+static __inline__ __m128h __DEFAULT_FN_ATTRS128_CONSTEXPR
+_mm_maskz_move_sh(__mmask8 __U, __m128h __A, __m128h __B) {
   return __builtin_ia32_selectsh_128(__U, _mm_move_sh(__A, __B),
                                      _mm_setzero_ph());
 }
@@ -1383,24 +1386,20 @@ _mm_maskz_scalef_sh(__mmask8 __U, __m128h __A, __m128h __B) {
       (__v32hf)_mm512_setzero_ph()))
 
 static __inline__ __m512h __DEFAULT_FN_ATTRS512 _mm512_sqrt_ph(__m512h __A) {
-  return (__m512h)__builtin_ia32_sqrtph512((__v32hf)__A,
-                                           _MM_FROUND_CUR_DIRECTION);
+  return (__m512h)__builtin_elementwise_sqrt((__v32hf)__A);
 }
 
 static __inline__ __m512h __DEFAULT_FN_ATTRS512
 _mm512_mask_sqrt_ph(__m512h __W, __mmask32 __U, __m512h __A) {
   return (__m512h)__builtin_ia32_selectph_512(
-      (__mmask32)(__U),
-      (__v32hf)__builtin_ia32_sqrtph512((__A), (_MM_FROUND_CUR_DIRECTION)),
-      (__v32hf)(__m512h)(__W));
+      (__mmask32)(__U), (__v32hf)_mm512_sqrt_ph(__A), (__v32hf)(__m512h)(__W));
 }
 
 static __inline__ __m512h __DEFAULT_FN_ATTRS512
 _mm512_maskz_sqrt_ph(__mmask32 __U, __m512h __A) {
-  return (__m512h)__builtin_ia32_selectph_512(
-      (__mmask32)(__U),
-      (__v32hf)__builtin_ia32_sqrtph512((__A), (_MM_FROUND_CUR_DIRECTION)),
-      (__v32hf)_mm512_setzero_ph());
+  return (__m512h)__builtin_ia32_selectph_512((__mmask32)(__U),
+                                              (__v32hf)_mm512_sqrt_ph(__A),
+                                              (__v32hf)_mm512_setzero_ph());
 }
 
 #define _mm_sqrt_round_sh(A, B, R)                                             \
@@ -3292,19 +3291,19 @@ _mm512_reduce_min_ph(__m512h __V) {
   return __builtin_ia32_reduce_fmin_ph512(__V);
 }
 
-static __inline__ __m512h __DEFAULT_FN_ATTRS512
+static __inline__ __m512h __DEFAULT_FN_ATTRS512_CONSTEXPR
 _mm512_mask_blend_ph(__mmask32 __U, __m512h __A, __m512h __W) {
   return (__m512h)__builtin_ia32_selectph_512((__mmask32)__U, (__v32hf)__W,
                                               (__v32hf)__A);
 }
 
-static __inline__ __m512h __DEFAULT_FN_ATTRS512
+static __inline__ __m512h __DEFAULT_FN_ATTRS512_CONSTEXPR
 _mm512_permutex2var_ph(__m512h __A, __m512i __I, __m512h __B) {
   return (__m512h)__builtin_ia32_vpermi2varhi512((__v32hi)__A, (__v32hi)__I,
                                                  (__v32hi)__B);
 }
 
-static __inline__ __m512h __DEFAULT_FN_ATTRS512
+static __inline__ __m512h __DEFAULT_FN_ATTRS512_CONSTEXPR
 _mm512_permutexvar_ph(__m512i __A, __m512h __B) {
   return (__m512h)__builtin_ia32_permvarhi512((__v32hi)__B, (__v32hi)__A);
 }
@@ -3348,6 +3347,9 @@ _mm512_permutexvar_ph(__m512i __A, __m512h __B) {
 #undef __DEFAULT_FN_ATTRS128
 #undef __DEFAULT_FN_ATTRS256
 #undef __DEFAULT_FN_ATTRS512
+#undef __DEFAULT_FN_ATTRS128_CONSTEXPR
+#undef __DEFAULT_FN_ATTRS256_CONSTEXPR
+#undef __DEFAULT_FN_ATTRS512_CONSTEXPR
 
 #endif
 #endif
diff --git a/lib/include/avx512ifmaintrin.h b/lib/include/avx512ifmaintrin.h
index 9468d17556..f73b607df7 100644
--- a/lib/include/avx512ifmaintrin.h
+++ b/lib/include/avx512ifmaintrin.h
@@ -15,54 +15,52 @@
 #define __IFMAINTRIN_H
 
 /* Define the default attributes for the functions in this file. */
+#if defined(__cplusplus) && (__cplusplus >= 201103L)
 #define __DEFAULT_FN_ATTRS                                                     \
-  __attribute__((__always_inline__, __nodebug__,                               \
-                 __target__("avx512ifma,evex512"), __min_vector_width__(512)))
+  __attribute__((__always_inline__, __nodebug__, __target__("avx512ifma"),     \
+                 __min_vector_width__(512))) constexpr
+#else
+#define __DEFAULT_FN_ATTRS                                                     \
+  __attribute__((__always_inline__, __nodebug__, __target__("avx512ifma"),     \
+                 __min_vector_width__(512)))
+#endif
 
 static __inline__ __m512i __DEFAULT_FN_ATTRS
-_mm512_madd52hi_epu64 (__m512i __X, __m512i __Y, __m512i __Z)
-{
-  return (__m512i)__builtin_ia32_vpmadd52huq512((__v8di) __X, (__v8di) __Y,
-                                                (__v8di) __Z);
+_mm512_madd52hi_epu64(__m512i __X, __m512i __Y, __m512i __Z) {
+  return (__m512i)__builtin_ia32_vpmadd52huq512((__v8di)__X, (__v8di)__Y,
+                                                (__v8di)__Z);
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_mask_madd52hi_epu64(
+    __m512i __W, __mmask8 __M, __m512i __X, __m512i __Y) {
+  return (__m512i)__builtin_ia32_selectq_512(
+      __M, (__v8di)_mm512_madd52hi_epu64(__W, __X, __Y), (__v8di)__W);
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_maskz_madd52hi_epu64(
+    __mmask8 __M, __m512i __X, __m512i __Y, __m512i __Z) {
+  return (__m512i)__builtin_ia32_selectq_512(
+      __M, (__v8di)_mm512_madd52hi_epu64(__X, __Y, __Z),
+      (__v8di)_mm512_setzero_si512());
 }
 
 static __inline__ __m512i __DEFAULT_FN_ATTRS
-_mm512_mask_madd52hi_epu64 (__m512i __W, __mmask8 __M, __m512i __X, __m512i __Y)
-{
-  return (__m512i)__builtin_ia32_selectq_512(__M,
-                                   (__v8di)_mm512_madd52hi_epu64(__W, __X, __Y),
-                                   (__v8di)__W);
+_mm512_madd52lo_epu64(__m512i __X, __m512i __Y, __m512i __Z) {
+  return (__m512i)__builtin_ia32_vpmadd52luq512((__v8di)__X, (__v8di)__Y,
+                                                (__v8di)__Z);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS
-_mm512_maskz_madd52hi_epu64 (__mmask8 __M, __m512i __X, __m512i __Y, __m512i __Z)
-{
-  return (__m512i)__builtin_ia32_selectq_512(__M,
-                                   (__v8di)_mm512_madd52hi_epu64(__X, __Y, __Z),
-                                   (__v8di)_mm512_setzero_si512());
+static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_mask_madd52lo_epu64(
+    __m512i __W, __mmask8 __M, __m512i __X, __m512i __Y) {
+  return (__m512i)__builtin_ia32_selectq_512(
+      __M, (__v8di)_mm512_madd52lo_epu64(__W, __X, __Y), (__v8di)__W);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS
-_mm512_madd52lo_epu64 (__m512i __X, __m512i __Y, __m512i __Z)
-{
-  return (__m512i)__builtin_ia32_vpmadd52luq512((__v8di) __X, (__v8di) __Y,
-                                                (__v8di) __Z);
-}
-
-static __inline__ __m512i __DEFAULT_FN_ATTRS
-_mm512_mask_madd52lo_epu64 (__m512i __W, __mmask8 __M, __m512i __X, __m512i __Y)
-{
-  return (__m512i)__builtin_ia32_selectq_512(__M,
-                                   (__v8di)_mm512_madd52lo_epu64(__W, __X, __Y),
-                                   (__v8di)__W);
-}
-
-static __inline__ __m512i __DEFAULT_FN_ATTRS
-_mm512_maskz_madd52lo_epu64 (__mmask8 __M, __m512i __X, __m512i __Y, __m512i __Z)
-{
-  return (__m512i)__builtin_ia32_selectq_512(__M,
-                                   (__v8di)_mm512_madd52lo_epu64(__X, __Y, __Z),
-                                   (__v8di)_mm512_setzero_si512());
+static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_maskz_madd52lo_epu64(
+    __mmask8 __M, __m512i __X, __m512i __Y, __m512i __Z) {
+  return (__m512i)__builtin_ia32_selectq_512(
+      __M, (__v8di)_mm512_madd52lo_epu64(__X, __Y, __Z),
+      (__v8di)_mm512_setzero_si512());
 }
 
 #undef __DEFAULT_FN_ATTRS
diff --git a/lib/include/avx512ifmavlintrin.h b/lib/include/avx512ifmavlintrin.h
index 8787cd471d..51d5210e5a 100644
--- a/lib/include/avx512ifmavlintrin.h
+++ b/lib/include/avx512ifmavlintrin.h
@@ -8,22 +8,35 @@
  *===-----------------------------------------------------------------------===
  */
 #ifndef __IMMINTRIN_H
-#error "Never use <avx512ifmavlintrin.h> directly; include <immintrin.h> instead."
+#error                                                                         \
+    "Never use <avx512ifmavlintrin.h> directly; include <immintrin.h> instead."
 #endif
 
 #ifndef __IFMAVLINTRIN_H
 #define __IFMAVLINTRIN_H
 
 /* Define the default attributes for the functions in this file. */
+#if defined(__cplusplus) && (__cplusplus >= 201103L)
 #define __DEFAULT_FN_ATTRS128                                                  \
   __attribute__((__always_inline__, __nodebug__,                               \
-                 __target__("avx512ifma,avx512vl,no-evex512"),                 \
+                 __target__("avx512ifma,avx512vl"),                            \
+                 __min_vector_width__(128))) constexpr
+#define __DEFAULT_FN_ATTRS256                                                  \
+  __attribute__((__always_inline__, __nodebug__,                               \
+                 __target__("avx512ifma,avx512vl"),                            \
+                 __min_vector_width__(256))) constexpr
+#else
+#define __DEFAULT_FN_ATTRS128                                                  \
+  __attribute__((__always_inline__, __nodebug__,                               \
+                 __target__("avx512ifma,avx512vl"),                            \
                  __min_vector_width__(128)))
 #define __DEFAULT_FN_ATTRS256                                                  \
   __attribute__((__always_inline__, __nodebug__,                               \
-                 __target__("avx512ifma,avx512vl,no-evex512"),                 \
+                 __target__("avx512ifma,avx512vl"),                            \
                  __min_vector_width__(256)))
+#endif
 
+#if !(defined(__AVXIFMA__) || defined(__AVX512IFMA__))
 #define _mm_madd52hi_epu64(X, Y, Z)                                            \
   ((__m128i)__builtin_ia32_vpmadd52huq128((__v2di)(X), (__v2di)(Y),            \
                                           (__v2di)(Z)))
@@ -39,71 +52,85 @@
 #define _mm256_madd52lo_epu64(X, Y, Z)                                         \
   ((__m256i)__builtin_ia32_vpmadd52luq256((__v4di)(X), (__v4di)(Y),            \
                                           (__v4di)(Z)))
+#endif
 
+#if defined(__AVX512IFMA__)
 static __inline__ __m128i __DEFAULT_FN_ATTRS128
-_mm_mask_madd52hi_epu64 (__m128i __W, __mmask8 __M, __m128i __X, __m128i __Y)
-{
-  return (__m128i)__builtin_ia32_selectq_128(__M,
-                                      (__v2di)_mm_madd52hi_epu64(__W, __X, __Y),
-                                      (__v2di)__W);
-}
-
-static __inline__ __m128i __DEFAULT_FN_ATTRS128
-_mm_maskz_madd52hi_epu64 (__mmask8 __M, __m128i __X, __m128i __Y, __m128i __Z)
-{
-  return (__m128i)__builtin_ia32_selectq_128(__M,
-                                      (__v2di)_mm_madd52hi_epu64(__X, __Y, __Z),
-                                      (__v2di)_mm_setzero_si128());
+_mm_madd52hi_epu64(__m128i __X, __m128i __Y, __m128i __Z) {
+  return (__m128i)__builtin_ia32_vpmadd52huq128((__v2di)__X, (__v2di)__Y,
+                                                (__v2di)__Z);
 }
 
 static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_mask_madd52hi_epu64 (__m256i __W, __mmask8 __M, __m256i __X, __m256i __Y)
-{
-  return (__m256i)__builtin_ia32_selectq_256(__M,
-                                   (__v4di)_mm256_madd52hi_epu64(__W, __X, __Y),
-                                   (__v4di)__W);
-}
-
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_maskz_madd52hi_epu64 (__mmask8 __M, __m256i __X, __m256i __Y, __m256i __Z)
-{
-  return (__m256i)__builtin_ia32_selectq_256(__M,
-                                   (__v4di)_mm256_madd52hi_epu64(__X, __Y, __Z),
-                                   (__v4di)_mm256_setzero_si256());
+_mm256_madd52hi_epu64(__m256i __X, __m256i __Y, __m256i __Z) {
+  return (__m256i)__builtin_ia32_vpmadd52huq256((__v4di)__X, (__v4di)__Y,
+                                                (__v4di)__Z);
 }
 
 static __inline__ __m128i __DEFAULT_FN_ATTRS128
-_mm_mask_madd52lo_epu64 (__m128i __W, __mmask8 __M, __m128i __X, __m128i __Y)
-{
-  return (__m128i)__builtin_ia32_selectq_128(__M,
-                                      (__v2di)_mm_madd52lo_epu64(__W, __X, __Y),
-                                      (__v2di)__W);
+_mm_madd52lo_epu64(__m128i __X, __m128i __Y, __m128i __Z) {
+  return (__m128i)__builtin_ia32_vpmadd52luq128((__v2di)__X, (__v2di)__Y,
+                                                (__v2di)__Z);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
+_mm256_madd52lo_epu64(__m256i __X, __m256i __Y, __m256i __Z) {
+  return (__m256i)__builtin_ia32_vpmadd52luq256((__v4di)__X, (__v4di)__Y,
+                                                (__v4di)__Z);
+}
+#endif
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
+_mm_mask_madd52hi_epu64(__m128i __W, __mmask8 __M, __m128i __X, __m128i __Y) {
+  return (__m128i)__builtin_ia32_selectq_128(
+      __M, (__v2di)__builtin_ia32_vpmadd52huq128(__W, __X, __Y), (__v2di)__W);
 }
 
 static __inline__ __m128i __DEFAULT_FN_ATTRS128
-_mm_maskz_madd52lo_epu64 (__mmask8 __M, __m128i __X, __m128i __Y, __m128i __Z)
-{
-  return (__m128i)__builtin_ia32_selectq_128(__M,
-                                      (__v2di)_mm_madd52lo_epu64(__X, __Y, __Z),
-                                      (__v2di)_mm_setzero_si128());
+_mm_maskz_madd52hi_epu64(__mmask8 __M, __m128i __X, __m128i __Y, __m128i __Z) {
+  return (__m128i)__builtin_ia32_selectq_128(
+      __M, (__v2di)__builtin_ia32_vpmadd52huq128(__X, __Y, __Z),
+      (__v2di)_mm_setzero_si128());
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_mask_madd52lo_epu64 (__m256i __W, __mmask8 __M, __m256i __X, __m256i __Y)
-{
-  return (__m256i)__builtin_ia32_selectq_256(__M,
-                                   (__v4di)_mm256_madd52lo_epu64(__W, __X, __Y),
-                                   (__v4di)__W);
+static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_madd52hi_epu64(
+    __m256i __W, __mmask8 __M, __m256i __X, __m256i __Y) {
+  return (__m256i)__builtin_ia32_selectq_256(
+      __M, (__v4di)__builtin_ia32_vpmadd52huq256(__W, __X, __Y), (__v4di)__W);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_maskz_madd52lo_epu64 (__mmask8 __M, __m256i __X, __m256i __Y, __m256i __Z)
-{
-  return (__m256i)__builtin_ia32_selectq_256(__M,
-                                   (__v4di)_mm256_madd52lo_epu64(__X, __Y, __Z),
-                                   (__v4di)_mm256_setzero_si256());
+static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_madd52hi_epu64(
+    __mmask8 __M, __m256i __X, __m256i __Y, __m256i __Z) {
+  return (__m256i)__builtin_ia32_selectq_256(
+      __M, (__v4di)__builtin_ia32_vpmadd52huq256(__X, __Y, __Z),
+      (__v4di)_mm256_setzero_si256());
 }
 
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
+_mm_mask_madd52lo_epu64(__m128i __W, __mmask8 __M, __m128i __X, __m128i __Y) {
+  return (__m128i)__builtin_ia32_selectq_128(
+      __M, (__v2di)__builtin_ia32_vpmadd52luq128(__W, __X, __Y), (__v2di)__W);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
+_mm_maskz_madd52lo_epu64(__mmask8 __M, __m128i __X, __m128i __Y, __m128i __Z) {
+  return (__m128i)__builtin_ia32_selectq_128(
+      __M, (__v2di)__builtin_ia32_vpmadd52luq128(__X, __Y, __Z),
+      (__v2di)_mm_setzero_si128());
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_madd52lo_epu64(
+    __m256i __W, __mmask8 __M, __m256i __X, __m256i __Y) {
+  return (__m256i)__builtin_ia32_selectq_256(
+      __M, (__v4di)__builtin_ia32_vpmadd52luq256(__W, __X, __Y), (__v4di)__W);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_madd52lo_epu64(
+    __mmask8 __M, __m256i __X, __m256i __Y, __m256i __Z) {
+  return (__m256i)__builtin_ia32_selectq_256(
+      __M, (__v4di)__builtin_ia32_vpmadd52luq256(__X, __Y, __Z),
+      (__v4di)_mm256_setzero_si256());
+}
 
 #undef __DEFAULT_FN_ATTRS128
 #undef __DEFAULT_FN_ATTRS256
diff --git a/lib/include/avx512vbmi2intrin.h b/lib/include/avx512vbmi2intrin.h
index 11598c8887..a24b6e5921 100644
--- a/lib/include/avx512vbmi2intrin.h
+++ b/lib/include/avx512vbmi2intrin.h
@@ -15,8 +15,15 @@
 #define __AVX512VBMI2INTRIN_H
 
 /* Define the default attributes for the functions in this file. */
-#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("avx512vbmi2,evex512"), __min_vector_width__(512)))
+#define __DEFAULT_FN_ATTRS                                                     \
+  __attribute__((__always_inline__, __nodebug__, __target__("avx512vbmi2"),    \
+                 __min_vector_width__(512)))
 
+#if defined(__cplusplus) && (__cplusplus >= 201103L)
+#define __DEFAULT_FN_ATTRS_CONSTEXPR __DEFAULT_FN_ATTRS constexpr
+#else
+#define __DEFAULT_FN_ATTRS_CONSTEXPR __DEFAULT_FN_ATTRS
+#endif
 
 static __inline__ __m512i __DEFAULT_FN_ATTRS
 _mm512_mask_compress_epi16(__m512i __S, __mmask32 __U, __m512i __D)
@@ -212,14 +219,14 @@ _mm512_maskz_expandloadu_epi8(__mmask64 __U, void const *__P)
                                     (__v32hi)_mm512_shrdi_epi16((A), (B), (I)), \
                                     (__v32hi)_mm512_setzero_si512()))
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS
+static __inline__ __m512i __DEFAULT_FN_ATTRS_CONSTEXPR
 _mm512_shldv_epi64(__m512i __A, __m512i __B, __m512i __C)
 {
-  return (__m512i)__builtin_ia32_vpshldvq512((__v8di)__A, (__v8di)__B,
-                                             (__v8di)__C);
+  return (__m512i)__builtin_elementwise_fshl((__v8du)__A, (__v8du)__B,
+                                             (__v8du)__C);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS
+static __inline__ __m512i __DEFAULT_FN_ATTRS_CONSTEXPR
 _mm512_mask_shldv_epi64(__m512i __A, __mmask8 __U, __m512i __B, __m512i __C)
 {
   return (__m512i)__builtin_ia32_selectq_512(__U,
@@ -227,7 +234,7 @@ _mm512_mask_shldv_epi64(__m512i __A, __mmask8 __U, __m512i __B, __m512i __C)
                                       (__v8di)__A);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS
+static __inline__ __m512i __DEFAULT_FN_ATTRS_CONSTEXPR
 _mm512_maskz_shldv_epi64(__mmask8 __U, __m512i __A, __m512i __B, __m512i __C)
 {
   return (__m512i)__builtin_ia32_selectq_512(__U,
@@ -235,14 +242,14 @@ _mm512_maskz_shldv_epi64(__mmask8 __U, __m512i __A, __m512i __B, __m512i __C)
                                       (__v8di)_mm512_setzero_si512());
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS
+static __inline__ __m512i __DEFAULT_FN_ATTRS_CONSTEXPR
 _mm512_shldv_epi32(__m512i __A, __m512i __B, __m512i __C)
 {
-  return (__m512i)__builtin_ia32_vpshldvd512((__v16si)__A, (__v16si)__B,
-                                             (__v16si)__C);
+  return (__m512i)__builtin_elementwise_fshl((__v16su)__A, (__v16su)__B,
+                                             (__v16su)__C);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS
+static __inline__ __m512i __DEFAULT_FN_ATTRS_CONSTEXPR
 _mm512_mask_shldv_epi32(__m512i __A, __mmask16 __U, __m512i __B, __m512i __C)
 {
   return (__m512i)__builtin_ia32_selectd_512(__U,
@@ -250,7 +257,7 @@ _mm512_mask_shldv_epi32(__m512i __A, __mmask16 __U, __m512i __B, __m512i __C)
                                      (__v16si)__A);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS
+static __inline__ __m512i __DEFAULT_FN_ATTRS_CONSTEXPR
 _mm512_maskz_shldv_epi32(__mmask16 __U, __m512i __A, __m512i __B, __m512i __C)
 {
   return (__m512i)__builtin_ia32_selectd_512(__U,
@@ -258,14 +265,14 @@ _mm512_maskz_shldv_epi32(__mmask16 __U, __m512i __A, __m512i __B, __m512i __C)
                                      (__v16si)_mm512_setzero_si512());
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS
+static __inline__ __m512i __DEFAULT_FN_ATTRS_CONSTEXPR
 _mm512_shldv_epi16(__m512i __A, __m512i __B, __m512i __C)
 {
-  return (__m512i)__builtin_ia32_vpshldvw512((__v32hi)__A, (__v32hi)__B,
-                                             (__v32hi)__C);
+  return (__m512i)__builtin_elementwise_fshl((__v32hu)__A, (__v32hu)__B,
+                                             (__v32hu)__C);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS
+static __inline__ __m512i __DEFAULT_FN_ATTRS_CONSTEXPR
 _mm512_mask_shldv_epi16(__m512i __A, __mmask32 __U, __m512i __B, __m512i __C)
 {
   return (__m512i)__builtin_ia32_selectw_512(__U,
@@ -273,7 +280,7 @@ _mm512_mask_shldv_epi16(__m512i __A, __mmask32 __U, __m512i __B, __m512i __C)
                                      (__v32hi)__A);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS
+static __inline__ __m512i __DEFAULT_FN_ATTRS_CONSTEXPR
 _mm512_maskz_shldv_epi16(__mmask32 __U, __m512i __A, __m512i __B, __m512i __C)
 {
   return (__m512i)__builtin_ia32_selectw_512(__U,
@@ -281,14 +288,15 @@ _mm512_maskz_shldv_epi16(__mmask32 __U, __m512i __A, __m512i __B, __m512i __C)
                                      (__v32hi)_mm512_setzero_si512());
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS
+static __inline__ __m512i __DEFAULT_FN_ATTRS_CONSTEXPR
 _mm512_shrdv_epi64(__m512i __A, __m512i __B, __m512i __C)
 {
-  return (__m512i)__builtin_ia32_vpshrdvq512((__v8di)__A, (__v8di)__B,
-                                             (__v8di)__C);
+  // Ops __A and __B are swapped.
+  return (__m512i)__builtin_elementwise_fshr((__v8du)__B, (__v8du)__A,
+                                             (__v8du)__C);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS
+static __inline__ __m512i __DEFAULT_FN_ATTRS_CONSTEXPR
 _mm512_mask_shrdv_epi64(__m512i __A, __mmask8 __U, __m512i __B, __m512i __C)
 {
   return (__m512i)__builtin_ia32_selectq_512(__U,
@@ -296,7 +304,7 @@ _mm512_mask_shrdv_epi64(__m512i __A, __mmask8 __U, __m512i __B, __m512i __C)
                                       (__v8di)__A);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS
+static __inline__ __m512i __DEFAULT_FN_ATTRS_CONSTEXPR
 _mm512_maskz_shrdv_epi64(__mmask8 __U, __m512i __A, __m512i __B, __m512i __C)
 {
   return (__m512i)__builtin_ia32_selectq_512(__U,
@@ -304,14 +312,15 @@ _mm512_maskz_shrdv_epi64(__mmask8 __U, __m512i __A, __m512i __B, __m512i __C)
                                       (__v8di)_mm512_setzero_si512());
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS
+static __inline__ __m512i __DEFAULT_FN_ATTRS_CONSTEXPR
 _mm512_shrdv_epi32(__m512i __A, __m512i __B, __m512i __C)
 {
-  return (__m512i)__builtin_ia32_vpshrdvd512((__v16si)__A, (__v16si)__B,
-                                             (__v16si)__C);
+  // Ops __A and __B are swapped.
+  return (__m512i)__builtin_elementwise_fshr((__v16su)__B, (__v16su)__A,
+                                             (__v16su)__C);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS
+static __inline__ __m512i __DEFAULT_FN_ATTRS_CONSTEXPR
 _mm512_mask_shrdv_epi32(__m512i __A, __mmask16 __U, __m512i __B, __m512i __C)
 {
   return (__m512i) __builtin_ia32_selectd_512(__U,
@@ -319,7 +328,7 @@ _mm512_mask_shrdv_epi32(__m512i __A, __mmask16 __U, __m512i __B, __m512i __C)
                                      (__v16si)__A);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS
+static __inline__ __m512i __DEFAULT_FN_ATTRS_CONSTEXPR
 _mm512_maskz_shrdv_epi32(__mmask16 __U, __m512i __A, __m512i __B, __m512i __C)
 {
   return (__m512i) __builtin_ia32_selectd_512(__U,
@@ -327,14 +336,15 @@ _mm512_maskz_shrdv_epi32(__mmask16 __U, __m512i __A, __m512i __B, __m512i __C)
                                      (__v16si)_mm512_setzero_si512());
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS
+static __inline__ __m512i __DEFAULT_FN_ATTRS_CONSTEXPR
 _mm512_shrdv_epi16(__m512i __A, __m512i __B, __m512i __C)
 {
-  return (__m512i)__builtin_ia32_vpshrdvw512((__v32hi)__A, (__v32hi)__B,
-                                             (__v32hi)__C);
+  // Ops __A and __B are swapped.
+  return (__m512i)__builtin_elementwise_fshr((__v32hu)__B, (__v32hu)__A,
+                                             (__v32hu)__C);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS
+static __inline__ __m512i __DEFAULT_FN_ATTRS_CONSTEXPR
 _mm512_mask_shrdv_epi16(__m512i __A, __mmask32 __U, __m512i __B, __m512i __C)
 {
   return (__m512i)__builtin_ia32_selectw_512(__U,
@@ -342,7 +352,7 @@ _mm512_mask_shrdv_epi16(__m512i __A, __mmask32 __U, __m512i __B, __m512i __C)
                                      (__v32hi)__A);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS
+static __inline__ __m512i __DEFAULT_FN_ATTRS_CONSTEXPR
 _mm512_maskz_shrdv_epi16(__mmask32 __U, __m512i __A, __m512i __B, __m512i __C)
 {
   return (__m512i)__builtin_ia32_selectw_512(__U,
@@ -352,6 +362,7 @@ _mm512_maskz_shrdv_epi16(__mmask32 __U, __m512i __A, __m512i __B, __m512i __C)
 
 
 #undef __DEFAULT_FN_ATTRS
+#undef __DEFAULT_FN_ATTRS_CONSTEXPR
 
 #endif
 
diff --git a/lib/include/avx512vbmiintrin.h b/lib/include/avx512vbmiintrin.h
index e47cd5cadd..5ac78f0849 100644
--- a/lib/include/avx512vbmiintrin.h
+++ b/lib/include/avx512vbmiintrin.h
@@ -15,63 +15,57 @@
 #define __VBMIINTRIN_H
 
 /* Define the default attributes for the functions in this file. */
+#if defined(__cplusplus) && (__cplusplus >= 201103L)
 #define __DEFAULT_FN_ATTRS                                                     \
-  __attribute__((__always_inline__, __nodebug__,                               \
-                 __target__("avx512vbmi,evex512"), __min_vector_width__(512)))
+  __attribute__((__always_inline__, __nodebug__, __target__("avx512vbmi"),     \
+                 __min_vector_width__(512))) constexpr
+#else
+#define __DEFAULT_FN_ATTRS                                                     \
+  __attribute__((__always_inline__, __nodebug__, __target__("avx512vbmi"),     \
+                 __min_vector_width__(512)))
+#endif
 
 static __inline__ __m512i __DEFAULT_FN_ATTRS
-_mm512_permutex2var_epi8(__m512i __A, __m512i __I, __m512i __B)
-{
+_mm512_permutex2var_epi8(__m512i __A, __m512i __I, __m512i __B) {
   return (__m512i)__builtin_ia32_vpermi2varqi512((__v64qi)__A, (__v64qi)__I,
                                                  (__v64qi) __B);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS
-_mm512_mask_permutex2var_epi8(__m512i __A, __mmask64 __U, __m512i __I,
-                              __m512i __B)
-{
+static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_mask_permutex2var_epi8(
+    __m512i __A, __mmask64 __U, __m512i __I, __m512i __B) {
   return (__m512i)__builtin_ia32_selectb_512(__U,
                                (__v64qi)_mm512_permutex2var_epi8(__A, __I, __B),
                                (__v64qi)__A);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS
-_mm512_mask2_permutex2var_epi8(__m512i __A, __m512i __I, __mmask64 __U,
-                               __m512i __B)
-{
+static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_mask2_permutex2var_epi8(
+    __m512i __A, __m512i __I, __mmask64 __U, __m512i __B) {
   return (__m512i)__builtin_ia32_selectb_512(__U,
                                (__v64qi)_mm512_permutex2var_epi8(__A, __I, __B),
                                (__v64qi)__I);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS
-_mm512_maskz_permutex2var_epi8(__mmask64 __U, __m512i __A, __m512i __I,
-                               __m512i __B)
-{
+static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_maskz_permutex2var_epi8(
+    __mmask64 __U, __m512i __A, __m512i __I, __m512i __B) {
   return (__m512i)__builtin_ia32_selectb_512(__U,
                                (__v64qi)_mm512_permutex2var_epi8(__A, __I, __B),
                                (__v64qi)_mm512_setzero_si512());
 }
 
 static __inline__ __m512i __DEFAULT_FN_ATTRS
-_mm512_permutexvar_epi8 (__m512i __A, __m512i __B)
-{
+_mm512_permutexvar_epi8(__m512i __A, __m512i __B) {
   return (__m512i)__builtin_ia32_permvarqi512((__v64qi) __B, (__v64qi) __A);
 }
 
 static __inline__ __m512i __DEFAULT_FN_ATTRS
-_mm512_maskz_permutexvar_epi8 (__mmask64 __M, __m512i __A,
-        __m512i __B)
-{
+_mm512_maskz_permutexvar_epi8(__mmask64 __M, __m512i __A, __m512i __B) {
   return (__m512i)__builtin_ia32_selectb_512((__mmask64)__M,
                                      (__v64qi)_mm512_permutexvar_epi8(__A, __B),
                                      (__v64qi)_mm512_setzero_si512());
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS
-_mm512_mask_permutexvar_epi8 (__m512i __W, __mmask64 __M, __m512i __A,
-             __m512i __B)
-{
+static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_mask_permutexvar_epi8(
+    __m512i __W, __mmask64 __M, __m512i __A, __m512i __B) {
   return (__m512i)__builtin_ia32_selectb_512((__mmask64)__M,
                                      (__v64qi)_mm512_permutexvar_epi8(__A, __B),
                                      (__v64qi)__W);
@@ -100,7 +94,5 @@ _mm512_maskz_multishift_epi64_epi8(__mmask64 __M, __m512i __X, __m512i __Y)
                                 (__v64qi)_mm512_setzero_si512());
 }
 
-
 #undef __DEFAULT_FN_ATTRS
-
 #endif
diff --git a/lib/include/avx512vbmivlintrin.h b/lib/include/avx512vbmivlintrin.h
index 848ca2d18c..40a67bd63c 100644
--- a/lib/include/avx512vbmivlintrin.h
+++ b/lib/include/avx512vbmivlintrin.h
@@ -15,126 +15,114 @@
 #define __VBMIVLINTRIN_H
 
 /* Define the default attributes for the functions in this file. */
+#if defined(__cplusplus) && (__cplusplus >= 201103L)
 #define __DEFAULT_FN_ATTRS128                                                  \
   __attribute__((__always_inline__, __nodebug__,                               \
-                 __target__("avx512vbmi,avx512vl,no-evex512"),                 \
+                 __target__("avx512vbmi,avx512vl"),                            \
+                 __min_vector_width__(128))) constexpr
+#define __DEFAULT_FN_ATTRS256                                                  \
+  __attribute__((__always_inline__, __nodebug__,                               \
+                 __target__("avx512vbmi,avx512vl"),                            \
+                 __min_vector_width__(256))) constexpr
+#else
+#define __DEFAULT_FN_ATTRS128                                                  \
+  __attribute__((__always_inline__, __nodebug__,                               \
+                 __target__("avx512vbmi,avx512vl"),                            \
                  __min_vector_width__(128)))
 #define __DEFAULT_FN_ATTRS256                                                  \
   __attribute__((__always_inline__, __nodebug__,                               \
-                 __target__("avx512vbmi,avx512vl,no-evex512"),                 \
+                 __target__("avx512vbmi,avx512vl"),                            \
                  __min_vector_width__(256)))
+#endif
 
 static __inline__ __m128i __DEFAULT_FN_ATTRS128
-_mm_permutex2var_epi8(__m128i __A, __m128i __I, __m128i __B)
-{
+_mm_permutex2var_epi8(__m128i __A, __m128i __I, __m128i __B) {
   return (__m128i)__builtin_ia32_vpermi2varqi128((__v16qi)__A,
                                                  (__v16qi)__I,
                                                  (__v16qi)__B);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS128
-_mm_mask_permutex2var_epi8(__m128i __A, __mmask16 __U, __m128i __I,
-                           __m128i __B)
-{
+static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_permutex2var_epi8(
+    __m128i __A, __mmask16 __U, __m128i __I, __m128i __B) {
   return (__m128i)__builtin_ia32_selectb_128(__U,
                                   (__v16qi)_mm_permutex2var_epi8(__A, __I, __B),
                                   (__v16qi)__A);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS128
-_mm_mask2_permutex2var_epi8(__m128i __A, __m128i __I, __mmask16 __U,
-                            __m128i __B)
-{
+static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask2_permutex2var_epi8(
+    __m128i __A, __m128i __I, __mmask16 __U, __m128i __B) {
   return (__m128i)__builtin_ia32_selectb_128(__U,
                                   (__v16qi)_mm_permutex2var_epi8(__A, __I, __B),
                                   (__v16qi)__I);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS128
-_mm_maskz_permutex2var_epi8(__mmask16 __U, __m128i __A, __m128i __I,
-                            __m128i __B)
-{
+static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_permutex2var_epi8(
+    __mmask16 __U, __m128i __A, __m128i __I, __m128i __B) {
   return (__m128i)__builtin_ia32_selectb_128(__U,
                                   (__v16qi)_mm_permutex2var_epi8(__A, __I, __B),
                                   (__v16qi)_mm_setzero_si128());
 }
 
 static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_permutex2var_epi8(__m256i __A, __m256i __I, __m256i __B)
-{
+_mm256_permutex2var_epi8(__m256i __A, __m256i __I, __m256i __B) {
   return (__m256i)__builtin_ia32_vpermi2varqi256((__v32qi)__A, (__v32qi)__I,
                                                  (__v32qi)__B);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_mask_permutex2var_epi8(__m256i __A, __mmask32 __U, __m256i __I,
-                              __m256i __B)
-{
+static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_permutex2var_epi8(
+    __m256i __A, __mmask32 __U, __m256i __I, __m256i __B) {
   return (__m256i)__builtin_ia32_selectb_256(__U,
                                (__v32qi)_mm256_permutex2var_epi8(__A, __I, __B),
                                (__v32qi)__A);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_mask2_permutex2var_epi8(__m256i __A, __m256i __I, __mmask32 __U,
-                               __m256i __B)
-{
+static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask2_permutex2var_epi8(
+    __m256i __A, __m256i __I, __mmask32 __U, __m256i __B) {
   return (__m256i)__builtin_ia32_selectb_256(__U,
                                (__v32qi)_mm256_permutex2var_epi8(__A, __I, __B),
                                (__v32qi)__I);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_maskz_permutex2var_epi8(__mmask32 __U, __m256i __A, __m256i __I,
-                               __m256i __B)
-{
+static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_permutex2var_epi8(
+    __mmask32 __U, __m256i __A, __m256i __I, __m256i __B) {
   return (__m256i)__builtin_ia32_selectb_256(__U,
                                (__v32qi)_mm256_permutex2var_epi8(__A, __I, __B),
                                (__v32qi)_mm256_setzero_si256());
 }
 
 static __inline__ __m128i __DEFAULT_FN_ATTRS128
-_mm_permutexvar_epi8 (__m128i __A, __m128i __B)
-{
+_mm_permutexvar_epi8(__m128i __A, __m128i __B) {
   return (__m128i)__builtin_ia32_permvarqi128((__v16qi)__B, (__v16qi)__A);
 }
 
 static __inline__ __m128i __DEFAULT_FN_ATTRS128
-_mm_maskz_permutexvar_epi8 (__mmask16 __M, __m128i __A, __m128i __B)
-{
+_mm_maskz_permutexvar_epi8(__mmask16 __M, __m128i __A, __m128i __B) {
   return (__m128i)__builtin_ia32_selectb_128((__mmask16)__M,
                                         (__v16qi)_mm_permutexvar_epi8(__A, __B),
                                         (__v16qi)_mm_setzero_si128());
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS128
-_mm_mask_permutexvar_epi8 (__m128i __W, __mmask16 __M, __m128i __A,
-          __m128i __B)
-{
+static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_permutexvar_epi8(
+    __m128i __W, __mmask16 __M, __m128i __A, __m128i __B) {
   return (__m128i)__builtin_ia32_selectb_128((__mmask16)__M,
                                         (__v16qi)_mm_permutexvar_epi8(__A, __B),
                                         (__v16qi)__W);
 }
 
 static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_permutexvar_epi8 (__m256i __A, __m256i __B)
-{
+_mm256_permutexvar_epi8(__m256i __A, __m256i __B) {
   return (__m256i)__builtin_ia32_permvarqi256((__v32qi) __B, (__v32qi) __A);
 }
 
 static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_maskz_permutexvar_epi8 (__mmask32 __M, __m256i __A,
-        __m256i __B)
-{
+_mm256_maskz_permutexvar_epi8(__mmask32 __M, __m256i __A, __m256i __B) {
   return (__m256i)__builtin_ia32_selectb_256((__mmask32)__M,
                                      (__v32qi)_mm256_permutexvar_epi8(__A, __B),
                                      (__v32qi)_mm256_setzero_si256());
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_mask_permutexvar_epi8 (__m256i __W, __mmask32 __M, __m256i __A,
-             __m256i __B)
-{
+static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_permutexvar_epi8(
+    __m256i __W, __mmask32 __M, __m256i __A, __m256i __B) {
   return (__m256i)__builtin_ia32_selectb_256((__mmask32)__M,
                                      (__v32qi)_mm256_permutexvar_epi8(__A, __B),
                                      (__v32qi)__W);
@@ -186,8 +174,6 @@ _mm256_maskz_multishift_epi64_epi8(__mmask32 __M, __m256i __X, __m256i __Y)
                                 (__v32qi)_mm256_setzero_si256());
 }
 
-
 #undef __DEFAULT_FN_ATTRS128
 #undef __DEFAULT_FN_ATTRS256
-
 #endif
diff --git a/lib/include/avx512vlbf16intrin.h b/lib/include/avx512vlbf16intrin.h
index 89c9f49c7a..8543402065 100644
--- a/lib/include/avx512vlbf16intrin.h
+++ b/lib/include/avx512vlbf16intrin.h
@@ -17,13 +17,21 @@
 
 #define __DEFAULT_FN_ATTRS128                                                  \
   __attribute__((__always_inline__, __nodebug__,                               \
-                 __target__("avx512vl,avx512bf16,no-evex512"),                 \
+                 __target__("avx512vl,avx512bf16"),                            \
                  __min_vector_width__(128)))
 #define __DEFAULT_FN_ATTRS256                                                  \
   __attribute__((__always_inline__, __nodebug__,                               \
-                 __target__("avx512vl,avx512bf16,no-evex512"),                 \
+                 __target__("avx512vl,avx512bf16"),                            \
                  __min_vector_width__(256)))
 
+#if defined(__cplusplus) && (__cplusplus >= 201103L)
+#define __DEFAULT_FN_ATTRS128_CONSTEXPR __DEFAULT_FN_ATTRS128 constexpr
+#define __DEFAULT_FN_ATTRS256_CONSTEXPR __DEFAULT_FN_ATTRS256 constexpr
+#else
+#define __DEFAULT_FN_ATTRS128_CONSTEXPR __DEFAULT_FN_ATTRS128
+#define __DEFAULT_FN_ATTRS256_CONSTEXPR __DEFAULT_FN_ATTRS256
+#endif
+
 /// Convert Two Packed Single Data to One Packed BF16 Data.
 ///
 /// \headerfile <x86intrin.h>
@@ -421,9 +429,10 @@ static __inline__ __bf16 __DEFAULT_FN_ATTRS128 _mm_cvtness_sbh(float __A) {
 /// \param __A
 ///    A 128-bit vector of [4 x bfloat].
 /// \returns A 128-bit vector of [4 x float] come from conversion of __A
-static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_cvtpbh_ps(__m128bh __A) {
-  return _mm_castsi128_ps(
-      (__m128i)_mm_slli_epi32((__m128i)_mm_cvtepi16_epi32((__m128i)__A), 16));
+static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR
+_mm_cvtpbh_ps(__m128bh __A) {
+  return (__m128)_mm256_castps256_ps128(
+      (__m256) __builtin_convertvector(__A, __v8sf));
 }
 
 /// Convert Packed BF16 Data to Packed float Data.
@@ -433,9 +442,9 @@ static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_cvtpbh_ps(__m128bh __A) {
 /// \param __A
 ///    A 128-bit vector of [8 x bfloat].
 /// \returns A 256-bit vector of [8 x float] come from conversion of __A
-static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_cvtpbh_ps(__m128bh __A) {
-  return _mm256_castsi256_ps((__m256i)_mm256_slli_epi32(
-      (__m256i)_mm256_cvtepi16_epi32((__m128i)__A), 16));
+static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_cvtpbh_ps(__m128bh __A) {
+  return (__m256) __builtin_convertvector(__A, __v8sf);
 }
 
 /// Convert Packed BF16 Data to Packed float Data using zeroing mask.
@@ -448,10 +457,10 @@ static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_cvtpbh_ps(__m128bh __A) {
 /// \param __A
 ///    A 128-bit vector of [4 x bfloat].
 /// \returns A 128-bit vector of [4 x float] come from conversion of __A
-static __inline__ __m128 __DEFAULT_FN_ATTRS128
+static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR
 _mm_maskz_cvtpbh_ps(__mmask8 __U, __m128bh __A) {
-  return _mm_castsi128_ps((__m128i)_mm_slli_epi32(
-      (__m128i)_mm_maskz_cvtepi16_epi32((__mmask8)__U, (__m128i)__A), 16));
+  return (__m128)__builtin_ia32_selectps_128(
+      (__mmask8)__U, (__v4sf)_mm_cvtpbh_ps(__A), (__v4sf)_mm_setzero_ps());
 }
 
 /// Convert Packed BF16 Data to Packed float Data using zeroing mask.
@@ -464,10 +473,11 @@ _mm_maskz_cvtpbh_ps(__mmask8 __U, __m128bh __A) {
 /// \param __A
 ///    A 128-bit vector of [8 x bfloat].
 /// \returns A 256-bit vector of [8 x float] come from conversion of __A
-static __inline__ __m256 __DEFAULT_FN_ATTRS256
+static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR
 _mm256_maskz_cvtpbh_ps(__mmask8 __U, __m128bh __A) {
-  return _mm256_castsi256_ps((__m256i)_mm256_slli_epi32(
-      (__m256i)_mm256_maskz_cvtepi16_epi32((__mmask8)__U, (__m128i)__A), 16));
+  return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
+                                             (__v8sf)_mm256_cvtpbh_ps(__A),
+                                             (__v8sf)_mm256_setzero_ps());
 }
 
 /// Convert Packed BF16 Data to Packed float Data using merging mask.
@@ -483,11 +493,10 @@ _mm256_maskz_cvtpbh_ps(__mmask8 __U, __m128bh __A) {
 /// \param __A
 ///    A 128-bit vector of [4 x bfloat].
 /// \returns A 128-bit vector of [4 x float] come from conversion of __A
-static __inline__ __m128 __DEFAULT_FN_ATTRS128
+static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR
 _mm_mask_cvtpbh_ps(__m128 __S, __mmask8 __U, __m128bh __A) {
-  return _mm_castsi128_ps((__m128i)_mm_mask_slli_epi32(
-      (__m128i)__S, (__mmask8)__U, (__m128i)_mm_cvtepi16_epi32((__m128i)__A),
-      16));
+  return (__m128)__builtin_ia32_selectps_128(
+      (__mmask8)__U, (__v4sf)_mm_cvtpbh_ps(__A), (__v4sf)__S);
 }
 
 /// Convert Packed BF16 Data to Packed float Data using merging mask.
@@ -503,15 +512,16 @@ _mm_mask_cvtpbh_ps(__m128 __S, __mmask8 __U, __m128bh __A) {
 /// \param __A
 ///    A 128-bit vector of [8 x bfloat].
 /// \returns A 256-bit vector of [8 x float] come from conversion of __A
-static __inline__ __m256 __DEFAULT_FN_ATTRS256
+static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR
 _mm256_mask_cvtpbh_ps(__m256 __S, __mmask8 __U, __m128bh __A) {
-  return _mm256_castsi256_ps((__m256i)_mm256_mask_slli_epi32(
-      (__m256i)__S, (__mmask8)__U, (__m256i)_mm256_cvtepi16_epi32((__m128i)__A),
-      16));
+  return (__m256)__builtin_ia32_selectps_256(
+      (__mmask8)__U, (__v8sf)_mm256_cvtpbh_ps(__A), (__v8sf)__S);
 }
 
 #undef __DEFAULT_FN_ATTRS128
 #undef __DEFAULT_FN_ATTRS256
+#undef __DEFAULT_FN_ATTRS128_CONSTEXPR
+#undef __DEFAULT_FN_ATTRS256_CONSTEXPR
 
 #endif
 #endif
diff --git a/lib/include/avx512vlbitalgintrin.h b/lib/include/avx512vlbitalgintrin.h
index 1b01fe0b9d..edfb9c1e1f 100644
--- a/lib/include/avx512vlbitalgintrin.h
+++ b/lib/include/avx512vlbitalgintrin.h
@@ -15,101 +15,86 @@
 #define __AVX512VLBITALGINTRIN_H
 
 /* Define the default attributes for the functions in this file. */
+#if defined(__cplusplus) && (__cplusplus >= 201103L)
 #define __DEFAULT_FN_ATTRS128                                                  \
   __attribute__((__always_inline__, __nodebug__,                               \
-                 __target__("avx512vl,avx512bitalg,no-evex512"),               \
+                 __target__("avx512vl,avx512bitalg"),                          \
+                 __min_vector_width__(128))) constexpr
+#define __DEFAULT_FN_ATTRS256                                                  \
+  __attribute__((__always_inline__, __nodebug__,                               \
+                 __target__("avx512vl,avx512bitalg"),                          \
+                 __min_vector_width__(256))) constexpr
+#else
+#define __DEFAULT_FN_ATTRS128                                                  \
+  __attribute__((__always_inline__, __nodebug__,                               \
+                 __target__("avx512vl,avx512bitalg"),                          \
                  __min_vector_width__(128)))
 #define __DEFAULT_FN_ATTRS256                                                  \
   __attribute__((__always_inline__, __nodebug__,                               \
-                 __target__("avx512vl,avx512bitalg,no-evex512"),               \
+                 __target__("avx512vl,avx512bitalg"),                          \
                  __min_vector_width__(256)))
+#endif
 
 static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_popcnt_epi16(__m256i __A)
-{
+_mm256_popcnt_epi16(__m256i __A) {
   return (__m256i)__builtin_elementwise_popcount((__v16hu)__A);
 }
 
 static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_mask_popcnt_epi16(__m256i __A, __mmask16 __U, __m256i __B)
-{
-  return (__m256i) __builtin_ia32_selectw_256((__mmask16) __U,
-              (__v16hi) _mm256_popcnt_epi16(__B),
-              (__v16hi) __A);
+_mm256_mask_popcnt_epi16(__m256i __A, __mmask16 __U, __m256i __B) {
+  return (__m256i)__builtin_ia32_selectw_256(
+      (__mmask16)__U, (__v16hi)_mm256_popcnt_epi16(__B), (__v16hi)__A);
 }
 
 static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_maskz_popcnt_epi16(__mmask16 __U, __m256i __B)
-{
-  return _mm256_mask_popcnt_epi16((__m256i) _mm256_setzero_si256(),
-              __U,
-              __B);
+_mm256_maskz_popcnt_epi16(__mmask16 __U, __m256i __B) {
+  return _mm256_mask_popcnt_epi16((__m256i)_mm256_setzero_si256(), __U, __B);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS128
-_mm_popcnt_epi16(__m128i __A)
-{
+static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_popcnt_epi16(__m128i __A) {
   return (__m128i)__builtin_elementwise_popcount((__v8hu)__A);
 }
 
 static __inline__ __m128i __DEFAULT_FN_ATTRS128
-_mm_mask_popcnt_epi16(__m128i __A, __mmask8 __U, __m128i __B)
-{
-  return (__m128i) __builtin_ia32_selectw_128((__mmask8) __U,
-              (__v8hi) _mm_popcnt_epi16(__B),
-              (__v8hi) __A);
+_mm_mask_popcnt_epi16(__m128i __A, __mmask8 __U, __m128i __B) {
+  return (__m128i)__builtin_ia32_selectw_128(
+      (__mmask8)__U, (__v8hi)_mm_popcnt_epi16(__B), (__v8hi)__A);
 }
 
 static __inline__ __m128i __DEFAULT_FN_ATTRS128
-_mm_maskz_popcnt_epi16(__mmask8 __U, __m128i __B)
-{
-  return _mm_mask_popcnt_epi16((__m128i) _mm_setzero_si128(),
-              __U,
-              __B);
+_mm_maskz_popcnt_epi16(__mmask8 __U, __m128i __B) {
+  return _mm_mask_popcnt_epi16((__m128i)_mm_setzero_si128(), __U, __B);
 }
 
 static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_popcnt_epi8(__m256i __A)
-{
+_mm256_popcnt_epi8(__m256i __A) {
   return (__m256i)__builtin_elementwise_popcount((__v32qu)__A);
 }
 
 static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_mask_popcnt_epi8(__m256i __A, __mmask32 __U, __m256i __B)
-{
-  return (__m256i) __builtin_ia32_selectb_256((__mmask32) __U,
-              (__v32qi) _mm256_popcnt_epi8(__B),
-              (__v32qi) __A);
+_mm256_mask_popcnt_epi8(__m256i __A, __mmask32 __U, __m256i __B) {
+  return (__m256i)__builtin_ia32_selectb_256(
+      (__mmask32)__U, (__v32qi)_mm256_popcnt_epi8(__B), (__v32qi)__A);
 }
 
 static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_maskz_popcnt_epi8(__mmask32 __U, __m256i __B)
-{
-  return _mm256_mask_popcnt_epi8((__m256i) _mm256_setzero_si256(),
-              __U,
-              __B);
+_mm256_maskz_popcnt_epi8(__mmask32 __U, __m256i __B) {
+  return _mm256_mask_popcnt_epi8((__m256i)_mm256_setzero_si256(), __U, __B);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS128
-_mm_popcnt_epi8(__m128i __A)
-{
+static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_popcnt_epi8(__m128i __A) {
   return (__m128i)__builtin_elementwise_popcount((__v16qu)__A);
 }
 
 static __inline__ __m128i __DEFAULT_FN_ATTRS128
-_mm_mask_popcnt_epi8(__m128i __A, __mmask16 __U, __m128i __B)
-{
-  return (__m128i) __builtin_ia32_selectb_128((__mmask16) __U,
-              (__v16qi) _mm_popcnt_epi8(__B),
-              (__v16qi) __A);
+_mm_mask_popcnt_epi8(__m128i __A, __mmask16 __U, __m128i __B) {
+  return (__m128i)__builtin_ia32_selectb_128(
+      (__mmask16)__U, (__v16qi)_mm_popcnt_epi8(__B), (__v16qi)__A);
 }
 
 static __inline__ __m128i __DEFAULT_FN_ATTRS128
-_mm_maskz_popcnt_epi8(__mmask16 __U, __m128i __B)
-{
-  return _mm_mask_popcnt_epi8((__m128i) _mm_setzero_si128(),
-              __U,
-              __B);
+_mm_maskz_popcnt_epi8(__mmask16 __U, __m128i __B) {
+  return _mm_mask_popcnt_epi8((__m128i)_mm_setzero_si128(), __U, __B);
 }
 
 static __inline__ __mmask32 __DEFAULT_FN_ATTRS256
@@ -147,5 +132,4 @@ _mm_bitshuffle_epi64_mask(__m128i __A, __m128i __B)
 
 #undef __DEFAULT_FN_ATTRS128
 #undef __DEFAULT_FN_ATTRS256
-
 #endif
diff --git a/lib/include/avx512vlbwintrin.h b/lib/include/avx512vlbwintrin.h
index 9aedba0669..fb5d9d4dcc 100644
--- a/lib/include/avx512vlbwintrin.h
+++ b/lib/include/avx512vlbwintrin.h
@@ -17,12 +17,18 @@
 /* Define the default attributes for the functions in this file. */
 #define __DEFAULT_FN_ATTRS128                                                  \
   __attribute__((__always_inline__, __nodebug__,                               \
-                 __target__("avx512vl,avx512bw,no-evex512"),                   \
-                 __min_vector_width__(128)))
+                 __target__("avx512vl,avx512bw"), __min_vector_width__(128)))
 #define __DEFAULT_FN_ATTRS256                                                  \
   __attribute__((__always_inline__, __nodebug__,                               \
-                 __target__("avx512vl,avx512bw,no-evex512"),                   \
-                 __min_vector_width__(256)))
+                 __target__("avx512vl,avx512bw"), __min_vector_width__(256)))
+
+#if defined(__cplusplus) && (__cplusplus >= 201103L)
+#define __DEFAULT_FN_ATTRS128_CONSTEXPR __DEFAULT_FN_ATTRS128 constexpr
+#define __DEFAULT_FN_ATTRS256_CONSTEXPR __DEFAULT_FN_ATTRS256 constexpr
+#else
+#define __DEFAULT_FN_ATTRS128_CONSTEXPR __DEFAULT_FN_ATTRS128
+#define __DEFAULT_FN_ATTRS256_CONSTEXPR __DEFAULT_FN_ATTRS256
+#endif
 
 /* Integer compare */
 
@@ -306,250 +312,238 @@
 #define _mm256_mask_cmpneq_epu16_mask(k, A, B) \
     _mm256_mask_cmp_epu16_mask((k), (A), (B), _MM_CMPINT_NE)
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_mask_add_epi8(__m256i __W, __mmask32 __U, __m256i __A, __m256i __B){
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_mask_add_epi8(__m256i __W, __mmask32 __U, __m256i __A, __m256i __B) {
   return (__m256i)__builtin_ia32_selectb_256((__mmask32)__U,
                                              (__v32qi)_mm256_add_epi8(__A, __B),
                                              (__v32qi)__W);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
 _mm256_maskz_add_epi8(__mmask32 __U, __m256i __A, __m256i __B) {
   return (__m256i)__builtin_ia32_selectb_256((__mmask32)__U,
                                              (__v32qi)_mm256_add_epi8(__A, __B),
                                              (__v32qi)_mm256_setzero_si256());
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
 _mm256_mask_add_epi16(__m256i __W, __mmask16 __U, __m256i __A, __m256i __B) {
   return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U,
                                              (__v16hi)_mm256_add_epi16(__A, __B),
                                              (__v16hi)__W);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
 _mm256_maskz_add_epi16(__mmask16 __U, __m256i __A, __m256i __B) {
   return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U,
                                              (__v16hi)_mm256_add_epi16(__A, __B),
                                              (__v16hi)_mm256_setzero_si256());
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
 _mm256_mask_sub_epi8(__m256i __W, __mmask32 __U, __m256i __A, __m256i __B) {
   return (__m256i)__builtin_ia32_selectb_256((__mmask32)__U,
                                              (__v32qi)_mm256_sub_epi8(__A, __B),
                                              (__v32qi)__W);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
 _mm256_maskz_sub_epi8(__mmask32 __U, __m256i __A, __m256i __B) {
   return (__m256i)__builtin_ia32_selectb_256((__mmask32)__U,
                                              (__v32qi)_mm256_sub_epi8(__A, __B),
                                              (__v32qi)_mm256_setzero_si256());
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
 _mm256_mask_sub_epi16(__m256i __W, __mmask16 __U, __m256i __A, __m256i __B) {
   return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U,
                                              (__v16hi)_mm256_sub_epi16(__A, __B),
                                              (__v16hi)__W);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
 _mm256_maskz_sub_epi16(__mmask16 __U, __m256i __A, __m256i __B) {
   return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U,
                                              (__v16hi)_mm256_sub_epi16(__A, __B),
                                              (__v16hi)_mm256_setzero_si256());
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS128
+static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
 _mm_mask_add_epi8(__m128i __W, __mmask16 __U, __m128i __A, __m128i __B) {
   return (__m128i)__builtin_ia32_selectb_128((__mmask16)__U,
                                              (__v16qi)_mm_add_epi8(__A, __B),
                                              (__v16qi)__W);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS128
+static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
 _mm_maskz_add_epi8(__mmask16 __U, __m128i __A, __m128i __B) {
   return (__m128i)__builtin_ia32_selectb_128((__mmask16)__U,
                                              (__v16qi)_mm_add_epi8(__A, __B),
                                              (__v16qi)_mm_setzero_si128());
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS128
+static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
 _mm_mask_add_epi16(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) {
   return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U,
                                              (__v8hi)_mm_add_epi16(__A, __B),
                                              (__v8hi)__W);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS128
+static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
 _mm_maskz_add_epi16(__mmask8 __U, __m128i __A, __m128i __B) {
   return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U,
                                              (__v8hi)_mm_add_epi16(__A, __B),
                                              (__v8hi)_mm_setzero_si128());
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS128
+static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
 _mm_mask_sub_epi8(__m128i __W, __mmask16 __U, __m128i __A, __m128i __B) {
   return (__m128i)__builtin_ia32_selectb_128((__mmask16)__U,
                                              (__v16qi)_mm_sub_epi8(__A, __B),
                                              (__v16qi)__W);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS128
+static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
 _mm_maskz_sub_epi8(__mmask16 __U, __m128i __A, __m128i __B) {
   return (__m128i)__builtin_ia32_selectb_128((__mmask16)__U,
                                              (__v16qi)_mm_sub_epi8(__A, __B),
                                              (__v16qi)_mm_setzero_si128());
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS128
+static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
 _mm_mask_sub_epi16(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) {
   return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U,
                                              (__v8hi)_mm_sub_epi16(__A, __B),
                                              (__v8hi)__W);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS128
+static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
 _mm_maskz_sub_epi16(__mmask8 __U, __m128i __A, __m128i __B) {
   return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U,
                                              (__v8hi)_mm_sub_epi16(__A, __B),
                                              (__v8hi)_mm_setzero_si128());
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
 _mm256_mask_mullo_epi16(__m256i __W, __mmask16 __U, __m256i __A, __m256i __B) {
   return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U,
                                              (__v16hi)_mm256_mullo_epi16(__A, __B),
                                              (__v16hi)__W);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
 _mm256_maskz_mullo_epi16(__mmask16 __U, __m256i __A, __m256i __B) {
   return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U,
                                              (__v16hi)_mm256_mullo_epi16(__A, __B),
                                              (__v16hi)_mm256_setzero_si256());
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS128
+static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
 _mm_mask_mullo_epi16(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) {
   return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U,
                                              (__v8hi)_mm_mullo_epi16(__A, __B),
                                              (__v8hi)__W);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS128
+static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
 _mm_maskz_mullo_epi16(__mmask8 __U, __m128i __A, __m128i __B) {
   return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U,
                                              (__v8hi)_mm_mullo_epi16(__A, __B),
                                              (__v8hi)_mm_setzero_si128());
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS128
-_mm_mask_blend_epi8 (__mmask16 __U, __m128i __A, __m128i __W)
-{
+static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
+_mm_mask_blend_epi8(__mmask16 __U, __m128i __A, __m128i __W) {
   return (__m128i) __builtin_ia32_selectb_128 ((__mmask16) __U,
               (__v16qi) __W,
               (__v16qi) __A);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_mask_blend_epi8 (__mmask32 __U, __m256i __A, __m256i __W)
-{
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_mask_blend_epi8(__mmask32 __U, __m256i __A, __m256i __W) {
   return (__m256i) __builtin_ia32_selectb_256 ((__mmask32) __U,
                (__v32qi) __W,
                (__v32qi) __A);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS128
-_mm_mask_blend_epi16 (__mmask8 __U, __m128i __A, __m128i __W)
-{
+static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
+_mm_mask_blend_epi16(__mmask8 __U, __m128i __A, __m128i __W) {
   return (__m128i) __builtin_ia32_selectw_128 ((__mmask8) __U,
                (__v8hi) __W,
                (__v8hi) __A);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_mask_blend_epi16 (__mmask16 __U, __m256i __A, __m256i __W)
-{
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_mask_blend_epi16(__mmask16 __U, __m256i __A, __m256i __W) {
   return (__m256i) __builtin_ia32_selectw_256 ((__mmask16) __U,
                (__v16hi) __W,
                (__v16hi) __A);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS128
-_mm_mask_abs_epi8(__m128i __W, __mmask16 __U, __m128i __A)
-{
+static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
+_mm_mask_abs_epi8(__m128i __W, __mmask16 __U, __m128i __A) {
   return (__m128i)__builtin_ia32_selectb_128((__mmask16)__U,
                                              (__v16qi)_mm_abs_epi8(__A),
                                              (__v16qi)__W);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS128
-_mm_maskz_abs_epi8(__mmask16 __U, __m128i __A)
-{
+static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
+_mm_maskz_abs_epi8(__mmask16 __U, __m128i __A) {
   return (__m128i)__builtin_ia32_selectb_128((__mmask16)__U,
                                              (__v16qi)_mm_abs_epi8(__A),
                                              (__v16qi)_mm_setzero_si128());
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_mask_abs_epi8(__m256i __W, __mmask32 __U, __m256i __A)
-{
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_mask_abs_epi8(__m256i __W, __mmask32 __U, __m256i __A) {
   return (__m256i)__builtin_ia32_selectb_256((__mmask32)__U,
                                              (__v32qi)_mm256_abs_epi8(__A),
                                              (__v32qi)__W);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_maskz_abs_epi8 (__mmask32 __U, __m256i __A)
-{
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_maskz_abs_epi8(__mmask32 __U, __m256i __A) {
   return (__m256i)__builtin_ia32_selectb_256((__mmask32)__U,
                                              (__v32qi)_mm256_abs_epi8(__A),
                                              (__v32qi)_mm256_setzero_si256());
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS128
-_mm_mask_abs_epi16(__m128i __W, __mmask8 __U, __m128i __A)
-{
+static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
+_mm_mask_abs_epi16(__m128i __W, __mmask8 __U, __m128i __A) {
   return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U,
                                              (__v8hi)_mm_abs_epi16(__A),
                                              (__v8hi)__W);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS128
-_mm_maskz_abs_epi16(__mmask8 __U, __m128i __A)
-{
+static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
+_mm_maskz_abs_epi16(__mmask8 __U, __m128i __A) {
   return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U,
                                              (__v8hi)_mm_abs_epi16(__A),
                                              (__v8hi)_mm_setzero_si128());
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_mask_abs_epi16(__m256i __W, __mmask16 __U, __m256i __A)
-{
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_mask_abs_epi16(__m256i __W, __mmask16 __U, __m256i __A) {
   return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U,
                                              (__v16hi)_mm256_abs_epi16(__A),
                                              (__v16hi)__W);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_maskz_abs_epi16(__mmask16 __U, __m256i __A)
-{
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_maskz_abs_epi16(__mmask16 __U, __m256i __A) {
   return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U,
                                              (__v16hi)_mm256_abs_epi16(__A),
                                              (__v16hi)_mm256_setzero_si256());
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS128
+static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
 _mm_maskz_packs_epi32(__mmask8 __M, __m128i __A, __m128i __B) {
   return (__m128i)__builtin_ia32_selectw_128((__mmask8)__M,
                                              (__v8hi)_mm_packs_epi32(__A, __B),
                                              (__v8hi)_mm_setzero_si128());
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS128
+static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
 _mm_mask_packs_epi32(__m128i __W, __mmask8 __M, __m128i __A, __m128i __B)
 {
   return (__m128i)__builtin_ia32_selectw_128((__mmask8)__M,
@@ -557,7 +551,7 @@ _mm_mask_packs_epi32(__m128i __W, __mmask8 __M, __m128i __A, __m128i __B)
                                              (__v8hi)__W);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
 _mm256_maskz_packs_epi32(__mmask16 __M, __m256i __A, __m256i __B)
 {
   return (__m256i)__builtin_ia32_selectw_256((__mmask16)__M,
@@ -565,7 +559,7 @@ _mm256_maskz_packs_epi32(__mmask16 __M, __m256i __A, __m256i __B)
                                           (__v16hi)_mm256_setzero_si256());
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
 _mm256_mask_packs_epi32(__m256i __W, __mmask16 __M, __m256i __A, __m256i __B)
 {
   return (__m256i)__builtin_ia32_selectw_256((__mmask16)__M,
@@ -573,7 +567,7 @@ _mm256_mask_packs_epi32(__m256i __W, __mmask16 __M, __m256i __A, __m256i __B)
                                           (__v16hi)__W);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS128
+static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
 _mm_maskz_packs_epi16(__mmask16 __M, __m128i __A, __m128i __B)
 {
   return (__m128i)__builtin_ia32_selectb_128((__mmask16)__M,
@@ -581,7 +575,7 @@ _mm_maskz_packs_epi16(__mmask16 __M, __m128i __A, __m128i __B)
                                              (__v16qi)_mm_setzero_si128());
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS128
+static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
 _mm_mask_packs_epi16(__m128i __W, __mmask16 __M, __m128i __A, __m128i __B)
 {
   return (__m128i)__builtin_ia32_selectb_128((__mmask16)__M,
@@ -589,7 +583,7 @@ _mm_mask_packs_epi16(__m128i __W, __mmask16 __M, __m128i __A, __m128i __B)
                                              (__v16qi)__W);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
 _mm256_maskz_packs_epi16(__mmask32 __M, __m256i __A, __m256i __B)
 {
   return (__m256i)__builtin_ia32_selectb_256((__mmask32)__M,
@@ -597,7 +591,7 @@ _mm256_maskz_packs_epi16(__mmask32 __M, __m256i __A, __m256i __B)
                                           (__v32qi)_mm256_setzero_si256());
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
 _mm256_mask_packs_epi16(__m256i __W, __mmask32 __M, __m256i __A, __m256i __B)
 {
   return (__m256i)__builtin_ia32_selectb_256((__mmask32)__M,
@@ -605,7 +599,7 @@ _mm256_mask_packs_epi16(__m256i __W, __mmask32 __M, __m256i __A, __m256i __B)
                                           (__v32qi)__W);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS128
+static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
 _mm_maskz_packus_epi32(__mmask8 __M, __m128i __A, __m128i __B)
 {
   return (__m128i)__builtin_ia32_selectw_128((__mmask8)__M,
@@ -613,7 +607,7 @@ _mm_maskz_packus_epi32(__mmask8 __M, __m128i __A, __m128i __B)
                                              (__v8hi)_mm_setzero_si128());
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS128
+static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
 _mm_mask_packus_epi32(__m128i __W, __mmask8 __M, __m128i __A, __m128i __B)
 {
   return (__m128i)__builtin_ia32_selectw_128((__mmask8)__M,
@@ -621,7 +615,7 @@ _mm_mask_packus_epi32(__m128i __W, __mmask8 __M, __m128i __A, __m128i __B)
                                              (__v8hi)__W);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
 _mm256_maskz_packus_epi32(__mmask16 __M, __m256i __A, __m256i __B)
 {
   return (__m256i)__builtin_ia32_selectw_256((__mmask16)__M,
@@ -629,7 +623,7 @@ _mm256_maskz_packus_epi32(__mmask16 __M, __m256i __A, __m256i __B)
                                          (__v16hi)_mm256_setzero_si256());
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
 _mm256_mask_packus_epi32(__m256i __W, __mmask16 __M, __m256i __A, __m256i __B)
 {
   return (__m256i)__builtin_ia32_selectw_256((__mmask16)__M,
@@ -637,7 +631,7 @@ _mm256_mask_packus_epi32(__m256i __W, __mmask16 __M, __m256i __A, __m256i __B)
                                          (__v16hi)__W);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS128
+static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
 _mm_maskz_packus_epi16(__mmask16 __M, __m128i __A, __m128i __B)
 {
   return (__m128i)__builtin_ia32_selectb_128((__mmask16)__M,
@@ -645,7 +639,7 @@ _mm_maskz_packus_epi16(__mmask16 __M, __m128i __A, __m128i __B)
                                             (__v16qi)_mm_setzero_si128());
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS128
+static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
 _mm_mask_packus_epi16(__m128i __W, __mmask16 __M, __m128i __A, __m128i __B)
 {
   return (__m128i)__builtin_ia32_selectb_128((__mmask16)__M,
@@ -653,7 +647,7 @@ _mm_mask_packus_epi16(__m128i __W, __mmask16 __M, __m128i __A, __m128i __B)
                                             (__v16qi)__W);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
 _mm256_maskz_packus_epi16(__mmask32 __M, __m256i __A, __m256i __B)
 {
   return (__m256i)__builtin_ia32_selectb_256((__mmask32)__M,
@@ -661,7 +655,7 @@ _mm256_maskz_packus_epi16(__mmask32 __M, __m256i __A, __m256i __B)
                                          (__v32qi)_mm256_setzero_si256());
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
 _mm256_mask_packus_epi16(__m256i __W, __mmask32 __M, __m256i __A, __m256i __B)
 {
   return (__m256i)__builtin_ia32_selectb_256((__mmask32)__M,
@@ -669,7 +663,7 @@ _mm256_mask_packus_epi16(__m256i __W, __mmask32 __M, __m256i __A, __m256i __B)
                                          (__v32qi)__W);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS128
+static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
 _mm_mask_adds_epi8(__m128i __W, __mmask16 __U, __m128i __A, __m128i __B)
 {
   return (__m128i)__builtin_ia32_selectb_128((__mmask16)__U,
@@ -677,7 +671,7 @@ _mm_mask_adds_epi8(__m128i __W, __mmask16 __U, __m128i __A, __m128i __B)
                                              (__v16qi)__W);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS128
+static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
 _mm_maskz_adds_epi8(__mmask16 __U, __m128i __A, __m128i __B)
 {
   return (__m128i)__builtin_ia32_selectb_128((__mmask16)__U,
@@ -685,7 +679,7 @@ _mm_maskz_adds_epi8(__mmask16 __U, __m128i __A, __m128i __B)
                                              (__v16qi)_mm_setzero_si128());
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
 _mm256_mask_adds_epi8(__m256i __W, __mmask32 __U, __m256i __A, __m256i __B)
 {
   return (__m256i)__builtin_ia32_selectb_256((__mmask32)__U,
@@ -693,7 +687,7 @@ _mm256_mask_adds_epi8(__m256i __W, __mmask32 __U, __m256i __A, __m256i __B)
                                             (__v32qi)__W);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
 _mm256_maskz_adds_epi8(__mmask32 __U, __m256i __A, __m256i __B)
 {
   return (__m256i)__builtin_ia32_selectb_256((__mmask32)__U,
@@ -701,7 +695,7 @@ _mm256_maskz_adds_epi8(__mmask32 __U, __m256i __A, __m256i __B)
                                             (__v32qi)_mm256_setzero_si256());
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS128
+static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
 _mm_mask_adds_epi16(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
 {
   return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U,
@@ -709,7 +703,7 @@ _mm_mask_adds_epi16(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
                                              (__v8hi)__W);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS128
+static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
 _mm_maskz_adds_epi16(__mmask8 __U, __m128i __A, __m128i __B)
 {
   return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U,
@@ -717,7 +711,7 @@ _mm_maskz_adds_epi16(__mmask8 __U, __m128i __A, __m128i __B)
                                              (__v8hi)_mm_setzero_si128());
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
 _mm256_mask_adds_epi16(__m256i __W, __mmask16 __U, __m256i __A, __m256i __B)
 {
   return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U,
@@ -725,7 +719,7 @@ _mm256_mask_adds_epi16(__m256i __W, __mmask16 __U, __m256i __A, __m256i __B)
                                            (__v16hi)__W);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
 _mm256_maskz_adds_epi16(__mmask16 __U, __m256i __A, __m256i __B)
 {
   return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U,
@@ -733,7 +727,7 @@ _mm256_maskz_adds_epi16(__mmask16 __U, __m256i __A, __m256i __B)
                                            (__v16hi)_mm256_setzero_si256());
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS128
+static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
 _mm_mask_adds_epu8(__m128i __W, __mmask16 __U, __m128i __A, __m128i __B)
 {
   return (__m128i)__builtin_ia32_selectb_128((__mmask16)__U,
@@ -741,7 +735,7 @@ _mm_mask_adds_epu8(__m128i __W, __mmask16 __U, __m128i __A, __m128i __B)
                                              (__v16qi)__W);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS128
+static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
 _mm_maskz_adds_epu8(__mmask16 __U, __m128i __A, __m128i __B)
 {
   return (__m128i)__builtin_ia32_selectb_128((__mmask16)__U,
@@ -749,7 +743,7 @@ _mm_maskz_adds_epu8(__mmask16 __U, __m128i __A, __m128i __B)
                                              (__v16qi)_mm_setzero_si128());
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
 _mm256_mask_adds_epu8(__m256i __W, __mmask32 __U, __m256i __A, __m256i __B)
 {
   return (__m256i)__builtin_ia32_selectb_256((__mmask32)__U,
@@ -757,7 +751,7 @@ _mm256_mask_adds_epu8(__m256i __W, __mmask32 __U, __m256i __A, __m256i __B)
                                             (__v32qi)__W);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
 _mm256_maskz_adds_epu8(__mmask32 __U, __m256i __A, __m256i __B)
 {
   return (__m256i)__builtin_ia32_selectb_256((__mmask32)__U,
@@ -765,7 +759,7 @@ _mm256_maskz_adds_epu8(__mmask32 __U, __m256i __A, __m256i __B)
                                             (__v32qi)_mm256_setzero_si256());
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS128
+static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
 _mm_mask_adds_epu16(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
 {
   return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U,
@@ -773,7 +767,7 @@ _mm_mask_adds_epu16(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
                                              (__v8hi)__W);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS128
+static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
 _mm_maskz_adds_epu16(__mmask8 __U, __m128i __A, __m128i __B)
 {
   return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U,
@@ -781,7 +775,7 @@ _mm_maskz_adds_epu16(__mmask8 __U, __m128i __A, __m128i __B)
                                              (__v8hi)_mm_setzero_si128());
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
 _mm256_mask_adds_epu16(__m256i __W, __mmask16 __U, __m256i __A, __m256i __B)
 {
   return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U,
@@ -789,7 +783,7 @@ _mm256_mask_adds_epu16(__m256i __W, __mmask16 __U, __m256i __A, __m256i __B)
                                            (__v16hi)__W);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
 _mm256_maskz_adds_epu16(__mmask16 __U, __m256i __A, __m256i __B)
 {
   return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U,
@@ -797,359 +791,311 @@ _mm256_maskz_adds_epu16(__mmask16 __U, __m256i __A, __m256i __B)
                                            (__v16hi)_mm256_setzero_si256());
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS128
-_mm_mask_avg_epu8(__m128i __W, __mmask16 __U, __m128i __A, __m128i __B)
-{
-  return (__m128i)__builtin_ia32_selectb_128((__mmask16)__U,
-                                             (__v16qi)_mm_avg_epu8(__A, __B),
-                                             (__v16qi)__W);
+static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
+_mm_mask_avg_epu8(__m128i __W, __mmask16 __U, __m128i __A, __m128i __B) {
+  return (__m128i)__builtin_ia32_selectb_128(
+      (__mmask16)__U, (__v16qi)_mm_avg_epu8(__A, __B), (__v16qi)__W);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS128
-_mm_maskz_avg_epu8(__mmask16 __U, __m128i __A, __m128i __B)
-{
+static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
+_mm_maskz_avg_epu8(__mmask16 __U, __m128i __A, __m128i __B) {
   return (__m128i)__builtin_ia32_selectb_128((__mmask16)__U,
                                              (__v16qi)_mm_avg_epu8(__A, __B),
                                              (__v16qi)_mm_setzero_si128());
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_mask_avg_epu8(__m256i __W, __mmask32 __U, __m256i __A, __m256i __B)
-{
-  return (__m256i)__builtin_ia32_selectb_256((__mmask32)__U,
-                                             (__v32qi)_mm256_avg_epu8(__A, __B),
-                                             (__v32qi)__W);
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_mask_avg_epu8(__m256i __W, __mmask32 __U, __m256i __A, __m256i __B) {
+  return (__m256i)__builtin_ia32_selectb_256(
+      (__mmask32)__U, (__v32qi)_mm256_avg_epu8(__A, __B), (__v32qi)__W);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_maskz_avg_epu8(__mmask32 __U, __m256i __A, __m256i __B)
-{
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_maskz_avg_epu8(__mmask32 __U, __m256i __A, __m256i __B) {
   return (__m256i)__builtin_ia32_selectb_256((__mmask32)__U,
                                              (__v32qi)_mm256_avg_epu8(__A, __B),
                                              (__v32qi)_mm256_setzero_si256());
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS128
-_mm_mask_avg_epu16(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
-{
-  return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U,
-                                             (__v8hi)_mm_avg_epu16(__A, __B),
-                                             (__v8hi)__W);
+static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
+_mm_mask_avg_epu16(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) {
+  return (__m128i)__builtin_ia32_selectw_128(
+      (__mmask8)__U, (__v8hi)_mm_avg_epu16(__A, __B), (__v8hi)__W);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS128
-_mm_maskz_avg_epu16(__mmask8 __U, __m128i __A, __m128i __B)
-{
+static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
+_mm_maskz_avg_epu16(__mmask8 __U, __m128i __A, __m128i __B) {
   return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U,
                                              (__v8hi)_mm_avg_epu16(__A, __B),
                                              (__v8hi)_mm_setzero_si128());
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_mask_avg_epu16(__m256i __W, __mmask16 __U, __m256i __A, __m256i __B)
-{
-  return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U,
-                                            (__v16hi)_mm256_avg_epu16(__A, __B),
-                                            (__v16hi)__W);
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_mask_avg_epu16(__m256i __W, __mmask16 __U, __m256i __A, __m256i __B) {
+  return (__m256i)__builtin_ia32_selectw_256(
+      (__mmask16)__U, (__v16hi)_mm256_avg_epu16(__A, __B), (__v16hi)__W);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_maskz_avg_epu16(__mmask16 __U, __m256i __A, __m256i __B)
-{
-  return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U,
-                                            (__v16hi)_mm256_avg_epu16(__A, __B),
-                                            (__v16hi)_mm256_setzero_si256());
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_maskz_avg_epu16(__mmask16 __U, __m256i __A, __m256i __B) {
+  return (__m256i)__builtin_ia32_selectw_256(
+      (__mmask16)__U, (__v16hi)_mm256_avg_epu16(__A, __B),
+      (__v16hi)_mm256_setzero_si256());
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS128
-_mm_maskz_max_epi8(__mmask16 __M, __m128i __A, __m128i __B)
-{
+static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
+_mm_maskz_max_epi8(__mmask16 __M, __m128i __A, __m128i __B) {
   return (__m128i)__builtin_ia32_selectb_128((__mmask16)__M,
                                              (__v16qi)_mm_max_epi8(__A, __B),
                                              (__v16qi)_mm_setzero_si128());
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS128
-_mm_mask_max_epi8(__m128i __W, __mmask16 __M, __m128i __A, __m128i __B)
-{
+static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
+_mm_mask_max_epi8(__m128i __W, __mmask16 __M, __m128i __A, __m128i __B) {
   return (__m128i)__builtin_ia32_selectb_128((__mmask16)__M,
                                              (__v16qi)_mm_max_epi8(__A, __B),
                                              (__v16qi)__W);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_maskz_max_epi8(__mmask32 __M, __m256i __A, __m256i __B)
-{
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_maskz_max_epi8(__mmask32 __M, __m256i __A, __m256i __B) {
   return (__m256i)__builtin_ia32_selectb_256((__mmask32)__M,
                                              (__v32qi)_mm256_max_epi8(__A, __B),
                                              (__v32qi)_mm256_setzero_si256());
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_mask_max_epi8(__m256i __W, __mmask32 __M, __m256i __A, __m256i __B)
-{
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_mask_max_epi8(__m256i __W, __mmask32 __M, __m256i __A, __m256i __B) {
   return (__m256i)__builtin_ia32_selectb_256((__mmask32)__M,
                                              (__v32qi)_mm256_max_epi8(__A, __B),
                                              (__v32qi)__W);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS128
-_mm_maskz_max_epi16(__mmask8 __M, __m128i __A, __m128i __B)
-{
+static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
+_mm_maskz_max_epi16(__mmask8 __M, __m128i __A, __m128i __B) {
   return (__m128i)__builtin_ia32_selectw_128((__mmask8)__M,
                                              (__v8hi)_mm_max_epi16(__A, __B),
                                              (__v8hi)_mm_setzero_si128());
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS128
-_mm_mask_max_epi16(__m128i __W, __mmask8 __M, __m128i __A, __m128i __B)
-{
+static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
+_mm_mask_max_epi16(__m128i __W, __mmask8 __M, __m128i __A, __m128i __B) {
   return (__m128i)__builtin_ia32_selectw_128((__mmask8)__M,
                                              (__v8hi)_mm_max_epi16(__A, __B),
                                              (__v8hi)__W);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_maskz_max_epi16(__mmask16 __M, __m256i __A, __m256i __B)
-{
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_maskz_max_epi16(__mmask16 __M, __m256i __A, __m256i __B) {
   return (__m256i)__builtin_ia32_selectw_256((__mmask16)__M,
                                             (__v16hi)_mm256_max_epi16(__A, __B),
                                             (__v16hi)_mm256_setzero_si256());
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_mask_max_epi16(__m256i __W, __mmask16 __M, __m256i __A, __m256i __B)
-{
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_mask_max_epi16(__m256i __W, __mmask16 __M, __m256i __A, __m256i __B) {
   return (__m256i)__builtin_ia32_selectw_256((__mmask16)__M,
                                             (__v16hi)_mm256_max_epi16(__A, __B),
                                             (__v16hi)__W);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS128
-_mm_maskz_max_epu8(__mmask16 __M, __m128i __A, __m128i __B)
-{
+static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
+_mm_maskz_max_epu8(__mmask16 __M, __m128i __A, __m128i __B) {
   return (__m128i)__builtin_ia32_selectb_128((__mmask16)__M,
                                              (__v16qi)_mm_max_epu8(__A, __B),
                                              (__v16qi)_mm_setzero_si128());
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS128
-_mm_mask_max_epu8(__m128i __W, __mmask16 __M, __m128i __A, __m128i __B)
-{
+static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
+_mm_mask_max_epu8(__m128i __W, __mmask16 __M, __m128i __A, __m128i __B) {
   return (__m128i)__builtin_ia32_selectb_128((__mmask16)__M,
                                              (__v16qi)_mm_max_epu8(__A, __B),
                                              (__v16qi)__W);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_maskz_max_epu8 (__mmask32 __M, __m256i __A, __m256i __B)
-{
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_maskz_max_epu8(__mmask32 __M, __m256i __A, __m256i __B) {
   return (__m256i)__builtin_ia32_selectb_256((__mmask32)__M,
                                              (__v32qi)_mm256_max_epu8(__A, __B),
                                              (__v32qi)_mm256_setzero_si256());
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_mask_max_epu8(__m256i __W, __mmask32 __M, __m256i __A, __m256i __B)
-{
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_mask_max_epu8(__m256i __W, __mmask32 __M, __m256i __A, __m256i __B) {
   return (__m256i)__builtin_ia32_selectb_256((__mmask32)__M,
                                              (__v32qi)_mm256_max_epu8(__A, __B),
                                              (__v32qi)__W);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS128
-_mm_maskz_max_epu16(__mmask8 __M, __m128i __A, __m128i __B)
-{
+static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
+_mm_maskz_max_epu16(__mmask8 __M, __m128i __A, __m128i __B) {
   return (__m128i)__builtin_ia32_selectw_128((__mmask8)__M,
                                              (__v8hi)_mm_max_epu16(__A, __B),
                                              (__v8hi)_mm_setzero_si128());
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS128
-_mm_mask_max_epu16(__m128i __W, __mmask8 __M, __m128i __A, __m128i __B)
-{
+static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
+_mm_mask_max_epu16(__m128i __W, __mmask8 __M, __m128i __A, __m128i __B) {
   return (__m128i)__builtin_ia32_selectw_128((__mmask8)__M,
                                              (__v8hi)_mm_max_epu16(__A, __B),
                                              (__v8hi)__W);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_maskz_max_epu16(__mmask16 __M, __m256i __A, __m256i __B)
-{
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_maskz_max_epu16(__mmask16 __M, __m256i __A, __m256i __B) {
   return (__m256i)__builtin_ia32_selectw_256((__mmask16)__M,
                                             (__v16hi)_mm256_max_epu16(__A, __B),
                                             (__v16hi)_mm256_setzero_si256());
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_mask_max_epu16(__m256i __W, __mmask16 __M, __m256i __A, __m256i __B)
-{
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_mask_max_epu16(__m256i __W, __mmask16 __M, __m256i __A, __m256i __B) {
   return (__m256i)__builtin_ia32_selectw_256((__mmask16)__M,
                                             (__v16hi)_mm256_max_epu16(__A, __B),
                                             (__v16hi)__W);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS128
-_mm_maskz_min_epi8(__mmask16 __M, __m128i __A, __m128i __B)
-{
+static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
+_mm_maskz_min_epi8(__mmask16 __M, __m128i __A, __m128i __B) {
   return (__m128i)__builtin_ia32_selectb_128((__mmask16)__M,
                                              (__v16qi)_mm_min_epi8(__A, __B),
                                              (__v16qi)_mm_setzero_si128());
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS128
-_mm_mask_min_epi8(__m128i __W, __mmask16 __M, __m128i __A, __m128i __B)
-{
+static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
+_mm_mask_min_epi8(__m128i __W, __mmask16 __M, __m128i __A, __m128i __B) {
   return (__m128i)__builtin_ia32_selectb_128((__mmask16)__M,
                                              (__v16qi)_mm_min_epi8(__A, __B),
                                              (__v16qi)__W);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_maskz_min_epi8(__mmask32 __M, __m256i __A, __m256i __B)
-{
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_maskz_min_epi8(__mmask32 __M, __m256i __A, __m256i __B) {
   return (__m256i)__builtin_ia32_selectb_256((__mmask32)__M,
                                              (__v32qi)_mm256_min_epi8(__A, __B),
                                              (__v32qi)_mm256_setzero_si256());
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_mask_min_epi8(__m256i __W, __mmask32 __M, __m256i __A, __m256i __B)
-{
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_mask_min_epi8(__m256i __W, __mmask32 __M, __m256i __A, __m256i __B) {
   return (__m256i)__builtin_ia32_selectb_256((__mmask32)__M,
                                              (__v32qi)_mm256_min_epi8(__A, __B),
                                              (__v32qi)__W);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS128
-_mm_maskz_min_epi16(__mmask8 __M, __m128i __A, __m128i __B)
-{
+static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
+_mm_maskz_min_epi16(__mmask8 __M, __m128i __A, __m128i __B) {
   return (__m128i)__builtin_ia32_selectw_128((__mmask8)__M,
                                              (__v8hi)_mm_min_epi16(__A, __B),
                                              (__v8hi)_mm_setzero_si128());
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS128
-_mm_mask_min_epi16(__m128i __W, __mmask8 __M, __m128i __A, __m128i __B)
-{
+static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
+_mm_mask_min_epi16(__m128i __W, __mmask8 __M, __m128i __A, __m128i __B) {
   return (__m128i)__builtin_ia32_selectw_128((__mmask8)__M,
                                              (__v8hi)_mm_min_epi16(__A, __B),
                                              (__v8hi)__W);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_maskz_min_epi16(__mmask16 __M, __m256i __A, __m256i __B)
-{
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_maskz_min_epi16(__mmask16 __M, __m256i __A, __m256i __B) {
   return (__m256i)__builtin_ia32_selectw_256((__mmask16)__M,
                                             (__v16hi)_mm256_min_epi16(__A, __B),
                                             (__v16hi)_mm256_setzero_si256());
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_mask_min_epi16(__m256i __W, __mmask16 __M, __m256i __A, __m256i __B)
-{
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_mask_min_epi16(__m256i __W, __mmask16 __M, __m256i __A, __m256i __B) {
   return (__m256i)__builtin_ia32_selectw_256((__mmask16)__M,
                                             (__v16hi)_mm256_min_epi16(__A, __B),
                                             (__v16hi)__W);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS128
-_mm_maskz_min_epu8(__mmask16 __M, __m128i __A, __m128i __B)
-{
+static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
+_mm_maskz_min_epu8(__mmask16 __M, __m128i __A, __m128i __B) {
   return (__m128i)__builtin_ia32_selectb_128((__mmask16)__M,
                                              (__v16qi)_mm_min_epu8(__A, __B),
                                              (__v16qi)_mm_setzero_si128());
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS128
-_mm_mask_min_epu8(__m128i __W, __mmask16 __M, __m128i __A, __m128i __B)
-{
+static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
+_mm_mask_min_epu8(__m128i __W, __mmask16 __M, __m128i __A, __m128i __B) {
   return (__m128i)__builtin_ia32_selectb_128((__mmask16)__M,
                                              (__v16qi)_mm_min_epu8(__A, __B),
                                              (__v16qi)__W);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_maskz_min_epu8 (__mmask32 __M, __m256i __A, __m256i __B)
-{
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_maskz_min_epu8(__mmask32 __M, __m256i __A, __m256i __B) {
   return (__m256i)__builtin_ia32_selectb_256((__mmask32)__M,
                                              (__v32qi)_mm256_min_epu8(__A, __B),
                                              (__v32qi)_mm256_setzero_si256());
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_mask_min_epu8(__m256i __W, __mmask32 __M, __m256i __A, __m256i __B)
-{
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_mask_min_epu8(__m256i __W, __mmask32 __M, __m256i __A, __m256i __B) {
   return (__m256i)__builtin_ia32_selectb_256((__mmask32)__M,
                                              (__v32qi)_mm256_min_epu8(__A, __B),
                                              (__v32qi)__W);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS128
-_mm_maskz_min_epu16(__mmask8 __M, __m128i __A, __m128i __B)
-{
+static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
+_mm_maskz_min_epu16(__mmask8 __M, __m128i __A, __m128i __B) {
   return (__m128i)__builtin_ia32_selectw_128((__mmask8)__M,
                                              (__v8hi)_mm_min_epu16(__A, __B),
                                              (__v8hi)_mm_setzero_si128());
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS128
-_mm_mask_min_epu16(__m128i __W, __mmask8 __M, __m128i __A, __m128i __B)
-{
+static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
+_mm_mask_min_epu16(__m128i __W, __mmask8 __M, __m128i __A, __m128i __B) {
   return (__m128i)__builtin_ia32_selectw_128((__mmask8)__M,
                                              (__v8hi)_mm_min_epu16(__A, __B),
                                              (__v8hi)__W);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_maskz_min_epu16(__mmask16 __M, __m256i __A, __m256i __B)
-{
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_maskz_min_epu16(__mmask16 __M, __m256i __A, __m256i __B) {
   return (__m256i)__builtin_ia32_selectw_256((__mmask16)__M,
                                             (__v16hi)_mm256_min_epu16(__A, __B),
                                             (__v16hi)_mm256_setzero_si256());
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_mask_min_epu16(__m256i __W, __mmask16 __M, __m256i __A, __m256i __B)
-{
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_mask_min_epu16(__m256i __W, __mmask16 __M, __m256i __A, __m256i __B) {
   return (__m256i)__builtin_ia32_selectw_256((__mmask16)__M,
                                             (__v16hi)_mm256_min_epu16(__A, __B),
                                             (__v16hi)__W);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS128
-_mm_mask_shuffle_epi8(__m128i __W, __mmask16 __U, __m128i __A, __m128i __B)
-{
+static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
+_mm_mask_shuffle_epi8(__m128i __W, __mmask16 __U, __m128i __A, __m128i __B) {
   return (__m128i)__builtin_ia32_selectb_128((__mmask16)__U,
                                             (__v16qi)_mm_shuffle_epi8(__A, __B),
                                             (__v16qi)__W);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS128
-_mm_maskz_shuffle_epi8(__mmask16 __U, __m128i __A, __m128i __B)
-{
+static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
+_mm_maskz_shuffle_epi8(__mmask16 __U, __m128i __A, __m128i __B) {
   return (__m128i)__builtin_ia32_selectb_128((__mmask16)__U,
                                             (__v16qi)_mm_shuffle_epi8(__A, __B),
                                             (__v16qi)_mm_setzero_si128());
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_mask_shuffle_epi8(__m256i __W, __mmask32 __U, __m256i __A, __m256i __B)
-{
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_mask_shuffle_epi8(__m256i __W, __mmask32 __U, __m256i __A, __m256i __B) {
   return (__m256i)__builtin_ia32_selectb_256((__mmask32)__U,
                                          (__v32qi)_mm256_shuffle_epi8(__A, __B),
                                          (__v32qi)__W);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_maskz_shuffle_epi8(__mmask32 __U, __m256i __A, __m256i __B)
-{
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_maskz_shuffle_epi8(__mmask32 __U, __m256i __A, __m256i __B) {
   return (__m256i)__builtin_ia32_selectb_256((__mmask32)__U,
                                          (__v32qi)_mm256_shuffle_epi8(__A, __B),
                                          (__v32qi)_mm256_setzero_si256());
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS128
+static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
 _mm_mask_subs_epi8(__m128i __W, __mmask16 __U, __m128i __A, __m128i __B)
 {
   return (__m128i)__builtin_ia32_selectb_128((__mmask16)__U,
@@ -1157,7 +1103,7 @@ _mm_mask_subs_epi8(__m128i __W, __mmask16 __U, __m128i __A, __m128i __B)
                                              (__v16qi)__W);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS128
+static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
 _mm_maskz_subs_epi8(__mmask16 __U, __m128i __A, __m128i __B)
 {
   return (__m128i)__builtin_ia32_selectb_128((__mmask16)__U,
@@ -1165,7 +1111,7 @@ _mm_maskz_subs_epi8(__mmask16 __U, __m128i __A, __m128i __B)
                                              (__v16qi)_mm_setzero_si128());
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
 _mm256_mask_subs_epi8(__m256i __W, __mmask32 __U, __m256i __A, __m256i __B)
 {
   return (__m256i)__builtin_ia32_selectb_256((__mmask32)__U,
@@ -1173,7 +1119,7 @@ _mm256_mask_subs_epi8(__m256i __W, __mmask32 __U, __m256i __A, __m256i __B)
                                             (__v32qi)__W);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
 _mm256_maskz_subs_epi8(__mmask32 __U, __m256i __A, __m256i __B)
 {
   return (__m256i)__builtin_ia32_selectb_256((__mmask32)__U,
@@ -1181,7 +1127,7 @@ _mm256_maskz_subs_epi8(__mmask32 __U, __m256i __A, __m256i __B)
                                             (__v32qi)_mm256_setzero_si256());
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS128
+static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
 _mm_mask_subs_epi16(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
 {
   return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U,
@@ -1189,7 +1135,7 @@ _mm_mask_subs_epi16(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
                                              (__v8hi)__W);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS128
+static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
 _mm_maskz_subs_epi16(__mmask8 __U, __m128i __A, __m128i __B)
 {
   return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U,
@@ -1197,7 +1143,7 @@ _mm_maskz_subs_epi16(__mmask8 __U, __m128i __A, __m128i __B)
                                              (__v8hi)_mm_setzero_si128());
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
 _mm256_mask_subs_epi16(__m256i __W, __mmask16 __U, __m256i __A, __m256i __B)
 {
   return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U,
@@ -1205,7 +1151,7 @@ _mm256_mask_subs_epi16(__m256i __W, __mmask16 __U, __m256i __A, __m256i __B)
                                            (__v16hi)__W);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
 _mm256_maskz_subs_epi16(__mmask16 __U, __m256i __A, __m256i __B)
 {
   return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U,
@@ -1213,7 +1159,7 @@ _mm256_maskz_subs_epi16(__mmask16 __U, __m256i __A, __m256i __B)
                                            (__v16hi)_mm256_setzero_si256());
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS128
+static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
 _mm_mask_subs_epu8(__m128i __W, __mmask16 __U, __m128i __A, __m128i __B)
 {
   return (__m128i)__builtin_ia32_selectb_128((__mmask16)__U,
@@ -1221,7 +1167,7 @@ _mm_mask_subs_epu8(__m128i __W, __mmask16 __U, __m128i __A, __m128i __B)
                                              (__v16qi)__W);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS128
+static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
 _mm_maskz_subs_epu8(__mmask16 __U, __m128i __A, __m128i __B)
 {
   return (__m128i)__builtin_ia32_selectb_128((__mmask16)__U,
@@ -1229,7 +1175,7 @@ _mm_maskz_subs_epu8(__mmask16 __U, __m128i __A, __m128i __B)
                                              (__v16qi)_mm_setzero_si128());
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
 _mm256_mask_subs_epu8(__m256i __W, __mmask32 __U, __m256i __A, __m256i __B)
 {
   return (__m256i)__builtin_ia32_selectb_256((__mmask32)__U,
@@ -1237,7 +1183,7 @@ _mm256_mask_subs_epu8(__m256i __W, __mmask32 __U, __m256i __A, __m256i __B)
                                             (__v32qi)__W);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
 _mm256_maskz_subs_epu8(__mmask32 __U, __m256i __A, __m256i __B)
 {
   return (__m256i)__builtin_ia32_selectb_256((__mmask32)__U,
@@ -1245,7 +1191,7 @@ _mm256_maskz_subs_epu8(__mmask32 __U, __m256i __A, __m256i __B)
                                             (__v32qi)_mm256_setzero_si256());
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS128
+static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
 _mm_mask_subs_epu16(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
 {
   return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U,
@@ -1253,7 +1199,7 @@ _mm_mask_subs_epu16(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
                                              (__v8hi)__W);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS128
+static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
 _mm_maskz_subs_epu16(__mmask8 __U, __m128i __A, __m128i __B)
 {
   return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U,
@@ -1261,7 +1207,7 @@ _mm_maskz_subs_epu16(__mmask8 __U, __m128i __A, __m128i __B)
                                              (__v8hi)_mm_setzero_si128());
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
 _mm256_mask_subs_epu16(__m256i __W, __mmask16 __U, __m256i __A,
       __m256i __B) {
   return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U,
@@ -1269,7 +1215,7 @@ _mm256_mask_subs_epu16(__m256i __W, __mmask16 __U, __m256i __A,
                                            (__v16hi)__W);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
 _mm256_maskz_subs_epu16(__mmask16 __U, __m256i __A, __m256i __B)
 {
   return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U,
@@ -1277,89 +1223,81 @@ _mm256_maskz_subs_epu16(__mmask16 __U, __m256i __A, __m256i __B)
                                            (__v16hi)_mm256_setzero_si256());
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS128
-_mm_permutex2var_epi16(__m128i __A, __m128i __I, __m128i __B)
-{
+static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
+_mm_permutex2var_epi16(__m128i __A, __m128i __I, __m128i __B) {
   return (__m128i)__builtin_ia32_vpermi2varhi128((__v8hi)__A, (__v8hi)__I,
                                                  (__v8hi) __B);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS128
+static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
 _mm_mask_permutex2var_epi16(__m128i __A, __mmask8 __U, __m128i __I,
-                            __m128i __B)
-{
+                            __m128i __B) {
   return (__m128i)__builtin_ia32_selectw_128(__U,
                                   (__v8hi)_mm_permutex2var_epi16(__A, __I, __B),
                                   (__v8hi)__A);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS128
+static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
 _mm_mask2_permutex2var_epi16(__m128i __A, __m128i __I, __mmask8 __U,
-                             __m128i __B)
-{
+                             __m128i __B) {
   return (__m128i)__builtin_ia32_selectw_128(__U,
                                   (__v8hi)_mm_permutex2var_epi16(__A, __I, __B),
                                   (__v8hi)__I);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS128
-_mm_maskz_permutex2var_epi16 (__mmask8 __U, __m128i __A, __m128i __I,
-            __m128i __B)
-{
+static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
+_mm_maskz_permutex2var_epi16(__mmask8 __U, __m128i __A, __m128i __I,
+                             __m128i __B) {
   return (__m128i)__builtin_ia32_selectw_128(__U,
                                   (__v8hi)_mm_permutex2var_epi16(__A, __I, __B),
                                   (__v8hi)_mm_setzero_si128());
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_permutex2var_epi16(__m256i __A, __m256i __I, __m256i __B)
-{
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_permutex2var_epi16(__m256i __A, __m256i __I, __m256i __B) {
   return (__m256i)__builtin_ia32_vpermi2varhi256((__v16hi)__A, (__v16hi)__I,
                                                  (__v16hi)__B);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
 _mm256_mask_permutex2var_epi16(__m256i __A, __mmask16 __U, __m256i __I,
-                               __m256i __B)
-{
+                               __m256i __B) {
   return (__m256i)__builtin_ia32_selectw_256(__U,
                               (__v16hi)_mm256_permutex2var_epi16(__A, __I, __B),
                               (__v16hi)__A);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
 _mm256_mask2_permutex2var_epi16(__m256i __A, __m256i __I, __mmask16 __U,
-                                __m256i __B)
-{
+                                __m256i __B) {
   return (__m256i)__builtin_ia32_selectw_256(__U,
                               (__v16hi)_mm256_permutex2var_epi16(__A, __I, __B),
                               (__v16hi)__I);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_maskz_permutex2var_epi16 (__mmask16 __U, __m256i __A, __m256i __I,
-                                 __m256i __B)
-{
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_maskz_permutex2var_epi16(__mmask16 __U, __m256i __A, __m256i __I,
+                                __m256i __B) {
   return (__m256i)__builtin_ia32_selectw_256(__U,
                               (__v16hi)_mm256_permutex2var_epi16(__A, __I, __B),
                               (__v16hi)_mm256_setzero_si256());
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS128
+static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
 _mm_mask_maddubs_epi16(__m128i __W, __mmask8 __U, __m128i __X, __m128i __Y) {
   return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U,
                                             (__v8hi)_mm_maddubs_epi16(__X, __Y),
                                             (__v8hi)__W);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS128
+static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
 _mm_maskz_maddubs_epi16(__mmask8 __U, __m128i __X, __m128i __Y) {
   return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U,
                                             (__v8hi)_mm_maddubs_epi16(__X, __Y),
                                             (__v8hi)_mm_setzero_si128());
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
 _mm256_mask_maddubs_epi16(__m256i __W, __mmask16 __U, __m256i __X,
                           __m256i __Y) {
   return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U,
@@ -1367,35 +1305,35 @@ _mm256_mask_maddubs_epi16(__m256i __W, __mmask16 __U, __m256i __X,
                                         (__v16hi)__W);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
 _mm256_maskz_maddubs_epi16(__mmask16 __U, __m256i __X, __m256i __Y) {
   return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U,
                                         (__v16hi)_mm256_maddubs_epi16(__X, __Y),
                                         (__v16hi)_mm256_setzero_si256());
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS128
+static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
 _mm_mask_madd_epi16(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) {
   return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
                                              (__v4si)_mm_madd_epi16(__A, __B),
                                              (__v4si)__W);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS128
+static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
 _mm_maskz_madd_epi16(__mmask8 __U, __m128i __A, __m128i __B) {
   return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
                                              (__v4si)_mm_madd_epi16(__A, __B),
                                              (__v4si)_mm_setzero_si128());
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
 _mm256_mask_madd_epi16(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B) {
   return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
                                             (__v8si)_mm256_madd_epi16(__A, __B),
                                             (__v8si)__W);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
 _mm256_maskz_madd_epi16(__mmask8 __U, __m256i __A, __m256i __B) {
   return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
                                             (__v8si)_mm256_madd_epi16(__A, __B),
@@ -1486,8 +1424,8 @@ _mm256_maskz_cvtusepi16_epi8 (__mmask16 __M, __m256i __A) {
                 __M);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS128
-_mm_cvtepi16_epi8 (__m128i __A) {
+static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
+_mm_cvtepi16_epi8(__m128i __A) {
   return (__m128i)__builtin_shufflevector(
       __builtin_convertvector((__v8hi)__A, __v8qi),
       (__v8qi){0, 0, 0, 0, 0, 0, 0, 0}, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11,
@@ -1527,20 +1465,20 @@ _mm_mask_cvtusepi16_storeu_epi8 (void * __P, __mmask8 __M, __m128i __A)
   __builtin_ia32_pmovuswb128mem_mask ((__v16qi *) __P, (__v8hi) __A, __M);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS256
-_mm256_cvtepi16_epi8 (__m256i __A) {
+static __inline__ __m128i __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_cvtepi16_epi8(__m256i __A) {
   return (__m128i)__builtin_convertvector((__v16hi) __A, __v16qi);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS256
-_mm256_mask_cvtepi16_epi8 (__m128i __O, __mmask16 __M, __m256i __A) {
+static __inline__ __m128i __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_mask_cvtepi16_epi8(__m128i __O, __mmask16 __M, __m256i __A) {
   return (__m128i)__builtin_ia32_selectb_128((__mmask16)__M,
                                              (__v16qi)_mm256_cvtepi16_epi8(__A),
                                              (__v16qi)__O);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS256
-_mm256_maskz_cvtepi16_epi8 (__mmask16 __M, __m256i __A) {
+static __inline__ __m128i __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_maskz_cvtepi16_epi8(__mmask16 __M, __m256i __A) {
   return (__m128i)__builtin_ia32_selectb_128((__mmask16)__M,
                                              (__v16qi)_mm256_cvtepi16_epi8(__A),
                                              (__v16qi)_mm_setzero_si128());
@@ -1564,203 +1502,198 @@ _mm256_mask_cvtusepi16_storeu_epi8 (void * __P, __mmask16 __M, __m256i __A)
   __builtin_ia32_pmovuswb256mem_mask ((__v16qi*) __P, (__v16hi) __A, __M);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS128
+static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
 _mm_mask_mulhrs_epi16(__m128i __W, __mmask8 __U, __m128i __X, __m128i __Y) {
   return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U,
                                              (__v8hi)_mm_mulhrs_epi16(__X, __Y),
                                              (__v8hi)__W);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS128
+static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
 _mm_maskz_mulhrs_epi16(__mmask8 __U, __m128i __X, __m128i __Y) {
   return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U,
                                              (__v8hi)_mm_mulhrs_epi16(__X, __Y),
                                              (__v8hi)_mm_setzero_si128());
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
 _mm256_mask_mulhrs_epi16(__m256i __W, __mmask16 __U, __m256i __X, __m256i __Y) {
   return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U,
                                          (__v16hi)_mm256_mulhrs_epi16(__X, __Y),
                                          (__v16hi)__W);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
 _mm256_maskz_mulhrs_epi16(__mmask16 __U, __m256i __X, __m256i __Y) {
   return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U,
                                          (__v16hi)_mm256_mulhrs_epi16(__X, __Y),
                                          (__v16hi)_mm256_setzero_si256());
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS128
+static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
 _mm_mask_mulhi_epu16(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) {
-  return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U,
-                                             (__v8hi)_mm_mulhi_epu16(__A, __B),
-                                             (__v8hi)__W);
+  return (__m128i)__builtin_ia32_selectw_128(
+      (__mmask8)__U, (__v8hi)_mm_mulhi_epu16(__A, __B), (__v8hi)__W);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS128
+static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
 _mm_maskz_mulhi_epu16(__mmask8 __U, __m128i __A, __m128i __B) {
   return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U,
                                              (__v8hi)_mm_mulhi_epu16(__A, __B),
                                              (__v8hi)_mm_setzero_si128());
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
 _mm256_mask_mulhi_epu16(__m256i __W, __mmask16 __U, __m256i __A, __m256i __B) {
-  return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U,
-                                          (__v16hi)_mm256_mulhi_epu16(__A, __B),
-                                          (__v16hi)__W);
+  return (__m256i)__builtin_ia32_selectw_256(
+      (__mmask16)__U, (__v16hi)_mm256_mulhi_epu16(__A, __B), (__v16hi)__W);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
 _mm256_maskz_mulhi_epu16(__mmask16 __U, __m256i __A, __m256i __B) {
-  return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U,
-                                          (__v16hi)_mm256_mulhi_epu16(__A, __B),
-                                          (__v16hi)_mm256_setzero_si256());
+  return (__m256i)__builtin_ia32_selectw_256(
+      (__mmask16)__U, (__v16hi)_mm256_mulhi_epu16(__A, __B),
+      (__v16hi)_mm256_setzero_si256());
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS128
+static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
 _mm_mask_mulhi_epi16(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) {
-  return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U,
-                                             (__v8hi)_mm_mulhi_epi16(__A, __B),
-                                             (__v8hi)__W);
+  return (__m128i)__builtin_ia32_selectw_128(
+      (__mmask8)__U, (__v8hi)_mm_mulhi_epi16(__A, __B), (__v8hi)__W);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS128
+static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
 _mm_maskz_mulhi_epi16(__mmask8 __U, __m128i __A, __m128i __B) {
   return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U,
                                              (__v8hi)_mm_mulhi_epi16(__A, __B),
                                              (__v8hi)_mm_setzero_si128());
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
 _mm256_mask_mulhi_epi16(__m256i __W, __mmask16 __U, __m256i __A, __m256i __B) {
-  return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U,
-                                          (__v16hi)_mm256_mulhi_epi16(__A, __B),
-                                          (__v16hi)__W);
+  return (__m256i)__builtin_ia32_selectw_256(
+      (__mmask16)__U, (__v16hi)_mm256_mulhi_epi16(__A, __B), (__v16hi)__W);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
 _mm256_maskz_mulhi_epi16(__mmask16 __U, __m256i __A, __m256i __B) {
-  return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U,
-                                          (__v16hi)_mm256_mulhi_epi16(__A, __B),
-                                          (__v16hi)_mm256_setzero_si256());
+  return (__m256i)__builtin_ia32_selectw_256(
+      (__mmask16)__U, (__v16hi)_mm256_mulhi_epi16(__A, __B),
+      (__v16hi)_mm256_setzero_si256());
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS128
+static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
 _mm_mask_unpackhi_epi8(__m128i __W, __mmask16 __U, __m128i __A, __m128i __B) {
-  return (__m128i)__builtin_ia32_selectb_128((__mmask16)__U,
-                                           (__v16qi)_mm_unpackhi_epi8(__A, __B),
-                                           (__v16qi)__W);
+  return (__m128i)__builtin_ia32_selectb_128(
+      (__mmask16)__U, (__v16qi)_mm_unpackhi_epi8(__A, __B), (__v16qi)__W);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS128
+static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
 _mm_maskz_unpackhi_epi8(__mmask16 __U, __m128i __A, __m128i __B) {
   return (__m128i)__builtin_ia32_selectb_128((__mmask16)__U,
                                            (__v16qi)_mm_unpackhi_epi8(__A, __B),
                                            (__v16qi)_mm_setzero_si128());
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
 _mm256_mask_unpackhi_epi8(__m256i __W, __mmask32 __U, __m256i __A, __m256i __B) {
   return (__m256i)__builtin_ia32_selectb_256((__mmask32)__U,
                                         (__v32qi)_mm256_unpackhi_epi8(__A, __B),
                                         (__v32qi)__W);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
 _mm256_maskz_unpackhi_epi8(__mmask32 __U, __m256i __A, __m256i __B) {
   return (__m256i)__builtin_ia32_selectb_256((__mmask32)__U,
                                         (__v32qi)_mm256_unpackhi_epi8(__A, __B),
                                         (__v32qi)_mm256_setzero_si256());
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS128
+static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
 _mm_mask_unpackhi_epi16(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) {
   return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U,
                                            (__v8hi)_mm_unpackhi_epi16(__A, __B),
                                            (__v8hi)__W);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS128
+static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
 _mm_maskz_unpackhi_epi16(__mmask8 __U, __m128i __A, __m128i __B) {
   return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U,
                                            (__v8hi)_mm_unpackhi_epi16(__A, __B),
                                            (__v8hi) _mm_setzero_si128());
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
 _mm256_mask_unpackhi_epi16(__m256i __W, __mmask16 __U, __m256i __A, __m256i __B) {
   return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U,
                                        (__v16hi)_mm256_unpackhi_epi16(__A, __B),
                                        (__v16hi)__W);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
 _mm256_maskz_unpackhi_epi16(__mmask16 __U, __m256i __A, __m256i __B) {
   return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U,
                                        (__v16hi)_mm256_unpackhi_epi16(__A, __B),
                                        (__v16hi)_mm256_setzero_si256());
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS128
+static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
 _mm_mask_unpacklo_epi8(__m128i __W, __mmask16 __U, __m128i __A, __m128i __B) {
   return (__m128i)__builtin_ia32_selectb_128((__mmask16)__U,
                                            (__v16qi)_mm_unpacklo_epi8(__A, __B),
                                            (__v16qi)__W);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS128
+static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
 _mm_maskz_unpacklo_epi8(__mmask16 __U, __m128i __A, __m128i __B) {
   return (__m128i)__builtin_ia32_selectb_128((__mmask16)__U,
                                            (__v16qi)_mm_unpacklo_epi8(__A, __B),
                                            (__v16qi)_mm_setzero_si128());
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
 _mm256_mask_unpacklo_epi8(__m256i __W, __mmask32 __U, __m256i __A, __m256i __B) {
   return (__m256i)__builtin_ia32_selectb_256((__mmask32)__U,
                                         (__v32qi)_mm256_unpacklo_epi8(__A, __B),
                                         (__v32qi)__W);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
 _mm256_maskz_unpacklo_epi8(__mmask32 __U, __m256i __A, __m256i __B) {
   return (__m256i)__builtin_ia32_selectb_256((__mmask32)__U,
                                         (__v32qi)_mm256_unpacklo_epi8(__A, __B),
                                         (__v32qi)_mm256_setzero_si256());
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS128
+static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
 _mm_mask_unpacklo_epi16(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) {
   return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U,
                                            (__v8hi)_mm_unpacklo_epi16(__A, __B),
                                            (__v8hi)__W);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS128
+static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
 _mm_maskz_unpacklo_epi16(__mmask8 __U, __m128i __A, __m128i __B) {
   return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U,
                                            (__v8hi)_mm_unpacklo_epi16(__A, __B),
                                            (__v8hi) _mm_setzero_si128());
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
 _mm256_mask_unpacklo_epi16(__m256i __W, __mmask16 __U, __m256i __A, __m256i __B) {
   return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U,
                                        (__v16hi)_mm256_unpacklo_epi16(__A, __B),
                                        (__v16hi)__W);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
 _mm256_maskz_unpacklo_epi16(__mmask16 __U, __m256i __A, __m256i __B) {
   return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U,
                                        (__v16hi)_mm256_unpacklo_epi16(__A, __B),
                                        (__v16hi)_mm256_setzero_si256());
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS128
+static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
 _mm_mask_cvtepi8_epi16(__m128i __W, __mmask8 __U, __m128i __A)
 {
   return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U,
@@ -1768,7 +1701,7 @@ _mm_mask_cvtepi8_epi16(__m128i __W, __mmask8 __U, __m128i __A)
                                              (__v8hi)__W);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS128
+static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
 _mm_maskz_cvtepi8_epi16(__mmask8 __U, __m128i __A)
 {
   return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U,
@@ -1776,7 +1709,7 @@ _mm_maskz_cvtepi8_epi16(__mmask8 __U, __m128i __A)
                                              (__v8hi)_mm_setzero_si128());
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
 _mm256_mask_cvtepi8_epi16(__m256i __W, __mmask16 __U, __m128i __A)
 {
   return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U,
@@ -1784,7 +1717,7 @@ _mm256_mask_cvtepi8_epi16(__m256i __W, __mmask16 __U, __m128i __A)
                                              (__v16hi)__W);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
 _mm256_maskz_cvtepi8_epi16(__mmask16 __U, __m128i __A)
 {
   return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U,
@@ -1793,7 +1726,7 @@ _mm256_maskz_cvtepi8_epi16(__mmask16 __U, __m128i __A)
 }
 
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS128
+static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
 _mm_mask_cvtepu8_epi16(__m128i __W, __mmask8 __U, __m128i __A)
 {
   return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U,
@@ -1801,7 +1734,7 @@ _mm_mask_cvtepu8_epi16(__m128i __W, __mmask8 __U, __m128i __A)
                                              (__v8hi)__W);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS128
+static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
 _mm_maskz_cvtepu8_epi16(__mmask8 __U, __m128i __A)
 {
   return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U,
@@ -1809,7 +1742,7 @@ _mm_maskz_cvtepu8_epi16(__mmask8 __U, __m128i __A)
                                              (__v8hi)_mm_setzero_si128());
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
 _mm256_mask_cvtepu8_epi16(__m256i __W, __mmask16 __U, __m128i __A)
 {
   return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U,
@@ -1817,7 +1750,7 @@ _mm256_mask_cvtepu8_epi16(__m256i __W, __mmask16 __U, __m128i __A)
                                              (__v16hi)__W);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
 _mm256_maskz_cvtepu8_epi16 (__mmask16 __U, __m128i __A)
 {
   return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U,
@@ -1868,13 +1801,13 @@ _mm256_maskz_cvtepu8_epi16 (__mmask16 __U, __m128i __A)
                                                                        (imm)), \
                                        (__v16hi)_mm256_setzero_si256()))
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
 _mm256_sllv_epi16(__m256i __A, __m256i __B)
 {
   return (__m256i)__builtin_ia32_psllv16hi((__v16hi)__A, (__v16hi)__B);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
 _mm256_mask_sllv_epi16(__m256i __W, __mmask16 __U, __m256i __A, __m256i __B)
 {
   return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U,
@@ -1882,7 +1815,7 @@ _mm256_mask_sllv_epi16(__m256i __W, __mmask16 __U, __m256i __A, __m256i __B)
                                            (__v16hi)__W);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
 _mm256_maskz_sllv_epi16(__mmask16 __U, __m256i __A, __m256i __B)
 {
   return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U,
@@ -1890,13 +1823,13 @@ _mm256_maskz_sllv_epi16(__mmask16 __U, __m256i __A, __m256i __B)
                                            (__v16hi)_mm256_setzero_si256());
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS128
+static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
 _mm_sllv_epi16(__m128i __A, __m128i __B)
 {
   return (__m128i)__builtin_ia32_psllv8hi((__v8hi)__A, (__v8hi)__B);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS128
+static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
 _mm_mask_sllv_epi16(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
 {
   return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U,
@@ -1904,7 +1837,7 @@ _mm_mask_sllv_epi16(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
                                              (__v8hi)__W);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS128
+static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
 _mm_maskz_sllv_epi16(__mmask8 __U, __m128i __A, __m128i __B)
 {
   return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U,
@@ -1944,7 +1877,7 @@ _mm256_maskz_sll_epi16(__mmask16 __U, __m256i __A, __m128i __B)
                                           (__v16hi)_mm256_setzero_si256());
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS128
+static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
 _mm_mask_slli_epi16(__m128i __W, __mmask8 __U, __m128i __A, unsigned int __B)
 {
   return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U,
@@ -1952,7 +1885,7 @@ _mm_mask_slli_epi16(__m128i __W, __mmask8 __U, __m128i __A, unsigned int __B)
                                              (__v8hi)__W);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS128
+static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
 _mm_maskz_slli_epi16 (__mmask8 __U, __m128i __A, unsigned int __B)
 {
   return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U,
@@ -1960,30 +1893,28 @@ _mm_maskz_slli_epi16 (__mmask8 __U, __m128i __A, unsigned int __B)
                                              (__v8hi)_mm_setzero_si128());
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
 _mm256_mask_slli_epi16(__m256i __W, __mmask16 __U, __m256i __A,
-                       unsigned int __B)
-{
+                       unsigned int __B) {
   return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U,
                                          (__v16hi)_mm256_slli_epi16(__A, (int)__B),
                                          (__v16hi)__W);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_maskz_slli_epi16(__mmask16 __U, __m256i __A, unsigned int __B)
-{
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_maskz_slli_epi16(__mmask16 __U, __m256i __A, unsigned int __B) {
   return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U,
                                          (__v16hi)_mm256_slli_epi16(__A, (int)__B),
                                          (__v16hi)_mm256_setzero_si256());
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
 _mm256_srlv_epi16(__m256i __A, __m256i __B)
 {
   return (__m256i)__builtin_ia32_psrlv16hi((__v16hi)__A, (__v16hi)__B);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
 _mm256_mask_srlv_epi16(__m256i __W, __mmask16 __U, __m256i __A, __m256i __B)
 {
   return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U,
@@ -1991,7 +1922,7 @@ _mm256_mask_srlv_epi16(__m256i __W, __mmask16 __U, __m256i __A, __m256i __B)
                                            (__v16hi)__W);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
 _mm256_maskz_srlv_epi16(__mmask16 __U, __m256i __A, __m256i __B)
 {
   return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U,
@@ -1999,13 +1930,13 @@ _mm256_maskz_srlv_epi16(__mmask16 __U, __m256i __A, __m256i __B)
                                            (__v16hi)_mm256_setzero_si256());
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS128
+static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
 _mm_srlv_epi16(__m128i __A, __m128i __B)
 {
   return (__m128i)__builtin_ia32_psrlv8hi((__v8hi)__A, (__v8hi)__B);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS128
+static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
 _mm_mask_srlv_epi16(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
 {
   return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U,
@@ -2013,7 +1944,7 @@ _mm_mask_srlv_epi16(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
                                              (__v8hi)__W);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS128
+static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
 _mm_maskz_srlv_epi16(__mmask8 __U, __m128i __A, __m128i __B)
 {
   return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U,
@@ -2021,13 +1952,13 @@ _mm_maskz_srlv_epi16(__mmask8 __U, __m128i __A, __m128i __B)
                                              (__v8hi)_mm_setzero_si128());
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
 _mm256_srav_epi16(__m256i __A, __m256i __B)
 {
   return (__m256i)__builtin_ia32_psrav16hi((__v16hi)__A, (__v16hi)__B);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
 _mm256_mask_srav_epi16(__m256i __W, __mmask16 __U, __m256i __A, __m256i __B)
 {
   return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U,
@@ -2035,7 +1966,7 @@ _mm256_mask_srav_epi16(__m256i __W, __mmask16 __U, __m256i __A, __m256i __B)
                                            (__v16hi)__W);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
 _mm256_maskz_srav_epi16(__mmask16 __U, __m256i __A, __m256i __B)
 {
   return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U,
@@ -2043,13 +1974,13 @@ _mm256_maskz_srav_epi16(__mmask16 __U, __m256i __A, __m256i __B)
                                            (__v16hi)_mm256_setzero_si256());
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS128
+static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
 _mm_srav_epi16(__m128i __A, __m128i __B)
 {
   return (__m128i)__builtin_ia32_psrav8hi((__v8hi)__A, (__v8hi)__B);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS128
+static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
 _mm_mask_srav_epi16(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
 {
   return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U,
@@ -2057,7 +1988,7 @@ _mm_mask_srav_epi16(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
                                              (__v8hi)__W);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS128
+static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
 _mm_maskz_srav_epi16(__mmask8 __U, __m128i __A, __m128i __B)
 {
   return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U,
@@ -2097,34 +2028,30 @@ _mm256_maskz_sra_epi16(__mmask16 __U, __m256i __A, __m128i __B)
                                           (__v16hi)_mm256_setzero_si256());
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS128
-_mm_mask_srai_epi16(__m128i __W, __mmask8 __U, __m128i __A, unsigned int __B)
-{
+static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
+_mm_mask_srai_epi16(__m128i __W, __mmask8 __U, __m128i __A, unsigned int __B) {
   return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U,
                                              (__v8hi)_mm_srai_epi16(__A, (int)__B),
                                              (__v8hi)__W);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS128
-_mm_maskz_srai_epi16(__mmask8 __U, __m128i __A, unsigned int __B)
-{
+static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
+_mm_maskz_srai_epi16(__mmask8 __U, __m128i __A, unsigned int __B) {
   return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U,
                                              (__v8hi)_mm_srai_epi16(__A, (int)__B),
                                              (__v8hi)_mm_setzero_si128());
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
 _mm256_mask_srai_epi16(__m256i __W, __mmask16 __U, __m256i __A,
-                       unsigned int __B)
-{
+                       unsigned int __B) {
   return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U,
                                          (__v16hi)_mm256_srai_epi16(__A, (int)__B),
                                          (__v16hi)__W);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_maskz_srai_epi16(__mmask16 __U, __m256i __A, unsigned int __B)
-{
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_maskz_srai_epi16(__mmask16 __U, __m256i __A, unsigned int __B) {
   return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U,
                                          (__v16hi)_mm256_srai_epi16(__A, (int)__B),
                                          (__v16hi)_mm256_setzero_si256());
@@ -2162,104 +2089,91 @@ _mm256_maskz_srl_epi16(__mmask16 __U, __m256i __A, __m128i __B)
                                           (__v16hi)_mm256_setzero_si256());
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS128
-_mm_mask_srli_epi16(__m128i __W, __mmask8 __U, __m128i __A, int __B)
-{
+static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
+_mm_mask_srli_epi16(__m128i __W, __mmask8 __U, __m128i __A, int __B) {
   return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U,
                                              (__v8hi)_mm_srli_epi16(__A, __B),
                                              (__v8hi)__W);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS128
-_mm_maskz_srli_epi16 (__mmask8 __U, __m128i __A, int __B)
-{
+static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
+_mm_maskz_srli_epi16(__mmask8 __U, __m128i __A, int __B) {
   return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U,
                                              (__v8hi)_mm_srli_epi16(__A, __B),
                                              (__v8hi)_mm_setzero_si128());
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_mask_srli_epi16(__m256i __W, __mmask16 __U, __m256i __A, int __B)
-{
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_mask_srli_epi16(__m256i __W, __mmask16 __U, __m256i __A, int __B) {
   return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U,
                                          (__v16hi)_mm256_srli_epi16(__A, __B),
                                          (__v16hi)__W);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_maskz_srli_epi16(__mmask16 __U, __m256i __A, int __B)
-{
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_maskz_srli_epi16(__mmask16 __U, __m256i __A, int __B) {
   return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U,
                                          (__v16hi)_mm256_srli_epi16(__A, __B),
                                          (__v16hi)_mm256_setzero_si256());
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS128
-_mm_mask_mov_epi16 (__m128i __W, __mmask8 __U, __m128i __A)
-{
+static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
+_mm_mask_mov_epi16(__m128i __W, __mmask8 __U, __m128i __A) {
   return (__m128i) __builtin_ia32_selectw_128 ((__mmask8) __U,
                 (__v8hi) __A,
                 (__v8hi) __W);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS128
-_mm_maskz_mov_epi16 (__mmask8 __U, __m128i __A)
-{
+static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
+_mm_maskz_mov_epi16(__mmask8 __U, __m128i __A) {
   return (__m128i) __builtin_ia32_selectw_128 ((__mmask8) __U,
                 (__v8hi) __A,
                 (__v8hi) _mm_setzero_si128 ());
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_mask_mov_epi16 (__m256i __W, __mmask16 __U, __m256i __A)
-{
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_mask_mov_epi16(__m256i __W, __mmask16 __U, __m256i __A) {
   return (__m256i) __builtin_ia32_selectw_256 ((__mmask16) __U,
                 (__v16hi) __A,
                 (__v16hi) __W);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_maskz_mov_epi16 (__mmask16 __U, __m256i __A)
-{
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_maskz_mov_epi16(__mmask16 __U, __m256i __A) {
   return (__m256i) __builtin_ia32_selectw_256 ((__mmask16) __U,
                 (__v16hi) __A,
                 (__v16hi) _mm256_setzero_si256 ());
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS128
-_mm_mask_mov_epi8 (__m128i __W, __mmask16 __U, __m128i __A)
-{
+static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
+_mm_mask_mov_epi8(__m128i __W, __mmask16 __U, __m128i __A) {
   return (__m128i) __builtin_ia32_selectb_128 ((__mmask16) __U,
                 (__v16qi) __A,
                 (__v16qi) __W);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS128
-_mm_maskz_mov_epi8 (__mmask16 __U, __m128i __A)
-{
+static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
+_mm_maskz_mov_epi8(__mmask16 __U, __m128i __A) {
   return (__m128i) __builtin_ia32_selectb_128 ((__mmask16) __U,
                 (__v16qi) __A,
                 (__v16qi) _mm_setzero_si128 ());
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_mask_mov_epi8 (__m256i __W, __mmask32 __U, __m256i __A)
-{
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_mask_mov_epi8(__m256i __W, __mmask32 __U, __m256i __A) {
   return (__m256i) __builtin_ia32_selectb_256 ((__mmask32) __U,
                 (__v32qi) __A,
                 (__v32qi) __W);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_maskz_mov_epi8 (__mmask32 __U, __m256i __A)
-{
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_maskz_mov_epi8(__mmask32 __U, __m256i __A) {
   return (__m256i) __builtin_ia32_selectb_256 ((__mmask32) __U,
                 (__v32qi) __A,
                 (__v32qi) _mm256_setzero_si256 ());
 }
 
-
-static __inline__ __m128i __DEFAULT_FN_ATTRS128
+static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
 _mm_mask_set1_epi8 (__m128i __O, __mmask16 __M, char __A)
 {
   return (__m128i) __builtin_ia32_selectb_128(__M,
@@ -2267,7 +2181,7 @@ _mm_mask_set1_epi8 (__m128i __O, __mmask16 __M, char __A)
                                               (__v16qi) __O);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS128
+static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
 _mm_maskz_set1_epi8 (__mmask16 __M, char __A)
 {
  return (__m128i) __builtin_ia32_selectb_128(__M,
@@ -2275,7 +2189,7 @@ _mm_maskz_set1_epi8 (__mmask16 __M, char __A)
                                              (__v16qi) _mm_setzero_si128());
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
 _mm256_mask_set1_epi8 (__m256i __O, __mmask32 __M, char __A)
 {
   return (__m256i) __builtin_ia32_selectb_256(__M,
@@ -2283,7 +2197,7 @@ _mm256_mask_set1_epi8 (__m256i __O, __mmask32 __M, char __A)
                                               (__v32qi) __O);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
 _mm256_maskz_set1_epi8 (__mmask32 __M, char __A)
 {
   return (__m256i) __builtin_ia32_selectb_256(__M,
@@ -2463,22 +2377,19 @@ _mm256_mask_storeu_epi8 (void *__P, __mmask32 __U, __m256i __A)
              (__mmask32) __U);
 }
 
-static __inline__ __mmask16 __DEFAULT_FN_ATTRS128
-_mm_test_epi8_mask (__m128i __A, __m128i __B)
-{
+static __inline__ __mmask16 __DEFAULT_FN_ATTRS128_CONSTEXPR
+_mm_test_epi8_mask(__m128i __A, __m128i __B) {
   return _mm_cmpneq_epi8_mask (_mm_and_si128(__A, __B), _mm_setzero_si128());
 }
 
-static __inline__ __mmask16 __DEFAULT_FN_ATTRS128
-_mm_mask_test_epi8_mask (__mmask16 __U, __m128i __A, __m128i __B)
-{
+static __inline__ __mmask16 __DEFAULT_FN_ATTRS128_CONSTEXPR
+_mm_mask_test_epi8_mask(__mmask16 __U, __m128i __A, __m128i __B) {
   return _mm_mask_cmpneq_epi8_mask (__U, _mm_and_si128 (__A, __B),
                                     _mm_setzero_si128());
 }
 
-static __inline__ __mmask32 __DEFAULT_FN_ATTRS256
-_mm256_test_epi8_mask (__m256i __A, __m256i __B)
-{
+static __inline__ __mmask32 __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_test_epi8_mask(__m256i __A, __m256i __B) {
   return _mm256_cmpneq_epi8_mask (_mm256_and_si256(__A, __B),
                                   _mm256_setzero_si256());
 }
@@ -2517,9 +2428,8 @@ _mm256_mask_test_epi16_mask (__mmask16 __U, __m256i __A, __m256i __B)
                                         _mm256_setzero_si256());
 }
 
-static __inline__ __mmask16 __DEFAULT_FN_ATTRS128
-_mm_testn_epi8_mask (__m128i __A, __m128i __B)
-{
+static __inline__ __mmask16 __DEFAULT_FN_ATTRS128_CONSTEXPR
+_mm_testn_epi8_mask(__m128i __A, __m128i __B) {
   return _mm_cmpeq_epi8_mask (_mm_and_si128 (__A, __B), _mm_setzero_si128());
 }
 
@@ -2570,55 +2480,47 @@ _mm256_mask_testn_epi16_mask (__mmask16 __U, __m256i __A, __m256i __B)
                                        _mm256_setzero_si256());
 }
 
-static __inline__ __mmask16 __DEFAULT_FN_ATTRS128
-_mm_movepi8_mask (__m128i __A)
-{
+static __inline__ __mmask16 __DEFAULT_FN_ATTRS128_CONSTEXPR
+_mm_movepi8_mask(__m128i __A) {
   return (__mmask16) __builtin_ia32_cvtb2mask128 ((__v16qi) __A);
 }
 
-static __inline__ __mmask32 __DEFAULT_FN_ATTRS256
-_mm256_movepi8_mask (__m256i __A)
-{
+static __inline__ __mmask32 __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_movepi8_mask(__m256i __A) {
   return (__mmask32) __builtin_ia32_cvtb2mask256 ((__v32qi) __A);
 }
 
-static __inline__ __mmask8 __DEFAULT_FN_ATTRS128
-_mm_movepi16_mask (__m128i __A)
-{
+static __inline__ __mmask8 __DEFAULT_FN_ATTRS128_CONSTEXPR
+_mm_movepi16_mask(__m128i __A) {
   return (__mmask8) __builtin_ia32_cvtw2mask128 ((__v8hi) __A);
 }
 
-static __inline__ __mmask16 __DEFAULT_FN_ATTRS256
-_mm256_movepi16_mask (__m256i __A)
-{
+static __inline__ __mmask16 __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_movepi16_mask(__m256i __A) {
   return (__mmask16) __builtin_ia32_cvtw2mask256 ((__v16hi) __A);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS128
-_mm_movm_epi8 (__mmask16 __A)
-{
+static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
+_mm_movm_epi8(__mmask16 __A) {
   return (__m128i) __builtin_ia32_cvtmask2b128 (__A);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_movm_epi8 (__mmask32 __A)
-{
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_movm_epi8(__mmask32 __A) {
   return (__m256i) __builtin_ia32_cvtmask2b256 (__A);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS128
-_mm_movm_epi16 (__mmask8 __A)
-{
+static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
+_mm_movm_epi16(__mmask8 __A) {
   return (__m128i) __builtin_ia32_cvtmask2w128 (__A);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_movm_epi16 (__mmask16 __A)
-{
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_movm_epi16(__mmask16 __A) {
   return (__m256i) __builtin_ia32_cvtmask2w256 (__A);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS128
+static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
 _mm_mask_broadcastb_epi8 (__m128i __O, __mmask16 __M, __m128i __A)
 {
   return (__m128i)__builtin_ia32_selectb_128(__M,
@@ -2626,7 +2528,7 @@ _mm_mask_broadcastb_epi8 (__m128i __O, __mmask16 __M, __m128i __A)
                                              (__v16qi) __O);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS128
+static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
 _mm_maskz_broadcastb_epi8 (__mmask16 __M, __m128i __A)
 {
   return (__m128i)__builtin_ia32_selectb_128(__M,
@@ -2634,7 +2536,7 @@ _mm_maskz_broadcastb_epi8 (__mmask16 __M, __m128i __A)
                                              (__v16qi) _mm_setzero_si128());
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
 _mm256_mask_broadcastb_epi8 (__m256i __O, __mmask32 __M, __m128i __A)
 {
   return (__m256i)__builtin_ia32_selectb_256(__M,
@@ -2642,7 +2544,7 @@ _mm256_mask_broadcastb_epi8 (__m256i __O, __mmask32 __M, __m128i __A)
                                              (__v32qi) __O);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
 _mm256_maskz_broadcastb_epi8 (__mmask32 __M, __m128i __A)
 {
   return (__m256i)__builtin_ia32_selectb_256(__M,
@@ -2650,7 +2552,7 @@ _mm256_maskz_broadcastb_epi8 (__mmask32 __M, __m128i __A)
                                              (__v32qi) _mm256_setzero_si256());
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS128
+static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
 _mm_mask_broadcastw_epi16 (__m128i __O, __mmask8 __M, __m128i __A)
 {
   return (__m128i)__builtin_ia32_selectw_128(__M,
@@ -2658,7 +2560,7 @@ _mm_mask_broadcastw_epi16 (__m128i __O, __mmask8 __M, __m128i __A)
                                              (__v8hi) __O);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS128
+static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
 _mm_maskz_broadcastw_epi16 (__mmask8 __M, __m128i __A)
 {
   return (__m128i)__builtin_ia32_selectw_128(__M,
@@ -2666,7 +2568,7 @@ _mm_maskz_broadcastw_epi16 (__mmask8 __M, __m128i __A)
                                              (__v8hi) _mm_setzero_si128());
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
 _mm256_mask_broadcastw_epi16 (__m256i __O, __mmask16 __M, __m128i __A)
 {
   return (__m256i)__builtin_ia32_selectw_256(__M,
@@ -2674,7 +2576,7 @@ _mm256_mask_broadcastw_epi16 (__m256i __O, __mmask16 __M, __m128i __A)
                                              (__v16hi) __O);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
 _mm256_maskz_broadcastw_epi16 (__mmask16 __M, __m128i __A)
 {
   return (__m256i)__builtin_ia32_selectw_256(__M,
@@ -2682,7 +2584,7 @@ _mm256_maskz_broadcastw_epi16 (__mmask16 __M, __m128i __A)
                                              (__v16hi) _mm256_setzero_si256());
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
 _mm256_mask_set1_epi16 (__m256i __O, __mmask16 __M, short __A)
 {
   return (__m256i) __builtin_ia32_selectw_256 (__M,
@@ -2690,7 +2592,7 @@ _mm256_mask_set1_epi16 (__m256i __O, __mmask16 __M, short __A)
                                                (__v16hi) __O);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
 _mm256_maskz_set1_epi16 (__mmask16 __M, short __A)
 {
   return (__m256i) __builtin_ia32_selectw_256(__M,
@@ -2698,7 +2600,7 @@ _mm256_maskz_set1_epi16 (__mmask16 __M, short __A)
                                               (__v16hi) _mm256_setzero_si256());
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS128
+static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
 _mm_mask_set1_epi16 (__m128i __O, __mmask8 __M, short __A)
 {
   return (__m128i) __builtin_ia32_selectw_128(__M,
@@ -2706,7 +2608,7 @@ _mm_mask_set1_epi16 (__m128i __O, __mmask8 __M, short __A)
                                               (__v8hi) __O);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS128
+static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
 _mm_maskz_set1_epi16 (__mmask8 __M, short __A)
 {
   return (__m128i) __builtin_ia32_selectw_128(__M,
@@ -2714,48 +2616,41 @@ _mm_maskz_set1_epi16 (__mmask8 __M, short __A)
                                               (__v8hi) _mm_setzero_si128());
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS128
-_mm_permutexvar_epi16 (__m128i __A, __m128i __B)
-{
+static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
+_mm_permutexvar_epi16(__m128i __A, __m128i __B) {
   return (__m128i)__builtin_ia32_permvarhi128((__v8hi) __B, (__v8hi) __A);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS128
-_mm_maskz_permutexvar_epi16 (__mmask8 __M, __m128i __A, __m128i __B)
-{
+static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
+_mm_maskz_permutexvar_epi16(__mmask8 __M, __m128i __A, __m128i __B) {
   return (__m128i)__builtin_ia32_selectw_128((__mmask8)__M,
                                         (__v8hi)_mm_permutexvar_epi16(__A, __B),
                                         (__v8hi) _mm_setzero_si128());
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS128
-_mm_mask_permutexvar_epi16 (__m128i __W, __mmask8 __M, __m128i __A,
-          __m128i __B)
-{
+static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
+_mm_mask_permutexvar_epi16(__m128i __W, __mmask8 __M, __m128i __A,
+                           __m128i __B) {
   return (__m128i)__builtin_ia32_selectw_128((__mmask8)__M,
                                         (__v8hi)_mm_permutexvar_epi16(__A, __B),
                                         (__v8hi)__W);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_permutexvar_epi16 (__m256i __A, __m256i __B)
-{
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_permutexvar_epi16(__m256i __A, __m256i __B) {
   return (__m256i)__builtin_ia32_permvarhi256((__v16hi) __B, (__v16hi) __A);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_maskz_permutexvar_epi16 (__mmask16 __M, __m256i __A,
-        __m256i __B)
-{
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_maskz_permutexvar_epi16(__mmask16 __M, __m256i __A, __m256i __B) {
   return (__m256i)__builtin_ia32_selectw_256((__mmask16)__M,
                                     (__v16hi)_mm256_permutexvar_epi16(__A, __B),
                                     (__v16hi)_mm256_setzero_si256());
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_mask_permutexvar_epi16 (__m256i __W, __mmask16 __M, __m256i __A,
-             __m256i __B)
-{
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_mask_permutexvar_epi16(__m256i __W, __mmask16 __M, __m256i __A,
+                              __m256i __B) {
   return (__m256i)__builtin_ia32_selectw_256((__mmask16)__M,
                                     (__v16hi)_mm256_permutexvar_epi16(__A, __B),
                                     (__v16hi)__W);
@@ -2809,353 +2704,353 @@ _mm256_mask_permutexvar_epi16 (__m256i __W, __mmask16 __M, __m256i __A,
                                   (__v16hi)_mm256_dbsad_epu8((A), (B), (imm)), \
                                   (__v16hi)_mm256_setzero_si256()))
 
-static __inline__ short __DEFAULT_FN_ATTRS128
+static __inline__ short __DEFAULT_FN_ATTRS128_CONSTEXPR
 _mm_reduce_add_epi16(__m128i __W) {
   return __builtin_reduce_add((__v8hi)__W);
 }
 
-static __inline__ short __DEFAULT_FN_ATTRS128
+static __inline__ short __DEFAULT_FN_ATTRS128_CONSTEXPR
 _mm_reduce_mul_epi16(__m128i __W) {
   return __builtin_reduce_mul((__v8hi)__W);
 }
 
-static __inline__ short __DEFAULT_FN_ATTRS128
+static __inline__ short __DEFAULT_FN_ATTRS128_CONSTEXPR
 _mm_reduce_and_epi16(__m128i __W) {
   return __builtin_reduce_and((__v8hi)__W);
 }
 
-static __inline__ short __DEFAULT_FN_ATTRS128
+static __inline__ short __DEFAULT_FN_ATTRS128_CONSTEXPR
 _mm_reduce_or_epi16(__m128i __W) {
   return __builtin_reduce_or((__v8hi)__W);
 }
 
-static __inline__ short __DEFAULT_FN_ATTRS128
-_mm_mask_reduce_add_epi16( __mmask8 __M, __m128i __W) {
+static __inline__ short __DEFAULT_FN_ATTRS128_CONSTEXPR
+_mm_mask_reduce_add_epi16(__mmask8 __M, __m128i __W) {
   __W = _mm_maskz_mov_epi16(__M, __W);
   return __builtin_reduce_add((__v8hi)__W);
 }
 
-static __inline__ short __DEFAULT_FN_ATTRS128
-_mm_mask_reduce_mul_epi16( __mmask8 __M, __m128i __W) {
+static __inline__ short __DEFAULT_FN_ATTRS128_CONSTEXPR
+_mm_mask_reduce_mul_epi16(__mmask8 __M, __m128i __W) {
   __W = _mm_mask_mov_epi16(_mm_set1_epi16(1), __M, __W);
   return __builtin_reduce_mul((__v8hi)__W);
 }
 
-static __inline__ short __DEFAULT_FN_ATTRS128
-_mm_mask_reduce_and_epi16( __mmask8 __M, __m128i __W) {
+static __inline__ short __DEFAULT_FN_ATTRS128_CONSTEXPR
+_mm_mask_reduce_and_epi16(__mmask8 __M, __m128i __W) {
   __W = _mm_mask_mov_epi16(_mm_set1_epi16(-1), __M, __W);
   return __builtin_reduce_and((__v8hi)__W);
 }
 
-static __inline__ short __DEFAULT_FN_ATTRS128
+static __inline__ short __DEFAULT_FN_ATTRS128_CONSTEXPR
 _mm_mask_reduce_or_epi16(__mmask8 __M, __m128i __W) {
   __W = _mm_maskz_mov_epi16(__M, __W);
   return __builtin_reduce_or((__v8hi)__W);
 }
 
-static __inline__ short __DEFAULT_FN_ATTRS128
+static __inline__ short __DEFAULT_FN_ATTRS128_CONSTEXPR
 _mm_reduce_max_epi16(__m128i __V) {
   return __builtin_reduce_max((__v8hi)__V);
 }
 
-static __inline__ unsigned short __DEFAULT_FN_ATTRS128
+static __inline__ unsigned short __DEFAULT_FN_ATTRS128_CONSTEXPR
 _mm_reduce_max_epu16(__m128i __V) {
   return __builtin_reduce_max((__v8hu)__V);
 }
 
-static __inline__ short __DEFAULT_FN_ATTRS128
+static __inline__ short __DEFAULT_FN_ATTRS128_CONSTEXPR
 _mm_reduce_min_epi16(__m128i __V) {
   return __builtin_reduce_min((__v8hi)__V);
 }
 
-static __inline__ unsigned short __DEFAULT_FN_ATTRS128
+static __inline__ unsigned short __DEFAULT_FN_ATTRS128_CONSTEXPR
 _mm_reduce_min_epu16(__m128i __V) {
   return __builtin_reduce_min((__v8hu)__V);
 }
 
-static __inline__ short __DEFAULT_FN_ATTRS128
+static __inline__ short __DEFAULT_FN_ATTRS128_CONSTEXPR
 _mm_mask_reduce_max_epi16(__mmask16 __M, __m128i __V) {
   __V = _mm_mask_mov_epi16(_mm_set1_epi16(-32767-1), __M, __V);
   return __builtin_reduce_max((__v8hi)__V);
 }
 
-static __inline__ unsigned short __DEFAULT_FN_ATTRS128
+static __inline__ unsigned short __DEFAULT_FN_ATTRS128_CONSTEXPR
 _mm_mask_reduce_max_epu16(__mmask16 __M, __m128i __V) {
   __V = _mm_maskz_mov_epi16(__M, __V);
   return __builtin_reduce_max((__v8hu)__V);
 }
 
-static __inline__ short __DEFAULT_FN_ATTRS128
+static __inline__ short __DEFAULT_FN_ATTRS128_CONSTEXPR
 _mm_mask_reduce_min_epi16(__mmask16 __M, __m128i __V) {
   __V = _mm_mask_mov_epi16(_mm_set1_epi16(32767), __M, __V);
   return __builtin_reduce_min((__v8hi)__V);
 }
 
-static __inline__ unsigned short __DEFAULT_FN_ATTRS128
+static __inline__ unsigned short __DEFAULT_FN_ATTRS128_CONSTEXPR
 _mm_mask_reduce_min_epu16(__mmask16 __M, __m128i __V) {
   __V = _mm_mask_mov_epi16(_mm_set1_epi16(-1), __M, __V);
   return __builtin_reduce_min((__v8hu)__V);
 }
 
-static __inline__ short __DEFAULT_FN_ATTRS256
+static __inline__ short __DEFAULT_FN_ATTRS256_CONSTEXPR
 _mm256_reduce_add_epi16(__m256i __W) {
   return __builtin_reduce_add((__v16hi)__W);
 }
 
-static __inline__ short __DEFAULT_FN_ATTRS256
+static __inline__ short __DEFAULT_FN_ATTRS256_CONSTEXPR
 _mm256_reduce_mul_epi16(__m256i __W) {
   return __builtin_reduce_mul((__v16hi)__W);
 }
 
-static __inline__ short __DEFAULT_FN_ATTRS256
+static __inline__ short __DEFAULT_FN_ATTRS256_CONSTEXPR
 _mm256_reduce_and_epi16(__m256i __W) {
   return __builtin_reduce_and((__v16hi)__W);
 }
 
-static __inline__ short __DEFAULT_FN_ATTRS256
+static __inline__ short __DEFAULT_FN_ATTRS256_CONSTEXPR
 _mm256_reduce_or_epi16(__m256i __W) {
   return __builtin_reduce_or((__v16hi)__W);
 }
 
-static __inline__ short __DEFAULT_FN_ATTRS256
-_mm256_mask_reduce_add_epi16( __mmask16 __M, __m256i __W) {
+static __inline__ short __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_mask_reduce_add_epi16(__mmask16 __M, __m256i __W) {
   __W = _mm256_maskz_mov_epi16(__M, __W);
   return __builtin_reduce_add((__v16hi)__W);
 }
 
-static __inline__ short __DEFAULT_FN_ATTRS256
-_mm256_mask_reduce_mul_epi16( __mmask16 __M, __m256i __W) {
+static __inline__ short __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_mask_reduce_mul_epi16(__mmask16 __M, __m256i __W) {
   __W = _mm256_mask_mov_epi16(_mm256_set1_epi16(1), __M, __W);
   return __builtin_reduce_mul((__v16hi)__W);
 }
 
-static __inline__ short __DEFAULT_FN_ATTRS256
-_mm256_mask_reduce_and_epi16( __mmask16 __M, __m256i __W) {
+static __inline__ short __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_mask_reduce_and_epi16(__mmask16 __M, __m256i __W) {
   __W = _mm256_mask_mov_epi16(_mm256_set1_epi16(-1), __M, __W);
   return __builtin_reduce_and((__v16hi)__W);
 }
 
-static __inline__ short __DEFAULT_FN_ATTRS256
+static __inline__ short __DEFAULT_FN_ATTRS256_CONSTEXPR
 _mm256_mask_reduce_or_epi16(__mmask16 __M, __m256i __W) {
   __W = _mm256_maskz_mov_epi16(__M, __W);
   return __builtin_reduce_or((__v16hi)__W);
 }
 
-static __inline__ short __DEFAULT_FN_ATTRS256
+static __inline__ short __DEFAULT_FN_ATTRS256_CONSTEXPR
 _mm256_reduce_max_epi16(__m256i __V) {
   return __builtin_reduce_max((__v16hi)__V);
 }
 
-static __inline__ unsigned short __DEFAULT_FN_ATTRS256
+static __inline__ unsigned short __DEFAULT_FN_ATTRS256_CONSTEXPR
 _mm256_reduce_max_epu16(__m256i __V) {
   return __builtin_reduce_max((__v16hu)__V);
 }
 
-static __inline__ short __DEFAULT_FN_ATTRS256
+static __inline__ short __DEFAULT_FN_ATTRS256_CONSTEXPR
 _mm256_reduce_min_epi16(__m256i __V) {
   return __builtin_reduce_min((__v16hi)__V);
 }
 
-static __inline__ unsigned short __DEFAULT_FN_ATTRS256
+static __inline__ unsigned short __DEFAULT_FN_ATTRS256_CONSTEXPR
 _mm256_reduce_min_epu16(__m256i __V) {
   return __builtin_reduce_min((__v16hu)__V);
 }
 
-static __inline__ short __DEFAULT_FN_ATTRS256
+static __inline__ short __DEFAULT_FN_ATTRS256_CONSTEXPR
 _mm256_mask_reduce_max_epi16(__mmask16 __M, __m256i __V) {
   __V = _mm256_mask_mov_epi16(_mm256_set1_epi16(-32767-1), __M, __V);
   return __builtin_reduce_max((__v16hi)__V);
 }
 
-static __inline__ unsigned short __DEFAULT_FN_ATTRS256
+static __inline__ unsigned short __DEFAULT_FN_ATTRS256_CONSTEXPR
 _mm256_mask_reduce_max_epu16(__mmask16 __M, __m256i __V) {
   __V = _mm256_maskz_mov_epi16(__M, __V);
   return __builtin_reduce_max((__v16hu)__V);
 }
 
-static __inline__ short __DEFAULT_FN_ATTRS256
+static __inline__ short __DEFAULT_FN_ATTRS256_CONSTEXPR
 _mm256_mask_reduce_min_epi16(__mmask16 __M, __m256i __V) {
   __V = _mm256_mask_mov_epi16(_mm256_set1_epi16(32767), __M, __V);
   return __builtin_reduce_min((__v16hi)__V);
 }
 
-static __inline__ unsigned short __DEFAULT_FN_ATTRS256
+static __inline__ unsigned short __DEFAULT_FN_ATTRS256_CONSTEXPR
 _mm256_mask_reduce_min_epu16(__mmask16 __M, __m256i __V) {
   __V = _mm256_mask_mov_epi16(_mm256_set1_epi16(-1), __M, __V);
   return __builtin_reduce_min((__v16hu)__V);
 }
 
-static __inline__ signed char __DEFAULT_FN_ATTRS128
+static __inline__ signed char __DEFAULT_FN_ATTRS128_CONSTEXPR
 _mm_reduce_add_epi8(__m128i __W) {
   return __builtin_reduce_add((__v16qs)__W);
 }
 
-static __inline__ signed char __DEFAULT_FN_ATTRS128
+static __inline__ signed char __DEFAULT_FN_ATTRS128_CONSTEXPR
 _mm_reduce_mul_epi8(__m128i __W) {
   return __builtin_reduce_mul((__v16qs)__W);
 }
 
-static __inline__ signed char __DEFAULT_FN_ATTRS128
+static __inline__ signed char __DEFAULT_FN_ATTRS128_CONSTEXPR
 _mm_reduce_and_epi8(__m128i __W) {
   return __builtin_reduce_and((__v16qs)__W);
 }
 
-static __inline__ signed char __DEFAULT_FN_ATTRS128
+static __inline__ signed char __DEFAULT_FN_ATTRS128_CONSTEXPR
 _mm_reduce_or_epi8(__m128i __W) {
   return __builtin_reduce_or((__v16qs)__W);
 }
 
-static __inline__ signed char __DEFAULT_FN_ATTRS128
+static __inline__ signed char __DEFAULT_FN_ATTRS128_CONSTEXPR
 _mm_mask_reduce_add_epi8(__mmask16 __M, __m128i __W) {
   __W = _mm_maskz_mov_epi8(__M, __W);
   return __builtin_reduce_add((__v16qs)__W);
 }
 
-static __inline__ signed char __DEFAULT_FN_ATTRS128
+static __inline__ signed char __DEFAULT_FN_ATTRS128_CONSTEXPR
 _mm_mask_reduce_mul_epi8(__mmask16 __M, __m128i __W) {
   __W = _mm_mask_mov_epi8(_mm_set1_epi8(1), __M, __W);
   return __builtin_reduce_mul((__v16qs)__W);
 }
 
-static __inline__ signed char __DEFAULT_FN_ATTRS128
+static __inline__ signed char __DEFAULT_FN_ATTRS128_CONSTEXPR
 _mm_mask_reduce_and_epi8(__mmask16 __M, __m128i __W) {
   __W = _mm_mask_mov_epi8(_mm_set1_epi8(-1), __M, __W);
   return __builtin_reduce_and((__v16qs)__W);
 }
 
-static __inline__ signed char __DEFAULT_FN_ATTRS128
+static __inline__ signed char __DEFAULT_FN_ATTRS128_CONSTEXPR
 _mm_mask_reduce_or_epi8(__mmask16 __M, __m128i __W) {
   __W = _mm_maskz_mov_epi8(__M, __W);
   return __builtin_reduce_or((__v16qs)__W);
 }
 
-static __inline__ signed char __DEFAULT_FN_ATTRS128
+static __inline__ signed char __DEFAULT_FN_ATTRS128_CONSTEXPR
 _mm_reduce_max_epi8(__m128i __V) {
   return __builtin_reduce_max((__v16qs)__V);
 }
 
-static __inline__ unsigned char __DEFAULT_FN_ATTRS128
+static __inline__ unsigned char __DEFAULT_FN_ATTRS128_CONSTEXPR
 _mm_reduce_max_epu8(__m128i __V) {
   return __builtin_reduce_max((__v16qu)__V);
 }
 
-static __inline__ signed char __DEFAULT_FN_ATTRS128
+static __inline__ signed char __DEFAULT_FN_ATTRS128_CONSTEXPR
 _mm_reduce_min_epi8(__m128i __V) {
   return __builtin_reduce_min((__v16qs)__V);
 }
 
-static __inline__ unsigned char __DEFAULT_FN_ATTRS128
+static __inline__ unsigned char __DEFAULT_FN_ATTRS128_CONSTEXPR
 _mm_reduce_min_epu8(__m128i __V) {
   return __builtin_reduce_min((__v16qu)__V);
 }
 
-static __inline__ signed char __DEFAULT_FN_ATTRS128
+static __inline__ signed char __DEFAULT_FN_ATTRS128_CONSTEXPR
 _mm_mask_reduce_max_epi8(__mmask16 __M, __m128i __V) {
   __V = _mm_mask_mov_epi8(_mm_set1_epi8(-127-1), __M, __V);
   return __builtin_reduce_max((__v16qs)__V);
 }
 
-static __inline__ unsigned char __DEFAULT_FN_ATTRS128
+static __inline__ unsigned char __DEFAULT_FN_ATTRS128_CONSTEXPR
 _mm_mask_reduce_max_epu8(__mmask16 __M, __m128i __V) {
   __V = _mm_maskz_mov_epi8(__M, __V);
   return __builtin_reduce_max((__v16qu)__V);
 }
 
-static __inline__ signed char __DEFAULT_FN_ATTRS128
+static __inline__ signed char __DEFAULT_FN_ATTRS128_CONSTEXPR
 _mm_mask_reduce_min_epi8(__mmask16 __M, __m128i __V) {
   __V = _mm_mask_mov_epi8(_mm_set1_epi8(127), __M, __V);
   return __builtin_reduce_min((__v16qs)__V);
 }
 
-static __inline__ unsigned char __DEFAULT_FN_ATTRS128
+static __inline__ unsigned char __DEFAULT_FN_ATTRS128_CONSTEXPR
 _mm_mask_reduce_min_epu8(__mmask16 __M, __m128i __V) {
   __V = _mm_mask_mov_epi8(_mm_set1_epi8(-1), __M, __V);
   return __builtin_reduce_min((__v16qu)__V);
 }
 
-static __inline__ signed char __DEFAULT_FN_ATTRS256
+static __inline__ signed char __DEFAULT_FN_ATTRS256_CONSTEXPR
 _mm256_reduce_add_epi8(__m256i __W) {
   return __builtin_reduce_add((__v32qs)__W);
 }
 
-static __inline__ signed char __DEFAULT_FN_ATTRS256
+static __inline__ signed char __DEFAULT_FN_ATTRS256_CONSTEXPR
 _mm256_reduce_mul_epi8(__m256i __W) {
   return __builtin_reduce_mul((__v32qs)__W);
 }
 
-static __inline__ signed char __DEFAULT_FN_ATTRS256
+static __inline__ signed char __DEFAULT_FN_ATTRS256_CONSTEXPR
 _mm256_reduce_and_epi8(__m256i __W) {
   return __builtin_reduce_and((__v32qs)__W);
 }
 
-static __inline__ signed char __DEFAULT_FN_ATTRS256
+static __inline__ signed char __DEFAULT_FN_ATTRS256_CONSTEXPR
 _mm256_reduce_or_epi8(__m256i __W) {
   return __builtin_reduce_or((__v32qs)__W);
 }
 
-static __inline__ signed char __DEFAULT_FN_ATTRS256
+static __inline__ signed char __DEFAULT_FN_ATTRS256_CONSTEXPR
 _mm256_mask_reduce_add_epi8(__mmask32 __M, __m256i __W) {
   __W = _mm256_maskz_mov_epi8(__M, __W);
   return __builtin_reduce_add((__v32qs)__W);
 }
 
-static __inline__ signed char __DEFAULT_FN_ATTRS256
+static __inline__ signed char __DEFAULT_FN_ATTRS256_CONSTEXPR
 _mm256_mask_reduce_mul_epi8(__mmask32 __M, __m256i __W) {
   __W = _mm256_mask_mov_epi8(_mm256_set1_epi8(1), __M, __W);
   return __builtin_reduce_mul((__v32qs)__W);
 }
 
-static __inline__ signed char __DEFAULT_FN_ATTRS256
+static __inline__ signed char __DEFAULT_FN_ATTRS256_CONSTEXPR
 _mm256_mask_reduce_and_epi8(__mmask32 __M, __m256i __W) {
   __W = _mm256_mask_mov_epi8(_mm256_set1_epi8(-1), __M, __W);
   return __builtin_reduce_and((__v32qs)__W);
 }
 
-static __inline__ signed char __DEFAULT_FN_ATTRS256
+static __inline__ signed char __DEFAULT_FN_ATTRS256_CONSTEXPR
 _mm256_mask_reduce_or_epi8(__mmask32 __M, __m256i __W) {
   __W = _mm256_maskz_mov_epi8(__M, __W);
   return __builtin_reduce_or((__v32qs)__W);
 }
 
-static __inline__ signed char __DEFAULT_FN_ATTRS256
+static __inline__ signed char __DEFAULT_FN_ATTRS256_CONSTEXPR
 _mm256_reduce_max_epi8(__m256i __V) {
   return __builtin_reduce_max((__v32qs)__V);
 }
 
-static __inline__ unsigned char __DEFAULT_FN_ATTRS256
+static __inline__ unsigned char __DEFAULT_FN_ATTRS256_CONSTEXPR
 _mm256_reduce_max_epu8(__m256i __V) {
   return __builtin_reduce_max((__v32qu)__V);
 }
 
-static __inline__ signed char __DEFAULT_FN_ATTRS256
+static __inline__ signed char __DEFAULT_FN_ATTRS256_CONSTEXPR
 _mm256_reduce_min_epi8(__m256i __V) {
   return __builtin_reduce_min((__v32qs)__V);
 }
 
-static __inline__ unsigned char __DEFAULT_FN_ATTRS256
+static __inline__ unsigned char __DEFAULT_FN_ATTRS256_CONSTEXPR
 _mm256_reduce_min_epu8(__m256i __V) {
   return __builtin_reduce_min((__v32qu)__V);
 }
 
-static __inline__ signed char __DEFAULT_FN_ATTRS256
+static __inline__ signed char __DEFAULT_FN_ATTRS256_CONSTEXPR
 _mm256_mask_reduce_max_epi8(__mmask32 __M, __m256i __V) {
   __V = _mm256_mask_mov_epi8(_mm256_set1_epi8(-127-1), __M, __V);
   return __builtin_reduce_max((__v32qs)__V);
 }
 
-static __inline__ unsigned char __DEFAULT_FN_ATTRS256
+static __inline__ unsigned char __DEFAULT_FN_ATTRS256_CONSTEXPR
 _mm256_mask_reduce_max_epu8(__mmask32 __M, __m256i __V) {
   __V = _mm256_maskz_mov_epi8(__M, __V);
   return __builtin_reduce_max((__v32qu)__V);
 }
 
-static __inline__ signed char __DEFAULT_FN_ATTRS256
+static __inline__ signed char __DEFAULT_FN_ATTRS256_CONSTEXPR
 _mm256_mask_reduce_min_epi8(__mmask32 __M, __m256i __V) {
   __V = _mm256_mask_mov_epi8(_mm256_set1_epi8(127), __M, __V);
   return __builtin_reduce_min((__v32qs)__V);
 }
 
-static __inline__ unsigned char __DEFAULT_FN_ATTRS256
+static __inline__ unsigned char __DEFAULT_FN_ATTRS256_CONSTEXPR
 _mm256_mask_reduce_min_epu8(__mmask32 __M, __m256i __V) {
   __V = _mm256_mask_mov_epi8(_mm256_set1_epi8(-1), __M, __V);
   return __builtin_reduce_min((__v32qu)__V);
@@ -3163,5 +3058,7 @@ _mm256_mask_reduce_min_epu8(__mmask32 __M, __m256i __V) {
 
 #undef __DEFAULT_FN_ATTRS128
 #undef __DEFAULT_FN_ATTRS256
+#undef __DEFAULT_FN_ATTRS128_CONSTEXPR
+#undef __DEFAULT_FN_ATTRS256_CONSTEXPR
 
 #endif /* __AVX512VLBWINTRIN_H */
diff --git a/lib/include/avx512vlcdintrin.h b/lib/include/avx512vlcdintrin.h
index 923e2c551a..df66e1df3b 100644
--- a/lib/include/avx512vlcdintrin.h
+++ b/lib/include/avx512vlcdintrin.h
@@ -14,211 +14,183 @@
 #define __AVX512VLCDINTRIN_H
 
 /* Define the default attributes for the functions in this file. */
+#if defined(__cplusplus) && (__cplusplus >= 201103L)
 #define __DEFAULT_FN_ATTRS128                                                  \
   __attribute__((__always_inline__, __nodebug__,                               \
-                 __target__("avx512vl,avx512cd,no-evex512"),                   \
-                 __min_vector_width__(128)))
+                 __target__("avx512vl,avx512cd"),                              \
+                 __min_vector_width__(128))) constexpr
 #define __DEFAULT_FN_ATTRS256                                                  \
   __attribute__((__always_inline__, __nodebug__,                               \
-                 __target__("avx512vl,avx512cd,no-evex512"),                   \
-                 __min_vector_width__(256)))
+                 __target__("avx512vl,avx512cd"),                              \
+                 __min_vector_width__(256))) constexpr
+#else
+#define __DEFAULT_FN_ATTRS128                                                  \
+  __attribute__((__always_inline__, __nodebug__,                               \
+                 __target__("avx512vl,avx512cd"), __min_vector_width__(128)))
+#define __DEFAULT_FN_ATTRS256                                                  \
+  __attribute__((__always_inline__, __nodebug__,                               \
+                 __target__("avx512vl,avx512cd"), __min_vector_width__(256)))
+#endif
 
 static __inline__ __m128i __DEFAULT_FN_ATTRS128
-_mm_broadcastmb_epi64 (__mmask8 __A)
-{
+_mm_broadcastmb_epi64(__mmask8 __A) {
   return (__m128i) _mm_set1_epi64x((long long) __A);
 }
 
 static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_broadcastmb_epi64 (__mmask8 __A)
-{
-  return (__m256i) _mm256_set1_epi64x((long long)__A);
+_mm256_broadcastmb_epi64(__mmask8 __A) {
+  return (__m256i)_mm256_set1_epi64x((long long)__A);
 }
 
 static __inline__ __m128i __DEFAULT_FN_ATTRS128
-_mm_broadcastmw_epi32 (__mmask16 __A)
-{
+_mm_broadcastmw_epi32(__mmask16 __A) {
   return (__m128i) _mm_set1_epi32((int)__A);
 }
 
 static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_broadcastmw_epi32 (__mmask16 __A)
-{
+_mm256_broadcastmw_epi32(__mmask16 __A) {
   return (__m256i) _mm256_set1_epi32((int)__A);
 }
 
-
 static __inline__ __m128i __DEFAULT_FN_ATTRS128
-_mm_conflict_epi64 (__m128i __A)
-{
-  return (__m128i) __builtin_ia32_vpconflictdi_128 ((__v2di) __A);
+_mm_conflict_epi64(__m128i __A) {
+  return (__m128i)__builtin_ia32_vpconflictdi_128((__v2di)__A);
 }
 
 static __inline__ __m128i __DEFAULT_FN_ATTRS128
-_mm_mask_conflict_epi64 (__m128i __W, __mmask8 __U, __m128i __A)
-{
-  return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
-                                             (__v2di)_mm_conflict_epi64(__A),
-                                             (__v2di)__W);
+_mm_mask_conflict_epi64(__m128i __W, __mmask8 __U, __m128i __A) {
+  return (__m128i)__builtin_ia32_selectq_128(
+      (__mmask8)__U, (__v2di)_mm_conflict_epi64(__A), (__v2di)__W);
 }
 
 static __inline__ __m128i __DEFAULT_FN_ATTRS128
-_mm_maskz_conflict_epi64 (__mmask8 __U, __m128i __A)
-{
+_mm_maskz_conflict_epi64(__mmask8 __U, __m128i __A) {
   return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
                                              (__v2di)_mm_conflict_epi64(__A),
                                              (__v2di)_mm_setzero_si128());
 }
 
 static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_conflict_epi64 (__m256i __A)
-{
-  return (__m256i) __builtin_ia32_vpconflictdi_256 ((__v4di) __A);
+_mm256_conflict_epi64(__m256i __A) {
+  return (__m256i)__builtin_ia32_vpconflictdi_256((__v4di)__A);
 }
 
 static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_mask_conflict_epi64 (__m256i __W, __mmask8 __U, __m256i __A)
-{
-  return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
-                                             (__v4di)_mm256_conflict_epi64(__A),
-                                             (__v4di)__W);
+_mm256_mask_conflict_epi64(__m256i __W, __mmask8 __U, __m256i __A) {
+  return (__m256i)__builtin_ia32_selectq_256(
+      (__mmask8)__U, (__v4di)_mm256_conflict_epi64(__A), (__v4di)__W);
 }
 
 static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_maskz_conflict_epi64 (__mmask8 __U, __m256i __A)
-{
+_mm256_maskz_conflict_epi64(__mmask8 __U, __m256i __A) {
   return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
                                              (__v4di)_mm256_conflict_epi64(__A),
                                              (__v4di)_mm256_setzero_si256());
 }
 
 static __inline__ __m128i __DEFAULT_FN_ATTRS128
-_mm_conflict_epi32 (__m128i __A)
-{
-  return (__m128i) __builtin_ia32_vpconflictsi_128 ((__v4si) __A);
+_mm_conflict_epi32(__m128i __A) {
+  return (__m128i)__builtin_ia32_vpconflictsi_128((__v4si)__A);
 }
 
 static __inline__ __m128i __DEFAULT_FN_ATTRS128
-_mm_mask_conflict_epi32 (__m128i __W, __mmask8 __U, __m128i __A)
-{
-  return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
-                                             (__v4si)_mm_conflict_epi32(__A),
-                                             (__v4si)__W);
+_mm_mask_conflict_epi32(__m128i __W, __mmask8 __U, __m128i __A) {
+  return (__m128i)__builtin_ia32_selectd_128(
+      (__mmask8)__U, (__v4si)_mm_conflict_epi32(__A), (__v4si)__W);
 }
 
 static __inline__ __m128i __DEFAULT_FN_ATTRS128
-_mm_maskz_conflict_epi32 (__mmask8 __U, __m128i __A)
-{
+_mm_maskz_conflict_epi32(__mmask8 __U, __m128i __A) {
   return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
                                              (__v4si)_mm_conflict_epi32(__A),
                                              (__v4si)_mm_setzero_si128());
 }
 
 static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_conflict_epi32 (__m256i __A)
-{
-  return (__m256i) __builtin_ia32_vpconflictsi_256 ((__v8si) __A);
+_mm256_conflict_epi32(__m256i __A) {
+  return (__m256i)__builtin_ia32_vpconflictsi_256((__v8si)__A);
 }
 
 static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_mask_conflict_epi32 (__m256i __W, __mmask8 __U, __m256i __A)
-{
-  return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
-                                             (__v8si)_mm256_conflict_epi32(__A),
-                                             (__v8si)__W);
+_mm256_mask_conflict_epi32(__m256i __W, __mmask8 __U, __m256i __A) {
+  return (__m256i)__builtin_ia32_selectd_256(
+      (__mmask8)__U, (__v8si)_mm256_conflict_epi32(__A), (__v8si)__W);
 }
 
 static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_maskz_conflict_epi32 (__mmask8 __U, __m256i __A)
-{
+_mm256_maskz_conflict_epi32(__mmask8 __U, __m256i __A) {
   return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
                                              (__v8si)_mm256_conflict_epi32(__A),
                                              (__v8si)_mm256_setzero_si256());
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS128
-_mm_lzcnt_epi32 (__m128i __A)
-{
-  return (__m128i) __builtin_ia32_vplzcntd_128 ((__v4si) __A);
+static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_lzcnt_epi32(__m128i __A) {
+  return (__m128i)__builtin_elementwise_clzg((__v4si)__A,
+                                             (__v4si)_mm_set1_epi32(32));
 }
 
 static __inline__ __m128i __DEFAULT_FN_ATTRS128
-_mm_mask_lzcnt_epi32 (__m128i __W, __mmask8 __U, __m128i __A)
-{
-  return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
-                                             (__v4si)_mm_lzcnt_epi32(__A),
-                                             (__v4si)__W);
+_mm_mask_lzcnt_epi32(__m128i __W, __mmask8 __U, __m128i __A) {
+  return (__m128i)__builtin_ia32_selectd_128(
+      (__mmask8)__U, (__v4si)_mm_lzcnt_epi32(__A), (__v4si)__W);
 }
 
 static __inline__ __m128i __DEFAULT_FN_ATTRS128
-_mm_maskz_lzcnt_epi32 (__mmask8 __U, __m128i __A)
-{
-  return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
-                                             (__v4si)_mm_lzcnt_epi32(__A),
-                                             (__v4si)_mm_setzero_si128());
+_mm_maskz_lzcnt_epi32(__mmask8 __U, __m128i __A) {
+  return (__m128i)__builtin_ia32_selectd_128(
+      (__mmask8)__U, (__v4si)_mm_lzcnt_epi32(__A), (__v4si)_mm_setzero_si128());
 }
 
 static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_lzcnt_epi32 (__m256i __A)
-{
-  return (__m256i) __builtin_ia32_vplzcntd_256 ((__v8si) __A);
+_mm256_lzcnt_epi32(__m256i __A) {
+  return (__m256i)__builtin_elementwise_clzg((__v8si)__A,
+                                             (__v8si)_mm256_set1_epi32(32));
 }
 
 static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_mask_lzcnt_epi32 (__m256i __W, __mmask8 __U, __m256i __A)
-{
-  return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
-                                             (__v8si)_mm256_lzcnt_epi32(__A),
-                                             (__v8si)__W);
+_mm256_mask_lzcnt_epi32(__m256i __W, __mmask8 __U, __m256i __A) {
+  return (__m256i)__builtin_ia32_selectd_256(
+      (__mmask8)__U, (__v8si)_mm256_lzcnt_epi32(__A), (__v8si)__W);
 }
 
 static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_maskz_lzcnt_epi32 (__mmask8 __U, __m256i __A)
-{
+_mm256_maskz_lzcnt_epi32(__mmask8 __U, __m256i __A) {
   return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
                                              (__v8si)_mm256_lzcnt_epi32(__A),
                                              (__v8si)_mm256_setzero_si256());
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS128
-_mm_lzcnt_epi64 (__m128i __A)
-{
-  return (__m128i) __builtin_ia32_vplzcntq_128 ((__v2di) __A);
+static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_lzcnt_epi64(__m128i __A) {
+  return (__m128i)__builtin_elementwise_clzg(
+      (__v2di)__A, (__v2di)_mm_set1_epi64x((long long)64));
 }
 
 static __inline__ __m128i __DEFAULT_FN_ATTRS128
-_mm_mask_lzcnt_epi64 (__m128i __W, __mmask8 __U, __m128i __A)
-{
-  return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
-                                             (__v2di)_mm_lzcnt_epi64(__A),
-                                             (__v2di)__W);
+_mm_mask_lzcnt_epi64(__m128i __W, __mmask8 __U, __m128i __A) {
+  return (__m128i)__builtin_ia32_selectq_128(
+      (__mmask8)__U, (__v2di)_mm_lzcnt_epi64(__A), (__v2di)__W);
 }
 
 static __inline__ __m128i __DEFAULT_FN_ATTRS128
-_mm_maskz_lzcnt_epi64 (__mmask8 __U, __m128i __A)
-{
-  return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
-                                             (__v2di)_mm_lzcnt_epi64(__A),
-                                             (__v2di)_mm_setzero_si128());
+_mm_maskz_lzcnt_epi64(__mmask8 __U, __m128i __A) {
+  return (__m128i)__builtin_ia32_selectq_128(
+      (__mmask8)__U, (__v2di)_mm_lzcnt_epi64(__A), (__v2di)_mm_setzero_si128());
 }
 
 static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_lzcnt_epi64 (__m256i __A)
-{
-  return (__m256i) __builtin_ia32_vplzcntq_256 ((__v4di) __A);
+_mm256_lzcnt_epi64(__m256i __A) {
+  return (__m256i)__builtin_elementwise_clzg(
+      (__v4di)__A, (__v4di)_mm256_set1_epi64x((long long)64));
 }
 
 static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_mask_lzcnt_epi64 (__m256i __W, __mmask8 __U, __m256i __A)
-{
-  return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
-                                             (__v4di)_mm256_lzcnt_epi64(__A),
-                                             (__v4di)__W);
+_mm256_mask_lzcnt_epi64(__m256i __W, __mmask8 __U, __m256i __A) {
+  return (__m256i)__builtin_ia32_selectq_256(
+      (__mmask8)__U, (__v4di)_mm256_lzcnt_epi64(__A), (__v4di)__W);
 }
 
 static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_maskz_lzcnt_epi64 (__mmask8 __U, __m256i __A)
-{
+_mm256_maskz_lzcnt_epi64(__mmask8 __U, __m256i __A) {
   return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
                                              (__v4di)_mm256_lzcnt_epi64(__A),
                                              (__v4di)_mm256_setzero_si256());
diff --git a/lib/include/avx512vldqintrin.h b/lib/include/avx512vldqintrin.h
index 272cdd89e2..cd1effdec2 100644
--- a/lib/include/avx512vldqintrin.h
+++ b/lib/include/avx512vldqintrin.h
@@ -17,45 +17,51 @@
 /* Define the default attributes for the functions in this file. */
 #define __DEFAULT_FN_ATTRS128                                                  \
   __attribute__((__always_inline__, __nodebug__,                               \
-                 __target__("avx512vl,avx512dq,no-evex512"),                   \
-                 __min_vector_width__(128)))
+                 __target__("avx512vl,avx512dq"), __min_vector_width__(128)))
 #define __DEFAULT_FN_ATTRS256                                                  \
   __attribute__((__always_inline__, __nodebug__,                               \
-                 __target__("avx512vl,avx512dq,no-evex512"),                   \
-                 __min_vector_width__(256)))
+                 __target__("avx512vl,avx512dq"), __min_vector_width__(256)))
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_mullo_epi64 (__m256i __A, __m256i __B) {
+#if defined(__cplusplus) && (__cplusplus >= 201103L)
+#define __DEFAULT_FN_ATTRS256_CONSTEXPR __DEFAULT_FN_ATTRS256 constexpr
+#define __DEFAULT_FN_ATTRS128_CONSTEXPR __DEFAULT_FN_ATTRS128 constexpr
+#else
+#define __DEFAULT_FN_ATTRS256_CONSTEXPR __DEFAULT_FN_ATTRS256
+#define __DEFAULT_FN_ATTRS128_CONSTEXPR __DEFAULT_FN_ATTRS128
+#endif
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_mullo_epi64(__m256i __A, __m256i __B) {
   return (__m256i) ((__v4du) __A * (__v4du) __B);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
 _mm256_mask_mullo_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B) {
   return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
                                              (__v4di)_mm256_mullo_epi64(__A, __B),
                                              (__v4di)__W);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
 _mm256_maskz_mullo_epi64(__mmask8 __U, __m256i __A, __m256i __B) {
   return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
                                              (__v4di)_mm256_mullo_epi64(__A, __B),
                                              (__v4di)_mm256_setzero_si256());
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS128
-_mm_mullo_epi64 (__m128i __A, __m128i __B) {
+static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
+_mm_mullo_epi64(__m128i __A, __m128i __B) {
   return (__m128i) ((__v2du) __A * (__v2du) __B);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS128
+static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
 _mm_mask_mullo_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) {
   return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
                                              (__v2di)_mm_mullo_epi64(__A, __B),
                                              (__v2di)__W);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS128
+static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
 _mm_maskz_mullo_epi64(__mmask8 __U, __m128i __A, __m128i __B) {
   return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
                                              (__v2di)_mm_mullo_epi64(__A, __B),
@@ -454,39 +460,39 @@ _mm256_maskz_cvtps_epu64 (__mmask8 __U, __m128 __A) {
                 (__mmask8) __U);
 }
 
-static __inline__ __m128d __DEFAULT_FN_ATTRS128
-_mm_cvtepi64_pd (__m128i __A) {
+static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR
+_mm_cvtepi64_pd(__m128i __A) {
   return (__m128d)__builtin_convertvector((__v2di)__A, __v2df);
 }
 
-static __inline__ __m128d __DEFAULT_FN_ATTRS128
-_mm_mask_cvtepi64_pd (__m128d __W, __mmask8 __U, __m128i __A) {
+static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR
+_mm_mask_cvtepi64_pd(__m128d __W, __mmask8 __U, __m128i __A) {
   return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
                                               (__v2df)_mm_cvtepi64_pd(__A),
                                               (__v2df)__W);
 }
 
-static __inline__ __m128d __DEFAULT_FN_ATTRS128
-_mm_maskz_cvtepi64_pd (__mmask8 __U, __m128i __A) {
+static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR
+_mm_maskz_cvtepi64_pd(__mmask8 __U, __m128i __A) {
   return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
                                               (__v2df)_mm_cvtepi64_pd(__A),
                                               (__v2df)_mm_setzero_pd());
 }
 
-static __inline__ __m256d __DEFAULT_FN_ATTRS256
-_mm256_cvtepi64_pd (__m256i __A) {
+static __inline__ __m256d __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_cvtepi64_pd(__m256i __A) {
   return (__m256d)__builtin_convertvector((__v4di)__A, __v4df);
 }
 
-static __inline__ __m256d __DEFAULT_FN_ATTRS256
-_mm256_mask_cvtepi64_pd (__m256d __W, __mmask8 __U, __m256i __A) {
+static __inline__ __m256d __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_mask_cvtepi64_pd(__m256d __W, __mmask8 __U, __m256i __A) {
   return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
                                               (__v4df)_mm256_cvtepi64_pd(__A),
                                               (__v4df)__W);
 }
 
-static __inline__ __m256d __DEFAULT_FN_ATTRS256
-_mm256_maskz_cvtepi64_pd (__mmask8 __U, __m256i __A) {
+static __inline__ __m256d __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_maskz_cvtepi64_pd(__mmask8 __U, __m256i __A) {
   return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
                                               (__v4df)_mm256_cvtepi64_pd(__A),
                                               (__v4df)_mm256_setzero_pd());
@@ -513,20 +519,20 @@ _mm_maskz_cvtepi64_ps (__mmask8 __U, __m128i __A) {
                 (__mmask8) __U);
 }
 
-static __inline__ __m128 __DEFAULT_FN_ATTRS256
-_mm256_cvtepi64_ps (__m256i __A) {
+static __inline__ __m128 __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_cvtepi64_ps(__m256i __A) {
   return (__m128)__builtin_convertvector((__v4di)__A, __v4sf);
 }
 
-static __inline__ __m128 __DEFAULT_FN_ATTRS256
-_mm256_mask_cvtepi64_ps (__m128 __W, __mmask8 __U, __m256i __A) {
+static __inline__ __m128 __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_mask_cvtepi64_ps(__m128 __W, __mmask8 __U, __m256i __A) {
   return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
                                              (__v4sf)_mm256_cvtepi64_ps(__A),
                                              (__v4sf)__W);
 }
 
-static __inline__ __m128 __DEFAULT_FN_ATTRS256
-_mm256_maskz_cvtepi64_ps (__mmask8 __U, __m256i __A) {
+static __inline__ __m128 __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_maskz_cvtepi64_ps(__mmask8 __U, __m256i __A) {
   return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
                                              (__v4sf)_mm256_cvtepi64_ps(__A),
                                              (__v4sf)_mm_setzero_ps());
@@ -700,39 +706,39 @@ _mm256_maskz_cvttps_epu64 (__mmask8 __U, __m128 __A) {
                 (__mmask8) __U);
 }
 
-static __inline__ __m128d __DEFAULT_FN_ATTRS128
-_mm_cvtepu64_pd (__m128i __A) {
+static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR
+_mm_cvtepu64_pd(__m128i __A) {
   return (__m128d)__builtin_convertvector((__v2du)__A, __v2df);
 }
 
-static __inline__ __m128d __DEFAULT_FN_ATTRS128
-_mm_mask_cvtepu64_pd (__m128d __W, __mmask8 __U, __m128i __A) {
+static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR
+_mm_mask_cvtepu64_pd(__m128d __W, __mmask8 __U, __m128i __A) {
   return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
                                               (__v2df)_mm_cvtepu64_pd(__A),
                                               (__v2df)__W);
 }
 
-static __inline__ __m128d __DEFAULT_FN_ATTRS128
-_mm_maskz_cvtepu64_pd (__mmask8 __U, __m128i __A) {
+static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR
+_mm_maskz_cvtepu64_pd(__mmask8 __U, __m128i __A) {
   return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
                                               (__v2df)_mm_cvtepu64_pd(__A),
                                               (__v2df)_mm_setzero_pd());
 }
 
-static __inline__ __m256d __DEFAULT_FN_ATTRS256
-_mm256_cvtepu64_pd (__m256i __A) {
+static __inline__ __m256d __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_cvtepu64_pd(__m256i __A) {
   return (__m256d)__builtin_convertvector((__v4du)__A, __v4df);
 }
 
-static __inline__ __m256d __DEFAULT_FN_ATTRS256
-_mm256_mask_cvtepu64_pd (__m256d __W, __mmask8 __U, __m256i __A) {
+static __inline__ __m256d __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_mask_cvtepu64_pd(__m256d __W, __mmask8 __U, __m256i __A) {
   return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
                                               (__v4df)_mm256_cvtepu64_pd(__A),
                                               (__v4df)__W);
 }
 
-static __inline__ __m256d __DEFAULT_FN_ATTRS256
-_mm256_maskz_cvtepu64_pd (__mmask8 __U, __m256i __A) {
+static __inline__ __m256d __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_maskz_cvtepu64_pd(__mmask8 __U, __m256i __A) {
   return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
                                               (__v4df)_mm256_cvtepu64_pd(__A),
                                               (__v4df)_mm256_setzero_pd());
@@ -759,20 +765,20 @@ _mm_maskz_cvtepu64_ps (__mmask8 __U, __m128i __A) {
                 (__mmask8) __U);
 }
 
-static __inline__ __m128 __DEFAULT_FN_ATTRS256
-_mm256_cvtepu64_ps (__m256i __A) {
+static __inline__ __m128 __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_cvtepu64_ps(__m256i __A) {
   return (__m128)__builtin_convertvector((__v4du)__A, __v4sf);
 }
 
-static __inline__ __m128 __DEFAULT_FN_ATTRS256
-_mm256_mask_cvtepu64_ps (__m128 __W, __mmask8 __U, __m256i __A) {
+static __inline__ __m128 __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_mask_cvtepu64_ps(__m128 __W, __mmask8 __U, __m256i __A) {
   return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
                                              (__v4sf)_mm256_cvtepu64_ps(__A),
                                              (__v4sf)__W);
 }
 
-static __inline__ __m128 __DEFAULT_FN_ATTRS256
-_mm256_maskz_cvtepu64_ps (__mmask8 __U, __m256i __A) {
+static __inline__ __m128 __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_maskz_cvtepu64_ps(__mmask8 __U, __m256i __A) {
   return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
                                              (__v4sf)_mm256_cvtepu64_ps(__A),
                                              (__v4sf)_mm_setzero_ps());
@@ -908,174 +914,150 @@ _mm256_maskz_cvtepu64_ps (__mmask8 __U, __m256i __A) {
                                            (__v8sf)_mm256_setzero_ps(), \
                                            (__mmask8)(U)))
 
-static __inline__ __mmask8 __DEFAULT_FN_ATTRS128
-_mm_movepi32_mask (__m128i __A)
-{
+static __inline__ __mmask8
+    __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_movepi32_mask(__m128i __A) {
   return (__mmask8) __builtin_ia32_cvtd2mask128 ((__v4si) __A);
 }
 
-static __inline__ __mmask8 __DEFAULT_FN_ATTRS256
-_mm256_movepi32_mask (__m256i __A)
-{
+static __inline__ __mmask8 __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_movepi32_mask(__m256i __A) {
   return (__mmask8) __builtin_ia32_cvtd2mask256 ((__v8si) __A);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS128
-_mm_movm_epi32 (__mmask8 __A)
-{
+static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
+_mm_movm_epi32(__mmask8 __A) {
   return (__m128i) __builtin_ia32_cvtmask2d128 (__A);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_movm_epi32 (__mmask8 __A)
-{
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_movm_epi32(__mmask8 __A) {
   return (__m256i) __builtin_ia32_cvtmask2d256 (__A);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS128
-_mm_movm_epi64 (__mmask8 __A)
-{
+static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
+_mm_movm_epi64(__mmask8 __A) {
   return (__m128i) __builtin_ia32_cvtmask2q128 (__A);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_movm_epi64 (__mmask8 __A)
-{
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_movm_epi64(__mmask8 __A) {
   return (__m256i) __builtin_ia32_cvtmask2q256 (__A);
 }
 
-static __inline__ __mmask8 __DEFAULT_FN_ATTRS128
-_mm_movepi64_mask (__m128i __A)
-{
+static __inline__ __mmask8 __DEFAULT_FN_ATTRS128_CONSTEXPR
+_mm_movepi64_mask(__m128i __A) {
   return (__mmask8) __builtin_ia32_cvtq2mask128 ((__v2di) __A);
 }
 
-static __inline__ __mmask8 __DEFAULT_FN_ATTRS256
-_mm256_movepi64_mask (__m256i __A)
-{
+static __inline__ __mmask8 __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_movepi64_mask(__m256i __A) {
   return (__mmask8) __builtin_ia32_cvtq2mask256 ((__v4di) __A);
 }
 
-static __inline__ __m256 __DEFAULT_FN_ATTRS256
-_mm256_broadcast_f32x2 (__m128 __A)
-{
+static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_broadcast_f32x2(__m128 __A) {
   return (__m256)__builtin_shufflevector((__v4sf)__A, (__v4sf)__A,
                                          0, 1, 0, 1, 0, 1, 0, 1);
 }
 
-static __inline__ __m256 __DEFAULT_FN_ATTRS256
-_mm256_mask_broadcast_f32x2 (__m256 __O, __mmask8 __M, __m128 __A)
-{
+static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_mask_broadcast_f32x2(__m256 __O, __mmask8 __M, __m128 __A) {
   return (__m256)__builtin_ia32_selectps_256((__mmask8)__M,
                                              (__v8sf)_mm256_broadcast_f32x2(__A),
                                              (__v8sf)__O);
 }
 
-static __inline__ __m256 __DEFAULT_FN_ATTRS256
-_mm256_maskz_broadcast_f32x2 (__mmask8 __M, __m128 __A)
-{
+static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_maskz_broadcast_f32x2(__mmask8 __M, __m128 __A) {
   return (__m256)__builtin_ia32_selectps_256((__mmask8)__M,
                                              (__v8sf)_mm256_broadcast_f32x2(__A),
                                              (__v8sf)_mm256_setzero_ps());
 }
 
-static __inline__ __m256d __DEFAULT_FN_ATTRS256
-_mm256_broadcast_f64x2(__m128d __A)
-{
+static __inline__ __m256d __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_broadcast_f64x2(__m128d __A) {
   return (__m256d)__builtin_shufflevector((__v2df)__A, (__v2df)__A,
                                           0, 1, 0, 1);
 }
 
-static __inline__ __m256d __DEFAULT_FN_ATTRS256
-_mm256_mask_broadcast_f64x2(__m256d __O, __mmask8 __M, __m128d __A)
-{
+static __inline__ __m256d __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_mask_broadcast_f64x2(__m256d __O, __mmask8 __M, __m128d __A) {
   return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__M,
                                             (__v4df)_mm256_broadcast_f64x2(__A),
                                             (__v4df)__O);
 }
 
-static __inline__ __m256d __DEFAULT_FN_ATTRS256
-_mm256_maskz_broadcast_f64x2 (__mmask8 __M, __m128d __A)
-{
+static __inline__ __m256d __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_maskz_broadcast_f64x2(__mmask8 __M, __m128d __A) {
   return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__M,
                                             (__v4df)_mm256_broadcast_f64x2(__A),
                                             (__v4df)_mm256_setzero_pd());
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS128
-_mm_broadcast_i32x2 (__m128i __A)
-{
+static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
+_mm_broadcast_i32x2(__m128i __A) {
   return (__m128i)__builtin_shufflevector((__v4si)__A, (__v4si)__A,
                                           0, 1, 0, 1);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS128
-_mm_mask_broadcast_i32x2 (__m128i __O, __mmask8 __M, __m128i __A)
-{
+static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
+_mm_mask_broadcast_i32x2(__m128i __O, __mmask8 __M, __m128i __A) {
   return (__m128i)__builtin_ia32_selectd_128((__mmask8)__M,
                                              (__v4si)_mm_broadcast_i32x2(__A),
                                              (__v4si)__O);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS128
-_mm_maskz_broadcast_i32x2 (__mmask8 __M, __m128i __A)
-{
+static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
+_mm_maskz_broadcast_i32x2(__mmask8 __M, __m128i __A) {
   return (__m128i)__builtin_ia32_selectd_128((__mmask8)__M,
                                              (__v4si)_mm_broadcast_i32x2(__A),
                                              (__v4si)_mm_setzero_si128());
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_broadcast_i32x2 (__m128i __A)
-{
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_broadcast_i32x2(__m128i __A) {
   return (__m256i)__builtin_shufflevector((__v4si)__A, (__v4si)__A,
                                           0, 1, 0, 1, 0, 1, 0, 1);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_mask_broadcast_i32x2 (__m256i __O, __mmask8 __M, __m128i __A)
-{
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_mask_broadcast_i32x2(__m256i __O, __mmask8 __M, __m128i __A) {
   return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M,
                                              (__v8si)_mm256_broadcast_i32x2(__A),
                                              (__v8si)__O);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_maskz_broadcast_i32x2 (__mmask8 __M, __m128i __A)
-{
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_maskz_broadcast_i32x2(__mmask8 __M, __m128i __A) {
   return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M,
                                              (__v8si)_mm256_broadcast_i32x2(__A),
                                              (__v8si)_mm256_setzero_si256());
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_broadcast_i64x2(__m128i __A)
-{
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_broadcast_i64x2(__m128i __A) {
   return (__m256i)__builtin_shufflevector((__v2di)__A, (__v2di)__A,
                                           0, 1, 0, 1);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_mask_broadcast_i64x2(__m256i __O, __mmask8 __M, __m128i __A)
-{
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_mask_broadcast_i64x2(__m256i __O, __mmask8 __M, __m128i __A) {
   return (__m256i)__builtin_ia32_selectq_256((__mmask8)__M,
                                             (__v4di)_mm256_broadcast_i64x2(__A),
                                             (__v4di)__O);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_maskz_broadcast_i64x2 (__mmask8 __M, __m128i __A)
-{
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_maskz_broadcast_i64x2(__mmask8 __M, __m128i __A) {
   return (__m256i)__builtin_ia32_selectq_256((__mmask8)__M,
                                             (__v4di)_mm256_broadcast_i64x2(__A),
                                             (__v4di)_mm256_setzero_si256());
 }
 
-#define _mm256_extractf64x2_pd(A, imm) \
-  ((__m128d)__builtin_ia32_extractf64x2_256_mask((__v4df)(__m256d)(A), \
-                                                 (int)(imm), \
-                                                 (__v2df)_mm_undefined_pd(), \
-                                                 (__mmask8)-1))
+#define _mm256_extractf64x2_pd(A, imm)                                         \
+  ((__m128d)__builtin_ia32_extractf64x2_256_mask(                              \
+      (__v4df)(__m256d)(A), (int)(imm), (__v2df)_mm_setzero_pd(),              \
+      (__mmask8) - 1))
 
 #define _mm256_mask_extractf64x2_pd(W, U, A, imm) \
   ((__m128d)__builtin_ia32_extractf64x2_256_mask((__v4df)(__m256d)(A), \
@@ -1089,11 +1071,10 @@ _mm256_maskz_broadcast_i64x2 (__mmask8 __M, __m128i __A)
                                                  (__v2df)_mm_setzero_pd(), \
                                                  (__mmask8)(U)))
 
-#define _mm256_extracti64x2_epi64(A, imm) \
-  ((__m128i)__builtin_ia32_extracti64x2_256_mask((__v4di)(__m256i)(A), \
-                                                (int)(imm), \
-                                                (__v2di)_mm_undefined_si128(), \
-                                                (__mmask8)-1))
+#define _mm256_extracti64x2_epi64(A, imm)                                      \
+  ((__m128i)__builtin_ia32_extracti64x2_256_mask(                              \
+      (__v4di)(__m256i)(A), (int)(imm), (__v2di)_mm_setzero_si128(),           \
+      (__mmask8) - 1))
 
 #define _mm256_mask_extracti64x2_epi64(W, U, A, imm) \
   ((__m128i)__builtin_ia32_extracti64x2_256_mask((__v4di)(__m256i)(A), \
@@ -1169,5 +1150,7 @@ _mm256_maskz_broadcast_i64x2 (__mmask8 __M, __m128i __A)
 
 #undef __DEFAULT_FN_ATTRS128
 #undef __DEFAULT_FN_ATTRS256
+#undef __DEFAULT_FN_ATTRS256_CONSTEXPR
+#undef __DEFAULT_FN_ATTRS128_CONSTEXPR
 
 #endif
diff --git a/lib/include/avx512vlfp16intrin.h b/lib/include/avx512vlfp16intrin.h
index a12acb7d9a..4f9c7cb79e 100644
--- a/lib/include/avx512vlfp16intrin.h
+++ b/lib/include/avx512vlfp16intrin.h
@@ -19,51 +19,64 @@
 /* Define the default attributes for the functions in this file. */
 #define __DEFAULT_FN_ATTRS256                                                  \
   __attribute__((__always_inline__, __nodebug__,                               \
-                 __target__("avx512fp16,avx512vl,no-evex512"),                 \
+                 __target__("avx512fp16,avx512vl"),                            \
                  __min_vector_width__(256)))
 #define __DEFAULT_FN_ATTRS128                                                  \
   __attribute__((__always_inline__, __nodebug__,                               \
-                 __target__("avx512fp16,avx512vl,no-evex512"),                 \
+                 __target__("avx512fp16,avx512vl"),                            \
                  __min_vector_width__(128)))
 
-static __inline__ _Float16 __DEFAULT_FN_ATTRS128 _mm_cvtsh_h(__m128h __a) {
+#if defined(__cplusplus) && (__cplusplus >= 201103L)
+#define __DEFAULT_FN_ATTRS256_CONSTEXPR __DEFAULT_FN_ATTRS256 constexpr
+#define __DEFAULT_FN_ATTRS128_CONSTEXPR __DEFAULT_FN_ATTRS128 constexpr
+#else
+#define __DEFAULT_FN_ATTRS256_CONSTEXPR __DEFAULT_FN_ATTRS256
+#define __DEFAULT_FN_ATTRS128_CONSTEXPR __DEFAULT_FN_ATTRS128
+#endif
+
+static __inline__ _Float16 __DEFAULT_FN_ATTRS128_CONSTEXPR
+_mm_cvtsh_h(__m128h __a) {
   return __a[0];
 }
 
-static __inline__ _Float16 __DEFAULT_FN_ATTRS256 _mm256_cvtsh_h(__m256h __a) {
+static __inline__ _Float16 __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_cvtsh_h(__m256h __a) {
   return __a[0];
 }
 
-static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_set_sh(_Float16 __h) {
+static __inline__ __m128h __DEFAULT_FN_ATTRS128_CONSTEXPR
+_mm_set_sh(_Float16 __h) {
   return __extension__(__m128h){__h, 0, 0, 0, 0, 0, 0, 0};
 }
 
-static __inline __m128h __DEFAULT_FN_ATTRS128 _mm_set1_ph(_Float16 __h) {
+static __inline __m128h __DEFAULT_FN_ATTRS128_CONSTEXPR
+_mm_set1_ph(_Float16 __h) {
   return (__m128h)(__v8hf){__h, __h, __h, __h, __h, __h, __h, __h};
 }
 
-static __inline __m256h __DEFAULT_FN_ATTRS256 _mm256_set1_ph(_Float16 __h) {
+static __inline __m256h __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_set1_ph(_Float16 __h) {
   return (__m256h)(__v16hf){__h, __h, __h, __h, __h, __h, __h, __h,
                             __h, __h, __h, __h, __h, __h, __h, __h};
 }
 
-static __inline __m128h __DEFAULT_FN_ATTRS128
+static __inline __m128h __DEFAULT_FN_ATTRS128_CONSTEXPR
 _mm_set_ph(_Float16 __h1, _Float16 __h2, _Float16 __h3, _Float16 __h4,
            _Float16 __h5, _Float16 __h6, _Float16 __h7, _Float16 __h8) {
   return (__m128h)(__v8hf){__h8, __h7, __h6, __h5, __h4, __h3, __h2, __h1};
 }
 
-static __inline __m256h __DEFAULT_FN_ATTRS256
+static __inline __m256h __DEFAULT_FN_ATTRS256_CONSTEXPR
 _mm256_set1_pch(_Float16 _Complex h) {
   return (__m256h)_mm256_set1_ps(__builtin_bit_cast(float, h));
 }
 
-static __inline __m128h __DEFAULT_FN_ATTRS128
+static __inline __m128h __DEFAULT_FN_ATTRS128_CONSTEXPR
 _mm_set1_pch(_Float16 _Complex h) {
   return (__m128h)_mm_set1_ps(__builtin_bit_cast(float, h));
 }
 
-static __inline __m256h __DEFAULT_FN_ATTRS256
+static __inline __m256h __DEFAULT_FN_ATTRS256_CONSTEXPR
 _mm256_set_ph(_Float16 __h1, _Float16 __h2, _Float16 __h3, _Float16 __h4,
               _Float16 __h5, _Float16 __h6, _Float16 __h7, _Float16 __h8,
               _Float16 __h9, _Float16 __h10, _Float16 __h11, _Float16 __h12,
@@ -73,13 +86,20 @@ _mm256_set_ph(_Float16 __h1, _Float16 __h2, _Float16 __h3, _Float16 __h4,
                             __h4,  __h3,  __h2,  __h1};
 }
 
-#define _mm_setr_ph(h1, h2, h3, h4, h5, h6, h7, h8)                            \
-  _mm_set_ph((h8), (h7), (h6), (h5), (h4), (h3), (h2), (h1))
+static __inline__ __m128h __DEFAULT_FN_ATTRS128_CONSTEXPR
+_mm_setr_ph(_Float16 __e0, _Float16 __e1, _Float16 __e2, _Float16 __e3,
+            _Float16 __e4, _Float16 __e5, _Float16 __e6, _Float16 __e7) {
+  return _mm_set_ph(__e7, __e6, __e5, __e4, __e3, __e2, __e1, __e0);
+}
 
-#define _mm256_setr_ph(h1, h2, h3, h4, h5, h6, h7, h8, h9, h10, h11, h12, h13, \
-                       h14, h15, h16)                                          \
-  _mm256_set_ph((h16), (h15), (h14), (h13), (h12), (h11), (h10), (h9), (h8),   \
-                (h7), (h6), (h5), (h4), (h3), (h2), (h1))
+static __inline__ __m256h __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_setr_ph(_Float16 __e0, _Float16 __e1, _Float16 __e2, _Float16 __e3,
+               _Float16 __e4, _Float16 __e5, _Float16 __e6, _Float16 __e7,
+               _Float16 __e8, _Float16 __e9, _Float16 __e10, _Float16 __e11,
+               _Float16 __e12, _Float16 __e13, _Float16 __e14, _Float16 __e15) {
+  return _mm256_set_ph(__e15, __e14, __e13, __e12, __e11, __e10, __e9, __e8,
+                       __e7, __e6, __e5, __e4, __e3, __e2, __e1, __e0);
+}
 
 static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_add_ph(__m256h __A,
                                                               __m256h __B) {
@@ -229,12 +249,12 @@ static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_maskz_div_ph(__mmask8 __U,
                                               (__v8hf)_mm_setzero_ph());
 }
 
-static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_min_ph(__m256h __A,
-                                                              __m256h __B) {
+static __inline__ __m256h __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_min_ph(__m256h __A, __m256h __B) {
   return (__m256h)__builtin_ia32_minph256((__v16hf)__A, (__v16hf)__B);
 }
 
-static __inline__ __m256h __DEFAULT_FN_ATTRS256
+static __inline__ __m256h __DEFAULT_FN_ATTRS256_CONSTEXPR
 _mm256_mask_min_ph(__m256h __W, __mmask16 __U, __m256h __A, __m256h __B) {
   return (__m256h)__builtin_ia32_selectph_256(
       (__mmask16)__U,
@@ -242,7 +262,7 @@ _mm256_mask_min_ph(__m256h __W, __mmask16 __U, __m256h __A, __m256h __B) {
       (__v16hf)__W);
 }
 
-static __inline__ __m256h __DEFAULT_FN_ATTRS256
+static __inline__ __m256h __DEFAULT_FN_ATTRS256_CONSTEXPR
 _mm256_maskz_min_ph(__mmask16 __U, __m256h __A, __m256h __B) {
   return (__m256h)__builtin_ia32_selectph_256(
       (__mmask16)__U,
@@ -250,34 +270,31 @@ _mm256_maskz_min_ph(__mmask16 __U, __m256h __A, __m256h __B) {
       (__v16hf)_mm256_setzero_ph());
 }
 
-static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_min_ph(__m128h __A,
-                                                           __m128h __B) {
+static __inline__ __m128h __DEFAULT_FN_ATTRS128_CONSTEXPR
+_mm_min_ph(__m128h __A, __m128h __B) {
   return (__m128h)__builtin_ia32_minph128((__v8hf)__A, (__v8hf)__B);
 }
 
-static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_mask_min_ph(__m128h __W,
-                                                                __mmask8 __U,
-                                                                __m128h __A,
-                                                                __m128h __B) {
+static __inline__ __m128h __DEFAULT_FN_ATTRS128_CONSTEXPR
+_mm_mask_min_ph(__m128h __W, __mmask8 __U, __m128h __A, __m128h __B) {
   return (__m128h)__builtin_ia32_selectph_128(
       (__mmask8)__U, (__v8hf)__builtin_ia32_minph128((__v8hf)__A, (__v8hf)__B),
       (__v8hf)__W);
 }
 
-static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_maskz_min_ph(__mmask8 __U,
-                                                                 __m128h __A,
-                                                                 __m128h __B) {
+static __inline__ __m128h __DEFAULT_FN_ATTRS128_CONSTEXPR
+_mm_maskz_min_ph(__mmask8 __U, __m128h __A, __m128h __B) {
   return (__m128h)__builtin_ia32_selectph_128(
       (__mmask8)__U, (__v8hf)__builtin_ia32_minph128((__v8hf)__A, (__v8hf)__B),
       (__v8hf)_mm_setzero_ph());
 }
 
-static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_max_ph(__m256h __A,
-                                                              __m256h __B) {
+static __inline__ __m256h __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_max_ph(__m256h __A, __m256h __B) {
   return (__m256h)__builtin_ia32_maxph256((__v16hf)__A, (__v16hf)__B);
 }
 
-static __inline__ __m256h __DEFAULT_FN_ATTRS256
+static __inline__ __m256h __DEFAULT_FN_ATTRS256_CONSTEXPR
 _mm256_mask_max_ph(__m256h __W, __mmask16 __U, __m256h __A, __m256h __B) {
   return (__m256h)__builtin_ia32_selectph_256(
       (__mmask16)__U,
@@ -285,7 +302,7 @@ _mm256_mask_max_ph(__m256h __W, __mmask16 __U, __m256h __A, __m256h __B) {
       (__v16hf)__W);
 }
 
-static __inline__ __m256h __DEFAULT_FN_ATTRS256
+static __inline__ __m256h __DEFAULT_FN_ATTRS256_CONSTEXPR
 _mm256_maskz_max_ph(__mmask16 __U, __m256h __A, __m256h __B) {
   return (__m256h)__builtin_ia32_selectph_256(
       (__mmask16)__U,
@@ -293,33 +310,32 @@ _mm256_maskz_max_ph(__mmask16 __U, __m256h __A, __m256h __B) {
       (__v16hf)_mm256_setzero_ph());
 }
 
-static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_max_ph(__m128h __A,
-                                                           __m128h __B) {
+static __inline__ __m128h __DEFAULT_FN_ATTRS128_CONSTEXPR
+_mm_max_ph(__m128h __A, __m128h __B) {
   return (__m128h)__builtin_ia32_maxph128((__v8hf)__A, (__v8hf)__B);
 }
 
-static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_mask_max_ph(__m128h __W,
-                                                                __mmask8 __U,
-                                                                __m128h __A,
-                                                                __m128h __B) {
+static __inline__ __m128h __DEFAULT_FN_ATTRS128_CONSTEXPR
+_mm_mask_max_ph(__m128h __W, __mmask8 __U, __m128h __A, __m128h __B) {
   return (__m128h)__builtin_ia32_selectph_128(
       (__mmask8)__U, (__v8hf)__builtin_ia32_maxph128((__v8hf)__A, (__v8hf)__B),
       (__v8hf)__W);
 }
 
-static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_maskz_max_ph(__mmask8 __U,
-                                                                 __m128h __A,
-                                                                 __m128h __B) {
+static __inline__ __m128h __DEFAULT_FN_ATTRS128_CONSTEXPR
+_mm_maskz_max_ph(__mmask8 __U, __m128h __A, __m128h __B) {
   return (__m128h)__builtin_ia32_selectph_128(
       (__mmask8)__U, (__v8hf)__builtin_ia32_maxph128((__v8hf)__A, (__v8hf)__B),
       (__v8hf)_mm_setzero_ph());
 }
 
-static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_abs_ph(__m256h __A) {
+static __inline__ __m256h __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_abs_ph(__m256h __A) {
   return (__m256h)_mm256_and_epi32(_mm256_set1_epi32(0x7FFF7FFF), (__m256i)__A);
 }
 
-static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_abs_ph(__m128h __A) {
+static __inline__ __m128h __DEFAULT_FN_ATTRS128_CONSTEXPR
+_mm_abs_ph(__m128h __A) {
   return (__m128h)_mm_and_epi32(_mm_set1_epi32(0x7FFF7FFF), (__m128i)__A);
 }
 
@@ -601,7 +617,7 @@ _mm256_maskz_scalef_ph(__mmask16 __U, __m256h __A, __m256h __B) {
                                             (__mmask16)(U)))
 
 static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_sqrt_ph(__m128h __a) {
-  return __builtin_ia32_sqrtph((__v8hf)__a);
+  return __builtin_elementwise_sqrt(__a);
 }
 
 static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_mask_sqrt_ph(__m128h __W,
@@ -618,7 +634,7 @@ static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_maskz_sqrt_ph(__mmask8 __U,
 }
 
 static __inline __m256h __DEFAULT_FN_ATTRS256 _mm256_sqrt_ph(__m256h __a) {
-  return (__m256h)__builtin_ia32_sqrtph256((__v16hf)__a);
+  return __builtin_elementwise_sqrt(__a);
 }
 
 static __inline__ __m256h __DEFAULT_FN_ATTRS256
@@ -790,34 +806,35 @@ _mm256_maskz_cvttph_epi16(__mmask16 __U, __m256h __A) {
       (__v16hf)__A, (__v16hi)_mm256_setzero_si256(), (__mmask16)__U);
 }
 
-static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_cvtepi16_ph(__m128i __A) {
+static __inline__ __m128h __DEFAULT_FN_ATTRS128_CONSTEXPR
+_mm_cvtepi16_ph(__m128i __A) {
   return (__m128h) __builtin_convertvector((__v8hi)__A, __v8hf);
 }
 
-static __inline__ __m128h __DEFAULT_FN_ATTRS128
+static __inline__ __m128h __DEFAULT_FN_ATTRS128_CONSTEXPR
 _mm_mask_cvtepi16_ph(__m128h __W, __mmask8 __U, __m128i __A) {
   return (__m128h)__builtin_ia32_selectph_128(
       (__mmask8)__U, (__v8hf)_mm_cvtepi16_ph(__A), (__v8hf)__W);
 }
 
-static __inline__ __m128h __DEFAULT_FN_ATTRS128
+static __inline__ __m128h __DEFAULT_FN_ATTRS128_CONSTEXPR
 _mm_maskz_cvtepi16_ph(__mmask8 __U, __m128i __A) {
   return (__m128h)__builtin_ia32_selectph_128(
       (__mmask8)__U, (__v8hf)_mm_cvtepi16_ph(__A), (__v8hf)_mm_setzero_ph());
 }
 
-static __inline__ __m256h __DEFAULT_FN_ATTRS256
+static __inline__ __m256h __DEFAULT_FN_ATTRS256_CONSTEXPR
 _mm256_cvtepi16_ph(__m256i __A) {
   return (__m256h) __builtin_convertvector((__v16hi)__A, __v16hf);
 }
 
-static __inline__ __m256h __DEFAULT_FN_ATTRS256
+static __inline__ __m256h __DEFAULT_FN_ATTRS256_CONSTEXPR
 _mm256_mask_cvtepi16_ph(__m256h __W, __mmask16 __U, __m256i __A) {
   return (__m256h)__builtin_ia32_selectph_256(
       (__mmask16)__U, (__v16hf)_mm256_cvtepi16_ph(__A), (__v16hf)__W);
 }
 
-static __inline__ __m256h __DEFAULT_FN_ATTRS256
+static __inline__ __m256h __DEFAULT_FN_ATTRS256_CONSTEXPR
 _mm256_maskz_cvtepi16_ph(__mmask16 __U, __m256i __A) {
   return (__m256h)__builtin_ia32_selectph_256((__mmask16)__U,
                                               (__v16hf)_mm256_cvtepi16_ph(__A),
@@ -894,34 +911,35 @@ _mm256_maskz_cvttph_epu16(__mmask16 __U, __m256h __A) {
       (__v16hf)__A, (__v16hu)_mm256_setzero_si256(), (__mmask16)__U);
 }
 
-static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_cvtepu16_ph(__m128i __A) {
+static __inline__ __m128h __DEFAULT_FN_ATTRS128_CONSTEXPR
+_mm_cvtepu16_ph(__m128i __A) {
   return (__m128h) __builtin_convertvector((__v8hu)__A, __v8hf);
 }
 
-static __inline__ __m128h __DEFAULT_FN_ATTRS128
+static __inline__ __m128h __DEFAULT_FN_ATTRS128_CONSTEXPR
 _mm_mask_cvtepu16_ph(__m128h __W, __mmask8 __U, __m128i __A) {
   return (__m128h)__builtin_ia32_selectph_128(
       (__mmask8)__U, (__v8hf)_mm_cvtepu16_ph(__A), (__v8hf)__W);
 }
 
-static __inline__ __m128h __DEFAULT_FN_ATTRS128
+static __inline__ __m128h __DEFAULT_FN_ATTRS128_CONSTEXPR
 _mm_maskz_cvtepu16_ph(__mmask8 __U, __m128i __A) {
   return (__m128h)__builtin_ia32_selectph_128(
       (__mmask8)__U, (__v8hf)_mm_cvtepu16_ph(__A), (__v8hf)_mm_setzero_ph());
 }
 
-static __inline__ __m256h __DEFAULT_FN_ATTRS256
+static __inline__ __m256h __DEFAULT_FN_ATTRS256_CONSTEXPR
 _mm256_cvtepu16_ph(__m256i __A) {
   return (__m256h) __builtin_convertvector((__v16hu)__A, __v16hf);
 }
 
-static __inline__ __m256h __DEFAULT_FN_ATTRS256
+static __inline__ __m256h __DEFAULT_FN_ATTRS256_CONSTEXPR
 _mm256_mask_cvtepu16_ph(__m256h __W, __mmask16 __U, __m256i __A) {
   return (__m256h)__builtin_ia32_selectph_256(
       (__mmask16)__U, (__v16hf)_mm256_cvtepu16_ph(__A), (__v16hf)__W);
 }
 
-static __inline__ __m256h __DEFAULT_FN_ATTRS256
+static __inline__ __m256h __DEFAULT_FN_ATTRS256_CONSTEXPR
 _mm256_maskz_cvtepu16_ph(__mmask16 __U, __m256i __A) {
   return (__m256h)__builtin_ia32_selectph_256((__mmask16)__U,
                                               (__v16hf)_mm256_cvtepu16_ph(__A),
@@ -1015,18 +1033,18 @@ _mm_maskz_cvtepi32_ph(__mmask8 __U, __m128i __A) {
       (__v4si)__A, (__v8hf)_mm_setzero_ph(), (__mmask8)__U);
 }
 
-static __inline__ __m128h __DEFAULT_FN_ATTRS256
+static __inline__ __m128h __DEFAULT_FN_ATTRS256_CONSTEXPR
 _mm256_cvtepi32_ph(__m256i __A) {
   return (__m128h) __builtin_convertvector((__v8si)__A, __v8hf);
 }
 
-static __inline__ __m128h __DEFAULT_FN_ATTRS256
+static __inline__ __m128h __DEFAULT_FN_ATTRS256_CONSTEXPR
 _mm256_mask_cvtepi32_ph(__m128h __W, __mmask8 __U, __m256i __A) {
   return (__m128h)__builtin_ia32_selectph_128(
       (__mmask8)__U, (__v8hf)_mm256_cvtepi32_ph(__A), (__v8hf)__W);
 }
 
-static __inline__ __m128h __DEFAULT_FN_ATTRS256
+static __inline__ __m128h __DEFAULT_FN_ATTRS256_CONSTEXPR
 _mm256_maskz_cvtepi32_ph(__mmask8 __U, __m256i __A) {
   return (__m128h)__builtin_ia32_selectph_128(
       (__mmask8)__U, (__v8hf)_mm256_cvtepi32_ph(__A), (__v8hf)_mm_setzero_ph());
@@ -1049,18 +1067,18 @@ _mm_maskz_cvtepu32_ph(__mmask8 __U, __m128i __A) {
       (__v4su)__A, (__v8hf)_mm_setzero_ph(), (__mmask8)__U);
 }
 
-static __inline__ __m128h __DEFAULT_FN_ATTRS256
+static __inline__ __m128h __DEFAULT_FN_ATTRS256_CONSTEXPR
 _mm256_cvtepu32_ph(__m256i __A) {
   return (__m128h) __builtin_convertvector((__v8su)__A, __v8hf);
 }
 
-static __inline__ __m128h __DEFAULT_FN_ATTRS256
+static __inline__ __m128h __DEFAULT_FN_ATTRS256_CONSTEXPR
 _mm256_mask_cvtepu32_ph(__m128h __W, __mmask8 __U, __m256i __A) {
   return (__m128h)__builtin_ia32_selectph_128(
       (__mmask8)__U, (__v8hf)_mm256_cvtepu32_ph(__A), (__v8hf)__W);
 }
 
-static __inline__ __m128h __DEFAULT_FN_ATTRS256
+static __inline__ __m128h __DEFAULT_FN_ATTRS256_CONSTEXPR
 _mm256_maskz_cvtepu32_ph(__mmask8 __U, __m256i __A) {
   return (__m128h)__builtin_ia32_selectph_128(
       (__mmask8)__U, (__v8hf)_mm256_cvtepu32_ph(__A), (__v8hf)_mm_setzero_ph());
@@ -1419,8 +1437,8 @@ _mm256_maskz_cvtxps_ph(__mmask8 __U, __m256 __A) {
 static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_fmadd_ph(__m128h __A,
                                                              __m128h __B,
                                                              __m128h __C) {
-  return (__m128h)__builtin_ia32_vfmaddph((__v8hf)__A, (__v8hf)__B,
-                                          (__v8hf)__C);
+  return (__m128h)__builtin_elementwise_fma((__v8hf)__A, (__v8hf)__B,
+                                            (__v8hf)__C);
 }
 
 static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_mask_fmadd_ph(__m128h __A,
@@ -1429,7 +1447,7 @@ static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_mask_fmadd_ph(__m128h __A,
                                                                   __m128h __C) {
   return (__m128h)__builtin_ia32_selectph_128(
       (__mmask8)__U,
-      __builtin_ia32_vfmaddph((__v8hf)__A, (__v8hf)__B, (__v8hf)__C),
+      __builtin_elementwise_fma((__v8hf)__A, (__v8hf)__B, (__v8hf)__C),
       (__v8hf)__A);
 }
 
@@ -1437,7 +1455,7 @@ static __inline__ __m128h __DEFAULT_FN_ATTRS128
 _mm_mask3_fmadd_ph(__m128h __A, __m128h __B, __m128h __C, __mmask8 __U) {
   return (__m128h)__builtin_ia32_selectph_128(
       (__mmask8)__U,
-      __builtin_ia32_vfmaddph((__v8hf)__A, (__v8hf)__B, (__v8hf)__C),
+      __builtin_elementwise_fma((__v8hf)__A, (__v8hf)__B, (__v8hf)__C),
       (__v8hf)__C);
 }
 
@@ -1445,15 +1463,15 @@ static __inline__ __m128h __DEFAULT_FN_ATTRS128
 _mm_maskz_fmadd_ph(__mmask8 __U, __m128h __A, __m128h __B, __m128h __C) {
   return (__m128h)__builtin_ia32_selectph_128(
       (__mmask8)__U,
-      __builtin_ia32_vfmaddph((__v8hf)__A, (__v8hf)__B, (__v8hf)__C),
+      __builtin_elementwise_fma((__v8hf)__A, (__v8hf)__B, (__v8hf)__C),
       (__v8hf)_mm_setzero_ph());
 }
 
 static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_fmsub_ph(__m128h __A,
                                                              __m128h __B,
                                                              __m128h __C) {
-  return (__m128h)__builtin_ia32_vfmaddph((__v8hf)__A, (__v8hf)__B,
-                                          -(__v8hf)__C);
+  return (__m128h)__builtin_elementwise_fma((__v8hf)__A, (__v8hf)__B,
+                                            -(__v8hf)__C);
 }
 
 static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_mask_fmsub_ph(__m128h __A,
@@ -1476,7 +1494,7 @@ static __inline__ __m128h __DEFAULT_FN_ATTRS128
 _mm_mask3_fnmadd_ph(__m128h __A, __m128h __B, __m128h __C, __mmask8 __U) {
   return (__m128h)__builtin_ia32_selectph_128(
       (__mmask8)__U,
-      __builtin_ia32_vfmaddph(-(__v8hf)__A, (__v8hf)__B, (__v8hf)__C),
+      __builtin_elementwise_fma(-(__v8hf)__A, (__v8hf)__B, (__v8hf)__C),
       (__v8hf)__C);
 }
 
@@ -1484,7 +1502,7 @@ static __inline__ __m128h __DEFAULT_FN_ATTRS128
 _mm_maskz_fnmadd_ph(__mmask8 __U, __m128h __A, __m128h __B, __m128h __C) {
   return (__m128h)__builtin_ia32_selectph_128(
       (__mmask8)__U,
-      __builtin_ia32_vfmaddph(-(__v8hf)__A, (__v8hf)__B, (__v8hf)__C),
+      __builtin_elementwise_fma(-(__v8hf)__A, (__v8hf)__B, (__v8hf)__C),
       (__v8hf)_mm_setzero_ph());
 }
 
@@ -1492,22 +1510,22 @@ static __inline__ __m128h __DEFAULT_FN_ATTRS128
 _mm_maskz_fnmsub_ph(__mmask8 __U, __m128h __A, __m128h __B, __m128h __C) {
   return (__m128h)__builtin_ia32_selectph_128(
       (__mmask8)__U,
-      __builtin_ia32_vfmaddph(-(__v8hf)__A, (__v8hf)__B, -(__v8hf)__C),
+      __builtin_elementwise_fma(-(__v8hf)__A, (__v8hf)__B, -(__v8hf)__C),
       (__v8hf)_mm_setzero_ph());
 }
 
 static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_fmadd_ph(__m256h __A,
                                                                 __m256h __B,
                                                                 __m256h __C) {
-  return (__m256h)__builtin_ia32_vfmaddph256((__v16hf)__A, (__v16hf)__B,
-                                             (__v16hf)__C);
+  return (__m256h)__builtin_elementwise_fma((__v16hf)__A, (__v16hf)__B,
+                                            (__v16hf)__C);
 }
 
 static __inline__ __m256h __DEFAULT_FN_ATTRS256
 _mm256_mask_fmadd_ph(__m256h __A, __mmask16 __U, __m256h __B, __m256h __C) {
   return (__m256h)__builtin_ia32_selectph_256(
       (__mmask16)__U,
-      __builtin_ia32_vfmaddph256((__v16hf)__A, (__v16hf)__B, (__v16hf)__C),
+      __builtin_elementwise_fma((__v16hf)__A, (__v16hf)__B, (__v16hf)__C),
       (__v16hf)__A);
 }
 
@@ -1515,7 +1533,7 @@ static __inline__ __m256h __DEFAULT_FN_ATTRS256
 _mm256_mask3_fmadd_ph(__m256h __A, __m256h __B, __m256h __C, __mmask16 __U) {
   return (__m256h)__builtin_ia32_selectph_256(
       (__mmask16)__U,
-      __builtin_ia32_vfmaddph256((__v16hf)__A, (__v16hf)__B, (__v16hf)__C),
+      __builtin_elementwise_fma((__v16hf)__A, (__v16hf)__B, (__v16hf)__C),
       (__v16hf)__C);
 }
 
@@ -1523,22 +1541,22 @@ static __inline__ __m256h __DEFAULT_FN_ATTRS256
 _mm256_maskz_fmadd_ph(__mmask16 __U, __m256h __A, __m256h __B, __m256h __C) {
   return (__m256h)__builtin_ia32_selectph_256(
       (__mmask16)__U,
-      __builtin_ia32_vfmaddph256((__v16hf)__A, (__v16hf)__B, (__v16hf)__C),
+      __builtin_elementwise_fma((__v16hf)__A, (__v16hf)__B, (__v16hf)__C),
       (__v16hf)_mm256_setzero_ph());
 }
 
 static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_fmsub_ph(__m256h __A,
                                                                 __m256h __B,
                                                                 __m256h __C) {
-  return (__m256h)__builtin_ia32_vfmaddph256((__v16hf)__A, (__v16hf)__B,
-                                             -(__v16hf)__C);
+  return (__m256h)__builtin_elementwise_fma((__v16hf)__A, (__v16hf)__B,
+                                            -(__v16hf)__C);
 }
 
 static __inline__ __m256h __DEFAULT_FN_ATTRS256
 _mm256_mask_fmsub_ph(__m256h __A, __mmask16 __U, __m256h __B, __m256h __C) {
   return (__m256h)__builtin_ia32_selectph_256(
       (__mmask16)__U,
-      __builtin_ia32_vfmaddph256((__v16hf)__A, (__v16hf)__B, -(__v16hf)__C),
+      __builtin_elementwise_fma((__v16hf)__A, (__v16hf)__B, -(__v16hf)__C),
       (__v16hf)__A);
 }
 
@@ -1546,7 +1564,7 @@ static __inline__ __m256h __DEFAULT_FN_ATTRS256
 _mm256_maskz_fmsub_ph(__mmask16 __U, __m256h __A, __m256h __B, __m256h __C) {
   return (__m256h)__builtin_ia32_selectph_256(
       (__mmask16)__U,
-      __builtin_ia32_vfmaddph256((__v16hf)__A, (__v16hf)__B, -(__v16hf)__C),
+      __builtin_elementwise_fma((__v16hf)__A, (__v16hf)__B, -(__v16hf)__C),
       (__v16hf)_mm256_setzero_ph());
 }
 
@@ -1554,7 +1572,7 @@ static __inline__ __m256h __DEFAULT_FN_ATTRS256
 _mm256_mask3_fnmadd_ph(__m256h __A, __m256h __B, __m256h __C, __mmask16 __U) {
   return (__m256h)__builtin_ia32_selectph_256(
       (__mmask16)__U,
-      __builtin_ia32_vfmaddph256(-(__v16hf)__A, (__v16hf)__B, (__v16hf)__C),
+      __builtin_elementwise_fma(-(__v16hf)__A, (__v16hf)__B, (__v16hf)__C),
       (__v16hf)__C);
 }
 
@@ -1562,7 +1580,7 @@ static __inline__ __m256h __DEFAULT_FN_ATTRS256
 _mm256_maskz_fnmadd_ph(__mmask16 __U, __m256h __A, __m256h __B, __m256h __C) {
   return (__m256h)__builtin_ia32_selectph_256(
       (__mmask16)__U,
-      __builtin_ia32_vfmaddph256(-(__v16hf)__A, (__v16hf)__B, (__v16hf)__C),
+      __builtin_elementwise_fma(-(__v16hf)__A, (__v16hf)__B, (__v16hf)__C),
       (__v16hf)_mm256_setzero_ph());
 }
 
@@ -1570,7 +1588,7 @@ static __inline__ __m256h __DEFAULT_FN_ATTRS256
 _mm256_maskz_fnmsub_ph(__mmask16 __U, __m256h __A, __m256h __B, __m256h __C) {
   return (__m256h)__builtin_ia32_selectph_256(
       (__mmask16)__U,
-      __builtin_ia32_vfmaddph256(-(__v16hf)__A, (__v16hf)__B, -(__v16hf)__C),
+      __builtin_elementwise_fma(-(__v16hf)__A, (__v16hf)__B, -(__v16hf)__C),
       (__v16hf)_mm256_setzero_ph());
 }
 
@@ -1684,7 +1702,7 @@ static __inline__ __m128h __DEFAULT_FN_ATTRS128
 _mm_mask3_fmsub_ph(__m128h __A, __m128h __B, __m128h __C, __mmask8 __U) {
   return (__m128h)__builtin_ia32_selectph_128(
       (__mmask8)__U,
-      __builtin_ia32_vfmaddph((__v8hf)__A, (__v8hf)__B, -(__v8hf)__C),
+      __builtin_elementwise_fma((__v8hf)__A, (__v8hf)__B, -(__v8hf)__C),
       (__v8hf)__C);
 }
 
@@ -1692,7 +1710,7 @@ static __inline__ __m256h __DEFAULT_FN_ATTRS256
 _mm256_mask3_fmsub_ph(__m256h __A, __m256h __B, __m256h __C, __mmask16 __U) {
   return (__m256h)__builtin_ia32_selectph_256(
       (__mmask16)__U,
-      __builtin_ia32_vfmaddph256((__v16hf)__A, (__v16hf)__B, -(__v16hf)__C),
+      __builtin_elementwise_fma((__v16hf)__A, (__v16hf)__B, -(__v16hf)__C),
       (__v16hf)__C);
 }
 
@@ -1715,45 +1733,45 @@ _mm256_mask3_fmsubadd_ph(__m256h __A, __m256h __B, __m256h __C, __mmask16 __U) {
 static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_fnmadd_ph(__m128h __A,
                                                               __m128h __B,
                                                               __m128h __C) {
-  return (__m128h)__builtin_ia32_vfmaddph((__v8hf)__A, -(__v8hf)__B,
-                                          (__v8hf)__C);
+  return (__m128h)__builtin_elementwise_fma((__v8hf)__A, -(__v8hf)__B,
+                                            (__v8hf)__C);
 }
 
 static __inline__ __m128h __DEFAULT_FN_ATTRS128
 _mm_mask_fnmadd_ph(__m128h __A, __mmask8 __U, __m128h __B, __m128h __C) {
   return (__m128h)__builtin_ia32_selectph_128(
       (__mmask8)__U,
-      __builtin_ia32_vfmaddph((__v8hf)__A, -(__v8hf)__B, (__v8hf)__C),
+      __builtin_elementwise_fma((__v8hf)__A, -(__v8hf)__B, (__v8hf)__C),
       (__v8hf)__A);
 }
 
 static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_fnmadd_ph(__m256h __A,
                                                                  __m256h __B,
                                                                  __m256h __C) {
-  return (__m256h)__builtin_ia32_vfmaddph256((__v16hf)__A, -(__v16hf)__B,
-                                             (__v16hf)__C);
+  return (__m256h)__builtin_elementwise_fma((__v16hf)__A, -(__v16hf)__B,
+                                            (__v16hf)__C);
 }
 
 static __inline__ __m256h __DEFAULT_FN_ATTRS256
 _mm256_mask_fnmadd_ph(__m256h __A, __mmask16 __U, __m256h __B, __m256h __C) {
   return (__m256h)__builtin_ia32_selectph_256(
       (__mmask16)__U,
-      __builtin_ia32_vfmaddph256((__v16hf)__A, -(__v16hf)__B, (__v16hf)__C),
+      __builtin_elementwise_fma((__v16hf)__A, -(__v16hf)__B, (__v16hf)__C),
       (__v16hf)__A);
 }
 
 static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_fnmsub_ph(__m128h __A,
                                                               __m128h __B,
                                                               __m128h __C) {
-  return (__m128h)__builtin_ia32_vfmaddph((__v8hf)__A, -(__v8hf)__B,
-                                          -(__v8hf)__C);
+  return (__m128h)__builtin_elementwise_fma((__v8hf)__A, -(__v8hf)__B,
+                                            -(__v8hf)__C);
 }
 
 static __inline__ __m128h __DEFAULT_FN_ATTRS128
 _mm_mask_fnmsub_ph(__m128h __A, __mmask8 __U, __m128h __B, __m128h __C) {
   return (__m128h)__builtin_ia32_selectph_128(
       (__mmask8)__U,
-      __builtin_ia32_vfmaddph((__v8hf)__A, -(__v8hf)__B, -(__v8hf)__C),
+      __builtin_elementwise_fma((__v8hf)__A, -(__v8hf)__B, -(__v8hf)__C),
       (__v8hf)__A);
 }
 
@@ -1761,22 +1779,22 @@ static __inline__ __m128h __DEFAULT_FN_ATTRS128
 _mm_mask3_fnmsub_ph(__m128h __A, __m128h __B, __m128h __C, __mmask8 __U) {
   return (__m128h)__builtin_ia32_selectph_128(
       (__mmask8)__U,
-      __builtin_ia32_vfmaddph((__v8hf)__A, -(__v8hf)__B, -(__v8hf)__C),
+      __builtin_elementwise_fma((__v8hf)__A, -(__v8hf)__B, -(__v8hf)__C),
       (__v8hf)__C);
 }
 
 static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_fnmsub_ph(__m256h __A,
                                                                  __m256h __B,
                                                                  __m256h __C) {
-  return (__m256h)__builtin_ia32_vfmaddph256((__v16hf)__A, -(__v16hf)__B,
-                                             -(__v16hf)__C);
+  return (__m256h)__builtin_elementwise_fma((__v16hf)__A, -(__v16hf)__B,
+                                            -(__v16hf)__C);
 }
 
 static __inline__ __m256h __DEFAULT_FN_ATTRS256
 _mm256_mask_fnmsub_ph(__m256h __A, __mmask16 __U, __m256h __B, __m256h __C) {
   return (__m256h)__builtin_ia32_selectph_256(
       (__mmask16)__U,
-      __builtin_ia32_vfmaddph256((__v16hf)__A, -(__v16hf)__B, -(__v16hf)__C),
+      __builtin_elementwise_fma((__v16hf)__A, -(__v16hf)__B, -(__v16hf)__C),
       (__v16hf)__A);
 }
 
@@ -1784,7 +1802,7 @@ static __inline__ __m256h __DEFAULT_FN_ATTRS256
 _mm256_mask3_fnmsub_ph(__m256h __A, __m256h __B, __m256h __C, __mmask16 __U) {
   return (__m256h)__builtin_ia32_selectph_256(
       (__mmask16)__U,
-      __builtin_ia32_vfmaddph256((__v16hf)__A, -(__v16hf)__B, -(__v16hf)__C),
+      __builtin_elementwise_fma((__v16hf)__A, -(__v16hf)__B, -(__v16hf)__C),
       (__v16hf)__C);
 }
 
@@ -1974,37 +1992,36 @@ _mm256_maskz_fmadd_pch(__mmask8 __U, __m256h __A, __m256h __B, __m256h __C) {
                                                     (__v8sf)__C, (__mmask8)__U);
 }
 
-static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_mask_blend_ph(__mmask8 __U,
-                                                                  __m128h __A,
-                                                                  __m128h __W) {
+static __inline__ __m128h __DEFAULT_FN_ATTRS128_CONSTEXPR
+_mm_mask_blend_ph(__mmask8 __U, __m128h __A, __m128h __W) {
   return (__m128h)__builtin_ia32_selectph_128((__mmask8)__U, (__v8hf)__W,
                                               (__v8hf)__A);
 }
 
-static __inline__ __m256h __DEFAULT_FN_ATTRS256
+static __inline__ __m256h __DEFAULT_FN_ATTRS256_CONSTEXPR
 _mm256_mask_blend_ph(__mmask16 __U, __m256h __A, __m256h __W) {
   return (__m256h)__builtin_ia32_selectph_256((__mmask16)__U, (__v16hf)__W,
                                               (__v16hf)__A);
 }
 
-static __inline__ __m128h __DEFAULT_FN_ATTRS128
+static __inline__ __m128h __DEFAULT_FN_ATTRS128_CONSTEXPR
 _mm_permutex2var_ph(__m128h __A, __m128i __I, __m128h __B) {
   return (__m128h)__builtin_ia32_vpermi2varhi128((__v8hi)__A, (__v8hi)__I,
                                                  (__v8hi)__B);
 }
 
-static __inline__ __m256h __DEFAULT_FN_ATTRS256
+static __inline__ __m256h __DEFAULT_FN_ATTRS256_CONSTEXPR
 _mm256_permutex2var_ph(__m256h __A, __m256i __I, __m256h __B) {
   return (__m256h)__builtin_ia32_vpermi2varhi256((__v16hi)__A, (__v16hi)__I,
                                                  (__v16hi)__B);
 }
 
-static __inline__ __m128h __DEFAULT_FN_ATTRS128
+static __inline__ __m128h __DEFAULT_FN_ATTRS128_CONSTEXPR
 _mm_permutexvar_ph(__m128i __A, __m128h __B) {
   return (__m128h)__builtin_ia32_permvarhi128((__v8hi)__B, (__v8hi)__A);
 }
 
-static __inline__ __m256h __DEFAULT_FN_ATTRS256
+static __inline__ __m256h __DEFAULT_FN_ATTRS256_CONSTEXPR
 _mm256_permutexvar_ph(__m256i __A, __m256h __B) {
   return (__m256h)__builtin_ia32_permvarhi256((__v16hi)__B, (__v16hi)__A);
 }
@@ -2066,6 +2083,8 @@ _mm_reduce_min_ph(__m128h __V) {
 
 #undef __DEFAULT_FN_ATTRS128
 #undef __DEFAULT_FN_ATTRS256
+#undef __DEFAULT_FN_ATTRS256_CONSTEXPR
+#undef __DEFAULT_FN_ATTRS128_CONSTEXPR
 
 #endif
 #endif
diff --git a/lib/include/avx512vlintrin.h b/lib/include/avx512vlintrin.h
index 2a5f7b43f6..ea43046240 100644
--- a/lib/include/avx512vlintrin.h
+++ b/lib/include/avx512vlintrin.h
@@ -15,14 +15,20 @@
 #define __AVX512VLINTRIN_H
 
 #define __DEFAULT_FN_ATTRS128                                                  \
-  __attribute__((__always_inline__, __nodebug__,                               \
-                 __target__("avx512vl,no-evex512"),                            \
+  __attribute__((__always_inline__, __nodebug__, __target__("avx512vl"),       \
                  __min_vector_width__(128)))
 #define __DEFAULT_FN_ATTRS256                                                  \
-  __attribute__((__always_inline__, __nodebug__,                               \
-                 __target__("avx512vl,no-evex512"),                            \
+  __attribute__((__always_inline__, __nodebug__, __target__("avx512vl"),       \
                  __min_vector_width__(256)))
 
+#if defined(__cplusplus) && (__cplusplus >= 201103L)
+#define __DEFAULT_FN_ATTRS256_CONSTEXPR __DEFAULT_FN_ATTRS256 constexpr
+#define __DEFAULT_FN_ATTRS128_CONSTEXPR __DEFAULT_FN_ATTRS128 constexpr
+#else
+#define __DEFAULT_FN_ATTRS256_CONSTEXPR __DEFAULT_FN_ATTRS256
+#define __DEFAULT_FN_ATTRS128_CONSTEXPR __DEFAULT_FN_ATTRS128
+#endif
+
 typedef short __v2hi __attribute__((__vector_size__(4)));
 typedef char __v4qi __attribute__((__vector_size__(4)));
 typedef char __v2qi __attribute__((__vector_size__(2)));
@@ -229,209 +235,183 @@ typedef char __v2qi __attribute__((__vector_size__(2)));
 #define _mm256_mask_cmpneq_epu64_mask(k, A, B) \
     _mm256_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_NE)
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_mask_add_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
-{
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_mask_add_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B) {
   return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
                                              (__v8si)_mm256_add_epi32(__A, __B),
                                              (__v8si)__W);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_maskz_add_epi32(__mmask8 __U, __m256i __A, __m256i __B)
-{
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_maskz_add_epi32(__mmask8 __U, __m256i __A, __m256i __B) {
   return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
                                              (__v8si)_mm256_add_epi32(__A, __B),
                                              (__v8si)_mm256_setzero_si256());
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_mask_add_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
-{
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_mask_add_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B) {
   return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
                                              (__v4di)_mm256_add_epi64(__A, __B),
                                              (__v4di)__W);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_maskz_add_epi64(__mmask8 __U, __m256i __A, __m256i __B)
-{
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_maskz_add_epi64(__mmask8 __U, __m256i __A, __m256i __B) {
   return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
                                              (__v4di)_mm256_add_epi64(__A, __B),
                                              (__v4di)_mm256_setzero_si256());
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_mask_sub_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
-{
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_mask_sub_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B) {
   return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
                                              (__v8si)_mm256_sub_epi32(__A, __B),
                                              (__v8si)__W);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_maskz_sub_epi32(__mmask8 __U, __m256i __A, __m256i __B)
-{
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_maskz_sub_epi32(__mmask8 __U, __m256i __A, __m256i __B) {
   return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
                                              (__v8si)_mm256_sub_epi32(__A, __B),
                                              (__v8si)_mm256_setzero_si256());
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_mask_sub_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
-{
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_mask_sub_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B) {
   return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
                                              (__v4di)_mm256_sub_epi64(__A, __B),
                                              (__v4di)__W);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_maskz_sub_epi64(__mmask8 __U, __m256i __A, __m256i __B)
-{
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_maskz_sub_epi64(__mmask8 __U, __m256i __A, __m256i __B) {
   return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
                                              (__v4di)_mm256_sub_epi64(__A, __B),
                                              (__v4di)_mm256_setzero_si256());
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS128
-_mm_mask_add_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
-{
+static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
+_mm_mask_add_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) {
   return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
                                              (__v4si)_mm_add_epi32(__A, __B),
                                              (__v4si)__W);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS128
-_mm_maskz_add_epi32(__mmask8 __U, __m128i __A, __m128i __B)
-{
+static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
+_mm_maskz_add_epi32(__mmask8 __U, __m128i __A, __m128i __B) {
   return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
                                              (__v4si)_mm_add_epi32(__A, __B),
                                              (__v4si)_mm_setzero_si128());
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS128
-_mm_mask_add_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
-{
+static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
+_mm_mask_add_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) {
   return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
                                              (__v2di)_mm_add_epi64(__A, __B),
                                              (__v2di)__W);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS128
-_mm_maskz_add_epi64(__mmask8 __U, __m128i __A, __m128i __B)
-{
+static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
+_mm_maskz_add_epi64(__mmask8 __U, __m128i __A, __m128i __B) {
   return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
                                              (__v2di)_mm_add_epi64(__A, __B),
                                              (__v2di)_mm_setzero_si128());
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS128
-_mm_mask_sub_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
-{
+static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
+_mm_mask_sub_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) {
   return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
                                              (__v4si)_mm_sub_epi32(__A, __B),
                                              (__v4si)__W);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS128
-_mm_maskz_sub_epi32(__mmask8 __U, __m128i __A, __m128i __B)
-{
+static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
+_mm_maskz_sub_epi32(__mmask8 __U, __m128i __A, __m128i __B) {
   return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
                                              (__v4si)_mm_sub_epi32(__A, __B),
                                              (__v4si)_mm_setzero_si128());
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS128
-_mm_mask_sub_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
-{
+static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
+_mm_mask_sub_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) {
   return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
                                              (__v2di)_mm_sub_epi64(__A, __B),
                                              (__v2di)__W);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS128
-_mm_maskz_sub_epi64(__mmask8 __U, __m128i __A, __m128i __B)
-{
+static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
+_mm_maskz_sub_epi64(__mmask8 __U, __m128i __A, __m128i __B) {
   return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
                                              (__v2di)_mm_sub_epi64(__A, __B),
                                              (__v2di)_mm_setzero_si128());
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_mask_mul_epi32(__m256i __W, __mmask8 __M, __m256i __X, __m256i __Y)
-{
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_mask_mul_epi32(__m256i __W, __mmask8 __M, __m256i __X, __m256i __Y) {
   return (__m256i)__builtin_ia32_selectq_256((__mmask8)__M,
                                              (__v4di)_mm256_mul_epi32(__X, __Y),
                                              (__v4di)__W);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_maskz_mul_epi32(__mmask8 __M, __m256i __X, __m256i __Y)
-{
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_maskz_mul_epi32(__mmask8 __M, __m256i __X, __m256i __Y) {
   return (__m256i)__builtin_ia32_selectq_256((__mmask8)__M,
                                              (__v4di)_mm256_mul_epi32(__X, __Y),
                                              (__v4di)_mm256_setzero_si256());
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS128
-_mm_mask_mul_epi32(__m128i __W, __mmask8 __M, __m128i __X, __m128i __Y)
-{
+static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
+_mm_mask_mul_epi32(__m128i __W, __mmask8 __M, __m128i __X, __m128i __Y) {
   return (__m128i)__builtin_ia32_selectq_128((__mmask8)__M,
                                              (__v2di)_mm_mul_epi32(__X, __Y),
                                              (__v2di)__W);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS128
-_mm_maskz_mul_epi32(__mmask8 __M, __m128i __X, __m128i __Y)
-{
+static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
+_mm_maskz_mul_epi32(__mmask8 __M, __m128i __X, __m128i __Y) {
   return (__m128i)__builtin_ia32_selectq_128((__mmask8)__M,
                                              (__v2di)_mm_mul_epi32(__X, __Y),
                                              (__v2di)_mm_setzero_si128());
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_mask_mul_epu32(__m256i __W, __mmask8 __M, __m256i __X, __m256i __Y)
-{
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_mask_mul_epu32(__m256i __W, __mmask8 __M, __m256i __X, __m256i __Y) {
   return (__m256i)__builtin_ia32_selectq_256((__mmask8)__M,
                                              (__v4di)_mm256_mul_epu32(__X, __Y),
                                              (__v4di)__W);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_maskz_mul_epu32(__mmask8 __M, __m256i __X, __m256i __Y)
-{
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_maskz_mul_epu32(__mmask8 __M, __m256i __X, __m256i __Y) {
   return (__m256i)__builtin_ia32_selectq_256((__mmask8)__M,
                                              (__v4di)_mm256_mul_epu32(__X, __Y),
                                              (__v4di)_mm256_setzero_si256());
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS128
-_mm_mask_mul_epu32(__m128i __W, __mmask8 __M, __m128i __X, __m128i __Y)
-{
+static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
+_mm_mask_mul_epu32(__m128i __W, __mmask8 __M, __m128i __X, __m128i __Y) {
   return (__m128i)__builtin_ia32_selectq_128((__mmask8)__M,
                                              (__v2di)_mm_mul_epu32(__X, __Y),
                                              (__v2di)__W);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS128
-_mm_maskz_mul_epu32(__mmask8 __M, __m128i __X, __m128i __Y)
-{
+static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
+_mm_maskz_mul_epu32(__mmask8 __M, __m128i __X, __m128i __Y) {
   return (__m128i)__builtin_ia32_selectq_128((__mmask8)__M,
                                              (__v2di)_mm_mul_epu32(__X, __Y),
                                              (__v2di)_mm_setzero_si128());
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_maskz_mullo_epi32(__mmask8 __M, __m256i __A, __m256i __B)
-{
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_maskz_mullo_epi32(__mmask8 __M, __m256i __A, __m256i __B) {
   return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M,
                                              (__v8si)_mm256_mullo_epi32(__A, __B),
                                              (__v8si)_mm256_setzero_si256());
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_mask_mullo_epi32(__m256i __W, __mmask8 __M, __m256i __A, __m256i __B)
-{
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_mask_mullo_epi32(__m256i __W, __mmask8 __M, __m256i __A, __m256i __B) {
   return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M,
                                              (__v8si)_mm256_mullo_epi32(__A, __B),
                                              (__v8si)__W);
@@ -453,9 +433,8 @@ _mm_mask_mullo_epi32(__m128i __W, __mmask8 __M, __m128i __A, __m128i __B)
                                              (__v4si)__W);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_and_epi32(__m256i __a, __m256i __b)
-{
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_and_epi32(__m256i __a, __m256i __b) {
   return (__m256i)((__v8su)__a & (__v8su)__b);
 }
 
@@ -473,9 +452,8 @@ _mm256_maskz_and_epi32(__mmask8 __U, __m256i __A, __m256i __B)
   return (__m256i)_mm256_mask_and_epi32(_mm256_setzero_si256(), __U, __A, __B);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS128
-_mm_and_epi32(__m128i __a, __m128i __b)
-{
+static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
+_mm_and_epi32(__m128i __a, __m128i __b) {
   return (__m128i)((__v4su)__a & (__v4su)__b);
 }
 
@@ -896,329 +874,312 @@ _mm_maskz_xor_epi64(__mmask8 __U, __m128i __A, __m128i __B)
                                           (__v2df)(__m128d)(b), (int)(p), \
                                           (__mmask8)(m)))
 
-static __inline__ __m128d __DEFAULT_FN_ATTRS128
-_mm_mask_fmadd_pd(__m128d __A, __mmask8 __U, __m128d __B, __m128d __C)
-{
-  return (__m128d) __builtin_ia32_selectpd_128((__mmask8) __U,
-                    __builtin_ia32_vfmaddpd ((__v2df) __A,
-                                             (__v2df) __B,
-                                             (__v2df) __C),
-                    (__v2df) __A);
+static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR
+_mm_mask_fmadd_pd(__m128d __A, __mmask8 __U, __m128d __B, __m128d __C) {
+  return (__m128d)__builtin_ia32_selectpd_128(
+      (__mmask8)__U, (__v2df)_mm_fmadd_pd(__A, __B, __C), (__v2df)__A);
+}
+
+static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR
+_mm_mask3_fmadd_pd(__m128d __A, __m128d __B, __m128d __C, __mmask8 __U) {
+  return (__m128d)__builtin_ia32_selectpd_128(
+      (__mmask8)__U, (__v2df)_mm_fmadd_pd(__A, __B, __C), (__v2df)__C);
+}
+
+static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR
+_mm_maskz_fmadd_pd(__mmask8 __U, __m128d __A, __m128d __B, __m128d __C) {
+  return (__m128d)__builtin_ia32_selectpd_128(
+      (__mmask8)__U, (__v2df)_mm_fmadd_pd(__A, __B, __C),
+      (__v2df)_mm_setzero_pd());
+}
+
+static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR
+_mm_mask_fmsub_pd(__m128d __A, __mmask8 __U, __m128d __B, __m128d __C) {
+  return (__m128d)__builtin_ia32_selectpd_128(
+      (__mmask8)__U, (__v2df)_mm_fmsub_pd(__A, __B, __C), (__v2df)__A);
+}
+
+static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR
+_mm_mask3_fmsub_pd(__m128d __A, __m128d __B, __m128d __C, __mmask8 __U) {
+  return (__m128d)__builtin_ia32_selectpd_128(
+      (__mmask8)__U, (__v2df)_mm_fmsub_pd(__A, __B, __C), (__v2df)__C);
+}
+
+static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR
+_mm_maskz_fmsub_pd(__mmask8 __U, __m128d __A, __m128d __B, __m128d __C) {
+  return (__m128d)__builtin_ia32_selectpd_128(
+      (__mmask8)__U, (__v2df)_mm_fmsub_pd(__A, __B, __C),
+      (__v2df)_mm_setzero_pd());
+}
+
+static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR
+_mm_mask_fnmadd_pd(__m128d __A, __mmask8 __U, __m128d __B, __m128d __C) {
+  return (__m128d)__builtin_ia32_selectpd_128(
+      (__mmask8)__U, (__v2df)_mm_fnmadd_pd(__A, __B, __C), (__v2df)__A);
+}
+
+static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR
+_mm_mask3_fnmadd_pd(__m128d __A, __m128d __B, __m128d __C, __mmask8 __U) {
+  return (__m128d)__builtin_ia32_selectpd_128(
+      (__mmask8)__U, (__v2df)_mm_fnmadd_pd(__A, __B, __C), (__v2df)__C);
+}
+
+static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR
+_mm_maskz_fnmadd_pd(__mmask8 __U, __m128d __A, __m128d __B, __m128d __C) {
+  return (__m128d)__builtin_ia32_selectpd_128(
+      (__mmask8)__U, (__v2df)_mm_fnmadd_pd(__A, __B, __C),
+      (__v2df)_mm_setzero_pd());
+}
+
+static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR
+_mm_mask_fnmsub_pd(__m128d __A, __mmask8 __U, __m128d __B, __m128d __C) {
+  return (__m128d)__builtin_ia32_selectpd_128(
+      (__mmask8)__U, (__v2df)_mm_fnmsub_pd(__A, __B, __C), (__v2df)__A);
+}
+
+static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR
+_mm_mask3_fnmsub_pd(__m128d __A, __m128d __B, __m128d __C, __mmask8 __U) {
+  return (__m128d)__builtin_ia32_selectpd_128(
+      (__mmask8)__U, (__v2df)_mm_fnmsub_pd(__A, __B, __C), (__v2df)__C);
+}
+
+static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR
+_mm_maskz_fnmsub_pd(__mmask8 __U, __m128d __A, __m128d __B, __m128d __C) {
+  return (__m128d)__builtin_ia32_selectpd_128(
+      (__mmask8)__U, (__v2df)_mm_fnmsub_pd(__A, __B, __C),
+      (__v2df)_mm_setzero_pd());
+}
+
+static __inline__ __m256d __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_mask_fmadd_pd(__m256d __A, __mmask8 __U, __m256d __B, __m256d __C) {
+  return (__m256d)__builtin_ia32_selectpd_256(
+      (__mmask8)__U, (__v4df)_mm256_fmadd_pd(__A, __B, __C), (__v4df)__A);
+}
+
+static __inline__ __m256d __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_mask3_fmadd_pd(__m256d __A, __m256d __B, __m256d __C, __mmask8 __U) {
+  return (__m256d)__builtin_ia32_selectpd_256(
+      (__mmask8)__U, (__v4df)_mm256_fmadd_pd(__A, __B, __C), (__v4df)__C);
+}
+
+static __inline__ __m256d __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_maskz_fmadd_pd(__mmask8 __U, __m256d __A, __m256d __B, __m256d __C) {
+  return (__m256d)__builtin_ia32_selectpd_256(
+      (__mmask8)__U, (__v4df)_mm256_fmadd_pd(__A, __B, __C),
+      (__v4df)_mm256_setzero_pd());
+}
+
+static __inline__ __m256d __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_mask_fmsub_pd(__m256d __A, __mmask8 __U, __m256d __B, __m256d __C) {
+  return (__m256d)__builtin_ia32_selectpd_256(
+      (__mmask8)__U, (__v4df)_mm256_fmsub_pd(__A, __B, __C), (__v4df)__A);
+}
+
+static __inline__ __m256d __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_mask3_fmsub_pd(__m256d __A, __m256d __B, __m256d __C, __mmask8 __U) {
+  return (__m256d)__builtin_ia32_selectpd_256(
+      (__mmask8)__U, (__v4df)_mm256_fmsub_pd(__A, __B, __C), (__v4df)__C);
+}
+
+static __inline__ __m256d __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_maskz_fmsub_pd(__mmask8 __U, __m256d __A, __m256d __B, __m256d __C) {
+  return (__m256d)__builtin_ia32_selectpd_256(
+      (__mmask8)__U, (__v4df)_mm256_fmsub_pd(__A, __B, __C),
+      (__v4df)_mm256_setzero_pd());
+}
+
+static __inline__ __m256d __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_mask_fnmadd_pd(__m256d __A, __mmask8 __U, __m256d __B, __m256d __C) {
+  return (__m256d)__builtin_ia32_selectpd_256(
+      (__mmask8)__U, (__v4df)_mm256_fnmadd_pd(__A, __B, __C), (__v4df)__A);
+}
+
+static __inline__ __m256d __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_mask3_fnmadd_pd(__m256d __A, __m256d __B, __m256d __C, __mmask8 __U) {
+  return (__m256d)__builtin_ia32_selectpd_256(
+      (__mmask8)__U, (__v4df)_mm256_fnmadd_pd(__A, __B, __C), (__v4df)__C);
+}
+
+static __inline__ __m256d __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_maskz_fnmadd_pd(__mmask8 __U, __m256d __A, __m256d __B, __m256d __C) {
+  return (__m256d)__builtin_ia32_selectpd_256(
+      (__mmask8)__U, (__v4df)_mm256_fnmadd_pd(__A, __B, __C),
+      (__v4df)_mm256_setzero_pd());
+}
+
+static __inline__ __m256d __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_mask_fnmsub_pd(__m256d __A, __mmask8 __U, __m256d __B, __m256d __C) {
+  return (__m256d)__builtin_ia32_selectpd_256(
+      (__mmask8)__U, (__v4df)_mm256_fnmsub_pd(__A, __B, __C), (__v4df)__A);
+}
+
+static __inline__ __m256d __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_mask3_fnmsub_pd(__m256d __A, __m256d __B, __m256d __C, __mmask8 __U) {
+  return (__m256d)__builtin_ia32_selectpd_256(
+      (__mmask8)__U, (__v4df)_mm256_fnmsub_pd(__A, __B, __C), (__v4df)__C);
+}
+
+static __inline__ __m256d __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_maskz_fnmsub_pd(__mmask8 __U, __m256d __A, __m256d __B, __m256d __C) {
+  return (__m256d)__builtin_ia32_selectpd_256(
+      (__mmask8)__U, (__v4df)_mm256_fnmsub_pd(__A, __B, __C),
+      (__v4df)_mm256_setzero_pd());
+}
+
+static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR
+_mm_mask_fmadd_ps(__m128 __A, __mmask8 __U, __m128 __B, __m128 __C) {
+  return (__m128)__builtin_ia32_selectps_128(
+      (__mmask8)__U, (__v4sf)_mm_fmadd_ps(__A, __B, __C), (__v4sf)__A);
+}
+
+static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR
+_mm_mask3_fmadd_ps(__m128 __A, __m128 __B, __m128 __C, __mmask8 __U) {
+  return (__m128)__builtin_ia32_selectps_128(
+      (__mmask8)__U, (__v4sf)_mm_fmadd_ps(__A, __B, __C), (__v4sf)__C);
+}
+
+static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR
+_mm_maskz_fmadd_ps(__mmask8 __U, __m128 __A, __m128 __B, __m128 __C) {
+  return (__m128)__builtin_ia32_selectps_128(
+      (__mmask8)__U, (__v4sf)_mm_fmadd_ps(__A, __B, __C),
+      (__v4sf)_mm_setzero_ps());
+}
+
+static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR
+_mm_mask_fmsub_ps(__m128 __A, __mmask8 __U, __m128 __B, __m128 __C) {
+  return (__m128)__builtin_ia32_selectps_128(
+      (__mmask8)__U, (__v4sf)_mm_fmsub_ps(__A, __B, __C), (__v4sf)__A);
+}
+
+static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR
+_mm_mask3_fmsub_ps(__m128 __A, __m128 __B, __m128 __C, __mmask8 __U) {
+  return (__m128)__builtin_ia32_selectps_128(
+      (__mmask8)__U, (__v4sf)_mm_fmsub_ps(__A, __B, __C), (__v4sf)__C);
+}
+
+static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR
+_mm_maskz_fmsub_ps(__mmask8 __U, __m128 __A, __m128 __B, __m128 __C) {
+  return (__m128)__builtin_ia32_selectps_128(
+      (__mmask8)__U, (__v4sf)_mm_fmsub_ps(__A, __B, __C),
+      (__v4sf)_mm_setzero_ps());
+}
+
+static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR
+_mm_mask_fnmadd_ps(__m128 __A, __mmask8 __U, __m128 __B, __m128 __C) {
+  return (__m128)__builtin_ia32_selectps_128(
+      (__mmask8)__U, (__v4sf)_mm_fnmadd_ps(__A, __B, __C), (__v4sf)__A);
+}
+
+static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR
+_mm_mask3_fnmadd_ps(__m128 __A, __m128 __B, __m128 __C, __mmask8 __U) {
+  return (__m128)__builtin_ia32_selectps_128(
+      (__mmask8)__U, (__v4sf)_mm_fnmadd_ps(__A, __B, __C), (__v4sf)__C);
+}
+
+static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR
+_mm_maskz_fnmadd_ps(__mmask8 __U, __m128 __A, __m128 __B, __m128 __C) {
+  return (__m128)__builtin_ia32_selectps_128(
+      (__mmask8)__U, (__v4sf)_mm_fnmadd_ps(__A, __B, __C),
+      (__v4sf)_mm_setzero_ps());
+}
+
+static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR
+_mm_mask_fnmsub_ps(__m128 __A, __mmask8 __U, __m128 __B, __m128 __C) {
+  return (__m128)__builtin_ia32_selectps_128(
+      (__mmask8)__U, (__v4sf)_mm_fnmsub_ps(__A, __B, __C), (__v4sf)__A);
+}
+
+static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR
+_mm_mask3_fnmsub_ps(__m128 __A, __m128 __B, __m128 __C, __mmask8 __U) {
+  return (__m128)__builtin_ia32_selectps_128(
+      (__mmask8)__U, (__v4sf)_mm_fnmsub_ps(__A, __B, __C), (__v4sf)__C);
+}
+
+static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR
+_mm_maskz_fnmsub_ps(__mmask8 __U, __m128 __A, __m128 __B, __m128 __C) {
+  return (__m128)__builtin_ia32_selectps_128(
+      (__mmask8)__U, (__v4sf)_mm_fnmsub_ps(__A, __B, __C),
+      (__v4sf)_mm_setzero_ps());
+}
+
+static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_mask_fmadd_ps(__m256 __A, __mmask8 __U, __m256 __B, __m256 __C) {
+  return (__m256)__builtin_ia32_selectps_256(
+      (__mmask8)__U, (__v8sf)_mm256_fmadd_ps(__A, __B, __C), (__v8sf)__A);
+}
+
+static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_mask3_fmadd_ps(__m256 __A, __m256 __B, __m256 __C, __mmask8 __U) {
+  return (__m256)__builtin_ia32_selectps_256(
+      (__mmask8)__U, (__v8sf)_mm256_fmadd_ps(__A, __B, __C), (__v8sf)__C);
+}
+
+static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_maskz_fmadd_ps(__mmask8 __U, __m256 __A, __m256 __B, __m256 __C) {
+  return (__m256)__builtin_ia32_selectps_256(
+      (__mmask8)__U, (__v8sf)_mm256_fmadd_ps(__A, __B, __C),
+      (__v8sf)_mm256_setzero_ps());
+}
+
+static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_mask_fmsub_ps(__m256 __A, __mmask8 __U, __m256 __B, __m256 __C) {
+  return (__m256)__builtin_ia32_selectps_256(
+      (__mmask8)__U, (__v8sf)_mm256_fmsub_ps(__A, __B, __C), (__v8sf)__A);
+}
+
+static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_mask3_fmsub_ps(__m256 __A, __m256 __B, __m256 __C, __mmask8 __U) {
+  return (__m256)__builtin_ia32_selectps_256(
+      (__mmask8)__U, (__v8sf)_mm256_fmsub_ps(__A, __B, __C), (__v8sf)__C);
+}
+
+static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_maskz_fmsub_ps(__mmask8 __U, __m256 __A, __m256 __B, __m256 __C) {
+  return (__m256)__builtin_ia32_selectps_256(
+      (__mmask8)__U, (__v8sf)_mm256_fmsub_ps(__A, __B, __C),
+      (__v8sf)_mm256_setzero_ps());
+}
+
+static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_mask_fnmadd_ps(__m256 __A, __mmask8 __U, __m256 __B, __m256 __C) {
+  return (__m256)__builtin_ia32_selectps_256(
+      (__mmask8)__U, (__v8sf)_mm256_fnmadd_ps(__A, __B, __C), (__v8sf)__A);
+}
+
+static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_mask3_fnmadd_ps(__m256 __A, __m256 __B, __m256 __C, __mmask8 __U) {
+  return (__m256)__builtin_ia32_selectps_256(
+      (__mmask8)__U, (__v8sf)_mm256_fnmadd_ps(__A, __B, __C), (__v8sf)__C);
+}
+
+static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_maskz_fnmadd_ps(__mmask8 __U, __m256 __A, __m256 __B, __m256 __C) {
+  return (__m256)__builtin_ia32_selectps_256(
+      (__mmask8)__U, (__v8sf)_mm256_fnmadd_ps(__A, __B, __C),
+      (__v8sf)_mm256_setzero_ps());
+}
+
+static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_mask_fnmsub_ps(__m256 __A, __mmask8 __U, __m256 __B, __m256 __C) {
+  return (__m256)__builtin_ia32_selectps_256(
+      (__mmask8)__U, (__v8sf)_mm256_fnmsub_ps(__A, __B, __C), (__v8sf)__A);
+}
+
+static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_mask3_fnmsub_ps(__m256 __A, __m256 __B, __m256 __C, __mmask8 __U) {
+  return (__m256)__builtin_ia32_selectps_256(
+      (__mmask8)__U, (__v8sf)_mm256_fnmsub_ps(__A, __B, __C), (__v8sf)__C);
+}
+
+static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_maskz_fnmsub_ps(__mmask8 __U, __m256 __A, __m256 __B, __m256 __C) {
+  return (__m256)__builtin_ia32_selectps_256(
+      (__mmask8)__U, (__v8sf)_mm256_fnmsub_ps(__A, __B, __C),
+      (__v8sf)_mm256_setzero_ps());
 }
 
 static __inline__ __m128d __DEFAULT_FN_ATTRS128
-_mm_mask3_fmadd_pd(__m128d __A, __m128d __B, __m128d __C, __mmask8 __U)
-{
-  return (__m128d) __builtin_ia32_selectpd_128((__mmask8) __U,
-                    __builtin_ia32_vfmaddpd ((__v2df) __A,
-                                             (__v2df) __B,
-                                             (__v2df) __C),
-                    (__v2df) __C);
-}
-
-static __inline__ __m128d __DEFAULT_FN_ATTRS128
-_mm_maskz_fmadd_pd(__mmask8 __U, __m128d __A, __m128d __B, __m128d __C)
-{
-  return (__m128d) __builtin_ia32_selectpd_128((__mmask8) __U,
-                    __builtin_ia32_vfmaddpd ((__v2df) __A,
-                                             (__v2df) __B,
-                                             (__v2df) __C),
-                    (__v2df)_mm_setzero_pd());
-}
-
-static __inline__ __m128d __DEFAULT_FN_ATTRS128
-_mm_mask_fmsub_pd(__m128d __A, __mmask8 __U, __m128d __B, __m128d __C)
-{
-  return (__m128d) __builtin_ia32_selectpd_128((__mmask8) __U,
-                    __builtin_ia32_vfmaddpd ((__v2df) __A,
-                                             (__v2df) __B,
-                                             -(__v2df) __C),
-                    (__v2df) __A);
-}
-
-static __inline__ __m128d __DEFAULT_FN_ATTRS128
-_mm_maskz_fmsub_pd(__mmask8 __U, __m128d __A, __m128d __B, __m128d __C)
-{
-  return (__m128d) __builtin_ia32_selectpd_128((__mmask8) __U,
-                    __builtin_ia32_vfmaddpd ((__v2df) __A,
-                                             (__v2df) __B,
-                                             -(__v2df) __C),
-                    (__v2df)_mm_setzero_pd());
-}
-
-static __inline__ __m128d __DEFAULT_FN_ATTRS128
-_mm_mask3_fnmadd_pd(__m128d __A, __m128d __B, __m128d __C, __mmask8 __U)
-{
-  return (__m128d) __builtin_ia32_selectpd_128((__mmask8) __U,
-                    __builtin_ia32_vfmaddpd (-(__v2df) __A,
-                                             (__v2df) __B,
-                                             (__v2df) __C),
-                    (__v2df) __C);
-}
-
-static __inline__ __m128d __DEFAULT_FN_ATTRS128
-_mm_maskz_fnmadd_pd(__mmask8 __U, __m128d __A, __m128d __B, __m128d __C)
-{
-  return (__m128d) __builtin_ia32_selectpd_128((__mmask8) __U,
-                    __builtin_ia32_vfmaddpd (-(__v2df) __A,
-                                             (__v2df) __B,
-                                             (__v2df) __C),
-                    (__v2df)_mm_setzero_pd());
-}
-
-static __inline__ __m128d __DEFAULT_FN_ATTRS128
-_mm_maskz_fnmsub_pd(__mmask8 __U, __m128d __A, __m128d __B, __m128d __C)
-{
-  return (__m128d) __builtin_ia32_selectpd_128((__mmask8) __U,
-                    __builtin_ia32_vfmaddpd (-(__v2df) __A,
-                                             (__v2df) __B,
-                                             -(__v2df) __C),
-                    (__v2df)_mm_setzero_pd());
-}
-
-static __inline__ __m256d __DEFAULT_FN_ATTRS256
-_mm256_mask_fmadd_pd(__m256d __A, __mmask8 __U, __m256d __B, __m256d __C)
-{
-  return (__m256d) __builtin_ia32_selectpd_256((__mmask8) __U,
-                    __builtin_ia32_vfmaddpd256 ((__v4df) __A,
-                                                (__v4df) __B,
-                                                (__v4df) __C),
-                    (__v4df) __A);
-}
-
-static __inline__ __m256d __DEFAULT_FN_ATTRS256
-_mm256_mask3_fmadd_pd(__m256d __A, __m256d __B, __m256d __C, __mmask8 __U)
-{
-  return (__m256d) __builtin_ia32_selectpd_256((__mmask8) __U,
-                    __builtin_ia32_vfmaddpd256 ((__v4df) __A,
-                                                (__v4df) __B,
-                                                (__v4df) __C),
-                    (__v4df) __C);
-}
-
-static __inline__ __m256d __DEFAULT_FN_ATTRS256
-_mm256_maskz_fmadd_pd(__mmask8 __U, __m256d __A, __m256d __B, __m256d __C)
-{
-  return (__m256d) __builtin_ia32_selectpd_256((__mmask8) __U,
-                    __builtin_ia32_vfmaddpd256 ((__v4df) __A,
-                                                (__v4df) __B,
-                                                (__v4df) __C),
-                    (__v4df)_mm256_setzero_pd());
-}
-
-static __inline__ __m256d __DEFAULT_FN_ATTRS256
-_mm256_mask_fmsub_pd(__m256d __A, __mmask8 __U, __m256d __B, __m256d __C)
-{
-  return (__m256d) __builtin_ia32_selectpd_256((__mmask8) __U,
-                    __builtin_ia32_vfmaddpd256 ((__v4df) __A,
-                                                (__v4df) __B,
-                                                -(__v4df) __C),
-                    (__v4df) __A);
-}
-
-static __inline__ __m256d __DEFAULT_FN_ATTRS256
-_mm256_maskz_fmsub_pd(__mmask8 __U, __m256d __A, __m256d __B, __m256d __C)
-{
-  return (__m256d) __builtin_ia32_selectpd_256((__mmask8) __U,
-                    __builtin_ia32_vfmaddpd256 ((__v4df) __A,
-                                                (__v4df) __B,
-                                                -(__v4df) __C),
-                    (__v4df)_mm256_setzero_pd());
-}
-
-static __inline__ __m256d __DEFAULT_FN_ATTRS256
-_mm256_mask3_fnmadd_pd(__m256d __A, __m256d __B, __m256d __C, __mmask8 __U)
-{
-  return (__m256d) __builtin_ia32_selectpd_256((__mmask8) __U,
-                    __builtin_ia32_vfmaddpd256 (-(__v4df) __A,
-                                                (__v4df) __B,
-                                                (__v4df) __C),
-                    (__v4df) __C);
-}
-
-static __inline__ __m256d __DEFAULT_FN_ATTRS256
-_mm256_maskz_fnmadd_pd(__mmask8 __U, __m256d __A, __m256d __B, __m256d __C)
-{
-  return (__m256d) __builtin_ia32_selectpd_256((__mmask8) __U,
-                    __builtin_ia32_vfmaddpd256 (-(__v4df) __A,
-                                                (__v4df) __B,
-                                                (__v4df) __C),
-                    (__v4df)_mm256_setzero_pd());
-}
-
-static __inline__ __m256d __DEFAULT_FN_ATTRS256
-_mm256_maskz_fnmsub_pd(__mmask8 __U, __m256d __A, __m256d __B, __m256d __C)
-{
-  return (__m256d) __builtin_ia32_selectpd_256((__mmask8) __U,
-                    __builtin_ia32_vfmaddpd256 (-(__v4df) __A,
-                                                (__v4df) __B,
-                                                -(__v4df) __C),
-                    (__v4df)_mm256_setzero_pd());
-}
-
-static __inline__ __m128 __DEFAULT_FN_ATTRS128
-_mm_mask_fmadd_ps(__m128 __A, __mmask8 __U, __m128 __B, __m128 __C)
-{
-  return (__m128) __builtin_ia32_selectps_128((__mmask8) __U,
-                    __builtin_ia32_vfmaddps ((__v4sf) __A,
-                                             (__v4sf) __B,
-                                             (__v4sf) __C),
-                    (__v4sf) __A);
-}
-
-static __inline__ __m128 __DEFAULT_FN_ATTRS128
-_mm_mask3_fmadd_ps(__m128 __A, __m128 __B, __m128 __C, __mmask8 __U)
-{
-  return (__m128) __builtin_ia32_selectps_128((__mmask8) __U,
-                    __builtin_ia32_vfmaddps ((__v4sf) __A,
-                                             (__v4sf) __B,
-                                             (__v4sf) __C),
-                    (__v4sf) __C);
-}
-
-static __inline__ __m128 __DEFAULT_FN_ATTRS128
-_mm_maskz_fmadd_ps(__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
-{
-  return (__m128) __builtin_ia32_selectps_128((__mmask8) __U,
-                    __builtin_ia32_vfmaddps ((__v4sf) __A,
-                                             (__v4sf) __B,
-                                             (__v4sf) __C),
-                    (__v4sf)_mm_setzero_ps());
-}
-
-static __inline__ __m128 __DEFAULT_FN_ATTRS128
-_mm_mask_fmsub_ps(__m128 __A, __mmask8 __U, __m128 __B, __m128 __C)
-{
-  return (__m128) __builtin_ia32_selectps_128((__mmask8) __U,
-                    __builtin_ia32_vfmaddps ((__v4sf) __A,
-                                             (__v4sf) __B,
-                                             -(__v4sf) __C),
-                    (__v4sf) __A);
-}
-
-static __inline__ __m128 __DEFAULT_FN_ATTRS128
-_mm_maskz_fmsub_ps(__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
-{
-  return (__m128) __builtin_ia32_selectps_128((__mmask8) __U,
-                    __builtin_ia32_vfmaddps ((__v4sf) __A,
-                                             (__v4sf) __B,
-                                             -(__v4sf) __C),
-                    (__v4sf)_mm_setzero_ps());
-}
-
-static __inline__ __m128 __DEFAULT_FN_ATTRS128
-_mm_mask3_fnmadd_ps(__m128 __A, __m128 __B, __m128 __C, __mmask8 __U)
-{
-  return (__m128) __builtin_ia32_selectps_128((__mmask8) __U,
-                    __builtin_ia32_vfmaddps (-(__v4sf) __A,
-                                             (__v4sf) __B,
-                                             (__v4sf) __C),
-                    (__v4sf) __C);
-}
-
-static __inline__ __m128 __DEFAULT_FN_ATTRS128
-_mm_maskz_fnmadd_ps(__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
-{
-  return (__m128) __builtin_ia32_selectps_128((__mmask8) __U,
-                    __builtin_ia32_vfmaddps (-(__v4sf) __A,
-                                             (__v4sf) __B,
-                                             (__v4sf) __C),
-                    (__v4sf)_mm_setzero_ps());
-}
-
-static __inline__ __m128 __DEFAULT_FN_ATTRS128
-_mm_maskz_fnmsub_ps(__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
-{
-  return (__m128) __builtin_ia32_selectps_128((__mmask8) __U,
-                    __builtin_ia32_vfmaddps (-(__v4sf) __A,
-                                             (__v4sf) __B,
-                                             -(__v4sf) __C),
-                    (__v4sf)_mm_setzero_ps());
-}
-
-static __inline__ __m256 __DEFAULT_FN_ATTRS256
-_mm256_mask_fmadd_ps(__m256 __A, __mmask8 __U, __m256 __B, __m256 __C)
-{
-  return (__m256) __builtin_ia32_selectps_256((__mmask8) __U,
-                    __builtin_ia32_vfmaddps256 ((__v8sf) __A,
-                                                (__v8sf) __B,
-                                                (__v8sf) __C),
-                    (__v8sf) __A);
-}
-
-static __inline__ __m256 __DEFAULT_FN_ATTRS256
-_mm256_mask3_fmadd_ps(__m256 __A, __m256 __B, __m256 __C, __mmask8 __U)
-{
-  return (__m256) __builtin_ia32_selectps_256((__mmask8) __U,
-                    __builtin_ia32_vfmaddps256 ((__v8sf) __A,
-                                                (__v8sf) __B,
-                                                (__v8sf) __C),
-                    (__v8sf) __C);
-}
-
-static __inline__ __m256 __DEFAULT_FN_ATTRS256
-_mm256_maskz_fmadd_ps(__mmask8 __U, __m256 __A, __m256 __B, __m256 __C)
-{
-  return (__m256) __builtin_ia32_selectps_256((__mmask8) __U,
-                    __builtin_ia32_vfmaddps256 ((__v8sf) __A,
-                                                (__v8sf) __B,
-                                                (__v8sf) __C),
-                    (__v8sf)_mm256_setzero_ps());
-}
-
-static __inline__ __m256 __DEFAULT_FN_ATTRS256
-_mm256_mask_fmsub_ps(__m256 __A, __mmask8 __U, __m256 __B, __m256 __C)
-{
-  return (__m256) __builtin_ia32_selectps_256((__mmask8) __U,
-                    __builtin_ia32_vfmaddps256 ((__v8sf) __A,
-                                                (__v8sf) __B,
-                                                -(__v8sf) __C),
-                    (__v8sf) __A);
-}
-
-static __inline__ __m256 __DEFAULT_FN_ATTRS256
-_mm256_maskz_fmsub_ps(__mmask8 __U, __m256 __A, __m256 __B, __m256 __C)
-{
-  return (__m256) __builtin_ia32_selectps_256((__mmask8) __U,
-                    __builtin_ia32_vfmaddps256 ((__v8sf) __A,
-                                                (__v8sf) __B,
-                                                -(__v8sf) __C),
-                    (__v8sf)_mm256_setzero_ps());
-}
-
-static __inline__ __m256 __DEFAULT_FN_ATTRS256
-_mm256_mask3_fnmadd_ps(__m256 __A, __m256 __B, __m256 __C, __mmask8 __U)
-{
-  return (__m256) __builtin_ia32_selectps_256((__mmask8) __U,
-                    __builtin_ia32_vfmaddps256 (-(__v8sf) __A,
-                                                (__v8sf) __B,
-                                                (__v8sf) __C),
-                    (__v8sf) __C);
-}
-
-static __inline__ __m256 __DEFAULT_FN_ATTRS256
-_mm256_maskz_fnmadd_ps(__mmask8 __U, __m256 __A, __m256 __B, __m256 __C)
-{
-  return (__m256) __builtin_ia32_selectps_256((__mmask8) __U,
-                    __builtin_ia32_vfmaddps256 (-(__v8sf) __A,
-                                                (__v8sf) __B,
-                                                (__v8sf) __C),
-                    (__v8sf)_mm256_setzero_ps());
-}
-
-static __inline__ __m256 __DEFAULT_FN_ATTRS256
-_mm256_maskz_fnmsub_ps(__mmask8 __U, __m256 __A, __m256 __B, __m256 __C)
-{
-  return (__m256) __builtin_ia32_selectps_256((__mmask8) __U,
-                    __builtin_ia32_vfmaddps256 (-(__v8sf) __A,
-                                                (__v8sf) __B,
-                                                -(__v8sf) __C),
-                    (__v8sf)_mm256_setzero_ps());
-}
-
-static __inline__ __m128d __DEFAULT_FN_ATTRS128
-_mm_mask_fmaddsub_pd(__m128d __A, __mmask8 __U, __m128d __B, __m128d __C)
-{
+_mm_mask_fmaddsub_pd(__m128d __A, __mmask8 __U, __m128d __B, __m128d __C) {
   return (__m128d) __builtin_ia32_selectpd_128((__mmask8) __U,
                     __builtin_ia32_vfmaddsubpd ((__v2df) __A,
                                                 (__v2df) __B,
@@ -1417,46 +1378,6 @@ _mm256_maskz_fmsubadd_ps(__mmask8 __U, __m256 __A, __m256 __B, __m256 __C)
                     (__v8sf)_mm256_setzero_ps());
 }
 
-static __inline__ __m128d __DEFAULT_FN_ATTRS128
-_mm_mask3_fmsub_pd(__m128d __A, __m128d __B, __m128d __C, __mmask8 __U)
-{
-  return (__m128d) __builtin_ia32_selectpd_128((__mmask8) __U,
-                    __builtin_ia32_vfmaddpd ((__v2df) __A,
-                                             (__v2df) __B,
-                                             -(__v2df) __C),
-                    (__v2df) __C);
-}
-
-static __inline__ __m256d __DEFAULT_FN_ATTRS256
-_mm256_mask3_fmsub_pd(__m256d __A, __m256d __B, __m256d __C, __mmask8 __U)
-{
-  return (__m256d) __builtin_ia32_selectpd_256((__mmask8) __U,
-                    __builtin_ia32_vfmaddpd256 ((__v4df) __A,
-                                                (__v4df) __B,
-                                                -(__v4df) __C),
-                    (__v4df) __C);
-}
-
-static __inline__ __m128 __DEFAULT_FN_ATTRS128
-_mm_mask3_fmsub_ps(__m128 __A, __m128 __B, __m128 __C, __mmask8 __U)
-{
-  return (__m128) __builtin_ia32_selectps_128((__mmask8) __U,
-                    __builtin_ia32_vfmaddps ((__v4sf) __A,
-                                             (__v4sf) __B,
-                                             -(__v4sf) __C),
-                    (__v4sf) __C);
-}
-
-static __inline__ __m256 __DEFAULT_FN_ATTRS256
-_mm256_mask3_fmsub_ps(__m256 __A, __m256 __B, __m256 __C, __mmask8 __U)
-{
-  return (__m256) __builtin_ia32_selectps_256((__mmask8) __U,
-                    __builtin_ia32_vfmaddps256 ((__v8sf) __A,
-                                                (__v8sf) __B,
-                                                -(__v8sf) __C),
-                    (__v8sf) __C);
-}
-
 static __inline__ __m128d __DEFAULT_FN_ATTRS128
 _mm_mask3_fmsubadd_pd(__m128d __A, __m128d __B, __m128d __C, __mmask8 __U)
 {
@@ -1497,233 +1418,113 @@ _mm256_mask3_fmsubadd_ps(__m256 __A, __m256 __B, __m256 __C, __mmask8 __U)
                     (__v8sf) __C);
 }
 
-static __inline__ __m128d __DEFAULT_FN_ATTRS128
-_mm_mask_fnmadd_pd(__m128d __A, __mmask8 __U, __m128d __B, __m128d __C)
-{
-  return (__m128d) __builtin_ia32_selectpd_128((__mmask8) __U,
-                    __builtin_ia32_vfmaddpd ((__v2df) __A,
-                                             -(__v2df) __B,
-                                             (__v2df) __C),
-                    (__v2df) __A);
-}
-
-static __inline__ __m256d __DEFAULT_FN_ATTRS256
-_mm256_mask_fnmadd_pd(__m256d __A, __mmask8 __U, __m256d __B, __m256d __C)
-{
-  return (__m256d) __builtin_ia32_selectpd_256((__mmask8) __U,
-                    __builtin_ia32_vfmaddpd256 ((__v4df) __A,
-                                                -(__v4df) __B,
-                                                (__v4df) __C),
-                    (__v4df) __A);
-}
-
-static __inline__ __m128 __DEFAULT_FN_ATTRS128
-_mm_mask_fnmadd_ps(__m128 __A, __mmask8 __U, __m128 __B, __m128 __C)
-{
-  return (__m128) __builtin_ia32_selectps_128((__mmask8) __U,
-                    __builtin_ia32_vfmaddps ((__v4sf) __A,
-                                             -(__v4sf) __B,
-                                             (__v4sf) __C),
-                    (__v4sf) __A);
-}
-
-static __inline__ __m256 __DEFAULT_FN_ATTRS256
-_mm256_mask_fnmadd_ps(__m256 __A, __mmask8 __U, __m256 __B, __m256 __C)
-{
-  return (__m256) __builtin_ia32_selectps_256((__mmask8) __U,
-                    __builtin_ia32_vfmaddps256 ((__v8sf) __A,
-                                                -(__v8sf) __B,
-                                                (__v8sf) __C),
-                    (__v8sf) __A);
-}
-
-static __inline__ __m128d __DEFAULT_FN_ATTRS128
-_mm_mask_fnmsub_pd(__m128d __A, __mmask8 __U, __m128d __B, __m128d __C)
-{
-  return (__m128d) __builtin_ia32_selectpd_128((__mmask8) __U,
-                    __builtin_ia32_vfmaddpd ((__v2df) __A,
-                                             -(__v2df) __B,
-                                             -(__v2df) __C),
-                    (__v2df) __A);
-}
-
-static __inline__ __m128d __DEFAULT_FN_ATTRS128
-_mm_mask3_fnmsub_pd(__m128d __A, __m128d __B, __m128d __C, __mmask8 __U)
-{
-  return (__m128d) __builtin_ia32_selectpd_128((__mmask8) __U,
-                    __builtin_ia32_vfmaddpd ((__v2df) __A,
-                                             -(__v2df) __B,
-                                             -(__v2df) __C),
-                    (__v2df) __C);
-}
-
-static __inline__ __m256d __DEFAULT_FN_ATTRS256
-_mm256_mask_fnmsub_pd(__m256d __A, __mmask8 __U, __m256d __B, __m256d __C)
-{
-  return (__m256d) __builtin_ia32_selectpd_256((__mmask8) __U,
-                    __builtin_ia32_vfmaddpd256 ((__v4df) __A,
-                                                -(__v4df) __B,
-                                                -(__v4df) __C),
-                    (__v4df) __A);
-}
-
-static __inline__ __m256d __DEFAULT_FN_ATTRS256
-_mm256_mask3_fnmsub_pd(__m256d __A, __m256d __B, __m256d __C, __mmask8 __U)
-{
-  return (__m256d) __builtin_ia32_selectpd_256((__mmask8) __U,
-                    __builtin_ia32_vfmaddpd256 ((__v4df) __A,
-                                                -(__v4df) __B,
-                                                -(__v4df) __C),
-                    (__v4df) __C);
-}
-
-static __inline__ __m128 __DEFAULT_FN_ATTRS128
-_mm_mask_fnmsub_ps(__m128 __A, __mmask8 __U, __m128 __B, __m128 __C)
-{
-  return (__m128) __builtin_ia32_selectps_128((__mmask8) __U,
-                    __builtin_ia32_vfmaddps ((__v4sf) __A,
-                                             -(__v4sf) __B,
-                                             -(__v4sf) __C),
-                    (__v4sf) __A);
-}
-
-static __inline__ __m128 __DEFAULT_FN_ATTRS128
-_mm_mask3_fnmsub_ps(__m128 __A, __m128 __B, __m128 __C, __mmask8 __U)
-{
-  return (__m128) __builtin_ia32_selectps_128((__mmask8) __U,
-                    __builtin_ia32_vfmaddps ((__v4sf) __A,
-                                             -(__v4sf) __B,
-                                             -(__v4sf) __C),
-                    (__v4sf) __C);
-}
-
-static __inline__ __m256 __DEFAULT_FN_ATTRS256
-_mm256_mask_fnmsub_ps(__m256 __A, __mmask8 __U, __m256 __B, __m256 __C)
-{
-  return (__m256) __builtin_ia32_selectps_256((__mmask8) __U,
-                    __builtin_ia32_vfmaddps256 ((__v8sf) __A,
-                                                -(__v8sf) __B,
-                                                -(__v8sf) __C),
-                    (__v8sf) __A);
-}
-
-static __inline__ __m256 __DEFAULT_FN_ATTRS256
-_mm256_mask3_fnmsub_ps(__m256 __A, __m256 __B, __m256 __C, __mmask8 __U)
-{
-  return (__m256) __builtin_ia32_selectps_256((__mmask8) __U,
-                    __builtin_ia32_vfmaddps256 ((__v8sf) __A,
-                                                -(__v8sf) __B,
-                                                -(__v8sf) __C),
-                    (__v8sf) __C);
-}
-
-static __inline__ __m128d __DEFAULT_FN_ATTRS128
+static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR
 _mm_mask_add_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) {
   return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
                                               (__v2df)_mm_add_pd(__A, __B),
                                               (__v2df)__W);
 }
 
-static __inline__ __m128d __DEFAULT_FN_ATTRS128
+static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR
 _mm_maskz_add_pd(__mmask8 __U, __m128d __A, __m128d __B) {
   return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
                                               (__v2df)_mm_add_pd(__A, __B),
                                               (__v2df)_mm_setzero_pd());
 }
 
-static __inline__ __m256d __DEFAULT_FN_ATTRS256
+static __inline__ __m256d __DEFAULT_FN_ATTRS256_CONSTEXPR
 _mm256_mask_add_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) {
   return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
                                               (__v4df)_mm256_add_pd(__A, __B),
                                               (__v4df)__W);
 }
 
-static __inline__ __m256d __DEFAULT_FN_ATTRS256
+static __inline__ __m256d __DEFAULT_FN_ATTRS256_CONSTEXPR
 _mm256_maskz_add_pd(__mmask8 __U, __m256d __A, __m256d __B) {
   return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
                                               (__v4df)_mm256_add_pd(__A, __B),
                                               (__v4df)_mm256_setzero_pd());
 }
 
-static __inline__ __m128 __DEFAULT_FN_ATTRS128
+static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR
 _mm_mask_add_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) {
   return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
                                              (__v4sf)_mm_add_ps(__A, __B),
                                              (__v4sf)__W);
 }
 
-static __inline__ __m128 __DEFAULT_FN_ATTRS128
+static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR
 _mm_maskz_add_ps(__mmask8 __U, __m128 __A, __m128 __B) {
   return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
                                              (__v4sf)_mm_add_ps(__A, __B),
                                              (__v4sf)_mm_setzero_ps());
 }
 
-static __inline__ __m256 __DEFAULT_FN_ATTRS256
+static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR
 _mm256_mask_add_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) {
   return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
                                              (__v8sf)_mm256_add_ps(__A, __B),
                                              (__v8sf)__W);
 }
 
-static __inline__ __m256 __DEFAULT_FN_ATTRS256
+static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR
 _mm256_maskz_add_ps(__mmask8 __U, __m256 __A, __m256 __B) {
   return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
                                              (__v8sf)_mm256_add_ps(__A, __B),
                                              (__v8sf)_mm256_setzero_ps());
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS128
-_mm_mask_blend_epi32 (__mmask8 __U, __m128i __A, __m128i __W) {
+static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
+_mm_mask_blend_epi32(__mmask8 __U, __m128i __A, __m128i __W) {
   return (__m128i) __builtin_ia32_selectd_128 ((__mmask8) __U,
                 (__v4si) __W,
                 (__v4si) __A);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_mask_blend_epi32 (__mmask8 __U, __m256i __A, __m256i __W) {
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_mask_blend_epi32(__mmask8 __U, __m256i __A, __m256i __W) {
   return (__m256i) __builtin_ia32_selectd_256 ((__mmask8) __U,
                 (__v8si) __W,
                 (__v8si) __A);
 }
 
-static __inline__ __m128d __DEFAULT_FN_ATTRS128
-_mm_mask_blend_pd (__mmask8 __U, __m128d __A, __m128d __W) {
+static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR
+_mm_mask_blend_pd(__mmask8 __U, __m128d __A, __m128d __W) {
   return (__m128d) __builtin_ia32_selectpd_128 ((__mmask8) __U,
                  (__v2df) __W,
                  (__v2df) __A);
 }
 
-static __inline__ __m256d __DEFAULT_FN_ATTRS256
-_mm256_mask_blend_pd (__mmask8 __U, __m256d __A, __m256d __W) {
+static __inline__ __m256d __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_mask_blend_pd(__mmask8 __U, __m256d __A, __m256d __W) {
   return (__m256d) __builtin_ia32_selectpd_256 ((__mmask8) __U,
                  (__v4df) __W,
                  (__v4df) __A);
 }
 
-static __inline__ __m128 __DEFAULT_FN_ATTRS128
-_mm_mask_blend_ps (__mmask8 __U, __m128 __A, __m128 __W) {
+static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR
+_mm_mask_blend_ps(__mmask8 __U, __m128 __A, __m128 __W) {
   return (__m128) __builtin_ia32_selectps_128 ((__mmask8) __U,
                 (__v4sf) __W,
                 (__v4sf) __A);
 }
 
-static __inline__ __m256 __DEFAULT_FN_ATTRS256
-_mm256_mask_blend_ps (__mmask8 __U, __m256 __A, __m256 __W) {
+static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_mask_blend_ps(__mmask8 __U, __m256 __A, __m256 __W) {
   return (__m256) __builtin_ia32_selectps_256 ((__mmask8) __U,
                 (__v8sf) __W,
                 (__v8sf) __A);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS128
-_mm_mask_blend_epi64 (__mmask8 __U, __m128i __A, __m128i __W) {
+static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
+_mm_mask_blend_epi64(__mmask8 __U, __m128i __A, __m128i __W) {
   return (__m128i) __builtin_ia32_selectq_128 ((__mmask8) __U,
                 (__v2di) __W,
                 (__v2di) __A);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_mask_blend_epi64 (__mmask8 __U, __m256i __A, __m256i __W) {
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_mask_blend_epi64(__mmask8 __U, __m256i __A, __m256i __W) {
   return (__m256i) __builtin_ia32_selectq_256 ((__mmask8) __U,
                 (__v4di) __W,
                 (__v4di) __A);
@@ -1905,57 +1706,57 @@ _mm256_mask_compressstoreu_epi32 (void *__P, __mmask8 __U, __m256i __A) {
             (__mmask8) __U);
 }
 
-static __inline__ __m128d __DEFAULT_FN_ATTRS128
-_mm_mask_cvtepi32_pd (__m128d __W, __mmask8 __U, __m128i __A) {
+static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR
+_mm_mask_cvtepi32_pd(__m128d __W, __mmask8 __U, __m128i __A) {
   return (__m128d)__builtin_ia32_selectpd_128((__mmask8) __U,
                                               (__v2df)_mm_cvtepi32_pd(__A),
                                               (__v2df)__W);
 }
 
-static __inline__ __m128d __DEFAULT_FN_ATTRS128
-_mm_maskz_cvtepi32_pd (__mmask8 __U, __m128i __A) {
+static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR
+_mm_maskz_cvtepi32_pd(__mmask8 __U, __m128i __A) {
   return (__m128d)__builtin_ia32_selectpd_128((__mmask8) __U,
                                               (__v2df)_mm_cvtepi32_pd(__A),
                                               (__v2df)_mm_setzero_pd());
 }
 
-static __inline__ __m256d __DEFAULT_FN_ATTRS256
-_mm256_mask_cvtepi32_pd (__m256d __W, __mmask8 __U, __m128i __A) {
+static __inline__ __m256d __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_mask_cvtepi32_pd(__m256d __W, __mmask8 __U, __m128i __A) {
   return (__m256d)__builtin_ia32_selectpd_256((__mmask8) __U,
                                               (__v4df)_mm256_cvtepi32_pd(__A),
                                               (__v4df)__W);
 }
 
-static __inline__ __m256d __DEFAULT_FN_ATTRS256
-_mm256_maskz_cvtepi32_pd (__mmask8 __U, __m128i __A) {
+static __inline__ __m256d __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_maskz_cvtepi32_pd(__mmask8 __U, __m128i __A) {
   return (__m256d)__builtin_ia32_selectpd_256((__mmask8) __U,
                                               (__v4df)_mm256_cvtepi32_pd(__A),
                                               (__v4df)_mm256_setzero_pd());
 }
 
-static __inline__ __m128 __DEFAULT_FN_ATTRS128
-_mm_mask_cvtepi32_ps (__m128 __W, __mmask8 __U, __m128i __A) {
+static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR
+_mm_mask_cvtepi32_ps(__m128 __W, __mmask8 __U, __m128i __A) {
   return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
                                              (__v4sf)_mm_cvtepi32_ps(__A),
                                              (__v4sf)__W);
 }
 
-static __inline__ __m128 __DEFAULT_FN_ATTRS128
-_mm_maskz_cvtepi32_ps (__mmask8 __U, __m128i __A) {
+static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR
+_mm_maskz_cvtepi32_ps(__mmask8 __U, __m128i __A) {
   return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
                                              (__v4sf)_mm_cvtepi32_ps(__A),
                                              (__v4sf)_mm_setzero_ps());
 }
 
-static __inline__ __m256 __DEFAULT_FN_ATTRS256
-_mm256_mask_cvtepi32_ps (__m256 __W, __mmask8 __U, __m256i __A) {
+static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_mask_cvtepi32_ps(__m256 __W, __mmask8 __U, __m256i __A) {
   return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
                                              (__v8sf)_mm256_cvtepi32_ps(__A),
                                              (__v8sf)__W);
 }
 
-static __inline__ __m256 __DEFAULT_FN_ATTRS256
-_mm256_maskz_cvtepi32_ps (__mmask8 __U, __m256i __A) {
+static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_maskz_cvtepi32_ps(__mmask8 __U, __m256i __A) {
   return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
                                              (__v8sf)_mm256_cvtepi32_ps(__A),
                                              (__v8sf)_mm256_setzero_ps());
@@ -1990,30 +1791,30 @@ _mm256_maskz_cvtpd_epi32 (__mmask8 __U, __m256d __A) {
                                              (__v4si)_mm_setzero_si128());
 }
 
-static __inline__ __m128 __DEFAULT_FN_ATTRS128
-_mm_mask_cvtpd_ps (__m128 __W, __mmask8 __U, __m128d __A) {
+static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR
+_mm_mask_cvtpd_ps(__m128 __W, __mmask8 __U, __m128d __A) {
   return (__m128) __builtin_ia32_cvtpd2ps_mask ((__v2df) __A,
             (__v4sf) __W,
             (__mmask8) __U);
 }
 
-static __inline__ __m128 __DEFAULT_FN_ATTRS128
-_mm_maskz_cvtpd_ps (__mmask8 __U, __m128d __A) {
+static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR
+_mm_maskz_cvtpd_ps(__mmask8 __U, __m128d __A) {
   return (__m128) __builtin_ia32_cvtpd2ps_mask ((__v2df) __A,
             (__v4sf)
             _mm_setzero_ps (),
             (__mmask8) __U);
 }
 
-static __inline__ __m128 __DEFAULT_FN_ATTRS256
-_mm256_mask_cvtpd_ps (__m128 __W, __mmask8 __U, __m256d __A) {
+static __inline__ __m128 __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_mask_cvtpd_ps(__m128 __W, __mmask8 __U, __m256d __A) {
   return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
                                              (__v4sf)_mm256_cvtpd_ps(__A),
                                              (__v4sf)__W);
 }
 
-static __inline__ __m128 __DEFAULT_FN_ATTRS256
-_mm256_maskz_cvtpd_ps (__mmask8 __U, __m256d __A) {
+static __inline__ __m128 __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_maskz_cvtpd_ps(__mmask8 __U, __m256d __A) {
   return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
                                              (__v4sf)_mm256_cvtpd_ps(__A),
                                              (__v4sf)_mm_setzero_ps());
@@ -2316,133 +2117,133 @@ _mm256_maskz_cvttps_epu32 (__mmask8 __U, __m256 __A) {
                   (__mmask8) __U);
 }
 
-static __inline__ __m128d __DEFAULT_FN_ATTRS128
-_mm_cvtepu32_pd (__m128i __A) {
+static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR
+_mm_cvtepu32_pd(__m128i __A) {
   return (__m128d) __builtin_convertvector(
       __builtin_shufflevector((__v4su)__A, (__v4su)__A, 0, 1), __v2df);
 }
 
-static __inline__ __m128d __DEFAULT_FN_ATTRS128
-_mm_mask_cvtepu32_pd (__m128d __W, __mmask8 __U, __m128i __A) {
+static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR
+_mm_mask_cvtepu32_pd(__m128d __W, __mmask8 __U, __m128i __A) {
   return (__m128d)__builtin_ia32_selectpd_128((__mmask8) __U,
                                               (__v2df)_mm_cvtepu32_pd(__A),
                                               (__v2df)__W);
 }
 
-static __inline__ __m128d __DEFAULT_FN_ATTRS128
-_mm_maskz_cvtepu32_pd (__mmask8 __U, __m128i __A) {
+static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR
+_mm_maskz_cvtepu32_pd(__mmask8 __U, __m128i __A) {
   return (__m128d)__builtin_ia32_selectpd_128((__mmask8) __U,
                                               (__v2df)_mm_cvtepu32_pd(__A),
                                               (__v2df)_mm_setzero_pd());
 }
 
-static __inline__ __m256d __DEFAULT_FN_ATTRS256
-_mm256_cvtepu32_pd (__m128i __A) {
+static __inline__ __m256d __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_cvtepu32_pd(__m128i __A) {
   return (__m256d)__builtin_convertvector((__v4su)__A, __v4df);
 }
 
-static __inline__ __m256d __DEFAULT_FN_ATTRS256
-_mm256_mask_cvtepu32_pd (__m256d __W, __mmask8 __U, __m128i __A) {
+static __inline__ __m256d __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_mask_cvtepu32_pd(__m256d __W, __mmask8 __U, __m128i __A) {
   return (__m256d)__builtin_ia32_selectpd_256((__mmask8) __U,
                                               (__v4df)_mm256_cvtepu32_pd(__A),
                                               (__v4df)__W);
 }
 
-static __inline__ __m256d __DEFAULT_FN_ATTRS256
-_mm256_maskz_cvtepu32_pd (__mmask8 __U, __m128i __A) {
+static __inline__ __m256d __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_maskz_cvtepu32_pd(__mmask8 __U, __m128i __A) {
   return (__m256d)__builtin_ia32_selectpd_256((__mmask8) __U,
                                               (__v4df)_mm256_cvtepu32_pd(__A),
                                               (__v4df)_mm256_setzero_pd());
 }
 
-static __inline__ __m128 __DEFAULT_FN_ATTRS128
-_mm_cvtepu32_ps (__m128i __A) {
+static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR
+_mm_cvtepu32_ps(__m128i __A) {
   return (__m128)__builtin_convertvector((__v4su)__A, __v4sf);
 }
 
-static __inline__ __m128 __DEFAULT_FN_ATTRS128
-_mm_mask_cvtepu32_ps (__m128 __W, __mmask8 __U, __m128i __A) {
+static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR
+_mm_mask_cvtepu32_ps(__m128 __W, __mmask8 __U, __m128i __A) {
   return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
                                              (__v4sf)_mm_cvtepu32_ps(__A),
                                              (__v4sf)__W);
 }
 
-static __inline__ __m128 __DEFAULT_FN_ATTRS128
-_mm_maskz_cvtepu32_ps (__mmask8 __U, __m128i __A) {
+static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR
+_mm_maskz_cvtepu32_ps(__mmask8 __U, __m128i __A) {
   return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
                                              (__v4sf)_mm_cvtepu32_ps(__A),
                                              (__v4sf)_mm_setzero_ps());
 }
 
-static __inline__ __m256 __DEFAULT_FN_ATTRS256
-_mm256_cvtepu32_ps (__m256i __A) {
+static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_cvtepu32_ps(__m256i __A) {
   return (__m256)__builtin_convertvector((__v8su)__A, __v8sf);
 }
 
-static __inline__ __m256 __DEFAULT_FN_ATTRS256
-_mm256_mask_cvtepu32_ps (__m256 __W, __mmask8 __U, __m256i __A) {
+static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_mask_cvtepu32_ps(__m256 __W, __mmask8 __U, __m256i __A) {
   return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
                                              (__v8sf)_mm256_cvtepu32_ps(__A),
                                              (__v8sf)__W);
 }
 
-static __inline__ __m256 __DEFAULT_FN_ATTRS256
-_mm256_maskz_cvtepu32_ps (__mmask8 __U, __m256i __A) {
+static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_maskz_cvtepu32_ps(__mmask8 __U, __m256i __A) {
   return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
                                              (__v8sf)_mm256_cvtepu32_ps(__A),
                                              (__v8sf)_mm256_setzero_ps());
 }
 
-static __inline__ __m128d __DEFAULT_FN_ATTRS128
+static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR
 _mm_mask_div_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) {
   return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
                                               (__v2df)_mm_div_pd(__A, __B),
                                               (__v2df)__W);
 }
 
-static __inline__ __m128d __DEFAULT_FN_ATTRS128
+static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR
 _mm_maskz_div_pd(__mmask8 __U, __m128d __A, __m128d __B) {
   return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
                                               (__v2df)_mm_div_pd(__A, __B),
                                               (__v2df)_mm_setzero_pd());
 }
 
-static __inline__ __m256d __DEFAULT_FN_ATTRS256
+static __inline__ __m256d __DEFAULT_FN_ATTRS256_CONSTEXPR
 _mm256_mask_div_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) {
   return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
                                               (__v4df)_mm256_div_pd(__A, __B),
                                               (__v4df)__W);
 }
 
-static __inline__ __m256d __DEFAULT_FN_ATTRS256
+static __inline__ __m256d __DEFAULT_FN_ATTRS256_CONSTEXPR
 _mm256_maskz_div_pd(__mmask8 __U, __m256d __A, __m256d __B) {
   return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
                                               (__v4df)_mm256_div_pd(__A, __B),
                                               (__v4df)_mm256_setzero_pd());
 }
 
-static __inline__ __m128 __DEFAULT_FN_ATTRS128
+static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR
 _mm_mask_div_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) {
   return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
                                              (__v4sf)_mm_div_ps(__A, __B),
                                              (__v4sf)__W);
 }
 
-static __inline__ __m128 __DEFAULT_FN_ATTRS128
+static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR
 _mm_maskz_div_ps(__mmask8 __U, __m128 __A, __m128 __B) {
   return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
                                              (__v4sf)_mm_div_ps(__A, __B),
                                              (__v4sf)_mm_setzero_ps());
 }
 
-static __inline__ __m256 __DEFAULT_FN_ATTRS256
+static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR
 _mm256_mask_div_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) {
   return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
                                              (__v8sf)_mm256_div_ps(__A, __B),
                                              (__v8sf)__W);
 }
 
-static __inline__ __m256 __DEFAULT_FN_ATTRS256
+static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR
 _mm256_maskz_div_ps(__mmask8 __U, __m256 __A, __m256 __B) {
   return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
                                              (__v8sf)_mm256_div_ps(__A, __B),
@@ -2796,499 +2597,499 @@ _mm256_maskz_getexp_ps (__mmask8 __U, __m256 __A) {
                (__mmask8) __U);
 }
 
-static __inline__ __m128d __DEFAULT_FN_ATTRS128
+static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR
 _mm_mask_max_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) {
   return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
                                               (__v2df)_mm_max_pd(__A, __B),
                                               (__v2df)__W);
 }
 
-static __inline__ __m128d __DEFAULT_FN_ATTRS128
+static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR
 _mm_maskz_max_pd(__mmask8 __U, __m128d __A, __m128d __B) {
   return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
                                               (__v2df)_mm_max_pd(__A, __B),
                                               (__v2df)_mm_setzero_pd());
 }
 
-static __inline__ __m256d __DEFAULT_FN_ATTRS256
+static __inline__ __m256d __DEFAULT_FN_ATTRS256_CONSTEXPR
 _mm256_mask_max_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) {
   return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
                                               (__v4df)_mm256_max_pd(__A, __B),
                                               (__v4df)__W);
 }
 
-static __inline__ __m256d __DEFAULT_FN_ATTRS256
+static __inline__ __m256d __DEFAULT_FN_ATTRS256_CONSTEXPR
 _mm256_maskz_max_pd(__mmask8 __U, __m256d __A, __m256d __B) {
   return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
                                               (__v4df)_mm256_max_pd(__A, __B),
                                               (__v4df)_mm256_setzero_pd());
 }
 
-static __inline__ __m128 __DEFAULT_FN_ATTRS128
+static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR
 _mm_mask_max_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) {
   return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
                                              (__v4sf)_mm_max_ps(__A, __B),
                                              (__v4sf)__W);
 }
 
-static __inline__ __m128 __DEFAULT_FN_ATTRS128
+static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR
 _mm_maskz_max_ps(__mmask8 __U, __m128 __A, __m128 __B) {
   return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
                                              (__v4sf)_mm_max_ps(__A, __B),
                                              (__v4sf)_mm_setzero_ps());
 }
 
-static __inline__ __m256 __DEFAULT_FN_ATTRS256
+static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR
 _mm256_mask_max_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) {
   return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
                                              (__v8sf)_mm256_max_ps(__A, __B),
                                              (__v8sf)__W);
 }
 
-static __inline__ __m256 __DEFAULT_FN_ATTRS256
+static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR
 _mm256_maskz_max_ps(__mmask8 __U, __m256 __A, __m256 __B) {
   return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
                                              (__v8sf)_mm256_max_ps(__A, __B),
                                              (__v8sf)_mm256_setzero_ps());
 }
 
-static __inline__ __m128d __DEFAULT_FN_ATTRS128
+static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR
 _mm_mask_min_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) {
   return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
                                               (__v2df)_mm_min_pd(__A, __B),
                                               (__v2df)__W);
 }
 
-static __inline__ __m128d __DEFAULT_FN_ATTRS128
+static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR
 _mm_maskz_min_pd(__mmask8 __U, __m128d __A, __m128d __B) {
   return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
                                               (__v2df)_mm_min_pd(__A, __B),
                                               (__v2df)_mm_setzero_pd());
 }
 
-static __inline__ __m256d __DEFAULT_FN_ATTRS256
+static __inline__ __m256d __DEFAULT_FN_ATTRS256_CONSTEXPR
 _mm256_mask_min_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) {
   return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
                                               (__v4df)_mm256_min_pd(__A, __B),
                                               (__v4df)__W);
 }
 
-static __inline__ __m256d __DEFAULT_FN_ATTRS256
+static __inline__ __m256d __DEFAULT_FN_ATTRS256_CONSTEXPR
 _mm256_maskz_min_pd(__mmask8 __U, __m256d __A, __m256d __B) {
   return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
                                               (__v4df)_mm256_min_pd(__A, __B),
                                               (__v4df)_mm256_setzero_pd());
 }
 
-static __inline__ __m128 __DEFAULT_FN_ATTRS128
+static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR
 _mm_mask_min_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) {
   return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
                                              (__v4sf)_mm_min_ps(__A, __B),
                                              (__v4sf)__W);
 }
 
-static __inline__ __m128 __DEFAULT_FN_ATTRS128
+static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR
 _mm_maskz_min_ps(__mmask8 __U, __m128 __A, __m128 __B) {
   return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
                                              (__v4sf)_mm_min_ps(__A, __B),
                                              (__v4sf)_mm_setzero_ps());
 }
 
-static __inline__ __m256 __DEFAULT_FN_ATTRS256
+static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR
 _mm256_mask_min_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) {
   return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
                                              (__v8sf)_mm256_min_ps(__A, __B),
                                              (__v8sf)__W);
 }
 
-static __inline__ __m256 __DEFAULT_FN_ATTRS256
+static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR
 _mm256_maskz_min_ps(__mmask8 __U, __m256 __A, __m256 __B) {
   return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
                                              (__v8sf)_mm256_min_ps(__A, __B),
                                              (__v8sf)_mm256_setzero_ps());
 }
 
-static __inline__ __m128d __DEFAULT_FN_ATTRS128
+static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR
 _mm_mask_mul_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) {
   return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
                                               (__v2df)_mm_mul_pd(__A, __B),
                                               (__v2df)__W);
 }
 
-static __inline__ __m128d __DEFAULT_FN_ATTRS128
+static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR
 _mm_maskz_mul_pd(__mmask8 __U, __m128d __A, __m128d __B) {
   return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
                                               (__v2df)_mm_mul_pd(__A, __B),
                                               (__v2df)_mm_setzero_pd());
 }
 
-static __inline__ __m256d __DEFAULT_FN_ATTRS256
+static __inline__ __m256d __DEFAULT_FN_ATTRS256_CONSTEXPR
 _mm256_mask_mul_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) {
   return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
                                               (__v4df)_mm256_mul_pd(__A, __B),
                                               (__v4df)__W);
 }
 
-static __inline__ __m256d __DEFAULT_FN_ATTRS256
+static __inline__ __m256d __DEFAULT_FN_ATTRS256_CONSTEXPR
 _mm256_maskz_mul_pd(__mmask8 __U, __m256d __A, __m256d __B) {
   return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
                                               (__v4df)_mm256_mul_pd(__A, __B),
                                               (__v4df)_mm256_setzero_pd());
 }
 
-static __inline__ __m128 __DEFAULT_FN_ATTRS128
+static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR
 _mm_mask_mul_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) {
   return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
                                              (__v4sf)_mm_mul_ps(__A, __B),
                                              (__v4sf)__W);
 }
 
-static __inline__ __m128 __DEFAULT_FN_ATTRS128
+static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR
 _mm_maskz_mul_ps(__mmask8 __U, __m128 __A, __m128 __B) {
   return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
                                              (__v4sf)_mm_mul_ps(__A, __B),
                                              (__v4sf)_mm_setzero_ps());
 }
 
-static __inline__ __m256 __DEFAULT_FN_ATTRS256
+static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR
 _mm256_mask_mul_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) {
   return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
                                              (__v8sf)_mm256_mul_ps(__A, __B),
                                              (__v8sf)__W);
 }
 
-static __inline__ __m256 __DEFAULT_FN_ATTRS256
+static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR
 _mm256_maskz_mul_ps(__mmask8 __U, __m256 __A, __m256 __B) {
   return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
                                              (__v8sf)_mm256_mul_ps(__A, __B),
                                              (__v8sf)_mm256_setzero_ps());
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS128
+static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
 _mm_mask_abs_epi32(__m128i __W, __mmask8 __U, __m128i __A) {
   return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
                                              (__v4si)_mm_abs_epi32(__A),
                                              (__v4si)__W);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS128
+static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
 _mm_maskz_abs_epi32(__mmask8 __U, __m128i __A) {
   return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
                                              (__v4si)_mm_abs_epi32(__A),
                                              (__v4si)_mm_setzero_si128());
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
 _mm256_mask_abs_epi32(__m256i __W, __mmask8 __U, __m256i __A) {
   return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
                                              (__v8si)_mm256_abs_epi32(__A),
                                              (__v8si)__W);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
 _mm256_maskz_abs_epi32(__mmask8 __U, __m256i __A) {
   return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
                                              (__v8si)_mm256_abs_epi32(__A),
                                              (__v8si)_mm256_setzero_si256());
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS128
-_mm_abs_epi64 (__m128i __A) {
+static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
+_mm_abs_epi64(__m128i __A) {
   return (__m128i)__builtin_elementwise_abs((__v2di)__A);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS128
-_mm_mask_abs_epi64 (__m128i __W, __mmask8 __U, __m128i __A) {
+static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
+_mm_mask_abs_epi64(__m128i __W, __mmask8 __U, __m128i __A) {
   return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
                                              (__v2di)_mm_abs_epi64(__A),
                                              (__v2di)__W);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS128
-_mm_maskz_abs_epi64 (__mmask8 __U, __m128i __A) {
+static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
+_mm_maskz_abs_epi64(__mmask8 __U, __m128i __A) {
   return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
                                              (__v2di)_mm_abs_epi64(__A),
                                              (__v2di)_mm_setzero_si128());
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_abs_epi64 (__m256i __A) {
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_abs_epi64(__m256i __A) {
   return (__m256i)__builtin_elementwise_abs((__v4di)__A);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_mask_abs_epi64 (__m256i __W, __mmask8 __U, __m256i __A) {
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_mask_abs_epi64(__m256i __W, __mmask8 __U, __m256i __A) {
   return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
                                              (__v4di)_mm256_abs_epi64(__A),
                                              (__v4di)__W);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_maskz_abs_epi64 (__mmask8 __U, __m256i __A) {
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_maskz_abs_epi64(__mmask8 __U, __m256i __A) {
   return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
                                              (__v4di)_mm256_abs_epi64(__A),
                                              (__v4di)_mm256_setzero_si256());
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS128
+static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
 _mm_maskz_max_epi32(__mmask8 __M, __m128i __A, __m128i __B) {
   return (__m128i)__builtin_ia32_selectd_128((__mmask8)__M,
                                              (__v4si)_mm_max_epi32(__A, __B),
                                              (__v4si)_mm_setzero_si128());
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS128
+static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
 _mm_mask_max_epi32(__m128i __W, __mmask8 __M, __m128i __A, __m128i __B) {
   return (__m128i)__builtin_ia32_selectd_128((__mmask8)__M,
                                              (__v4si)_mm_max_epi32(__A, __B),
                                              (__v4si)__W);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
 _mm256_maskz_max_epi32(__mmask8 __M, __m256i __A, __m256i __B) {
   return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M,
                                              (__v8si)_mm256_max_epi32(__A, __B),
                                              (__v8si)_mm256_setzero_si256());
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
 _mm256_mask_max_epi32(__m256i __W, __mmask8 __M, __m256i __A, __m256i __B) {
   return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M,
                                              (__v8si)_mm256_max_epi32(__A, __B),
                                              (__v8si)__W);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS128
-_mm_max_epi64 (__m128i __A, __m128i __B) {
+static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
+_mm_max_epi64(__m128i __A, __m128i __B) {
   return (__m128i)__builtin_elementwise_max((__v2di)__A, (__v2di)__B);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS128
-_mm_maskz_max_epi64 (__mmask8 __M, __m128i __A, __m128i __B) {
+static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
+_mm_maskz_max_epi64(__mmask8 __M, __m128i __A, __m128i __B) {
   return (__m128i)__builtin_ia32_selectq_128((__mmask8)__M,
                                              (__v2di)_mm_max_epi64(__A, __B),
                                              (__v2di)_mm_setzero_si128());
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS128
-_mm_mask_max_epi64 (__m128i __W, __mmask8 __M, __m128i __A, __m128i __B) {
+static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
+_mm_mask_max_epi64(__m128i __W, __mmask8 __M, __m128i __A, __m128i __B) {
   return (__m128i)__builtin_ia32_selectq_128((__mmask8)__M,
                                              (__v2di)_mm_max_epi64(__A, __B),
                                              (__v2di)__W);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_max_epi64 (__m256i __A, __m256i __B) {
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_max_epi64(__m256i __A, __m256i __B) {
   return (__m256i)__builtin_elementwise_max((__v4di)__A, (__v4di)__B);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_maskz_max_epi64 (__mmask8 __M, __m256i __A, __m256i __B) {
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_maskz_max_epi64(__mmask8 __M, __m256i __A, __m256i __B) {
   return (__m256i)__builtin_ia32_selectq_256((__mmask8)__M,
                                              (__v4di)_mm256_max_epi64(__A, __B),
                                              (__v4di)_mm256_setzero_si256());
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_mask_max_epi64 (__m256i __W, __mmask8 __M, __m256i __A, __m256i __B) {
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_mask_max_epi64(__m256i __W, __mmask8 __M, __m256i __A, __m256i __B) {
   return (__m256i)__builtin_ia32_selectq_256((__mmask8)__M,
                                              (__v4di)_mm256_max_epi64(__A, __B),
                                              (__v4di)__W);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS128
+static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
 _mm_maskz_max_epu32(__mmask8 __M, __m128i __A, __m128i __B) {
   return (__m128i)__builtin_ia32_selectd_128((__mmask8)__M,
                                              (__v4si)_mm_max_epu32(__A, __B),
                                              (__v4si)_mm_setzero_si128());
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS128
+static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
 _mm_mask_max_epu32(__m128i __W, __mmask8 __M, __m128i __A, __m128i __B) {
   return (__m128i)__builtin_ia32_selectd_128((__mmask8)__M,
                                              (__v4si)_mm_max_epu32(__A, __B),
                                              (__v4si)__W);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
 _mm256_maskz_max_epu32(__mmask8 __M, __m256i __A, __m256i __B) {
   return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M,
                                              (__v8si)_mm256_max_epu32(__A, __B),
                                              (__v8si)_mm256_setzero_si256());
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
 _mm256_mask_max_epu32(__m256i __W, __mmask8 __M, __m256i __A, __m256i __B) {
   return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M,
                                              (__v8si)_mm256_max_epu32(__A, __B),
                                              (__v8si)__W);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS128
-_mm_max_epu64 (__m128i __A, __m128i __B) {
+static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
+_mm_max_epu64(__m128i __A, __m128i __B) {
   return (__m128i)__builtin_elementwise_max((__v2du)__A, (__v2du)__B);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS128
-_mm_maskz_max_epu64 (__mmask8 __M, __m128i __A, __m128i __B) {
+static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
+_mm_maskz_max_epu64(__mmask8 __M, __m128i __A, __m128i __B) {
   return (__m128i)__builtin_ia32_selectq_128((__mmask8)__M,
                                              (__v2di)_mm_max_epu64(__A, __B),
                                              (__v2di)_mm_setzero_si128());
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS128
-_mm_mask_max_epu64 (__m128i __W, __mmask8 __M, __m128i __A, __m128i __B) {
+static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
+_mm_mask_max_epu64(__m128i __W, __mmask8 __M, __m128i __A, __m128i __B) {
   return (__m128i)__builtin_ia32_selectq_128((__mmask8)__M,
                                              (__v2di)_mm_max_epu64(__A, __B),
                                              (__v2di)__W);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_max_epu64 (__m256i __A, __m256i __B) {
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_max_epu64(__m256i __A, __m256i __B) {
   return (__m256i)__builtin_elementwise_max((__v4du)__A, (__v4du)__B);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_maskz_max_epu64 (__mmask8 __M, __m256i __A, __m256i __B) {
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_maskz_max_epu64(__mmask8 __M, __m256i __A, __m256i __B) {
   return (__m256i)__builtin_ia32_selectq_256((__mmask8)__M,
                                              (__v4di)_mm256_max_epu64(__A, __B),
                                              (__v4di)_mm256_setzero_si256());
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_mask_max_epu64 (__m256i __W, __mmask8 __M, __m256i __A, __m256i __B) {
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_mask_max_epu64(__m256i __W, __mmask8 __M, __m256i __A, __m256i __B) {
   return (__m256i)__builtin_ia32_selectq_256((__mmask8)__M,
                                              (__v4di)_mm256_max_epu64(__A, __B),
                                              (__v4di)__W);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS128
+static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
 _mm_maskz_min_epi32(__mmask8 __M, __m128i __A, __m128i __B) {
   return (__m128i)__builtin_ia32_selectd_128((__mmask8)__M,
                                              (__v4si)_mm_min_epi32(__A, __B),
                                              (__v4si)_mm_setzero_si128());
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS128
+static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
 _mm_mask_min_epi32(__m128i __W, __mmask8 __M, __m128i __A, __m128i __B) {
   return (__m128i)__builtin_ia32_selectd_128((__mmask8)__M,
                                              (__v4si)_mm_min_epi32(__A, __B),
                                              (__v4si)__W);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
 _mm256_maskz_min_epi32(__mmask8 __M, __m256i __A, __m256i __B) {
   return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M,
                                              (__v8si)_mm256_min_epi32(__A, __B),
                                              (__v8si)_mm256_setzero_si256());
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
 _mm256_mask_min_epi32(__m256i __W, __mmask8 __M, __m256i __A, __m256i __B) {
   return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M,
                                              (__v8si)_mm256_min_epi32(__A, __B),
                                              (__v8si)__W);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS128
-_mm_min_epi64 (__m128i __A, __m128i __B) {
+static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
+_mm_min_epi64(__m128i __A, __m128i __B) {
   return (__m128i)__builtin_elementwise_min((__v2di)__A, (__v2di)__B);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS128
-_mm_mask_min_epi64 (__m128i __W, __mmask8 __M, __m128i __A, __m128i __B) {
+static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
+_mm_mask_min_epi64(__m128i __W, __mmask8 __M, __m128i __A, __m128i __B) {
   return (__m128i)__builtin_ia32_selectq_128((__mmask8)__M,
                                              (__v2di)_mm_min_epi64(__A, __B),
                                              (__v2di)__W);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS128
-_mm_maskz_min_epi64 (__mmask8 __M, __m128i __A, __m128i __B) {
+static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
+_mm_maskz_min_epi64(__mmask8 __M, __m128i __A, __m128i __B) {
   return (__m128i)__builtin_ia32_selectq_128((__mmask8)__M,
                                              (__v2di)_mm_min_epi64(__A, __B),
                                              (__v2di)_mm_setzero_si128());
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_min_epi64 (__m256i __A, __m256i __B) {
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_min_epi64(__m256i __A, __m256i __B) {
   return (__m256i)__builtin_elementwise_min((__v4di)__A, (__v4di)__B);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_mask_min_epi64 (__m256i __W, __mmask8 __M, __m256i __A, __m256i __B) {
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_mask_min_epi64(__m256i __W, __mmask8 __M, __m256i __A, __m256i __B) {
   return (__m256i)__builtin_ia32_selectq_256((__mmask8)__M,
                                              (__v4di)_mm256_min_epi64(__A, __B),
                                              (__v4di)__W);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_maskz_min_epi64 (__mmask8 __M, __m256i __A, __m256i __B) {
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_maskz_min_epi64(__mmask8 __M, __m256i __A, __m256i __B) {
   return (__m256i)__builtin_ia32_selectq_256((__mmask8)__M,
                                              (__v4di)_mm256_min_epi64(__A, __B),
                                              (__v4di)_mm256_setzero_si256());
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS128
+static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
 _mm_maskz_min_epu32(__mmask8 __M, __m128i __A, __m128i __B) {
   return (__m128i)__builtin_ia32_selectd_128((__mmask8)__M,
                                              (__v4si)_mm_min_epu32(__A, __B),
                                              (__v4si)_mm_setzero_si128());
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS128
+static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
 _mm_mask_min_epu32(__m128i __W, __mmask8 __M, __m128i __A, __m128i __B) {
   return (__m128i)__builtin_ia32_selectd_128((__mmask8)__M,
                                              (__v4si)_mm_min_epu32(__A, __B),
                                              (__v4si)__W);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
 _mm256_maskz_min_epu32(__mmask8 __M, __m256i __A, __m256i __B) {
   return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M,
                                              (__v8si)_mm256_min_epu32(__A, __B),
                                              (__v8si)_mm256_setzero_si256());
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
 _mm256_mask_min_epu32(__m256i __W, __mmask8 __M, __m256i __A, __m256i __B) {
   return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M,
                                              (__v8si)_mm256_min_epu32(__A, __B),
                                              (__v8si)__W);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS128
-_mm_min_epu64 (__m128i __A, __m128i __B) {
+static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
+_mm_min_epu64(__m128i __A, __m128i __B) {
   return (__m128i)__builtin_elementwise_min((__v2du)__A, (__v2du)__B);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS128
-_mm_mask_min_epu64 (__m128i __W, __mmask8 __M, __m128i __A, __m128i __B) {
+static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
+_mm_mask_min_epu64(__m128i __W, __mmask8 __M, __m128i __A, __m128i __B) {
   return (__m128i)__builtin_ia32_selectq_128((__mmask8)__M,
                                              (__v2di)_mm_min_epu64(__A, __B),
                                              (__v2di)__W);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS128
-_mm_maskz_min_epu64 (__mmask8 __M, __m128i __A, __m128i __B) {
+static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
+_mm_maskz_min_epu64(__mmask8 __M, __m128i __A, __m128i __B) {
   return (__m128i)__builtin_ia32_selectq_128((__mmask8)__M,
                                              (__v2di)_mm_min_epu64(__A, __B),
                                              (__v2di)_mm_setzero_si128());
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_min_epu64 (__m256i __A, __m256i __B) {
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_min_epu64(__m256i __A, __m256i __B) {
   return (__m256i)__builtin_elementwise_min((__v4du)__A, (__v4du)__B);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_mask_min_epu64 (__m256i __W, __mmask8 __M, __m256i __A, __m256i __B) {
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_mask_min_epu64(__m256i __W, __mmask8 __M, __m256i __A, __m256i __B) {
   return (__m256i)__builtin_ia32_selectq_256((__mmask8)__M,
                                              (__v4di)_mm256_min_epu64(__A, __B),
                                              (__v4di)__W);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_maskz_min_epu64 (__mmask8 __M, __m256i __A, __m256i __B) {
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_maskz_min_epu64(__mmask8 __M, __m256i __A, __m256i __B) {
   return (__m256i)__builtin_ia32_selectq_256((__mmask8)__M,
                                              (__v4di)_mm256_min_epu64(__A, __B),
                                              (__v4di)_mm256_setzero_si256());
@@ -3691,69 +3492,69 @@ _mm256_maskz_scalef_ps (__mmask8 __U, __m256 __A, __m256 __B) {
                                                (__v8sf)_mm256_setzero_ps());
   }
 
-  static __inline__ __m128d __DEFAULT_FN_ATTRS128
+  static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR
   _mm_mask_sub_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) {
     return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
                                                 (__v2df)_mm_sub_pd(__A, __B),
                                                 (__v2df)__W);
   }
 
-  static __inline__ __m128d __DEFAULT_FN_ATTRS128
+  static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR
   _mm_maskz_sub_pd(__mmask8 __U, __m128d __A, __m128d __B) {
     return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
                                                 (__v2df)_mm_sub_pd(__A, __B),
                                                 (__v2df)_mm_setzero_pd());
   }
 
-  static __inline__ __m256d __DEFAULT_FN_ATTRS256
+  static __inline__ __m256d __DEFAULT_FN_ATTRS256_CONSTEXPR
   _mm256_mask_sub_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) {
     return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
                                                 (__v4df)_mm256_sub_pd(__A, __B),
                                                 (__v4df)__W);
   }
 
-  static __inline__ __m256d __DEFAULT_FN_ATTRS256
+  static __inline__ __m256d __DEFAULT_FN_ATTRS256_CONSTEXPR
   _mm256_maskz_sub_pd(__mmask8 __U, __m256d __A, __m256d __B) {
     return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
                                                 (__v4df)_mm256_sub_pd(__A, __B),
                                                 (__v4df)_mm256_setzero_pd());
   }
 
-  static __inline__ __m128 __DEFAULT_FN_ATTRS128
+  static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR
   _mm_mask_sub_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) {
     return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
                                                (__v4sf)_mm_sub_ps(__A, __B),
                                                (__v4sf)__W);
   }
 
-  static __inline__ __m128 __DEFAULT_FN_ATTRS128
+  static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR
   _mm_maskz_sub_ps(__mmask8 __U, __m128 __A, __m128 __B) {
     return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
                                                (__v4sf)_mm_sub_ps(__A, __B),
                                                (__v4sf)_mm_setzero_ps());
   }
 
-  static __inline__ __m256 __DEFAULT_FN_ATTRS256
+  static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR
   _mm256_mask_sub_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) {
     return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
                                                (__v8sf)_mm256_sub_ps(__A, __B),
                                                (__v8sf)__W);
   }
 
-  static __inline__ __m256 __DEFAULT_FN_ATTRS256
+  static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR
   _mm256_maskz_sub_ps(__mmask8 __U, __m256 __A, __m256 __B) {
     return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
                                                (__v8sf)_mm256_sub_ps(__A, __B),
                                                (__v8sf)_mm256_setzero_ps());
   }
 
-  static __inline__ __m128i __DEFAULT_FN_ATTRS128
+  static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
   _mm_permutex2var_epi32(__m128i __A, __m128i __I, __m128i __B) {
     return (__m128i)__builtin_ia32_vpermi2vard128((__v4si) __A, (__v4si)__I,
                                                   (__v4si)__B);
   }
 
-  static __inline__ __m128i __DEFAULT_FN_ATTRS128
+  static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
   _mm_mask_permutex2var_epi32(__m128i __A, __mmask8 __U, __m128i __I,
                               __m128i __B) {
     return (__m128i)__builtin_ia32_selectd_128(__U,
@@ -3761,7 +3562,7 @@ _mm256_maskz_scalef_ps (__mmask8 __U, __m256 __A, __m256 __B) {
                                     (__v4si)__A);
   }
 
-  static __inline__ __m128i __DEFAULT_FN_ATTRS128
+  static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
   _mm_mask2_permutex2var_epi32(__m128i __A, __m128i __I, __mmask8 __U,
                                __m128i __B) {
     return (__m128i)__builtin_ia32_selectd_128(__U,
@@ -3769,7 +3570,7 @@ _mm256_maskz_scalef_ps (__mmask8 __U, __m256 __A, __m256 __B) {
                                     (__v4si)__I);
   }
 
-  static __inline__ __m128i __DEFAULT_FN_ATTRS128
+  static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
   _mm_maskz_permutex2var_epi32(__mmask8 __U, __m128i __A, __m128i __I,
                                __m128i __B) {
     return (__m128i)__builtin_ia32_selectd_128(__U,
@@ -3777,13 +3578,13 @@ _mm256_maskz_scalef_ps (__mmask8 __U, __m256 __A, __m256 __B) {
                                     (__v4si)_mm_setzero_si128());
   }
 
-  static __inline__ __m256i __DEFAULT_FN_ATTRS256
+  static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
   _mm256_permutex2var_epi32(__m256i __A, __m256i __I, __m256i __B) {
     return (__m256i)__builtin_ia32_vpermi2vard256((__v8si)__A, (__v8si) __I,
                                                   (__v8si) __B);
   }
 
-  static __inline__ __m256i __DEFAULT_FN_ATTRS256
+  static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
   _mm256_mask_permutex2var_epi32(__m256i __A, __mmask8 __U, __m256i __I,
                                  __m256i __B) {
     return (__m256i)__builtin_ia32_selectd_256(__U,
@@ -3791,7 +3592,7 @@ _mm256_maskz_scalef_ps (__mmask8 __U, __m256 __A, __m256 __B) {
                                  (__v8si)__A);
   }
 
-  static __inline__ __m256i __DEFAULT_FN_ATTRS256
+  static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
   _mm256_mask2_permutex2var_epi32(__m256i __A, __m256i __I, __mmask8 __U,
                                   __m256i __B) {
     return (__m256i)__builtin_ia32_selectd_256(__U,
@@ -3799,7 +3600,7 @@ _mm256_maskz_scalef_ps (__mmask8 __U, __m256 __A, __m256 __B) {
                                  (__v8si)__I);
   }
 
-  static __inline__ __m256i __DEFAULT_FN_ATTRS256
+  static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
   _mm256_maskz_permutex2var_epi32(__mmask8 __U, __m256i __A, __m256i __I,
                                   __m256i __B) {
     return (__m256i)__builtin_ia32_selectd_256(__U,
@@ -3807,40 +3608,43 @@ _mm256_maskz_scalef_ps (__mmask8 __U, __m256 __A, __m256 __B) {
                                  (__v8si)_mm256_setzero_si256());
   }
 
-  static __inline__ __m128d __DEFAULT_FN_ATTRS128
+  static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR
   _mm_permutex2var_pd(__m128d __A, __m128i __I, __m128d __B) {
     return (__m128d)__builtin_ia32_vpermi2varpd128((__v2df)__A, (__v2di)__I,
                                                    (__v2df)__B);
   }
 
-  static __inline__ __m128d __DEFAULT_FN_ATTRS128
-  _mm_mask_permutex2var_pd(__m128d __A, __mmask8 __U, __m128i __I, __m128d __B) {
+  static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR
+  _mm_mask_permutex2var_pd(__m128d __A, __mmask8 __U, __m128i __I,
+                           __m128d __B) {
     return (__m128d)__builtin_ia32_selectpd_128(__U,
                                        (__v2df)_mm_permutex2var_pd(__A, __I, __B),
                                        (__v2df)__A);
   }
 
-  static __inline__ __m128d __DEFAULT_FN_ATTRS128
-  _mm_mask2_permutex2var_pd(__m128d __A, __m128i __I, __mmask8 __U, __m128d __B) {
+  static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR
+  _mm_mask2_permutex2var_pd(__m128d __A, __m128i __I, __mmask8 __U,
+                            __m128d __B) {
     return (__m128d)__builtin_ia32_selectpd_128(__U,
                                        (__v2df)_mm_permutex2var_pd(__A, __I, __B),
                                        (__v2df)(__m128d)__I);
   }
 
-  static __inline__ __m128d __DEFAULT_FN_ATTRS128
-  _mm_maskz_permutex2var_pd(__mmask8 __U, __m128d __A, __m128i __I, __m128d __B) {
+  static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR
+  _mm_maskz_permutex2var_pd(__mmask8 __U, __m128d __A, __m128i __I,
+                            __m128d __B) {
     return (__m128d)__builtin_ia32_selectpd_128(__U,
                                        (__v2df)_mm_permutex2var_pd(__A, __I, __B),
                                        (__v2df)_mm_setzero_pd());
   }
 
-  static __inline__ __m256d __DEFAULT_FN_ATTRS256
+  static __inline__ __m256d __DEFAULT_FN_ATTRS256_CONSTEXPR
   _mm256_permutex2var_pd(__m256d __A, __m256i __I, __m256d __B) {
     return (__m256d)__builtin_ia32_vpermi2varpd256((__v4df)__A, (__v4di)__I,
                                                    (__v4df)__B);
   }
 
-  static __inline__ __m256d __DEFAULT_FN_ATTRS256
+  static __inline__ __m256d __DEFAULT_FN_ATTRS256_CONSTEXPR
   _mm256_mask_permutex2var_pd(__m256d __A, __mmask8 __U, __m256i __I,
                               __m256d __B) {
     return (__m256d)__builtin_ia32_selectpd_256(__U,
@@ -3848,7 +3652,7 @@ _mm256_maskz_scalef_ps (__mmask8 __U, __m256 __A, __m256 __B) {
                                     (__v4df)__A);
   }
 
-  static __inline__ __m256d __DEFAULT_FN_ATTRS256
+  static __inline__ __m256d __DEFAULT_FN_ATTRS256_CONSTEXPR
   _mm256_mask2_permutex2var_pd(__m256d __A, __m256i __I, __mmask8 __U,
                                __m256d __B) {
     return (__m256d)__builtin_ia32_selectpd_256(__U,
@@ -3856,7 +3660,7 @@ _mm256_maskz_scalef_ps (__mmask8 __U, __m256 __A, __m256 __B) {
                                     (__v4df)(__m256d)__I);
   }
 
-  static __inline__ __m256d __DEFAULT_FN_ATTRS256
+  static __inline__ __m256d __DEFAULT_FN_ATTRS256_CONSTEXPR
   _mm256_maskz_permutex2var_pd(__mmask8 __U, __m256d __A, __m256i __I,
                                __m256d __B) {
     return (__m256d)__builtin_ia32_selectpd_256(__U,
@@ -3864,47 +3668,48 @@ _mm256_maskz_scalef_ps (__mmask8 __U, __m256 __A, __m256 __B) {
                                     (__v4df)_mm256_setzero_pd());
   }
 
-  static __inline__ __m128 __DEFAULT_FN_ATTRS128
+  static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR
   _mm_permutex2var_ps(__m128 __A, __m128i __I, __m128 __B) {
     return (__m128)__builtin_ia32_vpermi2varps128((__v4sf)__A, (__v4si)__I,
                                                   (__v4sf)__B);
   }
 
-  static __inline__ __m128 __DEFAULT_FN_ATTRS128
+  static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR
   _mm_mask_permutex2var_ps(__m128 __A, __mmask8 __U, __m128i __I, __m128 __B) {
     return (__m128)__builtin_ia32_selectps_128(__U,
                                        (__v4sf)_mm_permutex2var_ps(__A, __I, __B),
                                        (__v4sf)__A);
   }
 
-  static __inline__ __m128 __DEFAULT_FN_ATTRS128
+  static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR
   _mm_mask2_permutex2var_ps(__m128 __A, __m128i __I, __mmask8 __U, __m128 __B) {
     return (__m128)__builtin_ia32_selectps_128(__U,
                                        (__v4sf)_mm_permutex2var_ps(__A, __I, __B),
                                        (__v4sf)(__m128)__I);
   }
 
-  static __inline__ __m128 __DEFAULT_FN_ATTRS128
+  static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR
   _mm_maskz_permutex2var_ps(__mmask8 __U, __m128 __A, __m128i __I, __m128 __B) {
     return (__m128)__builtin_ia32_selectps_128(__U,
                                        (__v4sf)_mm_permutex2var_ps(__A, __I, __B),
                                        (__v4sf)_mm_setzero_ps());
   }
 
-  static __inline__ __m256 __DEFAULT_FN_ATTRS256
+  static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR
   _mm256_permutex2var_ps(__m256 __A, __m256i __I, __m256 __B) {
     return (__m256)__builtin_ia32_vpermi2varps256((__v8sf)__A, (__v8si)__I,
                                                   (__v8sf) __B);
   }
 
-  static __inline__ __m256 __DEFAULT_FN_ATTRS256
-  _mm256_mask_permutex2var_ps(__m256 __A, __mmask8 __U, __m256i __I, __m256 __B) {
+  static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR
+  _mm256_mask_permutex2var_ps(__m256 __A, __mmask8 __U, __m256i __I,
+                              __m256 __B) {
     return (__m256)__builtin_ia32_selectps_256(__U,
                                     (__v8sf)_mm256_permutex2var_ps(__A, __I, __B),
                                     (__v8sf)__A);
   }
 
-  static __inline__ __m256 __DEFAULT_FN_ATTRS256
+  static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR
   _mm256_mask2_permutex2var_ps(__m256 __A, __m256i __I, __mmask8 __U,
                                __m256 __B) {
     return (__m256)__builtin_ia32_selectps_256(__U,
@@ -3912,7 +3717,7 @@ _mm256_maskz_scalef_ps (__mmask8 __U, __m256 __A, __m256 __B) {
                                     (__v8sf)(__m256)__I);
   }
 
-  static __inline__ __m256 __DEFAULT_FN_ATTRS256
+  static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR
   _mm256_maskz_permutex2var_ps(__mmask8 __U, __m256 __A, __m256i __I,
                                __m256 __B) {
     return (__m256)__builtin_ia32_selectps_256(__U,
@@ -3920,13 +3725,13 @@ _mm256_maskz_scalef_ps (__mmask8 __U, __m256 __A, __m256 __B) {
                                     (__v8sf)_mm256_setzero_ps());
   }
 
-  static __inline__ __m128i __DEFAULT_FN_ATTRS128
+  static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
   _mm_permutex2var_epi64(__m128i __A, __m128i __I, __m128i __B) {
     return (__m128i)__builtin_ia32_vpermi2varq128((__v2di)__A, (__v2di)__I,
                                                   (__v2di)__B);
   }
 
-  static __inline__ __m128i __DEFAULT_FN_ATTRS128
+  static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
   _mm_mask_permutex2var_epi64(__m128i __A, __mmask8 __U, __m128i __I,
                               __m128i __B) {
     return (__m128i)__builtin_ia32_selectq_128(__U,
@@ -3934,7 +3739,7 @@ _mm256_maskz_scalef_ps (__mmask8 __U, __m256 __A, __m256 __B) {
                                     (__v2di)__A);
   }
 
-  static __inline__ __m128i __DEFAULT_FN_ATTRS128
+  static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
   _mm_mask2_permutex2var_epi64(__m128i __A, __m128i __I, __mmask8 __U,
                                __m128i __B) {
     return (__m128i)__builtin_ia32_selectq_128(__U,
@@ -3942,7 +3747,7 @@ _mm256_maskz_scalef_ps (__mmask8 __U, __m256 __A, __m256 __B) {
                                     (__v2di)__I);
   }
 
-  static __inline__ __m128i __DEFAULT_FN_ATTRS128
+  static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
   _mm_maskz_permutex2var_epi64(__mmask8 __U, __m128i __A, __m128i __I,
                                __m128i __B) {
     return (__m128i)__builtin_ia32_selectq_128(__U,
@@ -3950,14 +3755,13 @@ _mm256_maskz_scalef_ps (__mmask8 __U, __m256 __A, __m256 __B) {
                                     (__v2di)_mm_setzero_si128());
   }
 
-
-  static __inline__ __m256i __DEFAULT_FN_ATTRS256
+  static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
   _mm256_permutex2var_epi64(__m256i __A, __m256i __I, __m256i __B) {
     return (__m256i)__builtin_ia32_vpermi2varq256((__v4di)__A, (__v4di) __I,
                                                   (__v4di) __B);
   }
 
-  static __inline__ __m256i __DEFAULT_FN_ATTRS256
+  static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
   _mm256_mask_permutex2var_epi64(__m256i __A, __mmask8 __U, __m256i __I,
                                  __m256i __B) {
     return (__m256i)__builtin_ia32_selectq_256(__U,
@@ -3965,7 +3769,7 @@ _mm256_maskz_scalef_ps (__mmask8 __U, __m256 __A, __m256 __B) {
                                  (__v4di)__A);
   }
 
-  static __inline__ __m256i __DEFAULT_FN_ATTRS256
+  static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
   _mm256_mask2_permutex2var_epi64(__m256i __A, __m256i __I, __mmask8 __U,
                                   __m256i __B) {
     return (__m256i)__builtin_ia32_selectq_256(__U,
@@ -3973,7 +3777,7 @@ _mm256_maskz_scalef_ps (__mmask8 __U, __m256 __A, __m256 __B) {
                                  (__v4di)__I);
   }
 
-  static __inline__ __m256i __DEFAULT_FN_ATTRS256
+  static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
   _mm256_maskz_permutex2var_epi64(__mmask8 __U, __m256i __A, __m256i __I,
                                   __m256i __B) {
     return (__m256i)__builtin_ia32_selectq_256(__U,
@@ -4355,13 +4159,13 @@ _mm256_maskz_scalef_ps (__mmask8 __U, __m256 __A, __m256 __B) {
                                        (__v4di)_mm256_rol_epi64((a), (b)), \
                                        (__v4di)_mm256_setzero_si256()))
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS128
+static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
 _mm_rolv_epi32 (__m128i __A, __m128i __B)
 {
-  return (__m128i)__builtin_ia32_prolvd128((__v4si)__A, (__v4si)__B);
+  return (__m128i)__builtin_elementwise_fshl((__v4su)__A, (__v4su)__A, (__v4su)__B);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS128
+static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
 _mm_mask_rolv_epi32 (__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
 {
   return (__m128i)__builtin_ia32_selectd_128(__U,
@@ -4369,7 +4173,7 @@ _mm_mask_rolv_epi32 (__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
                                              (__v4si)__W);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS128
+static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
 _mm_maskz_rolv_epi32 (__mmask8 __U, __m128i __A, __m128i __B)
 {
   return (__m128i)__builtin_ia32_selectd_128(__U,
@@ -4377,13 +4181,13 @@ _mm_maskz_rolv_epi32 (__mmask8 __U, __m128i __A, __m128i __B)
                                              (__v4si)_mm_setzero_si128());
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
 _mm256_rolv_epi32 (__m256i __A, __m256i __B)
 {
-  return (__m256i)__builtin_ia32_prolvd256((__v8si)__A, (__v8si)__B);
+  return (__m256i)__builtin_elementwise_fshl((__v8su)__A, (__v8su)__A, (__v8su)__B);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
 _mm256_mask_rolv_epi32 (__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
 {
   return (__m256i)__builtin_ia32_selectd_256(__U,
@@ -4391,7 +4195,7 @@ _mm256_mask_rolv_epi32 (__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
                                             (__v8si)__W);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
 _mm256_maskz_rolv_epi32 (__mmask8 __U, __m256i __A, __m256i __B)
 {
   return (__m256i)__builtin_ia32_selectd_256(__U,
@@ -4399,13 +4203,13 @@ _mm256_maskz_rolv_epi32 (__mmask8 __U, __m256i __A, __m256i __B)
                                             (__v8si)_mm256_setzero_si256());
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS128
+static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
 _mm_rolv_epi64 (__m128i __A, __m128i __B)
 {
-  return (__m128i)__builtin_ia32_prolvq128((__v2di)__A, (__v2di)__B);
+  return (__m128i)__builtin_elementwise_fshl((__v2du)__A, (__v2du)__A, (__v2du)__B);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS128
+static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
 _mm_mask_rolv_epi64 (__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
 {
   return (__m128i)__builtin_ia32_selectq_128(__U,
@@ -4413,7 +4217,7 @@ _mm_mask_rolv_epi64 (__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
                                              (__v2di)__W);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS128
+static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
 _mm_maskz_rolv_epi64 (__mmask8 __U, __m128i __A, __m128i __B)
 {
   return (__m128i)__builtin_ia32_selectq_128(__U,
@@ -4421,13 +4225,13 @@ _mm_maskz_rolv_epi64 (__mmask8 __U, __m128i __A, __m128i __B)
                                              (__v2di)_mm_setzero_si128());
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
 _mm256_rolv_epi64 (__m256i __A, __m256i __B)
 {
-  return (__m256i)__builtin_ia32_prolvq256((__v4di)__A, (__v4di)__B);
+  return (__m256i)__builtin_elementwise_fshl((__v4du)__A, (__v4du)__A, (__v4du)__B);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
 _mm256_mask_rolv_epi64 (__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
 {
   return (__m256i)__builtin_ia32_selectq_256(__U,
@@ -4435,7 +4239,7 @@ _mm256_mask_rolv_epi64 (__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
                                             (__v4di)__W);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
 _mm256_maskz_rolv_epi64 (__mmask8 __U, __m256i __A, __m256i __B)
 {
   return (__m256i)__builtin_ia32_selectq_256(__U,
@@ -4495,33 +4299,29 @@ _mm256_maskz_rolv_epi64 (__mmask8 __U, __m256i __A, __m256i __B)
                                        (__v4di)_mm256_ror_epi64((a), (b)), \
                                        (__v4di)_mm256_setzero_si256()))
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS128
-_mm_mask_sll_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
-{
+static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
+_mm_mask_sll_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) {
   return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
                                              (__v4si)_mm_sll_epi32(__A, __B),
                                              (__v4si)__W);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS128
-_mm_maskz_sll_epi32(__mmask8 __U, __m128i __A, __m128i __B)
-{
+static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
+_mm_maskz_sll_epi32(__mmask8 __U, __m128i __A, __m128i __B) {
   return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
                                              (__v4si)_mm_sll_epi32(__A, __B),
                                              (__v4si)_mm_setzero_si128());
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_mask_sll_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m128i __B)
-{
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_mask_sll_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m128i __B) {
   return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
                                              (__v8si)_mm256_sll_epi32(__A, __B),
                                              (__v8si)__W);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_maskz_sll_epi32(__mmask8 __U, __m256i __A, __m128i __B)
-{
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_maskz_sll_epi32(__mmask8 __U, __m256i __A, __m128i __B) {
   return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
                                              (__v8si)_mm256_sll_epi32(__A, __B),
                                              (__v8si)_mm256_setzero_si256());
@@ -4543,49 +4343,44 @@ _mm_maskz_slli_epi32(__mmask8 __U, __m128i __A, unsigned int __B)
                                              (__v4si)_mm_setzero_si128());
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_mask_slli_epi32(__m256i __W, __mmask8 __U, __m256i __A, unsigned int __B)
-{
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_mask_slli_epi32(__m256i __W, __mmask8 __U, __m256i __A,
+                       unsigned int __B) {
   return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
                                              (__v8si)_mm256_slli_epi32(__A, (int)__B),
                                              (__v8si)__W);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_maskz_slli_epi32(__mmask8 __U, __m256i __A, unsigned int __B)
-{
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_maskz_slli_epi32(__mmask8 __U, __m256i __A, unsigned int __B) {
   return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
                                              (__v8si)_mm256_slli_epi32(__A, (int)__B),
                                              (__v8si)_mm256_setzero_si256());
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS128
-_mm_mask_sll_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
-{
+static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
+_mm_mask_sll_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) {
   return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
                                              (__v2di)_mm_sll_epi64(__A, __B),
                                              (__v2di)__W);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS128
-_mm_maskz_sll_epi64(__mmask8 __U, __m128i __A, __m128i __B)
-{
+static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
+_mm_maskz_sll_epi64(__mmask8 __U, __m128i __A, __m128i __B) {
   return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
                                              (__v2di)_mm_sll_epi64(__A, __B),
                                              (__v2di)_mm_setzero_si128());
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_mask_sll_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m128i __B)
-{
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_mask_sll_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m128i __B) {
   return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
                                              (__v4di)_mm256_sll_epi64(__A, __B),
                                              (__v4di)__W);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_maskz_sll_epi64(__mmask8 __U, __m256i __A, __m128i __B)
-{
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_maskz_sll_epi64(__mmask8 __U, __m256i __A, __m128i __B) {
   return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
                                              (__v4di)_mm256_sll_epi64(__A, __B),
                                              (__v4di)_mm256_setzero_si256());
@@ -4607,29 +4402,28 @@ _mm_maskz_slli_epi64(__mmask8 __U, __m128i __A, unsigned int __B)
                                              (__v2di)_mm_setzero_si128());
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_mask_slli_epi64(__m256i __W, __mmask8 __U, __m256i __A, unsigned int __B)
-{
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_mask_slli_epi64(__m256i __W, __mmask8 __U, __m256i __A,
+                       unsigned int __B) {
   return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
                                              (__v4di)_mm256_slli_epi64(__A, (int)__B),
                                              (__v4di)__W);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_maskz_slli_epi64(__mmask8 __U, __m256i __A, unsigned int __B)
-{
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_maskz_slli_epi64(__mmask8 __U, __m256i __A, unsigned int __B) {
   return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
                                              (__v4di)_mm256_slli_epi64(__A, (int)__B),
                                              (__v4di)_mm256_setzero_si256());
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS128
+static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
 _mm_rorv_epi32 (__m128i __A, __m128i __B)
 {
-  return (__m128i)__builtin_ia32_prorvd128((__v4si)__A, (__v4si)__B);
+  return (__m128i)__builtin_elementwise_fshr((__v4su)__A, (__v4su)__A, (__v4su)__B);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS128
+static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
 _mm_mask_rorv_epi32 (__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
 {
   return (__m128i)__builtin_ia32_selectd_128(__U,
@@ -4637,7 +4431,7 @@ _mm_mask_rorv_epi32 (__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
                                              (__v4si)__W);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS128
+static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
 _mm_maskz_rorv_epi32 (__mmask8 __U, __m128i __A, __m128i __B)
 {
   return (__m128i)__builtin_ia32_selectd_128(__U,
@@ -4645,13 +4439,13 @@ _mm_maskz_rorv_epi32 (__mmask8 __U, __m128i __A, __m128i __B)
                                              (__v4si)_mm_setzero_si128());
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
 _mm256_rorv_epi32 (__m256i __A, __m256i __B)
 {
-  return (__m256i)__builtin_ia32_prorvd256((__v8si)__A, (__v8si)__B);
+  return (__m256i)__builtin_elementwise_fshr((__v8su)__A, (__v8su)__A, (__v8su)__B);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
 _mm256_mask_rorv_epi32 (__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
 {
   return (__m256i)__builtin_ia32_selectd_256(__U,
@@ -4659,7 +4453,7 @@ _mm256_mask_rorv_epi32 (__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
                                             (__v8si)__W);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
 _mm256_maskz_rorv_epi32 (__mmask8 __U, __m256i __A, __m256i __B)
 {
   return (__m256i)__builtin_ia32_selectd_256(__U,
@@ -4667,13 +4461,13 @@ _mm256_maskz_rorv_epi32 (__mmask8 __U, __m256i __A, __m256i __B)
                                             (__v8si)_mm256_setzero_si256());
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS128
+static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
 _mm_rorv_epi64 (__m128i __A, __m128i __B)
 {
-  return (__m128i)__builtin_ia32_prorvq128((__v2di)__A, (__v2di)__B);
+  return (__m128i)__builtin_elementwise_fshr((__v2du)__A, (__v2du)__A, (__v2du)__B);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS128
+static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
 _mm_mask_rorv_epi64 (__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
 {
   return (__m128i)__builtin_ia32_selectq_128(__U,
@@ -4681,7 +4475,7 @@ _mm_mask_rorv_epi64 (__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
                                              (__v2di)__W);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS128
+static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
 _mm_maskz_rorv_epi64 (__mmask8 __U, __m128i __A, __m128i __B)
 {
   return (__m128i)__builtin_ia32_selectq_128(__U,
@@ -4689,13 +4483,13 @@ _mm_maskz_rorv_epi64 (__mmask8 __U, __m128i __A, __m128i __B)
                                              (__v2di)_mm_setzero_si128());
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
 _mm256_rorv_epi64 (__m256i __A, __m256i __B)
 {
-  return (__m256i)__builtin_ia32_prorvq256((__v4di)__A, (__v4di)__B);
+  return (__m256i)__builtin_elementwise_fshr((__v4du)__A, (__v4du)__A, (__v4du)__B);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
 _mm256_mask_rorv_epi64 (__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
 {
   return (__m256i)__builtin_ia32_selectq_256(__U,
@@ -4703,7 +4497,7 @@ _mm256_mask_rorv_epi64 (__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
                                             (__v4di)__W);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
 _mm256_maskz_rorv_epi64 (__mmask8 __U, __m256i __A, __m256i __B)
 {
   return (__m256i)__builtin_ia32_selectq_256(__U,
@@ -4711,7 +4505,7 @@ _mm256_maskz_rorv_epi64 (__mmask8 __U, __m256i __A, __m256i __B)
                                             (__v4di)_mm256_setzero_si256());
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS128
+static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
 _mm_mask_sllv_epi64(__m128i __W, __mmask8 __U, __m128i __X, __m128i __Y)
 {
   return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
@@ -4719,7 +4513,7 @@ _mm_mask_sllv_epi64(__m128i __W, __mmask8 __U, __m128i __X, __m128i __Y)
                                              (__v2di)__W);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS128
+static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
 _mm_maskz_sllv_epi64(__mmask8 __U, __m128i __X, __m128i __Y)
 {
   return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
@@ -4727,7 +4521,7 @@ _mm_maskz_sllv_epi64(__mmask8 __U, __m128i __X, __m128i __Y)
                                              (__v2di)_mm_setzero_si128());
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
 _mm256_mask_sllv_epi64(__m256i __W, __mmask8 __U, __m256i __X, __m256i __Y)
 {
   return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
@@ -4735,7 +4529,7 @@ _mm256_mask_sllv_epi64(__m256i __W, __mmask8 __U, __m256i __X, __m256i __Y)
                                             (__v4di)__W);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
 _mm256_maskz_sllv_epi64(__mmask8 __U, __m256i __X, __m256i __Y)
 {
   return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
@@ -4743,7 +4537,7 @@ _mm256_maskz_sllv_epi64(__mmask8 __U, __m256i __X, __m256i __Y)
                                             (__v4di)_mm256_setzero_si256());
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS128
+static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
 _mm_mask_sllv_epi32(__m128i __W, __mmask8 __U, __m128i __X, __m128i __Y)
 {
   return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
@@ -4751,7 +4545,7 @@ _mm_mask_sllv_epi32(__m128i __W, __mmask8 __U, __m128i __X, __m128i __Y)
                                              (__v4si)__W);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS128
+static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
 _mm_maskz_sllv_epi32(__mmask8 __U, __m128i __X, __m128i __Y)
 {
   return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
@@ -4759,7 +4553,7 @@ _mm_maskz_sllv_epi32(__mmask8 __U, __m128i __X, __m128i __Y)
                                              (__v4si)_mm_setzero_si128());
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
 _mm256_mask_sllv_epi32(__m256i __W, __mmask8 __U, __m256i __X, __m256i __Y)
 {
   return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
@@ -4767,7 +4561,7 @@ _mm256_mask_sllv_epi32(__m256i __W, __mmask8 __U, __m256i __X, __m256i __Y)
                                             (__v8si)__W);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
 _mm256_maskz_sllv_epi32(__mmask8 __U, __m256i __X, __m256i __Y)
 {
   return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
@@ -4775,7 +4569,7 @@ _mm256_maskz_sllv_epi32(__mmask8 __U, __m256i __X, __m256i __Y)
                                             (__v8si)_mm256_setzero_si256());
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS128
+static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
 _mm_mask_srlv_epi64(__m128i __W, __mmask8 __U, __m128i __X, __m128i __Y)
 {
   return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
@@ -4783,7 +4577,7 @@ _mm_mask_srlv_epi64(__m128i __W, __mmask8 __U, __m128i __X, __m128i __Y)
                                              (__v2di)__W);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS128
+static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
 _mm_maskz_srlv_epi64(__mmask8 __U, __m128i __X, __m128i __Y)
 {
   return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
@@ -4791,7 +4585,7 @@ _mm_maskz_srlv_epi64(__mmask8 __U, __m128i __X, __m128i __Y)
                                              (__v2di)_mm_setzero_si128());
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
 _mm256_mask_srlv_epi64(__m256i __W, __mmask8 __U, __m256i __X, __m256i __Y)
 {
   return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
@@ -4799,7 +4593,7 @@ _mm256_mask_srlv_epi64(__m256i __W, __mmask8 __U, __m256i __X, __m256i __Y)
                                             (__v4di)__W);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
 _mm256_maskz_srlv_epi64(__mmask8 __U, __m256i __X, __m256i __Y)
 {
   return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
@@ -4807,7 +4601,7 @@ _mm256_maskz_srlv_epi64(__mmask8 __U, __m256i __X, __m256i __Y)
                                             (__v4di)_mm256_setzero_si256());
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS128
+static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
 _mm_mask_srlv_epi32(__m128i __W, __mmask8 __U, __m128i __X, __m128i __Y)
 {
   return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
@@ -4815,7 +4609,7 @@ _mm_mask_srlv_epi32(__m128i __W, __mmask8 __U, __m128i __X, __m128i __Y)
                                             (__v4si)__W);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS128
+static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
 _mm_maskz_srlv_epi32(__mmask8 __U, __m128i __X, __m128i __Y)
 {
   return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
@@ -4823,7 +4617,7 @@ _mm_maskz_srlv_epi32(__mmask8 __U, __m128i __X, __m128i __Y)
                                             (__v4si)_mm_setzero_si128());
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
 _mm256_mask_srlv_epi32(__m256i __W, __mmask8 __U, __m256i __X, __m256i __Y)
 {
   return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
@@ -4831,7 +4625,7 @@ _mm256_mask_srlv_epi32(__m256i __W, __mmask8 __U, __m256i __X, __m256i __Y)
                                             (__v8si)__W);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
 _mm256_maskz_srlv_epi32(__mmask8 __U, __m256i __X, __m256i __Y)
 {
   return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
@@ -4839,33 +4633,29 @@ _mm256_maskz_srlv_epi32(__mmask8 __U, __m256i __X, __m256i __Y)
                                             (__v8si)_mm256_setzero_si256());
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS128
-_mm_mask_srl_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
-{
+static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
+_mm_mask_srl_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) {
   return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
                                              (__v4si)_mm_srl_epi32(__A, __B),
                                              (__v4si)__W);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS128
-_mm_maskz_srl_epi32(__mmask8 __U, __m128i __A, __m128i __B)
-{
+static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
+_mm_maskz_srl_epi32(__mmask8 __U, __m128i __A, __m128i __B) {
   return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
                                              (__v4si)_mm_srl_epi32(__A, __B),
                                              (__v4si)_mm_setzero_si128());
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_mask_srl_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m128i __B)
-{
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_mask_srl_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m128i __B) {
   return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
                                              (__v8si)_mm256_srl_epi32(__A, __B),
                                              (__v8si)__W);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_maskz_srl_epi32(__mmask8 __U, __m256i __A, __m128i __B)
-{
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_maskz_srl_epi32(__mmask8 __U, __m256i __A, __m128i __B) {
   return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
                                              (__v8si)_mm256_srl_epi32(__A, __B),
                                              (__v8si)_mm256_setzero_si256());
@@ -4887,49 +4677,44 @@ _mm_maskz_srli_epi32(__mmask8 __U, __m128i __A, unsigned int __B)
                                              (__v4si)_mm_setzero_si128());
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_mask_srli_epi32(__m256i __W, __mmask8 __U, __m256i __A, unsigned int __B)
-{
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_mask_srli_epi32(__m256i __W, __mmask8 __U, __m256i __A,
+                       unsigned int __B) {
   return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
                                              (__v8si)_mm256_srli_epi32(__A, (int)__B),
                                              (__v8si)__W);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_maskz_srli_epi32(__mmask8 __U, __m256i __A, unsigned int __B)
-{
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_maskz_srli_epi32(__mmask8 __U, __m256i __A, unsigned int __B) {
   return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
                                              (__v8si)_mm256_srli_epi32(__A, (int)__B),
                                              (__v8si)_mm256_setzero_si256());
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS128
-_mm_mask_srl_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
-{
+static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
+_mm_mask_srl_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) {
   return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
                                              (__v2di)_mm_srl_epi64(__A, __B),
                                              (__v2di)__W);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS128
-_mm_maskz_srl_epi64(__mmask8 __U, __m128i __A, __m128i __B)
-{
+static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
+_mm_maskz_srl_epi64(__mmask8 __U, __m128i __A, __m128i __B) {
   return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
                                              (__v2di)_mm_srl_epi64(__A, __B),
                                              (__v2di)_mm_setzero_si128());
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_mask_srl_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m128i __B)
-{
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_mask_srl_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m128i __B) {
   return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
                                              (__v4di)_mm256_srl_epi64(__A, __B),
                                              (__v4di)__W);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_maskz_srl_epi64(__mmask8 __U, __m256i __A, __m128i __B)
-{
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_maskz_srl_epi64(__mmask8 __U, __m256i __A, __m128i __B) {
   return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
                                              (__v4di)_mm256_srl_epi64(__A, __B),
                                              (__v4di)_mm256_setzero_si256());
@@ -4951,23 +4736,22 @@ _mm_maskz_srli_epi64(__mmask8 __U, __m128i __A, unsigned int __B)
                                              (__v2di)_mm_setzero_si128());
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_mask_srli_epi64(__m256i __W, __mmask8 __U, __m256i __A, unsigned int __B)
-{
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_mask_srli_epi64(__m256i __W, __mmask8 __U, __m256i __A,
+                       unsigned int __B) {
   return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
                                              (__v4di)_mm256_srli_epi64(__A, (int)__B),
                                              (__v4di)__W);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_maskz_srli_epi64(__mmask8 __U, __m256i __A, unsigned int __B)
-{
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_maskz_srli_epi64(__mmask8 __U, __m256i __A, unsigned int __B) {
   return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
                                              (__v4di)_mm256_srli_epi64(__A, (int)__B),
                                              (__v4di)_mm256_setzero_si256());
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS128
+static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
 _mm_mask_srav_epi32(__m128i __W, __mmask8 __U, __m128i __X, __m128i __Y)
 {
   return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
@@ -4975,7 +4759,7 @@ _mm_mask_srav_epi32(__m128i __W, __mmask8 __U, __m128i __X, __m128i __Y)
                                             (__v4si)__W);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS128
+static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
 _mm_maskz_srav_epi32(__mmask8 __U, __m128i __X, __m128i __Y)
 {
   return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
@@ -4983,7 +4767,7 @@ _mm_maskz_srav_epi32(__mmask8 __U, __m128i __X, __m128i __Y)
                                             (__v4si)_mm_setzero_si128());
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
 _mm256_mask_srav_epi32(__m256i __W, __mmask8 __U, __m256i __X, __m256i __Y)
 {
   return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
@@ -4991,7 +4775,7 @@ _mm256_mask_srav_epi32(__m256i __W, __mmask8 __U, __m256i __X, __m256i __Y)
                                             (__v8si)__W);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
 _mm256_maskz_srav_epi32(__mmask8 __U, __m256i __X, __m256i __Y)
 {
   return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
@@ -4999,13 +4783,13 @@ _mm256_maskz_srav_epi32(__mmask8 __U, __m256i __X, __m256i __Y)
                                             (__v8si)_mm256_setzero_si256());
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS128
+static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
 _mm_srav_epi64(__m128i __X, __m128i __Y)
 {
   return (__m128i)__builtin_ia32_psravq128((__v2di)__X, (__v2di)__Y);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS128
+static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
 _mm_mask_srav_epi64(__m128i __W, __mmask8 __U, __m128i __X, __m128i __Y)
 {
   return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
@@ -5013,7 +4797,7 @@ _mm_mask_srav_epi64(__m128i __W, __mmask8 __U, __m128i __X, __m128i __Y)
                                              (__v2di)__W);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS128
+static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
 _mm_maskz_srav_epi64(__mmask8 __U, __m128i __X, __m128i __Y)
 {
   return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
@@ -5021,13 +4805,13 @@ _mm_maskz_srav_epi64(__mmask8 __U, __m128i __X, __m128i __Y)
                                              (__v2di)_mm_setzero_si128());
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
 _mm256_srav_epi64(__m256i __X, __m256i __Y)
 {
   return (__m256i)__builtin_ia32_psravq256((__v4di)__X, (__v4di) __Y);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
 _mm256_mask_srav_epi64(__m256i __W, __mmask8 __U, __m256i __X, __m256i __Y)
 {
   return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
@@ -5035,7 +4819,7 @@ _mm256_mask_srav_epi64(__m256i __W, __mmask8 __U, __m256i __X, __m256i __Y)
                                              (__v4di)__W);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
 _mm256_maskz_srav_epi64 (__mmask8 __U, __m256i __X, __m256i __Y)
 {
   return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
@@ -5296,69 +5080,55 @@ _mm256_maskz_movedup_pd (__mmask8 __U, __m256d __A)
                                               (__v4df)_mm256_setzero_pd());
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS128
-_mm_mask_set1_epi32(__m128i __O, __mmask8 __M, int __A)
-{
-   return (__m128i)__builtin_ia32_selectd_128(__M,
-                                              (__v4si) _mm_set1_epi32(__A),
-                                              (__v4si)__O);
+static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
+_mm_mask_set1_epi32(__m128i __O, __mmask8 __M, int __A) {
+  return (__m128i)__builtin_ia32_selectd_128(__M, (__v4si)_mm_set1_epi32(__A),
+                                             (__v4si)__O);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS128
-_mm_maskz_set1_epi32( __mmask8 __M, int __A)
-{
-   return (__m128i)__builtin_ia32_selectd_128(__M,
-                                              (__v4si) _mm_set1_epi32(__A),
-                                              (__v4si)_mm_setzero_si128());
+static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
+_mm_maskz_set1_epi32(__mmask8 __M, int __A) {
+  return (__m128i)__builtin_ia32_selectd_128(__M, (__v4si)_mm_set1_epi32(__A),
+                                             (__v4si)_mm_setzero_si128());
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_mask_set1_epi32(__m256i __O, __mmask8 __M, int __A)
-{
-   return (__m256i)__builtin_ia32_selectd_256(__M,
-                                              (__v8si) _mm256_set1_epi32(__A),
-                                              (__v8si)__O);
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_mask_set1_epi32(__m256i __O, __mmask8 __M, int __A) {
+  return (__m256i)__builtin_ia32_selectd_256(
+      __M, (__v8si)_mm256_set1_epi32(__A), (__v8si)__O);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_maskz_set1_epi32( __mmask8 __M, int __A)
-{
-   return (__m256i)__builtin_ia32_selectd_256(__M,
-                                              (__v8si) _mm256_set1_epi32(__A),
-                                              (__v8si)_mm256_setzero_si256());
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_maskz_set1_epi32(__mmask8 __M, int __A) {
+  return (__m256i)__builtin_ia32_selectd_256(
+      __M, (__v8si)_mm256_set1_epi32(__A), (__v8si)_mm256_setzero_si256());
 }
 
-
-static __inline__ __m128i __DEFAULT_FN_ATTRS128
-_mm_mask_set1_epi64 (__m128i __O, __mmask8 __M, long long __A)
-{
+static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
+_mm_mask_set1_epi64(__m128i __O, __mmask8 __M, long long __A) {
   return (__m128i) __builtin_ia32_selectq_128(__M,
                                               (__v2di) _mm_set1_epi64x(__A),
                                               (__v2di) __O);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS128
-_mm_maskz_set1_epi64 (__mmask8 __M, long long __A)
-{
+static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
+_mm_maskz_set1_epi64(__mmask8 __M, long long __A) {
   return (__m128i) __builtin_ia32_selectq_128(__M,
                                               (__v2di) _mm_set1_epi64x(__A),
                                               (__v2di) _mm_setzero_si128());
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_mask_set1_epi64 (__m256i __O, __mmask8 __M, long long __A)
-{
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_mask_set1_epi64(__m256i __O, __mmask8 __M, long long __A) {
   return (__m256i) __builtin_ia32_selectq_256(__M,
                                               (__v4di) _mm256_set1_epi64x(__A),
                                               (__v4di) __O) ;
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_maskz_set1_epi64 (__mmask8 __M, long long __A)
-{
-   return (__m256i) __builtin_ia32_selectq_256(__M,
-                                               (__v4di) _mm256_set1_epi64x(__A),
-                                               (__v4di) _mm256_setzero_si256());
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_maskz_set1_epi64(__mmask8 __M, long long __A) {
+  return (__m256i)__builtin_ia32_selectq_256(
+      __M, (__v4di)_mm256_set1_epi64x(__A), (__v4di)_mm256_setzero_si256());
 }
 
 #define _mm_fixupimm_pd(A, B, C, imm) \
@@ -5805,130 +5575,113 @@ _mm256_mask_storeu_ps (void *__P, __mmask8 __U, __m256 __A)
            (__mmask8) __U);
 }
 
-
-static __inline__ __m128d __DEFAULT_FN_ATTRS128
-_mm_mask_unpackhi_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
-{
+static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR
+_mm_mask_unpackhi_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) {
   return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
                                               (__v2df)_mm_unpackhi_pd(__A, __B),
                                               (__v2df)__W);
 }
 
-static __inline__ __m128d __DEFAULT_FN_ATTRS128
-_mm_maskz_unpackhi_pd(__mmask8 __U, __m128d __A, __m128d __B)
-{
+static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR
+_mm_maskz_unpackhi_pd(__mmask8 __U, __m128d __A, __m128d __B) {
   return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
                                               (__v2df)_mm_unpackhi_pd(__A, __B),
                                               (__v2df)_mm_setzero_pd());
 }
 
-static __inline__ __m256d __DEFAULT_FN_ATTRS256
-_mm256_mask_unpackhi_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B)
-{
+static __inline__ __m256d __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_mask_unpackhi_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) {
   return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
                                            (__v4df)_mm256_unpackhi_pd(__A, __B),
                                            (__v4df)__W);
 }
 
-static __inline__ __m256d __DEFAULT_FN_ATTRS256
-_mm256_maskz_unpackhi_pd(__mmask8 __U, __m256d __A, __m256d __B)
-{
+static __inline__ __m256d __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_maskz_unpackhi_pd(__mmask8 __U, __m256d __A, __m256d __B) {
   return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
                                            (__v4df)_mm256_unpackhi_pd(__A, __B),
                                            (__v4df)_mm256_setzero_pd());
 }
 
-static __inline__ __m128 __DEFAULT_FN_ATTRS128
-_mm_mask_unpackhi_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
-{
+static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR
+_mm_mask_unpackhi_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) {
   return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
                                              (__v4sf)_mm_unpackhi_ps(__A, __B),
                                              (__v4sf)__W);
 }
 
-static __inline__ __m128 __DEFAULT_FN_ATTRS128
-_mm_maskz_unpackhi_ps(__mmask8 __U, __m128 __A, __m128 __B)
-{
+static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR
+_mm_maskz_unpackhi_ps(__mmask8 __U, __m128 __A, __m128 __B) {
   return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
                                              (__v4sf)_mm_unpackhi_ps(__A, __B),
                                              (__v4sf)_mm_setzero_ps());
 }
 
-static __inline__ __m256 __DEFAULT_FN_ATTRS256
-_mm256_mask_unpackhi_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B)
-{
+static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_mask_unpackhi_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) {
   return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
                                            (__v8sf)_mm256_unpackhi_ps(__A, __B),
                                            (__v8sf)__W);
 }
 
-static __inline__ __m256 __DEFAULT_FN_ATTRS256
-_mm256_maskz_unpackhi_ps(__mmask8 __U, __m256 __A, __m256 __B)
-{
+static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_maskz_unpackhi_ps(__mmask8 __U, __m256 __A, __m256 __B) {
   return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
                                            (__v8sf)_mm256_unpackhi_ps(__A, __B),
                                            (__v8sf)_mm256_setzero_ps());
 }
 
-static __inline__ __m128d __DEFAULT_FN_ATTRS128
-_mm_mask_unpacklo_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
-{
+static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR
+_mm_mask_unpacklo_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) {
   return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
                                               (__v2df)_mm_unpacklo_pd(__A, __B),
                                               (__v2df)__W);
 }
 
-static __inline__ __m128d __DEFAULT_FN_ATTRS128
-_mm_maskz_unpacklo_pd(__mmask8 __U, __m128d __A, __m128d __B)
-{
+static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR
+_mm_maskz_unpacklo_pd(__mmask8 __U, __m128d __A, __m128d __B) {
   return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
                                               (__v2df)_mm_unpacklo_pd(__A, __B),
                                               (__v2df)_mm_setzero_pd());
 }
 
-static __inline__ __m256d __DEFAULT_FN_ATTRS256
-_mm256_mask_unpacklo_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B)
-{
+static __inline__ __m256d __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_mask_unpacklo_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) {
   return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
                                            (__v4df)_mm256_unpacklo_pd(__A, __B),
                                            (__v4df)__W);
 }
 
-static __inline__ __m256d __DEFAULT_FN_ATTRS256
-_mm256_maskz_unpacklo_pd(__mmask8 __U, __m256d __A, __m256d __B)
-{
+static __inline__ __m256d __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_maskz_unpacklo_pd(__mmask8 __U, __m256d __A, __m256d __B) {
   return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
                                            (__v4df)_mm256_unpacklo_pd(__A, __B),
                                            (__v4df)_mm256_setzero_pd());
 }
 
-static __inline__ __m128 __DEFAULT_FN_ATTRS128
-_mm_mask_unpacklo_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
-{
+static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR
+_mm_mask_unpacklo_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) {
   return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
                                              (__v4sf)_mm_unpacklo_ps(__A, __B),
                                              (__v4sf)__W);
 }
 
-static __inline__ __m128 __DEFAULT_FN_ATTRS128
-_mm_maskz_unpacklo_ps(__mmask8 __U, __m128 __A, __m128 __B)
-{
+static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR
+_mm_maskz_unpacklo_ps(__mmask8 __U, __m128 __A, __m128 __B) {
   return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
                                              (__v4sf)_mm_unpacklo_ps(__A, __B),
                                              (__v4sf)_mm_setzero_ps());
 }
 
-static __inline__ __m256 __DEFAULT_FN_ATTRS256
-_mm256_mask_unpacklo_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B)
-{
+static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_mask_unpacklo_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) {
   return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
                                            (__v8sf)_mm256_unpacklo_ps(__A, __B),
                                            (__v8sf)__W);
 }
 
-static __inline__ __m256 __DEFAULT_FN_ATTRS256
-_mm256_maskz_unpacklo_ps(__mmask8 __U, __m256 __A, __m256 __B)
-{
+static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_maskz_unpacklo_ps(__mmask8 __U, __m256 __A, __m256 __B) {
   return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
                                            (__v8sf)_mm256_unpacklo_ps(__A, __B),
                                            (__v8sf)_mm256_setzero_ps());
@@ -6078,65 +5831,57 @@ _mm256_maskz_rcp14_ps (__mmask8 __U, __m256 __A)
                                        (__v8sf)_mm256_permute_ps((X), (C)), \
                                        (__v8sf)_mm256_setzero_ps()))
 
-static __inline__ __m128d __DEFAULT_FN_ATTRS128
-_mm_mask_permutevar_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128i __C)
-{
+static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR
+_mm_mask_permutevar_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128i __C) {
   return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
                                             (__v2df)_mm_permutevar_pd(__A, __C),
                                             (__v2df)__W);
 }
 
-static __inline__ __m128d __DEFAULT_FN_ATTRS128
-_mm_maskz_permutevar_pd(__mmask8 __U, __m128d __A, __m128i __C)
-{
+static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR
+_mm_maskz_permutevar_pd(__mmask8 __U, __m128d __A, __m128i __C) {
   return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
                                             (__v2df)_mm_permutevar_pd(__A, __C),
                                             (__v2df)_mm_setzero_pd());
 }
 
-static __inline__ __m256d __DEFAULT_FN_ATTRS256
-_mm256_mask_permutevar_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256i __C)
-{
+static __inline__ __m256d __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_mask_permutevar_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256i __C) {
   return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
                                          (__v4df)_mm256_permutevar_pd(__A, __C),
                                          (__v4df)__W);
 }
 
-static __inline__ __m256d __DEFAULT_FN_ATTRS256
-_mm256_maskz_permutevar_pd(__mmask8 __U, __m256d __A, __m256i __C)
-{
+static __inline__ __m256d __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_maskz_permutevar_pd(__mmask8 __U, __m256d __A, __m256i __C) {
   return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
                                          (__v4df)_mm256_permutevar_pd(__A, __C),
                                          (__v4df)_mm256_setzero_pd());
 }
 
-static __inline__ __m128 __DEFAULT_FN_ATTRS128
-_mm_mask_permutevar_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128i __C)
-{
+static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR
+_mm_mask_permutevar_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128i __C) {
   return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
                                             (__v4sf)_mm_permutevar_ps(__A, __C),
                                             (__v4sf)__W);
 }
 
-static __inline__ __m128 __DEFAULT_FN_ATTRS128
-_mm_maskz_permutevar_ps(__mmask8 __U, __m128 __A, __m128i __C)
-{
+static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR
+_mm_maskz_permutevar_ps(__mmask8 __U, __m128 __A, __m128i __C) {
   return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
                                             (__v4sf)_mm_permutevar_ps(__A, __C),
                                             (__v4sf)_mm_setzero_ps());
 }
 
-static __inline__ __m256 __DEFAULT_FN_ATTRS256
-_mm256_mask_permutevar_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256i __C)
-{
+static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_mask_permutevar_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256i __C) {
   return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
                                           (__v8sf)_mm256_permutevar_ps(__A, __C),
                                           (__v8sf)__W);
 }
 
-static __inline__ __m256 __DEFAULT_FN_ATTRS256
-_mm256_maskz_permutevar_ps(__mmask8 __U, __m256 __A, __m256i __C)
-{
+static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_maskz_permutevar_ps(__mmask8 __U, __m256 __A, __m256i __C) {
   return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
                                           (__v8sf)_mm256_permutevar_ps(__A, __C),
                                           (__v8sf)_mm256_setzero_ps());
@@ -6250,282 +5995,251 @@ _mm256_mask_testn_epi64_mask (__mmask8 __U, __m256i __A, __m256i __B)
                                        _mm256_setzero_si256());
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS128
-_mm_mask_unpackhi_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
-{
+static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
+_mm_mask_unpackhi_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) {
   return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
                                            (__v4si)_mm_unpackhi_epi32(__A, __B),
                                            (__v4si)__W);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS128
-_mm_maskz_unpackhi_epi32(__mmask8 __U, __m128i __A, __m128i __B)
-{
+static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
+_mm_maskz_unpackhi_epi32(__mmask8 __U, __m128i __A, __m128i __B) {
   return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
                                            (__v4si)_mm_unpackhi_epi32(__A, __B),
                                            (__v4si)_mm_setzero_si128());
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_mask_unpackhi_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
-{
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_mask_unpackhi_epi32(__m256i __W, __mmask8 __U, __m256i __A,
+                           __m256i __B) {
   return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
                                         (__v8si)_mm256_unpackhi_epi32(__A, __B),
                                         (__v8si)__W);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_maskz_unpackhi_epi32(__mmask8 __U, __m256i __A, __m256i __B)
-{
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_maskz_unpackhi_epi32(__mmask8 __U, __m256i __A, __m256i __B) {
   return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
                                         (__v8si)_mm256_unpackhi_epi32(__A, __B),
                                         (__v8si)_mm256_setzero_si256());
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS128
-_mm_mask_unpackhi_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
-{
+static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
+_mm_mask_unpackhi_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) {
   return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
                                            (__v2di)_mm_unpackhi_epi64(__A, __B),
                                            (__v2di)__W);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS128
-_mm_maskz_unpackhi_epi64(__mmask8 __U, __m128i __A, __m128i __B)
-{
+static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
+_mm_maskz_unpackhi_epi64(__mmask8 __U, __m128i __A, __m128i __B) {
   return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
                                            (__v2di)_mm_unpackhi_epi64(__A, __B),
                                            (__v2di)_mm_setzero_si128());
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_mask_unpackhi_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
-{
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_mask_unpackhi_epi64(__m256i __W, __mmask8 __U, __m256i __A,
+                           __m256i __B) {
   return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
                                         (__v4di)_mm256_unpackhi_epi64(__A, __B),
                                         (__v4di)__W);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_maskz_unpackhi_epi64(__mmask8 __U, __m256i __A, __m256i __B)
-{
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_maskz_unpackhi_epi64(__mmask8 __U, __m256i __A, __m256i __B) {
   return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
                                         (__v4di)_mm256_unpackhi_epi64(__A, __B),
                                         (__v4di)_mm256_setzero_si256());
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS128
-_mm_mask_unpacklo_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
-{
+static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
+_mm_mask_unpacklo_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) {
   return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
                                            (__v4si)_mm_unpacklo_epi32(__A, __B),
                                            (__v4si)__W);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS128
-_mm_maskz_unpacklo_epi32(__mmask8 __U, __m128i __A, __m128i __B)
-{
+static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
+_mm_maskz_unpacklo_epi32(__mmask8 __U, __m128i __A, __m128i __B) {
   return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
                                            (__v4si)_mm_unpacklo_epi32(__A, __B),
                                            (__v4si)_mm_setzero_si128());
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_mask_unpacklo_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
-{
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_mask_unpacklo_epi32(__m256i __W, __mmask8 __U, __m256i __A,
+                           __m256i __B) {
   return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
                                         (__v8si)_mm256_unpacklo_epi32(__A, __B),
                                         (__v8si)__W);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_maskz_unpacklo_epi32(__mmask8 __U, __m256i __A, __m256i __B)
-{
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_maskz_unpacklo_epi32(__mmask8 __U, __m256i __A, __m256i __B) {
   return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
                                         (__v8si)_mm256_unpacklo_epi32(__A, __B),
                                         (__v8si)_mm256_setzero_si256());
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS128
-_mm_mask_unpacklo_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
-{
+static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
+_mm_mask_unpacklo_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) {
   return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
                                            (__v2di)_mm_unpacklo_epi64(__A, __B),
                                            (__v2di)__W);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS128
-_mm_maskz_unpacklo_epi64(__mmask8 __U, __m128i __A, __m128i __B)
-{
+static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
+_mm_maskz_unpacklo_epi64(__mmask8 __U, __m128i __A, __m128i __B) {
   return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
                                            (__v2di)_mm_unpacklo_epi64(__A, __B),
                                            (__v2di)_mm_setzero_si128());
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_mask_unpacklo_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
-{
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_mask_unpacklo_epi64(__m256i __W, __mmask8 __U, __m256i __A,
+                           __m256i __B) {
   return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
                                         (__v4di)_mm256_unpacklo_epi64(__A, __B),
                                         (__v4di)__W);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_maskz_unpacklo_epi64(__mmask8 __U, __m256i __A, __m256i __B)
-{
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_maskz_unpacklo_epi64(__mmask8 __U, __m256i __A, __m256i __B) {
   return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
                                         (__v4di)_mm256_unpacklo_epi64(__A, __B),
                                         (__v4di)_mm256_setzero_si256());
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS128
-_mm_mask_sra_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
-{
+static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
+_mm_mask_sra_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) {
   return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
                                              (__v4si)_mm_sra_epi32(__A, __B),
                                              (__v4si)__W);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS128
-_mm_maskz_sra_epi32(__mmask8 __U, __m128i __A, __m128i __B)
-{
+static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
+_mm_maskz_sra_epi32(__mmask8 __U, __m128i __A, __m128i __B) {
   return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
                                              (__v4si)_mm_sra_epi32(__A, __B),
                                              (__v4si)_mm_setzero_si128());
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_mask_sra_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m128i __B)
-{
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_mask_sra_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m128i __B) {
   return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
                                              (__v8si)_mm256_sra_epi32(__A, __B),
                                              (__v8si)__W);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_maskz_sra_epi32(__mmask8 __U, __m256i __A, __m128i __B)
-{
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_maskz_sra_epi32(__mmask8 __U, __m256i __A, __m128i __B) {
   return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
                                              (__v8si)_mm256_sra_epi32(__A, __B),
                                              (__v8si)_mm256_setzero_si256());
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS128
-_mm_mask_srai_epi32(__m128i __W, __mmask8 __U, __m128i __A, unsigned int __B)
-{
+static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
+_mm_mask_srai_epi32(__m128i __W, __mmask8 __U, __m128i __A, unsigned int __B) {
   return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
                                              (__v4si)_mm_srai_epi32(__A, (int)__B),
                                              (__v4si)__W);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS128
-_mm_maskz_srai_epi32(__mmask8 __U, __m128i __A, unsigned int __B)
-{
+static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
+_mm_maskz_srai_epi32(__mmask8 __U, __m128i __A, unsigned int __B) {
   return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
                                              (__v4si)_mm_srai_epi32(__A, (int)__B),
                                              (__v4si)_mm_setzero_si128());
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_mask_srai_epi32(__m256i __W, __mmask8 __U, __m256i __A, unsigned int __B)
-{
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_mask_srai_epi32(__m256i __W, __mmask8 __U, __m256i __A,
+                       unsigned int __B) {
   return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
                                              (__v8si)_mm256_srai_epi32(__A, (int)__B),
                                              (__v8si)__W);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_maskz_srai_epi32(__mmask8 __U, __m256i __A, unsigned int __B)
-{
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_maskz_srai_epi32(__mmask8 __U, __m256i __A, unsigned int __B) {
   return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
                                              (__v8si)_mm256_srai_epi32(__A, (int)__B),
                                              (__v8si)_mm256_setzero_si256());
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS128
-_mm_sra_epi64(__m128i __A, __m128i __B)
-{
+static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
+_mm_sra_epi64(__m128i __A, __m128i __B) {
   return (__m128i)__builtin_ia32_psraq128((__v2di)__A, (__v2di)__B);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS128
-_mm_mask_sra_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
-{
+static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
+_mm_mask_sra_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) {
   return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, \
                                              (__v2di)_mm_sra_epi64(__A, __B), \
                                              (__v2di)__W);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS128
-_mm_maskz_sra_epi64(__mmask8 __U, __m128i __A, __m128i __B)
-{
+static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
+_mm_maskz_sra_epi64(__mmask8 __U, __m128i __A, __m128i __B) {
   return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, \
                                              (__v2di)_mm_sra_epi64(__A, __B), \
                                              (__v2di)_mm_setzero_si128());
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_sra_epi64(__m256i __A, __m128i __B)
-{
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_sra_epi64(__m256i __A, __m128i __B) {
   return (__m256i)__builtin_ia32_psraq256((__v4di) __A, (__v2di) __B);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_mask_sra_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m128i __B)
-{
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_mask_sra_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m128i __B) {
   return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, \
                                            (__v4di)_mm256_sra_epi64(__A, __B), \
                                            (__v4di)__W);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_maskz_sra_epi64(__mmask8 __U, __m256i __A, __m128i __B)
-{
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_maskz_sra_epi64(__mmask8 __U, __m256i __A, __m128i __B) {
   return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, \
                                            (__v4di)_mm256_sra_epi64(__A, __B), \
                                            (__v4di)_mm256_setzero_si256());
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS128
-_mm_srai_epi64(__m128i __A, unsigned int __imm)
-{
+static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
+_mm_srai_epi64(__m128i __A, unsigned int __imm) {
   return (__m128i)__builtin_ia32_psraqi128((__v2di)__A, (int)__imm);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS128
-_mm_mask_srai_epi64(__m128i __W, __mmask8 __U, __m128i __A, unsigned int __imm)
-{
+static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_srai_epi64(
+    __m128i __W, __mmask8 __U, __m128i __A, unsigned int __imm) {
   return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, \
                                            (__v2di)_mm_srai_epi64(__A, __imm), \
                                            (__v2di)__W);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS128
-_mm_maskz_srai_epi64(__mmask8 __U, __m128i __A, unsigned int __imm)
-{
+static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
+_mm_maskz_srai_epi64(__mmask8 __U, __m128i __A, unsigned int __imm) {
   return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, \
                                            (__v2di)_mm_srai_epi64(__A, __imm), \
                                            (__v2di)_mm_setzero_si128());
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_srai_epi64(__m256i __A, unsigned int __imm)
-{
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_srai_epi64(__m256i __A, unsigned int __imm) {
   return (__m256i)__builtin_ia32_psraqi256((__v4di)__A, (int)__imm);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
 _mm256_mask_srai_epi64(__m256i __W, __mmask8 __U, __m256i __A,
-                       unsigned int __imm)
-{
+                       unsigned int __imm) {
   return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, \
                                         (__v4di)_mm256_srai_epi64(__A, __imm), \
                                         (__v4di)__W);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_maskz_srai_epi64(__mmask8 __U, __m256i __A, unsigned int __imm)
-{
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_maskz_srai_epi64(__mmask8 __U, __m256i __A, unsigned int __imm) {
   return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, \
                                         (__v4di)_mm256_srai_epi64(__A, __imm), \
                                         (__v4di)_mm256_setzero_si256());
@@ -6792,159 +6506,139 @@ _mm256_maskz_rsqrt14_ps (__mmask8 __U, __m256 __A)
                 (__mmask8) __U);
 }
 
-static __inline__ __m256 __DEFAULT_FN_ATTRS256
-_mm256_broadcast_f32x4(__m128 __A)
-{
+static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_broadcast_f32x4(__m128 __A) {
   return (__m256)__builtin_shufflevector((__v4sf)__A, (__v4sf)__A,
                                          0, 1, 2, 3, 0, 1, 2, 3);
 }
 
-static __inline__ __m256 __DEFAULT_FN_ATTRS256
-_mm256_mask_broadcast_f32x4(__m256 __O, __mmask8 __M, __m128 __A)
-{
+static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_mask_broadcast_f32x4(__m256 __O, __mmask8 __M, __m128 __A) {
   return (__m256)__builtin_ia32_selectps_256((__mmask8)__M,
                                             (__v8sf)_mm256_broadcast_f32x4(__A),
                                             (__v8sf)__O);
 }
 
-static __inline__ __m256 __DEFAULT_FN_ATTRS256
-_mm256_maskz_broadcast_f32x4 (__mmask8 __M, __m128 __A)
-{
+static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_maskz_broadcast_f32x4(__mmask8 __M, __m128 __A) {
   return (__m256)__builtin_ia32_selectps_256((__mmask8)__M,
                                             (__v8sf)_mm256_broadcast_f32x4(__A),
                                             (__v8sf)_mm256_setzero_ps());
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_broadcast_i32x4(__m128i __A)
-{
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_broadcast_i32x4(__m128i __A) {
   return (__m256i)__builtin_shufflevector((__v4si)__A, (__v4si)__A,
                                           0, 1, 2, 3, 0, 1, 2, 3);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_mask_broadcast_i32x4(__m256i __O, __mmask8 __M, __m128i __A)
-{
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_mask_broadcast_i32x4(__m256i __O, __mmask8 __M, __m128i __A) {
   return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M,
                                             (__v8si)_mm256_broadcast_i32x4(__A),
                                             (__v8si)__O);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_maskz_broadcast_i32x4(__mmask8 __M, __m128i __A)
-{
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_maskz_broadcast_i32x4(__mmask8 __M, __m128i __A) {
   return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M,
                                             (__v8si)_mm256_broadcast_i32x4(__A),
                                             (__v8si)_mm256_setzero_si256());
 }
 
-static __inline__ __m256d __DEFAULT_FN_ATTRS256
-_mm256_mask_broadcastsd_pd (__m256d __O, __mmask8 __M, __m128d __A)
-{
+static __inline__ __m256d __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_mask_broadcastsd_pd(__m256d __O, __mmask8 __M, __m128d __A) {
   return (__m256d)__builtin_ia32_selectpd_256(__M,
                                               (__v4df) _mm256_broadcastsd_pd(__A),
                                               (__v4df) __O);
 }
 
-static __inline__ __m256d __DEFAULT_FN_ATTRS256
-_mm256_maskz_broadcastsd_pd (__mmask8 __M, __m128d __A)
-{
+static __inline__ __m256d __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_maskz_broadcastsd_pd(__mmask8 __M, __m128d __A) {
   return (__m256d)__builtin_ia32_selectpd_256(__M,
                                               (__v4df) _mm256_broadcastsd_pd(__A),
                                               (__v4df) _mm256_setzero_pd());
 }
 
-static __inline__ __m128 __DEFAULT_FN_ATTRS128
-_mm_mask_broadcastss_ps (__m128 __O, __mmask8 __M, __m128 __A)
-{
+static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR
+_mm_mask_broadcastss_ps(__m128 __O, __mmask8 __M, __m128 __A) {
   return (__m128)__builtin_ia32_selectps_128(__M,
                                              (__v4sf) _mm_broadcastss_ps(__A),
                                              (__v4sf) __O);
 }
 
-static __inline__ __m128 __DEFAULT_FN_ATTRS128
-_mm_maskz_broadcastss_ps (__mmask8 __M, __m128 __A)
-{
+static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR
+_mm_maskz_broadcastss_ps(__mmask8 __M, __m128 __A) {
   return (__m128)__builtin_ia32_selectps_128(__M,
                                              (__v4sf) _mm_broadcastss_ps(__A),
                                              (__v4sf) _mm_setzero_ps());
 }
 
-static __inline__ __m256 __DEFAULT_FN_ATTRS256
-_mm256_mask_broadcastss_ps (__m256 __O, __mmask8 __M, __m128 __A)
-{
+static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_mask_broadcastss_ps(__m256 __O, __mmask8 __M, __m128 __A) {
   return (__m256)__builtin_ia32_selectps_256(__M,
                                              (__v8sf) _mm256_broadcastss_ps(__A),
                                              (__v8sf) __O);
 }
 
-static __inline__ __m256 __DEFAULT_FN_ATTRS256
-_mm256_maskz_broadcastss_ps (__mmask8 __M, __m128 __A)
-{
+static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_maskz_broadcastss_ps(__mmask8 __M, __m128 __A) {
   return (__m256)__builtin_ia32_selectps_256(__M,
                                              (__v8sf) _mm256_broadcastss_ps(__A),
                                              (__v8sf) _mm256_setzero_ps());
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS128
-_mm_mask_broadcastd_epi32 (__m128i __O, __mmask8 __M, __m128i __A)
-{
+static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
+_mm_mask_broadcastd_epi32(__m128i __O, __mmask8 __M, __m128i __A) {
   return (__m128i)__builtin_ia32_selectd_128(__M,
                                              (__v4si) _mm_broadcastd_epi32(__A),
                                              (__v4si) __O);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS128
-_mm_maskz_broadcastd_epi32 (__mmask8 __M, __m128i __A)
-{
+static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
+_mm_maskz_broadcastd_epi32(__mmask8 __M, __m128i __A) {
   return (__m128i)__builtin_ia32_selectd_128(__M,
                                              (__v4si) _mm_broadcastd_epi32(__A),
                                              (__v4si) _mm_setzero_si128());
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_mask_broadcastd_epi32 (__m256i __O, __mmask8 __M, __m128i __A)
-{
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_mask_broadcastd_epi32(__m256i __O, __mmask8 __M, __m128i __A) {
   return (__m256i)__builtin_ia32_selectd_256(__M,
                                              (__v8si) _mm256_broadcastd_epi32(__A),
                                              (__v8si) __O);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_maskz_broadcastd_epi32 (__mmask8 __M, __m128i __A)
-{
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_maskz_broadcastd_epi32(__mmask8 __M, __m128i __A) {
   return (__m256i)__builtin_ia32_selectd_256(__M,
                                              (__v8si) _mm256_broadcastd_epi32(__A),
                                              (__v8si) _mm256_setzero_si256());
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS128
-_mm_mask_broadcastq_epi64 (__m128i __O, __mmask8 __M, __m128i __A)
-{
+static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
+_mm_mask_broadcastq_epi64(__m128i __O, __mmask8 __M, __m128i __A) {
   return (__m128i)__builtin_ia32_selectq_128(__M,
                                              (__v2di) _mm_broadcastq_epi64(__A),
                                              (__v2di) __O);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS128
-_mm_maskz_broadcastq_epi64 (__mmask8 __M, __m128i __A)
-{
+static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
+_mm_maskz_broadcastq_epi64(__mmask8 __M, __m128i __A) {
   return (__m128i)__builtin_ia32_selectq_128(__M,
                                              (__v2di) _mm_broadcastq_epi64(__A),
                                              (__v2di) _mm_setzero_si128());
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_mask_broadcastq_epi64 (__m256i __O, __mmask8 __M, __m128i __A)
-{
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_mask_broadcastq_epi64(__m256i __O, __mmask8 __M, __m128i __A) {
   return (__m256i)__builtin_ia32_selectq_256(__M,
                                              (__v4di) _mm256_broadcastq_epi64(__A),
                                              (__v4di) __O);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_maskz_broadcastq_epi64 (__mmask8 __M, __m128i __A)
-{
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_maskz_broadcastq_epi64(__mmask8 __M, __m128i __A) {
   return (__m256i)__builtin_ia32_selectq_256(__M,
                                              (__v4di) _mm256_broadcastq_epi64(__A),
                                              (__v4di) _mm256_setzero_si256());
@@ -7536,9 +7230,8 @@ _mm256_mask_cvtusepi64_storeu_epi16 (void * __P, __mmask8 __M, __m256i __A)
   __builtin_ia32_pmovusqw256mem_mask ((__v8hi *) __P, (__v4di) __A, __M);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS128
-_mm_cvtepi32_epi8 (__m128i __A)
-{
+static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
+_mm_cvtepi32_epi8(__m128i __A) {
   return (__m128i)__builtin_shufflevector(
       __builtin_convertvector((__v4si)__A, __v4qi), (__v4qi){0, 0, 0, 0}, 0, 1,
       2, 3, 4, 5, 6, 7, 7, 7, 7, 7, 7, 7, 7, 7);
@@ -7566,9 +7259,8 @@ _mm_mask_cvtepi32_storeu_epi8 (void * __P, __mmask8 __M, __m128i __A)
   __builtin_ia32_pmovdb128mem_mask ((__v16qi *) __P, (__v4si) __A, __M);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS256
-_mm256_cvtepi32_epi8 (__m256i __A)
-{
+static __inline__ __m128i __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_cvtepi32_epi8(__m256i __A) {
   return (__m128i)__builtin_shufflevector(
       __builtin_convertvector((__v8si)__A, __v8qi),
       (__v8qi){0, 0, 0, 0, 0, 0, 0, 0}, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11,
@@ -7576,8 +7268,7 @@ _mm256_cvtepi32_epi8 (__m256i __A)
 }
 
 static __inline__ __m128i __DEFAULT_FN_ATTRS256
-_mm256_mask_cvtepi32_epi8 (__m128i __O, __mmask8 __M, __m256i __A)
-{
+_mm256_mask_cvtepi32_epi8(__m128i __O, __mmask8 __M, __m256i __A) {
   return (__m128i) __builtin_ia32_pmovdb256_mask ((__v8si) __A,
               (__v16qi) __O, __M);
 }
@@ -7596,9 +7287,8 @@ _mm256_mask_cvtepi32_storeu_epi8 (void * __P, __mmask8 __M, __m256i __A)
   __builtin_ia32_pmovdb256mem_mask ((__v16qi *) __P, (__v8si) __A, __M);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS128
-_mm_cvtepi32_epi16 (__m128i __A)
-{
+static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
+_mm_cvtepi32_epi16(__m128i __A) {
   return (__m128i)__builtin_shufflevector(
       __builtin_convertvector((__v4si)__A, __v4hi), (__v4hi){0, 0, 0, 0}, 0, 1,
       2, 3, 4, 5, 6, 7);
@@ -7625,9 +7315,8 @@ _mm_mask_cvtepi32_storeu_epi16 (void * __P, __mmask8 __M, __m128i __A)
   __builtin_ia32_pmovdw128mem_mask ((__v8hi *) __P, (__v4si) __A, __M);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS256
-_mm256_cvtepi32_epi16 (__m256i __A)
-{
+static __inline__ __m128i __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_cvtepi32_epi16(__m256i __A) {
   return (__m128i)__builtin_convertvector((__v8si)__A, __v8hi);
 }
 
@@ -7652,9 +7341,8 @@ _mm256_mask_cvtepi32_storeu_epi16 (void *  __P, __mmask8 __M, __m256i __A)
   __builtin_ia32_pmovdw256mem_mask ((__v8hi *) __P, (__v8si) __A, __M);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS128
-_mm_cvtepi64_epi8 (__m128i __A)
-{
+static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
+_mm_cvtepi64_epi8(__m128i __A) {
   return (__m128i)__builtin_shufflevector(
       __builtin_convertvector((__v2di)__A, __v2qi), (__v2qi){0, 0}, 0, 1, 2, 3,
       3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3);
@@ -7681,9 +7369,8 @@ _mm_mask_cvtepi64_storeu_epi8 (void * __P, __mmask8 __M, __m128i __A)
   __builtin_ia32_pmovqb128mem_mask ((__v16qi *) __P, (__v2di) __A, __M);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS256
-_mm256_cvtepi64_epi8 (__m256i __A)
-{
+static __inline__ __m128i __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_cvtepi64_epi8(__m256i __A) {
   return (__m128i)__builtin_shufflevector(
       __builtin_convertvector((__v4di)__A, __v4qi), (__v4qi){0, 0, 0, 0}, 0, 1,
       2, 3, 4, 5, 6, 7, 7, 7, 7, 7, 7, 7, 7, 7);
@@ -7710,9 +7397,8 @@ _mm256_mask_cvtepi64_storeu_epi8 (void * __P, __mmask8 __M, __m256i __A)
   __builtin_ia32_pmovqb256mem_mask ((__v16qi *) __P, (__v4di) __A, __M);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS128
-_mm_cvtepi64_epi32 (__m128i __A)
-{
+static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
+_mm_cvtepi64_epi32(__m128i __A) {
   return (__m128i)__builtin_shufflevector(
       __builtin_convertvector((__v2di)__A, __v2si), (__v2si){0, 0}, 0, 1, 2, 3);
 }
@@ -7738,23 +7424,20 @@ _mm_mask_cvtepi64_storeu_epi32 (void * __P, __mmask8 __M, __m128i __A)
   __builtin_ia32_pmovqd128mem_mask ((__v4si *) __P, (__v2di) __A, __M);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS256
-_mm256_cvtepi64_epi32 (__m256i __A)
-{
+static __inline__ __m128i __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_cvtepi64_epi32(__m256i __A) {
   return (__m128i)__builtin_convertvector((__v4di)__A, __v4si);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS256
-_mm256_mask_cvtepi64_epi32 (__m128i __O, __mmask8 __M, __m256i __A)
-{
+static __inline__ __m128i __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_mask_cvtepi64_epi32(__m128i __O, __mmask8 __M, __m256i __A) {
   return (__m128i)__builtin_ia32_selectd_128((__mmask8)__M,
                                              (__v4si)_mm256_cvtepi64_epi32(__A),
                                              (__v4si)__O);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS256
-_mm256_maskz_cvtepi64_epi32 (__mmask8 __M, __m256i __A)
-{
+static __inline__ __m128i __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_maskz_cvtepi64_epi32(__mmask8 __M, __m256i __A) {
   return (__m128i)__builtin_ia32_selectd_128((__mmask8)__M,
                                              (__v4si)_mm256_cvtepi64_epi32(__A),
                                              (__v4si)_mm_setzero_si128());
@@ -7766,9 +7449,8 @@ _mm256_mask_cvtepi64_storeu_epi32 (void * __P, __mmask8 __M, __m256i __A)
   __builtin_ia32_pmovqd256mem_mask ((__v4si *) __P, (__v4di) __A, __M);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS128
-_mm_cvtepi64_epi16 (__m128i __A)
-{
+static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
+_mm_cvtepi64_epi16(__m128i __A) {
   return (__m128i)__builtin_shufflevector(
       __builtin_convertvector((__v2di)__A, __v2hi), (__v2hi){0, 0}, 0, 1, 2, 3,
       3, 3, 3, 3);
@@ -7796,9 +7478,8 @@ _mm_mask_cvtepi64_storeu_epi16 (void * __P, __mmask8 __M, __m128i __A)
   __builtin_ia32_pmovqw128mem_mask ((__v8hi *) __P, (__v2di) __A, __M);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS256
-_mm256_cvtepi64_epi16 (__m256i __A)
-{
+static __inline__ __m128i __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_cvtepi64_epi16(__m256i __A) {
   return (__m128i)__builtin_shufflevector(
       __builtin_convertvector((__v4di)__A, __v4hi), (__v4hi){0, 0, 0, 0}, 0, 1,
       2, 3, 4, 5, 6, 7);
@@ -7825,11 +7506,10 @@ _mm256_mask_cvtepi64_storeu_epi16 (void * __P, __mmask8 __M, __m256i __A)
   __builtin_ia32_pmovqw256mem_mask ((__v8hi *) __P, (__v4di) __A, __M);
 }
 
-#define _mm256_extractf32x4_ps(A, imm) \
-  ((__m128)__builtin_ia32_extractf32x4_256_mask((__v8sf)(__m256)(A), \
-                                                (int)(imm), \
-                                                (__v4sf)_mm_undefined_ps(), \
-                                                (__mmask8)-1))
+#define _mm256_extractf32x4_ps(A, imm)                                         \
+  ((__m128)__builtin_ia32_extractf32x4_256_mask(                               \
+      (__v8sf)(__m256)(A), (int)(imm), (__v4sf)_mm_setzero_ps(),               \
+      (__mmask8) - 1))
 
 #define _mm256_mask_extractf32x4_ps(W, U, A, imm) \
   ((__m128)__builtin_ia32_extractf32x4_256_mask((__v8sf)(__m256)(A), \
@@ -7843,11 +7523,10 @@ _mm256_mask_cvtepi64_storeu_epi16 (void * __P, __mmask8 __M, __m256i __A)
                                                 (__v4sf)_mm_setzero_ps(), \
                                                 (__mmask8)(U)))
 
-#define _mm256_extracti32x4_epi32(A, imm) \
-  ((__m128i)__builtin_ia32_extracti32x4_256_mask((__v8si)(__m256i)(A), \
-                                                 (int)(imm), \
-                                                 (__v4si)_mm_undefined_si128(), \
-                                                 (__mmask8)-1))
+#define _mm256_extracti32x4_epi32(A, imm)                                      \
+  ((__m128i)__builtin_ia32_extracti32x4_256_mask(                              \
+      (__v8si)(__m256i)(A), (int)(imm), (__v4si)_mm_setzero_si128(),           \
+      (__mmask8) - 1))
 
 #define _mm256_mask_extracti32x4_epi32(W, U, A, imm) \
   ((__m128i)__builtin_ia32_extracti32x4_256_mask((__v8si)(__m256i)(A), \
@@ -8083,47 +7762,41 @@ _mm256_mask_cvtepi64_storeu_epi16 (void * __P, __mmask8 __M, __m256i __A)
                                       (__v4di)_mm256_permutex_epi64((X), (C)), \
                                       (__v4di)_mm256_setzero_si256()))
 
-static __inline__ __m256d __DEFAULT_FN_ATTRS256
-_mm256_permutexvar_pd (__m256i __X, __m256d __Y)
-{
+static __inline__ __m256d __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_permutexvar_pd(__m256i __X, __m256d __Y) {
   return (__m256d)__builtin_ia32_permvardf256((__v4df)__Y, (__v4di)__X);
 }
 
-static __inline__ __m256d __DEFAULT_FN_ATTRS256
-_mm256_mask_permutexvar_pd (__m256d __W, __mmask8 __U, __m256i __X,
-          __m256d __Y)
-{
+static __inline__ __m256d __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_mask_permutexvar_pd(__m256d __W, __mmask8 __U, __m256i __X,
+                           __m256d __Y) {
   return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
                                         (__v4df)_mm256_permutexvar_pd(__X, __Y),
                                         (__v4df)__W);
 }
 
-static __inline__ __m256d __DEFAULT_FN_ATTRS256
-_mm256_maskz_permutexvar_pd (__mmask8 __U, __m256i __X, __m256d __Y)
-{
+static __inline__ __m256d __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_maskz_permutexvar_pd(__mmask8 __U, __m256i __X, __m256d __Y) {
   return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
                                         (__v4df)_mm256_permutexvar_pd(__X, __Y),
                                         (__v4df)_mm256_setzero_pd());
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_permutexvar_epi64 ( __m256i __X, __m256i __Y)
-{
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_permutexvar_epi64(__m256i __X, __m256i __Y) {
   return (__m256i)__builtin_ia32_permvardi256((__v4di) __Y, (__v4di) __X);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_maskz_permutexvar_epi64 (__mmask8 __M, __m256i __X, __m256i __Y)
-{
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_maskz_permutexvar_epi64(__mmask8 __M, __m256i __X, __m256i __Y) {
   return (__m256i)__builtin_ia32_selectq_256((__mmask8)__M,
                                      (__v4di)_mm256_permutexvar_epi64(__X, __Y),
                                      (__v4di)_mm256_setzero_si256());
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_mask_permutexvar_epi64 (__m256i __W, __mmask8 __M, __m256i __X,
-             __m256i __Y)
-{
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_mask_permutexvar_epi64(__m256i __W, __mmask8 __M, __m256i __X,
+                              __m256i __Y) {
   return (__m256i)__builtin_ia32_selectq_256((__mmask8)__M,
                                      (__v4di)_mm256_permutexvar_epi64(__X, __Y),
                                      (__v4di)__W);
@@ -8131,17 +7804,15 @@ _mm256_mask_permutexvar_epi64 (__m256i __W, __mmask8 __M, __m256i __X,
 
 #define _mm256_permutexvar_ps(A, B) _mm256_permutevar8x32_ps((B), (A))
 
-static __inline__ __m256 __DEFAULT_FN_ATTRS256
-_mm256_mask_permutexvar_ps(__m256 __W, __mmask8 __U, __m256i __X, __m256 __Y)
-{
+static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_mask_permutexvar_ps(__m256 __W, __mmask8 __U, __m256i __X, __m256 __Y) {
   return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
                                         (__v8sf)_mm256_permutexvar_ps(__X, __Y),
                                         (__v8sf)__W);
 }
 
-static __inline__ __m256 __DEFAULT_FN_ATTRS256
-_mm256_maskz_permutexvar_ps(__mmask8 __U, __m256i __X, __m256 __Y)
-{
+static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_maskz_permutexvar_ps(__mmask8 __U, __m256i __X, __m256 __Y) {
   return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
                                         (__v8sf)_mm256_permutexvar_ps(__X, __Y),
                                         (__v8sf)_mm256_setzero_ps());
@@ -8149,18 +7820,16 @@ _mm256_maskz_permutexvar_ps(__mmask8 __U, __m256i __X, __m256 __Y)
 
 #define _mm256_permutexvar_epi32(A, B) _mm256_permutevar8x32_epi32((B), (A))
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
 _mm256_mask_permutexvar_epi32(__m256i __W, __mmask8 __M, __m256i __X,
-                              __m256i __Y)
-{
+                              __m256i __Y) {
   return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M,
                                      (__v8si)_mm256_permutexvar_epi32(__X, __Y),
                                      (__v8si)__W);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_maskz_permutexvar_epi32(__mmask8 __M, __m256i __X, __m256i __Y)
-{
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_maskz_permutexvar_epi32(__mmask8 __M, __m256i __X, __m256i __Y) {
   return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M,
                                      (__v8si)_mm256_permutexvar_epi32(__X, __Y),
                                      (__v8si)_mm256_setzero_si256());
@@ -8222,65 +7891,57 @@ _mm256_maskz_permutexvar_epi32(__mmask8 __M, __m256i __X, __m256i __Y)
                                  (__v4di)_mm256_alignr_epi64((A), (B), (imm)), \
                                  (__v4di)_mm256_setzero_si256()))
 
-static __inline__ __m128 __DEFAULT_FN_ATTRS128
-_mm_mask_movehdup_ps (__m128 __W, __mmask8 __U, __m128 __A)
-{
+static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR
+_mm_mask_movehdup_ps(__m128 __W, __mmask8 __U, __m128 __A) {
   return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
                                              (__v4sf)_mm_movehdup_ps(__A),
                                              (__v4sf)__W);
 }
 
-static __inline__ __m128 __DEFAULT_FN_ATTRS128
-_mm_maskz_movehdup_ps (__mmask8 __U, __m128 __A)
-{
+static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR
+_mm_maskz_movehdup_ps(__mmask8 __U, __m128 __A) {
   return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
                                              (__v4sf)_mm_movehdup_ps(__A),
                                              (__v4sf)_mm_setzero_ps());
 }
 
-static __inline__ __m256 __DEFAULT_FN_ATTRS256
-_mm256_mask_movehdup_ps (__m256 __W, __mmask8 __U, __m256 __A)
-{
+static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_mask_movehdup_ps(__m256 __W, __mmask8 __U, __m256 __A) {
   return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
                                              (__v8sf)_mm256_movehdup_ps(__A),
                                              (__v8sf)__W);
 }
 
-static __inline__ __m256 __DEFAULT_FN_ATTRS256
-_mm256_maskz_movehdup_ps (__mmask8 __U, __m256 __A)
-{
+static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_maskz_movehdup_ps(__mmask8 __U, __m256 __A) {
   return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
                                              (__v8sf)_mm256_movehdup_ps(__A),
                                              (__v8sf)_mm256_setzero_ps());
 }
 
-static __inline__ __m128 __DEFAULT_FN_ATTRS128
-_mm_mask_moveldup_ps (__m128 __W, __mmask8 __U, __m128 __A)
-{
+static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR
+_mm_mask_moveldup_ps(__m128 __W, __mmask8 __U, __m128 __A) {
   return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
                                              (__v4sf)_mm_moveldup_ps(__A),
                                              (__v4sf)__W);
 }
 
-static __inline__ __m128 __DEFAULT_FN_ATTRS128
-_mm_maskz_moveldup_ps (__mmask8 __U, __m128 __A)
-{
+static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR
+_mm_maskz_moveldup_ps(__mmask8 __U, __m128 __A) {
   return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
                                              (__v4sf)_mm_moveldup_ps(__A),
                                              (__v4sf)_mm_setzero_ps());
 }
 
-static __inline__ __m256 __DEFAULT_FN_ATTRS256
-_mm256_mask_moveldup_ps (__m256 __W, __mmask8 __U, __m256 __A)
-{
+static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_mask_moveldup_ps(__m256 __W, __mmask8 __U, __m256 __A) {
   return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
                                              (__v8sf)_mm256_moveldup_ps(__A),
                                              (__v8sf)__W);
 }
 
-static __inline__ __m256 __DEFAULT_FN_ATTRS256
-_mm256_maskz_moveldup_ps (__mmask8 __U, __m256 __A)
-{
+static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_maskz_moveldup_ps(__mmask8 __U, __m256 __A) {
   return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
                                              (__v8sf)_mm256_moveldup_ps(__A),
                                              (__v8sf)_mm256_setzero_ps());
@@ -8306,68 +7967,52 @@ _mm256_maskz_moveldup_ps (__mmask8 __U, __m256 __A)
                                        (__v4si)_mm_shuffle_epi32((A), (I)), \
                                        (__v4si)_mm_setzero_si128()))
 
-static __inline__ __m128d __DEFAULT_FN_ATTRS128
-_mm_mask_mov_pd (__m128d __W, __mmask8 __U, __m128d __A)
-{
-  return (__m128d) __builtin_ia32_selectpd_128 ((__mmask8) __U,
-              (__v2df) __A,
-              (__v2df) __W);
+static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR
+_mm_mask_mov_pd(__m128d __W, __mmask8 __U, __m128d __A) {
+  return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U, (__v2df)__A,
+                                              (__v2df)__W);
 }
 
-static __inline__ __m128d __DEFAULT_FN_ATTRS128
-_mm_maskz_mov_pd (__mmask8 __U, __m128d __A)
-{
-  return (__m128d) __builtin_ia32_selectpd_128 ((__mmask8) __U,
-              (__v2df) __A,
-              (__v2df) _mm_setzero_pd ());
+static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR
+_mm_maskz_mov_pd(__mmask8 __U, __m128d __A) {
+  return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U, (__v2df)__A,
+                                              (__v2df)_mm_setzero_pd());
 }
 
-static __inline__ __m256d __DEFAULT_FN_ATTRS256
-_mm256_mask_mov_pd (__m256d __W, __mmask8 __U, __m256d __A)
-{
-  return (__m256d) __builtin_ia32_selectpd_256 ((__mmask8) __U,
-              (__v4df) __A,
-              (__v4df) __W);
+static __inline__ __m256d __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_mask_mov_pd(__m256d __W, __mmask8 __U, __m256d __A) {
+  return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U, (__v4df)__A,
+                                              (__v4df)__W);
 }
 
-static __inline__ __m256d __DEFAULT_FN_ATTRS256
-_mm256_maskz_mov_pd (__mmask8 __U, __m256d __A)
-{
-  return (__m256d) __builtin_ia32_selectpd_256 ((__mmask8) __U,
-              (__v4df) __A,
-              (__v4df) _mm256_setzero_pd ());
+static __inline__ __m256d __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_maskz_mov_pd(__mmask8 __U, __m256d __A) {
+  return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U, (__v4df)__A,
+                                              (__v4df)_mm256_setzero_pd());
 }
 
-static __inline__ __m128 __DEFAULT_FN_ATTRS128
-_mm_mask_mov_ps (__m128 __W, __mmask8 __U, __m128 __A)
-{
-  return (__m128) __builtin_ia32_selectps_128 ((__mmask8) __U,
-             (__v4sf) __A,
-             (__v4sf) __W);
+static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR
+_mm_mask_mov_ps(__m128 __W, __mmask8 __U, __m128 __A) {
+  return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, (__v4sf)__A,
+                                             (__v4sf)__W);
 }
 
-static __inline__ __m128 __DEFAULT_FN_ATTRS128
-_mm_maskz_mov_ps (__mmask8 __U, __m128 __A)
-{
-  return (__m128) __builtin_ia32_selectps_128 ((__mmask8) __U,
-             (__v4sf) __A,
-             (__v4sf) _mm_setzero_ps ());
+static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR
+_mm_maskz_mov_ps(__mmask8 __U, __m128 __A) {
+  return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, (__v4sf)__A,
+                                             (__v4sf)_mm_setzero_ps());
 }
 
-static __inline__ __m256 __DEFAULT_FN_ATTRS256
-_mm256_mask_mov_ps (__m256 __W, __mmask8 __U, __m256 __A)
-{
-  return (__m256) __builtin_ia32_selectps_256 ((__mmask8) __U,
-             (__v8sf) __A,
-             (__v8sf) __W);
+static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_mask_mov_ps(__m256 __W, __mmask8 __U, __m256 __A) {
+  return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, (__v8sf)__A,
+                                             (__v8sf)__W);
 }
 
-static __inline__ __m256 __DEFAULT_FN_ATTRS256
-_mm256_maskz_mov_ps (__mmask8 __U, __m256 __A)
-{
-  return (__m256) __builtin_ia32_selectps_256 ((__mmask8) __U,
-             (__v8sf) __A,
-             (__v8sf) _mm256_setzero_ps ());
+static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_maskz_mov_ps(__mmask8 __U, __m256 __A) {
+  return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, (__v8sf)__A,
+                                             (__v8sf)_mm256_setzero_ps());
 }
 
 static __inline__ __m128 __DEFAULT_FN_ATTRS128
@@ -8430,8 +8075,9 @@ _mm256_maskz_cvtph_ps (__mmask8 __U, __m128i __A)
 #define _mm256_mask_cvtps_ph  _mm256_mask_cvt_roundps_ph
 #define _mm256_maskz_cvtps_ph _mm256_maskz_cvt_roundps_ph
 
-
 #undef __DEFAULT_FN_ATTRS128
 #undef __DEFAULT_FN_ATTRS256
+#undef __DEFAULT_FN_ATTRS256_CONSTEXPR
+#undef __DEFAULT_FN_ATTRS128_CONSTEXPR
 
 #endif /* __AVX512VLINTRIN_H */
diff --git a/lib/include/avx512vlvbmi2intrin.h b/lib/include/avx512vlvbmi2intrin.h
index 77af2d5cbd..da295d2a12 100644
--- a/lib/include/avx512vlvbmi2intrin.h
+++ b/lib/include/avx512vlvbmi2intrin.h
@@ -17,13 +17,21 @@
 /* Define the default attributes for the functions in this file. */
 #define __DEFAULT_FN_ATTRS128                                                  \
   __attribute__((__always_inline__, __nodebug__,                               \
-                 __target__("avx512vl,avx512vbmi2,no-evex512"),                \
+                 __target__("avx512vl,avx512vbmi2"),                           \
                  __min_vector_width__(128)))
 #define __DEFAULT_FN_ATTRS256                                                  \
   __attribute__((__always_inline__, __nodebug__,                               \
-                 __target__("avx512vl,avx512vbmi2,no-evex512"),                \
+                 __target__("avx512vl,avx512vbmi2"),                           \
                  __min_vector_width__(256)))
 
+#if defined(__cplusplus) && (__cplusplus >= 201103L)
+#define __DEFAULT_FN_ATTRS128_CONSTEXPR __DEFAULT_FN_ATTRS128 constexpr
+#define __DEFAULT_FN_ATTRS256_CONSTEXPR __DEFAULT_FN_ATTRS256 constexpr
+#else
+#define __DEFAULT_FN_ATTRS128_CONSTEXPR __DEFAULT_FN_ATTRS128
+#define __DEFAULT_FN_ATTRS256_CONSTEXPR __DEFAULT_FN_ATTRS256
+#endif
+
 static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_mask_compress_epi16(__m128i __S, __mmask8 __U, __m128i __D)
 {
@@ -412,14 +420,14 @@ _mm256_maskz_expandloadu_epi8(__mmask32 __U, void const *__P)
                                        (__v8hi)_mm_shrdi_epi16((A), (B), (I)), \
                                        (__v8hi)_mm_setzero_si128()))
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
 _mm256_shldv_epi64(__m256i __A, __m256i __B, __m256i __C)
 {
-  return (__m256i)__builtin_ia32_vpshldvq256((__v4di)__A, (__v4di)__B,
-                                             (__v4di)__C);
+  return (__m256i)__builtin_elementwise_fshl((__v4du)__A, (__v4du)__B,
+                                             (__v4du)__C);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
 _mm256_mask_shldv_epi64(__m256i __A, __mmask8 __U, __m256i __B, __m256i __C)
 {
   return (__m256i)__builtin_ia32_selectq_256(__U,
@@ -427,7 +435,7 @@ _mm256_mask_shldv_epi64(__m256i __A, __mmask8 __U, __m256i __B, __m256i __C)
                                       (__v4di)__A);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
 _mm256_maskz_shldv_epi64(__mmask8 __U, __m256i __A, __m256i __B, __m256i __C)
 {
   return (__m256i)__builtin_ia32_selectq_256(__U,
@@ -435,14 +443,14 @@ _mm256_maskz_shldv_epi64(__mmask8 __U, __m256i __A, __m256i __B, __m256i __C)
                                       (__v4di)_mm256_setzero_si256());
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS128
+static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
 _mm_shldv_epi64(__m128i __A, __m128i __B, __m128i __C)
 {
-  return (__m128i)__builtin_ia32_vpshldvq128((__v2di)__A, (__v2di)__B,
-                                             (__v2di)__C);
+  return (__m128i)__builtin_elementwise_fshl((__v2du)__A, (__v2du)__B,
+                                             (__v2du)__C);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS128
+static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
 _mm_mask_shldv_epi64(__m128i __A, __mmask8 __U, __m128i __B, __m128i __C)
 {
   return (__m128i)__builtin_ia32_selectq_128(__U,
@@ -450,7 +458,7 @@ _mm_mask_shldv_epi64(__m128i __A, __mmask8 __U, __m128i __B, __m128i __C)
                                          (__v2di)__A);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS128
+static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
 _mm_maskz_shldv_epi64(__mmask8 __U, __m128i __A, __m128i __B, __m128i __C)
 {
   return (__m128i)__builtin_ia32_selectq_128(__U,
@@ -458,14 +466,14 @@ _mm_maskz_shldv_epi64(__mmask8 __U, __m128i __A, __m128i __B, __m128i __C)
                                          (__v2di)_mm_setzero_si128());
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
 _mm256_shldv_epi32(__m256i __A, __m256i __B, __m256i __C)
 {
-  return (__m256i)__builtin_ia32_vpshldvd256((__v8si)__A, (__v8si)__B,
-                                             (__v8si)__C);
+  return (__m256i)__builtin_elementwise_fshl((__v8su)__A, (__v8su)__B,
+                                             (__v8su)__C);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
 _mm256_mask_shldv_epi32(__m256i __A, __mmask8 __U, __m256i __B, __m256i __C)
 {
   return (__m256i)__builtin_ia32_selectd_256(__U,
@@ -473,7 +481,7 @@ _mm256_mask_shldv_epi32(__m256i __A, __mmask8 __U, __m256i __B, __m256i __C)
                                       (__v8si)__A);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
 _mm256_maskz_shldv_epi32(__mmask8 __U, __m256i __A, __m256i __B, __m256i __C)
 {
   return (__m256i)__builtin_ia32_selectd_256(__U,
@@ -481,14 +489,14 @@ _mm256_maskz_shldv_epi32(__mmask8 __U, __m256i __A, __m256i __B, __m256i __C)
                                       (__v8si)_mm256_setzero_si256());
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS128
+static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
 _mm_shldv_epi32(__m128i __A, __m128i __B, __m128i __C)
 {
-  return (__m128i)__builtin_ia32_vpshldvd128((__v4si)__A, (__v4si)__B,
-                                             (__v4si)__C);
+  return (__m128i)__builtin_elementwise_fshl((__v4su)__A, (__v4su)__B,
+                                             (__v4su)__C);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS128
+static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
 _mm_mask_shldv_epi32(__m128i __A, __mmask8 __U, __m128i __B, __m128i __C)
 {
   return (__m128i)__builtin_ia32_selectd_128(__U,
@@ -496,7 +504,7 @@ _mm_mask_shldv_epi32(__m128i __A, __mmask8 __U, __m128i __B, __m128i __C)
                                          (__v4si)__A);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS128
+static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
 _mm_maskz_shldv_epi32(__mmask8 __U, __m128i __A, __m128i __B, __m128i __C)
 {
   return (__m128i)__builtin_ia32_selectd_128(__U,
@@ -504,14 +512,14 @@ _mm_maskz_shldv_epi32(__mmask8 __U, __m128i __A, __m128i __B, __m128i __C)
                                          (__v4si)_mm_setzero_si128());
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
 _mm256_shldv_epi16(__m256i __A, __m256i __B, __m256i __C)
 {
-  return (__m256i)__builtin_ia32_vpshldvw256((__v16hi)__A, (__v16hi)__B,
-                                             (__v16hi)__C);
+  return (__m256i)__builtin_elementwise_fshl((__v16hu)__A, (__v16hu)__B,
+                                             (__v16hu)__C);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
 _mm256_mask_shldv_epi16(__m256i __A, __mmask16 __U, __m256i __B, __m256i __C)
 {
   return (__m256i)__builtin_ia32_selectw_256(__U,
@@ -519,7 +527,7 @@ _mm256_mask_shldv_epi16(__m256i __A, __mmask16 __U, __m256i __B, __m256i __C)
                                       (__v16hi)__A);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
 _mm256_maskz_shldv_epi16(__mmask16 __U, __m256i __A, __m256i __B, __m256i __C)
 {
   return (__m256i)__builtin_ia32_selectw_256(__U,
@@ -527,14 +535,14 @@ _mm256_maskz_shldv_epi16(__mmask16 __U, __m256i __A, __m256i __B, __m256i __C)
                                       (__v16hi)_mm256_setzero_si256());
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS128
+static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
 _mm_shldv_epi16(__m128i __A, __m128i __B, __m128i __C)
 {
-  return (__m128i)__builtin_ia32_vpshldvw128((__v8hi)__A, (__v8hi)__B,
-                                             (__v8hi)__C);
+  return (__m128i)__builtin_elementwise_fshl((__v8hu)__A, (__v8hu)__B,
+                                             (__v8hu)__C);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS128
+static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
 _mm_mask_shldv_epi16(__m128i __A, __mmask8 __U, __m128i __B, __m128i __C)
 {
   return (__m128i)__builtin_ia32_selectw_128(__U,
@@ -542,7 +550,7 @@ _mm_mask_shldv_epi16(__m128i __A, __mmask8 __U, __m128i __B, __m128i __C)
                                          (__v8hi)__A);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS128
+static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
 _mm_maskz_shldv_epi16(__mmask8 __U, __m128i __A, __m128i __B, __m128i __C)
 {
   return (__m128i)__builtin_ia32_selectw_128(__U,
@@ -550,14 +558,15 @@ _mm_maskz_shldv_epi16(__mmask8 __U, __m128i __A, __m128i __B, __m128i __C)
                                          (__v8hi)_mm_setzero_si128());
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
 _mm256_shrdv_epi64(__m256i __A, __m256i __B, __m256i __C)
 {
-  return (__m256i)__builtin_ia32_vpshrdvq256((__v4di)__A, (__v4di)__B,
-                                             (__v4di)__C);
+  // fshr expects (high, low, shift); here __A is low and __B is high: swap.
+  return (__m256i)__builtin_elementwise_fshr((__v4du)__B, (__v4du)__A,
+                                             (__v4du)__C);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
 _mm256_mask_shrdv_epi64(__m256i __A, __mmask8 __U, __m256i __B, __m256i __C)
 {
   return (__m256i)__builtin_ia32_selectq_256(__U,
@@ -565,7 +574,7 @@ _mm256_mask_shrdv_epi64(__m256i __A, __mmask8 __U, __m256i __B, __m256i __C)
                                       (__v4di)__A);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
 _mm256_maskz_shrdv_epi64(__mmask8 __U, __m256i __A, __m256i __B, __m256i __C)
 {
   return (__m256i)__builtin_ia32_selectq_256(__U,
@@ -573,14 +582,15 @@ _mm256_maskz_shrdv_epi64(__mmask8 __U, __m256i __A, __m256i __B, __m256i __C)
                                       (__v4di)_mm256_setzero_si256());
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS128
+static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
 _mm_shrdv_epi64(__m128i __A, __m128i __B, __m128i __C)
 {
-  return (__m128i)__builtin_ia32_vpshrdvq128((__v2di)__A, (__v2di)__B,
-                                             (__v2di)__C);
+  // fshr expects (high, low, shift); here __A is low and __B is high: swap.
+  return (__m128i)__builtin_elementwise_fshr((__v2du)__B, (__v2du)__A,
+                                             (__v2du)__C);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS128
+static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
 _mm_mask_shrdv_epi64(__m128i __A, __mmask8 __U, __m128i __B, __m128i __C)
 {
   return (__m128i)__builtin_ia32_selectq_128(__U,
@@ -588,7 +598,7 @@ _mm_mask_shrdv_epi64(__m128i __A, __mmask8 __U, __m128i __B, __m128i __C)
                                          (__v2di)__A);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS128
+static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
 _mm_maskz_shrdv_epi64(__mmask8 __U, __m128i __A, __m128i __B, __m128i __C)
 {
   return (__m128i)__builtin_ia32_selectq_128(__U,
@@ -596,14 +606,15 @@ _mm_maskz_shrdv_epi64(__mmask8 __U, __m128i __A, __m128i __B, __m128i __C)
                                          (__v2di)_mm_setzero_si128());
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
 _mm256_shrdv_epi32(__m256i __A, __m256i __B, __m256i __C)
 {
-  return (__m256i)__builtin_ia32_vpshrdvd256((__v8si)__A, (__v8si)__B,
-                                             (__v8si)__C);
+  // fshr expects (high, low, shift); here __A is low and __B is high: swap.
+  return (__m256i)__builtin_elementwise_fshr((__v8su)__B, (__v8su)__A,
+                                             (__v8su)__C);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
 _mm256_mask_shrdv_epi32(__m256i __A, __mmask8 __U, __m256i __B, __m256i __C)
 {
   return (__m256i)__builtin_ia32_selectd_256(__U,
@@ -611,7 +622,7 @@ _mm256_mask_shrdv_epi32(__m256i __A, __mmask8 __U, __m256i __B, __m256i __C)
                                       (__v8si)__A);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
 _mm256_maskz_shrdv_epi32(__mmask8 __U, __m256i __A, __m256i __B, __m256i __C)
 {
   return (__m256i)__builtin_ia32_selectd_256(__U,
@@ -619,14 +630,15 @@ _mm256_maskz_shrdv_epi32(__mmask8 __U, __m256i __A, __m256i __B, __m256i __C)
                                       (__v8si)_mm256_setzero_si256());
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS128
+static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
 _mm_shrdv_epi32(__m128i __A, __m128i __B, __m128i __C)
 {
-  return (__m128i)__builtin_ia32_vpshrdvd128((__v4si)__A, (__v4si)__B,
-                                             (__v4si)__C);
+  // fshr expects (high, low, shift); here __A is low and __B is high: swap.
+  return (__m128i)__builtin_elementwise_fshr((__v4su)__B, (__v4su)__A,
+                                             (__v4su)__C);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS128
+static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
 _mm_mask_shrdv_epi32(__m128i __A, __mmask8 __U, __m128i __B, __m128i __C)
 {
   return (__m128i)__builtin_ia32_selectd_128(__U,
@@ -634,7 +646,7 @@ _mm_mask_shrdv_epi32(__m128i __A, __mmask8 __U, __m128i __B, __m128i __C)
                                          (__v4si)__A);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS128
+static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
 _mm_maskz_shrdv_epi32(__mmask8 __U, __m128i __A, __m128i __B, __m128i __C)
 {
   return (__m128i)__builtin_ia32_selectd_128(__U,
@@ -642,14 +654,15 @@ _mm_maskz_shrdv_epi32(__mmask8 __U, __m128i __A, __m128i __B, __m128i __C)
                                          (__v4si)_mm_setzero_si128());
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
 _mm256_shrdv_epi16(__m256i __A, __m256i __B, __m256i __C)
 {
-  return (__m256i)__builtin_ia32_vpshrdvw256((__v16hi)__A, (__v16hi)__B,
-                                             (__v16hi)__C);
+  // Ops __A and __B are swapped.
+  return (__m256i)__builtin_elementwise_fshr((__v16hu)__B, (__v16hu)__A,
+                                             (__v16hu)__C);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
 _mm256_mask_shrdv_epi16(__m256i __A, __mmask16 __U, __m256i __B, __m256i __C)
 {
   return (__m256i)__builtin_ia32_selectw_256(__U,
@@ -657,7 +670,7 @@ _mm256_mask_shrdv_epi16(__m256i __A, __mmask16 __U, __m256i __B, __m256i __C)
                                      (__v16hi)__A);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
 _mm256_maskz_shrdv_epi16(__mmask16 __U, __m256i __A, __m256i __B, __m256i __C)
 {
   return (__m256i)__builtin_ia32_selectw_256(__U,
@@ -665,14 +678,15 @@ _mm256_maskz_shrdv_epi16(__mmask16 __U, __m256i __A, __m256i __B, __m256i __C)
                                      (__v16hi)_mm256_setzero_si256());
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS128
+static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
 _mm_shrdv_epi16(__m128i __A, __m128i __B, __m128i __C)
 {
-  return (__m128i)__builtin_ia32_vpshrdvw128((__v8hi)__A, (__v8hi)__B,
-                                             (__v8hi)__C);
+  // Ops __A and __B are swapped.
+  return (__m128i)__builtin_elementwise_fshr((__v8hu)__B, (__v8hu)__A,
+                                             (__v8hu)__C);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS128
+static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
 _mm_mask_shrdv_epi16(__m128i __A, __mmask8 __U, __m128i __B, __m128i __C)
 {
   return (__m128i)__builtin_ia32_selectw_128(__U,
@@ -680,7 +694,7 @@ _mm_mask_shrdv_epi16(__m128i __A, __mmask8 __U, __m128i __B, __m128i __C)
                                          (__v8hi)__A);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS128
+static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
 _mm_maskz_shrdv_epi16(__mmask8 __U, __m128i __A, __m128i __B, __m128i __C)
 {
   return (__m128i)__builtin_ia32_selectw_128(__U,
@@ -688,8 +702,9 @@ _mm_maskz_shrdv_epi16(__mmask8 __U, __m128i __A, __m128i __B, __m128i __C)
                                          (__v8hi)_mm_setzero_si128());
 }
 
-
 #undef __DEFAULT_FN_ATTRS128
 #undef __DEFAULT_FN_ATTRS256
+#undef __DEFAULT_FN_ATTRS128_CONSTEXPR
+#undef __DEFAULT_FN_ATTRS256_CONSTEXPR
 
 #endif
diff --git a/lib/include/avx512vlvnniintrin.h b/lib/include/avx512vlvnniintrin.h
index d1e5cd9d69..4b8a199af3 100644
--- a/lib/include/avx512vlvnniintrin.h
+++ b/lib/include/avx512vlvnniintrin.h
@@ -17,11 +17,11 @@
 /* Define the default attributes for the functions in this file. */
 #define __DEFAULT_FN_ATTRS128                                                  \
   __attribute__((__always_inline__, __nodebug__,                               \
-                 __target__("avx512vl,avx512vnni,no-evex512"),                 \
+                 __target__("avx512vl,avx512vnni"),                            \
                  __min_vector_width__(128)))
 #define __DEFAULT_FN_ATTRS256                                                  \
   __attribute__((__always_inline__, __nodebug__,                               \
-                 __target__("avx512vl,avx512vnni,no-evex512"),                 \
+                 __target__("avx512vl,avx512vnni"),                            \
                  __min_vector_width__(256)))
 
 /// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in \a A with
@@ -41,8 +41,8 @@
 ///    ENDFOR
 ///    DST[MAX:256] := 0
 /// \endcode
-#define _mm256_dpbusd_epi32(S, A, B) \
-  ((__m256i)__builtin_ia32_vpdpbusd256((__v8si)(S), (__v8si)(A), (__v8si)(B)))
+#define _mm256_dpbusd_epi32(S, A, B)                                           \
+  ((__m256i)__builtin_ia32_vpdpbusd256((__v8si)(S), (__v32qu)(A), (__v32qi)(B)))
 
 /// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in \a A with
 /// corresponding signed 8-bit integers in \a B, producing 4 intermediate signed
@@ -61,8 +61,9 @@
 ///    ENDFOR
 ///    DST[MAX:256] := 0
 /// \endcode
-#define _mm256_dpbusds_epi32(S, A, B) \
-  ((__m256i)__builtin_ia32_vpdpbusds256((__v8si)(S), (__v8si)(A), (__v8si)(B)))
+#define _mm256_dpbusds_epi32(S, A, B)                                          \
+  ((__m256i)__builtin_ia32_vpdpbusds256((__v8si)(S), (__v32qu)(A),             \
+                                        (__v32qi)(B)))
 
 /// Multiply groups of 2 adjacent pairs of signed 16-bit integers in \a A with
 /// corresponding 16-bit integers in \a B, producing 2 intermediate signed 32-bit
@@ -79,8 +80,8 @@
 ///    ENDFOR
 ///    DST[MAX:256] := 0
 /// \endcode
-#define _mm256_dpwssd_epi32(S, A, B) \
-  ((__m256i)__builtin_ia32_vpdpwssd256((__v8si)(S), (__v8si)(A), (__v8si)(B)))
+#define _mm256_dpwssd_epi32(S, A, B)                                           \
+  ((__m256i)__builtin_ia32_vpdpwssd256((__v8si)(S), (__v16hi)(A), (__v16hi)(B)))
 
 /// Multiply groups of 2 adjacent pairs of signed 16-bit integers in \a A with
 /// corresponding 16-bit integers in \a B, producing 2 intermediate signed 32-bit
@@ -97,8 +98,9 @@
 ///    ENDFOR
 ///    DST[MAX:256] := 0
 /// \endcode
-#define _mm256_dpwssds_epi32(S, A, B) \
-  ((__m256i)__builtin_ia32_vpdpwssds256((__v8si)(S), (__v8si)(A), (__v8si)(B)))
+#define _mm256_dpwssds_epi32(S, A, B)                                          \
+  ((__m256i)__builtin_ia32_vpdpwssds256((__v8si)(S), (__v16hi)(A),             \
+                                        (__v16hi)(B)))
 
 /// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in \a A with
 /// corresponding signed 8-bit integers in \a B, producing 4 intermediate signed
@@ -117,8 +119,8 @@
 ///    ENDFOR
 ///    DST[MAX:128] := 0
 /// \endcode
-#define _mm_dpbusd_epi32(S, A, B) \
-  ((__m128i)__builtin_ia32_vpdpbusd128((__v4si)(S), (__v4si)(A), (__v4si)(B)))
+#define _mm_dpbusd_epi32(S, A, B)                                              \
+  ((__m128i)__builtin_ia32_vpdpbusd128((__v4si)(S), (__v16qu)(A), (__v16qi)(B)))
 
 /// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in \a A with
 /// corresponding signed 8-bit integers in \a B, producing 4 intermediate signed
@@ -137,8 +139,9 @@
 ///    ENDFOR
 ///    DST[MAX:128] := 0
 /// \endcode
-#define _mm_dpbusds_epi32(S, A, B) \
-  ((__m128i)__builtin_ia32_vpdpbusds128((__v4si)(S), (__v4si)(A), (__v4si)(B)))
+#define _mm_dpbusds_epi32(S, A, B)                                             \
+  ((__m128i)__builtin_ia32_vpdpbusds128((__v4si)(S), (__v16qu)(A),             \
+                                        (__v16qi)(B)))
 
 /// Multiply groups of 2 adjacent pairs of signed 16-bit integers in \a A with
 /// corresponding 16-bit integers in \a B, producing 2 intermediate signed 32-bit
@@ -155,8 +158,8 @@
 ///    ENDFOR
 ///    DST[MAX:128] := 0
 /// \endcode
-#define _mm_dpwssd_epi32(S, A, B) \
-  ((__m128i)__builtin_ia32_vpdpwssd128((__v4si)(S), (__v4si)(A), (__v4si)(B)))
+#define _mm_dpwssd_epi32(S, A, B)                                              \
+  ((__m128i)__builtin_ia32_vpdpwssd128((__v4si)(S), (__v8hi)(A), (__v8hi)(B)))
 
 /// Multiply groups of 2 adjacent pairs of signed 16-bit integers in \a A with
 /// corresponding 16-bit integers in \a B, producing 2 intermediate signed 32-bit
@@ -173,8 +176,8 @@
 ///    ENDFOR
 ///    DST[MAX:128] := 0
 /// \endcode
-#define _mm_dpwssds_epi32(S, A, B) \
-  ((__m128i)__builtin_ia32_vpdpwssds128((__v4si)(S), (__v4si)(A), (__v4si)(B)))
+#define _mm_dpwssds_epi32(S, A, B)                                             \
+  ((__m128i)__builtin_ia32_vpdpwssds128((__v4si)(S), (__v8hi)(A), (__v8hi)(B)))
 
 static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_mask_dpbusd_epi32(__m256i __S, __mmask8 __U, __m256i __A, __m256i __B)
diff --git a/lib/include/avx512vlvp2intersectintrin.h b/lib/include/avx512vlvp2intersectintrin.h
index 63a31241a5..8cb33169e5 100644
--- a/lib/include/avx512vlvp2intersectintrin.h
+++ b/lib/include/avx512vlvp2intersectintrin.h
@@ -30,12 +30,12 @@
 
 #define __DEFAULT_FN_ATTRS128                                                  \
   __attribute__((__always_inline__, __nodebug__,                               \
-                 __target__("avx512vl,avx512vp2intersect,no-evex512"),         \
+                 __target__("avx512vl,avx512vp2intersect"),                    \
                  __min_vector_width__(128)))
 
 #define __DEFAULT_FN_ATTRS256                                                  \
   __attribute__((__always_inline__, __nodebug__,                               \
-                 __target__("avx512vl,avx512vp2intersect,no-evex512"),         \
+                 __target__("avx512vl,avx512vp2intersect"),                    \
                  __min_vector_width__(256)))
 /// Store, in an even/odd pair of mask registers, the indicators of the
 /// locations of value matches between dwords in operands __a and __b.
diff --git a/lib/include/avx512vnniintrin.h b/lib/include/avx512vnniintrin.h
index 0fb381a12f..2ce88efe4a 100644
--- a/lib/include/avx512vnniintrin.h
+++ b/lib/include/avx512vnniintrin.h
@@ -16,14 +16,14 @@
 
 /* Define the default attributes for the functions in this file. */
 #define __DEFAULT_FN_ATTRS                                                     \
-  __attribute__((__always_inline__, __nodebug__,                               \
-                 __target__("avx512vnni,evex512"), __min_vector_width__(512)))
+  __attribute__((__always_inline__, __nodebug__, __target__("avx512vnni"),     \
+                 __min_vector_width__(512)))
 
 static __inline__ __m512i __DEFAULT_FN_ATTRS
 _mm512_dpbusd_epi32(__m512i __S, __m512i __A, __m512i __B)
 {
-  return (__m512i)__builtin_ia32_vpdpbusd512((__v16si)__S, (__v16si)__A,
-                                             (__v16si)__B);
+  return (__m512i)__builtin_ia32_vpdpbusd512((__v16si)__S, (__v64qu)__A,
+                                             (__v64qi)__B);
 }
 
 static __inline__ __m512i __DEFAULT_FN_ATTRS
@@ -45,8 +45,8 @@ _mm512_maskz_dpbusd_epi32(__mmask16 __U, __m512i __S, __m512i __A, __m512i __B)
 static __inline__ __m512i __DEFAULT_FN_ATTRS
 _mm512_dpbusds_epi32(__m512i __S, __m512i __A, __m512i __B)
 {
-  return (__m512i)__builtin_ia32_vpdpbusds512((__v16si)__S, (__v16si)__A,
-                                              (__v16si)__B);
+  return (__m512i)__builtin_ia32_vpdpbusds512((__v16si)__S, (__v64qu)__A,
+                                              (__v64qi)__B);
 }
 
 static __inline__ __m512i __DEFAULT_FN_ATTRS
@@ -68,8 +68,8 @@ _mm512_maskz_dpbusds_epi32(__mmask16 __U, __m512i __S, __m512i __A, __m512i __B)
 static __inline__ __m512i __DEFAULT_FN_ATTRS
 _mm512_dpwssd_epi32(__m512i __S, __m512i __A, __m512i __B)
 {
-  return (__m512i)__builtin_ia32_vpdpwssd512((__v16si)__S, (__v16si)__A,
-                                             (__v16si)__B);
+  return (__m512i)__builtin_ia32_vpdpwssd512((__v16si)__S, (__v32hi)__A,
+                                             (__v32hi)__B);
 }
 
 static __inline__ __m512i __DEFAULT_FN_ATTRS
@@ -91,8 +91,8 @@ _mm512_maskz_dpwssd_epi32(__mmask16 __U, __m512i __S, __m512i __A, __m512i __B)
 static __inline__ __m512i __DEFAULT_FN_ATTRS
 _mm512_dpwssds_epi32(__m512i __S, __m512i __A, __m512i __B)
 {
-  return (__m512i)__builtin_ia32_vpdpwssds512((__v16si)__S, (__v16si)__A,
-                                              (__v16si)__B);
+  return (__m512i)__builtin_ia32_vpdpwssds512((__v16si)__S, (__v32hi)__A,
+                                              (__v32hi)__B);
 }
 
 static __inline__ __m512i __DEFAULT_FN_ATTRS
diff --git a/lib/include/avx512vp2intersectintrin.h b/lib/include/avx512vp2intersectintrin.h
index 16552cae3b..7d999960a5 100644
--- a/lib/include/avx512vp2intersectintrin.h
+++ b/lib/include/avx512vp2intersectintrin.h
@@ -30,8 +30,7 @@
 
 #define __DEFAULT_FN_ATTRS                                                     \
   __attribute__((__always_inline__, __nodebug__,                               \
-                 __target__("avx512vp2intersect,evex512"),                     \
-                 __min_vector_width__(512)))
+                 __target__("avx512vp2intersect"), __min_vector_width__(512)))
 
 /// Store, in an even/odd pair of mask registers, the indicators of the
 /// locations of value matches between dwords in operands __a and __b.
diff --git a/lib/include/avx512vpopcntdqintrin.h b/lib/include/avx512vpopcntdqintrin.h
index e24c2c5e1b..cc884fea41 100644
--- a/lib/include/avx512vpopcntdqintrin.h
+++ b/lib/include/avx512vpopcntdqintrin.h
@@ -16,19 +16,18 @@
 #define __AVX512VPOPCNTDQINTRIN_H
 
 /* Define the default attributes for the functions in this file. */
+#if defined(__cplusplus) && (__cplusplus >= 201103L)
 #define __DEFAULT_FN_ATTRS                                                     \
   __attribute__((__always_inline__, __nodebug__,                               \
-                 __target__("avx512vpopcntdq,evex512"),                        \
-                 __min_vector_width__(512)))
-
-#if defined(__cplusplus) && (__cplusplus >= 201103L)
-#define __DEFAULT_FN_ATTRS_CONSTEXPR __DEFAULT_FN_ATTRS constexpr
+                 __target__("avx512vpopcntdq"),                                \
+                 __min_vector_width__(512))) constexpr
 #else
-#define __DEFAULT_FN_ATTRS_CONSTEXPR __DEFAULT_FN_ATTRS
+#define __DEFAULT_FN_ATTRS                                                     \
+  __attribute__((__always_inline__, __nodebug__,                               \
+                 __target__("avx512vpopcntdq"), __min_vector_width__(512)))
 #endif
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS_CONSTEXPR
-_mm512_popcnt_epi64(__m512i __A) {
+static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_popcnt_epi64(__m512i __A) {
   return (__m512i)__builtin_elementwise_popcount((__v8du)__A);
 }
 
@@ -43,8 +42,7 @@ _mm512_maskz_popcnt_epi64(__mmask8 __U, __m512i __A) {
   return _mm512_mask_popcnt_epi64((__m512i)_mm512_setzero_si512(), __U, __A);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS_CONSTEXPR
-_mm512_popcnt_epi32(__m512i __A) {
+static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_popcnt_epi32(__m512i __A) {
   return (__m512i)__builtin_elementwise_popcount((__v16su)__A);
 }
 
diff --git a/lib/include/avx512vpopcntdqvlintrin.h b/lib/include/avx512vpopcntdqvlintrin.h
index b6c819b0cb..9b26aa1e63 100644
--- a/lib/include/avx512vpopcntdqvlintrin.h
+++ b/lib/include/avx512vpopcntdqvlintrin.h
@@ -16,25 +16,28 @@
 #define __AVX512VPOPCNTDQVLINTRIN_H
 
 /* Define the default attributes for the functions in this file. */
+
+#if defined(__cplusplus) && (__cplusplus >= 201103L)
 #define __DEFAULT_FN_ATTRS128                                                  \
   __attribute__((__always_inline__, __nodebug__,                               \
-                 __target__("avx512vpopcntdq,avx512vl,no-evex512"),            \
+                 __target__("avx512vpopcntdq,avx512vl"),                       \
+                 __min_vector_width__(128))) constexpr
+#define __DEFAULT_FN_ATTRS256                                                  \
+  __attribute__((__always_inline__, __nodebug__,                               \
+                 __target__("avx512vpopcntdq,avx512vl"),                       \
+                 __min_vector_width__(256))) constexpr
+#else
+#define __DEFAULT_FN_ATTRS128                                                  \
+  __attribute__((__always_inline__, __nodebug__,                               \
+                 __target__("avx512vpopcntdq,avx512vl"),                       \
                  __min_vector_width__(128)))
 #define __DEFAULT_FN_ATTRS256                                                  \
   __attribute__((__always_inline__, __nodebug__,                               \
-                 __target__("avx512vpopcntdq,avx512vl,no-evex512"),            \
+                 __target__("avx512vpopcntdq,avx512vl"),                       \
                  __min_vector_width__(256)))
-
-#if defined(__cplusplus) && (__cplusplus >= 201103L)
-#define __DEFAULT_FN_ATTRS128_CONSTEXPR __DEFAULT_FN_ATTRS128 constexpr
-#define __DEFAULT_FN_ATTRS256_CONSTEXPR __DEFAULT_FN_ATTRS256 constexpr
-#else
-#define __DEFAULT_FN_ATTRS128_CONSTEXPR __DEFAULT_FN_ATTRS128
-#define __DEFAULT_FN_ATTRS256_CONSTEXPR __DEFAULT_FN_ATTRS256
 #endif
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
-_mm_popcnt_epi64(__m128i __A) {
+static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_popcnt_epi64(__m128i __A) {
   return (__m128i)__builtin_elementwise_popcount((__v2du)__A);
 }
 
@@ -49,8 +52,7 @@ _mm_maskz_popcnt_epi64(__mmask8 __U, __m128i __A) {
   return _mm_mask_popcnt_epi64((__m128i)_mm_setzero_si128(), __U, __A);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
-_mm_popcnt_epi32(__m128i __A) {
+static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_popcnt_epi32(__m128i __A) {
   return (__m128i)__builtin_elementwise_popcount((__v4su)__A);
 }
 
@@ -65,7 +67,7 @@ _mm_maskz_popcnt_epi32(__mmask8 __U, __m128i __A) {
   return _mm_mask_popcnt_epi32((__m128i)_mm_setzero_si128(), __U, __A);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_popcnt_epi64(__m256i __A) {
   return (__m256i)__builtin_elementwise_popcount((__v4du)__A);
 }
@@ -81,7 +83,7 @@ _mm256_maskz_popcnt_epi64(__mmask8 __U, __m256i __A) {
   return _mm256_mask_popcnt_epi64((__m256i)_mm256_setzero_si256(), __U, __A);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_popcnt_epi32(__m256i __A) {
   return (__m256i)__builtin_elementwise_popcount((__v8su)__A);
 }
diff --git a/lib/include/avxifmaintrin.h b/lib/include/avxifmaintrin.h
index 5c782d2a5b..30df01caed 100644
--- a/lib/include/avxifmaintrin.h
+++ b/lib/include/avxifmaintrin.h
@@ -15,12 +15,28 @@
 #define __AVXIFMAINTRIN_H
 
 /* Define the default attributes for the functions in this file. */
+#if defined(__cplusplus) && (__cplusplus >= 201103L)
+#define __DEFAULT_FN_ATTRS128                                                  \
+  __attribute__((__always_inline__, __nodebug__, __target__("avxifma"),        \
+                 __min_vector_width__(128))) constexpr
+#define __DEFAULT_FN_ATTRS256                                                  \
+  __attribute__((__always_inline__, __nodebug__, __target__("avxifma"),        \
+                 __min_vector_width__(256))) constexpr
+#else
 #define __DEFAULT_FN_ATTRS128                                                  \
   __attribute__((__always_inline__, __nodebug__, __target__("avxifma"),        \
                  __min_vector_width__(128)))
 #define __DEFAULT_FN_ATTRS256                                                  \
   __attribute__((__always_inline__, __nodebug__, __target__("avxifma"),        \
                  __min_vector_width__(256)))
+#endif
+
+#if !defined(__AVX512IFMA__) && defined(__AVXIFMA__)
+#define _mm_madd52hi_epu64(X, Y, Z) _mm_madd52hi_avx_epu64(X, Y, Z)
+#define _mm_madd52lo_epu64(X, Y, Z) _mm_madd52lo_avx_epu64(X, Y, Z)
+#define _mm256_madd52hi_epu64(X, Y, Z) _mm256_madd52hi_avx_epu64(X, Y, Z)
+#define _mm256_madd52lo_epu64(X, Y, Z) _mm256_madd52lo_avx_epu64(X, Y, Z)
+#endif
 
 // must vex-encoding
 
diff --git a/lib/include/avxintrin.h b/lib/include/avxintrin.h
index 8e497a9823..fbd20e5832 100644
--- a/lib/include/avxintrin.h
+++ b/lib/include/avxintrin.h
@@ -50,28 +50,19 @@ typedef __bf16 __m256bh __attribute__((__vector_size__(32), __aligned__(32)));
 #endif
 
 /* Define the default attributes for the functions in this file. */
-#if defined(__EVEX512__) && !defined(__AVX10_1_512__)
-#define __DEFAULT_FN_ATTRS                                                     \
-  __attribute__((__always_inline__, __nodebug__, __target__("avx,no-evex512"), \
-                 __min_vector_width__(256)))
-#define __DEFAULT_FN_ATTRS128                                                  \
-  __attribute__((__always_inline__, __nodebug__, __target__("avx,no-evex512"), \
-                 __min_vector_width__(128)))
-#else
 #define __DEFAULT_FN_ATTRS                                                     \
   __attribute__((__always_inline__, __nodebug__, __target__("avx"),            \
                  __min_vector_width__(256)))
 #define __DEFAULT_FN_ATTRS128                                                  \
   __attribute__((__always_inline__, __nodebug__, __target__("avx"),            \
                  __min_vector_width__(128)))
-#endif
 
 #if defined(__cplusplus) && (__cplusplus >= 201103L)
 #define __DEFAULT_FN_ATTRS_CONSTEXPR __DEFAULT_FN_ATTRS constexpr
 #define __DEFAULT_FN_ATTRS128_CONSTEXPR __DEFAULT_FN_ATTRS128 constexpr
 #else
-#define __DEFAULT_FN_ATTRS_CONSTEXPR __DEFAULT_FN_ATTRS128
-#define __DEFAULT_FN_ATTRS128_CONSTEXPR __DEFAULT_FN_ATTRS
+#define __DEFAULT_FN_ATTRS_CONSTEXPR __DEFAULT_FN_ATTRS
+#define __DEFAULT_FN_ATTRS128_CONSTEXPR __DEFAULT_FN_ATTRS128
 #endif
 
 /* Arithmetic */
@@ -87,9 +78,8 @@ typedef __bf16 __m256bh __attribute__((__vector_size__(32), __aligned__(32)));
 ///    A 256-bit vector of [4 x double] containing one of the source operands.
 /// \returns A 256-bit vector of [4 x double] containing the sums of both
 ///    operands.
-static __inline __m256d __DEFAULT_FN_ATTRS
-_mm256_add_pd(__m256d __a, __m256d __b)
-{
+static __inline __m256d __DEFAULT_FN_ATTRS_CONSTEXPR
+_mm256_add_pd(__m256d __a, __m256d __b) {
   return (__m256d)((__v4df)__a+(__v4df)__b);
 }
 
@@ -105,9 +95,8 @@ _mm256_add_pd(__m256d __a, __m256d __b)
 ///    A 256-bit vector of [8 x float] containing one of the source operands.
 /// \returns A 256-bit vector of [8 x float] containing the sums of both
 ///    operands.
-static __inline __m256 __DEFAULT_FN_ATTRS
-_mm256_add_ps(__m256 __a, __m256 __b)
-{
+static __inline __m256 __DEFAULT_FN_ATTRS_CONSTEXPR _mm256_add_ps(__m256 __a,
+                                                                  __m256 __b) {
   return (__m256)((__v8sf)__a+(__v8sf)__b);
 }
 
@@ -123,9 +112,8 @@ _mm256_add_ps(__m256 __a, __m256 __b)
 ///    A 256-bit vector of [4 x double] containing the subtrahend.
 /// \returns A 256-bit vector of [4 x double] containing the differences between
 ///    both operands.
-static __inline __m256d __DEFAULT_FN_ATTRS
-_mm256_sub_pd(__m256d __a, __m256d __b)
-{
+static __inline __m256d __DEFAULT_FN_ATTRS_CONSTEXPR
+_mm256_sub_pd(__m256d __a, __m256d __b) {
   return (__m256d)((__v4df)__a-(__v4df)__b);
 }
 
@@ -141,9 +129,8 @@ _mm256_sub_pd(__m256d __a, __m256d __b)
 ///    A 256-bit vector of [8 x float] containing the subtrahend.
 /// \returns A 256-bit vector of [8 x float] containing the differences between
 ///    both operands.
-static __inline __m256 __DEFAULT_FN_ATTRS
-_mm256_sub_ps(__m256 __a, __m256 __b)
-{
+static __inline __m256 __DEFAULT_FN_ATTRS_CONSTEXPR _mm256_sub_ps(__m256 __a,
+                                                                  __m256 __b) {
   return (__m256)((__v8sf)__a-(__v8sf)__b);
 }
 
@@ -160,9 +147,8 @@ _mm256_sub_ps(__m256 __a, __m256 __b)
 ///    A 256-bit vector of [4 x double] containing the right source operand.
 /// \returns A 256-bit vector of [4 x double] containing the alternating sums
 ///    and differences between both operands.
-static __inline __m256d __DEFAULT_FN_ATTRS
-_mm256_addsub_pd(__m256d __a, __m256d __b)
-{
+static __inline __m256d __DEFAULT_FN_ATTRS_CONSTEXPR
+_mm256_addsub_pd(__m256d __a, __m256d __b) {
   return (__m256d)__builtin_ia32_addsubpd256((__v4df)__a, (__v4df)__b);
 }
 
@@ -179,9 +165,8 @@ _mm256_addsub_pd(__m256d __a, __m256d __b)
 ///    A 256-bit vector of [8 x float] containing the right source operand.
 /// \returns A 256-bit vector of [8 x float] containing the alternating sums and
 ///    differences between both operands.
-static __inline __m256 __DEFAULT_FN_ATTRS
-_mm256_addsub_ps(__m256 __a, __m256 __b)
-{
+static __inline __m256 __DEFAULT_FN_ATTRS_CONSTEXPR
+_mm256_addsub_ps(__m256 __a, __m256 __b) {
   return (__m256)__builtin_ia32_addsubps256((__v8sf)__a, (__v8sf)__b);
 }
 
@@ -197,9 +182,8 @@ _mm256_addsub_ps(__m256 __a, __m256 __b)
 ///    A 256-bit vector of [4 x double] containing the divisor.
 /// \returns A 256-bit vector of [4 x double] containing the quotients of both
 ///    operands.
-static __inline __m256d __DEFAULT_FN_ATTRS
-_mm256_div_pd(__m256d __a, __m256d __b)
-{
+static __inline __m256d __DEFAULT_FN_ATTRS_CONSTEXPR
+_mm256_div_pd(__m256d __a, __m256d __b) {
   return (__m256d)((__v4df)__a/(__v4df)__b);
 }
 
@@ -215,9 +199,8 @@ _mm256_div_pd(__m256d __a, __m256d __b)
 ///    A 256-bit vector of [8 x float] containing the divisor.
 /// \returns A 256-bit vector of [8 x float] containing the quotients of both
 ///    operands.
-static __inline __m256 __DEFAULT_FN_ATTRS
-_mm256_div_ps(__m256 __a, __m256 __b)
-{
+static __inline __m256 __DEFAULT_FN_ATTRS_CONSTEXPR _mm256_div_ps(__m256 __a,
+                                                                  __m256 __b) {
   return (__m256)((__v8sf)__a/(__v8sf)__b);
 }
 
@@ -236,9 +219,8 @@ _mm256_div_ps(__m256 __a, __m256 __b)
 ///    A 256-bit vector of [4 x double] containing one of the operands.
 /// \returns A 256-bit vector of [4 x double] containing the maximum values
 ///    between both operands.
-static __inline __m256d __DEFAULT_FN_ATTRS
-_mm256_max_pd(__m256d __a, __m256d __b)
-{
+static __inline __m256d __DEFAULT_FN_ATTRS_CONSTEXPR
+_mm256_max_pd(__m256d __a, __m256d __b) {
   return (__m256d)__builtin_ia32_maxpd256((__v4df)__a, (__v4df)__b);
 }
 
@@ -257,9 +239,8 @@ _mm256_max_pd(__m256d __a, __m256d __b)
 ///    A 256-bit vector of [8 x float] containing one of the operands.
 /// \returns A 256-bit vector of [8 x float] containing the maximum values
 ///    between both operands.
-static __inline __m256 __DEFAULT_FN_ATTRS
-_mm256_max_ps(__m256 __a, __m256 __b)
-{
+static __inline __m256 __DEFAULT_FN_ATTRS_CONSTEXPR _mm256_max_ps(__m256 __a,
+                                                                  __m256 __b) {
   return (__m256)__builtin_ia32_maxps256((__v8sf)__a, (__v8sf)__b);
 }
 
@@ -278,9 +259,8 @@ _mm256_max_ps(__m256 __a, __m256 __b)
 ///    A 256-bit vector of [4 x double] containing one of the operands.
 /// \returns A 256-bit vector of [4 x double] containing the minimum values
 ///    between both operands.
-static __inline __m256d __DEFAULT_FN_ATTRS
-_mm256_min_pd(__m256d __a, __m256d __b)
-{
+static __inline __m256d __DEFAULT_FN_ATTRS_CONSTEXPR
+_mm256_min_pd(__m256d __a, __m256d __b) {
   return (__m256d)__builtin_ia32_minpd256((__v4df)__a, (__v4df)__b);
 }
 
@@ -299,9 +279,8 @@ _mm256_min_pd(__m256d __a, __m256d __b)
 ///    A 256-bit vector of [8 x float] containing one of the operands.
 /// \returns A 256-bit vector of [8 x float] containing the minimum values
 ///    between both operands.
-static __inline __m256 __DEFAULT_FN_ATTRS
-_mm256_min_ps(__m256 __a, __m256 __b)
-{
+static __inline __m256 __DEFAULT_FN_ATTRS_CONSTEXPR _mm256_min_ps(__m256 __a,
+                                                                  __m256 __b) {
   return (__m256)__builtin_ia32_minps256((__v8sf)__a, (__v8sf)__b);
 }
 
@@ -317,9 +296,8 @@ _mm256_min_ps(__m256 __a, __m256 __b)
 ///    A 256-bit vector of [4 x double] containing one of the operands.
 /// \returns A 256-bit vector of [4 x double] containing the products of both
 ///    operands.
-static __inline __m256d __DEFAULT_FN_ATTRS
-_mm256_mul_pd(__m256d __a, __m256d __b)
-{
+static __inline __m256d __DEFAULT_FN_ATTRS_CONSTEXPR
+_mm256_mul_pd(__m256d __a, __m256d __b) {
   return (__m256d)((__v4df)__a * (__v4df)__b);
 }
 
@@ -335,9 +313,8 @@ _mm256_mul_pd(__m256d __a, __m256d __b)
 ///    A 256-bit vector of [8 x float] containing one of the operands.
 /// \returns A 256-bit vector of [8 x float] containing the products of both
 ///    operands.
-static __inline __m256 __DEFAULT_FN_ATTRS
-_mm256_mul_ps(__m256 __a, __m256 __b)
-{
+static __inline __m256 __DEFAULT_FN_ATTRS_CONSTEXPR _mm256_mul_ps(__m256 __a,
+                                                                  __m256 __b) {
   return (__m256)((__v8sf)__a * (__v8sf)__b);
 }
 
@@ -352,10 +329,8 @@ _mm256_mul_ps(__m256 __a, __m256 __b)
 ///    A 256-bit vector of [4 x double].
 /// \returns A 256-bit vector of [4 x double] containing the square roots of the
 ///    values in the operand.
-static __inline __m256d __DEFAULT_FN_ATTRS
-_mm256_sqrt_pd(__m256d __a)
-{
-  return (__m256d)__builtin_ia32_sqrtpd256((__v4df)__a);
+static __inline __m256d __DEFAULT_FN_ATTRS _mm256_sqrt_pd(__m256d __a) {
+  return __builtin_elementwise_sqrt(__a);
 }
 
 /// Calculates the square roots of the values in a 256-bit vector of
@@ -369,10 +344,8 @@ _mm256_sqrt_pd(__m256d __a)
 ///    A 256-bit vector of [8 x float].
 /// \returns A 256-bit vector of [8 x float] containing the square roots of the
 ///    values in the operand.
-static __inline __m256 __DEFAULT_FN_ATTRS
-_mm256_sqrt_ps(__m256 __a)
-{
-  return (__m256)__builtin_ia32_sqrtps256((__v8sf)__a);
+static __inline __m256 __DEFAULT_FN_ATTRS _mm256_sqrt_ps(__m256 __a) {
+  return __builtin_elementwise_sqrt(__a);
 }
 
 /// Calculates the reciprocal square roots of the values in a 256-bit
@@ -555,7 +528,7 @@ _mm256_rcp_ps(__m256 __a)
 ///    A 256-bit vector of [4 x double] containing one of the source operands.
 /// \returns A 256-bit vector of [4 x double] containing the bitwise AND of the
 ///    values between both operands.
-static __inline __m256d __DEFAULT_FN_ATTRS
+static __inline __m256d __DEFAULT_FN_ATTRS_CONSTEXPR
 _mm256_and_pd(__m256d __a, __m256d __b)
 {
   return (__m256d)((__v4du)__a & (__v4du)__b);
@@ -573,7 +546,7 @@ _mm256_and_pd(__m256d __a, __m256d __b)
 ///    A 256-bit vector of [8 x float] containing one of the source operands.
 /// \returns A 256-bit vector of [8 x float] containing the bitwise AND of the
 ///    values between both operands.
-static __inline __m256 __DEFAULT_FN_ATTRS
+static __inline __m256 __DEFAULT_FN_ATTRS_CONSTEXPR
 _mm256_and_ps(__m256 __a, __m256 __b)
 {
   return (__m256)((__v8su)__a & (__v8su)__b);
@@ -594,7 +567,7 @@ _mm256_and_ps(__m256 __a, __m256 __b)
 /// \returns A 256-bit vector of [4 x double] containing the bitwise AND of the
 ///    values of the second operand and the one's complement of the first
 ///    operand.
-static __inline __m256d __DEFAULT_FN_ATTRS
+static __inline __m256d __DEFAULT_FN_ATTRS_CONSTEXPR
 _mm256_andnot_pd(__m256d __a, __m256d __b)
 {
   return (__m256d)(~(__v4du)__a & (__v4du)__b);
@@ -615,7 +588,7 @@ _mm256_andnot_pd(__m256d __a, __m256d __b)
 /// \returns A 256-bit vector of [8 x float] containing the bitwise AND of the
 ///    values of the second operand and the one's complement of the first
 ///    operand.
-static __inline __m256 __DEFAULT_FN_ATTRS
+static __inline __m256 __DEFAULT_FN_ATTRS_CONSTEXPR
 _mm256_andnot_ps(__m256 __a, __m256 __b)
 {
   return (__m256)(~(__v8su)__a & (__v8su)__b);
@@ -633,7 +606,7 @@ _mm256_andnot_ps(__m256 __a, __m256 __b)
 ///    A 256-bit vector of [4 x double] containing one of the source operands.
 /// \returns A 256-bit vector of [4 x double] containing the bitwise OR of the
 ///    values between both operands.
-static __inline __m256d __DEFAULT_FN_ATTRS
+static __inline __m256d __DEFAULT_FN_ATTRS_CONSTEXPR
 _mm256_or_pd(__m256d __a, __m256d __b)
 {
   return (__m256d)((__v4du)__a | (__v4du)__b);
@@ -651,7 +624,7 @@ _mm256_or_pd(__m256d __a, __m256d __b)
 ///    A 256-bit vector of [8 x float] containing one of the source operands.
 /// \returns A 256-bit vector of [8 x float] containing the bitwise OR of the
 ///    values between both operands.
-static __inline __m256 __DEFAULT_FN_ATTRS
+static __inline __m256 __DEFAULT_FN_ATTRS_CONSTEXPR
 _mm256_or_ps(__m256 __a, __m256 __b)
 {
   return (__m256)((__v8su)__a | (__v8su)__b);
@@ -669,7 +642,7 @@ _mm256_or_ps(__m256 __a, __m256 __b)
 ///    A 256-bit vector of [4 x double] containing one of the source operands.
 /// \returns A 256-bit vector of [4 x double] containing the bitwise XOR of the
 ///    values between both operands.
-static __inline __m256d __DEFAULT_FN_ATTRS
+static __inline __m256d __DEFAULT_FN_ATTRS_CONSTEXPR
 _mm256_xor_pd(__m256d __a, __m256d __b)
 {
   return (__m256d)((__v4du)__a ^ (__v4du)__b);
@@ -687,7 +660,7 @@ _mm256_xor_pd(__m256d __a, __m256d __b)
 ///    A 256-bit vector of [8 x float] containing one of the source operands.
 /// \returns A 256-bit vector of [8 x float] containing the bitwise XOR of the
 ///    values between both operands.
-static __inline __m256 __DEFAULT_FN_ATTRS
+static __inline __m256 __DEFAULT_FN_ATTRS_CONSTEXPR
 _mm256_xor_ps(__m256 __a, __m256 __b)
 {
   return (__m256)((__v8su)__a ^ (__v8su)__b);
@@ -711,9 +684,8 @@ _mm256_xor_ps(__m256 __a, __m256 __b)
 ///    elements of a vector of [4 x double].
 /// \returns A 256-bit vector of [4 x double] containing the horizontal sums of
 ///    both operands.
-static __inline __m256d __DEFAULT_FN_ATTRS
-_mm256_hadd_pd(__m256d __a, __m256d __b)
-{
+static __inline __m256d __DEFAULT_FN_ATTRS_CONSTEXPR
+_mm256_hadd_pd(__m256d __a, __m256d __b) {
   return (__m256d)__builtin_ia32_haddpd256((__v4df)__a, (__v4df)__b);
 }
 
@@ -734,9 +706,8 @@ _mm256_hadd_pd(__m256d __a, __m256d __b)
 ///    index 2, 3, 6, 7 of a vector of [8 x float].
 /// \returns A 256-bit vector of [8 x float] containing the horizontal sums of
 ///    both operands.
-static __inline __m256 __DEFAULT_FN_ATTRS
-_mm256_hadd_ps(__m256 __a, __m256 __b)
-{
+static __inline __m256 __DEFAULT_FN_ATTRS_CONSTEXPR _mm256_hadd_ps(__m256 __a,
+                                                                   __m256 __b) {
   return (__m256)__builtin_ia32_haddps256((__v8sf)__a, (__v8sf)__b);
 }
 
@@ -757,9 +728,8 @@ _mm256_hadd_ps(__m256 __a, __m256 __b)
 ///    odd-indexed elements of a vector of [4 x double].
 /// \returns A 256-bit vector of [4 x double] containing the horizontal
 ///    differences of both operands.
-static __inline __m256d __DEFAULT_FN_ATTRS
-_mm256_hsub_pd(__m256d __a, __m256d __b)
-{
+static __inline __m256d __DEFAULT_FN_ATTRS_CONSTEXPR
+_mm256_hsub_pd(__m256d __a, __m256d __b) {
   return (__m256d)__builtin_ia32_hsubpd256((__v4df)__a, (__v4df)__b);
 }
 
@@ -780,9 +750,8 @@ _mm256_hsub_pd(__m256d __a, __m256d __b)
 ///    elements with index 2, 3, 6, 7 of a vector of [8 x float].
 /// \returns A 256-bit vector of [8 x float] containing the horizontal
 ///    differences of both operands.
-static __inline __m256 __DEFAULT_FN_ATTRS
-_mm256_hsub_ps(__m256 __a, __m256 __b)
-{
+static __inline __m256 __DEFAULT_FN_ATTRS_CONSTEXPR _mm256_hsub_ps(__m256 __a,
+                                                                   __m256 __b) {
   return (__m256)__builtin_ia32_hsubps256((__v8sf)__a, (__v8sf)__b);
 }
 
@@ -810,9 +779,8 @@ _mm256_hsub_ps(__m256 __a, __m256 __b)
 ///      1: Bits [127:64] of the source are copied to bits [127:64] of the
 ///         returned vector.
 /// \returns A 128-bit vector of [2 x double] containing the copied values.
-static __inline __m128d __DEFAULT_FN_ATTRS128
-_mm_permutevar_pd(__m128d __a, __m128i __c)
-{
+static __inline __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR
+_mm_permutevar_pd(__m128d __a, __m128i __c) {
   return (__m128d)__builtin_ia32_vpermilvarpd((__v2df)__a, (__v2di)__c);
 }
 
@@ -849,9 +817,8 @@ _mm_permutevar_pd(__m128d __a, __m128i __c)
 ///      1: Bits [255:192] of the source are copied to bits [255:192] of the
 ///    returned vector.
 /// \returns A 256-bit vector of [4 x double] containing the copied values.
-static __inline __m256d __DEFAULT_FN_ATTRS
-_mm256_permutevar_pd(__m256d __a, __m256i __c)
-{
+static __inline __m256d __DEFAULT_FN_ATTRS_CONSTEXPR
+_mm256_permutevar_pd(__m256d __a, __m256i __c) {
   return (__m256d)__builtin_ia32_vpermilvarpd256((__v4df)__a, (__v4di)__c);
 }
 
@@ -904,9 +871,8 @@ _mm256_permutevar_pd(__m256d __a, __m256i __c)
 ///      11: Bits [127:96] of the source are copied to bits [127:96] of the
 ///          returned vector.
 /// \returns A 128-bit vector of [4 x float] containing the copied values.
-static __inline __m128 __DEFAULT_FN_ATTRS128
-_mm_permutevar_ps(__m128 __a, __m128i __c)
-{
+static __inline __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR
+_mm_permutevar_ps(__m128 __a, __m128i __c) {
   return (__m128)__builtin_ia32_vpermilvarps((__v4sf)__a, (__v4si)__c);
 }
 
@@ -995,9 +961,8 @@ _mm_permutevar_ps(__m128 __a, __m128i __c)
 ///      11: Bits [255:224] of the source are copied to bits [255:224] of the
 ///          returned vector.
 /// \returns A 256-bit vector of [8 x float] containing the copied values.
-static __inline __m256 __DEFAULT_FN_ATTRS
-_mm256_permutevar_ps(__m256 __a, __m256i __c)
-{
+static __inline __m256 __DEFAULT_FN_ATTRS_CONSTEXPR
+_mm256_permutevar_ps(__m256 __a, __m256i __c) {
   return (__m256)__builtin_ia32_vpermilvarps256((__v8sf)__a, (__v8si)__c);
 }
 
@@ -1419,9 +1384,8 @@ _mm256_permutevar_ps(__m256 __a, __m256i __c)
 ///    64-bit element in operand \a __b is copied to the same position in the
 ///    destination.
 /// \returns A 256-bit vector of [4 x double] containing the copied values.
-static __inline __m256d __DEFAULT_FN_ATTRS
-_mm256_blendv_pd(__m256d __a, __m256d __b, __m256d __c)
-{
+static __inline __m256d __DEFAULT_FN_ATTRS_CONSTEXPR
+_mm256_blendv_pd(__m256d __a, __m256d __b, __m256d __c) {
   return (__m256d)__builtin_ia32_blendvpd256(
     (__v4df)__a, (__v4df)__b, (__v4df)__c);
 }
@@ -1447,9 +1411,8 @@ _mm256_blendv_pd(__m256d __a, __m256d __b, __m256d __c)
 ///    corresponding 32-bit element in operand \a __b is copied to the same
 ///    position in the destination.
 /// \returns A 256-bit vector of [8 x float] containing the copied values.
-static __inline __m256 __DEFAULT_FN_ATTRS
-_mm256_blendv_ps(__m256 __a, __m256 __b, __m256 __c)
-{
+static __inline __m256 __DEFAULT_FN_ATTRS_CONSTEXPR
+_mm256_blendv_ps(__m256 __a, __m256 __b, __m256 __c) {
   return (__m256)__builtin_ia32_blendvps256(
     (__v8sf)__a, (__v8sf)__b, (__v8sf)__c);
 }
@@ -2190,9 +2153,8 @@ _mm256_blendv_ps(__m256 __a, __m256 __b, __m256 __c)
 /// \param __a
 ///    A 128-bit integer vector of [4 x i32].
 /// \returns A 256-bit vector of [4 x double] containing the converted values.
-static __inline __m256d __DEFAULT_FN_ATTRS
-_mm256_cvtepi32_pd(__m128i __a)
-{
+static __inline __m256d __DEFAULT_FN_ATTRS_CONSTEXPR
+_mm256_cvtepi32_pd(__m128i __a) {
   return (__m256d)__builtin_convertvector((__v4si)__a, __v4df);
 }
 
@@ -2205,9 +2167,8 @@ _mm256_cvtepi32_pd(__m128i __a)
 /// \param __a
 ///    A 256-bit integer vector.
 /// \returns A 256-bit vector of [8 x float] containing the converted values.
-static __inline __m256 __DEFAULT_FN_ATTRS
-_mm256_cvtepi32_ps(__m256i __a)
-{
+static __inline __m256 __DEFAULT_FN_ATTRS_CONSTEXPR
+_mm256_cvtepi32_ps(__m256i __a) {
   return (__m256)__builtin_convertvector((__v8si)__a, __v8sf);
 }
 
@@ -2221,9 +2182,8 @@ _mm256_cvtepi32_ps(__m256i __a)
 /// \param __a
 ///    A 256-bit vector of [4 x double].
 /// \returns A 128-bit vector of [4 x float] containing the converted values.
-static __inline __m128 __DEFAULT_FN_ATTRS
-_mm256_cvtpd_ps(__m256d __a)
-{
+static __inline __m128 __DEFAULT_FN_ATTRS_CONSTEXPR
+_mm256_cvtpd_ps(__m256d __a) {
   return (__m128)__builtin_ia32_cvtpd2ps256((__v4df) __a);
 }
 
@@ -2256,9 +2216,8 @@ _mm256_cvtps_epi32(__m256 __a)
 /// \param __a
 ///    A 128-bit vector of [4 x float].
 /// \returns A 256-bit vector of [4 x double] containing the converted values.
-static __inline __m256d __DEFAULT_FN_ATTRS
-_mm256_cvtps_pd(__m128 __a)
-{
+static __inline __m256d __DEFAULT_FN_ATTRS_CONSTEXPR
+_mm256_cvtps_pd(__m128 __a) {
   return (__m256d)__builtin_convertvector((__v4sf)__a, __v4df);
 }
 
@@ -2333,10 +2292,9 @@ _mm256_cvttps_epi32(__m256 __a)
 /// \param __a
 ///    A 256-bit vector of [4 x double].
 /// \returns A 64 bit double containing the first element of the input vector.
-static __inline double __DEFAULT_FN_ATTRS
-_mm256_cvtsd_f64(__m256d __a)
-{
- return __a[0];
+static __inline double __DEFAULT_FN_ATTRS_CONSTEXPR
+_mm256_cvtsd_f64(__m256d __a) {
+  return __a[0];
 }
 
 /// Returns the first element of the input vector of [8 x i32].
@@ -2349,11 +2307,10 @@ _mm256_cvtsd_f64(__m256d __a)
 /// \param __a
 ///    A 256-bit vector of [8 x i32].
 /// \returns A 32 bit integer containing the first element of the input vector.
-static __inline int __DEFAULT_FN_ATTRS
-_mm256_cvtsi256_si32(__m256i __a)
-{
- __v8si __b = (__v8si)__a;
- return __b[0];
+static __inline int __DEFAULT_FN_ATTRS_CONSTEXPR
+_mm256_cvtsi256_si32(__m256i __a) {
+  __v8si __b = (__v8si)__a;
+  return __b[0];
 }
 
 /// Returns the first element of the input vector of [8 x float].
@@ -2366,10 +2323,9 @@ _mm256_cvtsi256_si32(__m256i __a)
 /// \param __a
 ///    A 256-bit vector of [8 x float].
 /// \returns A 32 bit float containing the first element of the input vector.
-static __inline float __DEFAULT_FN_ATTRS
-_mm256_cvtss_f32(__m256 __a)
-{
- return __a[0];
+static __inline float __DEFAULT_FN_ATTRS_CONSTEXPR
+_mm256_cvtss_f32(__m256 __a) {
+  return __a[0];
 }
 
 /* Vector replicate */
@@ -2392,7 +2348,7 @@ _mm256_cvtss_f32(__m256 __a)
 ///    return value.
 /// \returns A 256-bit vector of [8 x float] containing the moved and duplicated
 ///    values.
-static __inline __m256 __DEFAULT_FN_ATTRS
+static __inline __m256 __DEFAULT_FN_ATTRS_CONSTEXPR
 _mm256_movehdup_ps(__m256 __a)
 {
   return __builtin_shufflevector((__v8sf)__a, (__v8sf)__a, 1, 1, 3, 3, 5, 5, 7, 7);
@@ -2417,7 +2373,7 @@ _mm256_movehdup_ps(__m256 __a)
 ///    return value.
 /// \returns A 256-bit vector of [8 x float] containing the moved and duplicated
 ///    values.
-static __inline __m256 __DEFAULT_FN_ATTRS
+static __inline __m256 __DEFAULT_FN_ATTRS_CONSTEXPR
 _mm256_moveldup_ps(__m256 __a)
 {
   return __builtin_shufflevector((__v8sf)__a, (__v8sf)__a, 0, 0, 2, 2, 4, 4, 6, 6);
@@ -2439,7 +2395,7 @@ _mm256_moveldup_ps(__m256 __a)
 ///    the return value.
 /// \returns A 256-bit vector of [4 x double] containing the moved and
 ///    duplicated values.
-static __inline __m256d __DEFAULT_FN_ATTRS
+static __inline __m256d __DEFAULT_FN_ATTRS_CONSTEXPR
 _mm256_movedup_pd(__m256d __a)
 {
   return __builtin_shufflevector((__v4df)__a, (__v4df)__a, 0, 0, 2, 2);
@@ -2462,9 +2418,8 @@ _mm256_movedup_pd(__m256d __a)
 ///    Bits [127:64] are written to bits [127:64] of the return value. \n
 ///    Bits [255:192] are written to bits [255:192] of the return value. \n
 /// \returns A 256-bit vector of [4 x double] containing the interleaved values.
-static __inline __m256d __DEFAULT_FN_ATTRS
-_mm256_unpackhi_pd(__m256d __a, __m256d __b)
-{
+static __inline __m256d __DEFAULT_FN_ATTRS_CONSTEXPR
+_mm256_unpackhi_pd(__m256d __a, __m256d __b) {
   return __builtin_shufflevector((__v4df)__a, (__v4df)__b, 1, 5, 1+2, 5+2);
 }
 
@@ -2484,9 +2439,8 @@ _mm256_unpackhi_pd(__m256d __a, __m256d __b)
 ///    Bits [63:0] are written to bits [127:64] of the return value. \n
 ///    Bits [191:128] are written to bits [255:192] of the return value. \n
 /// \returns A 256-bit vector of [4 x double] containing the interleaved values.
-static __inline __m256d __DEFAULT_FN_ATTRS
-_mm256_unpacklo_pd(__m256d __a, __m256d __b)
-{
+static __inline __m256d __DEFAULT_FN_ATTRS_CONSTEXPR
+_mm256_unpacklo_pd(__m256d __a, __m256d __b) {
   return __builtin_shufflevector((__v4df)__a, (__v4df)__b, 0, 4, 0+2, 4+2);
 }
 
@@ -2511,9 +2465,8 @@ _mm256_unpacklo_pd(__m256d __a, __m256d __b)
 ///    Bits [223:192] are written to bits [191:160] of the return value. \n
 ///    Bits [255:224] are written to bits [255:224] of the return value.
 /// \returns A 256-bit vector of [8 x float] containing the interleaved values.
-static __inline __m256 __DEFAULT_FN_ATTRS
-_mm256_unpackhi_ps(__m256 __a, __m256 __b)
-{
+static __inline __m256 __DEFAULT_FN_ATTRS_CONSTEXPR
+_mm256_unpackhi_ps(__m256 __a, __m256 __b) {
   return __builtin_shufflevector((__v8sf)__a, (__v8sf)__b, 2, 10, 2+1, 10+1, 6, 14, 6+1, 14+1);
 }
 
@@ -2538,9 +2491,8 @@ _mm256_unpackhi_ps(__m256 __a, __m256 __b)
 ///    Bits [159:128] are written to bits [191:160] of the return value. \n
 ///    Bits [191:160] are written to bits [255:224] of the return value.
 /// \returns A 256-bit vector of [8 x float] containing the interleaved values.
-static __inline __m256 __DEFAULT_FN_ATTRS
-_mm256_unpacklo_ps(__m256 __a, __m256 __b)
-{
+static __inline __m256 __DEFAULT_FN_ATTRS_CONSTEXPR
+_mm256_unpacklo_ps(__m256 __a, __m256 __b) {
   return __builtin_shufflevector((__v8sf)__a, (__v8sf)__b, 0, 8, 0+1, 8+1, 4, 12, 4+1, 12+1);
 }
 
@@ -2568,9 +2520,8 @@ _mm256_unpacklo_ps(__m256 __a, __m256 __b)
 /// \param __b
 ///    A 128-bit vector of [2 x double].
 /// \returns the ZF flag in the EFLAGS register.
-static __inline int __DEFAULT_FN_ATTRS128
-_mm_testz_pd(__m128d __a, __m128d __b)
-{
+static __inline int __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_testz_pd(__m128d __a,
+                                                                 __m128d __b) {
   return __builtin_ia32_vtestzpd((__v2df)__a, (__v2df)__b);
 }
 
@@ -2597,9 +2548,8 @@ _mm_testz_pd(__m128d __a, __m128d __b)
 /// \param __b
 ///    A 128-bit vector of [2 x double].
 /// \returns the CF flag in the EFLAGS register.
-static __inline int __DEFAULT_FN_ATTRS128
-_mm_testc_pd(__m128d __a, __m128d __b)
-{
+static __inline int __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_testc_pd(__m128d __a,
+                                                                 __m128d __b) {
   return __builtin_ia32_vtestcpd((__v2df)__a, (__v2df)__b);
 }
 
@@ -2627,9 +2577,8 @@ _mm_testc_pd(__m128d __a, __m128d __b)
 /// \param __b
 ///    A 128-bit vector of [2 x double].
 /// \returns 1 if both the ZF and CF flags are set to 0, otherwise returns 0.
-static __inline int __DEFAULT_FN_ATTRS128
-_mm_testnzc_pd(__m128d __a, __m128d __b)
-{
+static __inline int __DEFAULT_FN_ATTRS128_CONSTEXPR
+_mm_testnzc_pd(__m128d __a, __m128d __b) {
   return __builtin_ia32_vtestnzcpd((__v2df)__a, (__v2df)__b);
 }
 
@@ -2656,9 +2605,8 @@ _mm_testnzc_pd(__m128d __a, __m128d __b)
 /// \param __b
 ///    A 128-bit vector of [4 x float].
 /// \returns the ZF flag.
-static __inline int __DEFAULT_FN_ATTRS128
-_mm_testz_ps(__m128 __a, __m128 __b)
-{
+static __inline int __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_testz_ps(__m128 __a,
+                                                                 __m128 __b) {
   return __builtin_ia32_vtestzps((__v4sf)__a, (__v4sf)__b);
 }
 
@@ -2685,9 +2633,8 @@ _mm_testz_ps(__m128 __a, __m128 __b)
 /// \param __b
 ///    A 128-bit vector of [4 x float].
 /// \returns the CF flag.
-static __inline int __DEFAULT_FN_ATTRS128
-_mm_testc_ps(__m128 __a, __m128 __b)
-{
+static __inline int __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_testc_ps(__m128 __a,
+                                                                 __m128 __b) {
   return __builtin_ia32_vtestcps((__v4sf)__a, (__v4sf)__b);
 }
 
@@ -2715,9 +2662,8 @@ _mm_testc_ps(__m128 __a, __m128 __b)
 /// \param __b
 ///    A 128-bit vector of [4 x float].
 /// \returns 1 if both the ZF and CF flags are set to 0, otherwise returns 0.
-static __inline int __DEFAULT_FN_ATTRS128
-_mm_testnzc_ps(__m128 __a, __m128 __b)
-{
+static __inline int __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_testnzc_ps(__m128 __a,
+                                                                   __m128 __b) {
   return __builtin_ia32_vtestnzcps((__v4sf)__a, (__v4sf)__b);
 }
 
@@ -2744,9 +2690,8 @@ _mm_testnzc_ps(__m128 __a, __m128 __b)
 /// \param __b
 ///    A 256-bit vector of [4 x double].
 /// \returns the ZF flag.
-static __inline int __DEFAULT_FN_ATTRS
-_mm256_testz_pd(__m256d __a, __m256d __b)
-{
+static __inline int __DEFAULT_FN_ATTRS_CONSTEXPR _mm256_testz_pd(__m256d __a,
+                                                                 __m256d __b) {
   return __builtin_ia32_vtestzpd256((__v4df)__a, (__v4df)__b);
 }
 
@@ -2773,9 +2718,8 @@ _mm256_testz_pd(__m256d __a, __m256d __b)
 /// \param __b
 ///    A 256-bit vector of [4 x double].
 /// \returns the CF flag.
-static __inline int __DEFAULT_FN_ATTRS
-_mm256_testc_pd(__m256d __a, __m256d __b)
-{
+static __inline int __DEFAULT_FN_ATTRS_CONSTEXPR _mm256_testc_pd(__m256d __a,
+                                                                 __m256d __b) {
   return __builtin_ia32_vtestcpd256((__v4df)__a, (__v4df)__b);
 }
 
@@ -2803,9 +2747,8 @@ _mm256_testc_pd(__m256d __a, __m256d __b)
 /// \param __b
 ///    A 256-bit vector of [4 x double].
 /// \returns 1 if both the ZF and CF flags are set to 0, otherwise returns 0.
-static __inline int __DEFAULT_FN_ATTRS
-_mm256_testnzc_pd(__m256d __a, __m256d __b)
-{
+static __inline int __DEFAULT_FN_ATTRS_CONSTEXPR
+_mm256_testnzc_pd(__m256d __a, __m256d __b) {
   return __builtin_ia32_vtestnzcpd256((__v4df)__a, (__v4df)__b);
 }
 
@@ -2832,9 +2775,8 @@ _mm256_testnzc_pd(__m256d __a, __m256d __b)
 /// \param __b
 ///    A 256-bit vector of [8 x float].
 /// \returns the ZF flag.
-static __inline int __DEFAULT_FN_ATTRS
-_mm256_testz_ps(__m256 __a, __m256 __b)
-{
+static __inline int __DEFAULT_FN_ATTRS_CONSTEXPR _mm256_testz_ps(__m256 __a,
+                                                                 __m256 __b) {
   return __builtin_ia32_vtestzps256((__v8sf)__a, (__v8sf)__b);
 }
 
@@ -2861,9 +2803,8 @@ _mm256_testz_ps(__m256 __a, __m256 __b)
 /// \param __b
 ///    A 256-bit vector of [8 x float].
 /// \returns the CF flag.
-static __inline int __DEFAULT_FN_ATTRS
-_mm256_testc_ps(__m256 __a, __m256 __b)
-{
+static __inline int __DEFAULT_FN_ATTRS_CONSTEXPR _mm256_testc_ps(__m256 __a,
+                                                                 __m256 __b) {
   return __builtin_ia32_vtestcps256((__v8sf)__a, (__v8sf)__b);
 }
 
@@ -2891,9 +2832,8 @@ _mm256_testc_ps(__m256 __a, __m256 __b)
 /// \param __b
 ///    A 256-bit vector of [8 x float].
 /// \returns 1 if both the ZF and CF flags are set to 0, otherwise returns 0.
-static __inline int __DEFAULT_FN_ATTRS
-_mm256_testnzc_ps(__m256 __a, __m256 __b)
-{
+static __inline int __DEFAULT_FN_ATTRS_CONSTEXPR _mm256_testnzc_ps(__m256 __a,
+                                                                   __m256 __b) {
   return __builtin_ia32_vtestnzcps256((__v8sf)__a, (__v8sf)__b);
 }
 
@@ -2917,9 +2857,8 @@ _mm256_testnzc_ps(__m256 __a, __m256 __b)
 /// \param __b
 ///    A 256-bit integer vector.
 /// \returns the ZF flag.
-static __inline int __DEFAULT_FN_ATTRS
-_mm256_testz_si256(__m256i __a, __m256i __b)
-{
+static __inline int __DEFAULT_FN_ATTRS_CONSTEXPR
+_mm256_testz_si256(__m256i __a, __m256i __b) {
   return __builtin_ia32_ptestz256((__v4di)__a, (__v4di)__b);
 }
 
@@ -2943,9 +2882,8 @@ _mm256_testz_si256(__m256i __a, __m256i __b)
 /// \param __b
 ///    A 256-bit integer vector.
 /// \returns the CF flag.
-static __inline int __DEFAULT_FN_ATTRS
-_mm256_testc_si256(__m256i __a, __m256i __b)
-{
+static __inline int __DEFAULT_FN_ATTRS_CONSTEXPR
+_mm256_testc_si256(__m256i __a, __m256i __b) {
   return __builtin_ia32_ptestc256((__v4di)__a, (__v4di)__b);
 }
 
@@ -2970,9 +2908,8 @@ _mm256_testc_si256(__m256i __a, __m256i __b)
 /// \param __b
 ///    A 256-bit integer vector.
 /// \returns 1 if both the ZF and CF flags are set to 0, otherwise returns 0.
-static __inline int __DEFAULT_FN_ATTRS
-_mm256_testnzc_si256(__m256i __a, __m256i __b)
-{
+static __inline int __DEFAULT_FN_ATTRS_CONSTEXPR
+_mm256_testnzc_si256(__m256i __a, __m256i __b) {
   return __builtin_ia32_ptestnzc256((__v4di)__a, (__v4di)__b);
 }
 
@@ -2989,9 +2926,8 @@ _mm256_testnzc_si256(__m256i __a, __m256i __b)
 ///    A 256-bit vector of [4 x double] containing the double-precision
 ///    floating point values with sign bits to be extracted.
 /// \returns The sign bits from the operand, written to bits [3:0].
-static __inline int __DEFAULT_FN_ATTRS
-_mm256_movemask_pd(__m256d __a)
-{
+static __inline int __DEFAULT_FN_ATTRS_CONSTEXPR
+_mm256_movemask_pd(__m256d __a) {
   return __builtin_ia32_movmskpd256((__v4df)__a);
 }
 
@@ -3007,9 +2943,8 @@ _mm256_movemask_pd(__m256d __a)
 ///    A 256-bit vector of [8 x float] containing the single-precision floating
 ///    point values with sign bits to be extracted.
 /// \returns The sign bits from the operand, written to bits [7:0].
-static __inline int __DEFAULT_FN_ATTRS
-_mm256_movemask_ps(__m256 __a)
-{
+static __inline int __DEFAULT_FN_ATTRS_CONSTEXPR
+_mm256_movemask_ps(__m256 __a) {
   return __builtin_ia32_movmskps256((__v8sf)__a);
 }
 
@@ -3666,9 +3601,7 @@ _mm256_undefined_pd(void)
 /// This intrinsic has no corresponding instruction.
 ///
 /// \returns A 256-bit vector of [8 x float] containing undefined values.
-static __inline__ __m256 __DEFAULT_FN_ATTRS
-_mm256_undefined_ps(void)
-{
+static __inline__ __m256 __DEFAULT_FN_ATTRS _mm256_undefined_ps(void) {
   return (__m256)__builtin_ia32_undef256();
 }
 
@@ -3777,7 +3710,7 @@ _mm256_set_ps(float __a, float __b, float __c, float __d,
 /// \param __i7
 ///    A 32-bit integral value used to initialize bits [31:0] of the result.
 /// \returns An initialized 256-bit integer vector.
-static __inline __m256i __DEFAULT_FN_ATTRS
+static __inline __m256i __DEFAULT_FN_ATTRS_CONSTEXPR
 _mm256_set_epi32(int __i0, int __i1, int __i2, int __i3,
                  int __i4, int __i5, int __i6, int __i7)
 {
@@ -3825,7 +3758,7 @@ _mm256_set_epi32(int __i0, int __i1, int __i2, int __i3,
 /// \param __w00
 ///    A 16-bit integral value used to initialize bits [15:0] of the result.
 /// \returns An initialized 256-bit integer vector.
-static __inline __m256i __DEFAULT_FN_ATTRS
+static __inline __m256i __DEFAULT_FN_ATTRS_CONSTEXPR
 _mm256_set_epi16(short __w15, short __w14, short __w13, short __w12,
                  short __w11, short __w10, short __w09, short __w08,
                  short __w07, short __w06, short __w05, short __w04,
@@ -3908,7 +3841,7 @@ _mm256_set_epi16(short __w15, short __w14, short __w13, short __w12,
 /// \param __b00
 ///    An 8-bit integral value used to initialize bits [7:0] of the result.
 /// \returns An initialized 256-bit integer vector.
-static __inline __m256i __DEFAULT_FN_ATTRS
+static __inline __m256i __DEFAULT_FN_ATTRS_CONSTEXPR
 _mm256_set_epi8(char __b31, char __b30, char __b29, char __b28,
                 char __b27, char __b26, char __b25, char __b24,
                 char __b23, char __b22, char __b21, char __b20,
@@ -3943,7 +3876,7 @@ _mm256_set_epi8(char __b31, char __b30, char __b29, char __b28,
 /// \param __d
 ///    A 64-bit integral value used to initialize bits [63:0] of the result.
 /// \returns An initialized 256-bit integer vector.
-static __inline __m256i __DEFAULT_FN_ATTRS
+static __inline __m256i __DEFAULT_FN_ATTRS_CONSTEXPR
 _mm256_set_epi64x(long long __a, long long __b, long long __c, long long __d)
 {
   return __extension__ (__m256i)(__v4di){ __d, __c, __b, __a };
@@ -4044,7 +3977,7 @@ _mm256_setr_ps(float __a, float __b, float __c, float __d,
 /// \param __i7
 ///    A 32-bit integral value used to initialize bits [255:224] of the result.
 /// \returns An initialized 256-bit integer vector.
-static __inline __m256i __DEFAULT_FN_ATTRS
+static __inline __m256i __DEFAULT_FN_ATTRS_CONSTEXPR
 _mm256_setr_epi32(int __i0, int __i1, int __i2, int __i3,
                   int __i4, int __i5, int __i6, int __i7)
 {
@@ -4092,7 +4025,7 @@ _mm256_setr_epi32(int __i0, int __i1, int __i2, int __i3,
 /// \param __w00
 ///    A 16-bit integral value used to initialize bits [255:240] of the result.
 /// \returns An initialized 256-bit integer vector.
-static __inline __m256i __DEFAULT_FN_ATTRS
+static __inline __m256i __DEFAULT_FN_ATTRS_CONSTEXPR
 _mm256_setr_epi16(short __w15, short __w14, short __w13, short __w12,
        short __w11, short __w10, short __w09, short __w08,
        short __w07, short __w06, short __w05, short __w04,
@@ -4177,7 +4110,7 @@ _mm256_setr_epi16(short __w15, short __w14, short __w13, short __w12,
 /// \param __b00
 ///    An 8-bit integral value used to initialize bits [255:248] of the result.
 /// \returns An initialized 256-bit integer vector.
-static __inline __m256i __DEFAULT_FN_ATTRS
+static __inline __m256i __DEFAULT_FN_ATTRS_CONSTEXPR
 _mm256_setr_epi8(char __b31, char __b30, char __b29, char __b28,
                  char __b27, char __b26, char __b25, char __b24,
                  char __b23, char __b22, char __b21, char __b20,
@@ -4210,7 +4143,7 @@ _mm256_setr_epi8(char __b31, char __b30, char __b29, char __b28,
 /// \param __d
 ///    A 64-bit integral value used to initialize bits [255:192] of the result.
 /// \returns An initialized 256-bit integer vector.
-static __inline __m256i __DEFAULT_FN_ATTRS
+static __inline __m256i __DEFAULT_FN_ATTRS_CONSTEXPR
 _mm256_setr_epi64x(long long __a, long long __b, long long __c, long long __d)
 {
   return _mm256_set_epi64x(__d, __c, __b, __a);
@@ -4267,7 +4200,7 @@ _mm256_set1_ps(float __w)
 ///    A 32-bit integral value used to initialize each vector element of the
 ///    result.
 /// \returns An initialized 256-bit integer vector of [8 x i32].
-static __inline __m256i __DEFAULT_FN_ATTRS
+static __inline __m256i __DEFAULT_FN_ATTRS_CONSTEXPR
 _mm256_set1_epi32(int __i)
 {
   return _mm256_set_epi32(__i, __i, __i, __i, __i, __i, __i, __i);
@@ -4285,7 +4218,7 @@ _mm256_set1_epi32(int __i)
 ///    A 16-bit integral value used to initialize each vector element of the
 ///    result.
 /// \returns An initialized 256-bit integer vector of [16 x i16].
-static __inline __m256i __DEFAULT_FN_ATTRS
+static __inline __m256i __DEFAULT_FN_ATTRS_CONSTEXPR
 _mm256_set1_epi16(short __w)
 {
   return _mm256_set_epi16(__w, __w, __w, __w, __w, __w, __w, __w,
@@ -4303,7 +4236,7 @@ _mm256_set1_epi16(short __w)
 ///    An 8-bit integral value used to initialize each vector element of the
 ///    result.
 /// \returns An initialized 256-bit integer vector of [32 x i8].
-static __inline __m256i __DEFAULT_FN_ATTRS
+static __inline __m256i __DEFAULT_FN_ATTRS_CONSTEXPR
 _mm256_set1_epi8(char __b)
 {
   return _mm256_set_epi8(__b, __b, __b, __b, __b, __b, __b, __b,
@@ -4324,7 +4257,7 @@ _mm256_set1_epi8(char __b)
 ///    A 64-bit integral value used to initialize each vector element of the
 ///    result.
 /// \returns An initialized 256-bit integer vector of [4 x i64].
-static __inline __m256i __DEFAULT_FN_ATTRS
+static __inline __m256i __DEFAULT_FN_ATTRS_CONSTEXPR
 _mm256_set1_epi64x(long long __q)
 {
   return _mm256_set_epi64x(__q, __q, __q, __q);
@@ -4379,7 +4312,7 @@ _mm256_setzero_si256(void) {
 ///    A 256-bit floating-point vector of [4 x double].
 /// \returns A 256-bit floating-point vector of [8 x float] containing the same
 ///    bitwise pattern as the parameter.
-static __inline __m256 __DEFAULT_FN_ATTRS
+static __inline __m256 __DEFAULT_FN_ATTRS_CONSTEXPR
 _mm256_castpd_ps(__m256d __a)
 {
   return (__m256)__a;
@@ -4396,7 +4329,7 @@ _mm256_castpd_ps(__m256d __a)
 ///    A 256-bit floating-point vector of [4 x double].
 /// \returns A 256-bit integer vector containing the same bitwise pattern as the
 ///    parameter.
-static __inline __m256i __DEFAULT_FN_ATTRS
+static __inline __m256i __DEFAULT_FN_ATTRS_CONSTEXPR
 _mm256_castpd_si256(__m256d __a)
 {
   return (__m256i)__a;
@@ -4413,7 +4346,7 @@ _mm256_castpd_si256(__m256d __a)
 ///    A 256-bit floating-point vector of [8 x float].
 /// \returns A 256-bit floating-point vector of [4 x double] containing the same
 ///    bitwise pattern as the parameter.
-static __inline __m256d __DEFAULT_FN_ATTRS
+static __inline __m256d __DEFAULT_FN_ATTRS_CONSTEXPR
 _mm256_castps_pd(__m256 __a)
 {
   return (__m256d)__a;
@@ -4430,7 +4363,7 @@ _mm256_castps_pd(__m256 __a)
 ///    A 256-bit floating-point vector of [8 x float].
 /// \returns A 256-bit integer vector containing the same bitwise pattern as the
 ///    parameter.
-static __inline __m256i __DEFAULT_FN_ATTRS
+static __inline __m256i __DEFAULT_FN_ATTRS_CONSTEXPR
 _mm256_castps_si256(__m256 __a)
 {
   return (__m256i)__a;
@@ -4447,7 +4380,7 @@ _mm256_castps_si256(__m256 __a)
 ///    A 256-bit integer vector.
 /// \returns A 256-bit floating-point vector of [8 x float] containing the same
 ///    bitwise pattern as the parameter.
-static __inline __m256 __DEFAULT_FN_ATTRS
+static __inline __m256 __DEFAULT_FN_ATTRS_CONSTEXPR
 _mm256_castsi256_ps(__m256i __a)
 {
   return (__m256)__a;
@@ -4464,7 +4397,7 @@ _mm256_castsi256_ps(__m256i __a)
 ///    A 256-bit integer vector.
 /// \returns A 256-bit floating-point vector of [4 x double] containing the same
 ///    bitwise pattern as the parameter.
-static __inline __m256d __DEFAULT_FN_ATTRS
+static __inline __m256d __DEFAULT_FN_ATTRS_CONSTEXPR
 _mm256_castsi256_pd(__m256i __a)
 {
   return (__m256d)__a;
@@ -4481,7 +4414,7 @@ _mm256_castsi256_pd(__m256i __a)
 ///    A 256-bit floating-point vector of [4 x double].
 /// \returns A 128-bit floating-point vector of [2 x double] containing the
 ///    lower 128 bits of the parameter.
-static __inline __m128d __DEFAULT_FN_ATTRS
+static __inline __m128d __DEFAULT_FN_ATTRS_CONSTEXPR
 _mm256_castpd256_pd128(__m256d __a)
 {
   return __builtin_shufflevector((__v4df)__a, (__v4df)__a, 0, 1);
@@ -4498,7 +4431,7 @@ _mm256_castpd256_pd128(__m256d __a)
 ///    A 256-bit floating-point vector of [8 x float].
 /// \returns A 128-bit floating-point vector of [4 x float] containing the
 ///    lower 128 bits of the parameter.
-static __inline __m128 __DEFAULT_FN_ATTRS
+static __inline __m128 __DEFAULT_FN_ATTRS_CONSTEXPR
 _mm256_castps256_ps128(__m256 __a)
 {
   return __builtin_shufflevector((__v8sf)__a, (__v8sf)__a, 0, 1, 2, 3);
@@ -4514,7 +4447,7 @@ _mm256_castps256_ps128(__m256 __a)
 ///    A 256-bit integer vector.
 /// \returns A 128-bit integer vector containing the lower 128 bits of the
 ///    parameter.
-static __inline __m128i __DEFAULT_FN_ATTRS
+static __inline __m128i __DEFAULT_FN_ATTRS_CONSTEXPR
 _mm256_castsi256_si128(__m256i __a)
 {
   return __builtin_shufflevector((__v4di)__a, (__v4di)__a, 0, 1);
@@ -4598,9 +4531,8 @@ _mm256_castsi128_si256(__m128i __a)
 ///    A 128-bit vector of [2 x double].
 /// \returns A 256-bit floating-point vector of [4 x double]. The lower 128 bits
 ///    contain the value of the parameter. The upper 128 bits are set to zero.
-static __inline __m256d __DEFAULT_FN_ATTRS
-_mm256_zextpd128_pd256(__m128d __a)
-{
+static __inline __m256d __DEFAULT_FN_ATTRS_CONSTEXPR
+_mm256_zextpd128_pd256(__m128d __a) {
   return __builtin_shufflevector((__v2df)__a, (__v2df)_mm_setzero_pd(), 0, 1, 2, 3);
 }
 
@@ -4616,9 +4548,8 @@ _mm256_zextpd128_pd256(__m128d __a)
 ///    A 128-bit vector of [4 x float].
 /// \returns A 256-bit floating-point vector of [8 x float]. The lower 128 bits
 ///    contain the value of the parameter. The upper 128 bits are set to zero.
-static __inline __m256 __DEFAULT_FN_ATTRS
-_mm256_zextps128_ps256(__m128 __a)
-{
+static __inline __m256 __DEFAULT_FN_ATTRS_CONSTEXPR
+_mm256_zextps128_ps256(__m128 __a) {
   return __builtin_shufflevector((__v4sf)__a, (__v4sf)_mm_setzero_ps(), 0, 1, 2, 3, 4, 5, 6, 7);
 }
 
@@ -4634,9 +4565,8 @@ _mm256_zextps128_ps256(__m128 __a)
 ///    A 128-bit integer vector.
 /// \returns A 256-bit integer vector. The lower 128 bits contain the value of
 ///    the parameter. The upper 128 bits are set to zero.
-static __inline __m256i __DEFAULT_FN_ATTRS
-_mm256_zextsi128_si256(__m128i __a)
-{
+static __inline __m256i __DEFAULT_FN_ATTRS_CONSTEXPR
+_mm256_zextsi128_si256(__m128i __a) {
   return __builtin_shufflevector((__v2di)__a, (__v2di)_mm_setzero_si128(), 0, 1, 2, 3);
 }
 
@@ -4851,9 +4781,8 @@ _mm256_zextsi128_si256(__m128i __a)
 ///    128 bits of the result.
 /// \returns A 256-bit floating-point vector of [8 x float] containing the
 ///    concatenated result.
-static __inline __m256 __DEFAULT_FN_ATTRS
-_mm256_set_m128 (__m128 __hi, __m128 __lo)
-{
+static __inline __m256 __DEFAULT_FN_ATTRS_CONSTEXPR
+_mm256_set_m128(__m128 __hi, __m128 __lo) {
   return (__m256) __builtin_shufflevector((__v4sf)__lo, (__v4sf)__hi, 0, 1, 2, 3, 4, 5, 6, 7);
 }
 
@@ -4872,9 +4801,8 @@ _mm256_set_m128 (__m128 __hi, __m128 __lo)
 ///    128 bits of the result.
 /// \returns A 256-bit floating-point vector of [4 x double] containing the
 ///    concatenated result.
-static __inline __m256d __DEFAULT_FN_ATTRS
-_mm256_set_m128d (__m128d __hi, __m128d __lo)
-{
+static __inline __m256d __DEFAULT_FN_ATTRS_CONSTEXPR
+_mm256_set_m128d(__m128d __hi, __m128d __lo) {
   return (__m256d) __builtin_shufflevector((__v2df)__lo, (__v2df)__hi, 0, 1, 2, 3);
 }
 
@@ -4892,9 +4820,8 @@ _mm256_set_m128d (__m128d __hi, __m128d __lo)
 ///    A 128-bit integer vector to be copied to the lower 128 bits of the
 ///    result.
 /// \returns A 256-bit integer vector containing the concatenated result.
-static __inline __m256i __DEFAULT_FN_ATTRS
-_mm256_set_m128i (__m128i __hi, __m128i __lo)
-{
+static __inline __m256i __DEFAULT_FN_ATTRS_CONSTEXPR
+_mm256_set_m128i(__m128i __hi, __m128i __lo) {
   return (__m256i) __builtin_shufflevector((__v2di)__lo, (__v2di)__hi, 0, 1, 2, 3);
 }
 
@@ -4915,9 +4842,8 @@ _mm256_set_m128i (__m128i __hi, __m128i __lo)
 ///    128 bits of the result.
 /// \returns A 256-bit floating-point vector of [8 x float] containing the
 ///    concatenated result.
-static __inline __m256 __DEFAULT_FN_ATTRS
-_mm256_setr_m128 (__m128 __lo, __m128 __hi)
-{
+static __inline __m256 __DEFAULT_FN_ATTRS_CONSTEXPR
+_mm256_setr_m128(__m128 __lo, __m128 __hi) {
   return _mm256_set_m128(__hi, __lo);
 }
 
@@ -4938,9 +4864,8 @@ _mm256_setr_m128 (__m128 __lo, __m128 __hi)
 ///    128 bits of the result.
 /// \returns A 256-bit floating-point vector of [4 x double] containing the
 ///    concatenated result.
-static __inline __m256d __DEFAULT_FN_ATTRS
-_mm256_setr_m128d (__m128d __lo, __m128d __hi)
-{
+static __inline __m256d __DEFAULT_FN_ATTRS_CONSTEXPR
+_mm256_setr_m128d(__m128d __lo, __m128d __hi) {
   return (__m256d)_mm256_set_m128d(__hi, __lo);
 }
 
@@ -4959,9 +4884,8 @@ _mm256_setr_m128d (__m128d __lo, __m128d __hi)
 ///    A 128-bit integer vector to be copied to the upper 128 bits of the
 ///    result.
 /// \returns A 256-bit integer vector containing the concatenated result.
-static __inline __m256i __DEFAULT_FN_ATTRS
-_mm256_setr_m128i (__m128i __lo, __m128i __hi)
-{
+static __inline __m256i __DEFAULT_FN_ATTRS_CONSTEXPR
+_mm256_setr_m128i(__m128i __lo, __m128i __hi) {
   return (__m256i)_mm256_set_m128i(__hi, __lo);
 }
 
diff --git a/lib/include/avxvnniint16intrin.h b/lib/include/avxvnniint16intrin.h
index 805d249911..98d94ee3fc 100644
--- a/lib/include/avxvnniint16intrin.h
+++ b/lib/include/avxvnniint16intrin.h
@@ -16,9 +16,10 @@
 #define __AVXVNNIINT16INTRIN_H
 
 /// Multiply groups of 2 adjacent pairs of signed 16-bit integers in \a __A with
-///    corresponding unsigned 16-bit integers in \a __B, producing 2 intermediate
-///    signed 16-bit results. Sum these 2 results with the corresponding
-///    32-bit integer in \a __W, and store the packed 32-bit results in \a dst.
+///    corresponding unsigned 16-bit integers in \a __B, producing 2
+///    intermediate signed 32-bit results. Sum these 2 results with the
+///    corresponding 32-bit integer in \a __W, and store the packed 32-bit
+///    results in \a dst.
 ///
 /// \headerfile <immintrin.h>
 ///
@@ -40,19 +41,21 @@
 /// \code{.operation}
 /// FOR j := 0 to 3
 /// 	tmp1.dword := SignExtend32(__A.word[2*j]) * ZeroExtend32(__B.word[2*j])
-/// 	tmp2.dword := SignExtend32(__A.word[2*j+1]) * ZeroExtend32(__B.word[2*j+1])
+/// 	tmp2.dword :=
+/// 	  SignExtend32(__A.word[2*j+1]) * ZeroExtend32(__B.word[2*j+1])
 /// 	dst.dword[j] := __W.dword[j] + tmp1 + tmp2
 /// ENDFOR
 /// dst[MAX:128] := 0
 /// \endcode
 #define _mm_dpwsud_epi32(__W, __A, __B)                                        \
-  ((__m128i)__builtin_ia32_vpdpwsud128((__v4si)(__W), (__v4si)(__A),           \
-                                       (__v4si)(__B)))
+  ((__m128i)__builtin_ia32_vpdpwsud128((__v4si)(__W), (__v8hi)(__A),           \
+                                       (__v8hu)(__B)))
 
 /// Multiply groups of 2 adjacent pairs of signed 16-bit integers in \a __A with
-///    corresponding unsigned 16-bit integers in \a __B, producing 2 intermediate
-///    signed 16-bit results. Sum these 2 results with the corresponding
-///    32-bit integer in \a __W, and store the packed 32-bit results in \a dst.
+///    corresponding unsigned 16-bit integers in \a __B, producing 2
+///    intermediate signed 32-bit results. Sum these 2 results with the
+///    corresponding 32-bit integer in \a __W, and store the packed 32-bit
+///    results in \a dst.
 ///
 /// \headerfile <immintrin.h>
 ///
@@ -74,20 +77,21 @@
 /// \code{.operation}
 /// FOR j := 0 to 7
 /// 	tmp1.dword := SignExtend32(__A.word[2*j]) * ZeroExtend32(__B.word[2*j])
-/// 	tmp2.dword := SignExtend32(__A.word[2*j+1]) * ZeroExtend32(__B.word[2*j+1])
-/// 	dst.dword[j] := __W.dword[j] + tmp1 + tmp2
+/// 	tmp2.dword :=
+/// 	  SignExtend32(__A.word[2*j+1]) * ZeroExtend32(__B.word[2*j+1])
+/// 	dst.dword[j] := __W.dword[j] + tmp1 + tmp2
 /// ENDFOR
 /// dst[MAX:256] := 0
 /// \endcode
 #define _mm256_dpwsud_epi32(__W, __A, __B)                                     \
-  ((__m256i)__builtin_ia32_vpdpwsud256((__v8si)(__W), (__v8si)(__A),           \
-                                       (__v8si)(__B)))
+  ((__m256i)__builtin_ia32_vpdpwsud256((__v8si)(__W), (__v16hi)(__A),          \
+                                       (__v16hu)(__B)))
 
 /// Multiply groups of 2 adjacent pairs of signed 16-bit integers in \a __A with
-///    corresponding unsigned 16-bit integers in \a __B, producing 2 intermediate
-///    signed 16-bit results. Sum these 2 results with the corresponding
-///    32-bit integer in \a __W with signed saturation, and store the packed
-///    32-bit results in \a dst.
+///    corresponding unsigned 16-bit integers in \a __B, producing 2
+///    intermediate signed 32-bit results. Sum these 2 results with the
+///    corresponding 32-bit integer in \a __W with signed saturation, and store
+///    the packed 32-bit results in \a dst.
 ///
 /// \headerfile <immintrin.h>
 ///
@@ -109,20 +113,22 @@
 /// \code{.operation}
 /// FOR j := 0 to 3
 /// 	tmp1.dword := SignExtend32(__A.word[2*j]) * ZeroExtend32(__B.word[2*j])
-/// 	tmp2.dword := SignExtend32(__A.word[2*j+1]) * ZeroExtend32(__B.word[2*j+1])
+/// 	tmp2.dword :=
+/// 	  SignExtend32(__A.word[2*j+1]) * ZeroExtend32(__B.word[2*j+1])
 /// 	dst.dword[j] := SIGNED_DWORD_SATURATE(__W.dword[j] + tmp1 + tmp2)
 /// ENDFOR
 /// dst[MAX:128] := 0
 /// \endcode
+/// Multiply groups of 2 adjacent pairs of signed 16-bit integers in \a __A with
 #define _mm_dpwsuds_epi32(__W, __A, __B)                                       \
-  ((__m128i)__builtin_ia32_vpdpwsuds128((__v4si)(__W), (__v4si)(__A),          \
-                                        (__v4si)(__B)))
+  ((__m128i)__builtin_ia32_vpdpwsuds128((__v4si)(__W), (__v8hi)(__A),          \
+                                        (__v8hu)(__B)))
 
 /// Multiply groups of 2 adjacent pairs of signed 16-bit integers in \a __A with
-///    corresponding unsigned 16-bit integers in \a __B, producing 2 intermediate
-///    signed 16-bit results. Sum these 2 results with the corresponding
-///    32-bit integer in \a __W with signed saturation, and store the packed
-///    32-bit results in \a dst.
+///    corresponding unsigned 16-bit integers in \a __B, producing 2
+///    intermediate signed 32-bit results. Sum these 2 results with the
+///    corresponding 32-bit integer in \a __W with signed saturation, and store
+///    the packed 32-bit results in \a dst.
 ///
 /// \headerfile <immintrin.h>
 ///
@@ -144,19 +150,21 @@
 /// \code{.operation}
 /// FOR j := 0 to 7
 /// 	tmp1.dword := SignExtend32(__A.word[2*j]) * ZeroExtend32(__B.word[2*j])
-/// 	tmp2.dword := SignExtend32(__A.word[2*j+1]) * ZeroExtend32(__B.word[2*j+1])
+/// 	tmp2.dword :=
+/// 	  SignExtend32(__A.word[2*j+1]) * ZeroExtend32(__B.word[2*j+1])
 /// 	dst.dword[j] := SIGNED_DWORD_SATURATE(__W.dword[j] + tmp1 + tmp2)
 /// ENDFOR
 /// dst[MAX:256] := 0
 /// \endcode
 #define _mm256_dpwsuds_epi32(__W, __A, __B)                                    \
-  ((__m256i)__builtin_ia32_vpdpwsuds256((__v8si)(__W), (__v8si)(__A),          \
-                                        (__v8si)(__B)))
+  ((__m256i)__builtin_ia32_vpdpwsuds256((__v8si)(__W), (__v16hi)(__A),         \
+                                        (__v16hu)(__B)))
 
-/// Multiply groups of 2 adjacent pairs of unsigned 16-bit integers in \a __A with
-///    corresponding signed 16-bit integers in \a __B, producing 2 intermediate
-///    signed 16-bit results. Sum these 2 results with the corresponding
-///    32-bit integer in \a __W, and store the packed 32-bit results in \a dst.
+/// Multiply groups of 2 adjacent pairs of unsigned 16-bit integers in \a __A
+///    with corresponding signed 16-bit integers in \a __B, producing 2
+///    intermediate signed 32-bit results. Sum these 2 results with the
+///    corresponding 32-bit integer in \a __W, and store the packed 32-bit
+///    results in \a dst.
 ///
 /// \headerfile <immintrin.h>
 ///
@@ -178,19 +186,21 @@
 /// \code{.operation}
 /// FOR j := 0 to 3
 /// 	tmp1.dword := ZeroExtend32(__A.word[2*j]) * SignExtend32(__B.word[2*j])
-/// 	tmp2.dword := ZeroExtend32(__A.word[2*j+1]) * SignExtend32(__B.word[2*j+1])
+/// 	tmp2.dword :=
+/// 	  ZeroExtend32(__A.word[2*j+1]) * SignExtend32(__B.word[2*j+1])
 /// 	dst.dword[j] := __W.dword[j] + tmp1 + tmp2
 /// ENDFOR
 /// dst[MAX:128] := 0
 /// \endcode
 #define _mm_dpwusd_epi32(__W, __A, __B)                                        \
-  ((__m128i)__builtin_ia32_vpdpwusd128((__v4si)(__W), (__v4si)(__A),           \
-                                       (__v4si)(__B)))
+  ((__m128i)__builtin_ia32_vpdpwusd128((__v4si)(__W), (__v8hu)(__A),           \
+                                       (__v8hi)(__B)))
 
-/// Multiply groups of 2 adjacent pairs of unsigned 16-bit integers in \a __A with
-///    corresponding signed 16-bit integers in \a __B, producing 2 intermediate
-///    signed 16-bit results. Sum these 2 results with the corresponding
-///    32-bit integer in \a __W, and store the packed 32-bit results in \a dst.
+/// Multiply groups of 2 adjacent pairs of unsigned 16-bit integers in \a __A
+///    with corresponding signed 16-bit integers in \a __B, producing 2
+///    intermediate signed 32-bit results. Sum these 2 results with the
+///    corresponding 32-bit integer in \a __W, and store the packed 32-bit
+///    results in \a dst.
 ///
 /// \headerfile <immintrin.h>
 ///
@@ -212,20 +222,21 @@
 /// \code{.operation}
 /// FOR j := 0 to 7
 /// 	tmp1.dword := ZeroExtend32(__A.word[2*j]) * SignExtend32(__B.word[2*j])
-/// 	tmp2.dword := ZeroExtend32(__A.word[2*j+1]) * SignExtend32(__B.word[2*j+1])
+/// 	tmp2.dword :=
+/// 	  ZeroExtend32(__A.word[2*j+1]) * SignExtend32(__B.word[2*j+1])
 /// 	dst.dword[j] := __W.dword[j] + tmp1 + tmp2
 /// ENDFOR
 /// dst[MAX:256] := 0
 /// \endcode
 #define _mm256_dpwusd_epi32(__W, __A, __B)                                     \
-  ((__m256i)__builtin_ia32_vpdpwusd256((__v8si)(__W), (__v8si)(__A),           \
-                                       (__v8si)(__B)))
+  ((__m256i)__builtin_ia32_vpdpwusd256((__v8si)(__W), (__v16hu)(__A),          \
+                                       (__v16hi)(__B)))
 
-/// Multiply groups of 2 adjacent pairs of unsigned 16-bit integers in \a __A with
-///    corresponding signed 16-bit integers in \a __B, producing 2 intermediate
-///    signed 16-bit results. Sum these 2 results with the corresponding
-///    32-bit integer in \a __W with signed saturation, and store the packed
-///    32-bit results in \a dst.
+/// Multiply groups of 2 adjacent pairs of unsigned 16-bit integers in \a __A
+///    with corresponding signed 16-bit integers in \a __B, producing 2
+///    intermediate signed 32-bit results. Sum these 2 results with the
+///    corresponding 32-bit integer in \a __W with signed saturation, and
+///    store the packed 32-bit results in \a dst.
 ///
 /// \headerfile <immintrin.h>
 ///
@@ -233,7 +244,7 @@
 /// __m128i _mm_dpwusds_epi32(__m128i __W, __m128i __A, __m128i __B)
 /// \endcode
 ///
-/// This intrinsic corresponds to the \c VPDPWSUDS instruction.
+/// This intrinsic corresponds to the \c VPDPWUSDS instruction.
 ///
 /// \param __W
 ///    A 128-bit vector of [4 x int].
@@ -247,20 +258,21 @@
 /// \code{.operation}
 /// FOR j := 0 to 3
 /// 	tmp1.dword := ZeroExtend32(__A.word[2*j]) * SignExtend32(__B.word[2*j])
-/// 	tmp2.dword := ZeroExtend32(__A.word[2*j+1]) * SignExtend32(__B.word[2*j+1])
+/// 	tmp2.dword :=
+/// 	  ZeroExtend32(__A.word[2*j+1]) * SignExtend32(__B.word[2*j+1])
 /// 	dst.dword[j] := SIGNED_DWORD_SATURATE(__W.dword[j] + tmp1 + tmp2)
 /// ENDFOR
 /// dst[MAX:128] := 0
 /// \endcode
 #define _mm_dpwusds_epi32(__W, __A, __B)                                       \
-  ((__m128i)__builtin_ia32_vpdpwusds128((__v4si)(__W), (__v4si)(__A),          \
-                                        (__v4si)(__B)))
+  ((__m128i)__builtin_ia32_vpdpwusds128((__v4si)(__W), (__v8hu)(__A),          \
+                                        (__v8hi)(__B)))
 
-/// Multiply groups of 2 adjacent pairs of unsigned 16-bit integers in \a __A with
-///    corresponding signed 16-bit integers in \a __B, producing 2 intermediate
-///    signed 16-bit results. Sum these 2 results with the corresponding
-///    32-bit integer in \a __W with signed saturation, and store the packed
-///    32-bit results in \a dst.
+/// Multiply groups of 2 adjacent pairs of unsigned 16-bit integers in \a __A
+///    with corresponding signed 16-bit integers in \a __B, producing 2
+///    intermediate signed 32-bit results. Sum these 2 results with the
+///    corresponding 32-bit integer in \a __W with signed saturation, and
+///    store the packed 32-bit results in \a dst.
 ///
 /// \headerfile <immintrin.h>
 ///
@@ -268,7 +280,7 @@
 /// __m256i _mm256_dpwsuds_epi32(__m256i __W, __m256i __A, __m256i __B)
 /// \endcode
 ///
-/// This intrinsic corresponds to the \c VPDPWSUDS instruction.
+/// This intrinsic corresponds to the \c VPDPWUSDS instruction.
 ///
 /// \param __W
 ///    A 256-bit vector of [8 x int].
@@ -282,19 +294,21 @@
 /// \code{.operation}
 /// FOR j := 0 to 7
 /// 	tmp1.dword := ZeroExtend32(__A.word[2*j]) * SignExtend32(__B.word[2*j])
-/// 	tmp2.dword := ZeroExtend32(__A.word[2*j+1]) * SignExtend32(__B.word[2*j+1])
+/// 	tmp2.dword :=
+/// 	  ZeroExtend32(__A.word[2*j+1]) * SignExtend32(__B.word[2*j+1])
 /// 	dst.dword[j] := SIGNED_DWORD_SATURATE(__W.dword[j] + tmp1 + tmp2)
 /// ENDFOR
 /// dst[MAX:256] := 0
 /// \endcode
 #define _mm256_dpwusds_epi32(__W, __A, __B)                                    \
-  ((__m256i)__builtin_ia32_vpdpwusds256((__v8si)(__W), (__v8si)(__A),          \
-                                        (__v8si)(__B)))
+  ((__m256i)__builtin_ia32_vpdpwusds256((__v8si)(__W), (__v16hu)(__A),         \
+                                        (__v16hi)(__B)))
 
-/// Multiply groups of 2 adjacent pairs of unsigned 16-bit integers in \a __A with
-///    corresponding unsigned 16-bit integers in \a __B, producing 2 intermediate
-///    signed 16-bit results. Sum these 2 results with the corresponding
-///    32-bit integer in \a __W, and store the packed 32-bit results in \a dst.
+/// Multiply groups of 2 adjacent pairs of unsigned 16-bit integers in \a __A
+///    with corresponding unsigned 16-bit integers in \a __B, producing 2
+///    intermediate unsigned 32-bit results. Sum these 2 results with the
+///    corresponding 32-bit integer in \a __W, and store the packed 32-bit
+///    results in \a dst.
 ///
 /// \headerfile <immintrin.h>
 ///
@@ -305,30 +319,32 @@
 /// This intrinsic corresponds to the \c VPDPWUUD instruction.
 ///
 /// \param __W
-///    A 128-bit vector of [4 x unsigned int].
+///    A 128-bit vector of [4 x int].
 /// \param __A
 ///    A 128-bit vector of [8 x unsigned short].
 /// \param __B
 ///    A 128-bit vector of [8 x unsigned short].
 /// \returns
-///    A 128-bit vector of [4 x unsigned int].
+///    A 128-bit vector of [4 x int].
 ///
 /// \code{.operation}
 /// FOR j := 0 to 3
 /// 	tmp1.dword := ZeroExtend32(__A.word[2*j]) * ZeroExtend32(__B.word[2*j])
-/// 	tmp2.dword := ZeroExtend32(__A.word[2*j+1]) * ZeroExtend32(__B.word[2*j+1])
+/// 	tmp2.dword :=
+/// 	  ZeroExtend32(__A.word[2*j+1]) * ZeroExtend32(__B.word[2*j+1])
 /// 	dst.dword[j] := __W.dword[j] + tmp1 + tmp2
 /// ENDFOR
 /// dst[MAX:128] := 0
 /// \endcode
 #define _mm_dpwuud_epi32(__W, __A, __B)                                        \
-  ((__m128i)__builtin_ia32_vpdpwuud128((__v4si)(__W), (__v4si)(__A),           \
-                                       (__v4si)(__B)))
+  ((__m128i)__builtin_ia32_vpdpwuud128((__v4si)(__W), (__v8hu)(__A),           \
+                                       (__v8hu)(__B)))
 
-/// Multiply groups of 2 adjacent pairs of unsigned 16-bit integers in \a __A with
-///    corresponding unsigned 16-bit integers in \a __B, producing 2 intermediate
-///    signed 16-bit results. Sum these 2 results with the corresponding
-///    32-bit integer in \a __W, and store the packed 32-bit results in \a dst.
+/// Multiply groups of 2 adjacent pairs of unsigned 16-bit integers in \a __A
+///    with corresponding unsigned 16-bit integers in \a __B, producing 2
+///    intermediate unsigned 32-bit results. Sum these 2 results with the
+///    corresponding 32-bit integer in \a __W, and store the packed 32-bit
+///    results in \a dst.
 ///
 /// \headerfile <immintrin.h>
 ///
@@ -339,31 +355,32 @@
 /// This intrinsic corresponds to the \c VPDPWUUD instruction.
 ///
 /// \param __W
-///    A 256-bit vector of [8 x unsigned int].
+///    A 256-bit vector of [8 x int].
 /// \param __A
 ///    A 256-bit vector of [16 x unsigned short].
 /// \param __B
 ///    A 256-bit vector of [16 x unsigned short].
 /// \returns
-///    A 256-bit vector of [8 x unsigned int].
+///    A 256-bit vector of [8 x int].
 ///
 /// \code{.operation}
 /// FOR j := 0 to 7
 /// 	tmp1.dword := ZeroExtend32(__A.word[2*j]) * ZeroExtend32(__B.word[2*j])
-/// 	tmp2.dword := ZeroExtend32(__A.word[2*j+1]) * ZeroExtend32(__B.word[2*j+1])
+/// 	tmp2.dword :=
+/// 	  ZeroExtend32(__A.word[2*j+1]) * ZeroExtend32(__B.word[2*j+1])
 /// 	dst.dword[j] := __W.dword[j] + tmp1 + tmp2
 /// ENDFOR
 /// dst[MAX:256] := 0
 /// \endcode
 #define _mm256_dpwuud_epi32(__W, __A, __B)                                     \
-  ((__m256i)__builtin_ia32_vpdpwuud256((__v8si)(__W), (__v8si)(__A),           \
-                                       (__v8si)(__B)))
+  ((__m256i)__builtin_ia32_vpdpwuud256((__v8si)(__W), (__v16hu)(__A),          \
+                                       (__v16hu)(__B)))
 
-/// Multiply groups of 2 adjacent pairs of unsigned 16-bit integers in \a __A with
-///    corresponding unsigned 16-bit integers in \a __B, producing 2 intermediate
-///    signed 16-bit results. Sum these 2 results with the corresponding
-///    32-bit integer in \a __W with signed saturation, and store the packed
-///    32-bit results in \a dst.
+/// Multiply groups of 2 adjacent pairs of unsigned 16-bit integers in \a __A
+///    with corresponding unsigned 16-bit integers in \a __B, producing 2
+///    intermediate unsigned 32-bit results. Sum these 2 results with the
+///    corresponding 32-bit integer in \a __W with unsigned saturation, and
+///    store the packed 32-bit results in \a dst.
 ///
 /// \headerfile <immintrin.h>
 ///
@@ -371,34 +388,35 @@
 /// __m128i _mm_dpwsuds_epi32(__m128i __W, __m128i __A, __m128i __B)
 /// \endcode
 ///
-/// This intrinsic corresponds to the \c VPDPWSUDS instruction.
+/// This intrinsic corresponds to the \c VPDPWUUDS instruction.
 ///
 /// \param __W
-///    A 128-bit vector of [4 x unsigned int].
+///    A 128-bit vector of [4 x int].
 /// \param __A
 ///    A 128-bit vector of [8 x unsigned short].
 /// \param __B
 ///    A 128-bit vector of [8 x unsigned short].
 /// \returns
-///    A 128-bit vector of [4 x unsigned int].
+///    A 128-bit vector of [4 x int].
 ///
 /// \code{.operation}
 /// FOR j := 0 to 3
 /// 	tmp1.dword := ZeroExtend32(__A.word[2*j]) * ZeroExtend32(__B.word[2*j])
-/// 	tmp2.dword := ZeroExtend32(__A.word[2*j+1]) * ZeroExtend32(__B.word[2*j+1])
+/// 	tmp2.dword :=
+/// 	  ZeroExtend32(__A.word[2*j+1]) * ZeroExtend32(__B.word[2*j+1])
 /// 	dst.dword[j] := UNSIGNED_DWORD_SATURATE(__W.dword[j] + tmp1 + tmp2)
 /// ENDFOR
 /// dst[MAX:128] := 0
 /// \endcode
 #define _mm_dpwuuds_epi32(__W, __A, __B)                                       \
-  ((__m128i)__builtin_ia32_vpdpwuuds128((__v4si)(__W), (__v4si)(__A),          \
-                                        (__v4si)(__B)))
+  ((__m128i)__builtin_ia32_vpdpwuuds128((__v4si)(__W), (__v8hu)(__A),          \
+                                        (__v8hu)(__B)))
 
-/// Multiply groups of 2 adjacent pairs of unsigned 16-bit integers in \a __A with
-///    corresponding unsigned 16-bit integers in \a __B, producing 2 intermediate
-///    signed 16-bit results. Sum these 2 results with the corresponding
-///    32-bit integer in \a __W with signed saturation, and store the packed
-///    32-bit results in \a dst.
+/// Multiply groups of 2 adjacent pairs of unsigned 16-bit integers in \a __A
+///    with corresponding unsigned 16-bit integers in \a __B, producing 2
+///    intermediate unsigned 32-bit results. Sum these 2 results with the
+///    corresponding 32-bit integer in \a __W with unsigned saturation, and
+///    store the packed 32-bit results in \a dst.
 ///
 /// \headerfile <immintrin.h>
 ///
@@ -406,27 +424,28 @@
 /// __m256i _mm256_dpwuuds_epi32(__m256i __W, __m256i __A, __m256i __B)
 /// \endcode
 ///
-/// This intrinsic corresponds to the \c VPDPWSUDS instruction.
+/// This intrinsic corresponds to the \c VPDPWUUDS instruction.
 ///
 /// \param __W
-///    A 256-bit vector of [8 x unsigned int].
+///    A 256-bit vector of [8 x int].
 /// \param __A
 ///    A 256-bit vector of [16 x unsigned short].
 /// \param __B
 ///    A 256-bit vector of [16 x unsigned short].
 /// \returns
-///    A 256-bit vector of [8 x unsigned int].
+///    A 256-bit vector of [8 x int].
 ///
 /// \code{.operation}
 /// FOR j := 0 to 7
 /// 	tmp1.dword := ZeroExtend32(__A.word[2*j]) * ZeroExtend32(__B.word[2*j])
-/// 	tmp2.dword := ZeroExtend32(__A.word[2*j+1]) * ZeroExtend32(__B.word[2*j+1])
+/// 	tmp2.dword :=
+/// 	  ZeroExtend32(__A.word[2*j+1]) * ZeroExtend32(__B.word[2*j+1])
 /// 	dst.dword[j] := UNSIGNED_DWORD_SATURATE(__W.dword[j] + tmp1 + tmp2)
 /// ENDFOR
 /// dst[MAX:256] := 0
 /// \endcode
 #define _mm256_dpwuuds_epi32(__W, __A, __B)                                    \
-  ((__m256i)__builtin_ia32_vpdpwuuds256((__v8si)(__W), (__v8si)(__A),          \
-                                        (__v8si)(__B)))
+  ((__m256i)__builtin_ia32_vpdpwuuds256((__v8si)(__W), (__v16hu)(__A),         \
+                                        (__v16hu)(__B)))
 
 #endif // __AVXVNNIINT16INTRIN_H
diff --git a/lib/include/avxvnniint8intrin.h b/lib/include/avxvnniint8intrin.h
index c211620c68..858b66b138 100644
--- a/lib/include/avxvnniint8intrin.h
+++ b/lib/include/avxvnniint8intrin.h
@@ -14,6 +14,7 @@
 #ifndef __AVXVNNIINT8INTRIN_H
 #define __AVXVNNIINT8INTRIN_H
 
+// clang-format off
 /// Multiply groups of 4 adjacent pairs of signed 8-bit integers in \a __A with
 ///    corresponding signed 8-bit integers in \a __B, producing 4 intermediate
 ///    signed 16-bit results. Sum these 4 results with the corresponding
@@ -44,10 +45,12 @@
 /// ENDFOR
 /// dst[MAX:128] := 0
 /// \endcode
+// clang-format on
 #define _mm_dpbssd_epi32(__W, __A, __B)                                        \
-  ((__m128i)__builtin_ia32_vpdpbssd128((__v4si)(__W), (__v4si)(__A),           \
-                                       (__v4si)(__B)))
+  ((__m128i)__builtin_ia32_vpdpbssd128((__v4si)(__W), (__v16qi)(__A),          \
+                                       (__v16qi)(__B)))
 
+// clang-format off
 /// Multiply groups of 4 adjacent pairs of signed 8-bit integers in \a __A with
 ///    corresponding signed 8-bit integers in \a __B, producing 4 intermediate
 ///    signed 16-bit results. Sum these 4 results with the corresponding
@@ -78,10 +81,12 @@
 /// ENDFOR
 /// dst[MAX:256] := 0
 /// \endcode
+// clang-format on
 #define _mm256_dpbssd_epi32(__W, __A, __B)                                     \
-  ((__m256i)__builtin_ia32_vpdpbssd256((__v8si)(__W), (__v8si)(__A),           \
-                                       (__v8si)(__B)))
+  ((__m256i)__builtin_ia32_vpdpbssd256((__v8si)(__W), (__v32qi)(__A),          \
+                                       (__v32qi)(__B)))
 
+// clang-format off
 /// Multiply groups of 4 adjacent pairs of signed 8-bit integers in \a __A with
 ///    corresponding signed 8-bit integers in \a __B, producing 4 intermediate
 ///    signed 16-bit results. Sum these 4 results with the corresponding
@@ -94,7 +99,7 @@
 /// _mm_dpbssds_epi32( __m128i __W, __m128i __A, __m128i __B);
 /// \endcode
 ///
-/// This intrinsic corresponds to the \c VPDPBSSD instruction.
+/// This intrinsic corresponds to the \c VPDPBSSDS instruction.
 ///
 /// \param __A
 ///    A 128-bit vector of [16 x char].
@@ -113,10 +118,12 @@
 /// ENDFOR
 /// dst[MAX:128] := 0
 /// \endcode
+// clang-format on
 #define _mm_dpbssds_epi32(__W, __A, __B)                                       \
-  ((__m128i)__builtin_ia32_vpdpbssds128((__v4si)(__W), (__v4si)(__A),          \
-                                        (__v4si)(__B)))
+  ((__m128i)__builtin_ia32_vpdpbssds128((__v4si)(__W), (__v16qi)(__A),         \
+                                        (__v16qi)(__B)))
 
+// clang-format off
 /// Multiply groups of 4 adjacent pairs of signed 8-bit integers in \a __A with
 ///    corresponding signed 8-bit integers in \a __B, producing 4 intermediate
 ///    signed 16-bit results. Sum these 4 results with the corresponding
@@ -129,7 +136,7 @@
 /// _mm256_dpbssds_epi32(__m256i __W, __m256i __A, __m256i __B);
 /// \endcode
 ///
-/// This intrinsic corresponds to the \c VPDPBSSD instruction.
+/// This intrinsic corresponds to the \c VPDPBSSDS instruction.
 ///
 /// \param __A
 ///    A 256-bit vector of [32 x char].
@@ -148,10 +155,12 @@
 /// ENDFOR
 /// dst[MAX:256] := 0
 /// \endcode
+// clang-format on
 #define _mm256_dpbssds_epi32(__W, __A, __B)                                    \
-  ((__m256i)__builtin_ia32_vpdpbssds256((__v8si)(__W), (__v8si)(__A),          \
-                                        (__v8si)(__B)))
+  ((__m256i)__builtin_ia32_vpdpbssds256((__v8si)(__W), (__v32qi)(__A),         \
+                                        (__v32qi)(__B)))
 
+// clang-format off
 /// Multiply groups of 4 adjacent pairs of signed 8-bit integers in \a __A with
 ///    corresponding unsigned 8-bit integers in \a __B, producing 4 intermediate
 ///    signed 16-bit results. Sum these 4 results with the corresponding
@@ -163,7 +172,7 @@
 /// _mm_dpbsud_epi32(__m128i __W, __m128i __A, __m128i __B);
 /// \endcode
 ///
-/// This intrinsic corresponds to the \c VPDPBSSD instruction.
+/// This intrinsic corresponds to the \c VPDPBSUD instruction.
 ///
 /// \param __A
 ///    A 128-bit vector of [16 x char].
@@ -182,10 +191,12 @@
 /// ENDFOR
 /// dst[MAX:128] := 0
 /// \endcode
+// clang-format on
 #define _mm_dpbsud_epi32(__W, __A, __B)                                        \
-  ((__m128i)__builtin_ia32_vpdpbsud128((__v4si)(__W), (__v4si)(__A),           \
-                                       (__v4si)(__B)))
+  ((__m128i)__builtin_ia32_vpdpbsud128((__v4si)(__W), (__v16qi)(__A),          \
+                                       (__v16qu)(__B)))
 
+// clang-format off
 /// Multiply groups of 4 adjacent pairs of signed 8-bit integers in \a __A with
 ///    corresponding unsigned 8-bit integers in \a __B, producing 4 intermediate
 ///    signed 16-bit results. Sum these 4 results with the corresponding
@@ -197,7 +208,7 @@
 /// _mm256_dpbsud_epi32(__m256i __W, __m256i __A, __m256i __B);
 /// \endcode
 ///
-/// This intrinsic corresponds to the \c VPDPBSSD instruction.
+/// This intrinsic corresponds to the \c VPDPBSUD instruction.
 ///
 /// \param __A
 ///    A 256-bit vector of [32 x char].
@@ -216,10 +227,12 @@
 /// ENDFOR
 /// dst[MAX:256] := 0
 /// \endcode
+// clang-format on
 #define _mm256_dpbsud_epi32(__W, __A, __B)                                     \
-  ((__m256i)__builtin_ia32_vpdpbsud256((__v8si)(__W), (__v8si)(__A),           \
-                                       (__v8si)(__B)))
+  ((__m256i)__builtin_ia32_vpdpbsud256((__v8si)(__W), (__v32qi)(__A),          \
+                                       (__v32qu)(__B)))
 
+// clang-format off
 /// Multiply groups of 4 adjacent pairs of signed 8-bit integers in \a __A with
 ///    corresponding unsigned 8-bit integers in \a __B, producing 4 intermediate
 ///    signed 16-bit results. Sum these 4 results with the corresponding
@@ -232,7 +245,7 @@
 /// _mm_dpbsuds_epi32( __m128i __W, __m128i __A, __m128i __B);
 /// \endcode
 ///
-/// This intrinsic corresponds to the \c VPDPBSSD instruction.
+/// This intrinsic corresponds to the \c VPDPBSUDS instruction.
 ///
 /// \param __A
 ///    A 128-bit vector of [16 x char].
@@ -251,10 +264,12 @@
 /// ENDFOR
 /// dst[MAX:128] := 0
 /// \endcode
+// clang-format on
 #define _mm_dpbsuds_epi32(__W, __A, __B)                                       \
-  ((__m128i)__builtin_ia32_vpdpbsuds128((__v4si)(__W), (__v4si)(__A),          \
-                                        (__v4si)(__B)))
+  ((__m128i)__builtin_ia32_vpdpbsuds128((__v4si)(__W), (__v16qi)(__A),         \
+                                        (__v16qu)(__B)))
 
+// clang-format off
 /// Multiply groups of 4 adjacent pairs of signed 8-bit integers in \a __A with
 ///    corresponding unsigned 8-bit integers in \a __B, producing 4 intermediate
 ///    signed 16-bit results. Sum these 4 results with the corresponding
@@ -267,7 +282,7 @@
 /// _mm256_dpbsuds_epi32(__m256i __W, __m256i __A, __m256i __B);
 /// \endcode
 ///
-/// This intrinsic corresponds to the \c VPDPBSSD instruction.
+/// This intrinsic corresponds to the \c VPDPBSUDS instruction.
 ///
 /// \param __A
 ///    A 256-bit vector of [32 x char].
@@ -286,10 +301,12 @@
 /// ENDFOR
 /// dst[MAX:256] := 0
 /// \endcode
+// clang-format on
 #define _mm256_dpbsuds_epi32(__W, __A, __B)                                    \
-  ((__m256i)__builtin_ia32_vpdpbsuds256((__v8si)(__W), (__v8si)(__A),          \
-                                        (__v8si)(__B)))
+  ((__m256i)__builtin_ia32_vpdpbsuds256((__v8si)(__W), (__v32qi)(__A),         \
+                                        (__v32qu)(__B)))
 
+// clang-format off
 /// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in \a __A with
 ///    corresponding unsigned 8-bit integers in \a __B, producing 4 intermediate
 ///    signed 16-bit results. Sum these 4 results with the corresponding
@@ -301,7 +318,7 @@
 /// _mm_dpbuud_epi32(__m128i __W, __m128i __A, __m128i __B);
 /// \endcode
 ///
-/// This intrinsic corresponds to the \c VPDPBSSD instruction.
+/// This intrinsic corresponds to the \c VPDPBUUD instruction.
 ///
 /// \param __A
 ///    A 128-bit vector of [16 x unsigned char].
@@ -320,10 +337,12 @@
 /// ENDFOR
 /// dst[MAX:128] := 0
 /// \endcode
+// clang-format on
 #define _mm_dpbuud_epi32(__W, __A, __B)                                        \
-  ((__m128i)__builtin_ia32_vpdpbuud128((__v4si)(__W), (__v4si)(__A),           \
-                                       (__v4si)(__B)))
+  ((__m128i)__builtin_ia32_vpdpbuud128((__v4si)(__W), (__v16qu)(__A),          \
+                                       (__v16qu)(__B)))
 
+// clang-format off
 /// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in \a __A with
 ///    corresponding unsigned 8-bit integers in \a __B, producing 4 intermediate
 ///    signed 16-bit results. Sum these 4 results with the corresponding
@@ -335,7 +354,7 @@
 /// _mm256_dpbuud_epi32(__m256i __W, __m256i __A, __m256i __B);
 /// \endcode
 ///
-/// This intrinsic corresponds to the \c VPDPBSSD instruction.
+/// This intrinsic corresponds to the \c VPDPBUUD instruction.
 ///
 /// \param __A
 ///    A 256-bit vector of [32 x unsigned char].
@@ -354,10 +373,12 @@
 /// ENDFOR
 /// dst[MAX:256] := 0
 /// \endcode
+// clang-format on
 #define _mm256_dpbuud_epi32(__W, __A, __B)                                     \
-  ((__m256i)__builtin_ia32_vpdpbuud256((__v8si)(__W), (__v8si)(__A),           \
-                                       (__v8si)(__B)))
+  ((__m256i)__builtin_ia32_vpdpbuud256((__v8si)(__W), (__v32qu)(__A),          \
+                                       (__v32qu)(__B)))
 
+// clang-format off
 /// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in \a __A with
 ///    corresponding unsigned 8-bit integers in \a __B, producing 4 intermediate
 ///    signed 16-bit results. Sum these 4 results with the corresponding
@@ -389,10 +410,12 @@
 /// ENDFOR
 /// dst[MAX:128] := 0
 /// \endcode
+// clang-format on
 #define _mm_dpbuuds_epi32(__W, __A, __B)                                       \
-  ((__m128i)__builtin_ia32_vpdpbuuds128((__v4si)(__W), (__v4si)(__A),          \
-                                        (__v4si)(__B)))
+  ((__m128i)__builtin_ia32_vpdpbuuds128((__v4si)(__W), (__v16qu)(__A),         \
+                                        (__v16qu)(__B)))
 
+// clang-format off
 ///    corresponding unsigned 8-bit integers in \a __B, producing 4 intermediate
 ///    signed 16-bit results. Sum these 4 results with the corresponding
 ///    32-bit integer in \a __W with signed saturation, and store the packed
@@ -423,8 +446,9 @@
 /// ENDFOR
 /// dst[MAX:256] := 0
 /// \endcode
+// clang-format on
 #define _mm256_dpbuuds_epi32(__W, __A, __B)                                    \
-  ((__m256i)__builtin_ia32_vpdpbuuds256((__v8si)(__W), (__v8si)(__A),          \
-                                        (__v8si)(__B)))
+  ((__m256i)__builtin_ia32_vpdpbuuds256((__v8si)(__W), (__v32qu)(__A),         \
+                                        (__v32qu)(__B)))
 
 #endif // __AVXVNNIINT8INTRIN_H
diff --git a/lib/include/avxvnniintrin.h b/lib/include/avxvnniintrin.h
index b7de562b57..1d2e8c906e 100644
--- a/lib/include/avxvnniintrin.h
+++ b/lib/include/avxvnniintrin.h
@@ -63,7 +63,8 @@
 static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_dpbusd_avx_epi32(__m256i __S, __m256i __A, __m256i __B)
 {
-  return (__m256i)__builtin_ia32_vpdpbusd256((__v8si)__S, (__v8si)__A, (__v8si)__B);
+  return (__m256i)__builtin_ia32_vpdpbusd256((__v8si)__S, (__v32qu)__A,
+                                             (__v32qi)__B);
 }
 
 /// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in \a __A with
@@ -86,7 +87,8 @@ _mm256_dpbusd_avx_epi32(__m256i __S, __m256i __A, __m256i __B)
 static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_dpbusds_avx_epi32(__m256i __S, __m256i __A, __m256i __B)
 {
-  return (__m256i)__builtin_ia32_vpdpbusds256((__v8si)__S, (__v8si)__A, (__v8si)__B);
+  return (__m256i)__builtin_ia32_vpdpbusds256((__v8si)__S, (__v32qu)__A,
+                                              (__v32qi)__B);
 }
 
 /// Multiply groups of 2 adjacent pairs of signed 16-bit integers in \a __A with
@@ -107,7 +109,8 @@ _mm256_dpbusds_avx_epi32(__m256i __S, __m256i __A, __m256i __B)
 static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_dpwssd_avx_epi32(__m256i __S, __m256i __A, __m256i __B)
 {
-  return (__m256i)__builtin_ia32_vpdpwssd256((__v8si)__S, (__v8si)__A, (__v8si)__B);
+  return (__m256i)__builtin_ia32_vpdpwssd256((__v8si)__S, (__v16hi)__A,
+                                             (__v16hi)__B);
 }
 
 /// Multiply groups of 2 adjacent pairs of signed 16-bit integers in \a __A with
@@ -128,7 +131,8 @@ _mm256_dpwssd_avx_epi32(__m256i __S, __m256i __A, __m256i __B)
 static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_dpwssds_avx_epi32(__m256i __S, __m256i __A, __m256i __B)
 {
-  return (__m256i)__builtin_ia32_vpdpwssds256((__v8si)__S, (__v8si)__A, (__v8si)__B);
+  return (__m256i)__builtin_ia32_vpdpwssds256((__v8si)__S, (__v16hi)__A,
+                                              (__v16hi)__B);
 }
 
 /// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in \a __A with
@@ -151,7 +155,8 @@ _mm256_dpwssds_avx_epi32(__m256i __S, __m256i __A, __m256i __B)
 static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_dpbusd_avx_epi32(__m128i __S, __m128i __A, __m128i __B)
 {
-  return (__m128i)__builtin_ia32_vpdpbusd128((__v4si)__S, (__v4si)__A, (__v4si)__B);
+  return (__m128i)__builtin_ia32_vpdpbusd128((__v4si)__S, (__v16qu)__A,
+                                             (__v16qi)__B);
 }
 
 /// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in \a __A with
@@ -174,7 +179,8 @@ _mm_dpbusd_avx_epi32(__m128i __S, __m128i __A, __m128i __B)
 static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_dpbusds_avx_epi32(__m128i __S, __m128i __A, __m128i __B)
 {
-  return (__m128i)__builtin_ia32_vpdpbusds128((__v4si)__S, (__v4si)__A, (__v4si)__B);
+  return (__m128i)__builtin_ia32_vpdpbusds128((__v4si)__S, (__v16qu)__A,
+                                              (__v16qi)__B);
 }
 
 /// Multiply groups of 2 adjacent pairs of signed 16-bit integers in \a __A with
@@ -195,7 +201,8 @@ _mm_dpbusds_avx_epi32(__m128i __S, __m128i __A, __m128i __B)
 static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_dpwssd_avx_epi32(__m128i __S, __m128i __A, __m128i __B)
 {
-  return (__m128i)__builtin_ia32_vpdpwssd128((__v4si)__S, (__v4si)__A, (__v4si)__B);
+  return (__m128i)__builtin_ia32_vpdpwssd128((__v4si)__S, (__v8hi)__A,
+                                             (__v8hi)__B);
 }
 
 /// Multiply groups of 2 adjacent pairs of signed 16-bit integers in \a __A with
@@ -216,7 +223,8 @@ _mm_dpwssd_avx_epi32(__m128i __S, __m128i __A, __m128i __B)
 static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_dpwssds_avx_epi32(__m128i __S, __m128i __A, __m128i __B)
 {
-  return (__m128i)__builtin_ia32_vpdpwssds128((__v4si)__S, (__v4si)__A, (__v4si)__B);
+  return (__m128i)__builtin_ia32_vpdpwssds128((__v4si)__S, (__v8hi)__A,
+                                              (__v8hi)__B);
 }
 
 #undef __DEFAULT_FN_ATTRS128
diff --git a/lib/include/cpuid.h b/lib/include/cpuid.h
index 52addb7bfa..156425c756 100644
--- a/lib/include/cpuid.h
+++ b/lib/include/cpuid.h
@@ -253,10 +253,6 @@
 #define bit_RDPRU       0x00000010
 #define bit_WBNOINVD    0x00000200
 
-/* Features in %ebx for leaf 0x24 */
-#define bit_AVX10_256   0x00020000
-#define bit_AVX10_512   0x00040000
-
 #ifdef __i386__
 #define __cpuid(__leaf, __eax, __ebx, __ecx, __edx) \
     __asm("cpuid" : "=a"(__eax), "=b" (__ebx), "=c"(__ecx), "=d"(__edx) \
@@ -282,6 +278,24 @@
         : "0"(__leaf), "2"(__count))
 #endif
 
+/// Queries the processor to determine the highest supported \c CPUID leaf.
+/// This intrinsic is only available on x86 and x64.
+///
+/// \headerfile <cpuid.h>
+///
+/// This intrinsic corresponds to the <c> CPUID </c> instruction.
+///
+/// \param __leaf
+///    \a __leaf can be either 0x0 or 0x80000000. If \a __leaf == 0x0, the
+///    highest supported value for basic \c CPUID information is returned.
+///    If \a __leaf == 0x80000000, the highest supported value for extended
+///    \c CPUID information is returned.
+/// \param __sig
+///    If the \a __sig pointer is non-null, the first four bytes of the
+///    signature (as found in the \c EBX register) are returned in the
+///    location pointed to by \a __sig.
+/// \returns Returns 0 if \c CPUID is not supported; otherwise returns the
+///    value that \c CPUID returns in the \c EAX register.
 static __inline unsigned int __get_cpuid_max (unsigned int __leaf,
                                               unsigned int *__sig)
 {
@@ -315,6 +329,32 @@ static __inline unsigned int __get_cpuid_max (unsigned int __leaf,
     return __eax;
 }
 
+/// For the requested \c CPUID leaf, queries the processor for information
+/// about the CPU type and CPU features (such as processor vendor, supported
+/// instruction sets, CPU capabilities, cache sizes, CPU model and family, and
+/// other hardware details). This intrinsic is only available on x86 and x64.
+///
+/// \headerfile <cpuid.h>
+///
+/// This intrinsic corresponds to the <c> CPUID </c> instruction.
+///
+/// \param __leaf
+///    An unsigned integer that identifies the level (also called "leaf") at
+///    which the \c CPUID instruction will be executed.
+/// \param __eax
+///    A pointer to an integer that corresponds to the \c EAX register where
+///    \c CPUID stores output results.
+/// \param __ebx
+///    A pointer to an integer that corresponds to the \c EBX register where
+///    \c CPUID stores output results.
+/// \param __ecx
+///    A pointer to an integer that corresponds to the \c ECX register where
+///    \c CPUID stores output results.
+/// \param __edx
+///    A pointer to an integer that corresponds to the \c EDX register where
+///    \c CPUID stores output results.
+/// \returns Returns 1 if the requested \c CPUID leaf is supported; otherwise
+///    returns 0.
 static __inline int __get_cpuid (unsigned int __leaf, unsigned int *__eax,
                                  unsigned int *__ebx, unsigned int *__ecx,
                                  unsigned int *__edx)
@@ -328,6 +368,36 @@ static __inline int __get_cpuid (unsigned int __leaf, unsigned int *__eax,
     return 1;
 }
 
+/// For the requested \c CPUID leaf and subleaf, queries the processor for
+/// information about the CPU type and CPU features (such as processor vendor,
+/// supported instruction sets, CPU capabilities, cache sizes, CPU model and
+/// family, and other hardware details). This intrinsic is only available on
+/// x86 and x64.
+///
+/// \headerfile <cpuid.h>
+///
+/// This intrinsic corresponds to the <c> CPUID </c> instruction.
+///
+/// \param __leaf
+///    An unsigned integer that identifies the level (also called "leaf") at
+///    which the \c CPUID instruction will be executed.
+/// \param __subleaf
+///    An unsigned integer that identifies the sublevel (also called
+///    "subleaf") at which the \c CPUID instruction will be executed.
+/// \param __eax
+///    A pointer to an integer that corresponds to the \c EAX register where
+///    \c CPUID stores output results.
+/// \param __ebx
+///    A pointer to an integer that corresponds to the \c EBX register where
+///    \c CPUID stores output results.
+/// \param __ecx
+///    A pointer to an integer that corresponds to the \c ECX register where
+///    \c CPUID stores output results.
+/// \param __edx
+///    A pointer to an integer that corresponds to the \c EDX register where
+///    \c CPUID stores output results.
+/// \returns Returns 1 if the requested \c CPUID leaf is supported; otherwise
+///    returns 0.
 static __inline int __get_cpuid_count (unsigned int __leaf,
                                        unsigned int __subleaf,
                                        unsigned int *__eax, unsigned int *__ebx,
@@ -345,10 +415,37 @@ static __inline int __get_cpuid_count (unsigned int __leaf,
 // In some configurations, __cpuidex is defined as a builtin (primarily
 // -fms-extensions) which will conflict with the __cpuidex definition below.
 #if !(__has_builtin(__cpuidex))
+// In some cases, offloading will set the host as the aux triple and define the
+// builtin. Given __has_builtin does not detect builtins on aux triples, we need
+// to explicitly check for some offloading cases.
+#if !defined(__NVPTX__) && !defined(__AMDGPU__) && !defined(__SPIRV__)
+/// Executes the \c CPUID instruction with the specified leaf and subleaf
+/// values, and returns the results from the CPU's registers. This intrinsic
+/// is only available on x86 and x64.
+///
+/// \headerfile <cpuid.h>
+///
+/// This intrinsic corresponds to the <c> CPUID </c> instruction.
+///
+/// \param __cpu_info
+///    An output array of four integers:
+///    <ul>
+///    <li>\a __cpu_info[0] receives the value of the \c EAX register.</li>
+///    <li>\a __cpu_info[1] receives the value of the \c EBX register.</li>
+///    <li>\a __cpu_info[2] receives the value of the \c ECX register.</li>
+///    <li>\a __cpu_info[3] receives the value of the \c EDX register.</li>
+///    </ul>
+/// \param __leaf
+///    An unsigned integer that identifies the level (also called the "leaf")
+///    at which the \c CPUID instruction will be executed.
+/// \param __subleaf
+///    An unsigned integer that identifies the sublevel (also called the
+///    "subleaf") at which the \c CPUID instruction will be executed.
 static __inline void __cpuidex(int __cpu_info[4], int __leaf, int __subleaf) {
   __cpuid_count(__leaf, __subleaf, __cpu_info[0], __cpu_info[1], __cpu_info[2],
                 __cpu_info[3]);
 }
 #endif
+#endif
 
 #endif /* __CPUID_H */
diff --git a/lib/include/emmintrin.h b/lib/include/emmintrin.h
index 78e8a422db..61b35e9731 100644
--- a/lib/include/emmintrin.h
+++ b/lib/include/emmintrin.h
@@ -17,7 +17,6 @@
 #include <xmmintrin.h>
 
 typedef double __m128d __attribute__((__vector_size__(16), __aligned__(16)));
-typedef long long __m128i __attribute__((__vector_size__(16), __aligned__(16)));
 
 typedef double __m128d_u __attribute__((__vector_size__(16), __aligned__(1)));
 typedef long long __m128i_u
@@ -25,14 +24,9 @@ typedef long long __m128i_u
 
 /* Type defines.  */
 typedef double __v2df __attribute__((__vector_size__(16)));
-typedef long long __v2di __attribute__((__vector_size__(16)));
-typedef short __v8hi __attribute__((__vector_size__(16)));
-typedef char __v16qi __attribute__((__vector_size__(16)));
 
 /* Unsigned types */
 typedef unsigned long long __v2du __attribute__((__vector_size__(16)));
-typedef unsigned short __v8hu __attribute__((__vector_size__(16)));
-typedef unsigned char __v16qu __attribute__((__vector_size__(16)));
 
 /* We need an explicitly signed variant for char. Note that this shouldn't
  * appear in the interface though. */
@@ -49,15 +43,9 @@ typedef __bf16 __m128bh __attribute__((__vector_size__(16), __aligned__(16)));
 #endif
 
 /* Define the default attributes for the functions in this file. */
-#if defined(__EVEX512__) && !defined(__AVX10_1_512__)
-#define __DEFAULT_FN_ATTRS                                                     \
-  __attribute__((__always_inline__, __nodebug__,                               \
-                 __target__("sse2,no-evex512"), __min_vector_width__(128)))
-#else
 #define __DEFAULT_FN_ATTRS                                                     \
   __attribute__((__always_inline__, __nodebug__, __target__("sse2"),           \
                  __min_vector_width__(128)))
-#endif
 
 #if defined(__cplusplus) && (__cplusplus >= 201103L)
 #define __DEFAULT_FN_ATTRS_CONSTEXPR __DEFAULT_FN_ATTRS constexpr
@@ -67,6 +55,9 @@ typedef __bf16 __m128bh __attribute__((__vector_size__(16), __aligned__(16)));
 
 #define __trunc64(x)                                                           \
   (__m64) __builtin_shufflevector((__v2di)(x), __extension__(__v2di){}, 0)
+#define __zext128(x)                                                           \
+  (__m128i) __builtin_shufflevector((__v2si)(x), __extension__(__v2si){}, 0,   \
+                                    1, 2, 3)
 #define __anyext128(x)                                                         \
   (__m128i) __builtin_shufflevector((__v2si)(x), __extension__(__v2si){}, 0,   \
                                     1, -1, -1)
@@ -250,8 +241,7 @@ static __inline__ __m128d __DEFAULT_FN_ATTRS_CONSTEXPR _mm_div_pd(__m128d __a,
 ///    bits are copied from the upper 64 bits of operand \a __a.
 static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_sqrt_sd(__m128d __a,
                                                          __m128d __b) {
-  __m128d __c = __builtin_ia32_sqrtsd((__v2df)__b);
-  return __extension__(__m128d){__c[0], __a[1]};
+  return __extension__(__m128d){__builtin_elementwise_sqrt(__b[0]), __a[1]};
 }
 
 /// Calculates the square root of the each of two values stored in a
@@ -266,7 +256,7 @@ static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_sqrt_sd(__m128d __a,
 /// \returns A 128-bit vector of [2 x double] containing the square roots of the
 ///    values in the operand.
 static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_sqrt_pd(__m128d __a) {
-  return __builtin_ia32_sqrtpd((__v2df)__a);
+  return __builtin_elementwise_sqrt(__a);
 }
 
 /// Compares lower 64-bit double-precision values of both operands, and
@@ -310,8 +300,8 @@ static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_min_sd(__m128d __a,
 ///    A 128-bit vector of [2 x double] containing one of the operands.
 /// \returns A 128-bit vector of [2 x double] containing the minimum values
 ///    between both operands.
-static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_min_pd(__m128d __a,
-                                                        __m128d __b) {
+static __inline__ __m128d __DEFAULT_FN_ATTRS_CONSTEXPR _mm_min_pd(__m128d __a,
+                                                                  __m128d __b) {
   return __builtin_ia32_minpd((__v2df)__a, (__v2df)__b);
 }
 
@@ -356,8 +346,8 @@ static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_max_sd(__m128d __a,
 ///    A 128-bit vector of [2 x double] containing one of the operands.
 /// \returns A 128-bit vector of [2 x double] containing the maximum values
 ///    between both operands.
-static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_max_pd(__m128d __a,
-                                                        __m128d __b) {
+static __inline__ __m128d __DEFAULT_FN_ATTRS_CONSTEXPR _mm_max_pd(__m128d __a,
+                                                                  __m128d __b) {
   return __builtin_ia32_maxpd((__v2df)__a, (__v2df)__b);
 }
 
@@ -1288,7 +1278,8 @@ static __inline__ int __DEFAULT_FN_ATTRS _mm_ucomineq_sd(__m128d __a,
 ///    A 128-bit vector of [2 x double].
 /// \returns A 128-bit vector of [4 x float] whose lower 64 bits contain the
 ///    converted values. The upper 64 bits are set to zero.
-static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_cvtpd_ps(__m128d __a) {
+static __inline__ __m128 __DEFAULT_FN_ATTRS_CONSTEXPR
+_mm_cvtpd_ps(__m128d __a) {
   return __builtin_ia32_cvtpd2ps((__v2df)__a);
 }
 
@@ -1393,8 +1384,8 @@ static __inline__ int __DEFAULT_FN_ATTRS _mm_cvtsd_si32(__m128d __a) {
 /// \returns A 128-bit vector of [4 x float]. The lower 32 bits contain the
 ///    converted value from the second parameter. The upper 96 bits are copied
 ///    from the upper 96 bits of the first parameter.
-static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_cvtsd_ss(__m128 __a,
-                                                         __m128d __b) {
+static __inline__ __m128 __DEFAULT_FN_ATTRS_CONSTEXPR
+_mm_cvtsd_ss(__m128 __a, __m128d __b) {
   return (__m128)__builtin_ia32_cvtsd2ss((__v4sf)__a, (__v2df)__b);
 }
 
@@ -2068,8 +2059,8 @@ static __inline__ void __DEFAULT_FN_ATTRS _mm_storel_pd(double *__dp,
 ///    A 128-bit vector of [16 x i8].
 /// \returns A 128-bit vector of [16 x i8] containing the sums of both
 ///    parameters.
-static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_add_epi8(__m128i __a,
-                                                          __m128i __b) {
+static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR
+_mm_add_epi8(__m128i __a, __m128i __b) {
   return (__m128i)((__v16qu)__a + (__v16qu)__b);
 }
 
@@ -2089,8 +2080,8 @@ static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_add_epi8(__m128i __a,
 ///    A 128-bit vector of [8 x i16].
 /// \returns A 128-bit vector of [8 x i16] containing the sums of both
 ///    parameters.
-static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_add_epi16(__m128i __a,
-                                                           __m128i __b) {
+static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR
+_mm_add_epi16(__m128i __a, __m128i __b) {
   return (__m128i)((__v8hu)__a + (__v8hu)__b);
 }
 
@@ -2127,8 +2118,9 @@ _mm_add_epi32(__m128i __a, __m128i __b) {
 /// \param __b
 ///    A 64-bit integer.
 /// \returns A 64-bit integer containing the sum of both parameters.
-static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_add_si64(__m64 __a, __m64 __b) {
-  return (__m64)(((unsigned long long)__a) + ((unsigned long long)__b));
+static __inline__ __m64 __DEFAULT_FN_ATTRS_CONSTEXPR _mm_add_si64(__m64 __a,
+                                                                  __m64 __b) {
+  return (__m64)(((__v1du)__a)[0] + ((__v1du)__b)[0]);
 }
 
 /// Adds the corresponding elements of two 128-bit vectors of [2 x i64],
@@ -2169,8 +2161,8 @@ _mm_add_epi64(__m128i __a, __m128i __b) {
 ///    A 128-bit signed [16 x i8] vector.
 /// \returns A 128-bit signed [16 x i8] vector containing the saturated sums of
 ///    both parameters.
-static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_adds_epi8(__m128i __a,
-                                                           __m128i __b) {
+static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR
+_mm_adds_epi8(__m128i __a, __m128i __b) {
   return (__m128i)__builtin_elementwise_add_sat((__v16qs)__a, (__v16qs)__b);
 }
 
@@ -2191,8 +2183,8 @@ static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_adds_epi8(__m128i __a,
 ///    A 128-bit signed [8 x i16] vector.
 /// \returns A 128-bit signed [8 x i16] vector containing the saturated sums of
 ///    both parameters.
-static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_adds_epi16(__m128i __a,
-                                                            __m128i __b) {
+static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR
+_mm_adds_epi16(__m128i __a, __m128i __b) {
   return (__m128i)__builtin_elementwise_add_sat((__v8hi)__a, (__v8hi)__b);
 }
 
@@ -2213,8 +2205,8 @@ static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_adds_epi16(__m128i __a,
 ///    A 128-bit unsigned [16 x i8] vector.
 /// \returns A 128-bit unsigned [16 x i8] vector containing the saturated sums
 ///    of both parameters.
-static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_adds_epu8(__m128i __a,
-                                                           __m128i __b) {
+static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR
+_mm_adds_epu8(__m128i __a, __m128i __b) {
   return (__m128i)__builtin_elementwise_add_sat((__v16qu)__a, (__v16qu)__b);
 }
 
@@ -2235,8 +2227,8 @@ static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_adds_epu8(__m128i __a,
 ///    A 128-bit unsigned [8 x i16] vector.
 /// \returns A 128-bit unsigned [8 x i16] vector containing the saturated sums
 ///    of both parameters.
-static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_adds_epu16(__m128i __a,
-                                                            __m128i __b) {
+static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR
+_mm_adds_epu16(__m128i __a, __m128i __b) {
   return (__m128i)__builtin_elementwise_add_sat((__v8hu)__a, (__v8hu)__b);
 }
 
@@ -2254,9 +2246,9 @@ static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_adds_epu16(__m128i __a,
 ///    A 128-bit unsigned [16 x i8] vector.
 /// \returns A 128-bit unsigned [16 x i8] vector containing the rounded
 ///    averages of both parameters.
-static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_avg_epu8(__m128i __a,
-                                                          __m128i __b) {
-  return (__m128i)__builtin_ia32_pavgb128((__v16qi)__a, (__v16qi)__b);
+static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR
+_mm_avg_epu8(__m128i __a, __m128i __b) {
+  return (__m128i)__builtin_ia32_pavgb128((__v16qu)__a, (__v16qu)__b);
 }
 
 /// Computes the rounded averages of corresponding elements of two
@@ -2273,9 +2265,9 @@ static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_avg_epu8(__m128i __a,
 ///    A 128-bit unsigned [8 x i16] vector.
 /// \returns A 128-bit unsigned [8 x i16] vector containing the rounded
 ///    averages of both parameters.
-static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_avg_epu16(__m128i __a,
-                                                           __m128i __b) {
-  return (__m128i)__builtin_ia32_pavgw128((__v8hi)__a, (__v8hi)__b);
+static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR
+_mm_avg_epu16(__m128i __a, __m128i __b) {
+  return (__m128i)__builtin_ia32_pavgw128((__v8hu)__a, (__v8hu)__b);
 }
 
 /// Multiplies the corresponding elements of two 128-bit signed [8 x i16]
@@ -2298,8 +2290,8 @@ static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_avg_epu16(__m128i __a,
 ///    A 128-bit signed [8 x i16] vector.
 /// \returns A 128-bit signed [4 x i32] vector containing the sums of products
 ///    of both parameters.
-static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_madd_epi16(__m128i __a,
-                                                            __m128i __b) {
+static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR
+_mm_madd_epi16(__m128i __a, __m128i __b) {
   return (__m128i)__builtin_ia32_pmaddwd128((__v8hi)__a, (__v8hi)__b);
 }
 
@@ -2317,8 +2309,8 @@ static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_madd_epi16(__m128i __a,
 ///    A 128-bit signed [8 x i16] vector.
 /// \returns A 128-bit signed [8 x i16] vector containing the greater value of
 ///    each comparison.
-static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_max_epi16(__m128i __a,
-                                                           __m128i __b) {
+static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR
+_mm_max_epi16(__m128i __a, __m128i __b) {
   return (__m128i)__builtin_elementwise_max((__v8hi)__a, (__v8hi)__b);
 }
 
@@ -2336,8 +2328,8 @@ static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_max_epi16(__m128i __a,
 ///    A 128-bit unsigned [16 x i8] vector.
 /// \returns A 128-bit unsigned [16 x i8] vector containing the greater value of
 ///    each comparison.
-static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_max_epu8(__m128i __a,
-                                                          __m128i __b) {
+static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR
+_mm_max_epu8(__m128i __a, __m128i __b) {
   return (__m128i)__builtin_elementwise_max((__v16qu)__a, (__v16qu)__b);
 }
 
@@ -2355,8 +2347,8 @@ static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_max_epu8(__m128i __a,
 ///    A 128-bit signed [8 x i16] vector.
 /// \returns A 128-bit signed [8 x i16] vector containing the smaller value of
 ///    each comparison.
-static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_min_epi16(__m128i __a,
-                                                           __m128i __b) {
+static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR
+_mm_min_epi16(__m128i __a, __m128i __b) {
   return (__m128i)__builtin_elementwise_min((__v8hi)__a, (__v8hi)__b);
 }
 
@@ -2374,8 +2366,8 @@ static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_min_epi16(__m128i __a,
 ///    A 128-bit unsigned [16 x i8] vector.
 /// \returns A 128-bit unsigned [16 x i8] vector containing the smaller value of
 ///    each comparison.
-static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_min_epu8(__m128i __a,
-                                                          __m128i __b) {
+static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR
+_mm_min_epu8(__m128i __a, __m128i __b) {
   return (__m128i)__builtin_elementwise_min((__v16qu)__a, (__v16qu)__b);
 }
 
@@ -2393,8 +2385,8 @@ static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_min_epu8(__m128i __a,
 ///    A 128-bit signed [8 x i16] vector.
 /// \returns A 128-bit signed [8 x i16] vector containing the upper 16 bits of
 ///    each of the eight 32-bit products.
-static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mulhi_epi16(__m128i __a,
-                                                             __m128i __b) {
+static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR
+_mm_mulhi_epi16(__m128i __a, __m128i __b) {
   return (__m128i)__builtin_ia32_pmulhw128((__v8hi)__a, (__v8hi)__b);
 }
 
@@ -2412,9 +2404,9 @@ static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mulhi_epi16(__m128i __a,
 ///    A 128-bit unsigned [8 x i16] vector.
 /// \returns A 128-bit unsigned [8 x i16] vector containing the upper 16 bits
 ///    of each of the eight 32-bit products.
-static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mulhi_epu16(__m128i __a,
-                                                             __m128i __b) {
-  return (__m128i)__builtin_ia32_pmulhuw128((__v8hi)__a, (__v8hi)__b);
+static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR
+_mm_mulhi_epu16(__m128i __a, __m128i __b) {
+  return (__m128i)__builtin_ia32_pmulhuw128((__v8hu)__a, (__v8hu)__b);
 }
 
 /// Multiplies the corresponding elements of two signed [8 x i16]
@@ -2431,8 +2423,8 @@ static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mulhi_epu16(__m128i __a,
 ///    A 128-bit signed [8 x i16] vector.
 /// \returns A 128-bit signed [8 x i16] vector containing the lower 16 bits of
 ///    each of the eight 32-bit products.
-static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mullo_epi16(__m128i __a,
-                                                             __m128i __b) {
+static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR
+_mm_mullo_epi16(__m128i __a, __m128i __b) {
   return (__m128i)((__v8hu)__a * (__v8hu)__b);
 }
 
@@ -2449,9 +2441,10 @@ static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mullo_epi16(__m128i __a,
 /// \param __b
 ///    A 64-bit integer containing one of the source operands.
 /// \returns A 64-bit integer vector containing the product of both operands.
-static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_mul_su32(__m64 __a, __m64 __b) {
-  return __trunc64(__builtin_ia32_pmuludq128((__v4si)__anyext128(__a),
-                                             (__v4si)__anyext128(__b)));
+static __inline__ __m64 __DEFAULT_FN_ATTRS_CONSTEXPR _mm_mul_su32(__m64 __a,
+                                                                  __m64 __b) {
+  return __trunc64(__builtin_ia32_pmuludq128((__v4si)__zext128(__a),
+                                             (__v4si)__zext128(__b)));
 }
 
 /// Multiplies 32-bit unsigned integer values contained in the lower
@@ -2467,8 +2460,8 @@ static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_mul_su32(__m64 __a, __m64 __b) {
 /// \param __b
 ///    A [2 x i64] vector containing one of the source operands.
 /// \returns A [2 x i64] vector containing the product of both operands.
-static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mul_epu32(__m128i __a,
-                                                           __m128i __b) {
+static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR
+_mm_mul_epu32(__m128i __a, __m128i __b) {
   return __builtin_ia32_pmuludq128((__v4si)__a, (__v4si)__b);
 }
 
@@ -2505,8 +2498,8 @@ static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_sad_epu8(__m128i __a,
 ///    A 128-bit integer vector containing the subtrahends.
 /// \returns A 128-bit integer vector containing the differences of the values
 ///    in the operands.
-static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_sub_epi8(__m128i __a,
-                                                          __m128i __b) {
+static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR
+_mm_sub_epi8(__m128i __a, __m128i __b) {
   return (__m128i)((__v16qu)__a - (__v16qu)__b);
 }
 
@@ -2522,8 +2515,8 @@ static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_sub_epi8(__m128i __a,
 ///    A 128-bit integer vector containing the subtrahends.
 /// \returns A 128-bit integer vector containing the differences of the values
 ///    in the operands.
-static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_sub_epi16(__m128i __a,
-                                                           __m128i __b) {
+static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR
+_mm_sub_epi16(__m128i __a, __m128i __b) {
   return (__m128i)((__v8hu)__a - (__v8hu)__b);
 }
 
@@ -2557,8 +2550,9 @@ _mm_sub_epi32(__m128i __a, __m128i __b) {
 ///    A 64-bit integer vector containing the subtrahend.
 /// \returns A 64-bit integer vector containing the difference of the values in
 ///    the operands.
-static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_sub_si64(__m64 __a, __m64 __b) {
-  return (__m64)((unsigned long long)__a - (unsigned long long)__b);
+static __inline__ __m64 __DEFAULT_FN_ATTRS_CONSTEXPR _mm_sub_si64(__m64 __a,
+                                                                  __m64 __b) {
+  return (__m64)(((__v1du)__a)[0] - ((__v1du)__b)[0]);
 }
 
 /// Subtracts the corresponding elements of two [2 x i64] vectors.
@@ -2595,8 +2589,8 @@ _mm_sub_epi64(__m128i __a, __m128i __b) {
 ///    A 128-bit integer vector containing the subtrahends.
 /// \returns A 128-bit integer vector containing the differences of the values
 ///    in the operands.
-static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_subs_epi8(__m128i __a,
-                                                           __m128i __b) {
+static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR
+_mm_subs_epi8(__m128i __a, __m128i __b) {
   return (__m128i)__builtin_elementwise_sub_sat((__v16qs)__a, (__v16qs)__b);
 }
 
@@ -2617,8 +2611,8 @@ static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_subs_epi8(__m128i __a,
 ///    A 128-bit integer vector containing the subtrahends.
 /// \returns A 128-bit integer vector containing the differences of the values
 ///    in the operands.
-static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_subs_epi16(__m128i __a,
-                                                            __m128i __b) {
+static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR
+_mm_subs_epi16(__m128i __a, __m128i __b) {
   return (__m128i)__builtin_elementwise_sub_sat((__v8hi)__a, (__v8hi)__b);
 }
 
@@ -2638,8 +2632,8 @@ static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_subs_epi16(__m128i __a,
 ///    A 128-bit integer vector containing the subtrahends.
 /// \returns A 128-bit integer vector containing the unsigned integer
 ///    differences of the values in the operands.
-static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_subs_epu8(__m128i __a,
-                                                           __m128i __b) {
+static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR
+_mm_subs_epu8(__m128i __a, __m128i __b) {
   return (__m128i)__builtin_elementwise_sub_sat((__v16qu)__a, (__v16qu)__b);
 }
 
@@ -2659,8 +2653,8 @@ static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_subs_epu8(__m128i __a,
 ///    A 128-bit integer vector containing the subtrahends.
 /// \returns A 128-bit integer vector containing the unsigned integer
 ///    differences of the values in the operands.
-static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_subs_epu16(__m128i __a,
-                                                            __m128i __b) {
+static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR
+_mm_subs_epu16(__m128i __a, __m128i __b) {
   return (__m128i)__builtin_elementwise_sub_sat((__v8hu)__a, (__v8hu)__b);
 }
 
@@ -2676,8 +2670,8 @@ static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_subs_epu16(__m128i __a,
 ///    A 128-bit integer vector containing one of the source operands.
 /// \returns A 128-bit integer vector containing the bitwise AND of the values
 ///    in both operands.
-static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_and_si128(__m128i __a,
-                                                           __m128i __b) {
+static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR
+_mm_and_si128(__m128i __a, __m128i __b) {
   return (__m128i)((__v2du)__a & (__v2du)__b);
 }
 
@@ -2695,8 +2689,8 @@ static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_and_si128(__m128i __a,
 ///    A 128-bit vector containing the right source operand.
 /// \returns A 128-bit integer vector containing the bitwise AND of the one's
 ///    complement of the first operand and the values in the second operand.
-static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_andnot_si128(__m128i __a,
-                                                              __m128i __b) {
+static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR
+_mm_andnot_si128(__m128i __a, __m128i __b) {
   return (__m128i)(~(__v2du)__a & (__v2du)__b);
 }
 /// Performs a bitwise OR of two 128-bit integer vectors.
@@ -2711,8 +2705,8 @@ static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_andnot_si128(__m128i __a,
 ///    A 128-bit integer vector containing one of the source operands.
 /// \returns A 128-bit integer vector containing the bitwise OR of the values
 ///    in both operands.
-static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_or_si128(__m128i __a,
-                                                          __m128i __b) {
+static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR
+_mm_or_si128(__m128i __a, __m128i __b) {
   return (__m128i)((__v2du)__a | (__v2du)__b);
 }
 
@@ -2728,8 +2722,8 @@ static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_or_si128(__m128i __a,
 ///    A 128-bit integer vector containing one of the source operands.
 /// \returns A 128-bit integer vector containing the bitwise exclusive OR of the
 ///    values in both operands.
-static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_xor_si128(__m128i __a,
-                                                           __m128i __b) {
+static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR
+_mm_xor_si128(__m128i __a, __m128i __b) {
   return (__m128i)((__v2du)__a ^ (__v2du)__b);
 }
 
@@ -2751,11 +2745,11 @@ static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_xor_si128(__m128i __a,
 ///    \a a.
 /// \returns A 128-bit integer vector containing the left-shifted value.
 #define _mm_slli_si128(a, imm)                                                 \
-  ((__m128i)__builtin_ia32_pslldqi128_byteshift((__v2di)(__m128i)(a),          \
+  ((__m128i)__builtin_ia32_pslldqi128_byteshift((__v16qi)(__m128i)(a),         \
                                                 (int)(imm)))
 
 #define _mm_bslli_si128(a, imm)                                                \
-  ((__m128i)__builtin_ia32_pslldqi128_byteshift((__v2di)(__m128i)(a),          \
+  ((__m128i)__builtin_ia32_pslldqi128_byteshift((__v16qi)(__m128i)(a),         \
                                                 (int)(imm)))
 
 /// Left-shifts each 16-bit value in the 128-bit integer vector operand
@@ -2771,8 +2765,8 @@ static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_xor_si128(__m128i __a,
 ///    An integer value specifying the number of bits to left-shift each value
 ///    in operand \a __a.
 /// \returns A 128-bit integer vector containing the left-shifted values.
-static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_slli_epi16(__m128i __a,
-                                                            int __count) {
+static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR
+_mm_slli_epi16(__m128i __a, int __count) {
   return (__m128i)__builtin_ia32_psllwi128((__v8hi)__a, __count);
 }
 
@@ -2789,8 +2783,8 @@ static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_slli_epi16(__m128i __a,
 ///    A 128-bit integer vector in which bits [63:0] specify the number of bits
 ///    to left-shift each value in operand \a __a.
 /// \returns A 128-bit integer vector containing the left-shifted values.
-static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_sll_epi16(__m128i __a,
-                                                           __m128i __count) {
+static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR
+_mm_sll_epi16(__m128i __a, __m128i __count) {
   return (__m128i)__builtin_ia32_psllw128((__v8hi)__a, (__v8hi)__count);
 }
 
@@ -2807,8 +2801,8 @@ static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_sll_epi16(__m128i __a,
 ///    An integer value specifying the number of bits to left-shift each value
 ///    in operand \a __a.
 /// \returns A 128-bit integer vector containing the left-shifted values.
-static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_slli_epi32(__m128i __a,
-                                                            int __count) {
+static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR
+_mm_slli_epi32(__m128i __a, int __count) {
   return (__m128i)__builtin_ia32_pslldi128((__v4si)__a, __count);
 }
 
@@ -2825,8 +2819,8 @@ static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_slli_epi32(__m128i __a,
 ///    A 128-bit integer vector in which bits [63:0] specify the number of bits
 ///    to left-shift each value in operand \a __a.
 /// \returns A 128-bit integer vector containing the left-shifted values.
-static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_sll_epi32(__m128i __a,
-                                                           __m128i __count) {
+static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR
+_mm_sll_epi32(__m128i __a, __m128i __count) {
   return (__m128i)__builtin_ia32_pslld128((__v4si)__a, (__v4si)__count);
 }
 
@@ -2843,8 +2837,8 @@ static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_sll_epi32(__m128i __a,
 ///    An integer value specifying the number of bits to left-shift each value
 ///    in operand \a __a.
 /// \returns A 128-bit integer vector containing the left-shifted values.
-static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_slli_epi64(__m128i __a,
-                                                            int __count) {
+static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR
+_mm_slli_epi64(__m128i __a, int __count) {
   return __builtin_ia32_psllqi128((__v2di)__a, __count);
 }
 
@@ -2861,8 +2855,8 @@ static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_slli_epi64(__m128i __a,
 ///    A 128-bit integer vector in which bits [63:0] specify the number of bits
 ///    to left-shift each value in operand \a __a.
 /// \returns A 128-bit integer vector containing the left-shifted values.
-static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_sll_epi64(__m128i __a,
-                                                           __m128i __count) {
+static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR
+_mm_sll_epi64(__m128i __a, __m128i __count) {
   return __builtin_ia32_psllq128((__v2di)__a, (__v2di)__count);
 }
 
@@ -2880,8 +2874,8 @@ static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_sll_epi64(__m128i __a,
 ///    An integer value specifying the number of bits to right-shift each value
 ///    in operand \a __a.
 /// \returns A 128-bit integer vector containing the right-shifted values.
-static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_srai_epi16(__m128i __a,
-                                                            int __count) {
+static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR
+_mm_srai_epi16(__m128i __a, int __count) {
   return (__m128i)__builtin_ia32_psrawi128((__v8hi)__a, __count);
 }
 
@@ -2899,8 +2893,8 @@ static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_srai_epi16(__m128i __a,
 ///    A 128-bit integer vector in which bits [63:0] specify the number of bits
 ///    to right-shift each value in operand \a __a.
 /// \returns A 128-bit integer vector containing the right-shifted values.
-static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_sra_epi16(__m128i __a,
-                                                           __m128i __count) {
+static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR
+_mm_sra_epi16(__m128i __a, __m128i __count) {
   return (__m128i)__builtin_ia32_psraw128((__v8hi)__a, (__v8hi)__count);
 }
 
@@ -2918,8 +2912,8 @@ static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_sra_epi16(__m128i __a,
 ///    An integer value specifying the number of bits to right-shift each value
 ///    in operand \a __a.
 /// \returns A 128-bit integer vector containing the right-shifted values.
-static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_srai_epi32(__m128i __a,
-                                                            int __count) {
+static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR
+_mm_srai_epi32(__m128i __a, int __count) {
   return (__m128i)__builtin_ia32_psradi128((__v4si)__a, __count);
 }
 
@@ -2937,8 +2931,8 @@ static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_srai_epi32(__m128i __a,
 ///    A 128-bit integer vector in which bits [63:0] specify the number of bits
 ///    to right-shift each value in operand \a __a.
 /// \returns A 128-bit integer vector containing the right-shifted values.
-static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_sra_epi32(__m128i __a,
-                                                           __m128i __count) {
+static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR
+_mm_sra_epi32(__m128i __a, __m128i __count) {
   return (__m128i)__builtin_ia32_psrad128((__v4si)__a, (__v4si)__count);
 }
 
@@ -2960,11 +2954,11 @@ static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_sra_epi32(__m128i __a,
 ///    \a a.
 /// \returns A 128-bit integer vector containing the right-shifted value.
 #define _mm_srli_si128(a, imm)                                                 \
-  ((__m128i)__builtin_ia32_psrldqi128_byteshift((__v2di)(__m128i)(a),          \
+  ((__m128i)__builtin_ia32_psrldqi128_byteshift((__v16qi)(__m128i)(a),         \
                                                 (int)(imm)))
 
 #define _mm_bsrli_si128(a, imm)                                                \
-  ((__m128i)__builtin_ia32_psrldqi128_byteshift((__v2di)(__m128i)(a),          \
+  ((__m128i)__builtin_ia32_psrldqi128_byteshift((__v16qi)(__m128i)(a),         \
                                                 (int)(imm)))
 
 /// Right-shifts each of 16-bit values in the 128-bit integer vector
@@ -2980,8 +2974,8 @@ static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_sra_epi32(__m128i __a,
 ///    An integer value specifying the number of bits to right-shift each value
 ///    in operand \a __a.
 /// \returns A 128-bit integer vector containing the right-shifted values.
-static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_srli_epi16(__m128i __a,
-                                                            int __count) {
+static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR
+_mm_srli_epi16(__m128i __a, int __count) {
   return (__m128i)__builtin_ia32_psrlwi128((__v8hi)__a, __count);
 }
 
@@ -2998,8 +2992,8 @@ static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_srli_epi16(__m128i __a,
 ///    A 128-bit integer vector in which bits [63:0] specify the number of bits
 ///    to right-shift each value in operand \a __a.
 /// \returns A 128-bit integer vector containing the right-shifted values.
-static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_srl_epi16(__m128i __a,
-                                                           __m128i __count) {
+static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR
+_mm_srl_epi16(__m128i __a, __m128i __count) {
   return (__m128i)__builtin_ia32_psrlw128((__v8hi)__a, (__v8hi)__count);
 }
 
@@ -3016,8 +3010,8 @@ static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_srl_epi16(__m128i __a,
 ///    An integer value specifying the number of bits to right-shift each value
 ///    in operand \a __a.
 /// \returns A 128-bit integer vector containing the right-shifted values.
-static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_srli_epi32(__m128i __a,
-                                                            int __count) {
+static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR
+_mm_srli_epi32(__m128i __a, int __count) {
   return (__m128i)__builtin_ia32_psrldi128((__v4si)__a, __count);
 }
 
@@ -3034,8 +3028,8 @@ static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_srli_epi32(__m128i __a,
 ///    A 128-bit integer vector in which bits [63:0] specify the number of bits
 ///    to right-shift each value in operand \a __a.
 /// \returns A 128-bit integer vector containing the right-shifted values.
-static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_srl_epi32(__m128i __a,
-                                                           __m128i __count) {
+static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR
+_mm_srl_epi32(__m128i __a, __m128i __count) {
   return (__m128i)__builtin_ia32_psrld128((__v4si)__a, (__v4si)__count);
 }
 
@@ -3052,8 +3046,8 @@ static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_srl_epi32(__m128i __a,
 ///    An integer value specifying the number of bits to right-shift each value
 ///    in operand \a __a.
 /// \returns A 128-bit integer vector containing the right-shifted values.
-static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_srli_epi64(__m128i __a,
-                                                            int __count) {
+static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR
+_mm_srli_epi64(__m128i __a, int __count) {
   return __builtin_ia32_psrlqi128((__v2di)__a, __count);
 }
 
@@ -3070,8 +3064,8 @@ static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_srli_epi64(__m128i __a,
 ///    A 128-bit integer vector in which bits [63:0] specify the number of bits
 ///    to right-shift each value in operand \a __a.
 /// \returns A 128-bit integer vector containing the right-shifted values.
-static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_srl_epi64(__m128i __a,
-                                                           __m128i __count) {
+static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR
+_mm_srl_epi64(__m128i __a, __m128i __count) {
   return __builtin_ia32_psrlq128((__v2di)__a, (__v2di)__count);
 }
 
@@ -3089,8 +3083,8 @@ static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_srl_epi64(__m128i __a,
 /// \param __b
 ///    A 128-bit integer vector.
 /// \returns A 128-bit integer vector containing the comparison results.
-static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cmpeq_epi8(__m128i __a,
-                                                            __m128i __b) {
+static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR
+_mm_cmpeq_epi8(__m128i __a, __m128i __b) {
   return (__m128i)((__v16qi)__a == (__v16qi)__b);
 }
 
@@ -3108,8 +3102,8 @@ static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cmpeq_epi8(__m128i __a,
 /// \param __b
 ///    A 128-bit integer vector.
 /// \returns A 128-bit integer vector containing the comparison results.
-static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cmpeq_epi16(__m128i __a,
-                                                             __m128i __b) {
+static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR
+_mm_cmpeq_epi16(__m128i __a, __m128i __b) {
   return (__m128i)((__v8hi)__a == (__v8hi)__b);
 }
 
@@ -3127,8 +3121,8 @@ static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cmpeq_epi16(__m128i __a,
 /// \param __b
 ///    A 128-bit integer vector.
 /// \returns A 128-bit integer vector containing the comparison results.
-static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cmpeq_epi32(__m128i __a,
-                                                             __m128i __b) {
+static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR
+_mm_cmpeq_epi32(__m128i __a, __m128i __b) {
   return (__m128i)((__v4si)__a == (__v4si)__b);
 }
 
@@ -3147,8 +3141,8 @@ static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cmpeq_epi32(__m128i __a,
 /// \param __b
 ///    A 128-bit integer vector.
 /// \returns A 128-bit integer vector containing the comparison results.
-static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cmpgt_epi8(__m128i __a,
-                                                            __m128i __b) {
+static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR
+_mm_cmpgt_epi8(__m128i __a, __m128i __b) {
   /* This function always performs a signed comparison, but __v16qi is a char
      which may be signed or unsigned, so use __v16qs. */
   return (__m128i)((__v16qs)__a > (__v16qs)__b);
@@ -3169,8 +3163,8 @@ static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cmpgt_epi8(__m128i __a,
 /// \param __b
 ///    A 128-bit integer vector.
 /// \returns A 128-bit integer vector containing the comparison results.
-static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cmpgt_epi16(__m128i __a,
-                                                             __m128i __b) {
+static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR
+_mm_cmpgt_epi16(__m128i __a, __m128i __b) {
   return (__m128i)((__v8hi)__a > (__v8hi)__b);
 }
 
@@ -3189,8 +3183,8 @@ static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cmpgt_epi16(__m128i __a,
 /// \param __b
 ///    A 128-bit integer vector.
 /// \returns A 128-bit integer vector containing the comparison results.
-static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cmpgt_epi32(__m128i __a,
-                                                             __m128i __b) {
+static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR
+_mm_cmpgt_epi32(__m128i __a, __m128i __b) {
   return (__m128i)((__v4si)__a > (__v4si)__b);
 }
 
@@ -3209,8 +3203,8 @@ static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cmpgt_epi32(__m128i __a,
 /// \param __b
 ///    A 128-bit integer vector.
 /// \returns A 128-bit integer vector containing the comparison results.
-static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cmplt_epi8(__m128i __a,
-                                                            __m128i __b) {
+static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR
+_mm_cmplt_epi8(__m128i __a, __m128i __b) {
   return _mm_cmpgt_epi8(__b, __a);
 }
 
@@ -3229,8 +3223,8 @@ static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cmplt_epi8(__m128i __a,
 /// \param __b
 ///    A 128-bit integer vector.
 /// \returns A 128-bit integer vector containing the comparison results.
-static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cmplt_epi16(__m128i __a,
-                                                             __m128i __b) {
+static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR
+_mm_cmplt_epi16(__m128i __a, __m128i __b) {
   return _mm_cmpgt_epi16(__b, __a);
 }
 
@@ -3249,8 +3243,8 @@ static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cmplt_epi16(__m128i __a,
 /// \param __b
 ///    A 128-bit integer vector.
 /// \returns A 128-bit integer vector containing the comparison results.
-static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cmplt_epi32(__m128i __a,
-                                                             __m128i __b) {
+static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR
+_mm_cmplt_epi32(__m128i __a, __m128i __b) {
   return _mm_cmpgt_epi32(__b, __a);
 }
 
@@ -3379,7 +3373,8 @@ static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cvttps_epi32(__m128 __a) {
 /// \param __a
 ///    A 32-bit signed integer operand.
 /// \returns A 128-bit vector of [4 x i32].
-static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cvtsi32_si128(int __a) {
+static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR
+_mm_cvtsi32_si128(int __a) {
   return __extension__(__m128i)(__v4si){__a, 0, 0, 0};
 }
 
@@ -3394,7 +3389,8 @@ static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cvtsi32_si128(int __a) {
 /// \param __a
 ///    A 64-bit signed integer operand containing the value to be converted.
 /// \returns A 128-bit vector of [2 x i64] containing the converted value.
-static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cvtsi64_si128(long long __a) {
+static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR
+_mm_cvtsi64_si128(long long __a) {
   return __extension__(__m128i)(__v2di){__a, 0};
 }
 
@@ -3409,7 +3405,8 @@ static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cvtsi64_si128(long long __a) {
 ///    A vector of [4 x i32]. The least significant 32 bits are moved to the
 ///    destination.
 /// \returns A 32-bit signed integer containing the moved value.
-static __inline__ int __DEFAULT_FN_ATTRS _mm_cvtsi128_si32(__m128i __a) {
+static __inline__ int __DEFAULT_FN_ATTRS_CONSTEXPR
+_mm_cvtsi128_si32(__m128i __a) {
   __v4si __b = (__v4si)__a;
   return __b[0];
 }
@@ -3425,7 +3422,8 @@ static __inline__ int __DEFAULT_FN_ATTRS _mm_cvtsi128_si32(__m128i __a) {
 ///    A vector of [2 x i64]. The least significant 64 bits are moved to the
 ///    destination.
 /// \returns A 64-bit signed integer containing the moved value.
-static __inline__ long long __DEFAULT_FN_ATTRS _mm_cvtsi128_si64(__m128i __a) {
+static __inline__ long long __DEFAULT_FN_ATTRS_CONSTEXPR
+_mm_cvtsi128_si64(__m128i __a) {
   return __a[0];
 }
 
@@ -4161,8 +4159,8 @@ void _mm_mfence(void);
 ///   A 128-bit integer vector of [8 x i16]. The converted [8 x i8] values are
 ///   written to the higher 64 bits of the result.
 /// \returns A 128-bit vector of [16 x i8] containing the converted values.
-static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_packs_epi16(__m128i __a,
-                                                             __m128i __b) {
+static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR
+_mm_packs_epi16(__m128i __a, __m128i __b) {
   return (__m128i)__builtin_ia32_packsswb128((__v8hi)__a, (__v8hi)__b);
 }
 
@@ -4184,8 +4182,8 @@ static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_packs_epi16(__m128i __a,
 ///    A 128-bit integer vector of [4 x i32]. The converted [4 x i16] values
 ///    are written to the higher 64 bits of the result.
 /// \returns A 128-bit vector of [8 x i16] containing the converted values.
-static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_packs_epi32(__m128i __a,
-                                                             __m128i __b) {
+static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR
+_mm_packs_epi32(__m128i __a, __m128i __b) {
   return (__m128i)__builtin_ia32_packssdw128((__v4si)__a, (__v4si)__b);
 }
 
@@ -4207,8 +4205,8 @@ static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_packs_epi32(__m128i __a,
 ///    A 128-bit integer vector of [8 x i16]. The converted [8 x i8] values are
 ///    written to the higher 64 bits of the result.
 /// \returns A 128-bit vector of [16 x i8] containing the converted values.
-static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_packus_epi16(__m128i __a,
-                                                              __m128i __b) {
+static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR
+_mm_packus_epi16(__m128i __a, __m128i __b) {
   return (__m128i)__builtin_ia32_packuswb128((__v8hi)__a, (__v8hi)__b);
 }
 
@@ -4282,7 +4280,8 @@ static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_packus_epi16(__m128i __a,
 ///    A 128-bit integer vector containing the values with bits to be extracted.
 /// \returns The most significant bits from each 8-bit element in \a __a,
 ///    written to bits [15:0]. The other bits are assigned zeros.
-static __inline__ int __DEFAULT_FN_ATTRS _mm_movemask_epi8(__m128i __a) {
+static __inline__ int __DEFAULT_FN_ATTRS_CONSTEXPR
+_mm_movemask_epi8(__m128i __a) {
   return __builtin_ia32_pmovmskb128((__v16qi)__a);
 }
 
@@ -4415,8 +4414,8 @@ static __inline__ int __DEFAULT_FN_ATTRS _mm_movemask_epi8(__m128i __a) {
 ///    Bits [119:112] are written to bits [111:104] of the result. \n
 ///    Bits [127:120] are written to bits [127:120] of the result.
 /// \returns A 128-bit vector of [16 x i8] containing the interleaved values.
-static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_unpackhi_epi8(__m128i __a,
-                                                               __m128i __b) {
+static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR
+_mm_unpackhi_epi8(__m128i __a, __m128i __b) {
   return (__m128i)__builtin_shufflevector(
       (__v16qi)__a, (__v16qi)__b, 8, 16 + 8, 9, 16 + 9, 10, 16 + 10, 11,
       16 + 11, 12, 16 + 12, 13, 16 + 13, 14, 16 + 14, 15, 16 + 15);
@@ -4443,8 +4442,8 @@ static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_unpackhi_epi8(__m128i __a,
 ///    Bits [111:96] are written to bits [95:80] of the result. \n
 ///    Bits [127:112] are written to bits [127:112] of the result.
 /// \returns A 128-bit vector of [8 x i16] containing the interleaved values.
-static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_unpackhi_epi16(__m128i __a,
-                                                                __m128i __b) {
+static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR
+_mm_unpackhi_epi16(__m128i __a, __m128i __b) {
   return (__m128i)__builtin_shufflevector((__v8hi)__a, (__v8hi)__b, 4, 8 + 4, 5,
                                           8 + 5, 6, 8 + 6, 7, 8 + 7);
 }
@@ -4466,8 +4465,8 @@ static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_unpackhi_epi16(__m128i __a,
 ///    Bits [95:64] are written to bits [64:32] of the destination. \n
 ///    Bits [127:96] are written to bits [127:96] of the destination.
 /// \returns A 128-bit vector of [4 x i32] containing the interleaved values.
-static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_unpackhi_epi32(__m128i __a,
-                                                                __m128i __b) {
+static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR
+_mm_unpackhi_epi32(__m128i __a, __m128i __b) {
   return (__m128i)__builtin_shufflevector((__v4si)__a, (__v4si)__b, 2, 4 + 2, 3,
                                           4 + 3);
 }
@@ -4487,8 +4486,8 @@ static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_unpackhi_epi32(__m128i __a,
 ///    A 128-bit vector of [2 x i64]. \n
 ///    Bits [127:64] are written to bits [127:64] of the destination.
 /// \returns A 128-bit vector of [2 x i64] containing the interleaved values.
-static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_unpackhi_epi64(__m128i __a,
-                                                                __m128i __b) {
+static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR
+_mm_unpackhi_epi64(__m128i __a, __m128i __b) {
   return (__m128i)__builtin_shufflevector((__v2di)__a, (__v2di)__b, 1, 2 + 1);
 }
 
@@ -4521,8 +4520,8 @@ static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_unpackhi_epi64(__m128i __a,
 ///    Bits [55:48] are written to bits [111:104] of the result. \n
 ///    Bits [63:56] are written to bits [127:120] of the result.
 /// \returns A 128-bit vector of [16 x i8] containing the interleaved values.
-static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_unpacklo_epi8(__m128i __a,
-                                                               __m128i __b) {
+static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR
+_mm_unpacklo_epi8(__m128i __a, __m128i __b) {
   return (__m128i)__builtin_shufflevector(
       (__v16qi)__a, (__v16qi)__b, 0, 16 + 0, 1, 16 + 1, 2, 16 + 2, 3, 16 + 3, 4,
       16 + 4, 5, 16 + 5, 6, 16 + 6, 7, 16 + 7);
@@ -4550,8 +4549,8 @@ static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_unpacklo_epi8(__m128i __a,
 ///    Bits [47:32] are written to bits [95:80] of the result. \n
 ///    Bits [63:48] are written to bits [127:112] of the result.
 /// \returns A 128-bit vector of [8 x i16] containing the interleaved values.
-static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_unpacklo_epi16(__m128i __a,
-                                                                __m128i __b) {
+static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR
+_mm_unpacklo_epi16(__m128i __a, __m128i __b) {
   return (__m128i)__builtin_shufflevector((__v8hi)__a, (__v8hi)__b, 0, 8 + 0, 1,
                                           8 + 1, 2, 8 + 2, 3, 8 + 3);
 }
@@ -4573,8 +4572,8 @@ static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_unpacklo_epi16(__m128i __a,
 ///    Bits [31:0] are written to bits [64:32] of the destination. \n
 ///    Bits [63:32] are written to bits [127:96] of the destination.
 /// \returns A 128-bit vector of [4 x i32] containing the interleaved values.
-static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_unpacklo_epi32(__m128i __a,
-                                                                __m128i __b) {
+static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR
+_mm_unpacklo_epi32(__m128i __a, __m128i __b) {
   return (__m128i)__builtin_shufflevector((__v4si)__a, (__v4si)__b, 0, 4 + 0, 1,
                                           4 + 1);
 }
@@ -4594,8 +4593,8 @@ static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_unpacklo_epi32(__m128i __a,
 ///    A 128-bit vector of [2 x i64]. \n
 ///    Bits [63:0] are written to bits [127:64] of the destination. \n
 /// \returns A 128-bit vector of [2 x i64] containing the interleaved values.
-static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_unpacklo_epi64(__m128i __a,
-                                                                __m128i __b) {
+static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR
+_mm_unpacklo_epi64(__m128i __a, __m128i __b) {
   return (__m128i)__builtin_shufflevector((__v2di)__a, (__v2di)__b, 0, 2 + 0);
 }
 
@@ -4701,7 +4700,8 @@ _mm_unpacklo_pd(__m128d __a, __m128d __b) {
 ///    be extracted.
 /// \returns The sign bits from each of the double-precision elements in \a __a,
 ///    written to bits [1:0]. The remaining bits are assigned values of zero.
-static __inline__ int __DEFAULT_FN_ATTRS _mm_movemask_pd(__m128d __a) {
+static __inline__ int __DEFAULT_FN_ATTRS_CONSTEXPR
+_mm_movemask_pd(__m128d __a) {
   return __builtin_ia32_movmskpd((__v2df)__a);
 }
 
diff --git a/lib/include/f16cintrin.h b/lib/include/f16cintrin.h
index 94a662c1d9..b6ca7088d3 100644
--- a/lib/include/f16cintrin.h
+++ b/lib/include/f16cintrin.h
@@ -15,10 +15,21 @@
 #define __F16CINTRIN_H
 
 /* Define the default attributes for the functions in this file. */
-#define __DEFAULT_FN_ATTRS128 \
-  __attribute__((__always_inline__, __nodebug__, __target__("f16c"), __min_vector_width__(128)))
-#define __DEFAULT_FN_ATTRS256 \
-  __attribute__((__always_inline__, __nodebug__, __target__("f16c"), __min_vector_width__(256)))
+#if defined(__cplusplus) && (__cplusplus >= 201103L)
+#define __DEFAULT_FN_ATTRS128                                                  \
+  __attribute__((__always_inline__, __nodebug__, __target__("f16c"),           \
+                 __min_vector_width__(128))) constexpr
+#define __DEFAULT_FN_ATTRS256                                                  \
+  __attribute__((__always_inline__, __nodebug__, __target__("f16c"),           \
+                 __min_vector_width__(256))) constexpr
+#else
+#define __DEFAULT_FN_ATTRS128                                                  \
+  __attribute__((__always_inline__, __nodebug__, __target__("f16c"),           \
+                 __min_vector_width__(128)))
+#define __DEFAULT_FN_ATTRS256                                                  \
+  __attribute__((__always_inline__, __nodebug__, __target__("f16c"),           \
+                 __min_vector_width__(256)))
+#endif
 
 /* NOTE: Intel documents the 128-bit versions of these as being in emmintrin.h,
  * but that's because icc can emulate these without f16c using a library call.
@@ -38,9 +49,7 @@
 static __inline float __DEFAULT_FN_ATTRS128
 _cvtsh_ss(unsigned short __a)
 {
-  __v8hi __v = {(short)__a, 0, 0, 0, 0, 0, 0, 0};
-  __v4sf __r = __builtin_ia32_vcvtph2ps(__v);
-  return __r[0];
+  return (float)__builtin_bit_cast(__fp16, __a);
 }
 
 /// Converts a 32-bit single-precision float value to a 16-bit
@@ -109,7 +118,10 @@ _cvtsh_ss(unsigned short __a)
 static __inline __m128 __DEFAULT_FN_ATTRS128
 _mm_cvtph_ps(__m128i __a)
 {
-  return (__m128)__builtin_ia32_vcvtph2ps((__v8hi)__a);
+  typedef __fp16 __v4fp16 __attribute__((__vector_size__(8)));
+
+  __v4hi __v = __builtin_shufflevector((__v8hi)__a, (__v8hi)__a, 0, 1, 2, 3);
+  return (__m128) __builtin_convertvector((__v4fp16)__v, __v4sf);
 }
 
 /// Converts a 256-bit vector of [8 x float] into a 128-bit vector
@@ -153,7 +165,9 @@ _mm_cvtph_ps(__m128i __a)
 static __inline __m256 __DEFAULT_FN_ATTRS256
 _mm256_cvtph_ps(__m128i __a)
 {
-  return (__m256)__builtin_ia32_vcvtph2ps256((__v8hi)__a);
+  typedef __fp16 __v8fp16 __attribute__((__vector_size__(16), __aligned__(16)));
+
+  return (__m256) __builtin_convertvector((__v8fp16)__a, __v8sf);
 }
 
 #undef __DEFAULT_FN_ATTRS128
diff --git a/lib/include/float.h b/lib/include/float.h
index 84551af473..82974f6004 100644
--- a/lib/include/float.h
+++ b/lib/include/float.h
@@ -7,13 +7,21 @@
  *===-----------------------------------------------------------------------===
  */
 
-#ifndef __CLANG_FLOAT_H
-#define __CLANG_FLOAT_H
-
 #if defined(__MVS__) && __has_include_next(<float.h>)
+#include <__float_header_macro.h>
 #include_next <float.h>
 #else
 
+#if !defined(__need_infinity_nan)
+#define __need_float_float
+#if (defined(__STDC_VERSION__) && __STDC_VERSION__ >= 202311L) ||              \
+    !defined(__STRICT_ANSI__)
+#define __need_infinity_nan
+#endif
+#include <__float_header_macro.h>
+#endif
+
+#ifdef __need_float_float
 /* If we're on MinGW, fall back to the system's float.h, which might have
  * additional definitions provided for Windows.
  * For more details see http://msdn.microsoft.com/en-us/library/y0ybw9fy.aspx
@@ -26,162 +34,15 @@
 
 #  include_next <float.h>
 
-/* Undefine anything that we'll be redefining below. */
-#  undef FLT_EVAL_METHOD
-#  undef FLT_ROUNDS
-#  undef FLT_RADIX
-#  undef FLT_MANT_DIG
-#  undef DBL_MANT_DIG
-#  undef LDBL_MANT_DIG
-#if (defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L) ||              \
-    !defined(__STRICT_ANSI__) ||                                               \
-    (defined(__cplusplus) && __cplusplus >= 201103L) ||                        \
-    (__STDC_HOSTED__ && defined(_AIX) && defined(_ALL_SOURCE))
-#    undef DECIMAL_DIG
-#  endif
-#  undef FLT_DIG
-#  undef DBL_DIG
-#  undef LDBL_DIG
-#  undef FLT_MIN_EXP
-#  undef DBL_MIN_EXP
-#  undef LDBL_MIN_EXP
-#  undef FLT_MIN_10_EXP
-#  undef DBL_MIN_10_EXP
-#  undef LDBL_MIN_10_EXP
-#  undef FLT_MAX_EXP
-#  undef DBL_MAX_EXP
-#  undef LDBL_MAX_EXP
-#  undef FLT_MAX_10_EXP
-#  undef DBL_MAX_10_EXP
-#  undef LDBL_MAX_10_EXP
-#  undef FLT_MAX
-#  undef DBL_MAX
-#  undef LDBL_MAX
-#  undef FLT_EPSILON
-#  undef DBL_EPSILON
-#  undef LDBL_EPSILON
-#  undef FLT_MIN
-#  undef DBL_MIN
-#  undef LDBL_MIN
-#if (defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L) ||              \
-    !defined(__STRICT_ANSI__) ||                                               \
-    (defined(__cplusplus) && __cplusplus >= 201703L) ||                        \
-    (__STDC_HOSTED__ && defined(_AIX) && defined(_ALL_SOURCE))
-#    undef FLT_TRUE_MIN
-#    undef DBL_TRUE_MIN
-#    undef LDBL_TRUE_MIN
-#    undef FLT_DECIMAL_DIG
-#    undef DBL_DECIMAL_DIG
-#    undef LDBL_DECIMAL_DIG
-#    undef FLT_HAS_SUBNORM
-#    undef DBL_HAS_SUBNORM
-#    undef LDBL_HAS_SUBNORM
-#  endif
-#if (defined(__STDC_VERSION__) && __STDC_VERSION__ >= 202311L) ||              \
-    !defined(__STRICT_ANSI__)
-#    undef FLT_NORM_MAX
-#    undef DBL_NORM_MAX
-#    undef LDBL_NORM_MAX
-#endif
 #endif
 
-#if (defined(__STDC_VERSION__) && __STDC_VERSION__ >= 202311L) ||              \
-    !defined(__STRICT_ANSI__)
-#  undef INFINITY
-#  undef NAN
+#include <__float_float.h>
+#undef __need_float_float
 #endif
 
-/* Characteristics of floating point types, C99 5.2.4.2.2 */
-
-#if (defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L) ||              \
-    (defined(__cplusplus) && __cplusplus >= 201103L)
-#define FLT_EVAL_METHOD __FLT_EVAL_METHOD__
+#ifdef __need_infinity_nan
+#include <__float_infinity_nan.h>
+#undef __need_infinity_nan
 #endif
-#define FLT_ROUNDS (__builtin_flt_rounds())
-#define FLT_RADIX __FLT_RADIX__
-
-#define FLT_MANT_DIG __FLT_MANT_DIG__
-#define DBL_MANT_DIG __DBL_MANT_DIG__
-#define LDBL_MANT_DIG __LDBL_MANT_DIG__
-
-#if (defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L) ||              \
-    !defined(__STRICT_ANSI__) ||                                               \
-    (defined(__cplusplus) && __cplusplus >= 201103L) ||                        \
-    (__STDC_HOSTED__ && defined(_AIX) && defined(_ALL_SOURCE))
-#  define DECIMAL_DIG __DECIMAL_DIG__
-#endif
-
-#define FLT_DIG __FLT_DIG__
-#define DBL_DIG __DBL_DIG__
-#define LDBL_DIG __LDBL_DIG__
-
-#define FLT_MIN_EXP __FLT_MIN_EXP__
-#define DBL_MIN_EXP __DBL_MIN_EXP__
-#define LDBL_MIN_EXP __LDBL_MIN_EXP__
-
-#define FLT_MIN_10_EXP __FLT_MIN_10_EXP__
-#define DBL_MIN_10_EXP __DBL_MIN_10_EXP__
-#define LDBL_MIN_10_EXP __LDBL_MIN_10_EXP__
-
-#define FLT_MAX_EXP __FLT_MAX_EXP__
-#define DBL_MAX_EXP __DBL_MAX_EXP__
-#define LDBL_MAX_EXP __LDBL_MAX_EXP__
-
-#define FLT_MAX_10_EXP __FLT_MAX_10_EXP__
-#define DBL_MAX_10_EXP __DBL_MAX_10_EXP__
-#define LDBL_MAX_10_EXP __LDBL_MAX_10_EXP__
-
-#define FLT_MAX __FLT_MAX__
-#define DBL_MAX __DBL_MAX__
-#define LDBL_MAX __LDBL_MAX__
-
-#define FLT_EPSILON __FLT_EPSILON__
-#define DBL_EPSILON __DBL_EPSILON__
-#define LDBL_EPSILON __LDBL_EPSILON__
-
-#define FLT_MIN __FLT_MIN__
-#define DBL_MIN __DBL_MIN__
-#define LDBL_MIN __LDBL_MIN__
-
-#if (defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L) ||              \
-    !defined(__STRICT_ANSI__) ||                                               \
-    (defined(__cplusplus) && __cplusplus >= 201703L) ||                        \
-    (__STDC_HOSTED__ && defined(_AIX) && defined(_ALL_SOURCE))
-#  define FLT_TRUE_MIN __FLT_DENORM_MIN__
-#  define DBL_TRUE_MIN __DBL_DENORM_MIN__
-#  define LDBL_TRUE_MIN __LDBL_DENORM_MIN__
-#  define FLT_DECIMAL_DIG __FLT_DECIMAL_DIG__
-#  define DBL_DECIMAL_DIG __DBL_DECIMAL_DIG__
-#  define LDBL_DECIMAL_DIG __LDBL_DECIMAL_DIG__
-#  define FLT_HAS_SUBNORM __FLT_HAS_DENORM__
-#  define DBL_HAS_SUBNORM __DBL_HAS_DENORM__
-#  define LDBL_HAS_SUBNORM __LDBL_HAS_DENORM__
-#endif
-
-#if (defined(__STDC_VERSION__) && __STDC_VERSION__ >= 202311L) ||              \
-    !defined(__STRICT_ANSI__)
-   /* C23 5.2.5.3.3p29-30 */
-#  define INFINITY (__builtin_inff())
-#  define NAN (__builtin_nanf(""))
-   /* C23 5.2.5.3.3p32 */
-#  define FLT_NORM_MAX __FLT_NORM_MAX__
-#  define DBL_NORM_MAX __DBL_NORM_MAX__
-#  define LDBL_NORM_MAX __LDBL_NORM_MAX__
-#endif
-
-#ifdef __STDC_WANT_IEC_60559_TYPES_EXT__
-#  define FLT16_MANT_DIG    __FLT16_MANT_DIG__
-#  define FLT16_DECIMAL_DIG __FLT16_DECIMAL_DIG__
-#  define FLT16_DIG         __FLT16_DIG__
-#  define FLT16_MIN_EXP     __FLT16_MIN_EXP__
-#  define FLT16_MIN_10_EXP  __FLT16_MIN_10_EXP__
-#  define FLT16_MAX_EXP     __FLT16_MAX_EXP__
-#  define FLT16_MAX_10_EXP  __FLT16_MAX_10_EXP__
-#  define FLT16_MAX         __FLT16_MAX__
-#  define FLT16_EPSILON     __FLT16_EPSILON__
-#  define FLT16_MIN         __FLT16_MIN__
-#  define FLT16_TRUE_MIN    __FLT16_TRUE_MIN__
-#endif /* __STDC_WANT_IEC_60559_TYPES_EXT__ */
 
 #endif /* __MVS__ */
-#endif /* __CLANG_FLOAT_H */
diff --git a/lib/include/fma4intrin.h b/lib/include/fma4intrin.h
index 694801b3e8..20b8030b77 100644
--- a/lib/include/fma4intrin.h
+++ b/lib/include/fma4intrin.h
@@ -20,100 +20,100 @@
 #define __DEFAULT_FN_ATTRS128 __attribute__((__always_inline__, __nodebug__, __target__("fma4"), __min_vector_width__(128)))
 #define __DEFAULT_FN_ATTRS256 __attribute__((__always_inline__, __nodebug__, __target__("fma4"), __min_vector_width__(256)))
 
-static __inline__ __m128 __DEFAULT_FN_ATTRS128
-_mm_macc_ps(__m128 __A, __m128 __B, __m128 __C)
-{
-  return (__m128)__builtin_ia32_vfmaddps((__v4sf)__A, (__v4sf)__B, (__v4sf)__C);
+#if defined(__cplusplus) && (__cplusplus >= 201103L)
+#define __DEFAULT_FN_ATTRS128_CONSTEXPR __DEFAULT_FN_ATTRS128 constexpr
+#define __DEFAULT_FN_ATTRS256_CONSTEXPR __DEFAULT_FN_ATTRS256 constexpr
+#else
+#define __DEFAULT_FN_ATTRS128_CONSTEXPR __DEFAULT_FN_ATTRS128
+#define __DEFAULT_FN_ATTRS256_CONSTEXPR __DEFAULT_FN_ATTRS256
+#endif
+
+static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR
+_mm_macc_ps(__m128 __A, __m128 __B, __m128 __C) {
+  return (__m128)__builtin_elementwise_fma((__v4sf)__A, (__v4sf)__B,
+                                           (__v4sf)__C);
 }
 
-static __inline__ __m128d __DEFAULT_FN_ATTRS128
-_mm_macc_pd(__m128d __A, __m128d __B, __m128d __C)
-{
-  return (__m128d)__builtin_ia32_vfmaddpd((__v2df)__A, (__v2df)__B, (__v2df)__C);
+static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR
+_mm_macc_pd(__m128d __A, __m128d __B, __m128d __C) {
+  return (__m128d)__builtin_elementwise_fma((__v2df)__A, (__v2df)__B,
+                                            (__v2df)__C);
 }
 
-static __inline__ __m128 __DEFAULT_FN_ATTRS128
-_mm_macc_ss(__m128 __A, __m128 __B, __m128 __C)
-{
-  return (__m128)__builtin_ia32_vfmaddss((__v4sf)__A, (__v4sf)__B, (__v4sf)__C);
+static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR
+_mm_macc_ss(__m128 __A, __m128 __B, __m128 __C) {
+  return _mm_set_ss(__builtin_elementwise_fma(__A[0], __B[0], __C[0]));
 }
 
-static __inline__ __m128d __DEFAULT_FN_ATTRS128
-_mm_macc_sd(__m128d __A, __m128d __B, __m128d __C)
-{
-  return (__m128d)__builtin_ia32_vfmaddsd((__v2df)__A, (__v2df)__B, (__v2df)__C);
+static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR
+_mm_macc_sd(__m128d __A, __m128d __B, __m128d __C) {
+  return _mm_set_sd(__builtin_elementwise_fma(__A[0], __B[0], __C[0]));
 }
 
-static __inline__ __m128 __DEFAULT_FN_ATTRS128
-_mm_msub_ps(__m128 __A, __m128 __B, __m128 __C)
-{
-  return (__m128)__builtin_ia32_vfmaddps((__v4sf)__A, (__v4sf)__B, -(__v4sf)__C);
+static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR
+_mm_msub_ps(__m128 __A, __m128 __B, __m128 __C) {
+  return (__m128)__builtin_elementwise_fma((__v4sf)__A, (__v4sf)__B,
+                                           -(__v4sf)__C);
 }
 
-static __inline__ __m128d __DEFAULT_FN_ATTRS128
-_mm_msub_pd(__m128d __A, __m128d __B, __m128d __C)
-{
-  return (__m128d)__builtin_ia32_vfmaddpd((__v2df)__A, (__v2df)__B, -(__v2df)__C);
+static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR
+_mm_msub_pd(__m128d __A, __m128d __B, __m128d __C) {
+  return (__m128d)__builtin_elementwise_fma((__v2df)__A, (__v2df)__B,
+                                            -(__v2df)__C);
 }
 
-static __inline__ __m128 __DEFAULT_FN_ATTRS128
-_mm_msub_ss(__m128 __A, __m128 __B, __m128 __C)
-{
-  return (__m128)__builtin_ia32_vfmaddss((__v4sf)__A, (__v4sf)__B, -(__v4sf)__C);
+static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR
+_mm_msub_ss(__m128 __A, __m128 __B, __m128 __C) {
+  return _mm_set_ss(__builtin_elementwise_fma(__A[0], __B[0], -__C[0]));
 }
 
-static __inline__ __m128d __DEFAULT_FN_ATTRS128
-_mm_msub_sd(__m128d __A, __m128d __B, __m128d __C)
-{
-  return (__m128d)__builtin_ia32_vfmaddsd((__v2df)__A, (__v2df)__B, -(__v2df)__C);
+static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR
+_mm_msub_sd(__m128d __A, __m128d __B, __m128d __C) {
+  return _mm_set_sd(__builtin_elementwise_fma(__A[0], __B[0], -__C[0]));
 }
 
-static __inline__ __m128 __DEFAULT_FN_ATTRS128
-_mm_nmacc_ps(__m128 __A, __m128 __B, __m128 __C)
-{
-  return (__m128)__builtin_ia32_vfmaddps(-(__v4sf)__A, (__v4sf)__B, (__v4sf)__C);
+static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR
+_mm_nmacc_ps(__m128 __A, __m128 __B, __m128 __C) {
+  return (__m128)__builtin_elementwise_fma(-(__v4sf)__A, (__v4sf)__B,
+                                           (__v4sf)__C);
 }
 
-static __inline__ __m128d __DEFAULT_FN_ATTRS128
-_mm_nmacc_pd(__m128d __A, __m128d __B, __m128d __C)
-{
-  return (__m128d)__builtin_ia32_vfmaddpd(-(__v2df)__A, (__v2df)__B, (__v2df)__C);
+static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR
+_mm_nmacc_pd(__m128d __A, __m128d __B, __m128d __C) {
+  return (__m128d)__builtin_elementwise_fma(-(__v2df)__A, (__v2df)__B,
+                                            (__v2df)__C);
 }
 
-static __inline__ __m128 __DEFAULT_FN_ATTRS128
-_mm_nmacc_ss(__m128 __A, __m128 __B, __m128 __C)
-{
-  return (__m128)__builtin_ia32_vfmaddss(-(__v4sf)__A, (__v4sf)__B, (__v4sf)__C);
+static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR
+_mm_nmacc_ss(__m128 __A, __m128 __B, __m128 __C) {
+  return _mm_set_ss(__builtin_elementwise_fma(-__A[0], __B[0], __C[0]));
 }
 
-static __inline__ __m128d __DEFAULT_FN_ATTRS128
-_mm_nmacc_sd(__m128d __A, __m128d __B, __m128d __C)
-{
-  return (__m128d)__builtin_ia32_vfmaddsd(-(__v2df)__A, (__v2df)__B, (__v2df)__C);
+static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR
+_mm_nmacc_sd(__m128d __A, __m128d __B, __m128d __C) {
+  return _mm_set_sd(__builtin_elementwise_fma(-__A[0], __B[0], __C[0]));
 }
 
-static __inline__ __m128 __DEFAULT_FN_ATTRS128
-_mm_nmsub_ps(__m128 __A, __m128 __B, __m128 __C)
-{
-  return (__m128)__builtin_ia32_vfmaddps(-(__v4sf)__A, (__v4sf)__B, -(__v4sf)__C);
+static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR
+_mm_nmsub_ps(__m128 __A, __m128 __B, __m128 __C) {
+  return (__m128)__builtin_elementwise_fma(-(__v4sf)__A, (__v4sf)__B,
+                                           -(__v4sf)__C);
 }
 
-static __inline__ __m128d __DEFAULT_FN_ATTRS128
-_mm_nmsub_pd(__m128d __A, __m128d __B, __m128d __C)
-{
-  return (__m128d)__builtin_ia32_vfmaddpd(-(__v2df)__A, (__v2df)__B, -(__v2df)__C);
+static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR
+_mm_nmsub_pd(__m128d __A, __m128d __B, __m128d __C) {
+  return (__m128d)__builtin_elementwise_fma(-(__v2df)__A, (__v2df)__B,
+                                            -(__v2df)__C);
 }
 
-static __inline__ __m128 __DEFAULT_FN_ATTRS128
-_mm_nmsub_ss(__m128 __A, __m128 __B, __m128 __C)
-{
-  return (__m128)__builtin_ia32_vfmaddss(-(__v4sf)__A, (__v4sf)__B, -(__v4sf)__C);
+static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR
+_mm_nmsub_ss(__m128 __A, __m128 __B, __m128 __C) {
+  return _mm_set_ss(__builtin_elementwise_fma(-__A[0], __B[0], -__C[0]));
 }
 
-static __inline__ __m128d __DEFAULT_FN_ATTRS128
-_mm_nmsub_sd(__m128d __A, __m128d __B, __m128d __C)
-{
-  return (__m128d)__builtin_ia32_vfmaddsd(-(__v2df)__A, (__v2df)__B, -(__v2df)__C);
+static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR
+_mm_nmsub_sd(__m128d __A, __m128d __B, __m128d __C) {
+  return _mm_set_sd(__builtin_elementwise_fma(-__A[0], __B[0], -__C[0]));
 }
 
 static __inline__ __m128 __DEFAULT_FN_ATTRS128
@@ -140,52 +140,52 @@ _mm_msubadd_pd(__m128d __A, __m128d __B, __m128d __C)
   return (__m128d)__builtin_ia32_vfmaddsubpd((__v2df)__A, (__v2df)__B, -(__v2df)__C);
 }
 
-static __inline__ __m256 __DEFAULT_FN_ATTRS256
-_mm256_macc_ps(__m256 __A, __m256 __B, __m256 __C)
-{
-  return (__m256)__builtin_ia32_vfmaddps256((__v8sf)__A, (__v8sf)__B, (__v8sf)__C);
+static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_macc_ps(__m256 __A, __m256 __B, __m256 __C) {
+  return (__m256)__builtin_elementwise_fma((__v8sf)__A, (__v8sf)__B,
+                                           (__v8sf)__C);
 }
 
-static __inline__ __m256d __DEFAULT_FN_ATTRS256
-_mm256_macc_pd(__m256d __A, __m256d __B, __m256d __C)
-{
-  return (__m256d)__builtin_ia32_vfmaddpd256((__v4df)__A, (__v4df)__B, (__v4df)__C);
+static __inline__ __m256d __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_macc_pd(__m256d __A, __m256d __B, __m256d __C) {
+  return (__m256d)__builtin_elementwise_fma((__v4df)__A, (__v4df)__B,
+                                            (__v4df)__C);
 }
 
-static __inline__ __m256 __DEFAULT_FN_ATTRS256
-_mm256_msub_ps(__m256 __A, __m256 __B, __m256 __C)
-{
-  return (__m256)__builtin_ia32_vfmaddps256((__v8sf)__A, (__v8sf)__B, -(__v8sf)__C);
+static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_msub_ps(__m256 __A, __m256 __B, __m256 __C) {
+  return (__m256)__builtin_elementwise_fma((__v8sf)__A, (__v8sf)__B,
+                                           -(__v8sf)__C);
 }
 
-static __inline__ __m256d __DEFAULT_FN_ATTRS256
-_mm256_msub_pd(__m256d __A, __m256d __B, __m256d __C)
-{
-  return (__m256d)__builtin_ia32_vfmaddpd256((__v4df)__A, (__v4df)__B, -(__v4df)__C);
+static __inline__ __m256d __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_msub_pd(__m256d __A, __m256d __B, __m256d __C) {
+  return (__m256d)__builtin_elementwise_fma((__v4df)__A, (__v4df)__B,
+                                            -(__v4df)__C);
 }
 
-static __inline__ __m256 __DEFAULT_FN_ATTRS256
-_mm256_nmacc_ps(__m256 __A, __m256 __B, __m256 __C)
-{
-  return (__m256)__builtin_ia32_vfmaddps256(-(__v8sf)__A, (__v8sf)__B, (__v8sf)__C);
+static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_nmacc_ps(__m256 __A, __m256 __B, __m256 __C) {
+  return (__m256)__builtin_elementwise_fma(-(__v8sf)__A, (__v8sf)__B,
+                                           (__v8sf)__C);
 }
 
-static __inline__ __m256d __DEFAULT_FN_ATTRS256
-_mm256_nmacc_pd(__m256d __A, __m256d __B, __m256d __C)
-{
-  return (__m256d)__builtin_ia32_vfmaddpd256(-(__v4df)__A, (__v4df)__B, (__v4df)__C);
+static __inline__ __m256d __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_nmacc_pd(__m256d __A, __m256d __B, __m256d __C) {
+  return (__m256d)__builtin_elementwise_fma(-(__v4df)__A, (__v4df)__B,
+                                            (__v4df)__C);
 }
 
-static __inline__ __m256 __DEFAULT_FN_ATTRS256
-_mm256_nmsub_ps(__m256 __A, __m256 __B, __m256 __C)
-{
-  return (__m256)__builtin_ia32_vfmaddps256(-(__v8sf)__A, (__v8sf)__B, -(__v8sf)__C);
+static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_nmsub_ps(__m256 __A, __m256 __B, __m256 __C) {
+  return (__m256)__builtin_elementwise_fma(-(__v8sf)__A, (__v8sf)__B,
+                                           -(__v8sf)__C);
 }
 
-static __inline__ __m256d __DEFAULT_FN_ATTRS256
-_mm256_nmsub_pd(__m256d __A, __m256d __B, __m256d __C)
-{
-  return (__m256d)__builtin_ia32_vfmaddpd256(-(__v4df)__A, (__v4df)__B, -(__v4df)__C);
+static __inline__ __m256d __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_nmsub_pd(__m256d __A, __m256d __B, __m256d __C) {
+  return (__m256d)__builtin_elementwise_fma(-(__v4df)__A, (__v4df)__B,
+                                            -(__v4df)__C);
 }
 
 static __inline__ __m256 __DEFAULT_FN_ATTRS256
@@ -214,5 +214,7 @@ _mm256_msubadd_pd(__m256d __A, __m256d __B, __m256d __C)
 
 #undef __DEFAULT_FN_ATTRS128
 #undef __DEFAULT_FN_ATTRS256
+#undef __DEFAULT_FN_ATTRS128_CONSTEXPR
+#undef __DEFAULT_FN_ATTRS256_CONSTEXPR
 
 #endif /* __FMA4INTRIN_H */
diff --git a/lib/include/fmaintrin.h b/lib/include/fmaintrin.h
index 22d1a780bb..eba527f360 100644
--- a/lib/include/fmaintrin.h
+++ b/lib/include/fmaintrin.h
@@ -15,8 +15,20 @@
 #define __FMAINTRIN_H
 
 /* Define the default attributes for the functions in this file. */
-#define __DEFAULT_FN_ATTRS128 __attribute__((__always_inline__, __nodebug__, __target__("fma"), __min_vector_width__(128)))
-#define __DEFAULT_FN_ATTRS256 __attribute__((__always_inline__, __nodebug__, __target__("fma"), __min_vector_width__(256)))
+#define __DEFAULT_FN_ATTRS128                                                  \
+  __attribute__((__always_inline__, __nodebug__, __target__("fma"),            \
+                 __min_vector_width__(128)))
+#define __DEFAULT_FN_ATTRS256                                                  \
+  __attribute__((__always_inline__, __nodebug__, __target__("fma"),            \
+                 __min_vector_width__(256)))
+
+#if defined(__cplusplus) && (__cplusplus >= 201103L)
+#define __DEFAULT_FN_ATTRS128_CONSTEXPR __DEFAULT_FN_ATTRS128 constexpr
+#define __DEFAULT_FN_ATTRS256_CONSTEXPR __DEFAULT_FN_ATTRS256 constexpr
+#else
+#define __DEFAULT_FN_ATTRS128_CONSTEXPR __DEFAULT_FN_ATTRS128
+#define __DEFAULT_FN_ATTRS256_CONSTEXPR __DEFAULT_FN_ATTRS256
+#endif
 
 /// Computes a multiply-add of 128-bit vectors of [4 x float].
 ///    For each element, computes <c> (__A * __B) + __C </c>.
@@ -32,10 +44,11 @@
 /// \param __C
 ///    A 128-bit vector of [4 x float] containing the addend.
 /// \returns A 128-bit vector of [4 x float] containing the result.
-static __inline__ __m128 __DEFAULT_FN_ATTRS128
+static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR
 _mm_fmadd_ps(__m128 __A, __m128 __B, __m128 __C)
 {
-  return (__m128)__builtin_ia32_vfmaddps((__v4sf)__A, (__v4sf)__B, (__v4sf)__C);
+  return (__m128)__builtin_elementwise_fma((__v4sf)__A, (__v4sf)__B,
+                                           (__v4sf)__C);
 }
 
 /// Computes a multiply-add of 128-bit vectors of [2 x double].
@@ -52,10 +65,11 @@ _mm_fmadd_ps(__m128 __A, __m128 __B, __m128 __C)
 /// \param __C
 ///    A 128-bit vector of [2 x double] containing the addend.
 /// \returns A 128-bit [2 x double] vector containing the result.
-static __inline__ __m128d __DEFAULT_FN_ATTRS128
+static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR
 _mm_fmadd_pd(__m128d __A, __m128d __B, __m128d __C)
 {
-  return (__m128d)__builtin_ia32_vfmaddpd((__v2df)__A, (__v2df)__B, (__v2df)__C);
+  return (__m128d)__builtin_elementwise_fma((__v2df)__A, (__v2df)__B,
+                                            (__v2df)__C);
 }
 
 /// Computes a scalar multiply-add of the single-precision values in the
@@ -81,10 +95,10 @@ _mm_fmadd_pd(__m128d __A, __m128d __B, __m128d __C)
 ///    32 bits.
 /// \returns A 128-bit vector of [4 x float] containing the result in the low
 ///    32 bits and a copy of \a __A[127:32] in the upper 96 bits.
-static __inline__ __m128 __DEFAULT_FN_ATTRS128
-_mm_fmadd_ss(__m128 __A, __m128 __B, __m128 __C)
-{
-  return (__m128)__builtin_ia32_vfmaddss3((__v4sf)__A, (__v4sf)__B, (__v4sf)__C);
+static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR
+_mm_fmadd_ss(__m128 __A, __m128 __B, __m128 __C) {
+  __A[0] = __builtin_elementwise_fma(__A[0], __B[0], __C[0]);
+  return __A;
 }
 
 /// Computes a scalar multiply-add of the double-precision values in the
@@ -110,10 +124,10 @@ _mm_fmadd_ss(__m128 __A, __m128 __B, __m128 __C)
 ///    64 bits.
 /// \returns A 128-bit vector of [2 x double] containing the result in the low
 ///    64 bits and a copy of \a __A[127:64] in the upper 64 bits.
-static __inline__ __m128d __DEFAULT_FN_ATTRS128
-_mm_fmadd_sd(__m128d __A, __m128d __B, __m128d __C)
-{
-  return (__m128d)__builtin_ia32_vfmaddsd3((__v2df)__A, (__v2df)__B, (__v2df)__C);
+static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR
+_mm_fmadd_sd(__m128d __A, __m128d __B, __m128d __C) {
+  __A[0] = __builtin_elementwise_fma(__A[0], __B[0], __C[0]);
+  return __A;
 }
 
 /// Computes a multiply-subtract of 128-bit vectors of [4 x float].
@@ -130,10 +144,11 @@ _mm_fmadd_sd(__m128d __A, __m128d __B, __m128d __C)
 /// \param __C
 ///    A 128-bit vector of [4 x float] containing the subtrahend.
 /// \returns A 128-bit vector of [4 x float] containing the result.
-static __inline__ __m128 __DEFAULT_FN_ATTRS128
+static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR
 _mm_fmsub_ps(__m128 __A, __m128 __B, __m128 __C)
 {
-  return (__m128)__builtin_ia32_vfmaddps((__v4sf)__A, (__v4sf)__B, -(__v4sf)__C);
+  return (__m128)__builtin_elementwise_fma((__v4sf)__A, (__v4sf)__B,
+                                           -(__v4sf)__C);
 }
 
 /// Computes a multiply-subtract of 128-bit vectors of [2 x double].
@@ -150,10 +165,11 @@ _mm_fmsub_ps(__m128 __A, __m128 __B, __m128 __C)
 /// \param __C
 ///    A 128-bit vector of [2 x double] containing the addend.
 /// \returns A 128-bit vector of [2 x double] containing the result.
-static __inline__ __m128d __DEFAULT_FN_ATTRS128
+static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR
 _mm_fmsub_pd(__m128d __A, __m128d __B, __m128d __C)
 {
-  return (__m128d)__builtin_ia32_vfmaddpd((__v2df)__A, (__v2df)__B, -(__v2df)__C);
+  return (__m128d)__builtin_elementwise_fma((__v2df)__A, (__v2df)__B,
+                                            -(__v2df)__C);
 }
 
 /// Computes a scalar multiply-subtract of the single-precision values in
@@ -179,10 +195,10 @@ _mm_fmsub_pd(__m128d __A, __m128d __B, __m128d __C)
 ///   32 bits.
 /// \returns A 128-bit vector of [4 x float] containing the result in the low
 ///    32 bits, and a copy of \a __A[127:32] in the upper 96 bits.
-static __inline__ __m128 __DEFAULT_FN_ATTRS128
-_mm_fmsub_ss(__m128 __A, __m128 __B, __m128 __C)
-{
-  return (__m128)__builtin_ia32_vfmaddss3((__v4sf)__A, (__v4sf)__B, -(__v4sf)__C);
+static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR
+_mm_fmsub_ss(__m128 __A, __m128 __B, __m128 __C) {
+  __A[0] = __builtin_elementwise_fma(__A[0], __B[0], -__C[0]);
+  return __A;
 }
 
 /// Computes a scalar multiply-subtract of the double-precision values in
@@ -208,10 +224,10 @@ _mm_fmsub_ss(__m128 __A, __m128 __B, __m128 __C)
 ///    64 bits.
 /// \returns A 128-bit vector of [2 x double] containing the result in the low
 ///    64 bits, and a copy of \a __A[127:64] in the upper 64 bits.
-static __inline__ __m128d __DEFAULT_FN_ATTRS128
-_mm_fmsub_sd(__m128d __A, __m128d __B, __m128d __C)
-{
-  return (__m128d)__builtin_ia32_vfmaddsd3((__v2df)__A, (__v2df)__B, -(__v2df)__C);
+static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR
+_mm_fmsub_sd(__m128d __A, __m128d __B, __m128d __C) {
+  __A[0] = __builtin_elementwise_fma(__A[0], __B[0], -__C[0]);
+  return __A;
 }
 
 /// Computes a negated multiply-add of 128-bit vectors of [4 x float].
@@ -228,10 +244,11 @@ _mm_fmsub_sd(__m128d __A, __m128d __B, __m128d __C)
 /// \param __C
 ///    A 128-bit vector of [4 x float] containing the addend.
 /// \returns A 128-bit [4 x float] vector containing the result.
-static __inline__ __m128 __DEFAULT_FN_ATTRS128
+static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR
 _mm_fnmadd_ps(__m128 __A, __m128 __B, __m128 __C)
 {
-  return (__m128)__builtin_ia32_vfmaddps(-(__v4sf)__A, (__v4sf)__B, (__v4sf)__C);
+  return (__m128)__builtin_elementwise_fma(-(__v4sf)__A, (__v4sf)__B,
+                                           (__v4sf)__C);
 }
 
 /// Computes a negated multiply-add of 128-bit vectors of [2 x double].
@@ -248,10 +265,11 @@ _mm_fnmadd_ps(__m128 __A, __m128 __B, __m128 __C)
 /// \param __C
 ///    A 128-bit vector of [2 x double] containing the addend.
 /// \returns A 128-bit vector of [2 x double] containing the result.
-static __inline__ __m128d __DEFAULT_FN_ATTRS128
+static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR
 _mm_fnmadd_pd(__m128d __A, __m128d __B, __m128d __C)
 {
-  return (__m128d)__builtin_ia32_vfmaddpd(-(__v2df)__A, (__v2df)__B, (__v2df)__C);
+  return (__m128d)__builtin_elementwise_fma(-(__v2df)__A, (__v2df)__B,
+                                            (__v2df)__C);
 }
 
 /// Computes a scalar negated multiply-add of the single-precision values in
@@ -277,10 +295,10 @@ _mm_fnmadd_pd(__m128d __A, __m128d __B, __m128d __C)
 ///    32 bits.
 /// \returns A 128-bit vector of [4 x float] containing the result in the low
 ///    32 bits, and a copy of \a __A[127:32] in the upper 96 bits.
-static __inline__ __m128 __DEFAULT_FN_ATTRS128
-_mm_fnmadd_ss(__m128 __A, __m128 __B, __m128 __C)
-{
-  return (__m128)__builtin_ia32_vfmaddss3((__v4sf)__A, -(__v4sf)__B, (__v4sf)__C);
+static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR
+_mm_fnmadd_ss(__m128 __A, __m128 __B, __m128 __C) {
+  __A[0] = __builtin_elementwise_fma(__A[0], -__B[0], __C[0]);
+  return __A;
 }
 
 /// Computes a scalar negated multiply-add of the double-precision values
@@ -306,10 +324,10 @@ _mm_fnmadd_ss(__m128 __A, __m128 __B, __m128 __C)
 ///    64 bits.
 /// \returns A 128-bit vector of [2 x double] containing the result in the low
 ///    64 bits, and a copy of \a __A[127:64] in the upper 64 bits.
-static __inline__ __m128d __DEFAULT_FN_ATTRS128
-_mm_fnmadd_sd(__m128d __A, __m128d __B, __m128d __C)
-{
-  return (__m128d)__builtin_ia32_vfmaddsd3((__v2df)__A, -(__v2df)__B, (__v2df)__C);
+static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR
+_mm_fnmadd_sd(__m128d __A, __m128d __B, __m128d __C) {
+  __A[0] = __builtin_elementwise_fma(__A[0], -__B[0], __C[0]);
+  return __A;
 }
 
 /// Computes a negated multiply-subtract of 128-bit vectors of [4 x float].
@@ -326,10 +344,11 @@ _mm_fnmadd_sd(__m128d __A, __m128d __B, __m128d __C)
 /// \param __C
 ///    A 128-bit vector of [4 x float] containing the subtrahend.
 /// \returns A 128-bit vector of [4 x float] containing the result.
-static __inline__ __m128 __DEFAULT_FN_ATTRS128
+static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR
 _mm_fnmsub_ps(__m128 __A, __m128 __B, __m128 __C)
 {
-  return (__m128)__builtin_ia32_vfmaddps(-(__v4sf)__A, (__v4sf)__B, -(__v4sf)__C);
+  return (__m128)__builtin_elementwise_fma(-(__v4sf)__A, (__v4sf)__B,
+                                           -(__v4sf)__C);
 }
 
 /// Computes a negated multiply-subtract of 128-bit vectors of [2 x double].
@@ -346,10 +365,11 @@ _mm_fnmsub_ps(__m128 __A, __m128 __B, __m128 __C)
 /// \param __C
 ///    A 128-bit vector of [2 x double] containing the subtrahend.
 /// \returns A 128-bit vector of [2 x double] containing the result.
-static __inline__ __m128d __DEFAULT_FN_ATTRS128
+static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR
 _mm_fnmsub_pd(__m128d __A, __m128d __B, __m128d __C)
 {
-  return (__m128d)__builtin_ia32_vfmaddpd(-(__v2df)__A, (__v2df)__B, -(__v2df)__C);
+  return (__m128d)__builtin_elementwise_fma(-(__v2df)__A, (__v2df)__B,
+                                            -(__v2df)__C);
 }
 
 /// Computes a scalar negated multiply-subtract of the single-precision
@@ -375,10 +395,10 @@ _mm_fnmsub_pd(__m128d __A, __m128d __B, __m128d __C)
 ///    32 bits.
 /// \returns A 128-bit vector of [4 x float] containing the result in the low
 ///    32 bits, and a copy of \a __A[127:32] in the upper 96 bits.
-static __inline__ __m128 __DEFAULT_FN_ATTRS128
-_mm_fnmsub_ss(__m128 __A, __m128 __B, __m128 __C)
-{
-  return (__m128)__builtin_ia32_vfmaddss3((__v4sf)__A, -(__v4sf)__B, -(__v4sf)__C);
+static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR
+_mm_fnmsub_ss(__m128 __A, __m128 __B, __m128 __C) {
+  __A[0] = __builtin_elementwise_fma(__A[0], -__B[0], -__C[0]);
+  return __A;
 }
 
 /// Computes a scalar negated multiply-subtract of the double-precision
@@ -404,10 +424,10 @@ _mm_fnmsub_ss(__m128 __A, __m128 __B, __m128 __C)
 ///    64 bits.
 /// \returns A 128-bit vector of [2 x double] containing the result in the low
 ///    64 bits, and a copy of \a __A[127:64] in the upper 64 bits.
-static __inline__ __m128d __DEFAULT_FN_ATTRS128
-_mm_fnmsub_sd(__m128d __A, __m128d __B, __m128d __C)
-{
-  return (__m128d)__builtin_ia32_vfmaddsd3((__v2df)__A, -(__v2df)__B, -(__v2df)__C);
+static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR
+_mm_fnmsub_sd(__m128d __A, __m128d __B, __m128d __C) {
+  __A[0] = __builtin_elementwise_fma(__A[0], -__B[0], -__C[0]);
+  return __A;
 }
 
 /// Computes a multiply with alternating add/subtract of 128-bit vectors of
@@ -528,10 +548,11 @@ _mm_fmsubadd_pd(__m128d __A, __m128d __B, __m128d __C)
 /// \param __C
 ///    A 256-bit vector of [8 x float] containing the addend.
 /// \returns A 256-bit vector of [8 x float] containing the result.
-static __inline__ __m256 __DEFAULT_FN_ATTRS256
+static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR
 _mm256_fmadd_ps(__m256 __A, __m256 __B, __m256 __C)
 {
-  return (__m256)__builtin_ia32_vfmaddps256((__v8sf)__A, (__v8sf)__B, (__v8sf)__C);
+  return (__m256)__builtin_elementwise_fma((__v8sf)__A, (__v8sf)__B,
+                                           (__v8sf)__C);
 }
 
 /// Computes a multiply-add of 256-bit vectors of [4 x double].
@@ -548,10 +569,11 @@ _mm256_fmadd_ps(__m256 __A, __m256 __B, __m256 __C)
 /// \param __C
 ///    A 256-bit vector of [4 x double] containing the addend.
 /// \returns A 256-bit vector of [4 x double] containing the result.
-static __inline__ __m256d __DEFAULT_FN_ATTRS256
+static __inline__ __m256d __DEFAULT_FN_ATTRS256_CONSTEXPR
 _mm256_fmadd_pd(__m256d __A, __m256d __B, __m256d __C)
 {
-  return (__m256d)__builtin_ia32_vfmaddpd256((__v4df)__A, (__v4df)__B, (__v4df)__C);
+  return (__m256d)__builtin_elementwise_fma((__v4df)__A, (__v4df)__B,
+                                            (__v4df)__C);
 }
 
 /// Computes a multiply-subtract of 256-bit vectors of [8 x float].
@@ -568,10 +590,11 @@ _mm256_fmadd_pd(__m256d __A, __m256d __B, __m256d __C)
 /// \param __C
 ///    A 256-bit vector of [8 x float] containing the subtrahend.
 /// \returns A 256-bit vector of [8 x float] containing the result.
-static __inline__ __m256 __DEFAULT_FN_ATTRS256
+static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR
 _mm256_fmsub_ps(__m256 __A, __m256 __B, __m256 __C)
 {
-  return (__m256)__builtin_ia32_vfmaddps256((__v8sf)__A, (__v8sf)__B, -(__v8sf)__C);
+  return (__m256)__builtin_elementwise_fma((__v8sf)__A, (__v8sf)__B,
+                                           -(__v8sf)__C);
 }
 
 /// Computes a multiply-subtract of 256-bit vectors of [4 x double].
@@ -588,10 +611,11 @@ _mm256_fmsub_ps(__m256 __A, __m256 __B, __m256 __C)
 /// \param __C
 ///    A 256-bit vector of [4 x double] containing the subtrahend.
 /// \returns A 256-bit vector of [4 x double] containing the result.
-static __inline__ __m256d __DEFAULT_FN_ATTRS256
+static __inline__ __m256d __DEFAULT_FN_ATTRS256_CONSTEXPR
 _mm256_fmsub_pd(__m256d __A, __m256d __B, __m256d __C)
 {
-  return (__m256d)__builtin_ia32_vfmaddpd256((__v4df)__A, (__v4df)__B, -(__v4df)__C);
+  return (__m256d)__builtin_elementwise_fma((__v4df)__A, (__v4df)__B,
+                                            -(__v4df)__C);
 }
 
 /// Computes a negated multiply-add of 256-bit vectors of [8 x float].
@@ -608,10 +632,11 @@ _mm256_fmsub_pd(__m256d __A, __m256d __B, __m256d __C)
 /// \param __C
 ///    A 256-bit vector of [8 x float] containing the addend.
 /// \returns A 256-bit vector of [8 x float] containing the result.
-static __inline__ __m256 __DEFAULT_FN_ATTRS256
+static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR
 _mm256_fnmadd_ps(__m256 __A, __m256 __B, __m256 __C)
 {
-  return (__m256)__builtin_ia32_vfmaddps256(-(__v8sf)__A, (__v8sf)__B, (__v8sf)__C);
+  return (__m256)__builtin_elementwise_fma(-(__v8sf)__A, (__v8sf)__B,
+                                           (__v8sf)__C);
 }
 
 /// Computes a negated multiply-add of 256-bit vectors of [4 x double].
@@ -628,10 +653,11 @@ _mm256_fnmadd_ps(__m256 __A, __m256 __B, __m256 __C)
 /// \param __C
 ///    A 256-bit vector of [4 x double] containing the addend.
 /// \returns A 256-bit vector of [4 x double] containing the result.
-static __inline__ __m256d __DEFAULT_FN_ATTRS256
+static __inline__ __m256d __DEFAULT_FN_ATTRS256_CONSTEXPR
 _mm256_fnmadd_pd(__m256d __A, __m256d __B, __m256d __C)
 {
-  return (__m256d)__builtin_ia32_vfmaddpd256(-(__v4df)__A, (__v4df)__B, (__v4df)__C);
+  return (__m256d)__builtin_elementwise_fma(-(__v4df)__A, (__v4df)__B,
+                                            (__v4df)__C);
 }
 
 /// Computes a negated multiply-subtract of 256-bit vectors of [8 x float].
@@ -648,10 +674,11 @@ _mm256_fnmadd_pd(__m256d __A, __m256d __B, __m256d __C)
 /// \param __C
 ///    A 256-bit vector of [8 x float] containing the subtrahend.
 /// \returns A 256-bit vector of [8 x float] containing the result.
-static __inline__ __m256 __DEFAULT_FN_ATTRS256
+static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR
 _mm256_fnmsub_ps(__m256 __A, __m256 __B, __m256 __C)
 {
-  return (__m256)__builtin_ia32_vfmaddps256(-(__v8sf)__A, (__v8sf)__B, -(__v8sf)__C);
+  return (__m256)__builtin_elementwise_fma(-(__v8sf)__A, (__v8sf)__B,
+                                           -(__v8sf)__C);
 }
 
 /// Computes a negated multiply-subtract of 256-bit vectors of [4 x double].
@@ -668,10 +695,11 @@ _mm256_fnmsub_ps(__m256 __A, __m256 __B, __m256 __C)
 /// \param __C
 ///    A 256-bit vector of [4 x double] containing the subtrahend.
 /// \returns A 256-bit vector of [4 x double] containing the result.
-static __inline__ __m256d __DEFAULT_FN_ATTRS256
+static __inline__ __m256d __DEFAULT_FN_ATTRS256_CONSTEXPR
 _mm256_fnmsub_pd(__m256d __A, __m256d __B, __m256d __C)
 {
-  return (__m256d)__builtin_ia32_vfmaddpd256(-(__v4df)__A, (__v4df)__B, -(__v4df)__C);
+  return (__m256d)__builtin_elementwise_fma(-(__v4df)__A, (__v4df)__B,
+                                            -(__v4df)__C);
 }
 
 /// Computes a multiply with alternating add/subtract of 256-bit vectors of
@@ -792,5 +820,7 @@ _mm256_fmsubadd_pd(__m256d __A, __m256d __B, __m256d __C)
 
 #undef __DEFAULT_FN_ATTRS128
 #undef __DEFAULT_FN_ATTRS256
+#undef __DEFAULT_FN_ATTRS128_CONSTEXPR
+#undef __DEFAULT_FN_ATTRS256_CONSTEXPR
 
 #endif /* __FMAINTRIN_H */
diff --git a/lib/include/gfniintrin.h b/lib/include/gfniintrin.h
index 9a5743d4b6..2c559f13c6 100644
--- a/lib/include/gfniintrin.h
+++ b/lib/include/gfniintrin.h
@@ -14,29 +14,36 @@
 #ifndef __GFNIINTRIN_H
 #define __GFNIINTRIN_H
 
-#if defined(__EVEX512__) && !defined(__AVX10_1_512__)
 /* Default attributes for simple form (no masking). */
+#if defined(__cplusplus) && (__cplusplus >= 201103L)
 #define __DEFAULT_FN_ATTRS                                                     \
-  __attribute__((__always_inline__, __nodebug__,                               \
-                 __target__("gfni,no-evex512"), __min_vector_width__(128)))
+  __attribute__((__always_inline__, __nodebug__, __target__("gfni"),           \
+                 __min_vector_width__(128))) constexpr
 
 /* Default attributes for YMM unmasked form. */
 #define __DEFAULT_FN_ATTRS_Y                                                   \
-  __attribute__((__always_inline__, __nodebug__,                               \
-                 __target__("avx,gfni,no-evex512"),                            \
-                 __min_vector_width__(256)))
+  __attribute__((__always_inline__, __nodebug__, __target__("avx,gfni"),       \
+                 __min_vector_width__(256))) constexpr
 
 /* Default attributes for VLX masked forms. */
 #define __DEFAULT_FN_ATTRS_VL128                                               \
   __attribute__((__always_inline__, __nodebug__,                               \
-                 __target__("avx512bw,avx512vl,gfni,no-evex512"),              \
-                 __min_vector_width__(128)))
+                 __target__("avx512bw,avx512vl,gfni"),                         \
+                 __min_vector_width__(128))) constexpr
 #define __DEFAULT_FN_ATTRS_VL256                                               \
   __attribute__((__always_inline__, __nodebug__,                               \
-                 __target__("avx512bw,avx512vl,gfni,no-evex512"),              \
-                 __min_vector_width__(256)))
+                 __target__("avx512bw,avx512vl,gfni"),                         \
+                 __min_vector_width__(256))) constexpr
+
+/* Default attributes for ZMM unmasked forms. */
+#define __DEFAULT_FN_ATTRS_Z                                                   \
+  __attribute__((__always_inline__, __nodebug__, __target__("avx512f,gfni"),   \
+                 __min_vector_width__(512))) constexpr
+/* Default attributes for ZMM masked forms. */
+#define __DEFAULT_FN_ATTRS_Z_MASK                                              \
+  __attribute__((__always_inline__, __nodebug__, __target__("avx512bw,gfni"),  \
+                 __min_vector_width__(512))) constexpr
 #else
-/* Default attributes for simple form (no masking). */
 #define __DEFAULT_FN_ATTRS                                                     \
   __attribute__((__always_inline__, __nodebug__, __target__("gfni"),           \
                  __min_vector_width__(128)))
@@ -55,18 +62,16 @@
   __attribute__((__always_inline__, __nodebug__,                               \
                  __target__("avx512bw,avx512vl,gfni"),                         \
                  __min_vector_width__(256)))
-#endif
 
 /* Default attributes for ZMM unmasked forms. */
 #define __DEFAULT_FN_ATTRS_Z                                                   \
-  __attribute__((__always_inline__, __nodebug__,                               \
-                 __target__("avx512f,evex512,gfni"),                           \
+  __attribute__((__always_inline__, __nodebug__, __target__("avx512f,gfni"),   \
                  __min_vector_width__(512)))
 /* Default attributes for ZMM masked forms. */
 #define __DEFAULT_FN_ATTRS_Z_MASK                                              \
-  __attribute__((__always_inline__, __nodebug__,                               \
-                 __target__("avx512bw,evex512,gfni"),                          \
+  __attribute__((__always_inline__, __nodebug__, __target__("avx512bw,gfni"),  \
                  __min_vector_width__(512)))
+#endif
 
 #define _mm_gf2p8affineinv_epi64_epi8(A, B, I) \
   ((__m128i)__builtin_ia32_vgf2p8affineinvqb_v16qi((__v16qi)(__m128i)(A), \
diff --git a/lib/include/hexagon_types.h b/lib/include/hexagon_types.h
index 8e73fad4bc..54e8c1dd69 100644
--- a/lib/include/hexagon_types.h
+++ b/lib/include/hexagon_types.h
@@ -11,6 +11,11 @@
 
 #include <hexagon_protos.h>
 
+// Save and undefine B0 to avoid conflicts with POSIX termios.h which
+// defines B0 as a macro for baud rate 0.
+#pragma push_macro("B0")
+#undef B0
+
 /* Hexagon names */
 #define HEXAGON_Vect HEXAGON_Vect64
 #define HEXAGON_V_GET_D HEXAGON_V64_GET_D
@@ -697,9 +702,8 @@ public:
   };
 
   // Extract byte methods
-  signed char B0(void) {
-    return HEXAGON_V64_GET_B0(data);
-  };
+  signed char b0(void) { return HEXAGON_V64_GET_B0(data); };
+  signed char B0(void) { return b0(); };
   signed char B1(void) {
     return HEXAGON_V64_GET_B1(data);
   };
@@ -776,9 +780,10 @@ public:
   };
 
   // Set byte methods
-  HEXAGON_Vect64C B0(signed char b) {
+  HEXAGON_Vect64C b0(signed char b) {
     return HEXAGON_Vect64C(HEXAGON_V64_PUT_B0(data, b));
   };
+  HEXAGON_Vect64C B0(signed char b) { return b0(b); };
   HEXAGON_Vect64C B1(signed char b) {
     return HEXAGON_Vect64C(HEXAGON_V64_PUT_B1(data, b));
   };
@@ -1121,9 +1126,8 @@ public:
   };
 
   // Extract byte methods
-  signed char B0(void) {
-    return HEXAGON_V32_GET_B0(data);
-  };
+  signed char b0(void) { return HEXAGON_V32_GET_B0(data); };
+  signed char B0(void) { return b0(); };
   signed char B1(void) {
     return HEXAGON_V32_GET_B1(data);
   };
@@ -1162,9 +1166,10 @@ public:
   };
 
   // Set byte methods
-  HEXAGON_Vect32C B0(signed char b) {
+  HEXAGON_Vect32C b0(signed char b) {
     return HEXAGON_Vect32C(HEXAGON_V32_PUT_B0(data, b));
   };
+  HEXAGON_Vect32C B0(signed char b) { return b0(b); };
   HEXAGON_Vect32C B1(signed char b) {
     return HEXAGON_Vect32C(HEXAGON_V32_PUT_B1(data, b));
   };
@@ -1924,9 +1929,8 @@ public:
   };
 
   // Extract byte methods
-  signed char B0(void) {
-    return Q6V64_GET_B0(data);
-  };
+  signed char b0(void) { return Q6V64_GET_B0(data); };
+  signed char B0(void) { return b0(); };
   signed char B1(void) {
     return Q6V64_GET_B1(data);
   };
@@ -2003,9 +2007,8 @@ public:
   };
 
   // Set byte methods
-  Q6Vect64C B0(signed char b) {
-    return Q6Vect64C(Q6V64_PUT_B0(data, b));
-  };
+  Q6Vect64C b0(signed char b) { return Q6Vect64C(Q6V64_PUT_B0(data, b)); };
+  Q6Vect64C B0(signed char b) { return b0(b); };
   Q6Vect64C B1(signed char b) {
     return Q6Vect64C(Q6V64_PUT_B1(data, b));
   };
@@ -2348,9 +2351,8 @@ public:
   };
 
   // Extract byte methods
-  signed char B0(void) {
-    return Q6V32_GET_B0(data);
-  };
+  signed char b0(void) { return Q6V32_GET_B0(data); };
+  signed char B0(void) { return b0(); };
   signed char B1(void) {
     return Q6V32_GET_B1(data);
   };
@@ -2389,9 +2391,8 @@ public:
   };
 
   // Set byte methods
-  Q6Vect32C B0(signed char b) {
-    return Q6Vect32C(Q6V32_PUT_B0(data, b));
-  };
+  Q6Vect32C b0(signed char b) { return Q6Vect32C(Q6V32_PUT_B0(data, b)); };
+  Q6Vect32C B0(signed char b) { return b0(b); };
   Q6Vect32C B1(signed char b) {
     return Q6Vect32C(Q6V32_PUT_B1(data, b));
   };
@@ -2622,4 +2623,6 @@ typedef struct hexagon_udma_descriptor_type1_s
     unsigned int dstwidthoffset:16;
 } hexagon_udma_descriptor_type1_t;
 
+#pragma pop_macro("B0")
+
 #endif /* !HEXAGON_TYPES_H */
diff --git a/lib/include/hvx_hexagon_protos.h b/lib/include/hvx_hexagon_protos.h
index fd120a589f..981fbd1a12 100644
--- a/lib/include/hvx_hexagon_protos.h
+++ b/lib/include/hvx_hexagon_protos.h
@@ -19,7 +19,6 @@
 #define __BUILTIN_VECTOR_WRAP(a) a
 #endif
 
-#if __HVX_ARCH__ >= 60
 /* ==========================================================================
    Assembly Syntax:       Rd32=vextract(Vu32,Rs32)
    C Intrinsic Prototype: Word32 Q6_R_vextract_VR(HVX_Vector Vu, Word32 Rs)
@@ -28,9 +27,7 @@
    ========================================================================== */
 
 #define Q6_R_vextract_VR(Vu,Rs) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_extractw)(Vu,Rs)
-#endif /* __HEXAGON_ARCH___ >= 60 */
 
-#if __HVX_ARCH__ >= 60
 /* ==========================================================================
    Assembly Syntax:       Vd32=hi(Vss32)
    C Intrinsic Prototype: HVX_Vector Q6_V_hi_W(HVX_VectorPair Vss)
@@ -39,9 +36,7 @@
    ========================================================================== */
 
 #define Q6_V_hi_W(Vss) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_hi)(Vss)
-#endif /* __HEXAGON_ARCH___ >= 60 */
 
-#if __HVX_ARCH__ >= 60
 /* ==========================================================================
    Assembly Syntax:       Vd32=lo(Vss32)
    C Intrinsic Prototype: HVX_Vector Q6_V_lo_W(HVX_VectorPair Vss)
@@ -50,9 +45,7 @@
    ========================================================================== */
 
 #define Q6_V_lo_W(Vss) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_lo)(Vss)
-#endif /* __HEXAGON_ARCH___ >= 60 */
 
-#if __HVX_ARCH__ >= 60
 /* ==========================================================================
    Assembly Syntax:       Vd32=vsplat(Rt32)
    C Intrinsic Prototype: HVX_Vector Q6_V_vsplat_R(Word32 Rt)
@@ -61,9 +54,7 @@
    ========================================================================== */
 
 #define Q6_V_vsplat_R(Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_lvsplatw)(Rt)
-#endif /* __HEXAGON_ARCH___ >= 60 */
 
-#if __HVX_ARCH__ >= 60
 /* ==========================================================================
    Assembly Syntax:       Qd4=and(Qs4,Qt4)
    C Intrinsic Prototype: HVX_VectorPred Q6_Q_and_QQ(HVX_VectorPred Qs, HVX_VectorPred Qt)
@@ -72,9 +63,7 @@
    ========================================================================== */
 
 #define Q6_Q_and_QQ(Qs,Qt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandqrt)((__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_pred_and)(__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qs),-1),__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qt),-1))),-1)
-#endif /* __HEXAGON_ARCH___ >= 60 */
 
-#if __HVX_ARCH__ >= 60
 /* ==========================================================================
    Assembly Syntax:       Qd4=and(Qs4,!Qt4)
    C Intrinsic Prototype: HVX_VectorPred Q6_Q_and_QQn(HVX_VectorPred Qs, HVX_VectorPred Qt)
@@ -83,9 +72,7 @@
    ========================================================================== */
 
 #define Q6_Q_and_QQn(Qs,Qt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandqrt)((__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_pred_and_n)(__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qs),-1),__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qt),-1))),-1)
-#endif /* __HEXAGON_ARCH___ >= 60 */
 
-#if __HVX_ARCH__ >= 60
 /* ==========================================================================
    Assembly Syntax:       Qd4=not(Qs4)
    C Intrinsic Prototype: HVX_VectorPred Q6_Q_not_Q(HVX_VectorPred Qs)
@@ -94,9 +81,7 @@
    ========================================================================== */
 
 #define Q6_Q_not_Q(Qs) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandqrt)((__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_pred_not)(__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qs),-1))),-1)
-#endif /* __HEXAGON_ARCH___ >= 60 */
 
-#if __HVX_ARCH__ >= 60
 /* ==========================================================================
    Assembly Syntax:       Qd4=or(Qs4,Qt4)
    C Intrinsic Prototype: HVX_VectorPred Q6_Q_or_QQ(HVX_VectorPred Qs, HVX_VectorPred Qt)
@@ -105,9 +90,7 @@
    ========================================================================== */
 
 #define Q6_Q_or_QQ(Qs,Qt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandqrt)((__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_pred_or)(__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qs),-1),__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qt),-1))),-1)
-#endif /* __HEXAGON_ARCH___ >= 60 */
 
-#if __HVX_ARCH__ >= 60
 /* ==========================================================================
    Assembly Syntax:       Qd4=or(Qs4,!Qt4)
    C Intrinsic Prototype: HVX_VectorPred Q6_Q_or_QQn(HVX_VectorPred Qs, HVX_VectorPred Qt)
@@ -116,9 +99,7 @@
    ========================================================================== */
 
 #define Q6_Q_or_QQn(Qs,Qt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandqrt)((__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_pred_or_n)(__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qs),-1),__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qt),-1))),-1)
-#endif /* __HEXAGON_ARCH___ >= 60 */
 
-#if __HVX_ARCH__ >= 60
 /* ==========================================================================
    Assembly Syntax:       Qd4=vsetq(Rt32)
    C Intrinsic Prototype: HVX_VectorPred Q6_Q_vsetq_R(Word32 Rt)
@@ -127,9 +108,7 @@
    ========================================================================== */
 
 #define Q6_Q_vsetq_R(Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandqrt)((__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_pred_scalar2)(Rt)),-1)
-#endif /* __HEXAGON_ARCH___ >= 60 */
 
-#if __HVX_ARCH__ >= 60
 /* ==========================================================================
    Assembly Syntax:       Qd4=xor(Qs4,Qt4)
    C Intrinsic Prototype: HVX_VectorPred Q6_Q_xor_QQ(HVX_VectorPred Qs, HVX_VectorPred Qt)
@@ -138,9 +117,7 @@
    ========================================================================== */
 
 #define Q6_Q_xor_QQ(Qs,Qt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandqrt)((__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_pred_xor)(__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qs),-1),__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qt),-1))),-1)
-#endif /* __HEXAGON_ARCH___ >= 60 */
 
-#if __HVX_ARCH__ >= 60
 /* ==========================================================================
    Assembly Syntax:       if (!Qv4) vmem(Rt32+#s4)=Vs32
    C Intrinsic Prototype: void Q6_vmem_QnRIV(HVX_VectorPred Qv, HVX_Vector* Rt, HVX_Vector Vs)
@@ -149,9 +126,7 @@
    ========================================================================== */
 
 #define Q6_vmem_QnRIV(Qv,Rt,Vs) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vS32b_nqpred_ai)(__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qv),-1),Rt,Vs)
-#endif /* __HEXAGON_ARCH___ >= 60 */
 
-#if __HVX_ARCH__ >= 60
 /* ==========================================================================
    Assembly Syntax:       if (!Qv4) vmem(Rt32+#s4):nt=Vs32
    C Intrinsic Prototype: void Q6_vmem_QnRIV_nt(HVX_VectorPred Qv, HVX_Vector* Rt, HVX_Vector Vs)
@@ -160,9 +135,7 @@
    ========================================================================== */
 
 #define Q6_vmem_QnRIV_nt(Qv,Rt,Vs) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vS32b_nt_nqpred_ai)(__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qv),-1),Rt,Vs)
-#endif /* __HEXAGON_ARCH___ >= 60 */
 
-#if __HVX_ARCH__ >= 60
 /* ==========================================================================
    Assembly Syntax:       if (Qv4) vmem(Rt32+#s4):nt=Vs32
    C Intrinsic Prototype: void Q6_vmem_QRIV_nt(HVX_VectorPred Qv, HVX_Vector* Rt, HVX_Vector Vs)
@@ -171,9 +144,7 @@
    ========================================================================== */
 
 #define Q6_vmem_QRIV_nt(Qv,Rt,Vs) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vS32b_nt_qpred_ai)(__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qv),-1),Rt,Vs)
-#endif /* __HEXAGON_ARCH___ >= 60 */
 
-#if __HVX_ARCH__ >= 60
 /* ==========================================================================
    Assembly Syntax:       if (Qv4) vmem(Rt32+#s4)=Vs32
    C Intrinsic Prototype: void Q6_vmem_QRIV(HVX_VectorPred Qv, HVX_Vector* Rt, HVX_Vector Vs)
@@ -182,9 +153,7 @@
    ========================================================================== */
 
 #define Q6_vmem_QRIV(Qv,Rt,Vs) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vS32b_qpred_ai)(__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qv),-1),Rt,Vs)
-#endif /* __HEXAGON_ARCH___ >= 60 */
 
-#if __HVX_ARCH__ >= 60
 /* ==========================================================================
    Assembly Syntax:       Vd32.uh=vabsdiff(Vu32.h,Vv32.h)
    C Intrinsic Prototype: HVX_Vector Q6_Vuh_vabsdiff_VhVh(HVX_Vector Vu, HVX_Vector Vv)
@@ -193,9 +162,7 @@
    ========================================================================== */
 
 #define Q6_Vuh_vabsdiff_VhVh(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vabsdiffh)(Vu,Vv)
-#endif /* __HEXAGON_ARCH___ >= 60 */
 
-#if __HVX_ARCH__ >= 60
 /* ==========================================================================
    Assembly Syntax:       Vd32.ub=vabsdiff(Vu32.ub,Vv32.ub)
    C Intrinsic Prototype: HVX_Vector Q6_Vub_vabsdiff_VubVub(HVX_Vector Vu, HVX_Vector Vv)
@@ -204,9 +171,7 @@
    ========================================================================== */
 
 #define Q6_Vub_vabsdiff_VubVub(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vabsdiffub)(Vu,Vv)
-#endif /* __HEXAGON_ARCH___ >= 60 */
 
-#if __HVX_ARCH__ >= 60
 /* ==========================================================================
    Assembly Syntax:       Vd32.uh=vabsdiff(Vu32.uh,Vv32.uh)
    C Intrinsic Prototype: HVX_Vector Q6_Vuh_vabsdiff_VuhVuh(HVX_Vector Vu, HVX_Vector Vv)
@@ -215,9 +180,7 @@
    ========================================================================== */
 
 #define Q6_Vuh_vabsdiff_VuhVuh(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vabsdiffuh)(Vu,Vv)
-#endif /* __HEXAGON_ARCH___ >= 60 */
 
-#if __HVX_ARCH__ >= 60
 /* ==========================================================================
    Assembly Syntax:       Vd32.uw=vabsdiff(Vu32.w,Vv32.w)
    C Intrinsic Prototype: HVX_Vector Q6_Vuw_vabsdiff_VwVw(HVX_Vector Vu, HVX_Vector Vv)
@@ -226,9 +189,7 @@
    ========================================================================== */
 
 #define Q6_Vuw_vabsdiff_VwVw(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vabsdiffw)(Vu,Vv)
-#endif /* __HEXAGON_ARCH___ >= 60 */
 
-#if __HVX_ARCH__ >= 60
 /* ==========================================================================
    Assembly Syntax:       Vd32.h=vabs(Vu32.h)
    C Intrinsic Prototype: HVX_Vector Q6_Vh_vabs_Vh(HVX_Vector Vu)
@@ -237,9 +198,7 @@
    ========================================================================== */
 
 #define Q6_Vh_vabs_Vh(Vu) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vabsh)(Vu)
-#endif /* __HEXAGON_ARCH___ >= 60 */
 
-#if __HVX_ARCH__ >= 60
 /* ==========================================================================
    Assembly Syntax:       Vd32.h=vabs(Vu32.h):sat
    C Intrinsic Prototype: HVX_Vector Q6_Vh_vabs_Vh_sat(HVX_Vector Vu)
@@ -248,9 +207,7 @@
    ========================================================================== */
 
 #define Q6_Vh_vabs_Vh_sat(Vu) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vabsh_sat)(Vu)
-#endif /* __HEXAGON_ARCH___ >= 60 */
 
-#if __HVX_ARCH__ >= 60
 /* ==========================================================================
    Assembly Syntax:       Vd32.w=vabs(Vu32.w)
    C Intrinsic Prototype: HVX_Vector Q6_Vw_vabs_Vw(HVX_Vector Vu)
@@ -259,9 +216,7 @@
    ========================================================================== */
 
 #define Q6_Vw_vabs_Vw(Vu) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vabsw)(Vu)
-#endif /* __HEXAGON_ARCH___ >= 60 */
 
-#if __HVX_ARCH__ >= 60
 /* ==========================================================================
    Assembly Syntax:       Vd32.w=vabs(Vu32.w):sat
    C Intrinsic Prototype: HVX_Vector Q6_Vw_vabs_Vw_sat(HVX_Vector Vu)
@@ -270,9 +225,7 @@
    ========================================================================== */
 
 #define Q6_Vw_vabs_Vw_sat(Vu) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vabsw_sat)(Vu)
-#endif /* __HEXAGON_ARCH___ >= 60 */
 
-#if __HVX_ARCH__ >= 60
 /* ==========================================================================
    Assembly Syntax:       Vd32.b=vadd(Vu32.b,Vv32.b)
    C Intrinsic Prototype: HVX_Vector Q6_Vb_vadd_VbVb(HVX_Vector Vu, HVX_Vector Vv)
@@ -281,9 +234,7 @@
    ========================================================================== */
 
 #define Q6_Vb_vadd_VbVb(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vaddb)(Vu,Vv)
-#endif /* __HEXAGON_ARCH___ >= 60 */
 
-#if __HVX_ARCH__ >= 60
 /* ==========================================================================
    Assembly Syntax:       Vdd32.b=vadd(Vuu32.b,Vvv32.b)
    C Intrinsic Prototype: HVX_VectorPair Q6_Wb_vadd_WbWb(HVX_VectorPair Vuu, HVX_VectorPair Vvv)
@@ -292,9 +243,7 @@
    ========================================================================== */
 
 #define Q6_Wb_vadd_WbWb(Vuu,Vvv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vaddb_dv)(Vuu,Vvv)
-#endif /* __HEXAGON_ARCH___ >= 60 */
 
-#if __HVX_ARCH__ >= 60
 /* ==========================================================================
    Assembly Syntax:       if (!Qv4) Vx32.b+=Vu32.b
    C Intrinsic Prototype: HVX_Vector Q6_Vb_condacc_QnVbVb(HVX_VectorPred Qv, HVX_Vector Vx, HVX_Vector Vu)
@@ -303,9 +252,7 @@
    ========================================================================== */
 
 #define Q6_Vb_condacc_QnVbVb(Qv,Vx,Vu) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vaddbnq)(__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qv),-1),Vx,Vu)
-#endif /* __HEXAGON_ARCH___ >= 60 */
 
-#if __HVX_ARCH__ >= 60
 /* ==========================================================================
    Assembly Syntax:       if (Qv4) Vx32.b+=Vu32.b
    C Intrinsic Prototype: HVX_Vector Q6_Vb_condacc_QVbVb(HVX_VectorPred Qv, HVX_Vector Vx, HVX_Vector Vu)
@@ -314,9 +261,7 @@
    ========================================================================== */
 
 #define Q6_Vb_condacc_QVbVb(Qv,Vx,Vu) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vaddbq)(__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qv),-1),Vx,Vu)
-#endif /* __HEXAGON_ARCH___ >= 60 */
 
-#if __HVX_ARCH__ >= 60
 /* ==========================================================================
    Assembly Syntax:       Vd32.h=vadd(Vu32.h,Vv32.h)
    C Intrinsic Prototype: HVX_Vector Q6_Vh_vadd_VhVh(HVX_Vector Vu, HVX_Vector Vv)
@@ -325,9 +270,7 @@
    ========================================================================== */
 
 #define Q6_Vh_vadd_VhVh(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vaddh)(Vu,Vv)
-#endif /* __HEXAGON_ARCH___ >= 60 */
 
-#if __HVX_ARCH__ >= 60
 /* ==========================================================================
    Assembly Syntax:       Vdd32.h=vadd(Vuu32.h,Vvv32.h)
    C Intrinsic Prototype: HVX_VectorPair Q6_Wh_vadd_WhWh(HVX_VectorPair Vuu, HVX_VectorPair Vvv)
@@ -336,9 +279,7 @@
    ========================================================================== */
 
 #define Q6_Wh_vadd_WhWh(Vuu,Vvv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vaddh_dv)(Vuu,Vvv)
-#endif /* __HEXAGON_ARCH___ >= 60 */
 
-#if __HVX_ARCH__ >= 60
 /* ==========================================================================
    Assembly Syntax:       if (!Qv4) Vx32.h+=Vu32.h
    C Intrinsic Prototype: HVX_Vector Q6_Vh_condacc_QnVhVh(HVX_VectorPred Qv, HVX_Vector Vx, HVX_Vector Vu)
@@ -347,9 +288,7 @@
    ========================================================================== */
 
 #define Q6_Vh_condacc_QnVhVh(Qv,Vx,Vu) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vaddhnq)(__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qv),-1),Vx,Vu)
-#endif /* __HEXAGON_ARCH___ >= 60 */
 
-#if __HVX_ARCH__ >= 60
 /* ==========================================================================
    Assembly Syntax:       if (Qv4) Vx32.h+=Vu32.h
    C Intrinsic Prototype: HVX_Vector Q6_Vh_condacc_QVhVh(HVX_VectorPred Qv, HVX_Vector Vx, HVX_Vector Vu)
@@ -358,9 +297,7 @@
    ========================================================================== */
 
 #define Q6_Vh_condacc_QVhVh(Qv,Vx,Vu) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vaddhq)(__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qv),-1),Vx,Vu)
-#endif /* __HEXAGON_ARCH___ >= 60 */
 
-#if __HVX_ARCH__ >= 60
 /* ==========================================================================
    Assembly Syntax:       Vd32.h=vadd(Vu32.h,Vv32.h):sat
    C Intrinsic Prototype: HVX_Vector Q6_Vh_vadd_VhVh_sat(HVX_Vector Vu, HVX_Vector Vv)
@@ -369,9 +306,7 @@
    ========================================================================== */
 
 #define Q6_Vh_vadd_VhVh_sat(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vaddhsat)(Vu,Vv)
-#endif /* __HEXAGON_ARCH___ >= 60 */
 
-#if __HVX_ARCH__ >= 60
 /* ==========================================================================
    Assembly Syntax:       Vdd32.h=vadd(Vuu32.h,Vvv32.h):sat
    C Intrinsic Prototype: HVX_VectorPair Q6_Wh_vadd_WhWh_sat(HVX_VectorPair Vuu, HVX_VectorPair Vvv)
@@ -380,9 +315,7 @@
    ========================================================================== */
 
 #define Q6_Wh_vadd_WhWh_sat(Vuu,Vvv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vaddhsat_dv)(Vuu,Vvv)
-#endif /* __HEXAGON_ARCH___ >= 60 */
 
-#if __HVX_ARCH__ >= 60
 /* ==========================================================================
    Assembly Syntax:       Vdd32.w=vadd(Vu32.h,Vv32.h)
    C Intrinsic Prototype: HVX_VectorPair Q6_Ww_vadd_VhVh(HVX_Vector Vu, HVX_Vector Vv)
@@ -391,9 +324,7 @@
    ========================================================================== */
 
 #define Q6_Ww_vadd_VhVh(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vaddhw)(Vu,Vv)
-#endif /* __HEXAGON_ARCH___ >= 60 */
 
-#if __HVX_ARCH__ >= 60
 /* ==========================================================================
    Assembly Syntax:       Vdd32.h=vadd(Vu32.ub,Vv32.ub)
    C Intrinsic Prototype: HVX_VectorPair Q6_Wh_vadd_VubVub(HVX_Vector Vu, HVX_Vector Vv)
@@ -402,9 +333,7 @@
    ========================================================================== */
 
 #define Q6_Wh_vadd_VubVub(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vaddubh)(Vu,Vv)
-#endif /* __HEXAGON_ARCH___ >= 60 */
 
-#if __HVX_ARCH__ >= 60
 /* ==========================================================================
    Assembly Syntax:       Vd32.ub=vadd(Vu32.ub,Vv32.ub):sat
    C Intrinsic Prototype: HVX_Vector Q6_Vub_vadd_VubVub_sat(HVX_Vector Vu, HVX_Vector Vv)
@@ -413,9 +342,7 @@
    ========================================================================== */
 
 #define Q6_Vub_vadd_VubVub_sat(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vaddubsat)(Vu,Vv)
-#endif /* __HEXAGON_ARCH___ >= 60 */
 
-#if __HVX_ARCH__ >= 60
 /* ==========================================================================
    Assembly Syntax:       Vdd32.ub=vadd(Vuu32.ub,Vvv32.ub):sat
    C Intrinsic Prototype: HVX_VectorPair Q6_Wub_vadd_WubWub_sat(HVX_VectorPair Vuu, HVX_VectorPair Vvv)
@@ -424,9 +351,7 @@
    ========================================================================== */
 
 #define Q6_Wub_vadd_WubWub_sat(Vuu,Vvv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vaddubsat_dv)(Vuu,Vvv)
-#endif /* __HEXAGON_ARCH___ >= 60 */
 
-#if __HVX_ARCH__ >= 60
 /* ==========================================================================
    Assembly Syntax:       Vd32.uh=vadd(Vu32.uh,Vv32.uh):sat
    C Intrinsic Prototype: HVX_Vector Q6_Vuh_vadd_VuhVuh_sat(HVX_Vector Vu, HVX_Vector Vv)
@@ -435,9 +360,7 @@
    ========================================================================== */
 
 #define Q6_Vuh_vadd_VuhVuh_sat(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vadduhsat)(Vu,Vv)
-#endif /* __HEXAGON_ARCH___ >= 60 */
 
-#if __HVX_ARCH__ >= 60
 /* ==========================================================================
    Assembly Syntax:       Vdd32.uh=vadd(Vuu32.uh,Vvv32.uh):sat
    C Intrinsic Prototype: HVX_VectorPair Q6_Wuh_vadd_WuhWuh_sat(HVX_VectorPair Vuu, HVX_VectorPair Vvv)
@@ -446,9 +369,7 @@
    ========================================================================== */
 
 #define Q6_Wuh_vadd_WuhWuh_sat(Vuu,Vvv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vadduhsat_dv)(Vuu,Vvv)
-#endif /* __HEXAGON_ARCH___ >= 60 */
 
-#if __HVX_ARCH__ >= 60
 /* ==========================================================================
    Assembly Syntax:       Vdd32.w=vadd(Vu32.uh,Vv32.uh)
    C Intrinsic Prototype: HVX_VectorPair Q6_Ww_vadd_VuhVuh(HVX_Vector Vu, HVX_Vector Vv)
@@ -457,9 +378,7 @@
    ========================================================================== */
 
 #define Q6_Ww_vadd_VuhVuh(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vadduhw)(Vu,Vv)
-#endif /* __HEXAGON_ARCH___ >= 60 */
 
-#if __HVX_ARCH__ >= 60
 /* ==========================================================================
    Assembly Syntax:       Vd32.w=vadd(Vu32.w,Vv32.w)
    C Intrinsic Prototype: HVX_Vector Q6_Vw_vadd_VwVw(HVX_Vector Vu, HVX_Vector Vv)
@@ -468,9 +387,7 @@
    ========================================================================== */
 
 #define Q6_Vw_vadd_VwVw(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vaddw)(Vu,Vv)
-#endif /* __HEXAGON_ARCH___ >= 60 */
 
-#if __HVX_ARCH__ >= 60
 /* ==========================================================================
    Assembly Syntax:       Vdd32.w=vadd(Vuu32.w,Vvv32.w)
    C Intrinsic Prototype: HVX_VectorPair Q6_Ww_vadd_WwWw(HVX_VectorPair Vuu, HVX_VectorPair Vvv)
@@ -479,9 +396,7 @@
    ========================================================================== */
 
 #define Q6_Ww_vadd_WwWw(Vuu,Vvv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vaddw_dv)(Vuu,Vvv)
-#endif /* __HEXAGON_ARCH___ >= 60 */
 
-#if __HVX_ARCH__ >= 60
 /* ==========================================================================
    Assembly Syntax:       if (!Qv4) Vx32.w+=Vu32.w
    C Intrinsic Prototype: HVX_Vector Q6_Vw_condacc_QnVwVw(HVX_VectorPred Qv, HVX_Vector Vx, HVX_Vector Vu)
@@ -490,9 +405,7 @@
    ========================================================================== */
 
 #define Q6_Vw_condacc_QnVwVw(Qv,Vx,Vu) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vaddwnq)(__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qv),-1),Vx,Vu)
-#endif /* __HEXAGON_ARCH___ >= 60 */
 
-#if __HVX_ARCH__ >= 60
 /* ==========================================================================
    Assembly Syntax:       if (Qv4) Vx32.w+=Vu32.w
    C Intrinsic Prototype: HVX_Vector Q6_Vw_condacc_QVwVw(HVX_VectorPred Qv, HVX_Vector Vx, HVX_Vector Vu)
@@ -501,9 +414,7 @@
    ========================================================================== */
 
 #define Q6_Vw_condacc_QVwVw(Qv,Vx,Vu) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vaddwq)(__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qv),-1),Vx,Vu)
-#endif /* __HEXAGON_ARCH___ >= 60 */
 
-#if __HVX_ARCH__ >= 60
 /* ==========================================================================
    Assembly Syntax:       Vd32.w=vadd(Vu32.w,Vv32.w):sat
    C Intrinsic Prototype: HVX_Vector Q6_Vw_vadd_VwVw_sat(HVX_Vector Vu, HVX_Vector Vv)
@@ -512,9 +423,7 @@
    ========================================================================== */
 
 #define Q6_Vw_vadd_VwVw_sat(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vaddwsat)(Vu,Vv)
-#endif /* __HEXAGON_ARCH___ >= 60 */
 
-#if __HVX_ARCH__ >= 60
 /* ==========================================================================
    Assembly Syntax:       Vdd32.w=vadd(Vuu32.w,Vvv32.w):sat
    C Intrinsic Prototype: HVX_VectorPair Q6_Ww_vadd_WwWw_sat(HVX_VectorPair Vuu, HVX_VectorPair Vvv)
@@ -523,9 +432,7 @@
    ========================================================================== */
 
 #define Q6_Ww_vadd_WwWw_sat(Vuu,Vvv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vaddwsat_dv)(Vuu,Vvv)
-#endif /* __HEXAGON_ARCH___ >= 60 */
 
-#if __HVX_ARCH__ >= 60
 /* ==========================================================================
    Assembly Syntax:       Vd32=valign(Vu32,Vv32,Rt8)
    C Intrinsic Prototype: HVX_Vector Q6_V_valign_VVR(HVX_Vector Vu, HVX_Vector Vv, Word32 Rt)
@@ -534,9 +441,7 @@
    ========================================================================== */
 
 #define Q6_V_valign_VVR(Vu,Vv,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_valignb)(Vu,Vv,Rt)
-#endif /* __HEXAGON_ARCH___ >= 60 */
 
-#if __HVX_ARCH__ >= 60
 /* ==========================================================================
    Assembly Syntax:       Vd32=valign(Vu32,Vv32,#u3)
    C Intrinsic Prototype: HVX_Vector Q6_V_valign_VVI(HVX_Vector Vu, HVX_Vector Vv, Word32 Iu3)
@@ -545,9 +450,7 @@
    ========================================================================== */
 
 #define Q6_V_valign_VVI(Vu,Vv,Iu3) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_valignbi)(Vu,Vv,Iu3)
-#endif /* __HEXAGON_ARCH___ >= 60 */
 
-#if __HVX_ARCH__ >= 60
 /* ==========================================================================
    Assembly Syntax:       Vd32=vand(Vu32,Vv32)
    C Intrinsic Prototype: HVX_Vector Q6_V_vand_VV(HVX_Vector Vu, HVX_Vector Vv)
@@ -556,9 +459,7 @@
    ========================================================================== */
 
 #define Q6_V_vand_VV(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vand)(Vu,Vv)
-#endif /* __HEXAGON_ARCH___ >= 60 */
 
-#if __HVX_ARCH__ >= 60
 /* ==========================================================================
    Assembly Syntax:       Vd32=vand(Qu4,Rt32)
    C Intrinsic Prototype: HVX_Vector Q6_V_vand_QR(HVX_VectorPred Qu, Word32 Rt)
@@ -567,9 +468,7 @@
    ========================================================================== */
 
 #define Q6_V_vand_QR(Qu,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandqrt)(__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qu),-1),Rt)
-#endif /* __HEXAGON_ARCH___ >= 60 */
 
-#if __HVX_ARCH__ >= 60
 /* ==========================================================================
    Assembly Syntax:       Vx32|=vand(Qu4,Rt32)
    C Intrinsic Prototype: HVX_Vector Q6_V_vandor_VQR(HVX_Vector Vx, HVX_VectorPred Qu, Word32 Rt)
@@ -578,9 +477,7 @@
    ========================================================================== */
 
 #define Q6_V_vandor_VQR(Vx,Qu,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandqrt_acc)(Vx,__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qu),-1),Rt)
-#endif /* __HEXAGON_ARCH___ >= 60 */
 
-#if __HVX_ARCH__ >= 60
 /* ==========================================================================
    Assembly Syntax:       Qd4=vand(Vu32,Rt32)
    C Intrinsic Prototype: HVX_VectorPred Q6_Q_vand_VR(HVX_Vector Vu, Word32 Rt)
@@ -589,9 +486,7 @@
    ========================================================================== */
 
 #define Q6_Q_vand_VR(Vu,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandqrt)((__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)(Vu,Rt)),-1)
-#endif /* __HEXAGON_ARCH___ >= 60 */
 
-#if __HVX_ARCH__ >= 60
 /* ==========================================================================
    Assembly Syntax:       Qx4|=vand(Vu32,Rt32)
    C Intrinsic Prototype: HVX_VectorPred Q6_Q_vandor_QVR(HVX_VectorPred Qx, HVX_Vector Vu, Word32 Rt)
@@ -600,9 +495,7 @@
    ========================================================================== */
 
 #define Q6_Q_vandor_QVR(Qx,Vu,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandqrt)((__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt_acc)(__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qx),-1),Vu,Rt)),-1)
-#endif /* __HEXAGON_ARCH___ >= 60 */
 
-#if __HVX_ARCH__ >= 60
 /* ==========================================================================
    Assembly Syntax:       Vd32.h=vasl(Vu32.h,Rt32)
    C Intrinsic Prototype: HVX_Vector Q6_Vh_vasl_VhR(HVX_Vector Vu, Word32 Rt)
@@ -611,9 +504,7 @@
    ========================================================================== */
 
 #define Q6_Vh_vasl_VhR(Vu,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vaslh)(Vu,Rt)
-#endif /* __HEXAGON_ARCH___ >= 60 */
 
-#if __HVX_ARCH__ >= 60
 /* ==========================================================================
    Assembly Syntax:       Vd32.h=vasl(Vu32.h,Vv32.h)
    C Intrinsic Prototype: HVX_Vector Q6_Vh_vasl_VhVh(HVX_Vector Vu, HVX_Vector Vv)
@@ -622,9 +513,7 @@
    ========================================================================== */
 
 #define Q6_Vh_vasl_VhVh(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vaslhv)(Vu,Vv)
-#endif /* __HEXAGON_ARCH___ >= 60 */
 
-#if __HVX_ARCH__ >= 60
 /* ==========================================================================
    Assembly Syntax:       Vd32.w=vasl(Vu32.w,Rt32)
    C Intrinsic Prototype: HVX_Vector Q6_Vw_vasl_VwR(HVX_Vector Vu, Word32 Rt)
@@ -633,9 +522,7 @@
    ========================================================================== */
 
 #define Q6_Vw_vasl_VwR(Vu,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vaslw)(Vu,Rt)
-#endif /* __HEXAGON_ARCH___ >= 60 */
 
-#if __HVX_ARCH__ >= 60
 /* ==========================================================================
    Assembly Syntax:       Vx32.w+=vasl(Vu32.w,Rt32)
    C Intrinsic Prototype: HVX_Vector Q6_Vw_vaslacc_VwVwR(HVX_Vector Vx, HVX_Vector Vu, Word32 Rt)
@@ -644,9 +531,7 @@
    ========================================================================== */
 
 #define Q6_Vw_vaslacc_VwVwR(Vx,Vu,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vaslw_acc)(Vx,Vu,Rt)
-#endif /* __HEXAGON_ARCH___ >= 60 */
 
-#if __HVX_ARCH__ >= 60
 /* ==========================================================================
    Assembly Syntax:       Vd32.w=vasl(Vu32.w,Vv32.w)
    C Intrinsic Prototype: HVX_Vector Q6_Vw_vasl_VwVw(HVX_Vector Vu, HVX_Vector Vv)
@@ -655,9 +540,7 @@
    ========================================================================== */
 
 #define Q6_Vw_vasl_VwVw(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vaslwv)(Vu,Vv)
-#endif /* __HEXAGON_ARCH___ >= 60 */
 
-#if __HVX_ARCH__ >= 60
 /* ==========================================================================
    Assembly Syntax:       Vd32.h=vasr(Vu32.h,Rt32)
    C Intrinsic Prototype: HVX_Vector Q6_Vh_vasr_VhR(HVX_Vector Vu, Word32 Rt)
@@ -666,9 +549,7 @@
    ========================================================================== */
 
 #define Q6_Vh_vasr_VhR(Vu,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vasrh)(Vu,Rt)
-#endif /* __HEXAGON_ARCH___ >= 60 */
 
-#if __HVX_ARCH__ >= 60
 /* ==========================================================================
    Assembly Syntax:       Vd32.b=vasr(Vu32.h,Vv32.h,Rt8):rnd:sat
    C Intrinsic Prototype: HVX_Vector Q6_Vb_vasr_VhVhR_rnd_sat(HVX_Vector Vu, HVX_Vector Vv, Word32 Rt)
@@ -677,9 +558,7 @@
    ========================================================================== */
 
 #define Q6_Vb_vasr_VhVhR_rnd_sat(Vu,Vv,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vasrhbrndsat)(Vu,Vv,Rt)
-#endif /* __HEXAGON_ARCH___ >= 60 */
 
-#if __HVX_ARCH__ >= 60
 /* ==========================================================================
    Assembly Syntax:       Vd32.ub=vasr(Vu32.h,Vv32.h,Rt8):rnd:sat
    C Intrinsic Prototype: HVX_Vector Q6_Vub_vasr_VhVhR_rnd_sat(HVX_Vector Vu, HVX_Vector Vv, Word32 Rt)
@@ -688,9 +567,7 @@
    ========================================================================== */
 
 #define Q6_Vub_vasr_VhVhR_rnd_sat(Vu,Vv,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vasrhubrndsat)(Vu,Vv,Rt)
-#endif /* __HEXAGON_ARCH___ >= 60 */
 
-#if __HVX_ARCH__ >= 60
 /* ==========================================================================
    Assembly Syntax:       Vd32.ub=vasr(Vu32.h,Vv32.h,Rt8):sat
    C Intrinsic Prototype: HVX_Vector Q6_Vub_vasr_VhVhR_sat(HVX_Vector Vu, HVX_Vector Vv, Word32 Rt)
@@ -699,9 +576,7 @@
    ========================================================================== */
 
 #define Q6_Vub_vasr_VhVhR_sat(Vu,Vv,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vasrhubsat)(Vu,Vv,Rt)
-#endif /* __HEXAGON_ARCH___ >= 60 */
 
-#if __HVX_ARCH__ >= 60
 /* ==========================================================================
    Assembly Syntax:       Vd32.h=vasr(Vu32.h,Vv32.h)
    C Intrinsic Prototype: HVX_Vector Q6_Vh_vasr_VhVh(HVX_Vector Vu, HVX_Vector Vv)
@@ -710,9 +585,7 @@
    ========================================================================== */
 
 #define Q6_Vh_vasr_VhVh(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vasrhv)(Vu,Vv)
-#endif /* __HEXAGON_ARCH___ >= 60 */
 
-#if __HVX_ARCH__ >= 60
 /* ==========================================================================
    Assembly Syntax:       Vd32.w=vasr(Vu32.w,Rt32)
    C Intrinsic Prototype: HVX_Vector Q6_Vw_vasr_VwR(HVX_Vector Vu, Word32 Rt)
@@ -721,9 +594,7 @@
    ========================================================================== */
 
 #define Q6_Vw_vasr_VwR(Vu,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vasrw)(Vu,Rt)
-#endif /* __HEXAGON_ARCH___ >= 60 */
 
-#if __HVX_ARCH__ >= 60
 /* ==========================================================================
    Assembly Syntax:       Vx32.w+=vasr(Vu32.w,Rt32)
    C Intrinsic Prototype: HVX_Vector Q6_Vw_vasracc_VwVwR(HVX_Vector Vx, HVX_Vector Vu, Word32 Rt)
@@ -732,9 +603,7 @@
    ========================================================================== */
 
 #define Q6_Vw_vasracc_VwVwR(Vx,Vu,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vasrw_acc)(Vx,Vu,Rt)
-#endif /* __HEXAGON_ARCH___ >= 60 */
 
-#if __HVX_ARCH__ >= 60
 /* ==========================================================================
    Assembly Syntax:       Vd32.h=vasr(Vu32.w,Vv32.w,Rt8)
    C Intrinsic Prototype: HVX_Vector Q6_Vh_vasr_VwVwR(HVX_Vector Vu, HVX_Vector Vv, Word32 Rt)
@@ -743,9 +612,7 @@
    ========================================================================== */
 
 #define Q6_Vh_vasr_VwVwR(Vu,Vv,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vasrwh)(Vu,Vv,Rt)
-#endif /* __HEXAGON_ARCH___ >= 60 */
 
-#if __HVX_ARCH__ >= 60
 /* ==========================================================================
    Assembly Syntax:       Vd32.h=vasr(Vu32.w,Vv32.w,Rt8):rnd:sat
    C Intrinsic Prototype: HVX_Vector Q6_Vh_vasr_VwVwR_rnd_sat(HVX_Vector Vu, HVX_Vector Vv, Word32 Rt)
@@ -754,9 +621,7 @@
    ========================================================================== */
 
 #define Q6_Vh_vasr_VwVwR_rnd_sat(Vu,Vv,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vasrwhrndsat)(Vu,Vv,Rt)
-#endif /* __HEXAGON_ARCH___ >= 60 */
 
-#if __HVX_ARCH__ >= 60
 /* ==========================================================================
    Assembly Syntax:       Vd32.h=vasr(Vu32.w,Vv32.w,Rt8):sat
    C Intrinsic Prototype: HVX_Vector Q6_Vh_vasr_VwVwR_sat(HVX_Vector Vu, HVX_Vector Vv, Word32 Rt)
@@ -765,9 +630,7 @@
    ========================================================================== */
 
 #define Q6_Vh_vasr_VwVwR_sat(Vu,Vv,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vasrwhsat)(Vu,Vv,Rt)
-#endif /* __HEXAGON_ARCH___ >= 60 */
 
-#if __HVX_ARCH__ >= 60
 /* ==========================================================================
    Assembly Syntax:       Vd32.uh=vasr(Vu32.w,Vv32.w,Rt8):sat
    C Intrinsic Prototype: HVX_Vector Q6_Vuh_vasr_VwVwR_sat(HVX_Vector Vu, HVX_Vector Vv, Word32 Rt)
@@ -776,9 +639,7 @@
    ========================================================================== */
 
 #define Q6_Vuh_vasr_VwVwR_sat(Vu,Vv,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vasrwuhsat)(Vu,Vv,Rt)
-#endif /* __HEXAGON_ARCH___ >= 60 */
 
-#if __HVX_ARCH__ >= 60
 /* ==========================================================================
    Assembly Syntax:       Vd32.w=vasr(Vu32.w,Vv32.w)
    C Intrinsic Prototype: HVX_Vector Q6_Vw_vasr_VwVw(HVX_Vector Vu, HVX_Vector Vv)
@@ -787,9 +648,7 @@
    ========================================================================== */
 
 #define Q6_Vw_vasr_VwVw(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vasrwv)(Vu,Vv)
-#endif /* __HEXAGON_ARCH___ >= 60 */
 
-#if __HVX_ARCH__ >= 60
 /* ==========================================================================
    Assembly Syntax:       Vd32=Vu32
    C Intrinsic Prototype: HVX_Vector Q6_V_equals_V(HVX_Vector Vu)
@@ -798,9 +657,7 @@
    ========================================================================== */
 
 #define Q6_V_equals_V(Vu) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vassign)(Vu)
-#endif /* __HEXAGON_ARCH___ >= 60 */
 
-#if __HVX_ARCH__ >= 60
 /* ==========================================================================
    Assembly Syntax:       Vdd32=Vuu32
    C Intrinsic Prototype: HVX_VectorPair Q6_W_equals_W(HVX_VectorPair Vuu)
@@ -809,9 +666,7 @@
    ========================================================================== */
 
 #define Q6_W_equals_W(Vuu) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vassignp)(Vuu)
-#endif /* __HEXAGON_ARCH___ >= 60 */
 
-#if __HVX_ARCH__ >= 60
 /* ==========================================================================
    Assembly Syntax:       Vd32.h=vavg(Vu32.h,Vv32.h)
    C Intrinsic Prototype: HVX_Vector Q6_Vh_vavg_VhVh(HVX_Vector Vu, HVX_Vector Vv)
@@ -820,9 +675,7 @@
    ========================================================================== */
 
 #define Q6_Vh_vavg_VhVh(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vavgh)(Vu,Vv)
-#endif /* __HEXAGON_ARCH___ >= 60 */
 
-#if __HVX_ARCH__ >= 60
 /* ==========================================================================
    Assembly Syntax:       Vd32.h=vavg(Vu32.h,Vv32.h):rnd
    C Intrinsic Prototype: HVX_Vector Q6_Vh_vavg_VhVh_rnd(HVX_Vector Vu, HVX_Vector Vv)
@@ -831,9 +684,7 @@
    ========================================================================== */
 
 #define Q6_Vh_vavg_VhVh_rnd(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vavghrnd)(Vu,Vv)
-#endif /* __HEXAGON_ARCH___ >= 60 */
 
-#if __HVX_ARCH__ >= 60
 /* ==========================================================================
    Assembly Syntax:       Vd32.ub=vavg(Vu32.ub,Vv32.ub)
    C Intrinsic Prototype: HVX_Vector Q6_Vub_vavg_VubVub(HVX_Vector Vu, HVX_Vector Vv)
@@ -842,9 +693,7 @@
    ========================================================================== */
 
 #define Q6_Vub_vavg_VubVub(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vavgub)(Vu,Vv)
-#endif /* __HEXAGON_ARCH___ >= 60 */
 
-#if __HVX_ARCH__ >= 60
 /* ==========================================================================
    Assembly Syntax:       Vd32.ub=vavg(Vu32.ub,Vv32.ub):rnd
    C Intrinsic Prototype: HVX_Vector Q6_Vub_vavg_VubVub_rnd(HVX_Vector Vu, HVX_Vector Vv)
@@ -853,9 +702,7 @@
    ========================================================================== */
 
 #define Q6_Vub_vavg_VubVub_rnd(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vavgubrnd)(Vu,Vv)
-#endif /* __HEXAGON_ARCH___ >= 60 */
 
-#if __HVX_ARCH__ >= 60
 /* ==========================================================================
    Assembly Syntax:       Vd32.uh=vavg(Vu32.uh,Vv32.uh)
    C Intrinsic Prototype: HVX_Vector Q6_Vuh_vavg_VuhVuh(HVX_Vector Vu, HVX_Vector Vv)
@@ -864,9 +711,7 @@
    ========================================================================== */
 
 #define Q6_Vuh_vavg_VuhVuh(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vavguh)(Vu,Vv)
-#endif /* __HEXAGON_ARCH___ >= 60 */
 
-#if __HVX_ARCH__ >= 60
 /* ==========================================================================
    Assembly Syntax:       Vd32.uh=vavg(Vu32.uh,Vv32.uh):rnd
    C Intrinsic Prototype: HVX_Vector Q6_Vuh_vavg_VuhVuh_rnd(HVX_Vector Vu, HVX_Vector Vv)
@@ -875,9 +720,7 @@
    ========================================================================== */
 
 #define Q6_Vuh_vavg_VuhVuh_rnd(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vavguhrnd)(Vu,Vv)
-#endif /* __HEXAGON_ARCH___ >= 60 */
 
-#if __HVX_ARCH__ >= 60
 /* ==========================================================================
    Assembly Syntax:       Vd32.w=vavg(Vu32.w,Vv32.w)
    C Intrinsic Prototype: HVX_Vector Q6_Vw_vavg_VwVw(HVX_Vector Vu, HVX_Vector Vv)
@@ -886,9 +729,7 @@
    ========================================================================== */
 
 #define Q6_Vw_vavg_VwVw(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vavgw)(Vu,Vv)
-#endif /* __HEXAGON_ARCH___ >= 60 */
 
-#if __HVX_ARCH__ >= 60
 /* ==========================================================================
    Assembly Syntax:       Vd32.w=vavg(Vu32.w,Vv32.w):rnd
    C Intrinsic Prototype: HVX_Vector Q6_Vw_vavg_VwVw_rnd(HVX_Vector Vu, HVX_Vector Vv)
@@ -897,9 +738,7 @@
    ========================================================================== */
 
 #define Q6_Vw_vavg_VwVw_rnd(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vavgwrnd)(Vu,Vv)
-#endif /* __HEXAGON_ARCH___ >= 60 */
 
-#if __HVX_ARCH__ >= 60
 /* ==========================================================================
    Assembly Syntax:       Vd32.uh=vcl0(Vu32.uh)
    C Intrinsic Prototype: HVX_Vector Q6_Vuh_vcl0_Vuh(HVX_Vector Vu)
@@ -908,9 +747,7 @@
    ========================================================================== */
 
 #define Q6_Vuh_vcl0_Vuh(Vu) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vcl0h)(Vu)
-#endif /* __HEXAGON_ARCH___ >= 60 */
 
-#if __HVX_ARCH__ >= 60
 /* ==========================================================================
    Assembly Syntax:       Vd32.uw=vcl0(Vu32.uw)
    C Intrinsic Prototype: HVX_Vector Q6_Vuw_vcl0_Vuw(HVX_Vector Vu)
@@ -919,9 +756,7 @@
    ========================================================================== */
 
 #define Q6_Vuw_vcl0_Vuw(Vu) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vcl0w)(Vu)
-#endif /* __HEXAGON_ARCH___ >= 60 */
 
-#if __HVX_ARCH__ >= 60
 /* ==========================================================================
    Assembly Syntax:       Vdd32=vcombine(Vu32,Vv32)
    C Intrinsic Prototype: HVX_VectorPair Q6_W_vcombine_VV(HVX_Vector Vu, HVX_Vector Vv)
@@ -930,9 +765,7 @@
    ========================================================================== */
 
 #define Q6_W_vcombine_VV(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vcombine)(Vu,Vv)
-#endif /* __HEXAGON_ARCH___ >= 60 */
 
-#if __HVX_ARCH__ >= 60
 /* ==========================================================================
    Assembly Syntax:       Vd32=#0
    C Intrinsic Prototype: HVX_Vector Q6_V_vzero()
@@ -941,9 +774,7 @@
    ========================================================================== */
 
 #define Q6_V_vzero() __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vd0)()
-#endif /* __HEXAGON_ARCH___ >= 60 */
 
-#if __HVX_ARCH__ >= 60
 /* ==========================================================================
    Assembly Syntax:       Vd32.b=vdeal(Vu32.b)
    C Intrinsic Prototype: HVX_Vector Q6_Vb_vdeal_Vb(HVX_Vector Vu)
@@ -952,9 +783,7 @@
    ========================================================================== */
 
 #define Q6_Vb_vdeal_Vb(Vu) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vdealb)(Vu)
-#endif /* __HEXAGON_ARCH___ >= 60 */
 
-#if __HVX_ARCH__ >= 60
 /* ==========================================================================
    Assembly Syntax:       Vd32.b=vdeale(Vu32.b,Vv32.b)
    C Intrinsic Prototype: HVX_Vector Q6_Vb_vdeale_VbVb(HVX_Vector Vu, HVX_Vector Vv)
@@ -963,9 +792,7 @@
    ========================================================================== */
 
 #define Q6_Vb_vdeale_VbVb(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vdealb4w)(Vu,Vv)
-#endif /* __HEXAGON_ARCH___ >= 60 */
 
-#if __HVX_ARCH__ >= 60
 /* ==========================================================================
    Assembly Syntax:       Vd32.h=vdeal(Vu32.h)
    C Intrinsic Prototype: HVX_Vector Q6_Vh_vdeal_Vh(HVX_Vector Vu)
@@ -974,9 +801,7 @@
    ========================================================================== */
 
 #define Q6_Vh_vdeal_Vh(Vu) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vdealh)(Vu)
-#endif /* __HEXAGON_ARCH___ >= 60 */
 
-#if __HVX_ARCH__ >= 60
 /* ==========================================================================
    Assembly Syntax:       Vdd32=vdeal(Vu32,Vv32,Rt8)
    C Intrinsic Prototype: HVX_VectorPair Q6_W_vdeal_VVR(HVX_Vector Vu, HVX_Vector Vv, Word32 Rt)
@@ -985,9 +810,7 @@
    ========================================================================== */
 
 #define Q6_W_vdeal_VVR(Vu,Vv,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vdealvdd)(Vu,Vv,Rt)
-#endif /* __HEXAGON_ARCH___ >= 60 */
 
-#if __HVX_ARCH__ >= 60
 /* ==========================================================================
    Assembly Syntax:       Vd32=vdelta(Vu32,Vv32)
    C Intrinsic Prototype: HVX_Vector Q6_V_vdelta_VV(HVX_Vector Vu, HVX_Vector Vv)
@@ -996,9 +819,7 @@
    ========================================================================== */
 
 #define Q6_V_vdelta_VV(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vdelta)(Vu,Vv)
-#endif /* __HEXAGON_ARCH___ >= 60 */
 
-#if __HVX_ARCH__ >= 60
 /* ==========================================================================
    Assembly Syntax:       Vd32.h=vdmpy(Vu32.ub,Rt32.b)
    C Intrinsic Prototype: HVX_Vector Q6_Vh_vdmpy_VubRb(HVX_Vector Vu, Word32 Rt)
@@ -1007,9 +828,7 @@
    ========================================================================== */
 
 #define Q6_Vh_vdmpy_VubRb(Vu,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vdmpybus)(Vu,Rt)
-#endif /* __HEXAGON_ARCH___ >= 60 */
 
-#if __HVX_ARCH__ >= 60
 /* ==========================================================================
    Assembly Syntax:       Vx32.h+=vdmpy(Vu32.ub,Rt32.b)
    C Intrinsic Prototype: HVX_Vector Q6_Vh_vdmpyacc_VhVubRb(HVX_Vector Vx, HVX_Vector Vu, Word32 Rt)
@@ -1018,9 +837,7 @@
    ========================================================================== */
 
 #define Q6_Vh_vdmpyacc_VhVubRb(Vx,Vu,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vdmpybus_acc)(Vx,Vu,Rt)
-#endif /* __HEXAGON_ARCH___ >= 60 */
 
-#if __HVX_ARCH__ >= 60
 /* ==========================================================================
    Assembly Syntax:       Vdd32.h=vdmpy(Vuu32.ub,Rt32.b)
    C Intrinsic Prototype: HVX_VectorPair Q6_Wh_vdmpy_WubRb(HVX_VectorPair Vuu, Word32 Rt)
@@ -1029,9 +846,7 @@
    ========================================================================== */
 
 #define Q6_Wh_vdmpy_WubRb(Vuu,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vdmpybus_dv)(Vuu,Rt)
-#endif /* __HEXAGON_ARCH___ >= 60 */
 
-#if __HVX_ARCH__ >= 60
 /* ==========================================================================
    Assembly Syntax:       Vxx32.h+=vdmpy(Vuu32.ub,Rt32.b)
    C Intrinsic Prototype: HVX_VectorPair Q6_Wh_vdmpyacc_WhWubRb(HVX_VectorPair Vxx, HVX_VectorPair Vuu, Word32 Rt)
@@ -1040,9 +855,7 @@
    ========================================================================== */
 
 #define Q6_Wh_vdmpyacc_WhWubRb(Vxx,Vuu,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vdmpybus_dv_acc)(Vxx,Vuu,Rt)
-#endif /* __HEXAGON_ARCH___ >= 60 */
 
-#if __HVX_ARCH__ >= 60
 /* ==========================================================================
    Assembly Syntax:       Vd32.w=vdmpy(Vu32.h,Rt32.b)
    C Intrinsic Prototype: HVX_Vector Q6_Vw_vdmpy_VhRb(HVX_Vector Vu, Word32 Rt)
@@ -1051,9 +864,7 @@
    ========================================================================== */
 
 #define Q6_Vw_vdmpy_VhRb(Vu,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vdmpyhb)(Vu,Rt)
-#endif /* __HEXAGON_ARCH___ >= 60 */
 
-#if __HVX_ARCH__ >= 60
 /* ==========================================================================
    Assembly Syntax:       Vx32.w+=vdmpy(Vu32.h,Rt32.b)
    C Intrinsic Prototype: HVX_Vector Q6_Vw_vdmpyacc_VwVhRb(HVX_Vector Vx, HVX_Vector Vu, Word32 Rt)
@@ -1062,9 +873,7 @@
    ========================================================================== */
 
 #define Q6_Vw_vdmpyacc_VwVhRb(Vx,Vu,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vdmpyhb_acc)(Vx,Vu,Rt)
-#endif /* __HEXAGON_ARCH___ >= 60 */
 
-#if __HVX_ARCH__ >= 60
 /* ==========================================================================
    Assembly Syntax:       Vdd32.w=vdmpy(Vuu32.h,Rt32.b)
    C Intrinsic Prototype: HVX_VectorPair Q6_Ww_vdmpy_WhRb(HVX_VectorPair Vuu, Word32 Rt)
@@ -1073,9 +882,7 @@
    ========================================================================== */
 
 #define Q6_Ww_vdmpy_WhRb(Vuu,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vdmpyhb_dv)(Vuu,Rt)
-#endif /* __HEXAGON_ARCH___ >= 60 */
 
-#if __HVX_ARCH__ >= 60
 /* ==========================================================================
    Assembly Syntax:       Vxx32.w+=vdmpy(Vuu32.h,Rt32.b)
    C Intrinsic Prototype: HVX_VectorPair Q6_Ww_vdmpyacc_WwWhRb(HVX_VectorPair Vxx, HVX_VectorPair Vuu, Word32 Rt)
@@ -1084,9 +891,7 @@
    ========================================================================== */
 
 #define Q6_Ww_vdmpyacc_WwWhRb(Vxx,Vuu,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vdmpyhb_dv_acc)(Vxx,Vuu,Rt)
-#endif /* __HEXAGON_ARCH___ >= 60 */
 
-#if __HVX_ARCH__ >= 60
 /* ==========================================================================
    Assembly Syntax:       Vd32.w=vdmpy(Vuu32.h,Rt32.h):sat
    C Intrinsic Prototype: HVX_Vector Q6_Vw_vdmpy_WhRh_sat(HVX_VectorPair Vuu, Word32 Rt)
@@ -1095,9 +900,7 @@
    ========================================================================== */
 
 #define Q6_Vw_vdmpy_WhRh_sat(Vuu,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vdmpyhisat)(Vuu,Rt)
-#endif /* __HEXAGON_ARCH___ >= 60 */
 
-#if __HVX_ARCH__ >= 60
 /* ==========================================================================
    Assembly Syntax:       Vx32.w+=vdmpy(Vuu32.h,Rt32.h):sat
    C Intrinsic Prototype: HVX_Vector Q6_Vw_vdmpyacc_VwWhRh_sat(HVX_Vector Vx, HVX_VectorPair Vuu, Word32 Rt)
@@ -1106,9 +909,7 @@
    ========================================================================== */
 
 #define Q6_Vw_vdmpyacc_VwWhRh_sat(Vx,Vuu,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vdmpyhisat_acc)(Vx,Vuu,Rt)
-#endif /* __HEXAGON_ARCH___ >= 60 */
 
-#if __HVX_ARCH__ >= 60
 /* ==========================================================================
    Assembly Syntax:       Vd32.w=vdmpy(Vu32.h,Rt32.h):sat
    C Intrinsic Prototype: HVX_Vector Q6_Vw_vdmpy_VhRh_sat(HVX_Vector Vu, Word32 Rt)
@@ -1117,9 +918,7 @@
    ========================================================================== */
 
 #define Q6_Vw_vdmpy_VhRh_sat(Vu,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vdmpyhsat)(Vu,Rt)
-#endif /* __HEXAGON_ARCH___ >= 60 */
 
-#if __HVX_ARCH__ >= 60
 /* ==========================================================================
    Assembly Syntax:       Vx32.w+=vdmpy(Vu32.h,Rt32.h):sat
    C Intrinsic Prototype: HVX_Vector Q6_Vw_vdmpyacc_VwVhRh_sat(HVX_Vector Vx, HVX_Vector Vu, Word32 Rt)
@@ -1128,9 +927,7 @@
    ========================================================================== */
 
 #define Q6_Vw_vdmpyacc_VwVhRh_sat(Vx,Vu,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vdmpyhsat_acc)(Vx,Vu,Rt)
-#endif /* __HEXAGON_ARCH___ >= 60 */
 
-#if __HVX_ARCH__ >= 60
 /* ==========================================================================
    Assembly Syntax:       Vd32.w=vdmpy(Vuu32.h,Rt32.uh,#1):sat
    C Intrinsic Prototype: HVX_Vector Q6_Vw_vdmpy_WhRuh_sat(HVX_VectorPair Vuu, Word32 Rt)
@@ -1139,9 +936,7 @@
    ========================================================================== */
 
 #define Q6_Vw_vdmpy_WhRuh_sat(Vuu,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vdmpyhsuisat)(Vuu,Rt)
-#endif /* __HEXAGON_ARCH___ >= 60 */
 
-#if __HVX_ARCH__ >= 60
 /* ==========================================================================
    Assembly Syntax:       Vx32.w+=vdmpy(Vuu32.h,Rt32.uh,#1):sat
    C Intrinsic Prototype: HVX_Vector Q6_Vw_vdmpyacc_VwWhRuh_sat(HVX_Vector Vx, HVX_VectorPair Vuu, Word32 Rt)
@@ -1150,9 +945,7 @@
    ========================================================================== */
 
 #define Q6_Vw_vdmpyacc_VwWhRuh_sat(Vx,Vuu,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vdmpyhsuisat_acc)(Vx,Vuu,Rt)
-#endif /* __HEXAGON_ARCH___ >= 60 */
 
-#if __HVX_ARCH__ >= 60
 /* ==========================================================================
    Assembly Syntax:       Vd32.w=vdmpy(Vu32.h,Rt32.uh):sat
    C Intrinsic Prototype: HVX_Vector Q6_Vw_vdmpy_VhRuh_sat(HVX_Vector Vu, Word32 Rt)
@@ -1161,9 +954,7 @@
    ========================================================================== */
 
 #define Q6_Vw_vdmpy_VhRuh_sat(Vu,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vdmpyhsusat)(Vu,Rt)
-#endif /* __HEXAGON_ARCH___ >= 60 */
 
-#if __HVX_ARCH__ >= 60
 /* ==========================================================================
    Assembly Syntax:       Vx32.w+=vdmpy(Vu32.h,Rt32.uh):sat
    C Intrinsic Prototype: HVX_Vector Q6_Vw_vdmpyacc_VwVhRuh_sat(HVX_Vector Vx, HVX_Vector Vu, Word32 Rt)
@@ -1172,9 +963,7 @@
    ========================================================================== */
 
 #define Q6_Vw_vdmpyacc_VwVhRuh_sat(Vx,Vu,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vdmpyhsusat_acc)(Vx,Vu,Rt)
-#endif /* __HEXAGON_ARCH___ >= 60 */
 
-#if __HVX_ARCH__ >= 60
 /* ==========================================================================
    Assembly Syntax:       Vd32.w=vdmpy(Vu32.h,Vv32.h):sat
    C Intrinsic Prototype: HVX_Vector Q6_Vw_vdmpy_VhVh_sat(HVX_Vector Vu, HVX_Vector Vv)
@@ -1183,9 +972,7 @@
    ========================================================================== */
 
 #define Q6_Vw_vdmpy_VhVh_sat(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vdmpyhvsat)(Vu,Vv)
-#endif /* __HEXAGON_ARCH___ >= 60 */
 
-#if __HVX_ARCH__ >= 60
 /* ==========================================================================
    Assembly Syntax:       Vx32.w+=vdmpy(Vu32.h,Vv32.h):sat
    C Intrinsic Prototype: HVX_Vector Q6_Vw_vdmpyacc_VwVhVh_sat(HVX_Vector Vx, HVX_Vector Vu, HVX_Vector Vv)
@@ -1194,9 +981,7 @@
    ========================================================================== */
 
 #define Q6_Vw_vdmpyacc_VwVhVh_sat(Vx,Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vdmpyhvsat_acc)(Vx,Vu,Vv)
-#endif /* __HEXAGON_ARCH___ >= 60 */
 
-#if __HVX_ARCH__ >= 60
 /* ==========================================================================
    Assembly Syntax:       Vdd32.uw=vdsad(Vuu32.uh,Rt32.uh)
    C Intrinsic Prototype: HVX_VectorPair Q6_Wuw_vdsad_WuhRuh(HVX_VectorPair Vuu, Word32 Rt)
@@ -1205,9 +990,7 @@
    ========================================================================== */
 
 #define Q6_Wuw_vdsad_WuhRuh(Vuu,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vdsaduh)(Vuu,Rt)
-#endif /* __HEXAGON_ARCH___ >= 60 */
 
-#if __HVX_ARCH__ >= 60
 /* ==========================================================================
    Assembly Syntax:       Vxx32.uw+=vdsad(Vuu32.uh,Rt32.uh)
    C Intrinsic Prototype: HVX_VectorPair Q6_Wuw_vdsadacc_WuwWuhRuh(HVX_VectorPair Vxx, HVX_VectorPair Vuu, Word32 Rt)
@@ -1216,9 +999,7 @@
    ========================================================================== */
 
 #define Q6_Wuw_vdsadacc_WuwWuhRuh(Vxx,Vuu,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vdsaduh_acc)(Vxx,Vuu,Rt)
-#endif /* __HEXAGON_ARCH___ >= 60 */
 
-#if __HVX_ARCH__ >= 60
 /* ==========================================================================
    Assembly Syntax:       Qd4=vcmp.eq(Vu32.b,Vv32.b)
    C Intrinsic Prototype: HVX_VectorPred Q6_Q_vcmp_eq_VbVb(HVX_Vector Vu, HVX_Vector Vv)
@@ -1227,9 +1008,7 @@
    ========================================================================== */
 
 #define Q6_Q_vcmp_eq_VbVb(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandqrt)((__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_veqb)(Vu,Vv)),-1)
-#endif /* __HEXAGON_ARCH___ >= 60 */
 
-#if __HVX_ARCH__ >= 60
 /* ==========================================================================
    Assembly Syntax:       Qx4&=vcmp.eq(Vu32.b,Vv32.b)
    C Intrinsic Prototype: HVX_VectorPred Q6_Q_vcmp_eqand_QVbVb(HVX_VectorPred Qx, HVX_Vector Vu, HVX_Vector Vv)
@@ -1238,9 +1017,7 @@
    ========================================================================== */
 
 #define Q6_Q_vcmp_eqand_QVbVb(Qx,Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandqrt)((__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_veqb_and)(__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qx),-1),Vu,Vv)),-1)
-#endif /* __HEXAGON_ARCH___ >= 60 */
 
-#if __HVX_ARCH__ >= 60
 /* ==========================================================================
    Assembly Syntax:       Qx4|=vcmp.eq(Vu32.b,Vv32.b)
    C Intrinsic Prototype: HVX_VectorPred Q6_Q_vcmp_eqor_QVbVb(HVX_VectorPred Qx, HVX_Vector Vu, HVX_Vector Vv)
@@ -1249,9 +1026,7 @@
    ========================================================================== */
 
 #define Q6_Q_vcmp_eqor_QVbVb(Qx,Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandqrt)((__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_veqb_or)(__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qx),-1),Vu,Vv)),-1)
-#endif /* __HEXAGON_ARCH___ >= 60 */
 
-#if __HVX_ARCH__ >= 60
 /* ==========================================================================
    Assembly Syntax:       Qx4^=vcmp.eq(Vu32.b,Vv32.b)
    C Intrinsic Prototype: HVX_VectorPred Q6_Q_vcmp_eqxacc_QVbVb(HVX_VectorPred Qx, HVX_Vector Vu, HVX_Vector Vv)
@@ -1260,9 +1035,7 @@
    ========================================================================== */
 
 #define Q6_Q_vcmp_eqxacc_QVbVb(Qx,Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandqrt)((__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_veqb_xor)(__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qx),-1),Vu,Vv)),-1)
-#endif /* __HEXAGON_ARCH___ >= 60 */
 
-#if __HVX_ARCH__ >= 60
 /* ==========================================================================
    Assembly Syntax:       Qd4=vcmp.eq(Vu32.h,Vv32.h)
    C Intrinsic Prototype: HVX_VectorPred Q6_Q_vcmp_eq_VhVh(HVX_Vector Vu, HVX_Vector Vv)
@@ -1271,9 +1044,7 @@
    ========================================================================== */
 
 #define Q6_Q_vcmp_eq_VhVh(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandqrt)((__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_veqh)(Vu,Vv)),-1)
-#endif /* __HEXAGON_ARCH___ >= 60 */
 
-#if __HVX_ARCH__ >= 60
 /* ==========================================================================
    Assembly Syntax:       Qx4&=vcmp.eq(Vu32.h,Vv32.h)
    C Intrinsic Prototype: HVX_VectorPred Q6_Q_vcmp_eqand_QVhVh(HVX_VectorPred Qx, HVX_Vector Vu, HVX_Vector Vv)
@@ -1282,9 +1053,7 @@
    ========================================================================== */
 
 #define Q6_Q_vcmp_eqand_QVhVh(Qx,Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandqrt)((__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_veqh_and)(__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qx),-1),Vu,Vv)),-1)
-#endif /* __HEXAGON_ARCH___ >= 60 */
 
-#if __HVX_ARCH__ >= 60
 /* ==========================================================================
    Assembly Syntax:       Qx4|=vcmp.eq(Vu32.h,Vv32.h)
    C Intrinsic Prototype: HVX_VectorPred Q6_Q_vcmp_eqor_QVhVh(HVX_VectorPred Qx, HVX_Vector Vu, HVX_Vector Vv)
@@ -1293,9 +1062,7 @@
    ========================================================================== */
 
 #define Q6_Q_vcmp_eqor_QVhVh(Qx,Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandqrt)((__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_veqh_or)(__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qx),-1),Vu,Vv)),-1)
-#endif /* __HEXAGON_ARCH___ >= 60 */
 
-#if __HVX_ARCH__ >= 60
 /* ==========================================================================
    Assembly Syntax:       Qx4^=vcmp.eq(Vu32.h,Vv32.h)
    C Intrinsic Prototype: HVX_VectorPred Q6_Q_vcmp_eqxacc_QVhVh(HVX_VectorPred Qx, HVX_Vector Vu, HVX_Vector Vv)
@@ -1304,9 +1071,7 @@
    ========================================================================== */
 
 #define Q6_Q_vcmp_eqxacc_QVhVh(Qx,Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandqrt)((__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_veqh_xor)(__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qx),-1),Vu,Vv)),-1)
-#endif /* __HEXAGON_ARCH___ >= 60 */
 
-#if __HVX_ARCH__ >= 60
 /* ==========================================================================
    Assembly Syntax:       Qd4=vcmp.eq(Vu32.w,Vv32.w)
    C Intrinsic Prototype: HVX_VectorPred Q6_Q_vcmp_eq_VwVw(HVX_Vector Vu, HVX_Vector Vv)
@@ -1315,9 +1080,7 @@
    ========================================================================== */
 
 #define Q6_Q_vcmp_eq_VwVw(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandqrt)((__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_veqw)(Vu,Vv)),-1)
-#endif /* __HEXAGON_ARCH___ >= 60 */
 
-#if __HVX_ARCH__ >= 60
 /* ==========================================================================
    Assembly Syntax:       Qx4&=vcmp.eq(Vu32.w,Vv32.w)
    C Intrinsic Prototype: HVX_VectorPred Q6_Q_vcmp_eqand_QVwVw(HVX_VectorPred Qx, HVX_Vector Vu, HVX_Vector Vv)
@@ -1326,9 +1089,7 @@
    ========================================================================== */
 
 #define Q6_Q_vcmp_eqand_QVwVw(Qx,Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandqrt)((__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_veqw_and)(__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qx),-1),Vu,Vv)),-1)
-#endif /* __HEXAGON_ARCH___ >= 60 */
 
-#if __HVX_ARCH__ >= 60
 /* ==========================================================================
    Assembly Syntax:       Qx4|=vcmp.eq(Vu32.w,Vv32.w)
    C Intrinsic Prototype: HVX_VectorPred Q6_Q_vcmp_eqor_QVwVw(HVX_VectorPred Qx, HVX_Vector Vu, HVX_Vector Vv)
@@ -1337,9 +1098,7 @@
    ========================================================================== */
 
 #define Q6_Q_vcmp_eqor_QVwVw(Qx,Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandqrt)((__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_veqw_or)(__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qx),-1),Vu,Vv)),-1)
-#endif /* __HEXAGON_ARCH___ >= 60 */
 
-#if __HVX_ARCH__ >= 60
 /* ==========================================================================
    Assembly Syntax:       Qx4^=vcmp.eq(Vu32.w,Vv32.w)
    C Intrinsic Prototype: HVX_VectorPred Q6_Q_vcmp_eqxacc_QVwVw(HVX_VectorPred Qx, HVX_Vector Vu, HVX_Vector Vv)
@@ -1348,9 +1107,7 @@
    ========================================================================== */
 
 #define Q6_Q_vcmp_eqxacc_QVwVw(Qx,Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandqrt)((__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_veqw_xor)(__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qx),-1),Vu,Vv)),-1)
-#endif /* __HEXAGON_ARCH___ >= 60 */
 
-#if __HVX_ARCH__ >= 60
 /* ==========================================================================
    Assembly Syntax:       Qd4=vcmp.gt(Vu32.b,Vv32.b)
    C Intrinsic Prototype: HVX_VectorPred Q6_Q_vcmp_gt_VbVb(HVX_Vector Vu, HVX_Vector Vv)
@@ -1359,9 +1116,7 @@
    ========================================================================== */
 
 #define Q6_Q_vcmp_gt_VbVb(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandqrt)((__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vgtb)(Vu,Vv)),-1)
-#endif /* __HEXAGON_ARCH___ >= 60 */
 
-#if __HVX_ARCH__ >= 60
 /* ==========================================================================
    Assembly Syntax:       Qx4&=vcmp.gt(Vu32.b,Vv32.b)
    C Intrinsic Prototype: HVX_VectorPred Q6_Q_vcmp_gtand_QVbVb(HVX_VectorPred Qx, HVX_Vector Vu, HVX_Vector Vv)
@@ -1370,9 +1125,7 @@
    ========================================================================== */
 
 #define Q6_Q_vcmp_gtand_QVbVb(Qx,Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandqrt)((__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vgtb_and)(__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qx),-1),Vu,Vv)),-1)
-#endif /* __HEXAGON_ARCH___ >= 60 */
 
-#if __HVX_ARCH__ >= 60
 /* ==========================================================================
    Assembly Syntax:       Qx4|=vcmp.gt(Vu32.b,Vv32.b)
    C Intrinsic Prototype: HVX_VectorPred Q6_Q_vcmp_gtor_QVbVb(HVX_VectorPred Qx, HVX_Vector Vu, HVX_Vector Vv)
@@ -1381,9 +1134,7 @@
    ========================================================================== */
 
 #define Q6_Q_vcmp_gtor_QVbVb(Qx,Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandqrt)((__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vgtb_or)(__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qx),-1),Vu,Vv)),-1)
-#endif /* __HEXAGON_ARCH___ >= 60 */
 
-#if __HVX_ARCH__ >= 60
 /* ==========================================================================
    Assembly Syntax:       Qx4^=vcmp.gt(Vu32.b,Vv32.b)
    C Intrinsic Prototype: HVX_VectorPred Q6_Q_vcmp_gtxacc_QVbVb(HVX_VectorPred Qx, HVX_Vector Vu, HVX_Vector Vv)
@@ -1392,9 +1143,7 @@
    ========================================================================== */
 
 #define Q6_Q_vcmp_gtxacc_QVbVb(Qx,Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandqrt)((__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vgtb_xor)(__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qx),-1),Vu,Vv)),-1)
-#endif /* __HEXAGON_ARCH___ >= 60 */
 
-#if __HVX_ARCH__ >= 60
 /* ==========================================================================
    Assembly Syntax:       Qd4=vcmp.gt(Vu32.h,Vv32.h)
    C Intrinsic Prototype: HVX_VectorPred Q6_Q_vcmp_gt_VhVh(HVX_Vector Vu, HVX_Vector Vv)
@@ -1403,9 +1152,7 @@
    ========================================================================== */
 
 #define Q6_Q_vcmp_gt_VhVh(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandqrt)((__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vgth)(Vu,Vv)),-1)
-#endif /* __HEXAGON_ARCH___ >= 60 */
 
-#if __HVX_ARCH__ >= 60
 /* ==========================================================================
    Assembly Syntax:       Qx4&=vcmp.gt(Vu32.h,Vv32.h)
    C Intrinsic Prototype: HVX_VectorPred Q6_Q_vcmp_gtand_QVhVh(HVX_VectorPred Qx, HVX_Vector Vu, HVX_Vector Vv)
@@ -1414,9 +1161,7 @@
    ========================================================================== */
 
 #define Q6_Q_vcmp_gtand_QVhVh(Qx,Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandqrt)((__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vgth_and)(__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qx),-1),Vu,Vv)),-1)
-#endif /* __HEXAGON_ARCH___ >= 60 */
 
-#if __HVX_ARCH__ >= 60
 /* ==========================================================================
    Assembly Syntax:       Qx4|=vcmp.gt(Vu32.h,Vv32.h)
    C Intrinsic Prototype: HVX_VectorPred Q6_Q_vcmp_gtor_QVhVh(HVX_VectorPred Qx, HVX_Vector Vu, HVX_Vector Vv)
@@ -1425,9 +1170,7 @@
    ========================================================================== */
 
 #define Q6_Q_vcmp_gtor_QVhVh(Qx,Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandqrt)((__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vgth_or)(__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qx),-1),Vu,Vv)),-1)
-#endif /* __HEXAGON_ARCH___ >= 60 */
 
-#if __HVX_ARCH__ >= 60
 /* ==========================================================================
    Assembly Syntax:       Qx4^=vcmp.gt(Vu32.h,Vv32.h)
    C Intrinsic Prototype: HVX_VectorPred Q6_Q_vcmp_gtxacc_QVhVh(HVX_VectorPred Qx, HVX_Vector Vu, HVX_Vector Vv)
@@ -1436,9 +1179,7 @@
    ========================================================================== */
 
 #define Q6_Q_vcmp_gtxacc_QVhVh(Qx,Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandqrt)((__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vgth_xor)(__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qx),-1),Vu,Vv)),-1)
-#endif /* __HEXAGON_ARCH___ >= 60 */
 
-#if __HVX_ARCH__ >= 60
 /* ==========================================================================
    Assembly Syntax:       Qd4=vcmp.gt(Vu32.ub,Vv32.ub)
    C Intrinsic Prototype: HVX_VectorPred Q6_Q_vcmp_gt_VubVub(HVX_Vector Vu, HVX_Vector Vv)
@@ -1447,9 +1188,7 @@
    ========================================================================== */
 
 #define Q6_Q_vcmp_gt_VubVub(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandqrt)((__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vgtub)(Vu,Vv)),-1)
-#endif /* __HEXAGON_ARCH___ >= 60 */
 
-#if __HVX_ARCH__ >= 60
 /* ==========================================================================
    Assembly Syntax:       Qx4&=vcmp.gt(Vu32.ub,Vv32.ub)
    C Intrinsic Prototype: HVX_VectorPred Q6_Q_vcmp_gtand_QVubVub(HVX_VectorPred Qx, HVX_Vector Vu, HVX_Vector Vv)
@@ -1458,9 +1197,7 @@
    ========================================================================== */
 
 #define Q6_Q_vcmp_gtand_QVubVub(Qx,Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandqrt)((__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vgtub_and)(__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qx),-1),Vu,Vv)),-1)
-#endif /* __HEXAGON_ARCH___ >= 60 */
 
-#if __HVX_ARCH__ >= 60
 /* ==========================================================================
    Assembly Syntax:       Qx4|=vcmp.gt(Vu32.ub,Vv32.ub)
    C Intrinsic Prototype: HVX_VectorPred Q6_Q_vcmp_gtor_QVubVub(HVX_VectorPred Qx, HVX_Vector Vu, HVX_Vector Vv)
@@ -1469,9 +1206,7 @@
    ========================================================================== */
 
 #define Q6_Q_vcmp_gtor_QVubVub(Qx,Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandqrt)((__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vgtub_or)(__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qx),-1),Vu,Vv)),-1)
-#endif /* __HEXAGON_ARCH___ >= 60 */
 
-#if __HVX_ARCH__ >= 60
 /* ==========================================================================
    Assembly Syntax:       Qx4^=vcmp.gt(Vu32.ub,Vv32.ub)
    C Intrinsic Prototype: HVX_VectorPred Q6_Q_vcmp_gtxacc_QVubVub(HVX_VectorPred Qx, HVX_Vector Vu, HVX_Vector Vv)
@@ -1480,9 +1215,7 @@
    ========================================================================== */
 
 #define Q6_Q_vcmp_gtxacc_QVubVub(Qx,Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandqrt)((__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vgtub_xor)(__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qx),-1),Vu,Vv)),-1)
-#endif /* __HEXAGON_ARCH___ >= 60 */
 
-#if __HVX_ARCH__ >= 60
 /* ==========================================================================
    Assembly Syntax:       Qd4=vcmp.gt(Vu32.uh,Vv32.uh)
    C Intrinsic Prototype: HVX_VectorPred Q6_Q_vcmp_gt_VuhVuh(HVX_Vector Vu, HVX_Vector Vv)
@@ -1491,9 +1224,7 @@
    ========================================================================== */
 
 #define Q6_Q_vcmp_gt_VuhVuh(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandqrt)((__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vgtuh)(Vu,Vv)),-1)
-#endif /* __HEXAGON_ARCH___ >= 60 */
 
-#if __HVX_ARCH__ >= 60
 /* ==========================================================================
    Assembly Syntax:       Qx4&=vcmp.gt(Vu32.uh,Vv32.uh)
    C Intrinsic Prototype: HVX_VectorPred Q6_Q_vcmp_gtand_QVuhVuh(HVX_VectorPred Qx, HVX_Vector Vu, HVX_Vector Vv)
@@ -1502,9 +1233,7 @@
    ========================================================================== */
 
 #define Q6_Q_vcmp_gtand_QVuhVuh(Qx,Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandqrt)((__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vgtuh_and)(__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qx),-1),Vu,Vv)),-1)
-#endif /* __HEXAGON_ARCH___ >= 60 */
 
-#if __HVX_ARCH__ >= 60
 /* ==========================================================================
    Assembly Syntax:       Qx4|=vcmp.gt(Vu32.uh,Vv32.uh)
    C Intrinsic Prototype: HVX_VectorPred Q6_Q_vcmp_gtor_QVuhVuh(HVX_VectorPred Qx, HVX_Vector Vu, HVX_Vector Vv)
@@ -1513,9 +1242,7 @@
    ========================================================================== */
 
 #define Q6_Q_vcmp_gtor_QVuhVuh(Qx,Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandqrt)((__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vgtuh_or)(__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qx),-1),Vu,Vv)),-1)
-#endif /* __HEXAGON_ARCH___ >= 60 */
 
-#if __HVX_ARCH__ >= 60
 /* ==========================================================================
    Assembly Syntax:       Qx4^=vcmp.gt(Vu32.uh,Vv32.uh)
    C Intrinsic Prototype: HVX_VectorPred Q6_Q_vcmp_gtxacc_QVuhVuh(HVX_VectorPred Qx, HVX_Vector Vu, HVX_Vector Vv)
@@ -1524,9 +1251,7 @@
    ========================================================================== */
 
 #define Q6_Q_vcmp_gtxacc_QVuhVuh(Qx,Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandqrt)((__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vgtuh_xor)(__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qx),-1),Vu,Vv)),-1)
-#endif /* __HEXAGON_ARCH___ >= 60 */
 
-#if __HVX_ARCH__ >= 60
 /* ==========================================================================
    Assembly Syntax:       Qd4=vcmp.gt(Vu32.uw,Vv32.uw)
    C Intrinsic Prototype: HVX_VectorPred Q6_Q_vcmp_gt_VuwVuw(HVX_Vector Vu, HVX_Vector Vv)
@@ -1535,9 +1260,7 @@
    ========================================================================== */
 
 #define Q6_Q_vcmp_gt_VuwVuw(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandqrt)((__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vgtuw)(Vu,Vv)),-1)
-#endif /* __HEXAGON_ARCH___ >= 60 */
 
-#if __HVX_ARCH__ >= 60
 /* ==========================================================================
    Assembly Syntax:       Qx4&=vcmp.gt(Vu32.uw,Vv32.uw)
    C Intrinsic Prototype: HVX_VectorPred Q6_Q_vcmp_gtand_QVuwVuw(HVX_VectorPred Qx, HVX_Vector Vu, HVX_Vector Vv)
@@ -1546,9 +1269,7 @@
    ========================================================================== */
 
 #define Q6_Q_vcmp_gtand_QVuwVuw(Qx,Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandqrt)((__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vgtuw_and)(__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qx),-1),Vu,Vv)),-1)
-#endif /* __HEXAGON_ARCH___ >= 60 */
 
-#if __HVX_ARCH__ >= 60
 /* ==========================================================================
    Assembly Syntax:       Qx4|=vcmp.gt(Vu32.uw,Vv32.uw)
    C Intrinsic Prototype: HVX_VectorPred Q6_Q_vcmp_gtor_QVuwVuw(HVX_VectorPred Qx, HVX_Vector Vu, HVX_Vector Vv)
@@ -1557,9 +1278,7 @@
    ========================================================================== */
 
 #define Q6_Q_vcmp_gtor_QVuwVuw(Qx,Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandqrt)((__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vgtuw_or)(__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qx),-1),Vu,Vv)),-1)
-#endif /* __HEXAGON_ARCH___ >= 60 */
 
-#if __HVX_ARCH__ >= 60
 /* ==========================================================================
    Assembly Syntax:       Qx4^=vcmp.gt(Vu32.uw,Vv32.uw)
    C Intrinsic Prototype: HVX_VectorPred Q6_Q_vcmp_gtxacc_QVuwVuw(HVX_VectorPred Qx, HVX_Vector Vu, HVX_Vector Vv)
@@ -1568,9 +1287,7 @@
    ========================================================================== */
 
 #define Q6_Q_vcmp_gtxacc_QVuwVuw(Qx,Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandqrt)((__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vgtuw_xor)(__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qx),-1),Vu,Vv)),-1)
-#endif /* __HEXAGON_ARCH___ >= 60 */
 
-#if __HVX_ARCH__ >= 60
 /* ==========================================================================
    Assembly Syntax:       Qd4=vcmp.gt(Vu32.w,Vv32.w)
    C Intrinsic Prototype: HVX_VectorPred Q6_Q_vcmp_gt_VwVw(HVX_Vector Vu, HVX_Vector Vv)
@@ -1579,9 +1296,7 @@
    ========================================================================== */
 
 #define Q6_Q_vcmp_gt_VwVw(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandqrt)((__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vgtw)(Vu,Vv)),-1)
-#endif /* __HEXAGON_ARCH___ >= 60 */
 
-#if __HVX_ARCH__ >= 60
 /* ==========================================================================
    Assembly Syntax:       Qx4&=vcmp.gt(Vu32.w,Vv32.w)
    C Intrinsic Prototype: HVX_VectorPred Q6_Q_vcmp_gtand_QVwVw(HVX_VectorPred Qx, HVX_Vector Vu, HVX_Vector Vv)
@@ -1590,9 +1305,7 @@
    ========================================================================== */
 
 #define Q6_Q_vcmp_gtand_QVwVw(Qx,Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandqrt)((__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vgtw_and)(__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qx),-1),Vu,Vv)),-1)
-#endif /* __HEXAGON_ARCH___ >= 60 */
 
-#if __HVX_ARCH__ >= 60
 /* ==========================================================================
    Assembly Syntax:       Qx4|=vcmp.gt(Vu32.w,Vv32.w)
    C Intrinsic Prototype: HVX_VectorPred Q6_Q_vcmp_gtor_QVwVw(HVX_VectorPred Qx, HVX_Vector Vu, HVX_Vector Vv)
@@ -1601,9 +1314,7 @@
    ========================================================================== */
 
 #define Q6_Q_vcmp_gtor_QVwVw(Qx,Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandqrt)((__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vgtw_or)(__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qx),-1),Vu,Vv)),-1)
-#endif /* __HEXAGON_ARCH___ >= 60 */
 
-#if __HVX_ARCH__ >= 60
 /* ==========================================================================
    Assembly Syntax:       Qx4^=vcmp.gt(Vu32.w,Vv32.w)
    C Intrinsic Prototype: HVX_VectorPred Q6_Q_vcmp_gtxacc_QVwVw(HVX_VectorPred Qx, HVX_Vector Vu, HVX_Vector Vv)
@@ -1612,9 +1323,7 @@
    ========================================================================== */
 
 #define Q6_Q_vcmp_gtxacc_QVwVw(Qx,Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandqrt)((__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vgtw_xor)(__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qx),-1),Vu,Vv)),-1)
-#endif /* __HEXAGON_ARCH___ >= 60 */
 
-#if __HVX_ARCH__ >= 60
 /* ==========================================================================
    Assembly Syntax:       Vx32.w=vinsert(Rt32)
    C Intrinsic Prototype: HVX_Vector Q6_Vw_vinsert_VwR(HVX_Vector Vx, Word32 Rt)
@@ -1623,9 +1332,7 @@
    ========================================================================== */
 
 #define Q6_Vw_vinsert_VwR(Vx,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vinsertwr)(Vx,Rt)
-#endif /* __HEXAGON_ARCH___ >= 60 */
 
-#if __HVX_ARCH__ >= 60
 /* ==========================================================================
    Assembly Syntax:       Vd32=vlalign(Vu32,Vv32,Rt8)
    C Intrinsic Prototype: HVX_Vector Q6_V_vlalign_VVR(HVX_Vector Vu, HVX_Vector Vv, Word32 Rt)
@@ -1634,9 +1341,7 @@
    ========================================================================== */
 
 #define Q6_V_vlalign_VVR(Vu,Vv,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vlalignb)(Vu,Vv,Rt)
-#endif /* __HEXAGON_ARCH___ >= 60 */
 
-#if __HVX_ARCH__ >= 60
 /* ==========================================================================
    Assembly Syntax:       Vd32=vlalign(Vu32,Vv32,#u3)
    C Intrinsic Prototype: HVX_Vector Q6_V_vlalign_VVI(HVX_Vector Vu, HVX_Vector Vv, Word32 Iu3)
@@ -1645,9 +1350,7 @@
    ========================================================================== */
 
 #define Q6_V_vlalign_VVI(Vu,Vv,Iu3) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vlalignbi)(Vu,Vv,Iu3)
-#endif /* __HEXAGON_ARCH___ >= 60 */
 
-#if __HVX_ARCH__ >= 60
 /* ==========================================================================
    Assembly Syntax:       Vd32.uh=vlsr(Vu32.uh,Rt32)
    C Intrinsic Prototype: HVX_Vector Q6_Vuh_vlsr_VuhR(HVX_Vector Vu, Word32 Rt)
@@ -1656,9 +1359,7 @@
    ========================================================================== */
 
 #define Q6_Vuh_vlsr_VuhR(Vu,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vlsrh)(Vu,Rt)
-#endif /* __HEXAGON_ARCH___ >= 60 */
 
-#if __HVX_ARCH__ >= 60
 /* ==========================================================================
    Assembly Syntax:       Vd32.h=vlsr(Vu32.h,Vv32.h)
    C Intrinsic Prototype: HVX_Vector Q6_Vh_vlsr_VhVh(HVX_Vector Vu, HVX_Vector Vv)
@@ -1667,9 +1368,7 @@
    ========================================================================== */
 
 #define Q6_Vh_vlsr_VhVh(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vlsrhv)(Vu,Vv)
-#endif /* __HEXAGON_ARCH___ >= 60 */
 
-#if __HVX_ARCH__ >= 60
 /* ==========================================================================
    Assembly Syntax:       Vd32.uw=vlsr(Vu32.uw,Rt32)
    C Intrinsic Prototype: HVX_Vector Q6_Vuw_vlsr_VuwR(HVX_Vector Vu, Word32 Rt)
@@ -1678,9 +1377,7 @@
    ========================================================================== */
 
 #define Q6_Vuw_vlsr_VuwR(Vu,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vlsrw)(Vu,Rt)
-#endif /* __HEXAGON_ARCH___ >= 60 */
 
-#if __HVX_ARCH__ >= 60
 /* ==========================================================================
    Assembly Syntax:       Vd32.w=vlsr(Vu32.w,Vv32.w)
    C Intrinsic Prototype: HVX_Vector Q6_Vw_vlsr_VwVw(HVX_Vector Vu, HVX_Vector Vv)
@@ -1689,9 +1386,7 @@
    ========================================================================== */
 
 #define Q6_Vw_vlsr_VwVw(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vlsrwv)(Vu,Vv)
-#endif /* __HEXAGON_ARCH___ >= 60 */
 
-#if __HVX_ARCH__ >= 60
 /* ==========================================================================
    Assembly Syntax:       Vd32.b=vlut32(Vu32.b,Vv32.b,Rt8)
    C Intrinsic Prototype: HVX_Vector Q6_Vb_vlut32_VbVbR(HVX_Vector Vu, HVX_Vector Vv, Word32 Rt)
@@ -1700,9 +1395,7 @@
    ========================================================================== */
 
 #define Q6_Vb_vlut32_VbVbR(Vu,Vv,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vlutvvb)(Vu,Vv,Rt)
-#endif /* __HEXAGON_ARCH___ >= 60 */
 
-#if __HVX_ARCH__ >= 60
 /* ==========================================================================
    Assembly Syntax:       Vx32.b|=vlut32(Vu32.b,Vv32.b,Rt8)
    C Intrinsic Prototype: HVX_Vector Q6_Vb_vlut32or_VbVbVbR(HVX_Vector Vx, HVX_Vector Vu, HVX_Vector Vv, Word32 Rt)
@@ -1711,9 +1404,7 @@
    ========================================================================== */
 
 #define Q6_Vb_vlut32or_VbVbVbR(Vx,Vu,Vv,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vlutvvb_oracc)(Vx,Vu,Vv,Rt)
-#endif /* __HEXAGON_ARCH___ >= 60 */
 
-#if __HVX_ARCH__ >= 60
 /* ==========================================================================
    Assembly Syntax:       Vdd32.h=vlut16(Vu32.b,Vv32.h,Rt8)
    C Intrinsic Prototype: HVX_VectorPair Q6_Wh_vlut16_VbVhR(HVX_Vector Vu, HVX_Vector Vv, Word32 Rt)
@@ -1722,9 +1413,7 @@
    ========================================================================== */
 
 #define Q6_Wh_vlut16_VbVhR(Vu,Vv,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vlutvwh)(Vu,Vv,Rt)
-#endif /* __HEXAGON_ARCH___ >= 60 */
 
-#if __HVX_ARCH__ >= 60
 /* ==========================================================================
    Assembly Syntax:       Vxx32.h|=vlut16(Vu32.b,Vv32.h,Rt8)
    C Intrinsic Prototype: HVX_VectorPair Q6_Wh_vlut16or_WhVbVhR(HVX_VectorPair Vxx, HVX_Vector Vu, HVX_Vector Vv, Word32 Rt)
@@ -1733,9 +1422,7 @@
    ========================================================================== */
 
 #define Q6_Wh_vlut16or_WhVbVhR(Vxx,Vu,Vv,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vlutvwh_oracc)(Vxx,Vu,Vv,Rt)
-#endif /* __HEXAGON_ARCH___ >= 60 */
 
-#if __HVX_ARCH__ >= 60
 /* ==========================================================================
    Assembly Syntax:       Vd32.h=vmax(Vu32.h,Vv32.h)
    C Intrinsic Prototype: HVX_Vector Q6_Vh_vmax_VhVh(HVX_Vector Vu, HVX_Vector Vv)
@@ -1744,9 +1431,7 @@
    ========================================================================== */
 
 #define Q6_Vh_vmax_VhVh(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmaxh)(Vu,Vv)
-#endif /* __HEXAGON_ARCH___ >= 60 */
 
-#if __HVX_ARCH__ >= 60
 /* ==========================================================================
    Assembly Syntax:       Vd32.ub=vmax(Vu32.ub,Vv32.ub)
    C Intrinsic Prototype: HVX_Vector Q6_Vub_vmax_VubVub(HVX_Vector Vu, HVX_Vector Vv)
@@ -1755,9 +1440,7 @@
    ========================================================================== */
 
 #define Q6_Vub_vmax_VubVub(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmaxub)(Vu,Vv)
-#endif /* __HEXAGON_ARCH___ >= 60 */
 
-#if __HVX_ARCH__ >= 60
 /* ==========================================================================
    Assembly Syntax:       Vd32.uh=vmax(Vu32.uh,Vv32.uh)
    C Intrinsic Prototype: HVX_Vector Q6_Vuh_vmax_VuhVuh(HVX_Vector Vu, HVX_Vector Vv)
@@ -1766,9 +1449,7 @@
    ========================================================================== */
 
 #define Q6_Vuh_vmax_VuhVuh(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmaxuh)(Vu,Vv)
-#endif /* __HEXAGON_ARCH___ >= 60 */
 
-#if __HVX_ARCH__ >= 60
 /* ==========================================================================
    Assembly Syntax:       Vd32.w=vmax(Vu32.w,Vv32.w)
    C Intrinsic Prototype: HVX_Vector Q6_Vw_vmax_VwVw(HVX_Vector Vu, HVX_Vector Vv)
@@ -1777,9 +1458,7 @@
    ========================================================================== */
 
 #define Q6_Vw_vmax_VwVw(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmaxw)(Vu,Vv)
-#endif /* __HEXAGON_ARCH___ >= 60 */
 
-#if __HVX_ARCH__ >= 60
 /* ==========================================================================
    Assembly Syntax:       Vd32.h=vmin(Vu32.h,Vv32.h)
    C Intrinsic Prototype: HVX_Vector Q6_Vh_vmin_VhVh(HVX_Vector Vu, HVX_Vector Vv)
@@ -1788,9 +1467,7 @@
    ========================================================================== */
 
 #define Q6_Vh_vmin_VhVh(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vminh)(Vu,Vv)
-#endif /* __HEXAGON_ARCH___ >= 60 */
 
-#if __HVX_ARCH__ >= 60
 /* ==========================================================================
    Assembly Syntax:       Vd32.ub=vmin(Vu32.ub,Vv32.ub)
    C Intrinsic Prototype: HVX_Vector Q6_Vub_vmin_VubVub(HVX_Vector Vu, HVX_Vector Vv)
@@ -1799,9 +1476,7 @@
    ========================================================================== */
 
 #define Q6_Vub_vmin_VubVub(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vminub)(Vu,Vv)
-#endif /* __HEXAGON_ARCH___ >= 60 */
 
-#if __HVX_ARCH__ >= 60
 /* ==========================================================================
    Assembly Syntax:       Vd32.uh=vmin(Vu32.uh,Vv32.uh)
    C Intrinsic Prototype: HVX_Vector Q6_Vuh_vmin_VuhVuh(HVX_Vector Vu, HVX_Vector Vv)
@@ -1810,9 +1485,7 @@
    ========================================================================== */
 
 #define Q6_Vuh_vmin_VuhVuh(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vminuh)(Vu,Vv)
-#endif /* __HEXAGON_ARCH___ >= 60 */
 
-#if __HVX_ARCH__ >= 60
 /* ==========================================================================
    Assembly Syntax:       Vd32.w=vmin(Vu32.w,Vv32.w)
    C Intrinsic Prototype: HVX_Vector Q6_Vw_vmin_VwVw(HVX_Vector Vu, HVX_Vector Vv)
@@ -1821,9 +1494,7 @@
    ========================================================================== */
 
 #define Q6_Vw_vmin_VwVw(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vminw)(Vu,Vv)
-#endif /* __HEXAGON_ARCH___ >= 60 */
 
-#if __HVX_ARCH__ >= 60
 /* ==========================================================================
    Assembly Syntax:       Vdd32.h=vmpa(Vuu32.ub,Rt32.b)
    C Intrinsic Prototype: HVX_VectorPair Q6_Wh_vmpa_WubRb(HVX_VectorPair Vuu, Word32 Rt)
@@ -1832,9 +1503,7 @@
    ========================================================================== */
 
 #define Q6_Wh_vmpa_WubRb(Vuu,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpabus)(Vuu,Rt)
-#endif /* __HEXAGON_ARCH___ >= 60 */
 
-#if __HVX_ARCH__ >= 60
 /* ==========================================================================
    Assembly Syntax:       Vxx32.h+=vmpa(Vuu32.ub,Rt32.b)
    C Intrinsic Prototype: HVX_VectorPair Q6_Wh_vmpaacc_WhWubRb(HVX_VectorPair Vxx, HVX_VectorPair Vuu, Word32 Rt)
@@ -1843,9 +1512,7 @@
    ========================================================================== */
 
 #define Q6_Wh_vmpaacc_WhWubRb(Vxx,Vuu,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpabus_acc)(Vxx,Vuu,Rt)
-#endif /* __HEXAGON_ARCH___ >= 60 */
 
-#if __HVX_ARCH__ >= 60
 /* ==========================================================================
    Assembly Syntax:       Vdd32.h=vmpa(Vuu32.ub,Vvv32.b)
    C Intrinsic Prototype: HVX_VectorPair Q6_Wh_vmpa_WubWb(HVX_VectorPair Vuu, HVX_VectorPair Vvv)
@@ -1854,9 +1521,7 @@
    ========================================================================== */
 
 #define Q6_Wh_vmpa_WubWb(Vuu,Vvv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpabusv)(Vuu,Vvv)
-#endif /* __HEXAGON_ARCH___ >= 60 */
 
-#if __HVX_ARCH__ >= 60
 /* ==========================================================================
    Assembly Syntax:       Vdd32.h=vmpa(Vuu32.ub,Vvv32.ub)
    C Intrinsic Prototype: HVX_VectorPair Q6_Wh_vmpa_WubWub(HVX_VectorPair Vuu, HVX_VectorPair Vvv)
@@ -1865,9 +1530,7 @@
    ========================================================================== */
 
 #define Q6_Wh_vmpa_WubWub(Vuu,Vvv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpabuuv)(Vuu,Vvv)
-#endif /* __HEXAGON_ARCH___ >= 60 */
 
-#if __HVX_ARCH__ >= 60
 /* ==========================================================================
    Assembly Syntax:       Vdd32.w=vmpa(Vuu32.h,Rt32.b)
    C Intrinsic Prototype: HVX_VectorPair Q6_Ww_vmpa_WhRb(HVX_VectorPair Vuu, Word32 Rt)
@@ -1876,9 +1539,7 @@
    ========================================================================== */
 
 #define Q6_Ww_vmpa_WhRb(Vuu,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpahb)(Vuu,Rt)
-#endif /* __HEXAGON_ARCH___ >= 60 */
 
-#if __HVX_ARCH__ >= 60
 /* ==========================================================================
    Assembly Syntax:       Vxx32.w+=vmpa(Vuu32.h,Rt32.b)
    C Intrinsic Prototype: HVX_VectorPair Q6_Ww_vmpaacc_WwWhRb(HVX_VectorPair Vxx, HVX_VectorPair Vuu, Word32 Rt)
@@ -1887,9 +1548,7 @@
    ========================================================================== */
 
 #define Q6_Ww_vmpaacc_WwWhRb(Vxx,Vuu,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpahb_acc)(Vxx,Vuu,Rt)
-#endif /* __HEXAGON_ARCH___ >= 60 */
 
-#if __HVX_ARCH__ >= 60
 /* ==========================================================================
    Assembly Syntax:       Vdd32.h=vmpy(Vu32.ub,Rt32.b)
    C Intrinsic Prototype: HVX_VectorPair Q6_Wh_vmpy_VubRb(HVX_Vector Vu, Word32 Rt)
@@ -1898,9 +1557,7 @@
    ========================================================================== */
 
 #define Q6_Wh_vmpy_VubRb(Vu,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpybus)(Vu,Rt)
-#endif /* __HEXAGON_ARCH___ >= 60 */
 
-#if __HVX_ARCH__ >= 60
 /* ==========================================================================
    Assembly Syntax:       Vxx32.h+=vmpy(Vu32.ub,Rt32.b)
    C Intrinsic Prototype: HVX_VectorPair Q6_Wh_vmpyacc_WhVubRb(HVX_VectorPair Vxx, HVX_Vector Vu, Word32 Rt)
@@ -1909,9 +1566,7 @@
    ========================================================================== */
 
 #define Q6_Wh_vmpyacc_WhVubRb(Vxx,Vu,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpybus_acc)(Vxx,Vu,Rt)
-#endif /* __HEXAGON_ARCH___ >= 60 */
 
-#if __HVX_ARCH__ >= 60
 /* ==========================================================================
    Assembly Syntax:       Vdd32.h=vmpy(Vu32.ub,Vv32.b)
    C Intrinsic Prototype: HVX_VectorPair Q6_Wh_vmpy_VubVb(HVX_Vector Vu, HVX_Vector Vv)
@@ -1920,9 +1575,7 @@
    ========================================================================== */
 
 #define Q6_Wh_vmpy_VubVb(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpybusv)(Vu,Vv)
-#endif /* __HEXAGON_ARCH___ >= 60 */
 
-#if __HVX_ARCH__ >= 60
 /* ==========================================================================
    Assembly Syntax:       Vxx32.h+=vmpy(Vu32.ub,Vv32.b)
    C Intrinsic Prototype: HVX_VectorPair Q6_Wh_vmpyacc_WhVubVb(HVX_VectorPair Vxx, HVX_Vector Vu, HVX_Vector Vv)
@@ -1931,9 +1584,7 @@
    ========================================================================== */
 
 #define Q6_Wh_vmpyacc_WhVubVb(Vxx,Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpybusv_acc)(Vxx,Vu,Vv)
-#endif /* __HEXAGON_ARCH___ >= 60 */
 
-#if __HVX_ARCH__ >= 60
 /* ==========================================================================
    Assembly Syntax:       Vdd32.h=vmpy(Vu32.b,Vv32.b)
    C Intrinsic Prototype: HVX_VectorPair Q6_Wh_vmpy_VbVb(HVX_Vector Vu, HVX_Vector Vv)
@@ -1942,9 +1593,7 @@
    ========================================================================== */
 
 #define Q6_Wh_vmpy_VbVb(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpybv)(Vu,Vv)
-#endif /* __HEXAGON_ARCH___ >= 60 */
 
-#if __HVX_ARCH__ >= 60
 /* ==========================================================================
    Assembly Syntax:       Vxx32.h+=vmpy(Vu32.b,Vv32.b)
    C Intrinsic Prototype: HVX_VectorPair Q6_Wh_vmpyacc_WhVbVb(HVX_VectorPair Vxx, HVX_Vector Vu, HVX_Vector Vv)
@@ -1953,9 +1602,7 @@
    ========================================================================== */
 
 #define Q6_Wh_vmpyacc_WhVbVb(Vxx,Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpybv_acc)(Vxx,Vu,Vv)
-#endif /* __HEXAGON_ARCH___ >= 60 */
 
-#if __HVX_ARCH__ >= 60
 /* ==========================================================================
    Assembly Syntax:       Vd32.w=vmpye(Vu32.w,Vv32.uh)
    C Intrinsic Prototype: HVX_Vector Q6_Vw_vmpye_VwVuh(HVX_Vector Vu, HVX_Vector Vv)
@@ -1964,9 +1611,7 @@
    ========================================================================== */
 
 #define Q6_Vw_vmpye_VwVuh(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpyewuh)(Vu,Vv)
-#endif /* __HEXAGON_ARCH___ >= 60 */
 
-#if __HVX_ARCH__ >= 60
 /* ==========================================================================
    Assembly Syntax:       Vdd32.w=vmpy(Vu32.h,Rt32.h)
    C Intrinsic Prototype: HVX_VectorPair Q6_Ww_vmpy_VhRh(HVX_Vector Vu, Word32 Rt)
@@ -1975,9 +1620,7 @@
    ========================================================================== */
 
 #define Q6_Ww_vmpy_VhRh(Vu,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpyh)(Vu,Rt)
-#endif /* __HEXAGON_ARCH___ >= 60 */
 
-#if __HVX_ARCH__ >= 60
 /* ==========================================================================
    Assembly Syntax:       Vxx32.w+=vmpy(Vu32.h,Rt32.h):sat
    C Intrinsic Prototype: HVX_VectorPair Q6_Ww_vmpyacc_WwVhRh_sat(HVX_VectorPair Vxx, HVX_Vector Vu, Word32 Rt)
@@ -1986,9 +1629,7 @@
    ========================================================================== */
 
 #define Q6_Ww_vmpyacc_WwVhRh_sat(Vxx,Vu,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpyhsat_acc)(Vxx,Vu,Rt)
-#endif /* __HEXAGON_ARCH___ >= 60 */
 
-#if __HVX_ARCH__ >= 60
 /* ==========================================================================
    Assembly Syntax:       Vd32.h=vmpy(Vu32.h,Rt32.h):<<1:rnd:sat
    C Intrinsic Prototype: HVX_Vector Q6_Vh_vmpy_VhRh_s1_rnd_sat(HVX_Vector Vu, Word32 Rt)
@@ -1997,9 +1638,7 @@
    ========================================================================== */
 
 #define Q6_Vh_vmpy_VhRh_s1_rnd_sat(Vu,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpyhsrs)(Vu,Rt)
-#endif /* __HEXAGON_ARCH___ >= 60 */
 
-#if __HVX_ARCH__ >= 60
 /* ==========================================================================
    Assembly Syntax:       Vd32.h=vmpy(Vu32.h,Rt32.h):<<1:sat
    C Intrinsic Prototype: HVX_Vector Q6_Vh_vmpy_VhRh_s1_sat(HVX_Vector Vu, Word32 Rt)
@@ -2008,9 +1647,7 @@
    ========================================================================== */
 
 #define Q6_Vh_vmpy_VhRh_s1_sat(Vu,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpyhss)(Vu,Rt)
-#endif /* __HEXAGON_ARCH___ >= 60 */
 
-#if __HVX_ARCH__ >= 60
 /* ==========================================================================
    Assembly Syntax:       Vdd32.w=vmpy(Vu32.h,Vv32.uh)
    C Intrinsic Prototype: HVX_VectorPair Q6_Ww_vmpy_VhVuh(HVX_Vector Vu, HVX_Vector Vv)
@@ -2019,9 +1656,7 @@
    ========================================================================== */
 
 #define Q6_Ww_vmpy_VhVuh(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpyhus)(Vu,Vv)
-#endif /* __HEXAGON_ARCH___ >= 60 */
 
-#if __HVX_ARCH__ >= 60
 /* ==========================================================================
    Assembly Syntax:       Vxx32.w+=vmpy(Vu32.h,Vv32.uh)
    C Intrinsic Prototype: HVX_VectorPair Q6_Ww_vmpyacc_WwVhVuh(HVX_VectorPair Vxx, HVX_Vector Vu, HVX_Vector Vv)
@@ -2030,9 +1665,7 @@
    ========================================================================== */
 
 #define Q6_Ww_vmpyacc_WwVhVuh(Vxx,Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpyhus_acc)(Vxx,Vu,Vv)
-#endif /* __HEXAGON_ARCH___ >= 60 */
 
-#if __HVX_ARCH__ >= 60
 /* ==========================================================================
    Assembly Syntax:       Vdd32.w=vmpy(Vu32.h,Vv32.h)
    C Intrinsic Prototype: HVX_VectorPair Q6_Ww_vmpy_VhVh(HVX_Vector Vu, HVX_Vector Vv)
@@ -2041,9 +1674,7 @@
    ========================================================================== */
 
 #define Q6_Ww_vmpy_VhVh(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpyhv)(Vu,Vv)
-#endif /* __HEXAGON_ARCH___ >= 60 */
 
-#if __HVX_ARCH__ >= 60
 /* ==========================================================================
    Assembly Syntax:       Vxx32.w+=vmpy(Vu32.h,Vv32.h)
    C Intrinsic Prototype: HVX_VectorPair Q6_Ww_vmpyacc_WwVhVh(HVX_VectorPair Vxx, HVX_Vector Vu, HVX_Vector Vv)
@@ -2052,9 +1683,7 @@
    ========================================================================== */
 
 #define Q6_Ww_vmpyacc_WwVhVh(Vxx,Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpyhv_acc)(Vxx,Vu,Vv)
-#endif /* __HEXAGON_ARCH___ >= 60 */
 
-#if __HVX_ARCH__ >= 60
 /* ==========================================================================
    Assembly Syntax:       Vd32.h=vmpy(Vu32.h,Vv32.h):<<1:rnd:sat
    C Intrinsic Prototype: HVX_Vector Q6_Vh_vmpy_VhVh_s1_rnd_sat(HVX_Vector Vu, HVX_Vector Vv)
@@ -2063,9 +1692,7 @@
    ========================================================================== */
 
 #define Q6_Vh_vmpy_VhVh_s1_rnd_sat(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpyhvsrs)(Vu,Vv)
-#endif /* __HEXAGON_ARCH___ >= 60 */
 
-#if __HVX_ARCH__ >= 60
 /* ==========================================================================
    Assembly Syntax:       Vd32.w=vmpyieo(Vu32.h,Vv32.h)
    C Intrinsic Prototype: HVX_Vector Q6_Vw_vmpyieo_VhVh(HVX_Vector Vu, HVX_Vector Vv)
@@ -2074,9 +1701,7 @@
    ========================================================================== */
 
 #define Q6_Vw_vmpyieo_VhVh(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpyieoh)(Vu,Vv)
-#endif /* __HEXAGON_ARCH___ >= 60 */
 
-#if __HVX_ARCH__ >= 60
 /* ==========================================================================
    Assembly Syntax:       Vx32.w+=vmpyie(Vu32.w,Vv32.h)
    C Intrinsic Prototype: HVX_Vector Q6_Vw_vmpyieacc_VwVwVh(HVX_Vector Vx, HVX_Vector Vu, HVX_Vector Vv)
@@ -2085,9 +1710,7 @@
    ========================================================================== */
 
 #define Q6_Vw_vmpyieacc_VwVwVh(Vx,Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpyiewh_acc)(Vx,Vu,Vv)
-#endif /* __HEXAGON_ARCH___ >= 60 */
 
-#if __HVX_ARCH__ >= 60
 /* ==========================================================================
    Assembly Syntax:       Vd32.w=vmpyie(Vu32.w,Vv32.uh)
    C Intrinsic Prototype: HVX_Vector Q6_Vw_vmpyie_VwVuh(HVX_Vector Vu, HVX_Vector Vv)
@@ -2096,9 +1719,7 @@
    ========================================================================== */
 
 #define Q6_Vw_vmpyie_VwVuh(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpyiewuh)(Vu,Vv)
-#endif /* __HEXAGON_ARCH___ >= 60 */
 
-#if __HVX_ARCH__ >= 60
 /* ==========================================================================
    Assembly Syntax:       Vx32.w+=vmpyie(Vu32.w,Vv32.uh)
    C Intrinsic Prototype: HVX_Vector Q6_Vw_vmpyieacc_VwVwVuh(HVX_Vector Vx, HVX_Vector Vu, HVX_Vector Vv)
@@ -2107,9 +1728,7 @@
    ========================================================================== */
 
 #define Q6_Vw_vmpyieacc_VwVwVuh(Vx,Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpyiewuh_acc)(Vx,Vu,Vv)
-#endif /* __HEXAGON_ARCH___ >= 60 */
 
-#if __HVX_ARCH__ >= 60
 /* ==========================================================================
    Assembly Syntax:       Vd32.h=vmpyi(Vu32.h,Vv32.h)
    C Intrinsic Prototype: HVX_Vector Q6_Vh_vmpyi_VhVh(HVX_Vector Vu, HVX_Vector Vv)
@@ -2118,9 +1737,7 @@
    ========================================================================== */
 
 #define Q6_Vh_vmpyi_VhVh(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpyih)(Vu,Vv)
-#endif /* __HEXAGON_ARCH___ >= 60 */
 
-#if __HVX_ARCH__ >= 60
 /* ==========================================================================
    Assembly Syntax:       Vx32.h+=vmpyi(Vu32.h,Vv32.h)
    C Intrinsic Prototype: HVX_Vector Q6_Vh_vmpyiacc_VhVhVh(HVX_Vector Vx, HVX_Vector Vu, HVX_Vector Vv)
@@ -2129,9 +1746,7 @@
    ========================================================================== */
 
 #define Q6_Vh_vmpyiacc_VhVhVh(Vx,Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpyih_acc)(Vx,Vu,Vv)
-#endif /* __HEXAGON_ARCH___ >= 60 */
 
-#if __HVX_ARCH__ >= 60
 /* ==========================================================================
    Assembly Syntax:       Vd32.h=vmpyi(Vu32.h,Rt32.b)
    C Intrinsic Prototype: HVX_Vector Q6_Vh_vmpyi_VhRb(HVX_Vector Vu, Word32 Rt)
@@ -2140,9 +1755,7 @@
    ========================================================================== */
 
 #define Q6_Vh_vmpyi_VhRb(Vu,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpyihb)(Vu,Rt)
-#endif /* __HEXAGON_ARCH___ >= 60 */
 
-#if __HVX_ARCH__ >= 60
 /* ==========================================================================
    Assembly Syntax:       Vx32.h+=vmpyi(Vu32.h,Rt32.b)
    C Intrinsic Prototype: HVX_Vector Q6_Vh_vmpyiacc_VhVhRb(HVX_Vector Vx, HVX_Vector Vu, Word32 Rt)
@@ -2151,9 +1764,7 @@
    ========================================================================== */
 
 #define Q6_Vh_vmpyiacc_VhVhRb(Vx,Vu,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpyihb_acc)(Vx,Vu,Rt)
-#endif /* __HEXAGON_ARCH___ >= 60 */
 
-#if __HVX_ARCH__ >= 60
 /* ==========================================================================
    Assembly Syntax:       Vd32.w=vmpyio(Vu32.w,Vv32.h)
    C Intrinsic Prototype: HVX_Vector Q6_Vw_vmpyio_VwVh(HVX_Vector Vu, HVX_Vector Vv)
@@ -2162,9 +1773,7 @@
    ========================================================================== */
 
 #define Q6_Vw_vmpyio_VwVh(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpyiowh)(Vu,Vv)
-#endif /* __HEXAGON_ARCH___ >= 60 */
 
-#if __HVX_ARCH__ >= 60
 /* ==========================================================================
    Assembly Syntax:       Vd32.w=vmpyi(Vu32.w,Rt32.b)
    C Intrinsic Prototype: HVX_Vector Q6_Vw_vmpyi_VwRb(HVX_Vector Vu, Word32 Rt)
@@ -2173,9 +1782,7 @@
    ========================================================================== */
 
 #define Q6_Vw_vmpyi_VwRb(Vu,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpyiwb)(Vu,Rt)
-#endif /* __HEXAGON_ARCH___ >= 60 */
 
-#if __HVX_ARCH__ >= 60
 /* ==========================================================================
    Assembly Syntax:       Vx32.w+=vmpyi(Vu32.w,Rt32.b)
    C Intrinsic Prototype: HVX_Vector Q6_Vw_vmpyiacc_VwVwRb(HVX_Vector Vx, HVX_Vector Vu, Word32 Rt)
@@ -2184,9 +1791,7 @@
    ========================================================================== */
 
 #define Q6_Vw_vmpyiacc_VwVwRb(Vx,Vu,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpyiwb_acc)(Vx,Vu,Rt)
-#endif /* __HEXAGON_ARCH___ >= 60 */
 
-#if __HVX_ARCH__ >= 60
 /* ==========================================================================
    Assembly Syntax:       Vd32.w=vmpyi(Vu32.w,Rt32.h)
    C Intrinsic Prototype: HVX_Vector Q6_Vw_vmpyi_VwRh(HVX_Vector Vu, Word32 Rt)
@@ -2195,9 +1800,7 @@
    ========================================================================== */
 
 #define Q6_Vw_vmpyi_VwRh(Vu,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpyiwh)(Vu,Rt)
-#endif /* __HEXAGON_ARCH___ >= 60 */
 
-#if __HVX_ARCH__ >= 60
 /* ==========================================================================
    Assembly Syntax:       Vx32.w+=vmpyi(Vu32.w,Rt32.h)
    C Intrinsic Prototype: HVX_Vector Q6_Vw_vmpyiacc_VwVwRh(HVX_Vector Vx, HVX_Vector Vu, Word32 Rt)
@@ -2206,9 +1809,7 @@
    ========================================================================== */
 
 #define Q6_Vw_vmpyiacc_VwVwRh(Vx,Vu,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpyiwh_acc)(Vx,Vu,Rt)
-#endif /* __HEXAGON_ARCH___ >= 60 */
 
-#if __HVX_ARCH__ >= 60
 /* ==========================================================================
    Assembly Syntax:       Vd32.w=vmpyo(Vu32.w,Vv32.h):<<1:sat
    C Intrinsic Prototype: HVX_Vector Q6_Vw_vmpyo_VwVh_s1_sat(HVX_Vector Vu, HVX_Vector Vv)
@@ -2217,9 +1818,7 @@
    ========================================================================== */
 
 #define Q6_Vw_vmpyo_VwVh_s1_sat(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpyowh)(Vu,Vv)
-#endif /* __HEXAGON_ARCH___ >= 60 */
 
-#if __HVX_ARCH__ >= 60
 /* ==========================================================================
    Assembly Syntax:       Vd32.w=vmpyo(Vu32.w,Vv32.h):<<1:rnd:sat
    C Intrinsic Prototype: HVX_Vector Q6_Vw_vmpyo_VwVh_s1_rnd_sat(HVX_Vector Vu, HVX_Vector Vv)
@@ -2228,9 +1827,7 @@
    ========================================================================== */
 
 #define Q6_Vw_vmpyo_VwVh_s1_rnd_sat(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpyowh_rnd)(Vu,Vv)
-#endif /* __HEXAGON_ARCH___ >= 60 */
 
-#if __HVX_ARCH__ >= 60
 /* ==========================================================================
    Assembly Syntax:       Vx32.w+=vmpyo(Vu32.w,Vv32.h):<<1:rnd:sat:shift
    C Intrinsic Prototype: HVX_Vector Q6_Vw_vmpyoacc_VwVwVh_s1_rnd_sat_shift(HVX_Vector Vx, HVX_Vector Vu, HVX_Vector Vv)
@@ -2239,9 +1836,7 @@
    ========================================================================== */
 
 #define Q6_Vw_vmpyoacc_VwVwVh_s1_rnd_sat_shift(Vx,Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpyowh_rnd_sacc)(Vx,Vu,Vv)
-#endif /* __HEXAGON_ARCH___ >= 60 */
 
-#if __HVX_ARCH__ >= 60
 /* ==========================================================================
    Assembly Syntax:       Vx32.w+=vmpyo(Vu32.w,Vv32.h):<<1:sat:shift
    C Intrinsic Prototype: HVX_Vector Q6_Vw_vmpyoacc_VwVwVh_s1_sat_shift(HVX_Vector Vx, HVX_Vector Vu, HVX_Vector Vv)
@@ -2250,9 +1845,7 @@
    ========================================================================== */
 
 #define Q6_Vw_vmpyoacc_VwVwVh_s1_sat_shift(Vx,Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpyowh_sacc)(Vx,Vu,Vv)
-#endif /* __HEXAGON_ARCH___ >= 60 */
 
-#if __HVX_ARCH__ >= 60
 /* ==========================================================================
    Assembly Syntax:       Vdd32.uh=vmpy(Vu32.ub,Rt32.ub)
    C Intrinsic Prototype: HVX_VectorPair Q6_Wuh_vmpy_VubRub(HVX_Vector Vu, Word32 Rt)
@@ -2261,9 +1854,7 @@
    ========================================================================== */
 
 #define Q6_Wuh_vmpy_VubRub(Vu,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpyub)(Vu,Rt)
-#endif /* __HEXAGON_ARCH___ >= 60 */
 
-#if __HVX_ARCH__ >= 60
 /* ==========================================================================
    Assembly Syntax:       Vxx32.uh+=vmpy(Vu32.ub,Rt32.ub)
    C Intrinsic Prototype: HVX_VectorPair Q6_Wuh_vmpyacc_WuhVubRub(HVX_VectorPair Vxx, HVX_Vector Vu, Word32 Rt)
@@ -2272,9 +1863,7 @@
    ========================================================================== */
 
 #define Q6_Wuh_vmpyacc_WuhVubRub(Vxx,Vu,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpyub_acc)(Vxx,Vu,Rt)
-#endif /* __HEXAGON_ARCH___ >= 60 */
 
-#if __HVX_ARCH__ >= 60
 /* ==========================================================================
    Assembly Syntax:       Vdd32.uh=vmpy(Vu32.ub,Vv32.ub)
    C Intrinsic Prototype: HVX_VectorPair Q6_Wuh_vmpy_VubVub(HVX_Vector Vu, HVX_Vector Vv)
@@ -2283,9 +1872,7 @@
    ========================================================================== */
 
 #define Q6_Wuh_vmpy_VubVub(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpyubv)(Vu,Vv)
-#endif /* __HEXAGON_ARCH___ >= 60 */
 
-#if __HVX_ARCH__ >= 60
 /* ==========================================================================
    Assembly Syntax:       Vxx32.uh+=vmpy(Vu32.ub,Vv32.ub)
    C Intrinsic Prototype: HVX_VectorPair Q6_Wuh_vmpyacc_WuhVubVub(HVX_VectorPair Vxx, HVX_Vector Vu, HVX_Vector Vv)
@@ -2294,9 +1881,7 @@
    ========================================================================== */
 
 #define Q6_Wuh_vmpyacc_WuhVubVub(Vxx,Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpyubv_acc)(Vxx,Vu,Vv)
-#endif /* __HEXAGON_ARCH___ >= 60 */
 
-#if __HVX_ARCH__ >= 60
 /* ==========================================================================
    Assembly Syntax:       Vdd32.uw=vmpy(Vu32.uh,Rt32.uh)
    C Intrinsic Prototype: HVX_VectorPair Q6_Wuw_vmpy_VuhRuh(HVX_Vector Vu, Word32 Rt)
@@ -2305,9 +1890,7 @@
    ========================================================================== */
 
 #define Q6_Wuw_vmpy_VuhRuh(Vu,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpyuh)(Vu,Rt)
-#endif /* __HEXAGON_ARCH___ >= 60 */
 
-#if __HVX_ARCH__ >= 60
 /* ==========================================================================
    Assembly Syntax:       Vxx32.uw+=vmpy(Vu32.uh,Rt32.uh)
    C Intrinsic Prototype: HVX_VectorPair Q6_Wuw_vmpyacc_WuwVuhRuh(HVX_VectorPair Vxx, HVX_Vector Vu, Word32 Rt)
@@ -2316,9 +1899,7 @@
    ========================================================================== */
 
 #define Q6_Wuw_vmpyacc_WuwVuhRuh(Vxx,Vu,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpyuh_acc)(Vxx,Vu,Rt)
-#endif /* __HEXAGON_ARCH___ >= 60 */
 
-#if __HVX_ARCH__ >= 60
 /* ==========================================================================
    Assembly Syntax:       Vdd32.uw=vmpy(Vu32.uh,Vv32.uh)
    C Intrinsic Prototype: HVX_VectorPair Q6_Wuw_vmpy_VuhVuh(HVX_Vector Vu, HVX_Vector Vv)
@@ -2327,9 +1908,7 @@
    ========================================================================== */
 
 #define Q6_Wuw_vmpy_VuhVuh(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpyuhv)(Vu,Vv)
-#endif /* __HEXAGON_ARCH___ >= 60 */
 
-#if __HVX_ARCH__ >= 60
 /* ==========================================================================
    Assembly Syntax:       Vxx32.uw+=vmpy(Vu32.uh,Vv32.uh)
    C Intrinsic Prototype: HVX_VectorPair Q6_Wuw_vmpyacc_WuwVuhVuh(HVX_VectorPair Vxx, HVX_Vector Vu, HVX_Vector Vv)
@@ -2338,9 +1917,7 @@
    ========================================================================== */
 
 #define Q6_Wuw_vmpyacc_WuwVuhVuh(Vxx,Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpyuhv_acc)(Vxx,Vu,Vv)
-#endif /* __HEXAGON_ARCH___ >= 60 */
 
-#if __HVX_ARCH__ >= 60
 /* ==========================================================================
    Assembly Syntax:       Vd32=vmux(Qt4,Vu32,Vv32)
    C Intrinsic Prototype: HVX_Vector Q6_V_vmux_QVV(HVX_VectorPred Qt, HVX_Vector Vu, HVX_Vector Vv)
@@ -2349,9 +1926,7 @@
    ========================================================================== */
 
 #define Q6_V_vmux_QVV(Qt,Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmux)(__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qt),-1),Vu,Vv)
-#endif /* __HEXAGON_ARCH___ >= 60 */
 
-#if __HVX_ARCH__ >= 60
 /* ==========================================================================
    Assembly Syntax:       Vd32.h=vnavg(Vu32.h,Vv32.h)
    C Intrinsic Prototype: HVX_Vector Q6_Vh_vnavg_VhVh(HVX_Vector Vu, HVX_Vector Vv)
@@ -2360,9 +1935,7 @@
    ========================================================================== */
 
 #define Q6_Vh_vnavg_VhVh(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vnavgh)(Vu,Vv)
-#endif /* __HEXAGON_ARCH___ >= 60 */
 
-#if __HVX_ARCH__ >= 60
 /* ==========================================================================
    Assembly Syntax:       Vd32.b=vnavg(Vu32.ub,Vv32.ub)
    C Intrinsic Prototype: HVX_Vector Q6_Vb_vnavg_VubVub(HVX_Vector Vu, HVX_Vector Vv)
@@ -2371,9 +1944,7 @@
    ========================================================================== */
 
 #define Q6_Vb_vnavg_VubVub(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vnavgub)(Vu,Vv)
-#endif /* __HEXAGON_ARCH___ >= 60 */
 
-#if __HVX_ARCH__ >= 60
 /* ==========================================================================
    Assembly Syntax:       Vd32.w=vnavg(Vu32.w,Vv32.w)
    C Intrinsic Prototype: HVX_Vector Q6_Vw_vnavg_VwVw(HVX_Vector Vu, HVX_Vector Vv)
@@ -2382,9 +1953,7 @@
    ========================================================================== */
 
 #define Q6_Vw_vnavg_VwVw(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vnavgw)(Vu,Vv)
-#endif /* __HEXAGON_ARCH___ >= 60 */
 
-#if __HVX_ARCH__ >= 60
 /* ==========================================================================
    Assembly Syntax:       Vd32.h=vnormamt(Vu32.h)
    C Intrinsic Prototype: HVX_Vector Q6_Vh_vnormamt_Vh(HVX_Vector Vu)
@@ -2393,9 +1962,7 @@
    ========================================================================== */
 
 #define Q6_Vh_vnormamt_Vh(Vu) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vnormamth)(Vu)
-#endif /* __HEXAGON_ARCH___ >= 60 */
 
-#if __HVX_ARCH__ >= 60
 /* ==========================================================================
    Assembly Syntax:       Vd32.w=vnormamt(Vu32.w)
    C Intrinsic Prototype: HVX_Vector Q6_Vw_vnormamt_Vw(HVX_Vector Vu)
@@ -2404,9 +1971,7 @@
    ========================================================================== */
 
 #define Q6_Vw_vnormamt_Vw(Vu) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vnormamtw)(Vu)
-#endif /* __HEXAGON_ARCH___ >= 60 */
 
-#if __HVX_ARCH__ >= 60
 /* ==========================================================================
    Assembly Syntax:       Vd32=vnot(Vu32)
    C Intrinsic Prototype: HVX_Vector Q6_V_vnot_V(HVX_Vector Vu)
@@ -2415,9 +1980,7 @@
    ========================================================================== */
 
 #define Q6_V_vnot_V(Vu) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vnot)(Vu)
-#endif /* __HEXAGON_ARCH___ >= 60 */
 
-#if __HVX_ARCH__ >= 60
 /* ==========================================================================
    Assembly Syntax:       Vd32=vor(Vu32,Vv32)
    C Intrinsic Prototype: HVX_Vector Q6_V_vor_VV(HVX_Vector Vu, HVX_Vector Vv)
@@ -2426,9 +1989,7 @@
    ========================================================================== */
 
 #define Q6_V_vor_VV(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vor)(Vu,Vv)
-#endif /* __HEXAGON_ARCH___ >= 60 */
 
-#if __HVX_ARCH__ >= 60
 /* ==========================================================================
    Assembly Syntax:       Vd32.b=vpacke(Vu32.h,Vv32.h)
    C Intrinsic Prototype: HVX_Vector Q6_Vb_vpacke_VhVh(HVX_Vector Vu, HVX_Vector Vv)
@@ -2437,9 +1998,7 @@
    ========================================================================== */
 
 #define Q6_Vb_vpacke_VhVh(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vpackeb)(Vu,Vv)
-#endif /* __HEXAGON_ARCH___ >= 60 */
 
-#if __HVX_ARCH__ >= 60
 /* ==========================================================================
    Assembly Syntax:       Vd32.h=vpacke(Vu32.w,Vv32.w)
    C Intrinsic Prototype: HVX_Vector Q6_Vh_vpacke_VwVw(HVX_Vector Vu, HVX_Vector Vv)
@@ -2448,9 +2007,7 @@
    ========================================================================== */
 
 #define Q6_Vh_vpacke_VwVw(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vpackeh)(Vu,Vv)
-#endif /* __HEXAGON_ARCH___ >= 60 */
 
-#if __HVX_ARCH__ >= 60
 /* ==========================================================================
    Assembly Syntax:       Vd32.b=vpack(Vu32.h,Vv32.h):sat
    C Intrinsic Prototype: HVX_Vector Q6_Vb_vpack_VhVh_sat(HVX_Vector Vu, HVX_Vector Vv)
@@ -2459,9 +2016,7 @@
    ========================================================================== */
 
 #define Q6_Vb_vpack_VhVh_sat(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vpackhb_sat)(Vu,Vv)
-#endif /* __HEXAGON_ARCH___ >= 60 */
 
-#if __HVX_ARCH__ >= 60
 /* ==========================================================================
    Assembly Syntax:       Vd32.ub=vpack(Vu32.h,Vv32.h):sat
    C Intrinsic Prototype: HVX_Vector Q6_Vub_vpack_VhVh_sat(HVX_Vector Vu, HVX_Vector Vv)
@@ -2470,9 +2025,7 @@
    ========================================================================== */
 
 #define Q6_Vub_vpack_VhVh_sat(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vpackhub_sat)(Vu,Vv)
-#endif /* __HEXAGON_ARCH___ >= 60 */
 
-#if __HVX_ARCH__ >= 60
 /* ==========================================================================
    Assembly Syntax:       Vd32.b=vpacko(Vu32.h,Vv32.h)
    C Intrinsic Prototype: HVX_Vector Q6_Vb_vpacko_VhVh(HVX_Vector Vu, HVX_Vector Vv)
@@ -2481,9 +2034,7 @@
    ========================================================================== */
 
 #define Q6_Vb_vpacko_VhVh(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vpackob)(Vu,Vv)
-#endif /* __HEXAGON_ARCH___ >= 60 */
 
-#if __HVX_ARCH__ >= 60
 /* ==========================================================================
    Assembly Syntax:       Vd32.h=vpacko(Vu32.w,Vv32.w)
    C Intrinsic Prototype: HVX_Vector Q6_Vh_vpacko_VwVw(HVX_Vector Vu, HVX_Vector Vv)
@@ -2492,9 +2043,7 @@
    ========================================================================== */
 
 #define Q6_Vh_vpacko_VwVw(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vpackoh)(Vu,Vv)
-#endif /* __HEXAGON_ARCH___ >= 60 */
 
-#if __HVX_ARCH__ >= 60
 /* ==========================================================================
    Assembly Syntax:       Vd32.h=vpack(Vu32.w,Vv32.w):sat
    C Intrinsic Prototype: HVX_Vector Q6_Vh_vpack_VwVw_sat(HVX_Vector Vu, HVX_Vector Vv)
@@ -2503,9 +2052,7 @@
    ========================================================================== */
 
 #define Q6_Vh_vpack_VwVw_sat(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vpackwh_sat)(Vu,Vv)
-#endif /* __HEXAGON_ARCH___ >= 60 */
 
-#if __HVX_ARCH__ >= 60
 /* ==========================================================================
    Assembly Syntax:       Vd32.uh=vpack(Vu32.w,Vv32.w):sat
    C Intrinsic Prototype: HVX_Vector Q6_Vuh_vpack_VwVw_sat(HVX_Vector Vu, HVX_Vector Vv)
@@ -2514,9 +2061,7 @@
    ========================================================================== */
 
 #define Q6_Vuh_vpack_VwVw_sat(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vpackwuh_sat)(Vu,Vv)
-#endif /* __HEXAGON_ARCH___ >= 60 */
 
-#if __HVX_ARCH__ >= 60
 /* ==========================================================================
    Assembly Syntax:       Vd32.h=vpopcount(Vu32.h)
    C Intrinsic Prototype: HVX_Vector Q6_Vh_vpopcount_Vh(HVX_Vector Vu)
@@ -2525,9 +2070,7 @@
    ========================================================================== */
 
 #define Q6_Vh_vpopcount_Vh(Vu) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vpopcounth)(Vu)
-#endif /* __HEXAGON_ARCH___ >= 60 */
 
-#if __HVX_ARCH__ >= 60
 /* ==========================================================================
    Assembly Syntax:       Vd32=vrdelta(Vu32,Vv32)
    C Intrinsic Prototype: HVX_Vector Q6_V_vrdelta_VV(HVX_Vector Vu, HVX_Vector Vv)
@@ -2536,9 +2079,7 @@
    ========================================================================== */
 
 #define Q6_V_vrdelta_VV(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vrdelta)(Vu,Vv)
-#endif /* __HEXAGON_ARCH___ >= 60 */
 
-#if __HVX_ARCH__ >= 60
 /* ==========================================================================
    Assembly Syntax:       Vd32.w=vrmpy(Vu32.ub,Rt32.b)
    C Intrinsic Prototype: HVX_Vector Q6_Vw_vrmpy_VubRb(HVX_Vector Vu, Word32 Rt)
@@ -2547,9 +2088,7 @@
    ========================================================================== */
 
 #define Q6_Vw_vrmpy_VubRb(Vu,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vrmpybus)(Vu,Rt)
-#endif /* __HEXAGON_ARCH___ >= 60 */
 
-#if __HVX_ARCH__ >= 60
 /* ==========================================================================
    Assembly Syntax:       Vx32.w+=vrmpy(Vu32.ub,Rt32.b)
    C Intrinsic Prototype: HVX_Vector Q6_Vw_vrmpyacc_VwVubRb(HVX_Vector Vx, HVX_Vector Vu, Word32 Rt)
@@ -2558,9 +2097,7 @@
    ========================================================================== */
 
 #define Q6_Vw_vrmpyacc_VwVubRb(Vx,Vu,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vrmpybus_acc)(Vx,Vu,Rt)
-#endif /* __HEXAGON_ARCH___ >= 60 */
 
-#if __HVX_ARCH__ >= 60
 /* ==========================================================================
    Assembly Syntax:       Vdd32.w=vrmpy(Vuu32.ub,Rt32.b,#u1)
    C Intrinsic Prototype: HVX_VectorPair Q6_Ww_vrmpy_WubRbI(HVX_VectorPair Vuu, Word32 Rt, Word32 Iu1)
@@ -2569,9 +2106,7 @@
    ========================================================================== */
 
 #define Q6_Ww_vrmpy_WubRbI(Vuu,Rt,Iu1) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vrmpybusi)(Vuu,Rt,Iu1)
-#endif /* __HEXAGON_ARCH___ >= 60 */
 
-#if __HVX_ARCH__ >= 60
 /* ==========================================================================
    Assembly Syntax:       Vxx32.w+=vrmpy(Vuu32.ub,Rt32.b,#u1)
    C Intrinsic Prototype: HVX_VectorPair Q6_Ww_vrmpyacc_WwWubRbI(HVX_VectorPair Vxx, HVX_VectorPair Vuu, Word32 Rt, Word32 Iu1)
@@ -2580,9 +2115,7 @@
    ========================================================================== */
 
 #define Q6_Ww_vrmpyacc_WwWubRbI(Vxx,Vuu,Rt,Iu1) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vrmpybusi_acc)(Vxx,Vuu,Rt,Iu1)
-#endif /* __HEXAGON_ARCH___ >= 60 */
 
-#if __HVX_ARCH__ >= 60
 /* ==========================================================================
    Assembly Syntax:       Vd32.w=vrmpy(Vu32.ub,Vv32.b)
    C Intrinsic Prototype: HVX_Vector Q6_Vw_vrmpy_VubVb(HVX_Vector Vu, HVX_Vector Vv)
@@ -2591,9 +2124,7 @@
    ========================================================================== */
 
 #define Q6_Vw_vrmpy_VubVb(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vrmpybusv)(Vu,Vv)
-#endif /* __HEXAGON_ARCH___ >= 60 */
 
-#if __HVX_ARCH__ >= 60
 /* ==========================================================================
    Assembly Syntax:       Vx32.w+=vrmpy(Vu32.ub,Vv32.b)
    C Intrinsic Prototype: HVX_Vector Q6_Vw_vrmpyacc_VwVubVb(HVX_Vector Vx, HVX_Vector Vu, HVX_Vector Vv)
@@ -2602,9 +2133,7 @@
    ========================================================================== */
 
 #define Q6_Vw_vrmpyacc_VwVubVb(Vx,Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vrmpybusv_acc)(Vx,Vu,Vv)
-#endif /* __HEXAGON_ARCH___ >= 60 */
 
-#if __HVX_ARCH__ >= 60
 /* ==========================================================================
    Assembly Syntax:       Vd32.w=vrmpy(Vu32.b,Vv32.b)
    C Intrinsic Prototype: HVX_Vector Q6_Vw_vrmpy_VbVb(HVX_Vector Vu, HVX_Vector Vv)
@@ -2613,9 +2142,7 @@
    ========================================================================== */
 
 #define Q6_Vw_vrmpy_VbVb(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vrmpybv)(Vu,Vv)
-#endif /* __HEXAGON_ARCH___ >= 60 */
 
-#if __HVX_ARCH__ >= 60
 /* ==========================================================================
    Assembly Syntax:       Vx32.w+=vrmpy(Vu32.b,Vv32.b)
    C Intrinsic Prototype: HVX_Vector Q6_Vw_vrmpyacc_VwVbVb(HVX_Vector Vx, HVX_Vector Vu, HVX_Vector Vv)
@@ -2624,9 +2151,7 @@
    ========================================================================== */
 
 #define Q6_Vw_vrmpyacc_VwVbVb(Vx,Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vrmpybv_acc)(Vx,Vu,Vv)
-#endif /* __HEXAGON_ARCH___ >= 60 */
 
-#if __HVX_ARCH__ >= 60
 /* ==========================================================================
    Assembly Syntax:       Vd32.uw=vrmpy(Vu32.ub,Rt32.ub)
    C Intrinsic Prototype: HVX_Vector Q6_Vuw_vrmpy_VubRub(HVX_Vector Vu, Word32 Rt)
@@ -2635,9 +2160,7 @@
    ========================================================================== */
 
 #define Q6_Vuw_vrmpy_VubRub(Vu,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vrmpyub)(Vu,Rt)
-#endif /* __HEXAGON_ARCH___ >= 60 */
 
-#if __HVX_ARCH__ >= 60
 /* ==========================================================================
    Assembly Syntax:       Vx32.uw+=vrmpy(Vu32.ub,Rt32.ub)
    C Intrinsic Prototype: HVX_Vector Q6_Vuw_vrmpyacc_VuwVubRub(HVX_Vector Vx, HVX_Vector Vu, Word32 Rt)
@@ -2646,9 +2169,7 @@
    ========================================================================== */
 
 #define Q6_Vuw_vrmpyacc_VuwVubRub(Vx,Vu,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vrmpyub_acc)(Vx,Vu,Rt)
-#endif /* __HEXAGON_ARCH___ >= 60 */
 
-#if __HVX_ARCH__ >= 60
 /* ==========================================================================
    Assembly Syntax:       Vdd32.uw=vrmpy(Vuu32.ub,Rt32.ub,#u1)
    C Intrinsic Prototype: HVX_VectorPair Q6_Wuw_vrmpy_WubRubI(HVX_VectorPair Vuu, Word32 Rt, Word32 Iu1)
@@ -2657,9 +2178,7 @@
    ========================================================================== */
 
 #define Q6_Wuw_vrmpy_WubRubI(Vuu,Rt,Iu1) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vrmpyubi)(Vuu,Rt,Iu1)
-#endif /* __HEXAGON_ARCH___ >= 60 */
 
-#if __HVX_ARCH__ >= 60
 /* ==========================================================================
    Assembly Syntax:       Vxx32.uw+=vrmpy(Vuu32.ub,Rt32.ub,#u1)
    C Intrinsic Prototype: HVX_VectorPair Q6_Wuw_vrmpyacc_WuwWubRubI(HVX_VectorPair Vxx, HVX_VectorPair Vuu, Word32 Rt, Word32 Iu1)
@@ -2668,9 +2187,7 @@
    ========================================================================== */
 
 #define Q6_Wuw_vrmpyacc_WuwWubRubI(Vxx,Vuu,Rt,Iu1) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vrmpyubi_acc)(Vxx,Vuu,Rt,Iu1)
-#endif /* __HEXAGON_ARCH___ >= 60 */
 
-#if __HVX_ARCH__ >= 60
 /* ==========================================================================
    Assembly Syntax:       Vd32.uw=vrmpy(Vu32.ub,Vv32.ub)
    C Intrinsic Prototype: HVX_Vector Q6_Vuw_vrmpy_VubVub(HVX_Vector Vu, HVX_Vector Vv)
@@ -2679,9 +2196,7 @@
    ========================================================================== */
 
 #define Q6_Vuw_vrmpy_VubVub(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vrmpyubv)(Vu,Vv)
-#endif /* __HEXAGON_ARCH___ >= 60 */
 
-#if __HVX_ARCH__ >= 60
 /* ==========================================================================
    Assembly Syntax:       Vx32.uw+=vrmpy(Vu32.ub,Vv32.ub)
    C Intrinsic Prototype: HVX_Vector Q6_Vuw_vrmpyacc_VuwVubVub(HVX_Vector Vx, HVX_Vector Vu, HVX_Vector Vv)
@@ -2690,9 +2205,7 @@
    ========================================================================== */
 
 #define Q6_Vuw_vrmpyacc_VuwVubVub(Vx,Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vrmpyubv_acc)(Vx,Vu,Vv)
-#endif /* __HEXAGON_ARCH___ >= 60 */
 
-#if __HVX_ARCH__ >= 60
 /* ==========================================================================
    Assembly Syntax:       Vd32=vror(Vu32,Rt32)
    C Intrinsic Prototype: HVX_Vector Q6_V_vror_VR(HVX_Vector Vu, Word32 Rt)
@@ -2701,9 +2214,7 @@
    ========================================================================== */
 
 #define Q6_V_vror_VR(Vu,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vror)(Vu,Rt)
-#endif /* __HEXAGON_ARCH___ >= 60 */
 
-#if __HVX_ARCH__ >= 60
 /* ==========================================================================
    Assembly Syntax:       Vd32.b=vround(Vu32.h,Vv32.h):sat
    C Intrinsic Prototype: HVX_Vector Q6_Vb_vround_VhVh_sat(HVX_Vector Vu, HVX_Vector Vv)
@@ -2712,9 +2223,7 @@
    ========================================================================== */
 
 #define Q6_Vb_vround_VhVh_sat(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vroundhb)(Vu,Vv)
-#endif /* __HEXAGON_ARCH___ >= 60 */
 
-#if __HVX_ARCH__ >= 60
 /* ==========================================================================
    Assembly Syntax:       Vd32.ub=vround(Vu32.h,Vv32.h):sat
    C Intrinsic Prototype: HVX_Vector Q6_Vub_vround_VhVh_sat(HVX_Vector Vu, HVX_Vector Vv)
@@ -2723,9 +2232,7 @@
    ========================================================================== */
 
 #define Q6_Vub_vround_VhVh_sat(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vroundhub)(Vu,Vv)
-#endif /* __HEXAGON_ARCH___ >= 60 */
 
-#if __HVX_ARCH__ >= 60
 /* ==========================================================================
    Assembly Syntax:       Vd32.h=vround(Vu32.w,Vv32.w):sat
    C Intrinsic Prototype: HVX_Vector Q6_Vh_vround_VwVw_sat(HVX_Vector Vu, HVX_Vector Vv)
@@ -2734,9 +2241,7 @@
    ========================================================================== */
 
 #define Q6_Vh_vround_VwVw_sat(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vroundwh)(Vu,Vv)
-#endif /* __HEXAGON_ARCH___ >= 60 */
 
-#if __HVX_ARCH__ >= 60
 /* ==========================================================================
    Assembly Syntax:       Vd32.uh=vround(Vu32.w,Vv32.w):sat
    C Intrinsic Prototype: HVX_Vector Q6_Vuh_vround_VwVw_sat(HVX_Vector Vu, HVX_Vector Vv)
@@ -2745,9 +2250,7 @@
    ========================================================================== */
 
 #define Q6_Vuh_vround_VwVw_sat(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vroundwuh)(Vu,Vv)
-#endif /* __HEXAGON_ARCH___ >= 60 */
 
-#if __HVX_ARCH__ >= 60
 /* ==========================================================================
    Assembly Syntax:       Vdd32.uw=vrsad(Vuu32.ub,Rt32.ub,#u1)
    C Intrinsic Prototype: HVX_VectorPair Q6_Wuw_vrsad_WubRubI(HVX_VectorPair Vuu, Word32 Rt, Word32 Iu1)
@@ -2756,9 +2259,7 @@
    ========================================================================== */
 
 #define Q6_Wuw_vrsad_WubRubI(Vuu,Rt,Iu1) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vrsadubi)(Vuu,Rt,Iu1)
-#endif /* __HEXAGON_ARCH___ >= 60 */
 
-#if __HVX_ARCH__ >= 60
 /* ==========================================================================
    Assembly Syntax:       Vxx32.uw+=vrsad(Vuu32.ub,Rt32.ub,#u1)
    C Intrinsic Prototype: HVX_VectorPair Q6_Wuw_vrsadacc_WuwWubRubI(HVX_VectorPair Vxx, HVX_VectorPair Vuu, Word32 Rt, Word32 Iu1)
@@ -2767,9 +2268,7 @@
    ========================================================================== */
 
 #define Q6_Wuw_vrsadacc_WuwWubRubI(Vxx,Vuu,Rt,Iu1) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vrsadubi_acc)(Vxx,Vuu,Rt,Iu1)
-#endif /* __HEXAGON_ARCH___ >= 60 */
 
-#if __HVX_ARCH__ >= 60
 /* ==========================================================================
    Assembly Syntax:       Vd32.ub=vsat(Vu32.h,Vv32.h)
    C Intrinsic Prototype: HVX_Vector Q6_Vub_vsat_VhVh(HVX_Vector Vu, HVX_Vector Vv)
@@ -2778,9 +2277,7 @@
    ========================================================================== */
 
 #define Q6_Vub_vsat_VhVh(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vsathub)(Vu,Vv)
-#endif /* __HEXAGON_ARCH___ >= 60 */
 
-#if __HVX_ARCH__ >= 60
 /* ==========================================================================
    Assembly Syntax:       Vd32.h=vsat(Vu32.w,Vv32.w)
    C Intrinsic Prototype: HVX_Vector Q6_Vh_vsat_VwVw(HVX_Vector Vu, HVX_Vector Vv)
@@ -2789,9 +2286,7 @@
    ========================================================================== */
 
 #define Q6_Vh_vsat_VwVw(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vsatwh)(Vu,Vv)
-#endif /* __HEXAGON_ARCH___ >= 60 */
 
-#if __HVX_ARCH__ >= 60
 /* ==========================================================================
    Assembly Syntax:       Vdd32.h=vsxt(Vu32.b)
    C Intrinsic Prototype: HVX_VectorPair Q6_Wh_vsxt_Vb(HVX_Vector Vu)
@@ -2800,9 +2295,7 @@
    ========================================================================== */
 
 #define Q6_Wh_vsxt_Vb(Vu) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vsb)(Vu)
-#endif /* __HEXAGON_ARCH___ >= 60 */
 
-#if __HVX_ARCH__ >= 60
 /* ==========================================================================
    Assembly Syntax:       Vdd32.w=vsxt(Vu32.h)
    C Intrinsic Prototype: HVX_VectorPair Q6_Ww_vsxt_Vh(HVX_Vector Vu)
@@ -2811,9 +2304,7 @@
    ========================================================================== */
 
 #define Q6_Ww_vsxt_Vh(Vu) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vsh)(Vu)
-#endif /* __HEXAGON_ARCH___ >= 60 */
 
-#if __HVX_ARCH__ >= 60
 /* ==========================================================================
    Assembly Syntax:       Vd32.h=vshuffe(Vu32.h,Vv32.h)
    C Intrinsic Prototype: HVX_Vector Q6_Vh_vshuffe_VhVh(HVX_Vector Vu, HVX_Vector Vv)
@@ -2822,9 +2313,7 @@
    ========================================================================== */
 
 #define Q6_Vh_vshuffe_VhVh(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vshufeh)(Vu,Vv)
-#endif /* __HEXAGON_ARCH___ >= 60 */
 
-#if __HVX_ARCH__ >= 60
 /* ==========================================================================
    Assembly Syntax:       Vd32.b=vshuff(Vu32.b)
    C Intrinsic Prototype: HVX_Vector Q6_Vb_vshuff_Vb(HVX_Vector Vu)
@@ -2833,9 +2322,7 @@
    ========================================================================== */
 
 #define Q6_Vb_vshuff_Vb(Vu) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vshuffb)(Vu)
-#endif /* __HEXAGON_ARCH___ >= 60 */
 
-#if __HVX_ARCH__ >= 60
 /* ==========================================================================
    Assembly Syntax:       Vd32.b=vshuffe(Vu32.b,Vv32.b)
    C Intrinsic Prototype: HVX_Vector Q6_Vb_vshuffe_VbVb(HVX_Vector Vu, HVX_Vector Vv)
@@ -2844,9 +2331,7 @@
    ========================================================================== */
 
 #define Q6_Vb_vshuffe_VbVb(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vshuffeb)(Vu,Vv)
-#endif /* __HEXAGON_ARCH___ >= 60 */
 
-#if __HVX_ARCH__ >= 60
 /* ==========================================================================
    Assembly Syntax:       Vd32.h=vshuff(Vu32.h)
    C Intrinsic Prototype: HVX_Vector Q6_Vh_vshuff_Vh(HVX_Vector Vu)
@@ -2855,9 +2340,7 @@
    ========================================================================== */
 
 #define Q6_Vh_vshuff_Vh(Vu) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vshuffh)(Vu)
-#endif /* __HEXAGON_ARCH___ >= 60 */
 
-#if __HVX_ARCH__ >= 60
 /* ==========================================================================
    Assembly Syntax:       Vd32.b=vshuffo(Vu32.b,Vv32.b)
    C Intrinsic Prototype: HVX_Vector Q6_Vb_vshuffo_VbVb(HVX_Vector Vu, HVX_Vector Vv)
@@ -2866,9 +2349,7 @@
    ========================================================================== */
 
 #define Q6_Vb_vshuffo_VbVb(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vshuffob)(Vu,Vv)
-#endif /* __HEXAGON_ARCH___ >= 60 */
 
-#if __HVX_ARCH__ >= 60
 /* ==========================================================================
    Assembly Syntax:       Vdd32=vshuff(Vu32,Vv32,Rt8)
    C Intrinsic Prototype: HVX_VectorPair Q6_W_vshuff_VVR(HVX_Vector Vu, HVX_Vector Vv, Word32 Rt)
@@ -2877,9 +2358,7 @@
    ========================================================================== */
 
 #define Q6_W_vshuff_VVR(Vu,Vv,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vshuffvdd)(Vu,Vv,Rt)
-#endif /* __HEXAGON_ARCH___ >= 60 */
 
-#if __HVX_ARCH__ >= 60
 /* ==========================================================================
    Assembly Syntax:       Vdd32.b=vshuffoe(Vu32.b,Vv32.b)
    C Intrinsic Prototype: HVX_VectorPair Q6_Wb_vshuffoe_VbVb(HVX_Vector Vu, HVX_Vector Vv)
@@ -2888,9 +2367,7 @@
    ========================================================================== */
 
 #define Q6_Wb_vshuffoe_VbVb(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vshufoeb)(Vu,Vv)
-#endif /* __HEXAGON_ARCH___ >= 60 */
 
-#if __HVX_ARCH__ >= 60
 /* ==========================================================================
    Assembly Syntax:       Vdd32.h=vshuffoe(Vu32.h,Vv32.h)
    C Intrinsic Prototype: HVX_VectorPair Q6_Wh_vshuffoe_VhVh(HVX_Vector Vu, HVX_Vector Vv)
@@ -2899,9 +2376,7 @@
    ========================================================================== */
 
 #define Q6_Wh_vshuffoe_VhVh(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vshufoeh)(Vu,Vv)
-#endif /* __HEXAGON_ARCH___ >= 60 */
 
-#if __HVX_ARCH__ >= 60
 /* ==========================================================================
    Assembly Syntax:       Vd32.h=vshuffo(Vu32.h,Vv32.h)
    C Intrinsic Prototype: HVX_Vector Q6_Vh_vshuffo_VhVh(HVX_Vector Vu, HVX_Vector Vv)
@@ -2910,9 +2385,7 @@
    ========================================================================== */
 
 #define Q6_Vh_vshuffo_VhVh(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vshufoh)(Vu,Vv)
-#endif /* __HEXAGON_ARCH___ >= 60 */
 
-#if __HVX_ARCH__ >= 60
 /* ==========================================================================
    Assembly Syntax:       Vd32.b=vsub(Vu32.b,Vv32.b)
    C Intrinsic Prototype: HVX_Vector Q6_Vb_vsub_VbVb(HVX_Vector Vu, HVX_Vector Vv)
@@ -2921,9 +2394,7 @@
    ========================================================================== */
 
 #define Q6_Vb_vsub_VbVb(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vsubb)(Vu,Vv)
-#endif /* __HEXAGON_ARCH___ >= 60 */
 
-#if __HVX_ARCH__ >= 60
 /* ==========================================================================
    Assembly Syntax:       Vdd32.b=vsub(Vuu32.b,Vvv32.b)
    C Intrinsic Prototype: HVX_VectorPair Q6_Wb_vsub_WbWb(HVX_VectorPair Vuu, HVX_VectorPair Vvv)
@@ -2932,9 +2403,7 @@
    ========================================================================== */
 
 #define Q6_Wb_vsub_WbWb(Vuu,Vvv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vsubb_dv)(Vuu,Vvv)
-#endif /* __HEXAGON_ARCH___ >= 60 */
 
-#if __HVX_ARCH__ >= 60
 /* ==========================================================================
    Assembly Syntax:       if (!Qv4) Vx32.b-=Vu32.b
    C Intrinsic Prototype: HVX_Vector Q6_Vb_condnac_QnVbVb(HVX_VectorPred Qv, HVX_Vector Vx, HVX_Vector Vu)
@@ -2943,9 +2412,7 @@
    ========================================================================== */
 
 #define Q6_Vb_condnac_QnVbVb(Qv,Vx,Vu) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vsubbnq)(__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qv),-1),Vx,Vu)
-#endif /* __HEXAGON_ARCH___ >= 60 */
 
-#if __HVX_ARCH__ >= 60
 /* ==========================================================================
    Assembly Syntax:       if (Qv4) Vx32.b-=Vu32.b
    C Intrinsic Prototype: HVX_Vector Q6_Vb_condnac_QVbVb(HVX_VectorPred Qv, HVX_Vector Vx, HVX_Vector Vu)
@@ -2954,9 +2421,7 @@
    ========================================================================== */
 
 #define Q6_Vb_condnac_QVbVb(Qv,Vx,Vu) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vsubbq)(__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qv),-1),Vx,Vu)
-#endif /* __HEXAGON_ARCH___ >= 60 */
 
-#if __HVX_ARCH__ >= 60
 /* ==========================================================================
    Assembly Syntax:       Vd32.h=vsub(Vu32.h,Vv32.h)
    C Intrinsic Prototype: HVX_Vector Q6_Vh_vsub_VhVh(HVX_Vector Vu, HVX_Vector Vv)
@@ -2965,9 +2430,7 @@
    ========================================================================== */
 
 #define Q6_Vh_vsub_VhVh(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vsubh)(Vu,Vv)
-#endif /* __HEXAGON_ARCH___ >= 60 */
 
-#if __HVX_ARCH__ >= 60
 /* ==========================================================================
    Assembly Syntax:       Vdd32.h=vsub(Vuu32.h,Vvv32.h)
    C Intrinsic Prototype: HVX_VectorPair Q6_Wh_vsub_WhWh(HVX_VectorPair Vuu, HVX_VectorPair Vvv)
@@ -2976,9 +2439,7 @@
    ========================================================================== */
 
 #define Q6_Wh_vsub_WhWh(Vuu,Vvv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vsubh_dv)(Vuu,Vvv)
-#endif /* __HEXAGON_ARCH___ >= 60 */
 
-#if __HVX_ARCH__ >= 60
 /* ==========================================================================
    Assembly Syntax:       if (!Qv4) Vx32.h-=Vu32.h
    C Intrinsic Prototype: HVX_Vector Q6_Vh_condnac_QnVhVh(HVX_VectorPred Qv, HVX_Vector Vx, HVX_Vector Vu)
@@ -2987,9 +2448,7 @@
    ========================================================================== */
 
 #define Q6_Vh_condnac_QnVhVh(Qv,Vx,Vu) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vsubhnq)(__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qv),-1),Vx,Vu)
-#endif /* __HEXAGON_ARCH___ >= 60 */
 
-#if __HVX_ARCH__ >= 60
 /* ==========================================================================
    Assembly Syntax:       if (Qv4) Vx32.h-=Vu32.h
    C Intrinsic Prototype: HVX_Vector Q6_Vh_condnac_QVhVh(HVX_VectorPred Qv, HVX_Vector Vx, HVX_Vector Vu)
@@ -2998,9 +2457,7 @@
    ========================================================================== */
 
 #define Q6_Vh_condnac_QVhVh(Qv,Vx,Vu) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vsubhq)(__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qv),-1),Vx,Vu)
-#endif /* __HEXAGON_ARCH___ >= 60 */
 
-#if __HVX_ARCH__ >= 60
 /* ==========================================================================
    Assembly Syntax:       Vd32.h=vsub(Vu32.h,Vv32.h):sat
    C Intrinsic Prototype: HVX_Vector Q6_Vh_vsub_VhVh_sat(HVX_Vector Vu, HVX_Vector Vv)
@@ -3009,9 +2466,7 @@
    ========================================================================== */
 
 #define Q6_Vh_vsub_VhVh_sat(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vsubhsat)(Vu,Vv)
-#endif /* __HEXAGON_ARCH___ >= 60 */
 
-#if __HVX_ARCH__ >= 60
 /* ==========================================================================
    Assembly Syntax:       Vdd32.h=vsub(Vuu32.h,Vvv32.h):sat
    C Intrinsic Prototype: HVX_VectorPair Q6_Wh_vsub_WhWh_sat(HVX_VectorPair Vuu, HVX_VectorPair Vvv)
@@ -3020,9 +2475,7 @@
    ========================================================================== */
 
 #define Q6_Wh_vsub_WhWh_sat(Vuu,Vvv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vsubhsat_dv)(Vuu,Vvv)
-#endif /* __HEXAGON_ARCH___ >= 60 */
 
-#if __HVX_ARCH__ >= 60
 /* ==========================================================================
    Assembly Syntax:       Vdd32.w=vsub(Vu32.h,Vv32.h)
    C Intrinsic Prototype: HVX_VectorPair Q6_Ww_vsub_VhVh(HVX_Vector Vu, HVX_Vector Vv)
@@ -3031,9 +2484,7 @@
    ========================================================================== */
 
 #define Q6_Ww_vsub_VhVh(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vsubhw)(Vu,Vv)
-#endif /* __HEXAGON_ARCH___ >= 60 */
 
-#if __HVX_ARCH__ >= 60
 /* ==========================================================================
    Assembly Syntax:       Vdd32.h=vsub(Vu32.ub,Vv32.ub)
    C Intrinsic Prototype: HVX_VectorPair Q6_Wh_vsub_VubVub(HVX_Vector Vu, HVX_Vector Vv)
@@ -3042,9 +2493,7 @@
    ========================================================================== */
 
 #define Q6_Wh_vsub_VubVub(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vsububh)(Vu,Vv)
-#endif /* __HEXAGON_ARCH___ >= 60 */
 
-#if __HVX_ARCH__ >= 60
 /* ==========================================================================
    Assembly Syntax:       Vd32.ub=vsub(Vu32.ub,Vv32.ub):sat
    C Intrinsic Prototype: HVX_Vector Q6_Vub_vsub_VubVub_sat(HVX_Vector Vu, HVX_Vector Vv)
@@ -3053,9 +2502,7 @@
    ========================================================================== */
 
 #define Q6_Vub_vsub_VubVub_sat(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vsububsat)(Vu,Vv)
-#endif /* __HEXAGON_ARCH___ >= 60 */
 
-#if __HVX_ARCH__ >= 60
 /* ==========================================================================
    Assembly Syntax:       Vdd32.ub=vsub(Vuu32.ub,Vvv32.ub):sat
    C Intrinsic Prototype: HVX_VectorPair Q6_Wub_vsub_WubWub_sat(HVX_VectorPair Vuu, HVX_VectorPair Vvv)
@@ -3064,9 +2511,7 @@
    ========================================================================== */
 
 #define Q6_Wub_vsub_WubWub_sat(Vuu,Vvv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vsububsat_dv)(Vuu,Vvv)
-#endif /* __HEXAGON_ARCH___ >= 60 */
 
-#if __HVX_ARCH__ >= 60
 /* ==========================================================================
    Assembly Syntax:       Vd32.uh=vsub(Vu32.uh,Vv32.uh):sat
    C Intrinsic Prototype: HVX_Vector Q6_Vuh_vsub_VuhVuh_sat(HVX_Vector Vu, HVX_Vector Vv)
@@ -3075,9 +2520,7 @@
    ========================================================================== */
 
 #define Q6_Vuh_vsub_VuhVuh_sat(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vsubuhsat)(Vu,Vv)
-#endif /* __HEXAGON_ARCH___ >= 60 */
 
-#if __HVX_ARCH__ >= 60
 /* ==========================================================================
    Assembly Syntax:       Vdd32.uh=vsub(Vuu32.uh,Vvv32.uh):sat
    C Intrinsic Prototype: HVX_VectorPair Q6_Wuh_vsub_WuhWuh_sat(HVX_VectorPair Vuu, HVX_VectorPair Vvv)
@@ -3086,9 +2529,7 @@
    ========================================================================== */
 
 #define Q6_Wuh_vsub_WuhWuh_sat(Vuu,Vvv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vsubuhsat_dv)(Vuu,Vvv)
-#endif /* __HEXAGON_ARCH___ >= 60 */
 
-#if __HVX_ARCH__ >= 60
 /* ==========================================================================
    Assembly Syntax:       Vdd32.w=vsub(Vu32.uh,Vv32.uh)
    C Intrinsic Prototype: HVX_VectorPair Q6_Ww_vsub_VuhVuh(HVX_Vector Vu, HVX_Vector Vv)
@@ -3097,9 +2538,7 @@
    ========================================================================== */
 
 #define Q6_Ww_vsub_VuhVuh(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vsubuhw)(Vu,Vv)
-#endif /* __HEXAGON_ARCH___ >= 60 */
 
-#if __HVX_ARCH__ >= 60
 /* ==========================================================================
    Assembly Syntax:       Vd32.w=vsub(Vu32.w,Vv32.w)
    C Intrinsic Prototype: HVX_Vector Q6_Vw_vsub_VwVw(HVX_Vector Vu, HVX_Vector Vv)
@@ -3108,9 +2547,7 @@
    ========================================================================== */
 
 #define Q6_Vw_vsub_VwVw(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vsubw)(Vu,Vv)
-#endif /* __HEXAGON_ARCH___ >= 60 */
 
-#if __HVX_ARCH__ >= 60
 /* ==========================================================================
    Assembly Syntax:       Vdd32.w=vsub(Vuu32.w,Vvv32.w)
    C Intrinsic Prototype: HVX_VectorPair Q6_Ww_vsub_WwWw(HVX_VectorPair Vuu, HVX_VectorPair Vvv)
@@ -3119,9 +2556,7 @@
    ========================================================================== */
 
 #define Q6_Ww_vsub_WwWw(Vuu,Vvv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vsubw_dv)(Vuu,Vvv)
-#endif /* __HEXAGON_ARCH___ >= 60 */
 
-#if __HVX_ARCH__ >= 60
 /* ==========================================================================
    Assembly Syntax:       if (!Qv4) Vx32.w-=Vu32.w
    C Intrinsic Prototype: HVX_Vector Q6_Vw_condnac_QnVwVw(HVX_VectorPred Qv, HVX_Vector Vx, HVX_Vector Vu)
@@ -3130,9 +2565,7 @@
    ========================================================================== */
 
 #define Q6_Vw_condnac_QnVwVw(Qv,Vx,Vu) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vsubwnq)(__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qv),-1),Vx,Vu)
-#endif /* __HEXAGON_ARCH___ >= 60 */
 
-#if __HVX_ARCH__ >= 60
 /* ==========================================================================
    Assembly Syntax:       if (Qv4) Vx32.w-=Vu32.w
    C Intrinsic Prototype: HVX_Vector Q6_Vw_condnac_QVwVw(HVX_VectorPred Qv, HVX_Vector Vx, HVX_Vector Vu)
@@ -3141,9 +2574,7 @@
    ========================================================================== */
 
 #define Q6_Vw_condnac_QVwVw(Qv,Vx,Vu) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vsubwq)(__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qv),-1),Vx,Vu)
-#endif /* __HEXAGON_ARCH___ >= 60 */
 
-#if __HVX_ARCH__ >= 60
 /* ==========================================================================
    Assembly Syntax:       Vd32.w=vsub(Vu32.w,Vv32.w):sat
    C Intrinsic Prototype: HVX_Vector Q6_Vw_vsub_VwVw_sat(HVX_Vector Vu, HVX_Vector Vv)
@@ -3152,9 +2583,7 @@
    ========================================================================== */
 
 #define Q6_Vw_vsub_VwVw_sat(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vsubwsat)(Vu,Vv)
-#endif /* __HEXAGON_ARCH___ >= 60 */
 
-#if __HVX_ARCH__ >= 60
 /* ==========================================================================
    Assembly Syntax:       Vdd32.w=vsub(Vuu32.w,Vvv32.w):sat
    C Intrinsic Prototype: HVX_VectorPair Q6_Ww_vsub_WwWw_sat(HVX_VectorPair Vuu, HVX_VectorPair Vvv)
@@ -3163,9 +2592,7 @@
    ========================================================================== */
 
 #define Q6_Ww_vsub_WwWw_sat(Vuu,Vvv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vsubwsat_dv)(Vuu,Vvv)
-#endif /* __HEXAGON_ARCH___ >= 60 */
 
-#if __HVX_ARCH__ >= 60
 /* ==========================================================================
    Assembly Syntax:       Vdd32=vswap(Qt4,Vu32,Vv32)
    C Intrinsic Prototype: HVX_VectorPair Q6_W_vswap_QVV(HVX_VectorPred Qt, HVX_Vector Vu, HVX_Vector Vv)
@@ -3174,9 +2601,7 @@
    ========================================================================== */
 
 #define Q6_W_vswap_QVV(Qt,Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vswap)(__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qt),-1),Vu,Vv)
-#endif /* __HEXAGON_ARCH___ >= 60 */
 
-#if __HVX_ARCH__ >= 60
 /* ==========================================================================
    Assembly Syntax:       Vdd32.h=vtmpy(Vuu32.b,Rt32.b)
    C Intrinsic Prototype: HVX_VectorPair Q6_Wh_vtmpy_WbRb(HVX_VectorPair Vuu, Word32 Rt)
@@ -3185,9 +2610,7 @@
    ========================================================================== */
 
 #define Q6_Wh_vtmpy_WbRb(Vuu,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vtmpyb)(Vuu,Rt)
-#endif /* __HEXAGON_ARCH___ >= 60 */
 
-#if __HVX_ARCH__ >= 60
 /* ==========================================================================
    Assembly Syntax:       Vxx32.h+=vtmpy(Vuu32.b,Rt32.b)
    C Intrinsic Prototype: HVX_VectorPair Q6_Wh_vtmpyacc_WhWbRb(HVX_VectorPair Vxx, HVX_VectorPair Vuu, Word32 Rt)
@@ -3196,9 +2619,7 @@
    ========================================================================== */
 
 #define Q6_Wh_vtmpyacc_WhWbRb(Vxx,Vuu,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vtmpyb_acc)(Vxx,Vuu,Rt)
-#endif /* __HEXAGON_ARCH___ >= 60 */
 
-#if __HVX_ARCH__ >= 60
 /* ==========================================================================
    Assembly Syntax:       Vdd32.h=vtmpy(Vuu32.ub,Rt32.b)
    C Intrinsic Prototype: HVX_VectorPair Q6_Wh_vtmpy_WubRb(HVX_VectorPair Vuu, Word32 Rt)
@@ -3207,9 +2628,7 @@
    ========================================================================== */
 
 #define Q6_Wh_vtmpy_WubRb(Vuu,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vtmpybus)(Vuu,Rt)
-#endif /* __HEXAGON_ARCH___ >= 60 */
 
-#if __HVX_ARCH__ >= 60
 /* ==========================================================================
    Assembly Syntax:       Vxx32.h+=vtmpy(Vuu32.ub,Rt32.b)
    C Intrinsic Prototype: HVX_VectorPair Q6_Wh_vtmpyacc_WhWubRb(HVX_VectorPair Vxx, HVX_VectorPair Vuu, Word32 Rt)
@@ -3218,9 +2637,7 @@
    ========================================================================== */
 
 #define Q6_Wh_vtmpyacc_WhWubRb(Vxx,Vuu,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vtmpybus_acc)(Vxx,Vuu,Rt)
-#endif /* __HEXAGON_ARCH___ >= 60 */
 
-#if __HVX_ARCH__ >= 60
 /* ==========================================================================
    Assembly Syntax:       Vdd32.w=vtmpy(Vuu32.h,Rt32.b)
    C Intrinsic Prototype: HVX_VectorPair Q6_Ww_vtmpy_WhRb(HVX_VectorPair Vuu, Word32 Rt)
@@ -3229,9 +2646,7 @@
    ========================================================================== */
 
 #define Q6_Ww_vtmpy_WhRb(Vuu,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vtmpyhb)(Vuu,Rt)
-#endif /* __HEXAGON_ARCH___ >= 60 */
 
-#if __HVX_ARCH__ >= 60
 /* ==========================================================================
    Assembly Syntax:       Vxx32.w+=vtmpy(Vuu32.h,Rt32.b)
    C Intrinsic Prototype: HVX_VectorPair Q6_Ww_vtmpyacc_WwWhRb(HVX_VectorPair Vxx, HVX_VectorPair Vuu, Word32 Rt)
@@ -3240,9 +2655,7 @@
    ========================================================================== */
 
 #define Q6_Ww_vtmpyacc_WwWhRb(Vxx,Vuu,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vtmpyhb_acc)(Vxx,Vuu,Rt)
-#endif /* __HEXAGON_ARCH___ >= 60 */
 
-#if __HVX_ARCH__ >= 60
 /* ==========================================================================
    Assembly Syntax:       Vdd32.h=vunpack(Vu32.b)
    C Intrinsic Prototype: HVX_VectorPair Q6_Wh_vunpack_Vb(HVX_Vector Vu)
@@ -3251,9 +2664,7 @@
    ========================================================================== */
 
 #define Q6_Wh_vunpack_Vb(Vu) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vunpackb)(Vu)
-#endif /* __HEXAGON_ARCH___ >= 60 */
 
-#if __HVX_ARCH__ >= 60
 /* ==========================================================================
    Assembly Syntax:       Vdd32.w=vunpack(Vu32.h)
    C Intrinsic Prototype: HVX_VectorPair Q6_Ww_vunpack_Vh(HVX_Vector Vu)
@@ -3262,9 +2673,7 @@
    ========================================================================== */
 
 #define Q6_Ww_vunpack_Vh(Vu) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vunpackh)(Vu)
-#endif /* __HEXAGON_ARCH___ >= 60 */
 
-#if __HVX_ARCH__ >= 60
 /* ==========================================================================
    Assembly Syntax:       Vxx32.h|=vunpacko(Vu32.b)
    C Intrinsic Prototype: HVX_VectorPair Q6_Wh_vunpackoor_WhVb(HVX_VectorPair Vxx, HVX_Vector Vu)
@@ -3273,9 +2682,7 @@
    ========================================================================== */
 
 #define Q6_Wh_vunpackoor_WhVb(Vxx,Vu) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vunpackob)(Vxx,Vu)
-#endif /* __HEXAGON_ARCH___ >= 60 */
 
-#if __HVX_ARCH__ >= 60
 /* ==========================================================================
    Assembly Syntax:       Vxx32.w|=vunpacko(Vu32.h)
    C Intrinsic Prototype: HVX_VectorPair Q6_Ww_vunpackoor_WwVh(HVX_VectorPair Vxx, HVX_Vector Vu)
@@ -3284,9 +2691,7 @@
    ========================================================================== */
 
 #define Q6_Ww_vunpackoor_WwVh(Vxx,Vu) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vunpackoh)(Vxx,Vu)
-#endif /* __HEXAGON_ARCH___ >= 60 */
 
-#if __HVX_ARCH__ >= 60
 /* ==========================================================================
    Assembly Syntax:       Vdd32.uh=vunpack(Vu32.ub)
    C Intrinsic Prototype: HVX_VectorPair Q6_Wuh_vunpack_Vub(HVX_Vector Vu)
@@ -3295,9 +2700,7 @@
    ========================================================================== */
 
 #define Q6_Wuh_vunpack_Vub(Vu) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vunpackub)(Vu)
-#endif /* __HEXAGON_ARCH___ >= 60 */
 
-#if __HVX_ARCH__ >= 60
 /* ==========================================================================
    Assembly Syntax:       Vdd32.uw=vunpack(Vu32.uh)
    C Intrinsic Prototype: HVX_VectorPair Q6_Wuw_vunpack_Vuh(HVX_Vector Vu)
@@ -3306,9 +2709,7 @@
    ========================================================================== */
 
 #define Q6_Wuw_vunpack_Vuh(Vu) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vunpackuh)(Vu)
-#endif /* __HEXAGON_ARCH___ >= 60 */
 
-#if __HVX_ARCH__ >= 60
 /* ==========================================================================
    Assembly Syntax:       Vd32=vxor(Vu32,Vv32)
    C Intrinsic Prototype: HVX_Vector Q6_V_vxor_VV(HVX_Vector Vu, HVX_Vector Vv)
@@ -3317,9 +2718,7 @@
    ========================================================================== */
 
 #define Q6_V_vxor_VV(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vxor)(Vu,Vv)
-#endif /* __HEXAGON_ARCH___ >= 60 */
 
-#if __HVX_ARCH__ >= 60
 /* ==========================================================================
    Assembly Syntax:       Vdd32.uh=vzxt(Vu32.ub)
    C Intrinsic Prototype: HVX_VectorPair Q6_Wuh_vzxt_Vub(HVX_Vector Vu)
@@ -3328,9 +2727,7 @@
    ========================================================================== */
 
 #define Q6_Wuh_vzxt_Vub(Vu) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vzb)(Vu)
-#endif /* __HEXAGON_ARCH___ >= 60 */
 
-#if __HVX_ARCH__ >= 60
 /* ==========================================================================
    Assembly Syntax:       Vdd32.uw=vzxt(Vu32.uh)
    C Intrinsic Prototype: HVX_VectorPair Q6_Wuw_vzxt_Vuh(HVX_Vector Vu)
@@ -3339,7 +2736,6 @@
    ========================================================================== */
 
 #define Q6_Wuw_vzxt_Vuh(Vu) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vzh)(Vu)
-#endif /* __HEXAGON_ARCH___ >= 60 */
 
 #if __HVX_ARCH__ >= 62
 /* ==========================================================================
@@ -3350,7 +2746,7 @@
    ========================================================================== */
 
 #define Q6_Vb_vsplat_R(Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_lvsplatb)(Rt)
-#endif /* __HEXAGON_ARCH___ >= 62 */
+#endif
 
 #if __HVX_ARCH__ >= 62
 /* ==========================================================================
@@ -3361,7 +2757,7 @@
    ========================================================================== */
 
 #define Q6_Vh_vsplat_R(Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_lvsplath)(Rt)
-#endif /* __HEXAGON_ARCH___ >= 62 */
+#endif
 
 #if __HVX_ARCH__ >= 62
 /* ==========================================================================
@@ -3372,7 +2768,7 @@
    ========================================================================== */
 
 #define Q6_Q_vsetq2_R(Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandqrt)((__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_pred_scalar2v2)(Rt)),-1)
-#endif /* __HEXAGON_ARCH___ >= 62 */
+#endif
 
 #if __HVX_ARCH__ >= 62
 /* ==========================================================================
@@ -3383,7 +2779,7 @@
    ========================================================================== */
 
 #define Q6_Qb_vshuffe_QhQh(Qs,Qt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandqrt)((__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_shuffeqh)(__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qs),-1),__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qt),-1))),-1)
-#endif /* __HEXAGON_ARCH___ >= 62 */
+#endif
 
 #if __HVX_ARCH__ >= 62
 /* ==========================================================================
@@ -3394,7 +2790,7 @@
    ========================================================================== */
 
 #define Q6_Qh_vshuffe_QwQw(Qs,Qt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandqrt)((__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_shuffeqw)(__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qs),-1),__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qt),-1))),-1)
-#endif /* __HEXAGON_ARCH___ >= 62 */
+#endif
 
 #if __HVX_ARCH__ >= 62
 /* ==========================================================================
@@ -3405,7 +2801,7 @@
    ========================================================================== */
 
 #define Q6_Vb_vadd_VbVb_sat(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vaddbsat)(Vu,Vv)
-#endif /* __HEXAGON_ARCH___ >= 62 */
+#endif
 
 #if __HVX_ARCH__ >= 62
 /* ==========================================================================
@@ -3416,7 +2812,7 @@
    ========================================================================== */
 
 #define Q6_Wb_vadd_WbWb_sat(Vuu,Vvv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vaddbsat_dv)(Vuu,Vvv)
-#endif /* __HEXAGON_ARCH___ >= 62 */
+#endif
 
 #if __HVX_ARCH__ >= 62
 /* ==========================================================================
@@ -3427,7 +2823,7 @@
    ========================================================================== */
 
 #define Q6_Vw_vadd_VwVwQ_carry(Vu,Vv,Qx) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vaddcarry)(Vu,Vv,Qx)
-#endif /* __HEXAGON_ARCH___ >= 62 */
+#endif
 
 #if __HVX_ARCH__ >= 62
 /* ==========================================================================
@@ -3438,7 +2834,7 @@
    ========================================================================== */
 
 #define Q6_Vh_vadd_vclb_VhVh(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vaddclbh)(Vu,Vv)
-#endif /* __HEXAGON_ARCH___ >= 62 */
+#endif
 
 #if __HVX_ARCH__ >= 62
 /* ==========================================================================
@@ -3449,7 +2845,7 @@
    ========================================================================== */
 
 #define Q6_Vw_vadd_vclb_VwVw(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vaddclbw)(Vu,Vv)
-#endif /* __HEXAGON_ARCH___ >= 62 */
+#endif
 
 #if __HVX_ARCH__ >= 62
 /* ==========================================================================
@@ -3460,7 +2856,7 @@
    ========================================================================== */
 
 #define Q6_Ww_vaddacc_WwVhVh(Vxx,Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vaddhw_acc)(Vxx,Vu,Vv)
-#endif /* __HEXAGON_ARCH___ >= 62 */
+#endif
 
 #if __HVX_ARCH__ >= 62
 /* ==========================================================================
@@ -3471,7 +2867,7 @@
    ========================================================================== */
 
 #define Q6_Wh_vaddacc_WhVubVub(Vxx,Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vaddubh_acc)(Vxx,Vu,Vv)
-#endif /* __HEXAGON_ARCH___ >= 62 */
+#endif
 
 #if __HVX_ARCH__ >= 62
 /* ==========================================================================
@@ -3482,7 +2878,7 @@
    ========================================================================== */
 
 #define Q6_Vub_vadd_VubVb_sat(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vaddububb_sat)(Vu,Vv)
-#endif /* __HEXAGON_ARCH___ >= 62 */
+#endif
 
 #if __HVX_ARCH__ >= 62
 /* ==========================================================================
@@ -3493,7 +2889,7 @@
    ========================================================================== */
 
 #define Q6_Ww_vaddacc_WwVuhVuh(Vxx,Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vadduhw_acc)(Vxx,Vu,Vv)
-#endif /* __HEXAGON_ARCH___ >= 62 */
+#endif
 
 #if __HVX_ARCH__ >= 62
 /* ==========================================================================
@@ -3504,7 +2900,7 @@
    ========================================================================== */
 
 #define Q6_Vuw_vadd_VuwVuw_sat(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vadduwsat)(Vu,Vv)
-#endif /* __HEXAGON_ARCH___ >= 62 */
+#endif
 
 #if __HVX_ARCH__ >= 62
 /* ==========================================================================
@@ -3515,7 +2911,7 @@
    ========================================================================== */
 
 #define Q6_Wuw_vadd_WuwWuw_sat(Vuu,Vvv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vadduwsat_dv)(Vuu,Vvv)
-#endif /* __HEXAGON_ARCH___ >= 62 */
+#endif
 
 #if __HVX_ARCH__ >= 62
 /* ==========================================================================
@@ -3526,7 +2922,7 @@
    ========================================================================== */
 
 #define Q6_V_vand_QnR(Qu,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandnqrt)(__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qu),-1),Rt)
-#endif /* __HEXAGON_ARCH___ >= 62 */
+#endif
 
 #if __HVX_ARCH__ >= 62
 /* ==========================================================================
@@ -3537,7 +2933,7 @@
    ========================================================================== */
 
 #define Q6_V_vandor_VQnR(Vx,Qu,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandnqrt_acc)(Vx,__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qu),-1),Rt)
-#endif /* __HEXAGON_ARCH___ >= 62 */
+#endif
 
 #if __HVX_ARCH__ >= 62
 /* ==========================================================================
@@ -3548,7 +2944,7 @@
    ========================================================================== */
 
 #define Q6_V_vand_QnV(Qv,Vu) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvnqv)(__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qv),-1),Vu)
-#endif /* __HEXAGON_ARCH___ >= 62 */
+#endif
 
 #if __HVX_ARCH__ >= 62
 /* ==========================================================================
@@ -3559,7 +2955,7 @@
    ========================================================================== */
 
 #define Q6_V_vand_QV(Qv,Vu) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvqv)(__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qv),-1),Vu)
-#endif /* __HEXAGON_ARCH___ >= 62 */
+#endif
 
 #if __HVX_ARCH__ >= 62
 /* ==========================================================================
@@ -3570,7 +2966,7 @@
    ========================================================================== */
 
 #define Q6_Vb_vasr_VhVhR_sat(Vu,Vv,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vasrhbsat)(Vu,Vv,Rt)
-#endif /* __HEXAGON_ARCH___ >= 62 */
+#endif
 
 #if __HVX_ARCH__ >= 62
 /* ==========================================================================
@@ -3581,7 +2977,7 @@
    ========================================================================== */
 
 #define Q6_Vuh_vasr_VuwVuwR_rnd_sat(Vu,Vv,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vasruwuhrndsat)(Vu,Vv,Rt)
-#endif /* __HEXAGON_ARCH___ >= 62 */
+#endif
 
 #if __HVX_ARCH__ >= 62
 /* ==========================================================================
@@ -3592,7 +2988,7 @@
    ========================================================================== */
 
 #define Q6_Vuh_vasr_VwVwR_rnd_sat(Vu,Vv,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vasrwuhrndsat)(Vu,Vv,Rt)
-#endif /* __HEXAGON_ARCH___ >= 62 */
+#endif
 
 #if __HVX_ARCH__ >= 62
 /* ==========================================================================
@@ -3603,7 +2999,7 @@
    ========================================================================== */
 
 #define Q6_Vub_vlsr_VubR(Vu,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vlsrb)(Vu,Rt)
-#endif /* __HEXAGON_ARCH___ >= 62 */
+#endif
 
 #if __HVX_ARCH__ >= 62
 /* ==========================================================================
@@ -3614,7 +3010,7 @@
    ========================================================================== */
 
 #define Q6_Vb_vlut32_VbVbR_nomatch(Vu,Vv,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vlutvvb_nm)(Vu,Vv,Rt)
-#endif /* __HEXAGON_ARCH___ >= 62 */
+#endif
 
 #if __HVX_ARCH__ >= 62
 /* ==========================================================================
@@ -3625,7 +3021,7 @@
    ========================================================================== */
 
 #define Q6_Vb_vlut32or_VbVbVbI(Vx,Vu,Vv,Iu3) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vlutvvb_oracci)(Vx,Vu,Vv,Iu3)
-#endif /* __HEXAGON_ARCH___ >= 62 */
+#endif
 
 #if __HVX_ARCH__ >= 62
 /* ==========================================================================
@@ -3636,7 +3032,7 @@
    ========================================================================== */
 
 #define Q6_Vb_vlut32_VbVbI(Vu,Vv,Iu3) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vlutvvbi)(Vu,Vv,Iu3)
-#endif /* __HEXAGON_ARCH___ >= 62 */
+#endif
 
 #if __HVX_ARCH__ >= 62
 /* ==========================================================================
@@ -3647,7 +3043,7 @@
    ========================================================================== */
 
 #define Q6_Wh_vlut16_VbVhR_nomatch(Vu,Vv,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vlutvwh_nm)(Vu,Vv,Rt)
-#endif /* __HEXAGON_ARCH___ >= 62 */
+#endif
 
 #if __HVX_ARCH__ >= 62
 /* ==========================================================================
@@ -3658,7 +3054,7 @@
    ========================================================================== */
 
 #define Q6_Wh_vlut16or_WhVbVhI(Vxx,Vu,Vv,Iu3) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vlutvwh_oracci)(Vxx,Vu,Vv,Iu3)
-#endif /* __HEXAGON_ARCH___ >= 62 */
+#endif
 
 #if __HVX_ARCH__ >= 62
 /* ==========================================================================
@@ -3669,7 +3065,7 @@
    ========================================================================== */
 
 #define Q6_Wh_vlut16_VbVhI(Vu,Vv,Iu3) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vlutvwhi)(Vu,Vv,Iu3)
-#endif /* __HEXAGON_ARCH___ >= 62 */
+#endif
 
 #if __HVX_ARCH__ >= 62
 /* ==========================================================================
@@ -3680,7 +3076,7 @@
    ========================================================================== */
 
 #define Q6_Vb_vmax_VbVb(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmaxb)(Vu,Vv)
-#endif /* __HEXAGON_ARCH___ >= 62 */
+#endif
 
 #if __HVX_ARCH__ >= 62
 /* ==========================================================================
@@ -3691,7 +3087,7 @@
    ========================================================================== */
 
 #define Q6_Vb_vmin_VbVb(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vminb)(Vu,Vv)
-#endif /* __HEXAGON_ARCH___ >= 62 */
+#endif
 
 #if __HVX_ARCH__ >= 62
 /* ==========================================================================
@@ -3702,7 +3098,7 @@
    ========================================================================== */
 
 #define Q6_Ww_vmpa_WuhRb(Vuu,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpauhb)(Vuu,Rt)
-#endif /* __HEXAGON_ARCH___ >= 62 */
+#endif
 
 #if __HVX_ARCH__ >= 62
 /* ==========================================================================
@@ -3713,7 +3109,7 @@
    ========================================================================== */
 
 #define Q6_Ww_vmpaacc_WwWuhRb(Vxx,Vuu,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpauhb_acc)(Vxx,Vuu,Rt)
-#endif /* __HEXAGON_ARCH___ >= 62 */
+#endif
 
 #if __HVX_ARCH__ >= 62
 /* ==========================================================================
@@ -3724,7 +3120,7 @@
    ========================================================================== */
 
 #define Q6_W_vmpye_VwVuh(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpyewuh_64)(Vu,Vv)
-#endif /* __HEXAGON_ARCH___ >= 62 */
+#endif
 
 #if __HVX_ARCH__ >= 62
 /* ==========================================================================
@@ -3735,7 +3131,7 @@
    ========================================================================== */
 
 #define Q6_Vw_vmpyi_VwRub(Vu,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpyiwub)(Vu,Rt)
-#endif /* __HEXAGON_ARCH___ >= 62 */
+#endif
 
 #if __HVX_ARCH__ >= 62
 /* ==========================================================================
@@ -3746,7 +3142,7 @@
    ========================================================================== */
 
 #define Q6_Vw_vmpyiacc_VwVwRub(Vx,Vu,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpyiwub_acc)(Vx,Vu,Rt)
-#endif /* __HEXAGON_ARCH___ >= 62 */
+#endif
 
 #if __HVX_ARCH__ >= 62
 /* ==========================================================================
@@ -3757,7 +3153,7 @@
    ========================================================================== */
 
 #define Q6_W_vmpyoacc_WVwVh(Vxx,Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpyowh_64_acc)(Vxx,Vu,Vv)
-#endif /* __HEXAGON_ARCH___ >= 62 */
+#endif
 
 #if __HVX_ARCH__ >= 62
 /* ==========================================================================
@@ -3768,7 +3164,7 @@
    ========================================================================== */
 
 #define Q6_Vub_vround_VuhVuh_sat(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vrounduhub)(Vu,Vv)
-#endif /* __HEXAGON_ARCH___ >= 62 */
+#endif
 
 #if __HVX_ARCH__ >= 62
 /* ==========================================================================
@@ -3779,7 +3175,7 @@
    ========================================================================== */
 
 #define Q6_Vuh_vround_VuwVuw_sat(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vrounduwuh)(Vu,Vv)
-#endif /* __HEXAGON_ARCH___ >= 62 */
+#endif
 
 #if __HVX_ARCH__ >= 62
 /* ==========================================================================
@@ -3790,7 +3186,7 @@
    ========================================================================== */
 
 #define Q6_Vuh_vsat_VuwVuw(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vsatuwuh)(Vu,Vv)
-#endif /* __HEXAGON_ARCH___ >= 62 */
+#endif
 
 #if __HVX_ARCH__ >= 62
 /* ==========================================================================
@@ -3801,7 +3197,7 @@
    ========================================================================== */
 
 #define Q6_Vb_vsub_VbVb_sat(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vsubbsat)(Vu,Vv)
-#endif /* __HEXAGON_ARCH___ >= 62 */
+#endif
 
 #if __HVX_ARCH__ >= 62
 /* ==========================================================================
@@ -3812,7 +3208,7 @@
    ========================================================================== */
 
 #define Q6_Wb_vsub_WbWb_sat(Vuu,Vvv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vsubbsat_dv)(Vuu,Vvv)
-#endif /* __HEXAGON_ARCH___ >= 62 */
+#endif
 
 #if __HVX_ARCH__ >= 62
 /* ==========================================================================
@@ -3823,7 +3219,7 @@
    ========================================================================== */
 
 #define Q6_Vw_vsub_VwVwQ_carry(Vu,Vv,Qx) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vsubcarry)(Vu,Vv,Qx)
-#endif /* __HEXAGON_ARCH___ >= 62 */
+#endif
 
 #if __HVX_ARCH__ >= 62
 /* ==========================================================================
@@ -3834,7 +3230,7 @@
    ========================================================================== */
 
 #define Q6_Vub_vsub_VubVb_sat(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vsubububb_sat)(Vu,Vv)
-#endif /* __HEXAGON_ARCH___ >= 62 */
+#endif
 
 #if __HVX_ARCH__ >= 62
 /* ==========================================================================
@@ -3845,7 +3241,7 @@
    ========================================================================== */
 
 #define Q6_Vuw_vsub_VuwVuw_sat(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vsubuwsat)(Vu,Vv)
-#endif /* __HEXAGON_ARCH___ >= 62 */
+#endif
 
 #if __HVX_ARCH__ >= 62
 /* ==========================================================================
@@ -3856,7 +3252,7 @@
    ========================================================================== */
 
 #define Q6_Wuw_vsub_WuwWuw_sat(Vuu,Vvv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vsubuwsat_dv)(Vuu,Vvv)
-#endif /* __HEXAGON_ARCH___ >= 62 */
+#endif
 
 #if __HVX_ARCH__ >= 65
 /* ==========================================================================
@@ -3867,7 +3263,7 @@
    ========================================================================== */
 
 #define Q6_Vb_vabs_Vb(Vu) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vabsb)(Vu)
-#endif /* __HEXAGON_ARCH___ >= 65 */
+#endif
 
 #if __HVX_ARCH__ >= 65
 /* ==========================================================================
@@ -3878,7 +3274,7 @@
    ========================================================================== */
 
 #define Q6_Vb_vabs_Vb_sat(Vu) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vabsb_sat)(Vu)
-#endif /* __HEXAGON_ARCH___ >= 65 */
+#endif
 
 #if __HVX_ARCH__ >= 65
 /* ==========================================================================
@@ -3889,7 +3285,7 @@
    ========================================================================== */
 
 #define Q6_Vh_vaslacc_VhVhR(Vx,Vu,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vaslh_acc)(Vx,Vu,Rt)
-#endif /* __HEXAGON_ARCH___ >= 65 */
+#endif
 
 #if __HVX_ARCH__ >= 65
 /* ==========================================================================
@@ -3900,7 +3296,7 @@
    ========================================================================== */
 
 #define Q6_Vh_vasracc_VhVhR(Vx,Vu,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vasrh_acc)(Vx,Vu,Rt)
-#endif /* __HEXAGON_ARCH___ >= 65 */
+#endif
 
 #if __HVX_ARCH__ >= 65
 /* ==========================================================================
@@ -3911,7 +3307,7 @@
    ========================================================================== */
 
 #define Q6_Vub_vasr_VuhVuhR_rnd_sat(Vu,Vv,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vasruhubrndsat)(Vu,Vv,Rt)
-#endif /* __HEXAGON_ARCH___ >= 65 */
+#endif
 
 #if __HVX_ARCH__ >= 65
 /* ==========================================================================
@@ -3922,7 +3318,7 @@
    ========================================================================== */
 
 #define Q6_Vub_vasr_VuhVuhR_sat(Vu,Vv,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vasruhubsat)(Vu,Vv,Rt)
-#endif /* __HEXAGON_ARCH___ >= 65 */
+#endif
 
 #if __HVX_ARCH__ >= 65
 /* ==========================================================================
@@ -3933,7 +3329,7 @@
    ========================================================================== */
 
 #define Q6_Vuh_vasr_VuwVuwR_sat(Vu,Vv,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vasruwuhsat)(Vu,Vv,Rt)
-#endif /* __HEXAGON_ARCH___ >= 65 */
+#endif
 
 #if __HVX_ARCH__ >= 65
 /* ==========================================================================
@@ -3944,7 +3340,7 @@
    ========================================================================== */
 
 #define Q6_Vb_vavg_VbVb(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vavgb)(Vu,Vv)
-#endif /* __HEXAGON_ARCH___ >= 65 */
+#endif
 
 #if __HVX_ARCH__ >= 65
 /* ==========================================================================
@@ -3955,7 +3351,7 @@
    ========================================================================== */
 
 #define Q6_Vb_vavg_VbVb_rnd(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vavgbrnd)(Vu,Vv)
-#endif /* __HEXAGON_ARCH___ >= 65 */
+#endif
 
 #if __HVX_ARCH__ >= 65
 /* ==========================================================================
@@ -3966,7 +3362,7 @@
    ========================================================================== */
 
 #define Q6_Vuw_vavg_VuwVuw(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vavguw)(Vu,Vv)
-#endif /* __HEXAGON_ARCH___ >= 65 */
+#endif
 
 #if __HVX_ARCH__ >= 65
 /* ==========================================================================
@@ -3977,7 +3373,7 @@
    ========================================================================== */
 
 #define Q6_Vuw_vavg_VuwVuw_rnd(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vavguwrnd)(Vu,Vv)
-#endif /* __HEXAGON_ARCH___ >= 65 */
+#endif
 
 #if __HVX_ARCH__ >= 65
 /* ==========================================================================
@@ -3988,7 +3384,7 @@
    ========================================================================== */
 
 #define Q6_W_vzero() __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vdd0)()
-#endif /* __HEXAGON_ARCH___ >= 65 */
+#endif
 
 #if __HVX_ARCH__ >= 65
 /* ==========================================================================
@@ -3999,7 +3395,7 @@
    ========================================================================== */
 
 #define Q6_vgather_ARMVh(Rs,Rt,Mu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vgathermh)(Rs,Rt,Mu,Vv)
-#endif /* __HEXAGON_ARCH___ >= 65 */
+#endif
 
 #if __HVX_ARCH__ >= 65
 /* ==========================================================================
@@ -4010,7 +3406,7 @@
    ========================================================================== */
 
 #define Q6_vgather_AQRMVh(Rs,Qs,Rt,Mu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vgathermhq)(Rs,__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qs),-1),Rt,Mu,Vv)
-#endif /* __HEXAGON_ARCH___ >= 65 */
+#endif
 
 #if __HVX_ARCH__ >= 65
 /* ==========================================================================
@@ -4021,7 +3417,7 @@
    ========================================================================== */
 
 #define Q6_vgather_ARMWw(Rs,Rt,Mu,Vvv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vgathermhw)(Rs,Rt,Mu,Vvv)
-#endif /* __HEXAGON_ARCH___ >= 65 */
+#endif
 
 #if __HVX_ARCH__ >= 65
 /* ==========================================================================
@@ -4032,7 +3428,7 @@
    ========================================================================== */
 
 #define Q6_vgather_AQRMWw(Rs,Qs,Rt,Mu,Vvv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vgathermhwq)(Rs,__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qs),-1),Rt,Mu,Vvv)
-#endif /* __HEXAGON_ARCH___ >= 65 */
+#endif
 
 #if __HVX_ARCH__ >= 65
 /* ==========================================================================
@@ -4043,7 +3439,7 @@
    ========================================================================== */
 
 #define Q6_vgather_ARMVw(Rs,Rt,Mu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vgathermw)(Rs,Rt,Mu,Vv)
-#endif /* __HEXAGON_ARCH___ >= 65 */
+#endif
 
 #if __HVX_ARCH__ >= 65
 /* ==========================================================================
@@ -4054,7 +3450,7 @@
    ========================================================================== */
 
 #define Q6_vgather_AQRMVw(Rs,Qs,Rt,Mu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vgathermwq)(Rs,__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qs),-1),Rt,Mu,Vv)
-#endif /* __HEXAGON_ARCH___ >= 65 */
+#endif
 
 #if __HVX_ARCH__ >= 65
 /* ==========================================================================
@@ -4065,7 +3461,7 @@
    ========================================================================== */
 
 #define Q6_Vh_vlut4_VuhPh(Vu,Rtt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vlut4)(Vu,Rtt)
-#endif /* __HEXAGON_ARCH___ >= 65 */
+#endif
 
 #if __HVX_ARCH__ >= 65
 /* ==========================================================================
@@ -4076,7 +3472,7 @@
    ========================================================================== */
 
 #define Q6_Wh_vmpa_WubRub(Vuu,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpabuu)(Vuu,Rt)
-#endif /* __HEXAGON_ARCH___ >= 65 */
+#endif
 
 #if __HVX_ARCH__ >= 65
 /* ==========================================================================
@@ -4087,7 +3483,7 @@
    ========================================================================== */
 
 #define Q6_Wh_vmpaacc_WhWubRub(Vxx,Vuu,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpabuu_acc)(Vxx,Vuu,Rt)
-#endif /* __HEXAGON_ARCH___ >= 65 */
+#endif
 
 #if __HVX_ARCH__ >= 65
 /* ==========================================================================
@@ -4098,7 +3494,7 @@
    ========================================================================== */
 
 #define Q6_Vh_vmpa_VhVhVhPh_sat(Vx,Vu,Rtt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpahhsat)(Vx,Vu,Rtt)
-#endif /* __HEXAGON_ARCH___ >= 65 */
+#endif
 
 #if __HVX_ARCH__ >= 65
 /* ==========================================================================
@@ -4109,7 +3505,7 @@
    ========================================================================== */
 
 #define Q6_Vh_vmpa_VhVhVuhPuh_sat(Vx,Vu,Rtt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpauhuhsat)(Vx,Vu,Rtt)
-#endif /* __HEXAGON_ARCH___ >= 65 */
+#endif
 
 #if __HVX_ARCH__ >= 65
 /* ==========================================================================
@@ -4120,7 +3516,7 @@
    ========================================================================== */
 
 #define Q6_Vh_vmps_VhVhVuhPuh_sat(Vx,Vu,Rtt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpsuhuhsat)(Vx,Vu,Rtt)
-#endif /* __HEXAGON_ARCH___ >= 65 */
+#endif
 
 #if __HVX_ARCH__ >= 65
 /* ==========================================================================
@@ -4131,7 +3527,7 @@
    ========================================================================== */
 
 #define Q6_Ww_vmpyacc_WwVhRh(Vxx,Vu,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpyh_acc)(Vxx,Vu,Rt)
-#endif /* __HEXAGON_ARCH___ >= 65 */
+#endif
 
 #if __HVX_ARCH__ >= 65
 /* ==========================================================================
@@ -4142,7 +3538,7 @@
    ========================================================================== */
 
 #define Q6_Vuw_vmpye_VuhRuh(Vu,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpyuhe)(Vu,Rt)
-#endif /* __HEXAGON_ARCH___ >= 65 */
+#endif
 
 #if __HVX_ARCH__ >= 65
 /* ==========================================================================
@@ -4153,7 +3549,7 @@
    ========================================================================== */
 
 #define Q6_Vuw_vmpyeacc_VuwVuhRuh(Vx,Vu,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpyuhe_acc)(Vx,Vu,Rt)
-#endif /* __HEXAGON_ARCH___ >= 65 */
+#endif
 
 #if __HVX_ARCH__ >= 65
 /* ==========================================================================
@@ -4164,7 +3560,7 @@
    ========================================================================== */
 
 #define Q6_Vb_vnavg_VbVb(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vnavgb)(Vu,Vv)
-#endif /* __HEXAGON_ARCH___ >= 65 */
+#endif
 
 #if __HVX_ARCH__ >= 65
 /* ==========================================================================
@@ -4175,7 +3571,7 @@
    ========================================================================== */
 
 #define Q6_Vb_prefixsum_Q(Qv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vprefixqb)(__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qv),-1))
-#endif /* __HEXAGON_ARCH___ >= 65 */
+#endif
 
 #if __HVX_ARCH__ >= 65
 /* ==========================================================================
@@ -4186,7 +3582,7 @@
    ========================================================================== */
 
 #define Q6_Vh_prefixsum_Q(Qv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vprefixqh)(__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qv),-1))
-#endif /* __HEXAGON_ARCH___ >= 65 */
+#endif
 
 #if __HVX_ARCH__ >= 65
 /* ==========================================================================
@@ -4197,7 +3593,7 @@
    ========================================================================== */
 
 #define Q6_Vw_prefixsum_Q(Qv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vprefixqw)(__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qv),-1))
-#endif /* __HEXAGON_ARCH___ >= 65 */
+#endif
 
 #if __HVX_ARCH__ >= 65
 /* ==========================================================================
@@ -4208,7 +3604,7 @@
    ========================================================================== */
 
 #define Q6_vscatter_RMVhV(Rt,Mu,Vv,Vw) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vscattermh)(Rt,Mu,Vv,Vw)
-#endif /* __HEXAGON_ARCH___ >= 65 */
+#endif
 
 #if __HVX_ARCH__ >= 65
 /* ==========================================================================
@@ -4219,7 +3615,7 @@
    ========================================================================== */
 
 #define Q6_vscatteracc_RMVhV(Rt,Mu,Vv,Vw) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vscattermh_add)(Rt,Mu,Vv,Vw)
-#endif /* __HEXAGON_ARCH___ >= 65 */
+#endif
 
 #if __HVX_ARCH__ >= 65
 /* ==========================================================================
@@ -4230,7 +3626,7 @@
    ========================================================================== */
 
 #define Q6_vscatter_QRMVhV(Qs,Rt,Mu,Vv,Vw) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vscattermhq)(__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qs),-1),Rt,Mu,Vv,Vw)
-#endif /* __HEXAGON_ARCH___ >= 65 */
+#endif
 
 #if __HVX_ARCH__ >= 65
 /* ==========================================================================
@@ -4241,7 +3637,7 @@
    ========================================================================== */
 
 #define Q6_vscatter_RMWwV(Rt,Mu,Vvv,Vw) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vscattermhw)(Rt,Mu,Vvv,Vw)
-#endif /* __HEXAGON_ARCH___ >= 65 */
+#endif
 
 #if __HVX_ARCH__ >= 65
 /* ==========================================================================
@@ -4252,7 +3648,7 @@
    ========================================================================== */
 
 #define Q6_vscatteracc_RMWwV(Rt,Mu,Vvv,Vw) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vscattermhw_add)(Rt,Mu,Vvv,Vw)
-#endif /* __HEXAGON_ARCH___ >= 65 */
+#endif
 
 #if __HVX_ARCH__ >= 65
 /* ==========================================================================
@@ -4263,7 +3659,7 @@
    ========================================================================== */
 
 #define Q6_vscatter_QRMWwV(Qs,Rt,Mu,Vvv,Vw) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vscattermhwq)(__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qs),-1),Rt,Mu,Vvv,Vw)
-#endif /* __HEXAGON_ARCH___ >= 65 */
+#endif
 
 #if __HVX_ARCH__ >= 65
 /* ==========================================================================
@@ -4274,7 +3670,7 @@
    ========================================================================== */
 
 #define Q6_vscatter_RMVwV(Rt,Mu,Vv,Vw) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vscattermw)(Rt,Mu,Vv,Vw)
-#endif /* __HEXAGON_ARCH___ >= 65 */
+#endif
 
 #if __HVX_ARCH__ >= 65
 /* ==========================================================================
@@ -4285,7 +3681,7 @@
    ========================================================================== */
 
 #define Q6_vscatteracc_RMVwV(Rt,Mu,Vv,Vw) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vscattermw_add)(Rt,Mu,Vv,Vw)
-#endif /* __HEXAGON_ARCH___ >= 65 */
+#endif
 
 #if __HVX_ARCH__ >= 65
 /* ==========================================================================
@@ -4296,7 +3692,7 @@
    ========================================================================== */
 
 #define Q6_vscatter_QRMVwV(Qs,Rt,Mu,Vv,Vw) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vscattermwq)(__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qs),-1),Rt,Mu,Vv,Vw)
-#endif /* __HEXAGON_ARCH___ >= 65 */
+#endif
 
 #if __HVX_ARCH__ >= 66
 /* ==========================================================================
@@ -4307,7 +3703,7 @@
    ========================================================================== */
 
 #define Q6_Vw_vadd_VwVwQ_carry_sat(Vu,Vv,Qs) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vaddcarrysat)(Vu,Vv,__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qs),-1))
-#endif /* __HEXAGON_ARCH___ >= 66 */
+#endif
 
 #if __HVX_ARCH__ >= 66
 /* ==========================================================================
@@ -4318,7 +3714,7 @@
    ========================================================================== */
 
 #define Q6_Ww_vasrinto_WwVwVw(Vxx,Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vasr_into)(Vxx,Vu,Vv)
-#endif /* __HEXAGON_ARCH___ >= 66 */
+#endif
 
 #if __HVX_ARCH__ >= 66
 /* ==========================================================================
@@ -4329,7 +3725,7 @@
    ========================================================================== */
 
 #define Q6_Vuw_vrotr_VuwVuw(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vrotr)(Vu,Vv)
-#endif /* __HEXAGON_ARCH___ >= 66 */
+#endif
 
 #if __HVX_ARCH__ >= 66
 /* ==========================================================================
@@ -4340,7 +3736,7 @@
    ========================================================================== */
 
 #define Q6_Vw_vsatdw_VwVw(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vsatdw)(Vu,Vv)
-#endif /* __HEXAGON_ARCH___ >= 66 */
+#endif
 
 #if __HVX_ARCH__ >= 68
 /* ==========================================================================
@@ -4351,7 +3747,7 @@
    ========================================================================== */
 
 #define Q6_Ww_v6mpy_WubWbI_h(Vuu,Vvv,Iu2) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_v6mpyhubs10)(Vuu,Vvv,Iu2)
-#endif /* __HEXAGON_ARCH___ >= 68 */
+#endif
 
 #if __HVX_ARCH__ >= 68
 /* ==========================================================================
@@ -4362,7 +3758,7 @@
    ========================================================================== */
 
 #define Q6_Ww_v6mpyacc_WwWubWbI_h(Vxx,Vuu,Vvv,Iu2) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_v6mpyhubs10_vxx)(Vxx,Vuu,Vvv,Iu2)
-#endif /* __HEXAGON_ARCH___ >= 68 */
+#endif
 
 #if __HVX_ARCH__ >= 68
 /* ==========================================================================
@@ -4373,7 +3769,7 @@
    ========================================================================== */
 
 #define Q6_Ww_v6mpy_WubWbI_v(Vuu,Vvv,Iu2) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_v6mpyvubs10)(Vuu,Vvv,Iu2)
-#endif /* __HEXAGON_ARCH___ >= 68 */
+#endif
 
 #if __HVX_ARCH__ >= 68
 /* ==========================================================================
@@ -4384,9 +3780,9 @@
    ========================================================================== */
 
 #define Q6_Ww_v6mpyacc_WwWubWbI_v(Vxx,Vuu,Vvv,Iu2) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_v6mpyvubs10_vxx)(Vxx,Vuu,Vvv,Iu2)
-#endif /* __HEXAGON_ARCH___ >= 68 */
+#endif
 
-#if __HVX_ARCH__ >= 68
+#if __HVX_ARCH__ >= 68 && defined __HVX_IEEE_FP__
 /* ==========================================================================
    Assembly Syntax:       Vd32.hf=vabs(Vu32.hf)
    C Intrinsic Prototype: HVX_Vector Q6_Vhf_vabs_Vhf(HVX_Vector Vu)
@@ -4395,9 +3791,9 @@
    ========================================================================== */
 
 #define Q6_Vhf_vabs_Vhf(Vu) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vabs_hf)(Vu)
-#endif /* __HEXAGON_ARCH___ >= 68 */
+#endif
 
-#if __HVX_ARCH__ >= 68
+#if __HVX_ARCH__ >= 68 && defined __HVX_IEEE_FP__
 /* ==========================================================================
    Assembly Syntax:       Vd32.sf=vabs(Vu32.sf)
    C Intrinsic Prototype: HVX_Vector Q6_Vsf_vabs_Vsf(HVX_Vector Vu)
@@ -4406,7 +3802,7 @@
    ========================================================================== */
 
 #define Q6_Vsf_vabs_Vsf(Vu) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vabs_sf)(Vu)
-#endif /* __HEXAGON_ARCH___ >= 68 */
+#endif
 
 #if __HVX_ARCH__ >= 68
 /* ==========================================================================
@@ -4417,9 +3813,9 @@
    ========================================================================== */
 
 #define Q6_Vqf16_vadd_VhfVhf(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vadd_hf)(Vu,Vv)
-#endif /* __HEXAGON_ARCH___ >= 68 */
+#endif
 
-#if __HVX_ARCH__ >= 68
+#if __HVX_ARCH__ >= 68 && defined __HVX_IEEE_FP__
 /* ==========================================================================
    Assembly Syntax:       Vd32.hf=vadd(Vu32.hf,Vv32.hf)
    C Intrinsic Prototype: HVX_Vector Q6_Vhf_vadd_VhfVhf(HVX_Vector Vu, HVX_Vector Vv)
@@ -4428,7 +3824,7 @@
    ========================================================================== */
 
 #define Q6_Vhf_vadd_VhfVhf(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vadd_hf_hf)(Vu,Vv)
-#endif /* __HEXAGON_ARCH___ >= 68 */
+#endif
 
 #if __HVX_ARCH__ >= 68
 /* ==========================================================================
@@ -4439,7 +3835,7 @@
    ========================================================================== */
 
 #define Q6_Vqf16_vadd_Vqf16Vqf16(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vadd_qf16)(Vu,Vv)
-#endif /* __HEXAGON_ARCH___ >= 68 */
+#endif
 
 #if __HVX_ARCH__ >= 68
 /* ==========================================================================
@@ -4450,7 +3846,7 @@
    ========================================================================== */
 
 #define Q6_Vqf16_vadd_Vqf16Vhf(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vadd_qf16_mix)(Vu,Vv)
-#endif /* __HEXAGON_ARCH___ >= 68 */
+#endif
 
 #if __HVX_ARCH__ >= 68
 /* ==========================================================================
@@ -4461,7 +3857,7 @@
    ========================================================================== */
 
 #define Q6_Vqf32_vadd_Vqf32Vqf32(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vadd_qf32)(Vu,Vv)
-#endif /* __HEXAGON_ARCH___ >= 68 */
+#endif
 
 #if __HVX_ARCH__ >= 68
 /* ==========================================================================
@@ -4472,7 +3868,7 @@
    ========================================================================== */
 
 #define Q6_Vqf32_vadd_Vqf32Vsf(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vadd_qf32_mix)(Vu,Vv)
-#endif /* __HEXAGON_ARCH___ >= 68 */
+#endif
 
 #if __HVX_ARCH__ >= 68
 /* ==========================================================================
@@ -4483,9 +3879,9 @@
    ========================================================================== */
 
 #define Q6_Vqf32_vadd_VsfVsf(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vadd_sf)(Vu,Vv)
-#endif /* __HEXAGON_ARCH___ >= 68 */
+#endif
 
-#if __HVX_ARCH__ >= 68
+#if __HVX_ARCH__ >= 68 && defined __HVX_IEEE_FP__
 /* ==========================================================================
    Assembly Syntax:       Vdd32.sf=vadd(Vu32.hf,Vv32.hf)
    C Intrinsic Prototype: HVX_VectorPair Q6_Wsf_vadd_VhfVhf(HVX_Vector Vu, HVX_Vector Vv)
@@ -4494,9 +3890,9 @@
    ========================================================================== */
 
 #define Q6_Wsf_vadd_VhfVhf(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vadd_sf_hf)(Vu,Vv)
-#endif /* __HEXAGON_ARCH___ >= 68 */
+#endif
 
-#if __HVX_ARCH__ >= 68
+#if __HVX_ARCH__ >= 68 && defined __HVX_IEEE_FP__
 /* ==========================================================================
    Assembly Syntax:       Vd32.sf=vadd(Vu32.sf,Vv32.sf)
    C Intrinsic Prototype: HVX_Vector Q6_Vsf_vadd_VsfVsf(HVX_Vector Vu, HVX_Vector Vv)
@@ -4505,9 +3901,9 @@
    ========================================================================== */
 
 #define Q6_Vsf_vadd_VsfVsf(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vadd_sf_sf)(Vu,Vv)
-#endif /* __HEXAGON_ARCH___ >= 68 */
+#endif
 
-#if __HVX_ARCH__ >= 68
+#if __HVX_ARCH__ >= 68 && defined __HVX_IEEE_FP__
 /* ==========================================================================
    Assembly Syntax:       Vd32.w=vfmv(Vu32.w)
    C Intrinsic Prototype: HVX_Vector Q6_Vw_vfmv_Vw(HVX_Vector Vu)
@@ -4516,7 +3912,7 @@
    ========================================================================== */
 
 #define Q6_Vw_vfmv_Vw(Vu) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vassign_fp)(Vu)
-#endif /* __HEXAGON_ARCH___ >= 68 */
+#endif
 
 #if __HVX_ARCH__ >= 68
 /* ==========================================================================
@@ -4527,7 +3923,7 @@
    ========================================================================== */
 
 #define Q6_Vhf_equals_Vqf16(Vu) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vconv_hf_qf16)(Vu)
-#endif /* __HEXAGON_ARCH___ >= 68 */
+#endif
 
 #if __HVX_ARCH__ >= 68
 /* ==========================================================================
@@ -4538,7 +3934,7 @@
    ========================================================================== */
 
 #define Q6_Vhf_equals_Wqf32(Vuu) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vconv_hf_qf32)(Vuu)
-#endif /* __HEXAGON_ARCH___ >= 68 */
+#endif
 
 #if __HVX_ARCH__ >= 68
 /* ==========================================================================
@@ -4549,9 +3945,9 @@
    ========================================================================== */
 
 #define Q6_Vsf_equals_Vqf32(Vu) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vconv_sf_qf32)(Vu)
-#endif /* __HEXAGON_ARCH___ >= 68 */
+#endif
 
-#if __HVX_ARCH__ >= 68
+#if __HVX_ARCH__ >= 68 && defined __HVX_IEEE_FP__
 /* ==========================================================================
    Assembly Syntax:       Vd32.b=vcvt(Vu32.hf,Vv32.hf)
    C Intrinsic Prototype: HVX_Vector Q6_Vb_vcvt_VhfVhf(HVX_Vector Vu, HVX_Vector Vv)
@@ -4560,9 +3956,9 @@
    ========================================================================== */
 
 #define Q6_Vb_vcvt_VhfVhf(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vcvt_b_hf)(Vu,Vv)
-#endif /* __HEXAGON_ARCH___ >= 68 */
+#endif
 
-#if __HVX_ARCH__ >= 68
+#if __HVX_ARCH__ >= 68 && defined __HVX_IEEE_FP__
 /* ==========================================================================
    Assembly Syntax:       Vd32.h=vcvt(Vu32.hf)
    C Intrinsic Prototype: HVX_Vector Q6_Vh_vcvt_Vhf(HVX_Vector Vu)
@@ -4571,9 +3967,9 @@
    ========================================================================== */
 
 #define Q6_Vh_vcvt_Vhf(Vu) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vcvt_h_hf)(Vu)
-#endif /* __HEXAGON_ARCH___ >= 68 */
+#endif
 
-#if __HVX_ARCH__ >= 68
+#if __HVX_ARCH__ >= 68 && defined __HVX_IEEE_FP__
 /* ==========================================================================
    Assembly Syntax:       Vdd32.hf=vcvt(Vu32.b)
    C Intrinsic Prototype: HVX_VectorPair Q6_Whf_vcvt_Vb(HVX_Vector Vu)
@@ -4582,9 +3978,9 @@
    ========================================================================== */
 
 #define Q6_Whf_vcvt_Vb(Vu) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vcvt_hf_b)(Vu)
-#endif /* __HEXAGON_ARCH___ >= 68 */
+#endif
 
-#if __HVX_ARCH__ >= 68
+#if __HVX_ARCH__ >= 68 && defined __HVX_IEEE_FP__
 /* ==========================================================================
    Assembly Syntax:       Vd32.hf=vcvt(Vu32.h)
    C Intrinsic Prototype: HVX_Vector Q6_Vhf_vcvt_Vh(HVX_Vector Vu)
@@ -4593,9 +3989,9 @@
    ========================================================================== */
 
 #define Q6_Vhf_vcvt_Vh(Vu) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vcvt_hf_h)(Vu)
-#endif /* __HEXAGON_ARCH___ >= 68 */
+#endif
 
-#if __HVX_ARCH__ >= 68
+#if __HVX_ARCH__ >= 68 && defined __HVX_IEEE_FP__
 /* ==========================================================================
    Assembly Syntax:       Vd32.hf=vcvt(Vu32.sf,Vv32.sf)
    C Intrinsic Prototype: HVX_Vector Q6_Vhf_vcvt_VsfVsf(HVX_Vector Vu, HVX_Vector Vv)
@@ -4604,9 +4000,9 @@
    ========================================================================== */
 
 #define Q6_Vhf_vcvt_VsfVsf(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vcvt_hf_sf)(Vu,Vv)
-#endif /* __HEXAGON_ARCH___ >= 68 */
+#endif
 
-#if __HVX_ARCH__ >= 68
+#if __HVX_ARCH__ >= 68 && defined __HVX_IEEE_FP__
 /* ==========================================================================
    Assembly Syntax:       Vdd32.hf=vcvt(Vu32.ub)
    C Intrinsic Prototype: HVX_VectorPair Q6_Whf_vcvt_Vub(HVX_Vector Vu)
@@ -4615,9 +4011,9 @@
    ========================================================================== */
 
 #define Q6_Whf_vcvt_Vub(Vu) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vcvt_hf_ub)(Vu)
-#endif /* __HEXAGON_ARCH___ >= 68 */
+#endif
 
-#if __HVX_ARCH__ >= 68
+#if __HVX_ARCH__ >= 68 && defined __HVX_IEEE_FP__
 /* ==========================================================================
    Assembly Syntax:       Vd32.hf=vcvt(Vu32.uh)
    C Intrinsic Prototype: HVX_Vector Q6_Vhf_vcvt_Vuh(HVX_Vector Vu)
@@ -4626,9 +4022,9 @@
    ========================================================================== */
 
 #define Q6_Vhf_vcvt_Vuh(Vu) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vcvt_hf_uh)(Vu)
-#endif /* __HEXAGON_ARCH___ >= 68 */
+#endif
 
-#if __HVX_ARCH__ >= 68
+#if __HVX_ARCH__ >= 68 && defined __HVX_IEEE_FP__
 /* ==========================================================================
    Assembly Syntax:       Vdd32.sf=vcvt(Vu32.hf)
    C Intrinsic Prototype: HVX_VectorPair Q6_Wsf_vcvt_Vhf(HVX_Vector Vu)
@@ -4637,9 +4033,9 @@
    ========================================================================== */
 
 #define Q6_Wsf_vcvt_Vhf(Vu) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vcvt_sf_hf)(Vu)
-#endif /* __HEXAGON_ARCH___ >= 68 */
+#endif
 
-#if __HVX_ARCH__ >= 68
+#if __HVX_ARCH__ >= 68 && defined __HVX_IEEE_FP__
 /* ==========================================================================
    Assembly Syntax:       Vd32.ub=vcvt(Vu32.hf,Vv32.hf)
    C Intrinsic Prototype: HVX_Vector Q6_Vub_vcvt_VhfVhf(HVX_Vector Vu, HVX_Vector Vv)
@@ -4648,9 +4044,9 @@
    ========================================================================== */
 
 #define Q6_Vub_vcvt_VhfVhf(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vcvt_ub_hf)(Vu,Vv)
-#endif /* __HEXAGON_ARCH___ >= 68 */
+#endif
 
-#if __HVX_ARCH__ >= 68
+#if __HVX_ARCH__ >= 68 && defined __HVX_IEEE_FP__
 /* ==========================================================================
    Assembly Syntax:       Vd32.uh=vcvt(Vu32.hf)
    C Intrinsic Prototype: HVX_Vector Q6_Vuh_vcvt_Vhf(HVX_Vector Vu)
@@ -4659,9 +4055,9 @@
    ========================================================================== */
 
 #define Q6_Vuh_vcvt_Vhf(Vu) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vcvt_uh_hf)(Vu)
-#endif /* __HEXAGON_ARCH___ >= 68 */
+#endif
 
-#if __HVX_ARCH__ >= 68
+#if __HVX_ARCH__ >= 68 && defined __HVX_IEEE_FP__
 /* ==========================================================================
    Assembly Syntax:       Vd32.sf=vdmpy(Vu32.hf,Vv32.hf)
    C Intrinsic Prototype: HVX_Vector Q6_Vsf_vdmpy_VhfVhf(HVX_Vector Vu, HVX_Vector Vv)
@@ -4670,9 +4066,9 @@
    ========================================================================== */
 
 #define Q6_Vsf_vdmpy_VhfVhf(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vdmpy_sf_hf)(Vu,Vv)
-#endif /* __HEXAGON_ARCH___ >= 68 */
+#endif
 
-#if __HVX_ARCH__ >= 68
+#if __HVX_ARCH__ >= 68 && defined __HVX_IEEE_FP__
 /* ==========================================================================
    Assembly Syntax:       Vx32.sf+=vdmpy(Vu32.hf,Vv32.hf)
    C Intrinsic Prototype: HVX_Vector Q6_Vsf_vdmpyacc_VsfVhfVhf(HVX_Vector Vx, HVX_Vector Vu, HVX_Vector Vv)
@@ -4681,9 +4077,9 @@
    ========================================================================== */
 
 #define Q6_Vsf_vdmpyacc_VsfVhfVhf(Vx,Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vdmpy_sf_hf_acc)(Vx,Vu,Vv)
-#endif /* __HEXAGON_ARCH___ >= 68 */
+#endif
 
-#if __HVX_ARCH__ >= 68
+#if __HVX_ARCH__ >= 68 && defined __HVX_IEEE_FP__
 /* ==========================================================================
    Assembly Syntax:       Vd32.hf=vfmax(Vu32.hf,Vv32.hf)
    C Intrinsic Prototype: HVX_Vector Q6_Vhf_vfmax_VhfVhf(HVX_Vector Vu, HVX_Vector Vv)
@@ -4692,9 +4088,9 @@
    ========================================================================== */
 
 #define Q6_Vhf_vfmax_VhfVhf(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vfmax_hf)(Vu,Vv)
-#endif /* __HEXAGON_ARCH___ >= 68 */
+#endif
 
-#if __HVX_ARCH__ >= 68
+#if __HVX_ARCH__ >= 68 && defined __HVX_IEEE_FP__
 /* ==========================================================================
    Assembly Syntax:       Vd32.sf=vfmax(Vu32.sf,Vv32.sf)
    C Intrinsic Prototype: HVX_Vector Q6_Vsf_vfmax_VsfVsf(HVX_Vector Vu, HVX_Vector Vv)
@@ -4703,9 +4099,9 @@
    ========================================================================== */
 
 #define Q6_Vsf_vfmax_VsfVsf(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vfmax_sf)(Vu,Vv)
-#endif /* __HEXAGON_ARCH___ >= 68 */
+#endif
 
-#if __HVX_ARCH__ >= 68
+#if __HVX_ARCH__ >= 68 && defined __HVX_IEEE_FP__
 /* ==========================================================================
    Assembly Syntax:       Vd32.hf=vfmin(Vu32.hf,Vv32.hf)
    C Intrinsic Prototype: HVX_Vector Q6_Vhf_vfmin_VhfVhf(HVX_Vector Vu, HVX_Vector Vv)
@@ -4714,9 +4110,9 @@
    ========================================================================== */
 
 #define Q6_Vhf_vfmin_VhfVhf(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vfmin_hf)(Vu,Vv)
-#endif /* __HEXAGON_ARCH___ >= 68 */
+#endif
 
-#if __HVX_ARCH__ >= 68
+#if __HVX_ARCH__ >= 68 && defined __HVX_IEEE_FP__
 /* ==========================================================================
    Assembly Syntax:       Vd32.sf=vfmin(Vu32.sf,Vv32.sf)
    C Intrinsic Prototype: HVX_Vector Q6_Vsf_vfmin_VsfVsf(HVX_Vector Vu, HVX_Vector Vv)
@@ -4725,9 +4121,9 @@
    ========================================================================== */
 
 #define Q6_Vsf_vfmin_VsfVsf(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vfmin_sf)(Vu,Vv)
-#endif /* __HEXAGON_ARCH___ >= 68 */
+#endif
 
-#if __HVX_ARCH__ >= 68
+#if __HVX_ARCH__ >= 68 && defined __HVX_IEEE_FP__
 /* ==========================================================================
    Assembly Syntax:       Vd32.hf=vfneg(Vu32.hf)
    C Intrinsic Prototype: HVX_Vector Q6_Vhf_vfneg_Vhf(HVX_Vector Vu)
@@ -4736,9 +4132,9 @@
    ========================================================================== */
 
 #define Q6_Vhf_vfneg_Vhf(Vu) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vfneg_hf)(Vu)
-#endif /* __HEXAGON_ARCH___ >= 68 */
+#endif
 
-#if __HVX_ARCH__ >= 68
+#if __HVX_ARCH__ >= 68 && defined __HVX_IEEE_FP__
 /* ==========================================================================
    Assembly Syntax:       Vd32.sf=vfneg(Vu32.sf)
    C Intrinsic Prototype: HVX_Vector Q6_Vsf_vfneg_Vsf(HVX_Vector Vu)
@@ -4747,7 +4143,7 @@
    ========================================================================== */
 
 #define Q6_Vsf_vfneg_Vsf(Vu) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vfneg_sf)(Vu)
-#endif /* __HEXAGON_ARCH___ >= 68 */
+#endif
 
 #if __HVX_ARCH__ >= 68
 /* ==========================================================================
@@ -4758,7 +4154,7 @@
    ========================================================================== */
 
 #define Q6_Q_vcmp_gt_VhfVhf(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandqrt)((__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vgthf)(Vu,Vv)),-1)
-#endif /* __HEXAGON_ARCH___ >= 68 */
+#endif
 
 #if __HVX_ARCH__ >= 68
 /* ==========================================================================
@@ -4769,7 +4165,7 @@
    ========================================================================== */
 
 #define Q6_Q_vcmp_gtand_QVhfVhf(Qx,Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandqrt)((__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vgthf_and)(__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qx),-1),Vu,Vv)),-1)
-#endif /* __HEXAGON_ARCH___ >= 68 */
+#endif
 
 #if __HVX_ARCH__ >= 68
 /* ==========================================================================
@@ -4780,7 +4176,7 @@
    ========================================================================== */
 
 #define Q6_Q_vcmp_gtor_QVhfVhf(Qx,Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandqrt)((__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vgthf_or)(__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qx),-1),Vu,Vv)),-1)
-#endif /* __HEXAGON_ARCH___ >= 68 */
+#endif
 
 #if __HVX_ARCH__ >= 68
 /* ==========================================================================
@@ -4791,7 +4187,7 @@
    ========================================================================== */
 
 #define Q6_Q_vcmp_gtxacc_QVhfVhf(Qx,Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandqrt)((__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vgthf_xor)(__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qx),-1),Vu,Vv)),-1)
-#endif /* __HEXAGON_ARCH___ >= 68 */
+#endif
 
 #if __HVX_ARCH__ >= 68
 /* ==========================================================================
@@ -4802,7 +4198,7 @@
    ========================================================================== */
 
 #define Q6_Q_vcmp_gt_VsfVsf(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandqrt)((__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vgtsf)(Vu,Vv)),-1)
-#endif /* __HEXAGON_ARCH___ >= 68 */
+#endif
 
 #if __HVX_ARCH__ >= 68
 /* ==========================================================================
@@ -4813,7 +4209,7 @@
    ========================================================================== */
 
 #define Q6_Q_vcmp_gtand_QVsfVsf(Qx,Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandqrt)((__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vgtsf_and)(__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qx),-1),Vu,Vv)),-1)
-#endif /* __HEXAGON_ARCH___ >= 68 */
+#endif
 
 #if __HVX_ARCH__ >= 68
 /* ==========================================================================
@@ -4824,7 +4220,7 @@
    ========================================================================== */
 
 #define Q6_Q_vcmp_gtor_QVsfVsf(Qx,Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandqrt)((__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vgtsf_or)(__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qx),-1),Vu,Vv)),-1)
-#endif /* __HEXAGON_ARCH___ >= 68 */
+#endif
 
 #if __HVX_ARCH__ >= 68
 /* ==========================================================================
@@ -4835,7 +4231,7 @@
    ========================================================================== */
 
 #define Q6_Q_vcmp_gtxacc_QVsfVsf(Qx,Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandqrt)((__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vgtsf_xor)(__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qx),-1),Vu,Vv)),-1)
-#endif /* __HEXAGON_ARCH___ >= 68 */
+#endif
 
 #if __HVX_ARCH__ >= 68
 /* ==========================================================================
@@ -4846,7 +4242,7 @@
    ========================================================================== */
 
 #define Q6_Vhf_vmax_VhfVhf(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmax_hf)(Vu,Vv)
-#endif /* __HEXAGON_ARCH___ >= 68 */
+#endif
 
 #if __HVX_ARCH__ >= 68
 /* ==========================================================================
@@ -4857,7 +4253,7 @@
    ========================================================================== */
 
 #define Q6_Vsf_vmax_VsfVsf(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmax_sf)(Vu,Vv)
-#endif /* __HEXAGON_ARCH___ >= 68 */
+#endif
 
 #if __HVX_ARCH__ >= 68
 /* ==========================================================================
@@ -4868,7 +4264,7 @@
    ========================================================================== */
 
 #define Q6_Vhf_vmin_VhfVhf(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmin_hf)(Vu,Vv)
-#endif /* __HEXAGON_ARCH___ >= 68 */
+#endif
 
 #if __HVX_ARCH__ >= 68
 /* ==========================================================================
@@ -4879,9 +4275,9 @@
    ========================================================================== */
 
 #define Q6_Vsf_vmin_VsfVsf(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmin_sf)(Vu,Vv)
-#endif /* __HEXAGON_ARCH___ >= 68 */
+#endif
 
-#if __HVX_ARCH__ >= 68
+#if __HVX_ARCH__ >= 68 && defined __HVX_IEEE_FP__
 /* ==========================================================================
    Assembly Syntax:       Vd32.hf=vmpy(Vu32.hf,Vv32.hf)
    C Intrinsic Prototype: HVX_Vector Q6_Vhf_vmpy_VhfVhf(HVX_Vector Vu, HVX_Vector Vv)
@@ -4890,9 +4286,9 @@
    ========================================================================== */
 
 #define Q6_Vhf_vmpy_VhfVhf(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpy_hf_hf)(Vu,Vv)
-#endif /* __HEXAGON_ARCH___ >= 68 */
+#endif
 
-#if __HVX_ARCH__ >= 68
+#if __HVX_ARCH__ >= 68 && defined __HVX_IEEE_FP__
 /* ==========================================================================
    Assembly Syntax:       Vx32.hf+=vmpy(Vu32.hf,Vv32.hf)
    C Intrinsic Prototype: HVX_Vector Q6_Vhf_vmpyacc_VhfVhfVhf(HVX_Vector Vx, HVX_Vector Vu, HVX_Vector Vv)
@@ -4901,7 +4297,7 @@
    ========================================================================== */
 
 #define Q6_Vhf_vmpyacc_VhfVhfVhf(Vx,Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpy_hf_hf_acc)(Vx,Vu,Vv)
-#endif /* __HEXAGON_ARCH___ >= 68 */
+#endif
 
 #if __HVX_ARCH__ >= 68
 /* ==========================================================================
@@ -4912,7 +4308,7 @@
    ========================================================================== */
 
 #define Q6_Vqf16_vmpy_Vqf16Vqf16(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpy_qf16)(Vu,Vv)
-#endif /* __HEXAGON_ARCH___ >= 68 */
+#endif
 
 #if __HVX_ARCH__ >= 68
 /* ==========================================================================
@@ -4923,7 +4319,7 @@
    ========================================================================== */
 
 #define Q6_Vqf16_vmpy_VhfVhf(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpy_qf16_hf)(Vu,Vv)
-#endif /* __HEXAGON_ARCH___ >= 68 */
+#endif
 
 #if __HVX_ARCH__ >= 68
 /* ==========================================================================
@@ -4934,7 +4330,7 @@
    ========================================================================== */
 
 #define Q6_Vqf16_vmpy_Vqf16Vhf(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpy_qf16_mix_hf)(Vu,Vv)
-#endif /* __HEXAGON_ARCH___ >= 68 */
+#endif
 
 #if __HVX_ARCH__ >= 68
 /* ==========================================================================
@@ -4945,7 +4341,7 @@
    ========================================================================== */
 
 #define Q6_Vqf32_vmpy_Vqf32Vqf32(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpy_qf32)(Vu,Vv)
-#endif /* __HEXAGON_ARCH___ >= 68 */
+#endif
 
 #if __HVX_ARCH__ >= 68
 /* ==========================================================================
@@ -4956,7 +4352,7 @@
    ========================================================================== */
 
 #define Q6_Wqf32_vmpy_VhfVhf(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpy_qf32_hf)(Vu,Vv)
-#endif /* __HEXAGON_ARCH___ >= 68 */
+#endif
 
 #if __HVX_ARCH__ >= 68
 /* ==========================================================================
@@ -4967,7 +4363,7 @@
    ========================================================================== */
 
 #define Q6_Wqf32_vmpy_Vqf16Vhf(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpy_qf32_mix_hf)(Vu,Vv)
-#endif /* __HEXAGON_ARCH___ >= 68 */
+#endif
 
 #if __HVX_ARCH__ >= 68
 /* ==========================================================================
@@ -4978,7 +4374,7 @@
    ========================================================================== */
 
 #define Q6_Wqf32_vmpy_Vqf16Vqf16(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpy_qf32_qf16)(Vu,Vv)
-#endif /* __HEXAGON_ARCH___ >= 68 */
+#endif
 
 #if __HVX_ARCH__ >= 68
 /* ==========================================================================
@@ -4989,9 +4385,9 @@
    ========================================================================== */
 
 #define Q6_Vqf32_vmpy_VsfVsf(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpy_qf32_sf)(Vu,Vv)
-#endif /* __HEXAGON_ARCH___ >= 68 */
+#endif
 
-#if __HVX_ARCH__ >= 68
+#if __HVX_ARCH__ >= 68 && defined __HVX_IEEE_FP__
 /* ==========================================================================
    Assembly Syntax:       Vdd32.sf=vmpy(Vu32.hf,Vv32.hf)
    C Intrinsic Prototype: HVX_VectorPair Q6_Wsf_vmpy_VhfVhf(HVX_Vector Vu, HVX_Vector Vv)
@@ -5000,9 +4396,9 @@
    ========================================================================== */
 
 #define Q6_Wsf_vmpy_VhfVhf(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpy_sf_hf)(Vu,Vv)
-#endif /* __HEXAGON_ARCH___ >= 68 */
+#endif
 
-#if __HVX_ARCH__ >= 68
+#if __HVX_ARCH__ >= 68 && defined __HVX_IEEE_FP__
 /* ==========================================================================
    Assembly Syntax:       Vxx32.sf+=vmpy(Vu32.hf,Vv32.hf)
    C Intrinsic Prototype: HVX_VectorPair Q6_Wsf_vmpyacc_WsfVhfVhf(HVX_VectorPair Vxx, HVX_Vector Vu, HVX_Vector Vv)
@@ -5011,9 +4407,9 @@
    ========================================================================== */
 
 #define Q6_Wsf_vmpyacc_WsfVhfVhf(Vxx,Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpy_sf_hf_acc)(Vxx,Vu,Vv)
-#endif /* __HEXAGON_ARCH___ >= 68 */
+#endif
 
-#if __HVX_ARCH__ >= 68
+#if __HVX_ARCH__ >= 68 && defined __HVX_IEEE_FP__
 /* ==========================================================================
    Assembly Syntax:       Vd32.sf=vmpy(Vu32.sf,Vv32.sf)
    C Intrinsic Prototype: HVX_Vector Q6_Vsf_vmpy_VsfVsf(HVX_Vector Vu, HVX_Vector Vv)
@@ -5022,7 +4418,7 @@
    ========================================================================== */
 
 #define Q6_Vsf_vmpy_VsfVsf(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpy_sf_sf)(Vu,Vv)
-#endif /* __HEXAGON_ARCH___ >= 68 */
+#endif
 
 #if __HVX_ARCH__ >= 68
 /* ==========================================================================
@@ -5033,9 +4429,9 @@
    ========================================================================== */
 
 #define Q6_Vqf16_vsub_VhfVhf(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vsub_hf)(Vu,Vv)
-#endif /* __HEXAGON_ARCH___ >= 68 */
+#endif
 
-#if __HVX_ARCH__ >= 68
+#if __HVX_ARCH__ >= 68 && defined __HVX_IEEE_FP__
 /* ==========================================================================
    Assembly Syntax:       Vd32.hf=vsub(Vu32.hf,Vv32.hf)
    C Intrinsic Prototype: HVX_Vector Q6_Vhf_vsub_VhfVhf(HVX_Vector Vu, HVX_Vector Vv)
@@ -5044,7 +4440,7 @@
    ========================================================================== */
 
 #define Q6_Vhf_vsub_VhfVhf(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vsub_hf_hf)(Vu,Vv)
-#endif /* __HEXAGON_ARCH___ >= 68 */
+#endif
 
 #if __HVX_ARCH__ >= 68
 /* ==========================================================================
@@ -5055,7 +4451,7 @@
    ========================================================================== */
 
 #define Q6_Vqf16_vsub_Vqf16Vqf16(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vsub_qf16)(Vu,Vv)
-#endif /* __HEXAGON_ARCH___ >= 68 */
+#endif
 
 #if __HVX_ARCH__ >= 68
 /* ==========================================================================
@@ -5066,7 +4462,7 @@
    ========================================================================== */
 
 #define Q6_Vqf16_vsub_Vqf16Vhf(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vsub_qf16_mix)(Vu,Vv)
-#endif /* __HEXAGON_ARCH___ >= 68 */
+#endif
 
 #if __HVX_ARCH__ >= 68
 /* ==========================================================================
@@ -5077,7 +4473,7 @@
    ========================================================================== */
 
 #define Q6_Vqf32_vsub_Vqf32Vqf32(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vsub_qf32)(Vu,Vv)
-#endif /* __HEXAGON_ARCH___ >= 68 */
+#endif
 
 #if __HVX_ARCH__ >= 68
 /* ==========================================================================
@@ -5088,7 +4484,7 @@
    ========================================================================== */
 
 #define Q6_Vqf32_vsub_Vqf32Vsf(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vsub_qf32_mix)(Vu,Vv)
-#endif /* __HEXAGON_ARCH___ >= 68 */
+#endif
 
 #if __HVX_ARCH__ >= 68
 /* ==========================================================================
@@ -5099,9 +4495,9 @@
    ========================================================================== */
 
 #define Q6_Vqf32_vsub_VsfVsf(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vsub_sf)(Vu,Vv)
-#endif /* __HEXAGON_ARCH___ >= 68 */
+#endif
 
-#if __HVX_ARCH__ >= 68
+#if __HVX_ARCH__ >= 68 && defined __HVX_IEEE_FP__
 /* ==========================================================================
    Assembly Syntax:       Vdd32.sf=vsub(Vu32.hf,Vv32.hf)
    C Intrinsic Prototype: HVX_VectorPair Q6_Wsf_vsub_VhfVhf(HVX_Vector Vu, HVX_Vector Vv)
@@ -5110,9 +4506,9 @@
    ========================================================================== */
 
 #define Q6_Wsf_vsub_VhfVhf(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vsub_sf_hf)(Vu,Vv)
-#endif /* __HEXAGON_ARCH___ >= 68 */
+#endif
 
-#if __HVX_ARCH__ >= 68
+#if __HVX_ARCH__ >= 68 && defined __HVX_IEEE_FP__
 /* ==========================================================================
    Assembly Syntax:       Vd32.sf=vsub(Vu32.sf,Vv32.sf)
    C Intrinsic Prototype: HVX_Vector Q6_Vsf_vsub_VsfVsf(HVX_Vector Vu, HVX_Vector Vv)
@@ -5121,7 +4517,7 @@
    ========================================================================== */
 
 #define Q6_Vsf_vsub_VsfVsf(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vsub_sf_sf)(Vu,Vv)
-#endif /* __HEXAGON_ARCH___ >= 68 */
+#endif
 
 #if __HVX_ARCH__ >= 69
 /* ==========================================================================
@@ -5132,7 +4528,7 @@
    ========================================================================== */
 
 #define Q6_Vub_vasr_WuhVub_rnd_sat(Vuu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vasrvuhubrndsat)(Vuu,Vv)
-#endif /* __HEXAGON_ARCH___ >= 69 */
+#endif
 
 #if __HVX_ARCH__ >= 69
 /* ==========================================================================
@@ -5143,7 +4539,7 @@
    ========================================================================== */
 
 #define Q6_Vub_vasr_WuhVub_sat(Vuu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vasrvuhubsat)(Vuu,Vv)
-#endif /* __HEXAGON_ARCH___ >= 69 */
+#endif
 
 #if __HVX_ARCH__ >= 69
 /* ==========================================================================
@@ -5154,7 +4550,7 @@
    ========================================================================== */
 
 #define Q6_Vuh_vasr_WwVuh_rnd_sat(Vuu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vasrvwuhrndsat)(Vuu,Vv)
-#endif /* __HEXAGON_ARCH___ >= 69 */
+#endif
 
 #if __HVX_ARCH__ >= 69
 /* ==========================================================================
@@ -5165,7 +4561,7 @@
    ========================================================================== */
 
 #define Q6_Vuh_vasr_WwVuh_sat(Vuu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vasrvwuhsat)(Vuu,Vv)
-#endif /* __HEXAGON_ARCH___ >= 69 */
+#endif
 
 #if __HVX_ARCH__ >= 69
 /* ==========================================================================
@@ -5176,9 +4572,9 @@
    ========================================================================== */
 
 #define Q6_Vuh_vmpy_VuhVuh_rs16(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpyuhvs)(Vu,Vv)
-#endif /* __HEXAGON_ARCH___ >= 69 */
+#endif
 
-#if __HVX_ARCH__ >= 73
+#if __HVX_ARCH__ >= 73 && defined __HVX_IEEE_FP__
 /* ==========================================================================
    Assembly Syntax:       Vdd32.sf=vadd(Vu32.bf,Vv32.bf)
    C Intrinsic Prototype: HVX_VectorPair Q6_Wsf_vadd_VbfVbf(HVX_Vector Vu,
@@ -5187,7 +4583,7 @@
 
 #define Q6_Wsf_vadd_VbfVbf(Vu, Vv)                                             \
   __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vadd_sf_bf)(Vu, Vv)
-#endif /* __HEXAGON_ARCH___ >= 73 */
+#endif
 
 #if __HVX_ARCH__ >= 73
 /* ==========================================================================
@@ -5199,7 +4595,7 @@
 
 #define Q6_Vh_equals_Vhf(Vu)                                                   \
   __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vconv_h_hf)(Vu)
-#endif /* __HEXAGON_ARCH___ >= 73 */
+#endif
 
 #if __HVX_ARCH__ >= 73
 /* ==========================================================================
@@ -5211,7 +4607,7 @@
 
 #define Q6_Vhf_equals_Vh(Vu)                                                   \
   __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vconv_hf_h)(Vu)
-#endif /* __HEXAGON_ARCH___ >= 73 */
+#endif
 
 #if __HVX_ARCH__ >= 73
 /* ==========================================================================
@@ -5223,7 +4619,7 @@
 
 #define Q6_Vsf_equals_Vw(Vu)                                                   \
   __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vconv_sf_w)(Vu)
-#endif /* __HEXAGON_ARCH___ >= 73 */
+#endif
 
 #if __HVX_ARCH__ >= 73
 /* ==========================================================================
@@ -5235,9 +4631,9 @@
 
 #define Q6_Vw_equals_Vsf(Vu)                                                   \
   __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vconv_w_sf)(Vu)
-#endif /* __HEXAGON_ARCH___ >= 73 */
+#endif
 
-#if __HVX_ARCH__ >= 73
+#if __HVX_ARCH__ >= 73 && defined __HVX_IEEE_FP__
 /* ==========================================================================
    Assembly Syntax:       Vd32.bf=vcvt(Vu32.sf,Vv32.sf)
    C Intrinsic Prototype: HVX_Vector Q6_Vbf_vcvt_VsfVsf(HVX_Vector Vu,
@@ -5246,7 +4642,7 @@
 
 #define Q6_Vbf_vcvt_VsfVsf(Vu, Vv)                                             \
   __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vcvt_bf_sf)(Vu, Vv)
-#endif /* __HEXAGON_ARCH___ >= 73 */
+#endif
 
 #if __HVX_ARCH__ >= 73
 /* ==========================================================================
@@ -5258,7 +4654,7 @@
 #define Q6_Q_vcmp_gt_VbfVbf(Vu, Vv)                                            \
   __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandqrt)                          \
   ((__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vgtbf)(Vu, Vv)), -1)
-#endif /* __HEXAGON_ARCH___ >= 73 */
+#endif
 
 #if __HVX_ARCH__ >= 73
 /* ==========================================================================
@@ -5274,7 +4670,7 @@
        __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qx), -1), Vu,      \
        Vv)),                                                                   \
    -1)
-#endif /* __HEXAGON_ARCH___ >= 73 */
+#endif
 
 #if __HVX_ARCH__ >= 73
 /* ==========================================================================
@@ -5290,7 +4686,7 @@
        __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qx), -1), Vu,      \
        Vv)),                                                                   \
    -1)
-#endif /* __HEXAGON_ARCH___ >= 73 */
+#endif
 
 #if __HVX_ARCH__ >= 73
 /* ==========================================================================
@@ -5306,9 +4702,9 @@
        __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qx), -1), Vu,      \
        Vv)),                                                                   \
    -1)
-#endif /* __HEXAGON_ARCH___ >= 73 */
+#endif
 
-#if __HVX_ARCH__ >= 73
+#if __HVX_ARCH__ >= 73 && defined __HVX_IEEE_FP__
 /* ==========================================================================
    Assembly Syntax:       Vd32.bf=vmax(Vu32.bf,Vv32.bf)
    C Intrinsic Prototype: HVX_Vector Q6_Vbf_vmax_VbfVbf(HVX_Vector Vu,
@@ -5317,9 +4713,9 @@
 
 #define Q6_Vbf_vmax_VbfVbf(Vu, Vv)                                             \
   __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmax_bf)(Vu, Vv)
-#endif /* __HEXAGON_ARCH___ >= 73 */
+#endif
 
-#if __HVX_ARCH__ >= 73
+#if __HVX_ARCH__ >= 73 && defined __HVX_IEEE_FP__
 /* ==========================================================================
    Assembly Syntax:       Vd32.bf=vmin(Vu32.bf,Vv32.bf)
    C Intrinsic Prototype: HVX_Vector Q6_Vbf_vmin_VbfVbf(HVX_Vector Vu,
@@ -5328,9 +4724,9 @@
 
 #define Q6_Vbf_vmin_VbfVbf(Vu, Vv)                                             \
   __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmin_bf)(Vu, Vv)
-#endif /* __HEXAGON_ARCH___ >= 73 */
+#endif
 
-#if __HVX_ARCH__ >= 73
+#if __HVX_ARCH__ >= 73 && defined __HVX_IEEE_FP__
 /* ==========================================================================
    Assembly Syntax:       Vdd32.sf=vmpy(Vu32.bf,Vv32.bf)
    C Intrinsic Prototype: HVX_VectorPair Q6_Wsf_vmpy_VbfVbf(HVX_Vector Vu,
@@ -5339,9 +4735,9 @@
 
 #define Q6_Wsf_vmpy_VbfVbf(Vu, Vv)                                             \
   __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpy_sf_bf)(Vu, Vv)
-#endif /* __HEXAGON_ARCH___ >= 73 */
+#endif
 
-#if __HVX_ARCH__ >= 73
+#if __HVX_ARCH__ >= 73 && defined __HVX_IEEE_FP__
 /* ==========================================================================
    Assembly Syntax:       Vxx32.sf+=vmpy(Vu32.bf,Vv32.bf)
    C Intrinsic Prototype: HVX_VectorPair Q6_Wsf_vmpyacc_WsfVbfVbf(HVX_VectorPair
@@ -5351,9 +4747,9 @@
 
 #define Q6_Wsf_vmpyacc_WsfVbfVbf(Vxx, Vu, Vv)                                  \
   __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpy_sf_bf_acc)(Vxx, Vu, Vv)
-#endif /* __HEXAGON_ARCH___ >= 73 */
+#endif
 
-#if __HVX_ARCH__ >= 73
+#if __HVX_ARCH__ >= 73 && defined __HVX_IEEE_FP__
 /* ==========================================================================
    Assembly Syntax:       Vdd32.sf=vsub(Vu32.bf,Vv32.bf)
    C Intrinsic Prototype: HVX_VectorPair Q6_Wsf_vsub_VbfVbf(HVX_Vector Vu,
@@ -5362,7 +4758,7 @@
 
 #define Q6_Wsf_vsub_VbfVbf(Vu, Vv)                                             \
   __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vsub_sf_bf)(Vu, Vv)
-#endif /* __HEXAGON_ARCH___ >= 73 */
+#endif
 
 #if __HVX_ARCH__ >= 79
 /* ==========================================================================
@@ -5374,7 +4770,7 @@
 
 #define Q6_V_vgetqfext_VR(Vu, Rt)                                              \
   __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_get_qfext)(Vu, Rt)
-#endif /* __HEXAGON_ARCH___ >= 79 */
+#endif
 
 #if __HVX_ARCH__ >= 79
 /* ==========================================================================
@@ -5386,7 +4782,7 @@
 
 #define Q6_V_vgetqfextor_VVR(Vx, Vu, Rt)                                       \
   __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_get_qfext_oracc)(Vx, Vu, Rt)
-#endif /* __HEXAGON_ARCH___ >= 79 */
+#endif
 
 #if __HVX_ARCH__ >= 79
 /* ==========================================================================
@@ -5398,9 +4794,9 @@
 
 #define Q6_V_vsetqfext_VR(Vu, Rt)                                              \
   __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_set_qfext)(Vu, Rt)
-#endif /* __HEXAGON_ARCH___ >= 79 */
+#endif
 
-#if __HVX_ARCH__ >= 79
+#if __HVX_ARCH__ >= 79 && defined __HVX_IEEE_FP__
 /* ==========================================================================
    Assembly Syntax:       Vd32.f8=vabs(Vu32.f8)
    C Intrinsic Prototype: HVX_Vector Q6_V_vabs_V(HVX_Vector Vu)
@@ -5409,9 +4805,9 @@
    ========================================================================== */
 
 #define Q6_V_vabs_V(Vu) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vabs_f8)(Vu)
-#endif /* __HEXAGON_ARCH___ >= 79 */
+#endif
 
-#if __HVX_ARCH__ >= 79
+#if __HVX_ARCH__ >= 79 && defined __HVX_IEEE_FP__
 /* ==========================================================================
    Assembly Syntax:       Vdd32.hf=vadd(Vu32.f8,Vv32.f8)
    C Intrinsic Prototype: HVX_VectorPair Q6_Whf_vadd_VV(HVX_Vector Vu,
@@ -5420,9 +4816,9 @@
 
 #define Q6_Whf_vadd_VV(Vu, Vv)                                                 \
   __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vadd_hf_f8)(Vu, Vv)
-#endif /* __HEXAGON_ARCH___ >= 79 */
+#endif
 
-#if __HVX_ARCH__ >= 79
+#if __HVX_ARCH__ >= 79 && defined __HVX_IEEE_FP__
 /* ==========================================================================
    Assembly Syntax:       Vd32.b=vcvt2(Vu32.hf,Vv32.hf)
    C Intrinsic Prototype: HVX_Vector Q6_Vb_vcvt2_VhfVhf(HVX_Vector Vu,
@@ -5431,9 +4827,9 @@
 
 #define Q6_Vb_vcvt2_VhfVhf(Vu, Vv)                                             \
   __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vcvt2_b_hf)(Vu, Vv)
-#endif /* __HEXAGON_ARCH___ >= 79 */
+#endif
 
-#if __HVX_ARCH__ >= 79
+#if __HVX_ARCH__ >= 79 && defined __HVX_IEEE_FP__
 /* ==========================================================================
    Assembly Syntax:       Vdd32.hf=vcvt2(Vu32.b)
    C Intrinsic Prototype: HVX_VectorPair Q6_Whf_vcvt2_Vb(HVX_Vector Vu)
@@ -5443,9 +4839,9 @@
 
 #define Q6_Whf_vcvt2_Vb(Vu)                                                    \
   __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vcvt2_hf_b)(Vu)
-#endif /* __HEXAGON_ARCH___ >= 79 */
+#endif
 
-#if __HVX_ARCH__ >= 79
+#if __HVX_ARCH__ >= 79 && defined __HVX_IEEE_FP__
 /* ==========================================================================
    Assembly Syntax:       Vdd32.hf=vcvt2(Vu32.ub)
    C Intrinsic Prototype: HVX_VectorPair Q6_Whf_vcvt2_Vub(HVX_Vector Vu)
@@ -5455,9 +4851,9 @@
 
 #define Q6_Whf_vcvt2_Vub(Vu)                                                   \
   __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vcvt2_hf_ub)(Vu)
-#endif /* __HEXAGON_ARCH___ >= 79 */
+#endif
 
-#if __HVX_ARCH__ >= 79
+#if __HVX_ARCH__ >= 79 && defined __HVX_IEEE_FP__
 /* ==========================================================================
    Assembly Syntax:       Vd32.ub=vcvt2(Vu32.hf,Vv32.hf)
    C Intrinsic Prototype: HVX_Vector Q6_Vub_vcvt2_VhfVhf(HVX_Vector Vu,
@@ -5466,9 +4862,9 @@
 
 #define Q6_Vub_vcvt2_VhfVhf(Vu, Vv)                                            \
   __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vcvt2_ub_hf)(Vu, Vv)
-#endif /* __HEXAGON_ARCH___ >= 79 */
+#endif
 
-#if __HVX_ARCH__ >= 79
+#if __HVX_ARCH__ >= 79 && defined __HVX_IEEE_FP__
 /* ==========================================================================
    Assembly Syntax:       Vd32.f8=vcvt(Vu32.hf,Vv32.hf)
    C Intrinsic Prototype: HVX_Vector Q6_V_vcvt_VhfVhf(HVX_Vector Vu, HVX_Vector
@@ -5477,9 +4873,9 @@
 
 #define Q6_V_vcvt_VhfVhf(Vu, Vv)                                               \
   __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vcvt_f8_hf)(Vu, Vv)
-#endif /* __HEXAGON_ARCH___ >= 79 */
+#endif
 
-#if __HVX_ARCH__ >= 79
+#if __HVX_ARCH__ >= 79 && defined __HVX_IEEE_FP__
 /* ==========================================================================
    Assembly Syntax:       Vdd32.hf=vcvt(Vu32.f8)
    C Intrinsic Prototype: HVX_VectorPair Q6_Whf_vcvt_V(HVX_Vector Vu)
@@ -5489,9 +4885,9 @@
 
 #define Q6_Whf_vcvt_V(Vu)                                                      \
   __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vcvt_hf_f8)(Vu)
-#endif /* __HEXAGON_ARCH___ >= 79 */
+#endif
 
-#if __HVX_ARCH__ >= 79
+#if __HVX_ARCH__ >= 79 && defined __HVX_IEEE_FP__
 /* ==========================================================================
    Assembly Syntax:       Vd32.f8=vfmax(Vu32.f8,Vv32.f8)
    C Intrinsic Prototype: HVX_Vector Q6_V_vfmax_VV(HVX_Vector Vu, HVX_Vector Vv)
@@ -5501,9 +4897,9 @@
 
 #define Q6_V_vfmax_VV(Vu, Vv)                                                  \
   __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vfmax_f8)(Vu, Vv)
-#endif /* __HEXAGON_ARCH___ >= 79 */
+#endif
 
-#if __HVX_ARCH__ >= 79
+#if __HVX_ARCH__ >= 79 && defined __HVX_IEEE_FP__
 /* ==========================================================================
    Assembly Syntax:       Vd32.f8=vfmin(Vu32.f8,Vv32.f8)
    C Intrinsic Prototype: HVX_Vector Q6_V_vfmin_VV(HVX_Vector Vu, HVX_Vector Vv)
@@ -5513,9 +4909,9 @@
 
 #define Q6_V_vfmin_VV(Vu, Vv)                                                  \
   __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vfmin_f8)(Vu, Vv)
-#endif /* __HEXAGON_ARCH___ >= 79 */
+#endif
 
-#if __HVX_ARCH__ >= 79
+#if __HVX_ARCH__ >= 79 && defined __HVX_IEEE_FP__
 /* ==========================================================================
    Assembly Syntax:       Vd32.f8=vfneg(Vu32.f8)
    C Intrinsic Prototype: HVX_Vector Q6_V_vfneg_V(HVX_Vector Vu)
@@ -5525,7 +4921,7 @@
 
 #define Q6_V_vfneg_V(Vu)                                                       \
   __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vfneg_f8)(Vu)
-#endif /* __HEXAGON_ARCH___ >= 79 */
+#endif
 
 #if __HVX_ARCH__ >= 79
 /* ==========================================================================
@@ -5536,9 +4932,9 @@
 
 #define Q6_V_vmerge_VVw(Vu, Vv)                                                \
   __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmerge_qf)(Vu, Vv)
-#endif /* __HEXAGON_ARCH___ >= 79 */
+#endif
 
-#if __HVX_ARCH__ >= 79
+#if __HVX_ARCH__ >= 79 && defined __HVX_IEEE_FP__
 /* ==========================================================================
    Assembly Syntax:       Vdd32.hf=vmpy(Vu32.f8,Vv32.f8)
    C Intrinsic Prototype: HVX_VectorPair Q6_Whf_vmpy_VV(HVX_Vector Vu,
@@ -5547,9 +4943,9 @@
 
 #define Q6_Whf_vmpy_VV(Vu, Vv)                                                 \
   __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpy_hf_f8)(Vu, Vv)
-#endif /* __HEXAGON_ARCH___ >= 79 */
+#endif
 
-#if __HVX_ARCH__ >= 79
+#if __HVX_ARCH__ >= 79 && defined __HVX_IEEE_FP__
 /* ==========================================================================
    Assembly Syntax:       Vxx32.hf+=vmpy(Vu32.f8,Vv32.f8)
    C Intrinsic Prototype: HVX_VectorPair Q6_Whf_vmpyacc_WhfVV(HVX_VectorPair
@@ -5559,7 +4955,7 @@
 
 #define Q6_Whf_vmpyacc_WhfVV(Vxx, Vu, Vv)                                      \
   __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpy_hf_f8_acc)(Vxx, Vu, Vv)
-#endif /* __HEXAGON_ARCH___ >= 79 */
+#endif
 
 #if __HVX_ARCH__ >= 79
 /* ==========================================================================
@@ -5570,7 +4966,7 @@
 
 #define Q6_Vqf16_vmpy_VhfRhf(Vu, Rt)                                           \
   __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpy_rt_hf)(Vu, Rt)
-#endif /* __HEXAGON_ARCH___ >= 79 */
+#endif
 
 #if __HVX_ARCH__ >= 79
 /* ==========================================================================
@@ -5581,7 +4977,7 @@
 
 #define Q6_Vqf16_vmpy_Vqf16Rhf(Vu, Rt)                                         \
   __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpy_rt_qf16)(Vu, Rt)
-#endif /* __HEXAGON_ARCH___ >= 79 */
+#endif
 
 #if __HVX_ARCH__ >= 79
 /* ==========================================================================
@@ -5592,9 +4988,9 @@
 
 #define Q6_Vqf32_vmpy_VsfRsf(Vu, Rt)                                           \
   __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpy_rt_sf)(Vu, Rt)
-#endif /* __HEXAGON_ARCH___ >= 79 */
+#endif
 
-#if __HVX_ARCH__ >= 79
+#if __HVX_ARCH__ >= 79 && defined __HVX_IEEE_FP__
 /* ==========================================================================
    Assembly Syntax:       Vdd32.hf=vsub(Vu32.f8,Vv32.f8)
    C Intrinsic Prototype: HVX_VectorPair Q6_Whf_vsub_VV(HVX_Vector Vu,
@@ -5603,7 +4999,400 @@
 
 #define Q6_Whf_vsub_VV(Vu, Vv)                                                 \
   __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vsub_hf_f8)(Vu, Vv)
-#endif /* __HEXAGON_ARCH___ >= 79 */
+#endif
+
+#if __HVX_ARCH__ >= 81
+/* ==========================================================================
+   Assembly Syntax:       Vd32.qf16=vabs(Vu32.hf)
+   C Intrinsic Prototype: HVX_Vector Q6_Vqf16_vabs_Vhf(HVX_Vector Vu)
+   Instruction Type:      CVI_VS
+   Execution Slots:       SLOT0123
+   ========================================================================== */
+
+#define Q6_Vqf16_vabs_Vhf(Vu)                                                  \
+  __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vabs_qf16_hf)(Vu)
+#endif
+
+#if __HVX_ARCH__ >= 81
+/* ==========================================================================
+   Assembly Syntax:       Vd32.qf16=vabs(Vu32.qf16)
+   C Intrinsic Prototype: HVX_Vector Q6_Vqf16_vabs_Vqf16(HVX_Vector Vu)
+   Instruction Type:      CVI_VS
+   Execution Slots:       SLOT0123
+   ========================================================================== */
+
+#define Q6_Vqf16_vabs_Vqf16(Vu)                                                \
+  __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vabs_qf16_qf16)(Vu)
+#endif
+
+#if __HVX_ARCH__ >= 81
+/* ==========================================================================
+   Assembly Syntax:       Vd32.qf32=vabs(Vu32.qf32)
+   C Intrinsic Prototype: HVX_Vector Q6_Vqf32_vabs_Vqf32(HVX_Vector Vu)
+   Instruction Type:      CVI_VS
+   Execution Slots:       SLOT0123
+   ========================================================================== */
+
+#define Q6_Vqf32_vabs_Vqf32(Vu)                                                \
+  __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vabs_qf32_qf32)(Vu)
+#endif
+
+#if __HVX_ARCH__ >= 81
+/* ==========================================================================
+   Assembly Syntax:       Vd32.qf32=vabs(Vu32.sf)
+   C Intrinsic Prototype: HVX_Vector Q6_Vqf32_vabs_Vsf(HVX_Vector Vu)
+   Instruction Type:      CVI_VS
+   Execution Slots:       SLOT0123
+   ========================================================================== */
+
+#define Q6_Vqf32_vabs_Vsf(Vu)                                                  \
+  __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vabs_qf32_sf)(Vu)
+#endif
+
+#if __HVX_ARCH__ >= 81
+/* ==========================================================================
+   Assembly Syntax:       Vd32=valign4(Vu32,Vv32,Rt8)
+   C Intrinsic Prototype: HVX_Vector Q6_V_valign4_VVR(HVX_Vector Vu, HVX_Vector
+   Vv, Word32 Rt) Instruction Type:      CVI_VA Execution Slots:       SLOT0123
+   ========================================================================== */
+
+#define Q6_V_valign4_VVR(Vu, Vv, Rt)                                           \
+  __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_valign4)(Vu, Vv, Rt)
+#endif
+
+#if __HVX_ARCH__ >= 81
+/* ==========================================================================
+   Assembly Syntax:       Vd32.bf=Vuu32.qf32
+   C Intrinsic Prototype: HVX_Vector Q6_Vbf_equals_Wqf32(HVX_VectorPair Vuu)
+   Instruction Type:      CVI_VS
+   Execution Slots:       SLOT0123
+   ========================================================================== */
+
+#define Q6_Vbf_equals_Wqf32(Vuu)                                               \
+  __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vconv_bf_qf32)(Vuu)
+#endif
+
+#if __HVX_ARCH__ >= 81
+/* ==========================================================================
+   Assembly Syntax:       Vd32.f8=Vu32.qf16
+   C Intrinsic Prototype: HVX_Vector Q6_V_equals_Vqf16(HVX_Vector Vu)
+   Instruction Type:      CVI_VS
+   Execution Slots:       SLOT0123
+   ========================================================================== */
+
+#define Q6_V_equals_Vqf16(Vu)                                                  \
+  __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vconv_f8_qf16)(Vu)
+#endif
+
+#if __HVX_ARCH__ >= 81
+/* ==========================================================================
+   Assembly Syntax:       Vd32.h=Vu32.hf:rnd
+   C Intrinsic Prototype: HVX_Vector Q6_Vh_equals_Vhf_rnd(HVX_Vector Vu)
+   Instruction Type:      CVI_VS
+   Execution Slots:       SLOT0123
+   ========================================================================== */
+
+#define Q6_Vh_equals_Vhf_rnd(Vu)                                               \
+  __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vconv_h_hf_rnd)(Vu)
+#endif
+
+#if __HVX_ARCH__ >= 81
+/* ==========================================================================
+   Assembly Syntax:       Vdd32.qf16=Vu32.f8
+   C Intrinsic Prototype: HVX_VectorPair Q6_Wqf16_equals_V(HVX_Vector Vu)
+   Instruction Type:      CVI_VP_VS
+   Execution Slots:       SLOT0123
+   ========================================================================== */
+
+#define Q6_Wqf16_equals_V(Vu)                                                  \
+  __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vconv_qf16_f8)(Vu)
+#endif
+
+#if __HVX_ARCH__ >= 81
+/* ==========================================================================
+   Assembly Syntax:       Vd32.qf16=Vu32.hf
+   C Intrinsic Prototype: HVX_Vector Q6_Vqf16_equals_Vhf(HVX_Vector Vu)
+   Instruction Type:      CVI_VS
+   Execution Slots:       SLOT0123
+   ========================================================================== */
+
+#define Q6_Vqf16_equals_Vhf(Vu)                                                \
+  __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vconv_qf16_hf)(Vu)
+#endif
+
+#if __HVX_ARCH__ >= 81
+/* ==========================================================================
+   Assembly Syntax:       Vd32.qf16=Vu32.qf16
+   C Intrinsic Prototype: HVX_Vector Q6_Vqf16_equals_Vqf16(HVX_Vector Vu)
+   Instruction Type:      CVI_VS
+   Execution Slots:       SLOT0123
+   ========================================================================== */
+
+#define Q6_Vqf16_equals_Vqf16(Vu)                                              \
+  __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vconv_qf16_qf16)(Vu)
+#endif
+
+#if __HVX_ARCH__ >= 81
+/* ==========================================================================
+   Assembly Syntax:       Vd32.qf32=Vu32.qf32
+   C Intrinsic Prototype: HVX_Vector Q6_Vqf32_equals_Vqf32(HVX_Vector Vu)
+   Instruction Type:      CVI_VS
+   Execution Slots:       SLOT0123
+   ========================================================================== */
+
+#define Q6_Vqf32_equals_Vqf32(Vu)                                              \
+  __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vconv_qf32_qf32)(Vu)
+#endif
+
+#if __HVX_ARCH__ >= 81
+/* ==========================================================================
+   Assembly Syntax:       Vd32.qf32=Vu32.sf
+   C Intrinsic Prototype: HVX_Vector Q6_Vqf32_equals_Vsf(HVX_Vector Vu)
+   Instruction Type:      CVI_VS
+   Execution Slots:       SLOT0123
+   ========================================================================== */
+
+#define Q6_Vqf32_equals_Vsf(Vu)                                                \
+  __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vconv_qf32_sf)(Vu)
+#endif
+
+#if __HVX_ARCH__ >= 81
+/* ==========================================================================
+   Assembly Syntax:       Qd4=vcmp.eq(Vu32.hf,Vv32.hf)
+   C Intrinsic Prototype: HVX_VectorPred Q6_Q_vcmp_eq_VhfVhf(HVX_Vector Vu,
+   HVX_Vector Vv) Instruction Type:      CVI_VA Execution Slots:       SLOT0123
+   ========================================================================== */
+
+#define Q6_Q_vcmp_eq_VhfVhf(Vu, Vv)                                            \
+  __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandqrt)(                         \
+      (__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_veqhf)(Vu, Vv)), -1)
+#endif
+
+#if __HVX_ARCH__ >= 81
+/* ==========================================================================
+   Assembly Syntax:       Qx4&=vcmp.eq(Vu32.hf,Vv32.hf)
+   C Intrinsic Prototype: HVX_VectorPred Q6_Q_vcmp_eqand_QVhfVhf(HVX_VectorPred
+   Qx, HVX_Vector Vu, HVX_Vector Vv) Instruction Type:      CVI_VA Execution
+   Slots:       SLOT0123
+   ========================================================================== */
+
+#define Q6_Q_vcmp_eqand_QVhfVhf(Qx, Vu, Vv)                                    \
+  __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandqrt)(                         \
+      (__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_veqhf_and)(                  \
+          __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qx), -1), Vu,   \
+          Vv)),                                                                \
+      -1)
+#endif
+
+#if __HVX_ARCH__ >= 81
+/* ==========================================================================
+   Assembly Syntax:       Qx4|=vcmp.eq(Vu32.hf,Vv32.hf)
+   C Intrinsic Prototype: HVX_VectorPred Q6_Q_vcmp_eqor_QVhfVhf(HVX_VectorPred
+   Qx, HVX_Vector Vu, HVX_Vector Vv) Instruction Type:      CVI_VA Execution
+   Slots:       SLOT0123
+   ========================================================================== */
+
+#define Q6_Q_vcmp_eqor_QVhfVhf(Qx, Vu, Vv)                                     \
+  __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandqrt)(                         \
+      (__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_veqhf_or)(                   \
+          __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qx), -1), Vu,   \
+          Vv)),                                                                \
+      -1)
+#endif
+
+#if __HVX_ARCH__ >= 81
+/* ==========================================================================
+   Assembly Syntax:       Qx4^=vcmp.eq(Vu32.hf,Vv32.hf)
+   C Intrinsic Prototype: HVX_VectorPred Q6_Q_vcmp_eqxacc_QVhfVhf(HVX_VectorPred
+   Qx, HVX_Vector Vu, HVX_Vector Vv) Instruction Type:      CVI_VA Execution
+   Slots:       SLOT0123
+   ========================================================================== */
+
+#define Q6_Q_vcmp_eqxacc_QVhfVhf(Qx, Vu, Vv)                                   \
+  __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandqrt)(                         \
+      (__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_veqhf_xor)(                  \
+          __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qx), -1), Vu,   \
+          Vv)),                                                                \
+      -1)
+#endif
+
+#if __HVX_ARCH__ >= 81
+/* ==========================================================================
+   Assembly Syntax:       Qd4=vcmp.eq(Vu32.sf,Vv32.sf)
+   C Intrinsic Prototype: HVX_VectorPred Q6_Q_vcmp_eq_VsfVsf(HVX_Vector Vu,
+   HVX_Vector Vv) Instruction Type:      CVI_VA Execution Slots:       SLOT0123
+   ========================================================================== */
+
+#define Q6_Q_vcmp_eq_VsfVsf(Vu, Vv)                                            \
+  __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandqrt)(                         \
+      (__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_veqsf)(Vu, Vv)), -1)
+#endif
+
+#if __HVX_ARCH__ >= 81
+/* ==========================================================================
+   Assembly Syntax:       Qx4&=vcmp.eq(Vu32.sf,Vv32.sf)
+   C Intrinsic Prototype: HVX_VectorPred Q6_Q_vcmp_eqand_QVsfVsf(HVX_VectorPred
+   Qx, HVX_Vector Vu, HVX_Vector Vv) Instruction Type:      CVI_VA Execution
+   Slots:       SLOT0123
+   ========================================================================== */
+
+#define Q6_Q_vcmp_eqand_QVsfVsf(Qx, Vu, Vv)                                    \
+  __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandqrt)(                         \
+      (__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_veqsf_and)(                  \
+          __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qx), -1), Vu,   \
+          Vv)),                                                                \
+      -1)
+#endif
+
+#if __HVX_ARCH__ >= 81
+/* ==========================================================================
+   Assembly Syntax:       Qx4|=vcmp.eq(Vu32.sf,Vv32.sf)
+   C Intrinsic Prototype: HVX_VectorPred Q6_Q_vcmp_eqor_QVsfVsf(HVX_VectorPred
+   Qx, HVX_Vector Vu, HVX_Vector Vv) Instruction Type:      CVI_VA Execution
+   Slots:       SLOT0123
+   ========================================================================== */
+
+#define Q6_Q_vcmp_eqor_QVsfVsf(Qx, Vu, Vv)                                     \
+  __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandqrt)(                         \
+      (__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_veqsf_or)(                   \
+          __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qx), -1), Vu,   \
+          Vv)),                                                                \
+      -1)
+#endif
+
+#if __HVX_ARCH__ >= 81
+/* ==========================================================================
+   Assembly Syntax:       Qx4^=vcmp.eq(Vu32.sf,Vv32.sf)
+   C Intrinsic Prototype: HVX_VectorPred Q6_Q_vcmp_eqxacc_QVsfVsf(HVX_VectorPred
+   Qx, HVX_Vector Vu, HVX_Vector Vv) Instruction Type:      CVI_VA Execution
+   Slots:       SLOT0123
+   ========================================================================== */
+
+#define Q6_Q_vcmp_eqxacc_QVsfVsf(Qx, Vu, Vv)                                   \
+  __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandqrt)(                         \
+      (__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_veqsf_xor)(                  \
+          __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qx), -1), Vu,   \
+          Vv)),                                                                \
+      -1)
+#endif
+
+#if __HVX_ARCH__ >= 81
+/* ==========================================================================
+   Assembly Syntax:       Vd32.w=vilog2(Vu32.hf)
+   C Intrinsic Prototype: HVX_Vector Q6_Vw_vilog2_Vhf(HVX_Vector Vu)
+   Instruction Type:      CVI_VS
+   Execution Slots:       SLOT0123
+   ========================================================================== */
+
+#define Q6_Vw_vilog2_Vhf(Vu)                                                   \
+  __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vilog2_hf)(Vu)
+#endif
+
+#if __HVX_ARCH__ >= 81
+/* ==========================================================================
+   Assembly Syntax:       Vd32.w=vilog2(Vu32.qf16)
+   C Intrinsic Prototype: HVX_Vector Q6_Vw_vilog2_Vqf16(HVX_Vector Vu)
+   Instruction Type:      CVI_VS
+   Execution Slots:       SLOT0123
+   ========================================================================== */
+
+#define Q6_Vw_vilog2_Vqf16(Vu)                                                 \
+  __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vilog2_qf16)(Vu)
+#endif
+
+#if __HVX_ARCH__ >= 81
+/* ==========================================================================
+   Assembly Syntax:       Vd32.w=vilog2(Vu32.qf32)
+   C Intrinsic Prototype: HVX_Vector Q6_Vw_vilog2_Vqf32(HVX_Vector Vu)
+   Instruction Type:      CVI_VS
+   Execution Slots:       SLOT0123
+   ========================================================================== */
+
+#define Q6_Vw_vilog2_Vqf32(Vu)                                                 \
+  __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vilog2_qf32)(Vu)
+#endif
+
+#if __HVX_ARCH__ >= 81
+/* ==========================================================================
+   Assembly Syntax:       Vd32.w=vilog2(Vu32.sf)
+   C Intrinsic Prototype: HVX_Vector Q6_Vw_vilog2_Vsf(HVX_Vector Vu)
+   Instruction Type:      CVI_VS
+   Execution Slots:       SLOT0123
+   ========================================================================== */
+
+#define Q6_Vw_vilog2_Vsf(Vu)                                                   \
+  __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vilog2_sf)(Vu)
+#endif
+
+#if __HVX_ARCH__ >= 81
+/* ==========================================================================
+   Assembly Syntax:       Vd32.qf16=vneg(Vu32.hf)
+   C Intrinsic Prototype: HVX_Vector Q6_Vqf16_vneg_Vhf(HVX_Vector Vu)
+   Instruction Type:      CVI_VS
+   Execution Slots:       SLOT0123
+   ========================================================================== */
+
+#define Q6_Vqf16_vneg_Vhf(Vu)                                                  \
+  __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vneg_qf16_hf)(Vu)
+#endif
+
+#if __HVX_ARCH__ >= 81
+/* ==========================================================================
+   Assembly Syntax:       Vd32.qf16=vneg(Vu32.qf16)
+   C Intrinsic Prototype: HVX_Vector Q6_Vqf16_vneg_Vqf16(HVX_Vector Vu)
+   Instruction Type:      CVI_VS
+   Execution Slots:       SLOT0123
+   ========================================================================== */
+
+#define Q6_Vqf16_vneg_Vqf16(Vu)                                                \
+  __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vneg_qf16_qf16)(Vu)
+#endif
+
+#if __HVX_ARCH__ >= 81
+/* ==========================================================================
+   Assembly Syntax:       Vd32.qf32=vneg(Vu32.qf32)
+   C Intrinsic Prototype: HVX_Vector Q6_Vqf32_vneg_Vqf32(HVX_Vector Vu)
+   Instruction Type:      CVI_VS
+   Execution Slots:       SLOT0123
+   ========================================================================== */
+
+#define Q6_Vqf32_vneg_Vqf32(Vu)                                                \
+  __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vneg_qf32_qf32)(Vu)
+#endif
+
+#if __HVX_ARCH__ >= 81
+/* ==========================================================================
+   Assembly Syntax:       Vd32.qf32=vneg(Vu32.sf)
+   C Intrinsic Prototype: HVX_Vector Q6_Vqf32_vneg_Vsf(HVX_Vector Vu)
+   Instruction Type:      CVI_VS
+   Execution Slots:       SLOT0123
+   ========================================================================== */
+
+#define Q6_Vqf32_vneg_Vsf(Vu)                                                  \
+  __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vneg_qf32_sf)(Vu)
+#endif
+
+#if __HVX_ARCH__ >= 81
+/* ==========================================================================
+   Assembly Syntax:       Vd32.qf16=vsub(Vu32.hf,Vv32.qf16)
+   C Intrinsic Prototype: HVX_Vector Q6_Vqf16_vsub_VhfVqf16(HVX_Vector Vu,
+   HVX_Vector Vv) Instruction Type:      CVI_VS Execution Slots:       SLOT0123
+   ========================================================================== */
+
+#define Q6_Vqf16_vsub_VhfVqf16(Vu, Vv)                                         \
+  __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vsub_hf_mix)(Vu, Vv)
+#endif
+
+#if __HVX_ARCH__ >= 81
+/* ==========================================================================
+   Assembly Syntax:       Vd32.qf32=vsub(Vu32.sf,Vv32.qf32)
+   C Intrinsic Prototype: HVX_Vector Q6_Vqf32_vsub_VsfVqf32(HVX_Vector Vu,
+   HVX_Vector Vv) Instruction Type:      CVI_VS Execution Slots:       SLOT0123
+   ========================================================================== */
+
+#define Q6_Vqf32_vsub_VsfVqf32(Vu, Vv)                                         \
+  __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vsub_sf_mix)(Vu, Vv)
+#endif
 
 #endif /* __HVX__ */
 
diff --git a/lib/include/immintrin.h b/lib/include/immintrin.h
index 35f012cc70..19064a4ff5 100644
--- a/lib/include/immintrin.h
+++ b/lib/include/immintrin.h
@@ -475,24 +475,12 @@ _storebe_i64(void * __P, long long __D) {
 
 #include <amxfp8intrin.h>
 
-#include <amxtransposeintrin.h>
-
 #include <amxmovrsintrin.h>
 
-#include <amxmovrstransposeintrin.h>
-
 #include <amxavx512intrin.h>
 
 #include <amxtf32intrin.h>
 
-#include <amxtf32transposeintrin.h>
-
-#include <amxbf16transposeintrin.h>
-
-#include <amxfp16transposeintrin.h>
-
-#include <amxcomplextransposeintrin.h>
-
 #include <avx512vp2intersectintrin.h>
 
 #include <avx512vlvp2intersectintrin.h>
diff --git a/lib/include/intrin.h b/lib/include/intrin.h
index 588c283cbd..210ed0c1f7 100644
--- a/lib/include/intrin.h
+++ b/lib/include/intrin.h
@@ -30,6 +30,10 @@
 #include <arm64intr.h>
 #endif
 
+#if defined(__ARM_ACLE)
+#include <arm_acle.h>
+#endif
+
 /* For the definition of jmp_buf. */
 #if __STDC_HOSTED__
 #include <setjmp.h>
diff --git a/lib/include/lasxintrin.h b/lib/include/lasxintrin.h
index 85020d8282..83cc4288a9 100644
--- a/lib/include/lasxintrin.h
+++ b/lib/include/lasxintrin.h
@@ -10,6 +10,8 @@
 #ifndef _LOONGSON_ASXINTRIN_H
 #define _LOONGSON_ASXINTRIN_H 1
 
+#include <lsxintrin.h>
+
 #if defined(__loongarch_asx)
 
 typedef signed char v32i8 __attribute__((vector_size(32), aligned(32)));
@@ -3882,5 +3884,116 @@ extern __inline
 
 #define __lasx_xvrepli_w(/*si10*/ _1) ((__m256i)__builtin_lasx_xvrepli_w((_1)))
 
+#if defined(__loongarch_asx_sx_conv)
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__,
+                   __artificial__)) __m256 __lasx_cast_128_s(__m128 _1) {
+  return (__m256)__builtin_lasx_cast_128_s((v4f32)_1);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256d
+    __lasx_cast_128_d(__m128d _1) {
+  return (__m256d)__builtin_lasx_cast_128_d((v2f64)_1);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+    __lasx_cast_128(__m128i _1) {
+  return (__m256i)__builtin_lasx_cast_128((v2i64)_1);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256
+    __lasx_concat_128_s(__m128 _1, __m128 _2) {
+  return (__m256)__builtin_lasx_concat_128_s((v4f32)_1, (v4f32)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256d
+    __lasx_concat_128_d(__m128d _1, __m128d _2) {
+  return (__m256d)__builtin_lasx_concat_128_d((v2f64)_1, (v2f64)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+    __lasx_concat_128(__m128i _1, __m128i _2) {
+  return (__m256i)__builtin_lasx_concat_128((v2i64)_1, (v2i64)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128
+    __lasx_extract_128_lo_s(__m256 _1) {
+  return (__m128)__builtin_lasx_extract_128_lo_s((v8f32)_1);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128d
+    __lasx_extract_128_lo_d(__m256d _1) {
+  return (__m128d)__builtin_lasx_extract_128_lo_d((v4f64)_1);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lasx_extract_128_lo(__m256i _1) {
+  return (__m128i)__builtin_lasx_extract_128_lo((v4i64)_1);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128
+    __lasx_extract_128_hi_s(__m256 _1) {
+  return (__m128)__builtin_lasx_extract_128_hi_s((v8f32)_1);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128d
+    __lasx_extract_128_hi_d(__m256d _1) {
+  return (__m128d)__builtin_lasx_extract_128_hi_d((v4f64)_1);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lasx_extract_128_hi(__m256i _1) {
+  return (__m128i)__builtin_lasx_extract_128_hi((v4i64)_1);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256
+    __lasx_insert_128_lo_s(__m256 _1, __m128 _2) {
+  return (__m256)__builtin_lasx_insert_128_lo_s((v8f32)_1, (v4f32)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256d
+    __lasx_insert_128_lo_d(__m256d _1, __m128d _2) {
+  return (__m256d)__builtin_lasx_insert_128_lo_d((v4f64)_1, (v2f64)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+    __lasx_insert_128_lo(__m256i _1, __m128i _2) {
+  return (__m256i)__builtin_lasx_insert_128_lo((v4i64)_1, (v2i64)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256
+    __lasx_insert_128_hi_s(__m256 _1, __m128 _2) {
+  return (__m256)__builtin_lasx_insert_128_hi_s((v8f32)_1, (v4f32)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256d
+    __lasx_insert_128_hi_d(__m256d _1, __m128d _2) {
+  return (__m256d)__builtin_lasx_insert_128_hi_d((v4f64)_1, (v2f64)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+    __lasx_insert_128_hi(__m256i _1, __m128i _2) {
+  return (__m256i)__builtin_lasx_insert_128_hi((v4i64)_1, (v2i64)_2);
+}
+
+#endif /* defined(__loongarch_asx_sx_conv).  */
 #endif /* defined(__loongarch_asx).  */
 #endif /* _LOONGSON_ASXINTRIN_H.  */
diff --git a/lib/include/llvm_libc_wrappers/assert.h b/lib/include/llvm_libc_wrappers/assert.h
index 610ed96a45..7eadb2c354 100644
--- a/lib/include/llvm_libc_wrappers/assert.h
+++ b/lib/include/llvm_libc_wrappers/assert.h
@@ -19,13 +19,11 @@
 
 #if defined(__HIP__) || defined(__CUDA__)
 #define __LIBC_ATTRS __attribute__((device))
+#else
+#define __LIBC_ATTRS
 #endif
 
-#pragma omp begin declare target
-
-#include <llvm-libc-decls/assert.h>
-
-#pragma omp end declare target
+// TODO: Define these for CUDA / HIP.
 
 #undef __LIBC_ATTRS
 
diff --git a/lib/include/llvm_libc_wrappers/ctype.h b/lib/include/llvm_libc_wrappers/ctype.h
index 960cf43302..79b0c1e9be 100644
--- a/lib/include/llvm_libc_wrappers/ctype.h
+++ b/lib/include/llvm_libc_wrappers/ctype.h
@@ -13,128 +13,16 @@
 #error "This file is for GPU offloading compilation only"
 #endif
 
-// The GNU headers like to define 'toupper' and 'tolower' redundantly. This is
-// necessary to prevent it from doing that and remapping our implementation.
-#if (defined(__NVPTX__) || defined(__AMDGPU__)) && defined(__GLIBC__)
-#pragma push_macro("__USE_EXTERN_INLINES")
-#undef __USE_EXTERN_INLINES
-#endif
-
 #include_next <ctype.h>
 
-#if (defined(__NVPTX__) || defined(__AMDGPU__)) && defined(__GLIBC__)
-#pragma pop_macro("__USE_EXTERN_INLINES")
-#endif
-
-#if __has_include(<llvm-libc-decls/ctype.h>)
-
 #if defined(__HIP__) || defined(__CUDA__)
 #define __LIBC_ATTRS __attribute__((device))
+#else
+#define __LIBC_ATTRS
 #endif
 
-// The GNU headers like to provide these as macros, we need to undefine them so
-// they do not conflict with the following definitions for the GPU.
-
-#pragma push_macro("isalnum")
-#pragma push_macro("isalpha")
-#pragma push_macro("isascii")
-#pragma push_macro("isblank")
-#pragma push_macro("iscntrl")
-#pragma push_macro("isdigit")
-#pragma push_macro("isgraph")
-#pragma push_macro("islower")
-#pragma push_macro("isprint")
-#pragma push_macro("ispunct")
-#pragma push_macro("isspace")
-#pragma push_macro("isupper")
-#pragma push_macro("isxdigit")
-#pragma push_macro("toascii")
-#pragma push_macro("tolower")
-#pragma push_macro("toupper")
-#pragma push_macro("isalnum_l")
-#pragma push_macro("isalpha_l")
-#pragma push_macro("isascii_l")
-#pragma push_macro("isblank_l")
-#pragma push_macro("iscntrl_l")
-#pragma push_macro("isdigit_l")
-#pragma push_macro("isgraph_l")
-#pragma push_macro("islower_l")
-#pragma push_macro("isprint_l")
-#pragma push_macro("ispunct_l")
-#pragma push_macro("isspace_l")
-#pragma push_macro("isupper_l")
-#pragma push_macro("isxdigit_l")
-
-#undef isalnum
-#undef isalpha
-#undef isascii
-#undef iscntrl
-#undef isdigit
-#undef islower
-#undef isgraph
-#undef isprint
-#undef ispunct
-#undef isspace
-#undef isupper
-#undef isblank
-#undef isxdigit
-#undef toascii
-#undef tolower
-#undef toupper
-#undef isalnum_l
-#undef isalpha_l
-#undef iscntrl_l
-#undef isdigit_l
-#undef islower_l
-#undef isgraph_l
-#undef isprint_l
-#undef ispunct_l
-#undef isspace_l
-#undef isupper_l
-#undef isblank_l
-#undef isxdigit_l
-
-#pragma omp begin declare target
-
-#include <llvm-libc-decls/ctype.h>
-
-#pragma omp end declare target
-
-// Restore the original macros when compiling on the host.
-#if !defined(__NVPTX__) && !defined(__AMDGPU__)
-#pragma pop_macro("isalnum")
-#pragma pop_macro("isalpha")
-#pragma pop_macro("isascii")
-#pragma pop_macro("isblank")
-#pragma pop_macro("iscntrl")
-#pragma pop_macro("isdigit")
-#pragma pop_macro("isgraph")
-#pragma pop_macro("islower")
-#pragma pop_macro("isprint")
-#pragma pop_macro("ispunct")
-#pragma pop_macro("isspace")
-#pragma pop_macro("isupper")
-#pragma pop_macro("isxdigit")
-#pragma pop_macro("toascii")
-#pragma pop_macro("tolower")
-#pragma pop_macro("toupper")
-#pragma pop_macro("isalnum_l")
-#pragma pop_macro("isalpha_l")
-#pragma pop_macro("isascii_l")
-#pragma pop_macro("isblank_l")
-#pragma pop_macro("iscntrl_l")
-#pragma pop_macro("isdigit_l")
-#pragma pop_macro("isgraph_l")
-#pragma pop_macro("islower_l")
-#pragma pop_macro("isprint_l")
-#pragma pop_macro("ispunct_l")
-#pragma pop_macro("isspace_l")
-#pragma pop_macro("isupper_l")
-#pragma pop_macro("isxdigit_l")
-#endif
+// TODO: Define these for CUDA / HIP.
 
 #undef __LIBC_ATTRS
 
-#endif
-
 #endif // __CLANG_LLVM_LIBC_WRAPPERS_CTYPE_H__
diff --git a/lib/include/llvm_libc_wrappers/inttypes.h b/lib/include/llvm_libc_wrappers/inttypes.h
index 415f1e4b7b..2261389824 100644
--- a/lib/include/llvm_libc_wrappers/inttypes.h
+++ b/lib/include/llvm_libc_wrappers/inttypes.h
@@ -19,13 +19,11 @@
 
 #if defined(__HIP__) || defined(__CUDA__)
 #define __LIBC_ATTRS __attribute__((device))
+#else
+#define __LIBC_ATTRS
 #endif
 
-#pragma omp begin declare target
-
-#include <llvm-libc-decls/inttypes.h>
-
-#pragma omp end declare target
+// TODO: Define these for CUDA / HIP.
 
 #undef __LIBC_ATTRS
 
diff --git a/lib/include/llvm_libc_wrappers/stdio.h b/lib/include/llvm_libc_wrappers/stdio.h
index 950f91b376..0c3e44823d 100644
--- a/lib/include/llvm_libc_wrappers/stdio.h
+++ b/lib/include/llvm_libc_wrappers/stdio.h
@@ -6,45 +6,19 @@
 //
 //===----------------------------------------------------------------------===//
 
+#ifndef __CLANG_LLVM_LIBC_WRAPPERS_STDIO_H__
+#define __CLANG_LLVM_LIBC_WRAPPERS_STDIO_H__
+
 #if !defined(_OPENMP) && !defined(__HIP__) && !defined(__CUDA__)
 #error "This file is for GPU offloading compilation only"
 #endif
 
 #include_next <stdio.h>
 
-// In some old versions of glibc, other standard headers sometimes define
-// special macros (e.g., __need_FILE) before including stdio.h to cause stdio.h
-// to produce special definitions.  Future includes of stdio.h when those
-// special macros are undefined are expected to produce the normal definitions
-// from stdio.h.
-//
-// We do not apply our include guard (__CLANG_LLVM_LIBC_WRAPPERS_STDIO_H__)
-// unconditionally to the above include_next.  Otherwise, after an occurrence of
-// the first glibc stdio.h use case described above, the include_next would be
-// skipped for remaining includes of stdio.h, leaving required symbols
-// undefined.
-//
-// We make the following assumptions to handle all use cases:
-//
-// 1. If the above include_next produces special glibc definitions, then (a) it
-//    does not produce the normal definitions that we must intercept below, (b)
-//    the current file was included from a glibc header that already defined
-//    __GLIBC__ (usually by including glibc's <features.h>), and (c) the above
-//    include_next does not define _STDIO_H.  In that case, we skip the rest of
-//    the current file and don't guard against future includes.
-// 2. If the above include_next produces the normal stdio.h definitions, then
-//    either (a) __GLIBC__ is not defined because C headers are from some other
-//    libc implementation or (b) the above include_next defines _STDIO_H to
-//    prevent the above include_next from having any effect in the future.
-#if !defined(__GLIBC__) || defined(_STDIO_H)
-
-#ifndef __CLANG_LLVM_LIBC_WRAPPERS_STDIO_H__
-#define __CLANG_LLVM_LIBC_WRAPPERS_STDIO_H__
-
-#if __has_include(<llvm-libc-decls/stdio.h>)
-
 #if defined(__HIP__) || defined(__CUDA__)
 #define __LIBC_ATTRS __attribute__((device))
+#else
+#define __LIBC_ATTRS
 #endif
 
 // Some headers provide these as macros. Temporarily undefine them so they do
@@ -60,21 +34,19 @@
 
 #pragma omp begin declare target
 
-#include <llvm-libc-decls/stdio.h>
+__LIBC_ATTRS extern FILE *stderr;
+__LIBC_ATTRS extern FILE *stdin;
+__LIBC_ATTRS extern FILE *stdout;
 
 #pragma omp end declare target
 
-#undef __LIBC_ATTRS
-
 // Restore the original macros when compiling on the host.
 #if !defined(__NVPTX__) && !defined(__AMDGPU__)
-#pragma pop_macro("stdout")
 #pragma pop_macro("stderr")
 #pragma pop_macro("stdin")
+#pragma pop_macro("stdout")
 #endif
 
-#endif
+#undef __LIBC_ATTRS
 
 #endif // __CLANG_LLVM_LIBC_WRAPPERS_STDIO_H__
-
-#endif
diff --git a/lib/include/llvm_libc_wrappers/stdlib.h b/lib/include/llvm_libc_wrappers/stdlib.h
index 1da22abd0b..7af5e2ebe0 100644
--- a/lib/include/llvm_libc_wrappers/stdlib.h
+++ b/lib/include/llvm_libc_wrappers/stdlib.h
@@ -15,39 +15,18 @@
 
 #include_next <stdlib.h>
 
-#if __has_include(<llvm-libc-decls/stdlib.h>)
-
 #if defined(__HIP__) || defined(__CUDA__)
 #define __LIBC_ATTRS __attribute__((device))
+#else
+#define __LIBC_ATTRS
 #endif
 
 #pragma omp begin declare target
 
-// The LLVM C library uses these named types so we forward declare them.
-typedef void (*__atexithandler_t)(void);
-typedef int (*__search_compare_t)(const void *, const void *);
-typedef int (*__qsortcompare_t)(const void *, const void *);
-typedef int (*__qsortrcompare_t)(const void *, const void *, void *);
-
-// Enforce ABI compatibility with the structs used by the LLVM C library.
-_Static_assert(__builtin_offsetof(div_t, quot) == 0, "ABI mismatch!");
-_Static_assert(__builtin_offsetof(ldiv_t, quot) == 0, "ABI mismatch!");
-_Static_assert(__builtin_offsetof(lldiv_t, quot) == 0, "ABI mismatch!");
-
-#if defined(__GLIBC__) && __cplusplus >= 201703L
-#define at_quick_exit atexit
-#endif
-
-#include <llvm-libc-decls/stdlib.h>
-
-#if defined(__GLIBC__) && __cplusplus >= 201703L
-#undef at_quick_exit
-#endif
+// TODO: Define these for CUDA / HIP.
 
 #pragma omp end declare target
 
 #undef __LIBC_ATTRS
 
-#endif
-
 #endif // __CLANG_LLVM_LIBC_WRAPPERS_STDLIB_H__
diff --git a/lib/include/llvm_libc_wrappers/string.h b/lib/include/llvm_libc_wrappers/string.h
index 0ea49cb137..766a58f5b6 100644
--- a/lib/include/llvm_libc_wrappers/string.h
+++ b/lib/include/llvm_libc_wrappers/string.h
@@ -15,82 +15,14 @@
 
 #include_next <string.h>
 
-#if __has_include(<llvm-libc-decls/string.h>)
-
 #if defined(__HIP__) || defined(__CUDA__)
 #define __LIBC_ATTRS __attribute__((device))
-#endif
-
-#pragma omp begin declare target
-
-// The GNU headers provide C++ standard compliant headers when in C++ mode and
-// the LLVM libc does not. We need to manually provide the definitions using the
-// same prototypes.
-#if defined(__cplusplus) && defined(__GLIBC__) &&                              \
-    defined(__CORRECT_ISO_CPP_STRING_H_PROTO)
-
-#ifndef __LIBC_ATTRS
+#else
 #define __LIBC_ATTRS
 #endif
 
-extern "C" {
-void *memccpy(void *__restrict, const void *__restrict, int,
-              size_t) __LIBC_ATTRS;
-int memcmp(const void *, const void *, size_t) __LIBC_ATTRS;
-void *memcpy(void *__restrict, const void *__restrict, size_t) __LIBC_ATTRS;
-void *memmem(const void *, size_t, const void *, size_t) __LIBC_ATTRS;
-void *memmove(void *, const void *, size_t) __LIBC_ATTRS;
-void *mempcpy(void *__restrict, const void *__restrict, size_t) __LIBC_ATTRS;
-void *memset(void *, int, size_t) __LIBC_ATTRS;
-char *stpcpy(char *__restrict, const char *__restrict) __LIBC_ATTRS;
-char *stpncpy(char *__restrict, const char *__restrict, size_t) __LIBC_ATTRS;
-char *strcat(char *__restrict, const char *__restrict) __LIBC_ATTRS;
-int strcmp(const char *, const char *) __LIBC_ATTRS;
-int strcoll(const char *, const char *) __LIBC_ATTRS;
-char *strcpy(char *__restrict, const char *__restrict) __LIBC_ATTRS;
-size_t strcspn(const char *, const char *) __LIBC_ATTRS;
-char *strdup(const char *) __LIBC_ATTRS;
-size_t strlen(const char *) __LIBC_ATTRS;
-char *strncat(char *__restrict, const char *__restrict, size_t) __LIBC_ATTRS;
-int strncmp(const char *, const char *, size_t) __LIBC_ATTRS;
-char *strncpy(char *__restrict, const char *__restrict, size_t) __LIBC_ATTRS;
-char *strndup(const char *, size_t) __LIBC_ATTRS;
-size_t strnlen(const char *, size_t) __LIBC_ATTRS;
-size_t strspn(const char *, const char *) __LIBC_ATTRS;
-char *strtok(char *__restrict, const char *__restrict) __LIBC_ATTRS;
-char *strtok_r(char *__restrict, const char *__restrict,
-               char **__restrict) __LIBC_ATTRS;
-size_t strxfrm(char *__restrict, const char *__restrict, size_t) __LIBC_ATTRS;
-}
-
-extern "C++" {
-char *strstr(char *, const char *) noexcept __LIBC_ATTRS;
-const char *strstr(const char *, const char *) noexcept __LIBC_ATTRS;
-char *strpbrk(char *, const char *) noexcept __LIBC_ATTRS;
-const char *strpbrk(const char *, const char *) noexcept __LIBC_ATTRS;
-char *strrchr(char *, int) noexcept __LIBC_ATTRS;
-const char *strrchr(const char *, int) noexcept __LIBC_ATTRS;
-char *strchr(char *, int) noexcept __LIBC_ATTRS;
-const char *strchr(const char *, int) noexcept __LIBC_ATTRS;
-char *strchrnul(char *, int) noexcept __LIBC_ATTRS;
-const char *strchrnul(const char *, int) noexcept __LIBC_ATTRS;
-char *strcasestr(char *, const char *) noexcept __LIBC_ATTRS;
-const char *strcasestr(const char *, const char *) noexcept __LIBC_ATTRS;
-void *memrchr(void *__s, int __c, size_t __n) noexcept __LIBC_ATTRS;
-const void *memrchr(const void *__s, int __c, size_t __n) noexcept __LIBC_ATTRS;
-void *memchr(void *__s, int __c, size_t __n) noexcept __LIBC_ATTRS;
-const void *memchr(const void *__s, int __c, size_t __n) noexcept __LIBC_ATTRS;
-}
-
-#else
-#include <llvm-libc-decls/string.h>
-
-#endif
-
-#pragma omp end declare target
+// TODO: Define these for CUDA / HIP.
 
 #undef __LIBC_ATTRS
 
-#endif
-
 #endif // __CLANG_LLVM_LIBC_WRAPPERS_STRING_H__
diff --git a/lib/include/llvm_libc_wrappers/time.h b/lib/include/llvm_libc_wrappers/time.h
index 9d1340c4eb..d38eea327a 100644
--- a/lib/include/llvm_libc_wrappers/time.h
+++ b/lib/include/llvm_libc_wrappers/time.h
@@ -15,20 +15,14 @@
 
 #include_next <time.h>
 
-#if __has_include(<llvm-libc-decls/time.h>)
-
 #if defined(__HIP__) || defined(__CUDA__)
 #define __LIBC_ATTRS __attribute__((device))
+#else
+#define __LIBC_ATTRS
 #endif
 
-#pragma omp begin declare target
+// TODO: Define these for CUDA / HIP.
 
-_Static_assert(sizeof(clock_t) == sizeof(long), "ABI mismatch!");
-
-#include <llvm-libc-decls/time.h>
-
-#pragma omp end declare target
-
-#endif
+#undef __LIBC_ATTRS
 
 #endif // __CLANG_LLVM_LIBC_WRAPPERS_TIME_H__
diff --git a/lib/include/mmintrin.h b/lib/include/mmintrin.h
index dc0fa5c523..2cf46455d7 100644
--- a/lib/include/mmintrin.h
+++ b/lib/include/mmintrin.h
@@ -39,27 +39,21 @@ typedef short __v8hi __attribute__((__vector_size__(16)));
 typedef char __v16qi __attribute__((__vector_size__(16)));
 
 /* Define the default attributes for the functions in this file. */
-#if defined(__EVEX512__) && !defined(__AVX10_1_512__)
+#if defined(__cplusplus) && (__cplusplus >= 201103L)
 #define __DEFAULT_FN_ATTRS_SSE2                                                \
-  __attribute__((__always_inline__, __nodebug__,                               \
-                 __target__("sse2,no-evex512"), __min_vector_width__(128)))
+  __attribute__((__always_inline__, __nodebug__, __target__("sse2"),           \
+                 __min_vector_width__(128))) constexpr
 #else
 #define __DEFAULT_FN_ATTRS_SSE2                                                \
   __attribute__((__always_inline__, __nodebug__, __target__("sse2"),           \
                  __min_vector_width__(128)))
 #endif
 
-#if defined(__cplusplus) && (__cplusplus >= 201103L)
-#define __DEFAULT_FN_ATTRS_SSE2_CONSTEXPR __DEFAULT_FN_ATTRS_SSE2 constexpr
-#else
-#define __DEFAULT_FN_ATTRS_SSE2_CONSTEXPR __DEFAULT_FN_ATTRS_SSE2
-#endif
-
 #define __trunc64(x)                                                           \
   (__m64) __builtin_shufflevector((__v2di)(x), __extension__(__v2di){}, 0)
-#define __anyext128(x)                                                         \
+#define __zext128(x)                                                           \
   (__m128i) __builtin_shufflevector((__v2si)(x), __extension__(__v2si){}, 0,   \
-                                    1, -1, -1)
+                                    1, 2, 3)
 
 /// Clears the MMX state by setting the state of the x87 stack registers
 ///    to empty.
@@ -68,9 +62,9 @@ typedef char __v16qi __attribute__((__vector_size__(16)));
 ///
 /// This intrinsic corresponds to the <c> EMMS </c> instruction.
 ///
-static __inline__ void __attribute__((__always_inline__, __nodebug__,
-                                      __target__("mmx,no-evex512")))
-_mm_empty(void) {
+static __inline__ void
+    __attribute__((__always_inline__, __nodebug__, __target__("mmx")))
+    _mm_empty(void) {
   __builtin_ia32_emms();
 }
 
@@ -85,10 +79,8 @@ _mm_empty(void) {
 ///    A 32-bit integer value.
 /// \returns A 64-bit integer vector. The lower 32 bits contain the value of the
 ///    parameter. The upper 32 bits are set to 0.
-static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2
-_mm_cvtsi32_si64(int __i)
-{
-    return __extension__ (__m64)(__v2si){__i, 0};
+static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 _mm_cvtsi32_si64(int __i) {
+  return __extension__(__m64)(__v2si){__i, 0};
 }
 
 /// Returns the lower 32 bits of a 64-bit integer vector as a 32-bit
@@ -102,10 +94,8 @@ _mm_cvtsi32_si64(int __i)
 ///    A 64-bit integer vector.
 /// \returns A 32-bit signed integer value containing the lower 32 bits of the
 ///    parameter.
-static __inline__ int __DEFAULT_FN_ATTRS_SSE2
-_mm_cvtsi64_si32(__m64 __m)
-{
-    return ((__v2si)__m)[0];
+static __inline__ int __DEFAULT_FN_ATTRS_SSE2 _mm_cvtsi64_si32(__m64 __m) {
+  return ((__v2si)__m)[0];
 }
 
 /// Casts a 64-bit signed integer value into a 64-bit integer vector.
@@ -118,10 +108,8 @@ _mm_cvtsi64_si32(__m64 __m)
 ///    A 64-bit signed integer.
 /// \returns A 64-bit integer vector containing the same bitwise pattern as the
 ///    parameter.
-static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2
-_mm_cvtsi64_m64(long long __i)
-{
-    return (__m64)__i;
+static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 _mm_cvtsi64_m64(long long __i) {
+  return __extension__(__m64)(__v1di){__i};
 }
 
 /// Casts a 64-bit integer vector into a 64-bit signed integer value.
@@ -134,10 +122,8 @@ _mm_cvtsi64_m64(long long __i)
 ///    A 64-bit integer vector.
 /// \returns A 64-bit signed integer containing the same bitwise pattern as the
 ///    parameter.
-static __inline__ long long __DEFAULT_FN_ATTRS_SSE2
-_mm_cvtm64_si64(__m64 __m)
-{
-    return (long long)__m;
+static __inline__ long long __DEFAULT_FN_ATTRS_SSE2 _mm_cvtm64_si64(__m64 __m) {
+  return ((__v1di)__m)[0];
 }
 
 /// Converts, with saturation, 16-bit signed integers from both 64-bit integer
@@ -159,11 +145,10 @@ _mm_cvtm64_si64(__m64 __m)
 ///    written to the upper 32 bits of the result.
 /// \returns A 64-bit integer vector of [8 x i8] containing the converted
 ///    values.
-static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2
-_mm_packs_pi16(__m64 __m1, __m64 __m2)
-{
-    return __trunc64(__builtin_ia32_packsswb128(
-        (__v8hi)__builtin_shufflevector(__m1, __m2, 0, 1), (__v8hi){}));
+static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 _mm_packs_pi16(__m64 __m1,
+                                                               __m64 __m2) {
+  return __trunc64(__builtin_ia32_packsswb128(
+      (__v8hi)__builtin_shufflevector(__m1, __m2, 0, 1), (__v8hi){}));
 }
 
 /// Converts, with saturation, 32-bit signed integers from both 64-bit integer
@@ -185,11 +170,10 @@ _mm_packs_pi16(__m64 __m1, __m64 __m2)
 ///    written to the upper 32 bits of the result.
 /// \returns A 64-bit integer vector of [4 x i16] containing the converted
 ///    values.
-static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2
-_mm_packs_pi32(__m64 __m1, __m64 __m2)
-{
-    return __trunc64(__builtin_ia32_packssdw128(
-        (__v4si)__builtin_shufflevector(__m1, __m2, 0, 1), (__v4si){}));
+static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 _mm_packs_pi32(__m64 __m1,
+                                                               __m64 __m2) {
+  return __trunc64(__builtin_ia32_packssdw128(
+      (__v4si)__builtin_shufflevector(__m1, __m2, 0, 1), (__v4si){}));
 }
 
 /// Converts, with saturation, 16-bit signed integers from both 64-bit integer
@@ -211,11 +195,10 @@ _mm_packs_pi32(__m64 __m1, __m64 __m2)
 ///    written to the upper 32 bits of the result.
 /// \returns A 64-bit integer vector of [8 x i8] containing the converted
 ///    values.
-static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2
-_mm_packs_pu16(__m64 __m1, __m64 __m2)
-{
-    return __trunc64(__builtin_ia32_packuswb128(
-        (__v8hi)__builtin_shufflevector(__m1, __m2, 0, 1), (__v8hi){}));
+static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 _mm_packs_pu16(__m64 __m1,
+                                                               __m64 __m2) {
+  return __trunc64(__builtin_ia32_packuswb128(
+      (__v8hi)__builtin_shufflevector(__m1, __m2, 0, 1), (__v8hi){}));
 }
 
 /// Unpacks the upper 32 bits from two 64-bit integer vectors of [8 x i8]
@@ -239,11 +222,10 @@ _mm_packs_pu16(__m64 __m1, __m64 __m2)
 ///    Bits [63:56] are written to bits [63:56] of the result.
 /// \returns A 64-bit integer vector of [8 x i8] containing the interleaved
 ///    values.
-static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2
-_mm_unpackhi_pi8(__m64 __m1, __m64 __m2)
-{
-    return (__m64)__builtin_shufflevector((__v8qi)__m1, (__v8qi)__m2,
-                                          4, 12, 5, 13, 6, 14, 7, 15);
+static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 _mm_unpackhi_pi8(__m64 __m1,
+                                                                 __m64 __m2) {
+  return (__m64)__builtin_shufflevector((__v8qi)__m1, (__v8qi)__m2, 4, 12, 5,
+                                        13, 6, 14, 7, 15);
 }
 
 /// Unpacks the upper 32 bits from two 64-bit integer vectors of
@@ -263,11 +245,9 @@ _mm_unpackhi_pi8(__m64 __m1, __m64 __m2)
 ///    Bits [63:48] are written to bits [63:48] of the result.
 /// \returns A 64-bit integer vector of [4 x i16] containing the interleaved
 ///    values.
-static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2
-_mm_unpackhi_pi16(__m64 __m1, __m64 __m2)
-{
-    return (__m64)__builtin_shufflevector((__v4hi)__m1, (__v4hi)__m2,
-                                          2, 6, 3, 7);
+static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 _mm_unpackhi_pi16(__m64 __m1,
+                                                                  __m64 __m2) {
+  return (__m64)__builtin_shufflevector((__v4hi)__m1, (__v4hi)__m2, 2, 6, 3, 7);
 }
 
 /// Unpacks the upper 32 bits from two 64-bit integer vectors of
@@ -285,10 +265,9 @@ _mm_unpackhi_pi16(__m64 __m1, __m64 __m2)
 ///    the upper 32 bits of the result.
 /// \returns A 64-bit integer vector of [2 x i32] containing the interleaved
 ///    values.
-static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2
-_mm_unpackhi_pi32(__m64 __m1, __m64 __m2)
-{
-    return (__m64)__builtin_shufflevector((__v2si)__m1, (__v2si)__m2, 1, 3);
+static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 _mm_unpackhi_pi32(__m64 __m1,
+                                                                  __m64 __m2) {
+  return (__m64)__builtin_shufflevector((__v2si)__m1, (__v2si)__m2, 1, 3);
 }
 
 /// Unpacks the lower 32 bits from two 64-bit integer vectors of [8 x i8]
@@ -312,11 +291,10 @@ _mm_unpackhi_pi32(__m64 __m1, __m64 __m2)
 ///    Bits [31:24] are written to bits [63:56] of the result.
 /// \returns A 64-bit integer vector of [8 x i8] containing the interleaved
 ///    values.
-static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2
-_mm_unpacklo_pi8(__m64 __m1, __m64 __m2)
-{
-    return (__m64)__builtin_shufflevector((__v8qi)__m1, (__v8qi)__m2,
-                                          0, 8, 1, 9, 2, 10, 3, 11);
+static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 _mm_unpacklo_pi8(__m64 __m1,
+                                                                 __m64 __m2) {
+  return (__m64)__builtin_shufflevector((__v8qi)__m1, (__v8qi)__m2, 0, 8, 1, 9,
+                                        2, 10, 3, 11);
 }
 
 /// Unpacks the lower 32 bits from two 64-bit integer vectors of
@@ -336,11 +314,9 @@ _mm_unpacklo_pi8(__m64 __m1, __m64 __m2)
 ///    Bits [31:16] are written to bits [63:48] of the result.
 /// \returns A 64-bit integer vector of [4 x i16] containing the interleaved
 ///    values.
-static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2
-_mm_unpacklo_pi16(__m64 __m1, __m64 __m2)
-{
-    return (__m64)__builtin_shufflevector((__v4hi)__m1, (__v4hi)__m2,
-                                          0, 4, 1, 5);
+static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 _mm_unpacklo_pi16(__m64 __m1,
+                                                                  __m64 __m2) {
+  return (__m64)__builtin_shufflevector((__v4hi)__m1, (__v4hi)__m2, 0, 4, 1, 5);
 }
 
 /// Unpacks the lower 32 bits from two 64-bit integer vectors of
@@ -358,10 +334,9 @@ _mm_unpacklo_pi16(__m64 __m1, __m64 __m2)
 ///    the upper 32 bits of the result.
 /// \returns A 64-bit integer vector of [2 x i32] containing the interleaved
 ///    values.
-static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2
-_mm_unpacklo_pi32(__m64 __m1, __m64 __m2)
-{
-    return (__m64)__builtin_shufflevector((__v2si)__m1, (__v2si)__m2, 0, 2);
+static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 _mm_unpacklo_pi32(__m64 __m1,
+                                                                  __m64 __m2) {
+  return (__m64)__builtin_shufflevector((__v2si)__m1, (__v2si)__m2, 0, 2);
 }
 
 /// Adds each 8-bit integer element of the first 64-bit integer vector
@@ -379,10 +354,9 @@ _mm_unpacklo_pi32(__m64 __m1, __m64 __m2)
 ///    A 64-bit integer vector of [8 x i8].
 /// \returns A 64-bit integer vector of [8 x i8] containing the sums of both
 ///    parameters.
-static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2
-_mm_add_pi8(__m64 __m1, __m64 __m2)
-{
-    return (__m64)(((__v8qu)__m1) + ((__v8qu)__m2));
+static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 _mm_add_pi8(__m64 __m1,
+                                                            __m64 __m2) {
+  return (__m64)(((__v8qu)__m1) + ((__v8qu)__m2));
 }
 
 /// Adds each 16-bit integer element of the first 64-bit integer vector
@@ -400,10 +374,9 @@ _mm_add_pi8(__m64 __m1, __m64 __m2)
 ///    A 64-bit integer vector of [4 x i16].
 /// \returns A 64-bit integer vector of [4 x i16] containing the sums of both
 ///    parameters.
-static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2
-_mm_add_pi16(__m64 __m1, __m64 __m2)
-{
-    return (__m64)(((__v4hu)__m1) + ((__v4hu)__m2));
+static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 _mm_add_pi16(__m64 __m1,
+                                                             __m64 __m2) {
+  return (__m64)(((__v4hu)__m1) + ((__v4hu)__m2));
 }
 
 /// Adds each 32-bit integer element of the first 64-bit integer vector
@@ -421,10 +394,9 @@ _mm_add_pi16(__m64 __m1, __m64 __m2)
 ///    A 64-bit integer vector of [2 x i32].
 /// \returns A 64-bit integer vector of [2 x i32] containing the sums of both
 ///    parameters.
-static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2
-_mm_add_pi32(__m64 __m1, __m64 __m2)
-{
-    return (__m64)(((__v2su)__m1) + ((__v2su)__m2));
+static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 _mm_add_pi32(__m64 __m1,
+                                                             __m64 __m2) {
+  return (__m64)(((__v2su)__m1) + ((__v2su)__m2));
 }
 
 /// Adds, with saturation, each 8-bit signed integer element of the first
@@ -445,10 +417,9 @@ _mm_add_pi32(__m64 __m1, __m64 __m2)
 ///    A 64-bit integer vector of [8 x i8].
 /// \returns A 64-bit integer vector of [8 x i8] containing the saturated sums
 ///    of both parameters.
-static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2
-_mm_adds_pi8(__m64 __m1, __m64 __m2)
-{
-    return (__m64)__builtin_elementwise_add_sat((__v8qs)__m1, (__v8qs)__m2);
+static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 _mm_adds_pi8(__m64 __m1,
+                                                             __m64 __m2) {
+  return (__m64)__builtin_elementwise_add_sat((__v8qs)__m1, (__v8qs)__m2);
 }
 
 /// Adds, with saturation, each 16-bit signed integer element of the first
@@ -469,10 +440,9 @@ _mm_adds_pi8(__m64 __m1, __m64 __m2)
 ///    A 64-bit integer vector of [4 x i16].
 /// \returns A 64-bit integer vector of [4 x i16] containing the saturated sums
 ///    of both parameters.
-static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2
-_mm_adds_pi16(__m64 __m1, __m64 __m2)
-{
-    return (__m64)__builtin_elementwise_add_sat((__v4hi)__m1, (__v4hi)__m2);
+static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 _mm_adds_pi16(__m64 __m1,
+                                                              __m64 __m2) {
+  return (__m64)__builtin_elementwise_add_sat((__v4hi)__m1, (__v4hi)__m2);
 }
 
 /// Adds, with saturation, each 8-bit unsigned integer element of the first
@@ -492,10 +462,9 @@ _mm_adds_pi16(__m64 __m1, __m64 __m2)
 ///    A 64-bit integer vector of [8 x i8].
 /// \returns A 64-bit integer vector of [8 x i8] containing the saturated
 ///    unsigned sums of both parameters.
-static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2
-_mm_adds_pu8(__m64 __m1, __m64 __m2)
-{
-    return (__m64)__builtin_elementwise_add_sat((__v8qu)__m1, (__v8qu)__m2);
+static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 _mm_adds_pu8(__m64 __m1,
+                                                             __m64 __m2) {
+  return (__m64)__builtin_elementwise_add_sat((__v8qu)__m1, (__v8qu)__m2);
 }
 
 /// Adds, with saturation, each 16-bit unsigned integer element of the first
@@ -515,10 +484,9 @@ _mm_adds_pu8(__m64 __m1, __m64 __m2)
 ///    A 64-bit integer vector of [4 x i16].
 /// \returns A 64-bit integer vector of [4 x i16] containing the saturated
 ///    unsigned sums of both parameters.
-static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2
-_mm_adds_pu16(__m64 __m1, __m64 __m2)
-{
-    return (__m64)__builtin_elementwise_add_sat((__v4hu)__m1, (__v4hu)__m2);
+static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 _mm_adds_pu16(__m64 __m1,
+                                                              __m64 __m2) {
+  return (__m64)__builtin_elementwise_add_sat((__v4hu)__m1, (__v4hu)__m2);
 }
 
 /// Subtracts each 8-bit integer element of the second 64-bit integer
@@ -536,10 +504,9 @@ _mm_adds_pu16(__m64 __m1, __m64 __m2)
 ///    A 64-bit integer vector of [8 x i8] containing the subtrahends.
 /// \returns A 64-bit integer vector of [8 x i8] containing the differences of
 ///    both parameters.
-static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2
-_mm_sub_pi8(__m64 __m1, __m64 __m2)
-{
-    return (__m64)(((__v8qu)__m1) - ((__v8qu)__m2));
+static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 _mm_sub_pi8(__m64 __m1,
+                                                            __m64 __m2) {
+  return (__m64)(((__v8qu)__m1) - ((__v8qu)__m2));
 }
 
 /// Subtracts each 16-bit integer element of the second 64-bit integer
@@ -557,10 +524,9 @@ _mm_sub_pi8(__m64 __m1, __m64 __m2)
 ///    A 64-bit integer vector of [4 x i16] containing the subtrahends.
 /// \returns A 64-bit integer vector of [4 x i16] containing the differences of
 ///    both parameters.
-static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2
-_mm_sub_pi16(__m64 __m1, __m64 __m2)
-{
-    return (__m64)(((__v4hu)__m1) - ((__v4hu)__m2));
+static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 _mm_sub_pi16(__m64 __m1,
+                                                             __m64 __m2) {
+  return (__m64)(((__v4hu)__m1) - ((__v4hu)__m2));
 }
 
 /// Subtracts each 32-bit integer element of the second 64-bit integer
@@ -578,10 +544,9 @@ _mm_sub_pi16(__m64 __m1, __m64 __m2)
 ///    A 64-bit integer vector of [2 x i32] containing the subtrahends.
 /// \returns A 64-bit integer vector of [2 x i32] containing the differences of
 ///    both parameters.
-static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2
-_mm_sub_pi32(__m64 __m1, __m64 __m2)
-{
-    return (__m64)(((__v2su)__m1) - ((__v2su)__m2));
+static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 _mm_sub_pi32(__m64 __m1,
+                                                             __m64 __m2) {
+  return (__m64)(((__v2su)__m1) - ((__v2su)__m2));
 }
 
 /// Subtracts, with saturation, each 8-bit signed integer element of the second
@@ -602,10 +567,9 @@ _mm_sub_pi32(__m64 __m1, __m64 __m2)
 ///    A 64-bit integer vector of [8 x i8] containing the subtrahends.
 /// \returns A 64-bit integer vector of [8 x i8] containing the saturated
 ///    differences of both parameters.
-static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2
-_mm_subs_pi8(__m64 __m1, __m64 __m2)
-{
-    return (__m64)__builtin_elementwise_sub_sat((__v8qs)__m1, (__v8qs)__m2);
+static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 _mm_subs_pi8(__m64 __m1,
+                                                             __m64 __m2) {
+  return (__m64)__builtin_elementwise_sub_sat((__v8qs)__m1, (__v8qs)__m2);
 }
 
 /// Subtracts, with saturation, each 16-bit signed integer element of the
@@ -626,10 +590,9 @@ _mm_subs_pi8(__m64 __m1, __m64 __m2)
 ///    A 64-bit integer vector of [4 x i16] containing the subtrahends.
 /// \returns A 64-bit integer vector of [4 x i16] containing the saturated
 ///    differences of both parameters.
-static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2
-_mm_subs_pi16(__m64 __m1, __m64 __m2)
-{
-    return (__m64)__builtin_elementwise_sub_sat((__v4hi)__m1, (__v4hi)__m2);
+static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 _mm_subs_pi16(__m64 __m1,
+                                                              __m64 __m2) {
+  return (__m64)__builtin_elementwise_sub_sat((__v4hi)__m1, (__v4hi)__m2);
 }
 
 /// Subtracts each 8-bit unsigned integer element of the second 64-bit
@@ -650,10 +613,9 @@ _mm_subs_pi16(__m64 __m1, __m64 __m2)
 ///    A 64-bit integer vector of [8 x i8] containing the subtrahends.
 /// \returns A 64-bit integer vector of [8 x i8] containing the saturated
 ///    differences of both parameters.
-static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2
-_mm_subs_pu8(__m64 __m1, __m64 __m2)
-{
-    return (__m64)__builtin_elementwise_sub_sat((__v8qu)__m1, (__v8qu)__m2);
+static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 _mm_subs_pu8(__m64 __m1,
+                                                             __m64 __m2) {
+  return (__m64)__builtin_elementwise_sub_sat((__v8qu)__m1, (__v8qu)__m2);
 }
 
 /// Subtracts each 16-bit unsigned integer element of the second 64-bit
@@ -674,10 +636,9 @@ _mm_subs_pu8(__m64 __m1, __m64 __m2)
 ///    A 64-bit integer vector of [4 x i16] containing the subtrahends.
 /// \returns A 64-bit integer vector of [4 x i16] containing the saturated
 ///    differences of both parameters.
-static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2
-_mm_subs_pu16(__m64 __m1, __m64 __m2)
-{
-    return (__m64)__builtin_elementwise_sub_sat((__v4hu)__m1, (__v4hu)__m2);
+static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 _mm_subs_pu16(__m64 __m1,
+                                                              __m64 __m2) {
+  return (__m64)__builtin_elementwise_sub_sat((__v4hu)__m1, (__v4hu)__m2);
 }
 
 /// Multiplies each 16-bit signed integer element of the first 64-bit
@@ -701,11 +662,10 @@ _mm_subs_pu16(__m64 __m1, __m64 __m2)
 ///    A 64-bit integer vector of [4 x i16].
 /// \returns A 64-bit integer vector of [2 x i32] containing the sums of
 ///    products of both parameters.
-static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2
-_mm_madd_pi16(__m64 __m1, __m64 __m2)
-{
-    return __trunc64(__builtin_ia32_pmaddwd128((__v8hi)__anyext128(__m1),
-                                               (__v8hi)__anyext128(__m2)));
+static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 _mm_madd_pi16(__m64 __m1,
+                                                              __m64 __m2) {
+  return __trunc64(__builtin_ia32_pmaddwd128((__v8hi)__zext128(__m1),
+                                             (__v8hi)__zext128(__m2)));
 }
 
 /// Multiplies each 16-bit signed integer element of the first 64-bit
@@ -723,11 +683,10 @@ _mm_madd_pi16(__m64 __m1, __m64 __m2)
 ///    A 64-bit integer vector of [4 x i16].
 /// \returns A 64-bit integer vector of [4 x i16] containing the upper 16 bits
 ///    of the products of both parameters.
-static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2
-_mm_mulhi_pi16(__m64 __m1, __m64 __m2)
-{
-    return __trunc64(__builtin_ia32_pmulhw128((__v8hi)__anyext128(__m1),
-                                              (__v8hi)__anyext128(__m2)));
+static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 _mm_mulhi_pi16(__m64 __m1,
+                                                               __m64 __m2) {
+  return __trunc64(__builtin_ia32_pmulhw128((__v8hi)__zext128(__m1),
+                                            (__v8hi)__zext128(__m2)));
 }
 
 /// Multiplies each 16-bit signed integer element of the first 64-bit
@@ -745,10 +704,9 @@ _mm_mulhi_pi16(__m64 __m1, __m64 __m2)
 ///    A 64-bit integer vector of [4 x i16].
 /// \returns A 64-bit integer vector of [4 x i16] containing the lower 16 bits
 ///    of the products of both parameters.
-static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2
-_mm_mullo_pi16(__m64 __m1, __m64 __m2)
-{
-    return (__m64)(((__v4hu)__m1) * ((__v4hu)__m2));
+static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 _mm_mullo_pi16(__m64 __m1,
+                                                               __m64 __m2) {
+  return (__m64)(((__v4hu)__m1) * ((__v4hu)__m2));
 }
 
 /// Left-shifts each 16-bit signed integer element of the first
@@ -771,8 +729,8 @@ _mm_mullo_pi16(__m64 __m1, __m64 __m2)
 static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2
 _mm_sll_pi16(__m64 __m, __m64 __count)
 {
-    return __trunc64(__builtin_ia32_psllw128((__v8hi)__anyext128(__m),
-                                             (__v8hi)__anyext128(__count)));
+  return __trunc64(__builtin_ia32_psllw128((__v8hi)__zext128(__m),
+                                           (__v8hi)__zext128(__count)));
 }
 
 /// Left-shifts each 16-bit signed integer element of a 64-bit integer
@@ -791,11 +749,9 @@ _mm_sll_pi16(__m64 __m, __m64 __count)
 /// \returns A 64-bit integer vector of [4 x i16] containing the left-shifted
 ///    values. If \a __count is greater or equal to 16, the result is set to all
 ///    0.
-static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2
-_mm_slli_pi16(__m64 __m, int __count)
-{
-    return __trunc64(__builtin_ia32_psllwi128((__v8hi)__anyext128(__m),
-                                              __count));
+static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 _mm_slli_pi16(__m64 __m,
+                                                              int __count) {
+  return __trunc64(__builtin_ia32_psllwi128((__v8hi)__zext128(__m), __count));
 }
 
 /// Left-shifts each 32-bit signed integer element of the first
@@ -818,8 +774,8 @@ _mm_slli_pi16(__m64 __m, int __count)
 static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2
 _mm_sll_pi32(__m64 __m, __m64 __count)
 {
-    return __trunc64(__builtin_ia32_pslld128((__v4si)__anyext128(__m),
-                                             (__v4si)__anyext128(__count)));
+  return __trunc64(__builtin_ia32_pslld128((__v4si)__zext128(__m),
+                                           (__v4si)__zext128(__count)));
 }
 
 /// Left-shifts each 32-bit signed integer element of a 64-bit integer
@@ -838,11 +794,9 @@ _mm_sll_pi32(__m64 __m, __m64 __count)
 /// \returns A 64-bit integer vector of [2 x i32] containing the left-shifted
 ///    values. If \a __count is greater or equal to 32, the result is set to all
 ///    0.
-static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2
-_mm_slli_pi32(__m64 __m, int __count)
-{
-    return __trunc64(__builtin_ia32_pslldi128((__v4si)__anyext128(__m),
-                                              __count));
+static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 _mm_slli_pi32(__m64 __m,
+                                                              int __count) {
+  return __trunc64(__builtin_ia32_pslldi128((__v4si)__zext128(__m), __count));
 }
 
 /// Left-shifts the first 64-bit integer parameter by the number of bits
@@ -862,8 +816,8 @@ _mm_slli_pi32(__m64 __m, int __count)
 static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2
 _mm_sll_si64(__m64 __m, __m64 __count)
 {
-    return __trunc64(__builtin_ia32_psllq128((__v2di)__anyext128(__m),
-                                             (__v2di)__anyext128(__count)));
+  return __trunc64(__builtin_ia32_psllq128((__v2di)__zext128(__m),
+                                           (__v2di)__zext128(__count)));
 }
 
 /// Left-shifts the first parameter, which is a 64-bit integer, by the
@@ -880,11 +834,9 @@ _mm_sll_si64(__m64 __m, __m64 __count)
 ///    A 32-bit integer value.
 /// \returns A 64-bit integer vector containing the left-shifted value. If
 ///     \a __count is greater or equal to 64, the result is set to 0.
-static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2
-_mm_slli_si64(__m64 __m, int __count)
-{
-    return __trunc64(__builtin_ia32_psllqi128((__v2di)__anyext128(__m),
-                                              __count));
+static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 _mm_slli_si64(__m64 __m,
+                                                              int __count) {
+  return __trunc64(__builtin_ia32_psllqi128((__v2di)__zext128(__m), __count));
 }
 
 /// Right-shifts each 16-bit integer element of the first parameter,
@@ -908,8 +860,8 @@ _mm_slli_si64(__m64 __m, int __count)
 static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2
 _mm_sra_pi16(__m64 __m, __m64 __count)
 {
-    return __trunc64(__builtin_ia32_psraw128((__v8hi)__anyext128(__m),
-                                             (__v8hi)__anyext128(__count)));
+  return __trunc64(__builtin_ia32_psraw128((__v8hi)__zext128(__m),
+                                           (__v8hi)__zext128(__count)));
 }
 
 /// Right-shifts each 16-bit integer element of a 64-bit integer vector
@@ -929,11 +881,9 @@ _mm_sra_pi16(__m64 __m, __m64 __count)
 ///    A 32-bit integer value.
 /// \returns A 64-bit integer vector of [4 x i16] containing the right-shifted
 ///    values.
-static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2
-_mm_srai_pi16(__m64 __m, int __count)
-{
-    return __trunc64(__builtin_ia32_psrawi128((__v8hi)__anyext128(__m),
-                                              __count));
+static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 _mm_srai_pi16(__m64 __m,
+                                                              int __count) {
+  return __trunc64(__builtin_ia32_psrawi128((__v8hi)__zext128(__m), __count));
 }
 
 /// Right-shifts each 32-bit integer element of the first parameter,
@@ -957,8 +907,8 @@ _mm_srai_pi16(__m64 __m, int __count)
 static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2
 _mm_sra_pi32(__m64 __m, __m64 __count)
 {
-    return __trunc64(__builtin_ia32_psrad128((__v4si)__anyext128(__m),
-                                             (__v4si)__anyext128(__count)));
+  return __trunc64(__builtin_ia32_psrad128((__v4si)__zext128(__m),
+                                           (__v4si)__zext128(__count)));
 }
 
 /// Right-shifts each 32-bit integer element of a 64-bit integer vector
@@ -978,11 +928,9 @@ _mm_sra_pi32(__m64 __m, __m64 __count)
 ///    A 32-bit integer value.
 /// \returns A 64-bit integer vector of [2 x i32] containing the right-shifted
 ///    values.
-static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2
-_mm_srai_pi32(__m64 __m, int __count)
-{
-    return __trunc64(__builtin_ia32_psradi128((__v4si)__anyext128(__m),
-                                              __count));
+static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 _mm_srai_pi32(__m64 __m,
+                                                              int __count) {
+  return __trunc64(__builtin_ia32_psradi128((__v4si)__zext128(__m), __count));
 }
 
 /// Right-shifts each 16-bit integer element of the first parameter,
@@ -1005,8 +953,8 @@ _mm_srai_pi32(__m64 __m, int __count)
 static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2
 _mm_srl_pi16(__m64 __m, __m64 __count)
 {
-    return __trunc64(__builtin_ia32_psrlw128((__v8hi)__anyext128(__m),
-                                             (__v8hi)__anyext128(__count)));
+  return __trunc64(__builtin_ia32_psrlw128((__v8hi)__zext128(__m),
+                                           (__v8hi)__zext128(__count)));
 }
 
 /// Right-shifts each 16-bit integer element of a 64-bit integer vector
@@ -1025,11 +973,9 @@ _mm_srl_pi16(__m64 __m, __m64 __count)
 ///    A 32-bit integer value.
 /// \returns A 64-bit integer vector of [4 x i16] containing the right-shifted
 ///    values.
-static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2
-_mm_srli_pi16(__m64 __m, int __count)
-{
-    return __trunc64(__builtin_ia32_psrlwi128((__v8hi)__anyext128(__m),
-                                              __count));
+static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 _mm_srli_pi16(__m64 __m,
+                                                              int __count) {
+  return __trunc64(__builtin_ia32_psrlwi128((__v8hi)__zext128(__m), __count));
 }
 
 /// Right-shifts each 32-bit integer element of the first parameter,
@@ -1052,8 +998,8 @@ _mm_srli_pi16(__m64 __m, int __count)
 static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2
 _mm_srl_pi32(__m64 __m, __m64 __count)
 {
-    return __trunc64(__builtin_ia32_psrld128((__v4si)__anyext128(__m),
-                                             (__v4si)__anyext128(__count)));
+  return __trunc64(__builtin_ia32_psrld128((__v4si)__zext128(__m),
+                                           (__v4si)__zext128(__count)));
 }
 
 /// Right-shifts each 32-bit integer element of a 64-bit integer vector
@@ -1072,11 +1018,9 @@ _mm_srl_pi32(__m64 __m, __m64 __count)
 ///    A 32-bit integer value.
 /// \returns A 64-bit integer vector of [2 x i32] containing the right-shifted
 ///    values.
-static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2
-_mm_srli_pi32(__m64 __m, int __count)
-{
-    return __trunc64(__builtin_ia32_psrldi128((__v4si)__anyext128(__m),
-                                              __count));
+static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 _mm_srli_pi32(__m64 __m,
+                                                              int __count) {
+  return __trunc64(__builtin_ia32_psrldi128((__v4si)__zext128(__m), __count));
 }
 
 /// Right-shifts the first 64-bit integer parameter by the number of bits
@@ -1096,8 +1040,8 @@ _mm_srli_pi32(__m64 __m, int __count)
 static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2
 _mm_srl_si64(__m64 __m, __m64 __count)
 {
-    return __trunc64(__builtin_ia32_psrlq128((__v2di)__anyext128(__m),
-                                             (__v2di)__anyext128(__count)));
+  return __trunc64(__builtin_ia32_psrlq128((__v2di)__zext128(__m),
+                                           (__v2di)__zext128(__count)));
 }
 
 /// Right-shifts the first parameter, which is a 64-bit integer, by the
@@ -1115,11 +1059,9 @@ _mm_srl_si64(__m64 __m, __m64 __count)
 /// \param __count
 ///    A 32-bit integer value.
 /// \returns A 64-bit integer vector containing the right-shifted value.
-static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2
-_mm_srli_si64(__m64 __m, int __count)
-{
-    return __trunc64(__builtin_ia32_psrlqi128((__v2di)__anyext128(__m),
-                                              __count));
+static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 _mm_srli_si64(__m64 __m,
+                                                              int __count) {
+  return __trunc64(__builtin_ia32_psrlqi128((__v2di)__zext128(__m), __count));
 }
 
 /// Performs a bitwise AND of two 64-bit integer vectors.
@@ -1134,10 +1076,9 @@ _mm_srli_si64(__m64 __m, int __count)
 ///    A 64-bit integer vector.
 /// \returns A 64-bit integer vector containing the bitwise AND of both
 ///    parameters.
-static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2
-_mm_and_si64(__m64 __m1, __m64 __m2)
-{
-    return (__m64)(((__v1du)__m1) & ((__v1du)__m2));
+static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 _mm_and_si64(__m64 __m1,
+                                                             __m64 __m2) {
+  return (__m64)(((__v1du)__m1) & ((__v1du)__m2));
 }
 
 /// Performs a bitwise NOT of the first 64-bit integer vector, and then
@@ -1155,10 +1096,9 @@ _mm_and_si64(__m64 __m1, __m64 __m2)
 ///    A 64-bit integer vector.
 /// \returns A 64-bit integer vector containing the bitwise AND of the second
 ///    parameter and the one's complement of the first parameter.
-static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2
-_mm_andnot_si64(__m64 __m1, __m64 __m2)
-{
-    return (__m64)(~((__v1du)__m1) & ((__v1du)__m2));
+static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 _mm_andnot_si64(__m64 __m1,
+                                                                __m64 __m2) {
+  return (__m64)(~((__v1du)__m1) & ((__v1du)__m2));
 }
 
 /// Performs a bitwise OR of two 64-bit integer vectors.
@@ -1173,10 +1113,9 @@ _mm_andnot_si64(__m64 __m1, __m64 __m2)
 ///    A 64-bit integer vector.
 /// \returns A 64-bit integer vector containing the bitwise OR of both
 ///    parameters.
-static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2
-_mm_or_si64(__m64 __m1, __m64 __m2)
-{
-    return (__m64)(((__v1du)__m1) | ((__v1du)__m2));
+static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 _mm_or_si64(__m64 __m1,
+                                                            __m64 __m2) {
+  return (__m64)(((__v1du)__m1) | ((__v1du)__m2));
 }
 
 /// Performs a bitwise exclusive OR of two 64-bit integer vectors.
@@ -1191,10 +1130,9 @@ _mm_or_si64(__m64 __m1, __m64 __m2)
 ///    A 64-bit integer vector.
 /// \returns A 64-bit integer vector containing the bitwise exclusive OR of both
 ///    parameters.
-static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2
-_mm_xor_si64(__m64 __m1, __m64 __m2)
-{
-    return (__m64)(((__v1du)__m1) ^ ((__v1du)__m2));
+static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 _mm_xor_si64(__m64 __m1,
+                                                             __m64 __m2) {
+  return (__m64)(((__v1du)__m1) ^ ((__v1du)__m2));
 }
 
 /// Compares the 8-bit integer elements of two 64-bit integer vectors of
@@ -1213,10 +1151,9 @@ _mm_xor_si64(__m64 __m1, __m64 __m2)
 ///    A 64-bit integer vector of [8 x i8].
 /// \returns A 64-bit integer vector of [8 x i8] containing the comparison
 ///    results.
-static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2
-_mm_cmpeq_pi8(__m64 __m1, __m64 __m2)
-{
-    return (__m64)(((__v8qi)__m1) == ((__v8qi)__m2));
+static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 _mm_cmpeq_pi8(__m64 __m1,
+                                                              __m64 __m2) {
+  return (__m64)(((__v8qi)__m1) == ((__v8qi)__m2));
 }
 
 /// Compares the 16-bit integer elements of two 64-bit integer vectors of
@@ -1235,10 +1172,9 @@ _mm_cmpeq_pi8(__m64 __m1, __m64 __m2)
 ///    A 64-bit integer vector of [4 x i16].
 /// \returns A 64-bit integer vector of [4 x i16] containing the comparison
 ///    results.
-static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2
-_mm_cmpeq_pi16(__m64 __m1, __m64 __m2)
-{
-    return (__m64)(((__v4hi)__m1) == ((__v4hi)__m2));
+static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 _mm_cmpeq_pi16(__m64 __m1,
+                                                               __m64 __m2) {
+  return (__m64)(((__v4hi)__m1) == ((__v4hi)__m2));
 }
 
 /// Compares the 32-bit integer elements of two 64-bit integer vectors of
@@ -1257,10 +1193,9 @@ _mm_cmpeq_pi16(__m64 __m1, __m64 __m2)
 ///    A 64-bit integer vector of [2 x i32].
 /// \returns A 64-bit integer vector of [2 x i32] containing the comparison
 ///    results.
-static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2
-_mm_cmpeq_pi32(__m64 __m1, __m64 __m2)
-{
-    return (__m64)(((__v2si)__m1) == ((__v2si)__m2));
+static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 _mm_cmpeq_pi32(__m64 __m1,
+                                                               __m64 __m2) {
+  return (__m64)(((__v2si)__m1) == ((__v2si)__m2));
 }
 
 /// Compares the 8-bit integer elements of two 64-bit integer vectors of
@@ -1279,9 +1214,8 @@ _mm_cmpeq_pi32(__m64 __m1, __m64 __m2)
 ///    A 64-bit integer vector of [8 x i8].
 /// \returns A 64-bit integer vector of [8 x i8] containing the comparison
 ///    results.
-static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2
-_mm_cmpgt_pi8(__m64 __m1, __m64 __m2)
-{
+static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 _mm_cmpgt_pi8(__m64 __m1,
+                                                              __m64 __m2) {
   /* This function always performs a signed comparison, but __v8qi is a char
      which may be signed or unsigned, so use __v8qs. */
     return (__m64)((__v8qs)__m1 > (__v8qs)__m2);
@@ -1303,10 +1237,9 @@ _mm_cmpgt_pi8(__m64 __m1, __m64 __m2)
 ///    A 64-bit integer vector of [4 x i16].
 /// \returns A 64-bit integer vector of [4 x i16] containing the comparison
 ///    results.
-static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2
-_mm_cmpgt_pi16(__m64 __m1, __m64 __m2)
-{
-    return (__m64)((__v4hi)__m1 > (__v4hi)__m2);
+static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 _mm_cmpgt_pi16(__m64 __m1,
+                                                               __m64 __m2) {
+  return (__m64)((__v4hi)__m1 > (__v4hi)__m2);
 }
 
 /// Compares the 32-bit integer elements of two 64-bit integer vectors of
@@ -1325,10 +1258,9 @@ _mm_cmpgt_pi16(__m64 __m1, __m64 __m2)
 ///    A 64-bit integer vector of [2 x i32].
 /// \returns A 64-bit integer vector of [2 x i32] containing the comparison
 ///    results.
-static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2
-_mm_cmpgt_pi32(__m64 __m1, __m64 __m2)
-{
-    return (__m64)((__v2si)__m1 > (__v2si)__m2);
+static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 _mm_cmpgt_pi32(__m64 __m1,
+                                                               __m64 __m2) {
+  return (__m64)((__v2si)__m1 > (__v2si)__m2);
 }
 
 /// Constructs a 64-bit integer vector initialized to zero.
@@ -1338,8 +1270,7 @@ _mm_cmpgt_pi32(__m64 __m1, __m64 __m2)
 /// This intrinsic corresponds to the <c> PXOR </c> instruction.
 ///
 /// \returns An initialized 64-bit integer vector with all elements set to zero.
-static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2_CONSTEXPR
-_mm_setzero_si64(void) {
+static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 _mm_setzero_si64(void) {
   return __extension__(__m64){0LL};
 }
 
@@ -1358,8 +1289,8 @@ _mm_setzero_si64(void) {
 ///    A 32-bit integer value used to initialize the lower 32 bits of the
 ///    result.
 /// \returns An initialized 64-bit integer vector.
-static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2_CONSTEXPR
-_mm_set_pi32(int __i1, int __i0) {
+static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 _mm_set_pi32(int __i1,
+                                                             int __i0) {
   return __extension__(__m64)(__v2si){__i0, __i1};
 }
 
@@ -1380,8 +1311,10 @@ _mm_set_pi32(int __i1, int __i0) {
 /// \param __s0
 ///    A 16-bit integer value used to initialize bits [15:0] of the result.
 /// \returns An initialized 64-bit integer vector.
-static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2_CONSTEXPR
-_mm_set_pi16(short __s3, short __s2, short __s1, short __s0) {
+static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 _mm_set_pi16(short __s3,
+                                                             short __s2,
+                                                             short __s1,
+                                                             short __s0) {
   return __extension__(__m64)(__v4hi){__s0, __s1, __s2, __s3};
 }
 
@@ -1410,7 +1343,7 @@ _mm_set_pi16(short __s3, short __s2, short __s1, short __s0) {
 /// \param __b0
 ///    An 8-bit integer value used to initialize bits [7:0] of the result.
 /// \returns An initialized 64-bit integer vector.
-static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2_CONSTEXPR
+static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2
 _mm_set_pi8(char __b7, char __b6, char __b5, char __b4, char __b3, char __b2,
             char __b1, char __b0) {
   return __extension__(__m64)(__v8qi){__b0, __b1, __b2, __b3,
@@ -1430,8 +1363,7 @@ _mm_set_pi8(char __b7, char __b6, char __b5, char __b4, char __b3, char __b2,
 ///    A 32-bit integer value used to initialize each vector element of the
 ///    result.
 /// \returns An initialized 64-bit integer vector of [2 x i32].
-static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2_CONSTEXPR
-_mm_set1_pi32(int __i) {
+static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 _mm_set1_pi32(int __i) {
   return _mm_set_pi32(__i, __i);
 }
 
@@ -1448,8 +1380,7 @@ _mm_set1_pi32(int __i) {
 ///    A 16-bit integer value used to initialize each vector element of the
 ///    result.
 /// \returns An initialized 64-bit integer vector of [4 x i16].
-static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2_CONSTEXPR
-_mm_set1_pi16(short __w) {
+static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 _mm_set1_pi16(short __w) {
   return _mm_set_pi16(__w, __w, __w, __w);
 }
 
@@ -1465,8 +1396,7 @@ _mm_set1_pi16(short __w) {
 ///    An 8-bit integer value used to initialize each vector element of the
 ///    result.
 /// \returns An initialized 64-bit integer vector of [8 x i8].
-static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2_CONSTEXPR
-_mm_set1_pi8(char __b) {
+static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 _mm_set1_pi8(char __b) {
   return _mm_set_pi8(__b, __b, __b, __b, __b, __b, __b, __b);
 }
 
@@ -1485,8 +1415,8 @@ _mm_set1_pi8(char __b) {
 ///    A 32-bit integer value used to initialize the upper 32 bits of the
 ///    result.
 /// \returns An initialized 64-bit integer vector.
-static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2_CONSTEXPR
-_mm_setr_pi32(int __i0, int __i1) {
+static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 _mm_setr_pi32(int __i0,
+                                                              int __i1) {
   return _mm_set_pi32(__i1, __i0);
 }
 
@@ -1507,8 +1437,10 @@ _mm_setr_pi32(int __i0, int __i1) {
 /// \param __w3
 ///    A 16-bit integer value used to initialize bits [63:48] of the result.
 /// \returns An initialized 64-bit integer vector.
-static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2_CONSTEXPR
-_mm_setr_pi16(short __w0, short __w1, short __w2, short __w3) {
+static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 _mm_setr_pi16(short __w0,
+                                                              short __w1,
+                                                              short __w2,
+                                                              short __w3) {
   return _mm_set_pi16(__w3, __w2, __w1, __w0);
 }
 
@@ -1537,13 +1469,12 @@ _mm_setr_pi16(short __w0, short __w1, short __w2, short __w3) {
 /// \param __b7
 ///    An 8-bit integer value used to initialize bits [63:56] of the result.
 /// \returns An initialized 64-bit integer vector.
-static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2_CONSTEXPR
+static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2
 _mm_setr_pi8(char __b0, char __b1, char __b2, char __b3, char __b4, char __b5,
              char __b6, char __b7) {
   return _mm_set_pi8(__b7, __b6, __b5, __b4, __b3, __b2, __b1, __b0);
 }
 
-#undef __anyext128
 #undef __trunc64
 #undef __DEFAULT_FN_ATTRS_SSE2
 
diff --git a/lib/include/module.modulemap b/lib/include/module.modulemap
index a72828625a..c13dd3fd48 100644
--- a/lib/include/module.modulemap
+++ b/lib/include/module.modulemap
@@ -171,8 +171,22 @@ module _Builtin_intrinsics [system] [extern_c] {
 // that module. The system float.h (if present) will be treated
 // as a textual header in the sytem module.
 module _Builtin_float [system] {
-  header "float.h"
-  export *
+  textual header "float.h"
+
+  explicit module float {
+    header "__float_float.h"
+    export *
+  }
+
+  explicit module header_macro {
+    header "__float_header_macro.h"
+    export *
+  }
+
+  explicit module infinity_nan {
+    header "__float_infinity_nan.h"
+    export *
+  }
 }
 
 module _Builtin_inttypes [system] {
@@ -239,6 +253,11 @@ module _Builtin_stdbool [system] {
   export *
 }
 
+module _Builtin_stdckdint [system] {
+  header "stdckdint.h"
+  export *
+}
+
 module _Builtin_stdcountof [system] {
   header "stdcountof.h"
   export *
@@ -329,13 +348,13 @@ module _Builtin_unwind [system] {
 }
 // End -fbuiltin-headers-in-system-modules affected modules
 
-module opencl_c {
+module opencl_c [system] {
   requires opencl
   header "opencl-c.h"
   header "opencl-c-base.h"
 }
 
-module ptrauth {
+module ptrauth [system] {
   header "ptrauth.h"
   export *
 }
diff --git a/lib/include/movrs_avx10_2_512intrin.h b/lib/include/movrs_avx10_2_512intrin.h
index 5cd907a597..75d7ce93db 100644
--- a/lib/include/movrs_avx10_2_512intrin.h
+++ b/lib/include/movrs_avx10_2_512intrin.h
@@ -17,8 +17,8 @@
 
 /* Define the default attributes for the functions in this file. */
 #define __DEFAULT_FN_ATTRS512                                                  \
-  __attribute__((__always_inline__, __nodebug__,                               \
-                 __target__("movrs, avx10.2-512"), __min_vector_width__(512)))
+  __attribute__((__always_inline__, __nodebug__, __target__("movrs, avx10.2"), \
+                 __min_vector_width__(512)))
 
 static __inline__ __m512i __DEFAULT_FN_ATTRS512
 _mm512_loadrs_epi8(void const *__A) {
diff --git a/lib/include/movrs_avx10_2intrin.h b/lib/include/movrs_avx10_2intrin.h
index 27b625b6b4..1c78b214fd 100644
--- a/lib/include/movrs_avx10_2intrin.h
+++ b/lib/include/movrs_avx10_2intrin.h
@@ -17,11 +17,11 @@
 
 /* Define the default attributes for the functions in this file. */
 #define __DEFAULT_FN_ATTRS128                                                  \
-  __attribute__((__always_inline__, __nodebug__,                               \
-                 __target__("movrs,avx10.2-256"), __min_vector_width__(128)))
+  __attribute__((__always_inline__, __nodebug__, __target__("movrs,avx10.2"),  \
+                 __min_vector_width__(128)))
 #define __DEFAULT_FN_ATTRS256                                                  \
-  __attribute__((__always_inline__, __nodebug__,                               \
-                 __target__("movrs,avx10.2-256"), __min_vector_width__(256)))
+  __attribute__((__always_inline__, __nodebug__, __target__("movrs,avx10.2"),  \
+                 __min_vector_width__(256)))
 
 static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_loadrs_epi8(void const *__A) {
diff --git a/lib/include/pmmintrin.h b/lib/include/pmmintrin.h
index cd605df7fb..a9a6544036 100644
--- a/lib/include/pmmintrin.h
+++ b/lib/include/pmmintrin.h
@@ -17,15 +17,9 @@
 #include <emmintrin.h>
 
 /* Define the default attributes for the functions in this file. */
-#if defined(__EVEX512__) && !defined(__AVX10_1_512__)
-#define __DEFAULT_FN_ATTRS                                                     \
-  __attribute__((__always_inline__, __nodebug__,                               \
-                 __target__("sse3,no-evex512"), __min_vector_width__(128)))
-#else
 #define __DEFAULT_FN_ATTRS                                                     \
   __attribute__((__always_inline__, __nodebug__, __target__("sse3"),           \
                  __min_vector_width__(128)))
-#endif
 
 #if defined(__cplusplus) && (__cplusplus >= 201103L)
 #define __DEFAULT_FN_ATTRS_CONSTEXPR __DEFAULT_FN_ATTRS constexpr
@@ -66,9 +60,8 @@ _mm_lddqu_si128(__m128i_u const *__p)
 ///    A 128-bit vector of [4 x float] containing the right source operand.
 /// \returns A 128-bit vector of [4 x float] containing the alternating sums and
 ///    differences of both operands.
-static __inline__ __m128 __DEFAULT_FN_ATTRS
-_mm_addsub_ps(__m128 __a, __m128 __b)
-{
+static __inline__ __m128 __DEFAULT_FN_ATTRS_CONSTEXPR
+_mm_addsub_ps(__m128 __a, __m128 __b) {
   return __builtin_ia32_addsubps((__v4sf)__a, (__v4sf)__b);
 }
 
@@ -89,9 +82,8 @@ _mm_addsub_ps(__m128 __a, __m128 __b)
 ///    destination.
 /// \returns A 128-bit vector of [4 x float] containing the horizontal sums of
 ///    both operands.
-static __inline__ __m128 __DEFAULT_FN_ATTRS
-_mm_hadd_ps(__m128 __a, __m128 __b)
-{
+static __inline__ __m128 __DEFAULT_FN_ATTRS_CONSTEXPR _mm_hadd_ps(__m128 __a,
+                                                                  __m128 __b) {
   return __builtin_ia32_haddps((__v4sf)__a, (__v4sf)__b);
 }
 
@@ -112,9 +104,8 @@ _mm_hadd_ps(__m128 __a, __m128 __b)
 ///    bits of the destination.
 /// \returns A 128-bit vector of [4 x float] containing the horizontal
 ///    differences of both operands.
-static __inline__ __m128 __DEFAULT_FN_ATTRS
-_mm_hsub_ps(__m128 __a, __m128 __b)
-{
+static __inline__ __m128 __DEFAULT_FN_ATTRS_CONSTEXPR _mm_hsub_ps(__m128 __a,
+                                                                  __m128 __b) {
   return __builtin_ia32_hsubps((__v4sf)__a, (__v4sf)__b);
 }
 
@@ -174,9 +165,8 @@ _mm_moveldup_ps(__m128 __a)
 ///    A 128-bit vector of [2 x double] containing the right source operand.
 /// \returns A 128-bit vector of [2 x double] containing the alternating sums
 ///    and differences of both operands.
-static __inline__ __m128d __DEFAULT_FN_ATTRS
-_mm_addsub_pd(__m128d __a, __m128d __b)
-{
+static __inline__ __m128d __DEFAULT_FN_ATTRS_CONSTEXPR
+_mm_addsub_pd(__m128d __a, __m128d __b) {
   return __builtin_ia32_addsubpd((__v2df)__a, (__v2df)__b);
 }
 
@@ -197,9 +187,8 @@ _mm_addsub_pd(__m128d __a, __m128d __b)
 ///    destination.
 /// \returns A 128-bit vector of [2 x double] containing the horizontal sums of
 ///    both operands.
-static __inline__ __m128d __DEFAULT_FN_ATTRS
-_mm_hadd_pd(__m128d __a, __m128d __b)
-{
+static __inline__ __m128d __DEFAULT_FN_ATTRS_CONSTEXPR
+_mm_hadd_pd(__m128d __a, __m128d __b) {
   return __builtin_ia32_haddpd((__v2df)__a, (__v2df)__b);
 }
 
@@ -220,9 +209,8 @@ _mm_hadd_pd(__m128d __a, __m128d __b)
 ///    the destination.
 /// \returns A 128-bit vector of [2 x double] containing the horizontal
 ///    differences of both operands.
-static __inline__ __m128d __DEFAULT_FN_ATTRS
-_mm_hsub_pd(__m128d __a, __m128d __b)
-{
+static __inline__ __m128d __DEFAULT_FN_ATTRS_CONSTEXPR
+_mm_hsub_pd(__m128d __a, __m128d __b) {
   return __builtin_ia32_hsubpd((__v2df)__a, (__v2df)__b);
 }
 
diff --git a/lib/include/ptrauth.h b/lib/include/ptrauth.h
index 7f7d387cbd..ad28f06f09 100644
--- a/lib/include/ptrauth.h
+++ b/lib/include/ptrauth.h
@@ -95,7 +95,7 @@ typedef __UINTPTR_TYPE__ ptrauth_generic_signature_t;
    __ptrauth qualifier; the compiler will perform this check
    automatically. */
 
-#if __has_feature(ptrauth_intrinsics)
+#if __has_feature(ptrauth_intrinsics) || defined(__PTRAUTH__)
 
 /* Strip the signature from a value without authenticating it.
 
@@ -241,6 +241,18 @@ typedef __UINTPTR_TYPE__ ptrauth_generic_signature_t;
 #define ptrauth_type_discriminator(__type)                                     \
   __builtin_ptrauth_type_discriminator(__type)
 
+/* Compute the constant discriminator used by Clang to sign pointers with the
+   given C function pointer type.
+
+   A call to this function is an integer constant expression. */
+#if __has_feature(ptrauth_function_pointer_type_discrimination)
+#define ptrauth_function_pointer_type_discriminator(__type)                    \
+  __builtin_ptrauth_type_discriminator(__type)
+#else
+#define ptrauth_function_pointer_type_discriminator(__type)                    \
+  ((ptrauth_extra_data_t)0)
+#endif
+
 /* Compute a signature for the given pair of pointer-sized values.
    The order of the arguments is significant.
 
@@ -372,6 +384,8 @@ typedef __UINTPTR_TYPE__ ptrauth_generic_signature_t;
   })
 
 #define ptrauth_type_discriminator(__type) ((ptrauth_extra_data_t)0)
+#define ptrauth_function_pointer_type_discriminator(__type)                    \
+  ((ptrauth_extra_data_t)0)
 
 #define ptrauth_sign_generic_data(__value, __data)                             \
   ({                                                                           \
@@ -388,6 +402,6 @@ typedef __UINTPTR_TYPE__ ptrauth_generic_signature_t;
 #define __ptrauth_objc_isa_uintptr
 #define __ptrauth_objc_super_pointer
 
-#endif /* __has_feature(ptrauth_intrinsics) */
+#endif /* __has_feature(ptrauth_intrinsics) || defined(__PTRAUTH__) */
 
 #endif /* __PTRAUTH_H */
diff --git a/lib/include/riscv_mips.h b/lib/include/riscv_mips.h
new file mode 100644
index 0000000000..124a989280
--- /dev/null
+++ b/lib/include/riscv_mips.h
@@ -0,0 +1,34 @@
+//===---- riscv_mips.h - RISC-V MIPS Intrinsic definitions ----------------===//
+//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef __RISCV_MIPS_H
+#define __RISCV_MIPS_H
+
+#if !defined(__riscv)
+#error "This header is only meant to be used on riscv architecture"
+#endif
+
+#define __DEFAULT_FN_ATTRS                                                     \
+  __attribute__((__always_inline__, __nodebug__, __target__("xmipsexectl")))
+
+static __inline__ void __DEFAULT_FN_ATTRS __mips_pause() {
+  __builtin_riscv_mips_pause();
+}
+
+static __inline__ void __DEFAULT_FN_ATTRS __mips_ehb() {
+  __builtin_riscv_mips_ehb();
+}
+
+static __inline__ void __DEFAULT_FN_ATTRS __mips_ihb() {
+  __builtin_riscv_mips_ihb();
+}
+
+#undef __DEFAULT_FN_ATTRS
+
+#endif
diff --git a/lib/include/riscv_nds.h b/lib/include/riscv_nds.h
new file mode 100644
index 0000000000..29734c4383
--- /dev/null
+++ b/lib/include/riscv_nds.h
@@ -0,0 +1,89 @@
+/*===---- riscv_nds.h - Andes intrinsics -----------------------------------===
+ *
+ * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+ * See https://llvm.org/LICENSE.txt for license information.
+ * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+ *
+ *===-----------------------------------------------------------------------===
+ */
+
+#ifndef __RISCV_NDS_H
+#define __RISCV_NDS_H
+
+#include <stdint.h>
+
+#if defined(__cplusplus)
+extern "C" {
+#endif
+
+#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__))
+
+#if defined(__riscv_xandesperf)
+
+#if __riscv_xlen == 32
+
+static __inline__ int32_t __DEFAULT_FN_ATTRS __riscv_nds_ffb_32(uint32_t __a,
+                                                                uint32_t __b) {
+  return __builtin_riscv_nds_ffb_32(__a, __b);
+}
+
+static __inline__ int32_t __DEFAULT_FN_ATTRS
+__riscv_nds_ffzmism_32(uint32_t __a, uint32_t __b) {
+  return __builtin_riscv_nds_ffzmism_32(__a, __b);
+}
+
+static __inline__ int32_t __DEFAULT_FN_ATTRS
+__riscv_nds_ffmism_32(uint32_t __a, uint32_t __b) {
+  return __builtin_riscv_nds_ffmism_32(__a, __b);
+}
+
+static __inline__ int32_t __DEFAULT_FN_ATTRS
+__riscv_nds_flmism_32(uint32_t __a, uint32_t __b) {
+  return __builtin_riscv_nds_flmism_32(__a, __b);
+}
+
+#endif
+
+#if __riscv_xlen == 64
+
+static __inline__ int64_t __DEFAULT_FN_ATTRS __riscv_nds_ffb_64(uint64_t __a,
+                                                                uint64_t __b) {
+  return __builtin_riscv_nds_ffb_64(__a, __b);
+}
+
+static __inline__ int64_t __DEFAULT_FN_ATTRS
+__riscv_nds_ffzmism_64(uint64_t __a, uint64_t __b) {
+  return __builtin_riscv_nds_ffzmism_64(__a, __b);
+}
+
+static __inline__ int64_t __DEFAULT_FN_ATTRS
+__riscv_nds_ffmism_64(uint64_t __a, uint64_t __b) {
+  return __builtin_riscv_nds_ffmism_64(__a, __b);
+}
+
+static __inline__ int64_t __DEFAULT_FN_ATTRS
+__riscv_nds_flmism_64(uint64_t __a, uint64_t __b) {
+  return __builtin_riscv_nds_flmism_64(__a, __b);
+}
+
+#endif
+
+#endif // defined(__riscv_xandesperf)
+
+#if defined(__riscv_xandesbfhcvt)
+
+static __inline__ float __DEFAULT_FN_ATTRS __riscv_nds_fcvt_s_bf16(__bf16 bf) {
+  return __builtin_riscv_nds_fcvt_s_bf16(bf);
+}
+
+static __inline__ __bf16 __DEFAULT_FN_ATTRS __riscv_nds_fcvt_bf16_s(float sf) {
+  return __builtin_riscv_nds_fcvt_bf16_s(sf);
+}
+
+#endif // defined(__riscv_xandesbfhcvt)
+
+#if defined(__cplusplus)
+}
+#endif
+
+#endif // __RISCV_NDS_H
diff --git a/lib/include/sifive_vector.h b/lib/include/sifive_vector.h
index 4e67ad6fca..d315eb9609 100644
--- a/lib/include/sifive_vector.h
+++ b/lib/include/sifive_vector.h
@@ -47,9 +47,9 @@
   __riscv_sf_vc_x_se(p27_26, p24_20, p11_7, (uint32_t)rs1, 32, 3, vl)
 
 #define __riscv_sf_vc_i_se_u8mf4(p27_26, p24_20, p11_7, simm5, vl)             \
-  __riscv_sf_vc_i_se(p27_26, p24_20, p11_7, simm5, 8, 7, vl)
-#define __riscv_sf_vc_i_se_u8mf2(p27_26, p24_20, p11_7, simm5, vl)             \
   __riscv_sf_vc_i_se(p27_26, p24_20, p11_7, simm5, 8, 6, vl)
+#define __riscv_sf_vc_i_se_u8mf2(p27_26, p24_20, p11_7, simm5, vl)             \
+  __riscv_sf_vc_i_se(p27_26, p24_20, p11_7, simm5, 8, 7, vl)
 #define __riscv_sf_vc_i_se_u8m1(p27_26, p24_20, p11_7, simm5, vl)              \
   __riscv_sf_vc_i_se(p27_26, p24_20, p11_7, simm5, 8, 0, vl)
 #define __riscv_sf_vc_i_se_u8m2(p27_26, p24_20, p11_7, simm5, vl)              \
@@ -115,4 +115,60 @@
 #endif
 #endif
 
+#define __riscv_sf_vsettnt_e8w1(atn) __riscv_sf_vsettnt(atn, 0, 1)
+#define __riscv_sf_vsettnt_e8w2(atn) __riscv_sf_vsettnt(atn, 0, 2)
+#define __riscv_sf_vsettnt_e8w4(atn) __riscv_sf_vsettnt(atn, 0, 3)
+#define __riscv_sf_vsettnt_e16w1(atn) __riscv_sf_vsettnt(atn, 1, 1)
+#define __riscv_sf_vsettnt_e16w2(atn) __riscv_sf_vsettnt(atn, 1, 2)
+#define __riscv_sf_vsettnt_e16w4(atn) __riscv_sf_vsettnt(atn, 1, 3)
+#define __riscv_sf_vsettnt_e32w1(atn) __riscv_sf_vsettnt(atn, 2, 1)
+#define __riscv_sf_vsettnt_e32w2(atn) __riscv_sf_vsettnt(atn, 2, 2)
+#define __riscv_sf_vsettm_e8w1(atm) __riscv_sf_vsettm(atm, 0, 1)
+#define __riscv_sf_vsettm_e8w2(atm) __riscv_sf_vsettm(atm, 0, 2)
+#define __riscv_sf_vsettm_e8w4(atm) __riscv_sf_vsettm(atm, 0, 3)
+#define __riscv_sf_vsettm_e16w1(atm) __riscv_sf_vsettm(atm, 1, 1)
+#define __riscv_sf_vsettm_e16w2(atm) __riscv_sf_vsettm(atm, 1, 2)
+#define __riscv_sf_vsettm_e16w4(atm) __riscv_sf_vsettm(atm, 1, 3)
+#define __riscv_sf_vsettm_e32w1(atm) __riscv_sf_vsettm(atm, 2, 1)
+#define __riscv_sf_vsettm_e32w2(atm) __riscv_sf_vsettm(atm, 2, 2)
+#define __riscv_sf_vsettn_e8w1(atn) __riscv_sf_vsettn(atn, 0, 1)
+#define __riscv_sf_vsettn_e8w2(atn) __riscv_sf_vsettn(atn, 0, 2)
+#define __riscv_sf_vsettn_e8w4(atn) __riscv_sf_vsettn(atn, 0, 3)
+#define __riscv_sf_vsettn_e16w1(atn) __riscv_sf_vsettn(atn, 1, 1)
+#define __riscv_sf_vsettn_e16w2(atn) __riscv_sf_vsettn(atn, 1, 2)
+#define __riscv_sf_vsettn_e16w4(atn) __riscv_sf_vsettn(atn, 1, 3)
+#define __riscv_sf_vsettn_e32w1(atn) __riscv_sf_vsettn(atn, 2, 1)
+#define __riscv_sf_vsettn_e32w2(atn) __riscv_sf_vsettn(atn, 2, 2)
+#define __riscv_sf_vsettk_e8w1(atk) __riscv_sf_vsettk(atk, 0, 1)
+#define __riscv_sf_vsettk_e8w2(atk) __riscv_sf_vsettk(atk, 0, 2)
+#define __riscv_sf_vsettk_e8w4(atk) __riscv_sf_vsettk(atk, 0, 3)
+#define __riscv_sf_vsettk_e16w1(atk) __riscv_sf_vsettk(atk, 1, 1)
+#define __riscv_sf_vsettk_e16w2(atk) __riscv_sf_vsettk(atk, 1, 2)
+#define __riscv_sf_vsettk_e16w4(atk) __riscv_sf_vsettk(atk, 1, 3)
+#define __riscv_sf_vsettk_e32w1(atk) __riscv_sf_vsettk(atk, 2, 1)
+#define __riscv_sf_vsettk_e32w2(atk) __riscv_sf_vsettk(atk, 2, 2)
+#define __riscv_sf_vtzero_t_e8w1(tile, atm, atn)                               \
+  __riscv_sf_vtzero_t(tile, atm, atn, 3, 1)
+#define __riscv_sf_vtzero_t_e8w2(tile, atm, atn)                               \
+  __riscv_sf_vtzero_t(tile, atm, atn, 3, 2)
+#define __riscv_sf_vtzero_t_e8w4(tile, atm, atn)                               \
+  __riscv_sf_vtzero_t(tile, atm, atn, 3, 4)
+#define __riscv_sf_vtzero_t_e16w1(tile, atm, atn)                              \
+  __riscv_sf_vtzero_t(tile, atm, atn, 4, 1)
+#define __riscv_sf_vtzero_t_e16w2(tile, atm, atn)                              \
+  __riscv_sf_vtzero_t(tile, atm, atn, 4, 2)
+#define __riscv_sf_vtzero_t_e16w4(tile, atm, atn)                              \
+  __riscv_sf_vtzero_t(tile, atm, atn, 4, 4)
+#define __riscv_sf_vtzero_t_e32w1(tile, atm, atn)                              \
+  __riscv_sf_vtzero_t(tile, atm, atn, 5, 1)
+#define __riscv_sf_vtzero_t_e32w2(tile, atm, atn)                              \
+  __riscv_sf_vtzero_t(tile, atm, atn, 5, 2)
+#if __riscv_v_elen >= 64
+#define __riscv_sf_vsettnt_e64w1(atn) __riscv_sf_vsettnt(atn, 3, 1)
+#define __riscv_sf_vsettm_e64w1(atm) __riscv_sf_vsettm(atm, 3, 1)
+#define __riscv_sf_vsettn_e64w1(atn) __riscv_sf_vsettn(atn, 3, 1)
+#define __riscv_sf_vsettk_e64w1(atk) __riscv_sf_vsettk(atk, 3, 1)
+#define __riscv_sf_vtzero_t_e64w1(tile, atm, atn)                              \
+  __riscv_sf_vtzero_t(tile, atm, atn, 6, 1)
+#endif
 #endif //_SIFIVE_VECTOR_H_
diff --git a/lib/include/sm4evexintrin.h b/lib/include/sm4evexintrin.h
index f6ae0037ba..9c15d1fca9 100644
--- a/lib/include/sm4evexintrin.h
+++ b/lib/include/sm4evexintrin.h
@@ -14,8 +14,8 @@
 #define __SM4EVEXINTRIN_H
 
 #define __DEFAULT_FN_ATTRS512                                                  \
-  __attribute__((__always_inline__, __nodebug__,                               \
-                 __target__("sm4,avx10.2-512"), __min_vector_width__(512)))
+  __attribute__((__always_inline__, __nodebug__, __target__("sm4,avx10.2"),    \
+                 __min_vector_width__(512)))
 
 static __inline__ __m512i __DEFAULT_FN_ATTRS512
 _mm512_sm4key4_epi32(__m512i __A, __m512i __B) {
diff --git a/lib/include/smmintrin.h b/lib/include/smmintrin.h
index bc6fe4c801..511a135375 100644
--- a/lib/include/smmintrin.h
+++ b/lib/include/smmintrin.h
@@ -17,14 +17,14 @@
 #include <tmmintrin.h>
 
 /* Define the default attributes for the functions in this file. */
-#if defined(__EVEX512__) && !defined(__AVX10_1_512__)
-#define __DEFAULT_FN_ATTRS                                                     \
-  __attribute__((__always_inline__, __nodebug__,                               \
-                 __target__("sse4.1,no-evex512"), __min_vector_width__(128)))
-#else
 #define __DEFAULT_FN_ATTRS                                                     \
   __attribute__((__always_inline__, __nodebug__, __target__("sse4.1"),         \
                  __min_vector_width__(128)))
+
+#if defined(__cplusplus) && (__cplusplus >= 201103L)
+#define __DEFAULT_FN_ATTRS_CONSTEXPR __DEFAULT_FN_ATTRS constexpr
+#else
+#define __DEFAULT_FN_ATTRS_CONSTEXPR __DEFAULT_FN_ATTRS
 #endif
 
 /* SSE4 Rounding macros. */
@@ -439,9 +439,8 @@
 ///    position in the result. When a mask bit is 1, the corresponding 64-bit
 ///    element in operand \a __V2 is copied to the same position in the result.
 /// \returns A 128-bit vector of [2 x double] containing the copied values.
-static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_blendv_pd(__m128d __V1,
-                                                           __m128d __V2,
-                                                           __m128d __M) {
+static __inline__ __m128d __DEFAULT_FN_ATTRS_CONSTEXPR
+_mm_blendv_pd(__m128d __V1, __m128d __V2, __m128d __M) {
   return (__m128d)__builtin_ia32_blendvpd((__v2df)__V1, (__v2df)__V2,
                                           (__v2df)__M);
 }
@@ -466,9 +465,8 @@ static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_blendv_pd(__m128d __V1,
 ///    position in the result. When a mask bit is 1, the corresponding 32-bit
 ///    element in operand \a __V2 is copied to the same position in the result.
 /// \returns A 128-bit vector of [4 x float] containing the copied values.
-static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_blendv_ps(__m128 __V1,
-                                                          __m128 __V2,
-                                                          __m128 __M) {
+static __inline__ __m128 __DEFAULT_FN_ATTRS_CONSTEXPR
+_mm_blendv_ps(__m128 __V1, __m128 __V2, __m128 __M) {
   return (__m128)__builtin_ia32_blendvps((__v4sf)__V1, (__v4sf)__V2,
                                          (__v4sf)__M);
 }
@@ -493,9 +491,8 @@ static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_blendv_ps(__m128 __V1,
 ///    position in the result. When a mask bit is 1, the corresponding 8-bit
 ///    element in operand \a __V2 is copied to the same position in the result.
 /// \returns A 128-bit vector of [16 x i8] containing the copied values.
-static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_blendv_epi8(__m128i __V1,
-                                                             __m128i __V2,
-                                                             __m128i __M) {
+static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR
+_mm_blendv_epi8(__m128i __V1, __m128i __V2, __m128i __M) {
   return (__m128i)__builtin_ia32_pblendvb128((__v16qi)__V1, (__v16qi)__V2,
                                              (__v16qi)__M);
 }
@@ -542,8 +539,8 @@ static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_blendv_epi8(__m128i __V1,
 /// \param __V2
 ///    A 128-bit integer vector.
 /// \returns A 128-bit integer vector containing the products of both operands.
-static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mullo_epi32(__m128i __V1,
-                                                             __m128i __V2) {
+static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR
+_mm_mullo_epi32(__m128i __V1, __m128i __V2) {
   return (__m128i)((__v4su)__V1 * (__v4su)__V2);
 }
 
@@ -561,8 +558,8 @@ static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mullo_epi32(__m128i __V1,
 ///    A 128-bit vector of [4 x i32].
 /// \returns A 128-bit vector of [2 x i64] containing the products of both
 ///    operands.
-static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mul_epi32(__m128i __V1,
-                                                           __m128i __V2) {
+static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR
+_mm_mul_epi32(__m128i __V1, __m128i __V2) {
   return (__m128i)__builtin_ia32_pmuldq128((__v4si)__V1, (__v4si)__V2);
 }
 
@@ -669,8 +666,8 @@ _mm_stream_load_si128(const void *__V) {
 /// \param __V2
 ///    A 128-bit vector of [16 x i8]
 /// \returns A 128-bit vector of [16 x i8] containing the lesser values.
-static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_min_epi8(__m128i __V1,
-                                                          __m128i __V2) {
+static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR
+_mm_min_epi8(__m128i __V1, __m128i __V2) {
   return (__m128i)__builtin_elementwise_min((__v16qs)__V1, (__v16qs)__V2);
 }
 
@@ -687,8 +684,8 @@ static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_min_epi8(__m128i __V1,
 /// \param __V2
 ///    A 128-bit vector of [16 x i8].
 /// \returns A 128-bit vector of [16 x i8] containing the greater values.
-static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_max_epi8(__m128i __V1,
-                                                          __m128i __V2) {
+static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR
+_mm_max_epi8(__m128i __V1, __m128i __V2) {
   return (__m128i)__builtin_elementwise_max((__v16qs)__V1, (__v16qs)__V2);
 }
 
@@ -705,8 +702,8 @@ static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_max_epi8(__m128i __V1,
 /// \param __V2
 ///    A 128-bit vector of [8 x u16].
 /// \returns A 128-bit vector of [8 x u16] containing the lesser values.
-static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_min_epu16(__m128i __V1,
-                                                           __m128i __V2) {
+static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR
+_mm_min_epu16(__m128i __V1, __m128i __V2) {
   return (__m128i)__builtin_elementwise_min((__v8hu)__V1, (__v8hu)__V2);
 }
 
@@ -723,8 +720,8 @@ static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_min_epu16(__m128i __V1,
 /// \param __V2
 ///    A 128-bit vector of [8 x u16].
 /// \returns A 128-bit vector of [8 x u16] containing the greater values.
-static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_max_epu16(__m128i __V1,
-                                                           __m128i __V2) {
+static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR
+_mm_max_epu16(__m128i __V1, __m128i __V2) {
   return (__m128i)__builtin_elementwise_max((__v8hu)__V1, (__v8hu)__V2);
 }
 
@@ -741,8 +738,8 @@ static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_max_epu16(__m128i __V1,
 /// \param __V2
 ///    A 128-bit vector of [4 x i32].
 /// \returns A 128-bit vector of [4 x i32] containing the lesser values.
-static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_min_epi32(__m128i __V1,
-                                                           __m128i __V2) {
+static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR
+_mm_min_epi32(__m128i __V1, __m128i __V2) {
   return (__m128i)__builtin_elementwise_min((__v4si)__V1, (__v4si)__V2);
 }
 
@@ -759,8 +756,8 @@ static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_min_epi32(__m128i __V1,
 /// \param __V2
 ///    A 128-bit vector of [4 x i32].
 /// \returns A 128-bit vector of [4 x i32] containing the greater values.
-static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_max_epi32(__m128i __V1,
-                                                           __m128i __V2) {
+static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR
+_mm_max_epi32(__m128i __V1, __m128i __V2) {
   return (__m128i)__builtin_elementwise_max((__v4si)__V1, (__v4si)__V2);
 }
 
@@ -777,8 +774,8 @@ static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_max_epi32(__m128i __V1,
 /// \param __V2
 ///    A 128-bit vector of [4 x u32].
 /// \returns A 128-bit vector of [4 x u32] containing the lesser values.
-static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_min_epu32(__m128i __V1,
-                                                           __m128i __V2) {
+static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR
+_mm_min_epu32(__m128i __V1, __m128i __V2) {
   return (__m128i)__builtin_elementwise_min((__v4su)__V1, (__v4su)__V2);
 }
 
@@ -795,8 +792,8 @@ static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_min_epu32(__m128i __V1,
 /// \param __V2
 ///    A 128-bit vector of [4 x u32].
 /// \returns A 128-bit vector of [4 x u32] containing the greater values.
-static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_max_epu32(__m128i __V1,
-                                                           __m128i __V2) {
+static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR
+_mm_max_epu32(__m128i __V1, __m128i __V2) {
   return (__m128i)__builtin_elementwise_max((__v4su)__V1, (__v4su)__V2);
 }
 
@@ -1096,8 +1093,8 @@ static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_max_epu32(__m128i __V1,
 /// \param __V
 ///    A 128-bit integer vector selecting which bits to test in operand \a __M.
 /// \returns TRUE if the specified bits are all zeros; FALSE otherwise.
-static __inline__ int __DEFAULT_FN_ATTRS _mm_testz_si128(__m128i __M,
-                                                         __m128i __V) {
+static __inline__ int __DEFAULT_FN_ATTRS_CONSTEXPR
+_mm_testz_si128(__m128i __M, __m128i __V) {
   return __builtin_ia32_ptestz128((__v2di)__M, (__v2di)__V);
 }
 
@@ -1113,8 +1110,8 @@ static __inline__ int __DEFAULT_FN_ATTRS _mm_testz_si128(__m128i __M,
 /// \param __V
 ///    A 128-bit integer vector selecting which bits to test in operand \a __M.
 /// \returns TRUE if the specified bits are all ones; FALSE otherwise.
-static __inline__ int __DEFAULT_FN_ATTRS _mm_testc_si128(__m128i __M,
-                                                         __m128i __V) {
+static __inline__ int __DEFAULT_FN_ATTRS_CONSTEXPR
+_mm_testc_si128(__m128i __M, __m128i __V) {
   return __builtin_ia32_ptestc128((__v2di)__M, (__v2di)__V);
 }
 
@@ -1131,8 +1128,8 @@ static __inline__ int __DEFAULT_FN_ATTRS _mm_testc_si128(__m128i __M,
 ///    A 128-bit integer vector selecting which bits to test in operand \a __M.
 /// \returns TRUE if the specified bits are neither all zeros nor all ones;
 ///    FALSE otherwise.
-static __inline__ int __DEFAULT_FN_ATTRS _mm_testnzc_si128(__m128i __M,
-                                                           __m128i __V) {
+static __inline__ int __DEFAULT_FN_ATTRS_CONSTEXPR
+_mm_testnzc_si128(__m128i __M, __m128i __V) {
   return __builtin_ia32_ptestnzc128((__v2di)__M, (__v2di)__V);
 }
 
@@ -1205,8 +1202,8 @@ static __inline__ int __DEFAULT_FN_ATTRS _mm_testnzc_si128(__m128i __M,
 /// \param __V2
 ///    A 128-bit integer vector.
 /// \returns A 128-bit integer vector containing the comparison results.
-static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cmpeq_epi64(__m128i __V1,
-                                                             __m128i __V2) {
+static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR
+_mm_cmpeq_epi64(__m128i __V1, __m128i __V2) {
   return (__m128i)((__v2di)__V1 == (__v2di)__V2);
 }
 
@@ -1224,7 +1221,8 @@ static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cmpeq_epi64(__m128i __V1,
 ///    A 128-bit vector of [16 x i8]. The lower eight 8-bit elements are
 ///    sign-extended to 16-bit values.
 /// \returns A 128-bit vector of [8 x i16] containing the sign-extended values.
-static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cvtepi8_epi16(__m128i __V) {
+static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR
+_mm_cvtepi8_epi16(__m128i __V) {
   /* This function always performs a signed extension, but __v16qi is a char
      which may be signed or unsigned, so use __v16qs. */
   return (__m128i) __builtin_convertvector(
@@ -1246,7 +1244,8 @@ static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cvtepi8_epi16(__m128i __V) {
 ///    A 128-bit vector of [16 x i8]. The lower four 8-bit elements are
 ///    sign-extended to 32-bit values.
 /// \returns A 128-bit vector of [4 x i32] containing the sign-extended values.
-static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cvtepi8_epi32(__m128i __V) {
+static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR
+_mm_cvtepi8_epi32(__m128i __V) {
   /* This function always performs a signed extension, but __v16qi is a char
      which may be signed or unsigned, so use __v16qs. */
   return (__m128i) __builtin_convertvector(
@@ -1266,7 +1265,8 @@ static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cvtepi8_epi32(__m128i __V) {
 ///    A 128-bit vector of [16 x i8]. The lower two 8-bit elements are
 ///    sign-extended to 64-bit values.
 /// \returns A 128-bit vector of [2 x i64] containing the sign-extended values.
-static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cvtepi8_epi64(__m128i __V) {
+static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR
+_mm_cvtepi8_epi64(__m128i __V) {
   /* This function always performs a signed extension, but __v16qi is a char
      which may be signed or unsigned, so use __v16qs. */
   return (__m128i) __builtin_convertvector(
@@ -1286,7 +1286,8 @@ static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cvtepi8_epi64(__m128i __V) {
 ///    A 128-bit vector of [8 x i16]. The lower four 16-bit elements are
 ///    sign-extended to 32-bit values.
 /// \returns A 128-bit vector of [4 x i32] containing the sign-extended values.
-static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cvtepi16_epi32(__m128i __V) {
+static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR
+_mm_cvtepi16_epi32(__m128i __V) {
   return (__m128i) __builtin_convertvector(
       __builtin_shufflevector((__v8hi)__V, (__v8hi)__V, 0, 1, 2, 3), __v4si);
 }
@@ -1304,7 +1305,8 @@ static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cvtepi16_epi32(__m128i __V) {
 ///    A 128-bit vector of [8 x i16]. The lower two 16-bit elements are
 ///     sign-extended to 64-bit values.
 /// \returns A 128-bit vector of [2 x i64] containing the sign-extended values.
-static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cvtepi16_epi64(__m128i __V) {
+static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR
+_mm_cvtepi16_epi64(__m128i __V) {
   return (__m128i) __builtin_convertvector(
       __builtin_shufflevector((__v8hi)__V, (__v8hi)__V, 0, 1), __v2di);
 }
@@ -1322,7 +1324,8 @@ static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cvtepi16_epi64(__m128i __V) {
 ///    A 128-bit vector of [4 x i32]. The lower two 32-bit elements are
 ///    sign-extended to 64-bit values.
 /// \returns A 128-bit vector of [2 x i64] containing the sign-extended values.
-static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cvtepi32_epi64(__m128i __V) {
+static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR
+_mm_cvtepi32_epi64(__m128i __V) {
   return (__m128i) __builtin_convertvector(
       __builtin_shufflevector((__v4si)__V, (__v4si)__V, 0, 1), __v2di);
 }
@@ -1341,7 +1344,8 @@ static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cvtepi32_epi64(__m128i __V) {
 ///    A 128-bit vector of [16 x i8]. The lower eight 8-bit elements are
 ///    zero-extended to 16-bit values.
 /// \returns A 128-bit vector of [8 x i16] containing the zero-extended values.
-static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cvtepu8_epi16(__m128i __V) {
+static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR
+_mm_cvtepu8_epi16(__m128i __V) {
   return (__m128i) __builtin_convertvector(
       __builtin_shufflevector((__v16qu)__V, (__v16qu)__V, 0, 1, 2, 3, 4, 5, 6,
                               7),
@@ -1361,7 +1365,8 @@ static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cvtepu8_epi16(__m128i __V) {
 ///    A 128-bit vector of [16 x i8]. The lower four 8-bit elements are
 ///    zero-extended to 32-bit values.
 /// \returns A 128-bit vector of [4 x i32] containing the zero-extended values.
-static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cvtepu8_epi32(__m128i __V) {
+static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR
+_mm_cvtepu8_epi32(__m128i __V) {
   return (__m128i) __builtin_convertvector(
       __builtin_shufflevector((__v16qu)__V, (__v16qu)__V, 0, 1, 2, 3), __v4si);
 }
@@ -1379,7 +1384,8 @@ static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cvtepu8_epi32(__m128i __V) {
 ///    A 128-bit vector of [16 x i8]. The lower two 8-bit elements are
 ///    zero-extended to 64-bit values.
 /// \returns A 128-bit vector of [2 x i64] containing the zero-extended values.
-static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cvtepu8_epi64(__m128i __V) {
+static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR
+_mm_cvtepu8_epi64(__m128i __V) {
   return (__m128i) __builtin_convertvector(
       __builtin_shufflevector((__v16qu)__V, (__v16qu)__V, 0, 1), __v2di);
 }
@@ -1397,7 +1403,8 @@ static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cvtepu8_epi64(__m128i __V) {
 ///    A 128-bit vector of [8 x i16]. The lower four 16-bit elements are
 ///    zero-extended to 32-bit values.
 /// \returns A 128-bit vector of [4 x i32] containing the zero-extended values.
-static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cvtepu16_epi32(__m128i __V) {
+static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR
+_mm_cvtepu16_epi32(__m128i __V) {
   return (__m128i) __builtin_convertvector(
       __builtin_shufflevector((__v8hu)__V, (__v8hu)__V, 0, 1, 2, 3), __v4si);
 }
@@ -1415,7 +1422,8 @@ static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cvtepu16_epi32(__m128i __V) {
 ///    A 128-bit vector of [8 x i16]. The lower two 16-bit elements are
 ///    zero-extended to 64-bit values.
 /// \returns A 128-bit vector of [2 x i64] containing the zero-extended values.
-static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cvtepu16_epi64(__m128i __V) {
+static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR
+_mm_cvtepu16_epi64(__m128i __V) {
   return (__m128i) __builtin_convertvector(
       __builtin_shufflevector((__v8hu)__V, (__v8hu)__V, 0, 1), __v2di);
 }
@@ -1433,7 +1441,8 @@ static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cvtepu16_epi64(__m128i __V) {
 ///    A 128-bit vector of [4 x i32]. The lower two 32-bit elements are
 ///    zero-extended to 64-bit values.
 /// \returns A 128-bit vector of [2 x i64] containing the zero-extended values.
-static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cvtepu32_epi64(__m128i __V) {
+static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR
+_mm_cvtepu32_epi64(__m128i __V) {
   return (__m128i) __builtin_convertvector(
       __builtin_shufflevector((__v4su)__V, (__v4su)__V, 0, 1), __v2di);
 }
@@ -1457,8 +1466,8 @@ static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cvtepu32_epi64(__m128i __V) {
 ///    A 128-bit vector of [4 x i32]. The converted [4 x i16] values are
 ///    written to the higher 64 bits of the result.
 /// \returns A 128-bit vector of [8 x i16] containing the converted values.
-static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_packus_epi32(__m128i __V1,
-                                                              __m128i __V2) {
+static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR
+_mm_packus_epi32(__m128i __V1, __m128i __V2) {
   return (__m128i)__builtin_ia32_packusdw128((__v4si)__V1, (__v4si)__V2);
 }
 
@@ -1515,7 +1524,8 @@ static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_packus_epi32(__m128i __V1,
 /// \returns A 128-bit value where bits [15:0] contain the minimum value found
 ///    in parameter \a __V, bits [18:16] contain the index of the minimum value
 ///    and the remaining bits are set to 0.
-static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_minpos_epu16(__m128i __V) {
+static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR
+_mm_minpos_epu16(__m128i __V) {
   return (__m128i)__builtin_ia32_phminposuw128((__v8hi)__V);
 }
 
@@ -1525,9 +1535,16 @@ static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_minpos_epu16(__m128i __V) {
    so we'll do the same.  */
 
 #undef __DEFAULT_FN_ATTRS
+#undef __DEFAULT_FN_ATTRS_CONSTEXPR
 #define __DEFAULT_FN_ATTRS                                                     \
   __attribute__((__always_inline__, __nodebug__, __target__("sse4.2")))
 
+#if defined(__cplusplus) && (__cplusplus >= 201103L)
+#define __DEFAULT_FN_ATTRS_CONSTEXPR __DEFAULT_FN_ATTRS constexpr
+#else
+#define __DEFAULT_FN_ATTRS_CONSTEXPR __DEFAULT_FN_ATTRS
+#endif
+
 /* These specify the type of data that we're comparing.  */
 #define _SIDD_UBYTE_OPS 0x00
 #define _SIDD_UWORD_OPS 0x01
@@ -2320,12 +2337,13 @@ static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_minpos_epu16(__m128i __V) {
 /// \param __V2
 ///    A 128-bit integer vector.
 /// \returns A 128-bit integer vector containing the comparison results.
-static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cmpgt_epi64(__m128i __V1,
-                                                             __m128i __V2) {
+static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR
+_mm_cmpgt_epi64(__m128i __V1, __m128i __V2) {
   return (__m128i)((__v2di)__V1 > (__v2di)__V2);
 }
 
 #undef __DEFAULT_FN_ATTRS
+#undef __DEFAULT_FN_ATTRS_CONSTEXPR
 
 #include <popcntintrin.h>
 
diff --git a/lib/include/spirvintrin.h b/lib/include/spirvintrin.h
new file mode 100644
index 0000000000..2a10a47ade
--- /dev/null
+++ b/lib/include/spirvintrin.h
@@ -0,0 +1,194 @@
+//===-- spirvintrin.h - SPIR-V intrinsic functions ------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef __SPIRVINTRIN_H
+#define __SPIRVINTRIN_H
+
+#ifndef __SPIRV__
+#error "This file is intended for SPIR-V targets or offloading to SPIR-V"
+#endif
+
+#ifndef __GPUINTRIN_H
+#error "Never use <spirvintrin.h> directly; include <gpuintrin.h> instead"
+#endif
+
+_Pragma("omp begin declare target device_type(nohost)");
+_Pragma("omp begin declare variant match(device = {arch(spirv64)})");
+
+// Type aliases to the address spaces used by the SPIR-V backend.
+#define __gpu_private __attribute__((address_space(0)))
+#define __gpu_constant __attribute__((address_space(2)))
+#define __gpu_local __attribute__((address_space(3)))
+#define __gpu_global __attribute__((address_space(1)))
+#define __gpu_generic __attribute__((address_space(4)))
+
+// Attribute to declare a function as a kernel.
+#define __gpu_kernel __attribute__((device_kernel, visibility("protected")))
+
+// Returns the number of workgroups in the 'x' dimension of the grid.
+_DEFAULT_FN_ATTRS static __inline__ uint32_t __gpu_num_blocks_x(void) {
+  return __builtin_spirv_num_workgroups(0);
+}
+
+// Returns the number of workgroups in the 'y' dimension of the grid.
+_DEFAULT_FN_ATTRS static __inline__ uint32_t __gpu_num_blocks_y(void) {
+  return __builtin_spirv_num_workgroups(1);
+}
+
+// Returns the number of workgroups in the 'z' dimension of the grid.
+_DEFAULT_FN_ATTRS static __inline__ uint32_t __gpu_num_blocks_z(void) {
+  return __builtin_spirv_num_workgroups(2);
+}
+
+// Returns the 'x' dimension of the current workgroup's id.
+_DEFAULT_FN_ATTRS static __inline__ uint32_t __gpu_block_id_x(void) {
+  return __builtin_spirv_workgroup_id(0);
+}
+
+// Returns the 'y' dimension of the current workgroup's id.
+_DEFAULT_FN_ATTRS static __inline__ uint32_t __gpu_block_id_y(void) {
+  return __builtin_spirv_workgroup_id(1);
+}
+
+// Returns the 'z' dimension of the current workgroup's id.
+_DEFAULT_FN_ATTRS static __inline__ uint32_t __gpu_block_id_z(void) {
+  return __builtin_spirv_workgroup_id(2);
+}
+
+// Returns the number of workitems in the 'x' dimension.
+_DEFAULT_FN_ATTRS static __inline__ uint32_t __gpu_num_threads_x(void) {
+  return __builtin_spirv_workgroup_size(0);
+}
+
+// Returns the number of workitems in the 'y' dimension.
+_DEFAULT_FN_ATTRS static __inline__ uint32_t __gpu_num_threads_y(void) {
+  return __builtin_spirv_workgroup_size(1);
+}
+
+// Returns the number of workitems in the 'z' dimension.
+_DEFAULT_FN_ATTRS static __inline__ uint32_t __gpu_num_threads_z(void) {
+  return __builtin_spirv_workgroup_size(2);
+}
+
+// Returns the 'x' dimension id of the workitem in the current workgroup.
+_DEFAULT_FN_ATTRS static __inline__ uint32_t __gpu_thread_id_x(void) {
+  return __builtin_spirv_local_invocation_id(0);
+}
+
+// Returns the 'y' dimension id of the workitem in the current workgroup.
+_DEFAULT_FN_ATTRS static __inline__ uint32_t __gpu_thread_id_y(void) {
+  return __builtin_spirv_local_invocation_id(1);
+}
+
+// Returns the 'z' dimension id of the workitem in the current workgroup.
+_DEFAULT_FN_ATTRS static __inline__ uint32_t __gpu_thread_id_z(void) {
+  return __builtin_spirv_local_invocation_id(2);
+}
+
+// Returns the size of a wavefront, either 32 or 64 depending on hardware
+// and compilation options.
+_DEFAULT_FN_ATTRS static __inline__ uint32_t __gpu_num_lanes(void) {
+  return __builtin_spirv_subgroup_size();
+}
+
+// Returns the id of the thread inside of a wavefront executing together.
+_DEFAULT_FN_ATTRS static __inline__ uint32_t __gpu_lane_id(void) {
+  return __builtin_spirv_subgroup_local_invocation_id();
+}
+
+// Returns the bit-mask of active threads in the current wavefront. This
+// implementation is incorrect if the target uses more than 64 lanes.
+_DEFAULT_FN_ATTRS static __inline__ uint64_t __gpu_lane_mask(void) {
+  uint32_t [[clang::ext_vector_type(4)]] __mask =
+      __builtin_spirv_subgroup_ballot(1);
+  return __builtin_bit_cast(uint64_t,
+                            __builtin_shufflevector(__mask, __mask, 0, 1));
+}
+
+// Copies the value from the first active thread in the wavefront to the rest.
+_DEFAULT_FN_ATTRS static __inline__ uint32_t
+__gpu_read_first_lane_u32(uint64_t __lane_mask, uint32_t __x) {
+  return __builtin_spirv_subgroup_shuffle(__x,
+                                          __builtin_ctzg(__gpu_lane_mask()));
+}
+
+// Returns a bitmask of the lanes in \p __lane_mask for which \p __x is true.
+// This implementation is incorrect if the target uses more than 64 lanes.
+_DEFAULT_FN_ATTRS static __inline__ uint64_t __gpu_ballot(uint64_t __lane_mask,
+                                                          bool __x) {
+  // The lane_mask & gives the nvptx semantics when lane_mask is a subset of
+  // the active threads.
+  uint32_t [[clang::ext_vector_type(4)]] __mask =
+      __builtin_spirv_subgroup_ballot(__x);
+  return __lane_mask & __builtin_bit_cast(uint64_t, __builtin_shufflevector(
+                                                        __mask, __mask, 0, 1));
+}
+
+// Waits for all the threads in the block to converge and issues a fence.
+_DEFAULT_FN_ATTRS static __inline__ void __gpu_sync_threads(void) {
+  __builtin_spirv_group_barrier();
+}
+
+// Wait for all threads in the wavefront to converge, this is a noop on SPIR-V.
+_DEFAULT_FN_ATTRS static __inline__ void __gpu_sync_lane(uint64_t __lane_mask) {
+}
+
+// Shuffles the lanes inside the wavefront according to the given index.
+_DEFAULT_FN_ATTRS static __inline__ uint32_t
+__gpu_shuffle_idx_u32(uint64_t __lane_mask, uint32_t __idx, uint32_t __x,
+                      uint32_t __width) {
+  uint32_t __lane = __idx + (__gpu_lane_id() & ~(__width - 1));
+  return __builtin_spirv_subgroup_shuffle(__x, __lane);
+}
+
+// Returns a bitmask marking all lanes that have the same value of __x.
+_DEFAULT_FN_ATTRS static __inline__ uint64_t
+__gpu_match_any_u32(uint64_t __lane_mask, uint32_t __x) {
+  return __gpu_match_any_u32_impl(__lane_mask, __x);
+}
+
+// Returns a bitmask marking all lanes that have the same value of __x.
+_DEFAULT_FN_ATTRS static __inline__ uint64_t
+__gpu_match_any_u64(uint64_t __lane_mask, uint64_t __x) {
+  return __gpu_match_any_u64_impl(__lane_mask, __x);
+}
+
+// Returns the current lane mask if every lane contains __x.
+_DEFAULT_FN_ATTRS static __inline__ uint64_t
+__gpu_match_all_u32(uint64_t __lane_mask, uint32_t __x) {
+  return __gpu_match_all_u32_impl(__lane_mask, __x);
+}
+
+// Returns the current lane mask if every lane contains __x.
+_DEFAULT_FN_ATTRS static __inline__ uint64_t
+__gpu_match_all_u64(uint64_t __lane_mask, uint64_t __x) {
+  return __gpu_match_all_u64_impl(__lane_mask, __x);
+}
+
+// SPIR-V does not expose this, always return false.
+_DEFAULT_FN_ATTRS static __inline__ bool __gpu_is_ptr_local(void *ptr) {
+  return 0;
+}
+
+// SPIR-V does not expose this, always return false.
+_DEFAULT_FN_ATTRS static __inline__ bool __gpu_is_ptr_private(void *ptr) {
+  return 0;
+}
+
+// SPIR-V only supports 'OpTerminateInvocation' in fragment shaders.
+_DEFAULT_FN_ATTRS [[noreturn]] static __inline__ void __gpu_exit(void) {
+  __builtin_trap();
+}
+
+// This is a no-op as SPIR-V does not support it.
+_DEFAULT_FN_ATTRS static __inline__ void __gpu_thread_suspend(void) {}
+
+_Pragma("omp end declare variant");
+_Pragma("omp end declare target");
+
+#endif // __SPIRVINTRIN_H
diff --git a/lib/include/stddefer.h b/lib/include/stddefer.h
new file mode 100644
index 0000000000..162876ddfa
--- /dev/null
+++ b/lib/include/stddefer.h
@@ -0,0 +1,19 @@
+/*===---- stddefer.h - Standard header for 'defer' -------------------------===
+ *
+ * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+ * See https://llvm.org/LICENSE.txt for license information.
+ * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+ *
+ *===-----------------------------------------------------------------------===
+ */
+
+#ifndef __CLANG_STDDEFER_H
+#define __CLANG_STDDEFER_H
+
+/* Provide 'defer' if '_Defer' is supported. */
+#ifdef __STDC_DEFER_TS25755__
+#define __STDC_VERSION_STDDEFER_H__ 202602L
+#define defer _Defer
+#endif
+
+#endif /* __CLANG_STDDEFER_H */
diff --git a/lib/include/tmmintrin.h b/lib/include/tmmintrin.h
index 371cc82e3d..cb4b36ea73 100644
--- a/lib/include/tmmintrin.h
+++ b/lib/include/tmmintrin.h
@@ -17,21 +17,21 @@
 #include <pmmintrin.h>
 
 /* Define the default attributes for the functions in this file. */
-#if defined(__EVEX512__) && !defined(__AVX10_1_512__)
-#define __DEFAULT_FN_ATTRS                                                     \
-  __attribute__((__always_inline__, __nodebug__,                               \
-                 __target__("ssse3,no-evex512"), __min_vector_width__(128)))
-#else
 #define __DEFAULT_FN_ATTRS                                                     \
   __attribute__((__always_inline__, __nodebug__, __target__("ssse3"),          \
                  __min_vector_width__(128)))
-#endif
 
 #define __trunc64(x)                                                           \
   (__m64) __builtin_shufflevector((__v2di)(x), __extension__(__v2di){}, 0)
-#define __anyext128(x)                                                         \
+#define __zext128(x)                                                           \
   (__m128i) __builtin_shufflevector((__v2si)(x), __extension__(__v2si){}, 0,   \
-                                    1, -1, -1)
+                                    1, 2, 3)
+
+#if defined(__cplusplus) && (__cplusplus >= 201103L)
+#define __DEFAULT_FN_ATTRS_CONSTEXPR __DEFAULT_FN_ATTRS constexpr
+#else
+#define __DEFAULT_FN_ATTRS_CONSTEXPR __DEFAULT_FN_ATTRS
+#endif
 
 /// Computes the absolute value of each of the packed 8-bit signed
 ///    integers in the source operand and stores the 8-bit unsigned integer
@@ -45,9 +45,7 @@
 ///    A 64-bit vector of [8 x i8].
 /// \returns A 64-bit integer vector containing the absolute values of the
 ///    elements in the operand.
-static __inline__ __m64 __DEFAULT_FN_ATTRS
-_mm_abs_pi8(__m64 __a)
-{
+static __inline__ __m64 __DEFAULT_FN_ATTRS_CONSTEXPR _mm_abs_pi8(__m64 __a) {
   return (__m64)__builtin_elementwise_abs((__v8qs)__a);
 }
 
@@ -63,10 +61,9 @@ _mm_abs_pi8(__m64 __a)
 ///    A 128-bit vector of [16 x i8].
 /// \returns A 128-bit integer vector containing the absolute values of the
 ///    elements in the operand.
-static __inline__ __m128i __DEFAULT_FN_ATTRS
-_mm_abs_epi8(__m128i __a)
-{
-    return (__m128i)__builtin_elementwise_abs((__v16qs)__a);
+static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR
+_mm_abs_epi8(__m128i __a) {
+  return (__m128i)__builtin_elementwise_abs((__v16qs)__a);
 }
 
 /// Computes the absolute value of each of the packed 16-bit signed
@@ -81,10 +78,8 @@ _mm_abs_epi8(__m128i __a)
 ///    A 64-bit vector of [4 x i16].
 /// \returns A 64-bit integer vector containing the absolute values of the
 ///    elements in the operand.
-static __inline__ __m64 __DEFAULT_FN_ATTRS
-_mm_abs_pi16(__m64 __a)
-{
-    return (__m64)__builtin_elementwise_abs((__v4hi)__a);
+static __inline__ __m64 __DEFAULT_FN_ATTRS_CONSTEXPR _mm_abs_pi16(__m64 __a) {
+  return (__m64)__builtin_elementwise_abs((__v4hi)__a);
 }
 
 /// Computes the absolute value of each of the packed 16-bit signed
@@ -99,10 +94,9 @@ _mm_abs_pi16(__m64 __a)
 ///    A 128-bit vector of [8 x i16].
 /// \returns A 128-bit integer vector containing the absolute values of the
 ///    elements in the operand.
-static __inline__ __m128i __DEFAULT_FN_ATTRS
-_mm_abs_epi16(__m128i __a)
-{
-    return (__m128i)__builtin_elementwise_abs((__v8hi)__a);
+static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR
+_mm_abs_epi16(__m128i __a) {
+  return (__m128i)__builtin_elementwise_abs((__v8hi)__a);
 }
 
 /// Computes the absolute value of each of the packed 32-bit signed
@@ -117,10 +111,8 @@ _mm_abs_epi16(__m128i __a)
 ///    A 64-bit vector of [2 x i32].
 /// \returns A 64-bit integer vector containing the absolute values of the
 ///    elements in the operand.
-static __inline__ __m64 __DEFAULT_FN_ATTRS
-_mm_abs_pi32(__m64 __a)
-{
-    return (__m64)__builtin_elementwise_abs((__v2si)__a);
+static __inline__ __m64 __DEFAULT_FN_ATTRS_CONSTEXPR _mm_abs_pi32(__m64 __a) {
+  return (__m64)__builtin_elementwise_abs((__v2si)__a);
 }
 
 /// Computes the absolute value of each of the packed 32-bit signed
@@ -135,10 +127,9 @@ _mm_abs_pi32(__m64 __a)
 ///    A 128-bit vector of [4 x i32].
 /// \returns A 128-bit integer vector containing the absolute values of the
 ///    elements in the operand.
-static __inline__ __m128i __DEFAULT_FN_ATTRS
-_mm_abs_epi32(__m128i __a)
-{
-    return (__m128i)__builtin_elementwise_abs((__v4si)__a);
+static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR
+_mm_abs_epi32(__m128i __a) {
+  return (__m128i)__builtin_elementwise_abs((__v4si)__a);
 }
 
 /// Concatenates the two 128-bit integer vector operands, and
@@ -184,11 +175,12 @@ _mm_abs_epi32(__m128i __a)
 ///    An immediate operand specifying how many bytes to right-shift the result.
 /// \returns A 64-bit integer vector containing the concatenated right-shifted
 ///    value.
-#define _mm_alignr_pi8(a, b, n) \
-  ((__m64)__builtin_shufflevector(                                       \
-       __builtin_ia32_psrldqi128_byteshift(                              \
-           __builtin_shufflevector((__v1di)(a), (__v1di)(b), 1, 0),      \
-           (n)), __extension__ (__v2di){}, 0))
+#define _mm_alignr_pi8(a, b, n)                                                \
+  ((__m64)__builtin_shufflevector(                                             \
+      (__v2di)__builtin_ia32_psrldqi128_byteshift(                             \
+          (__v16qi)__builtin_shufflevector((__v1di)(a), (__v1di)(b), 1, 0),    \
+          (n)),                                                                \
+      __extension__(__v2di){}, 0))
 
 /// Horizontally adds the adjacent pairs of values contained in 2 packed
 ///    128-bit vectors of [8 x i16].
@@ -207,10 +199,9 @@ _mm_abs_epi32(__m128i __a)
 ///    destination.
 /// \returns A 128-bit vector of [8 x i16] containing the horizontal sums of
 ///    both operands.
-static __inline__ __m128i __DEFAULT_FN_ATTRS
-_mm_hadd_epi16(__m128i __a, __m128i __b)
-{
-    return (__m128i)__builtin_ia32_phaddw128((__v8hi)__a, (__v8hi)__b);
+static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR
+_mm_hadd_epi16(__m128i __a, __m128i __b) {
+  return (__m128i)__builtin_ia32_phaddw128((__v8hi)__a, (__v8hi)__b);
 }
 
 /// Horizontally adds the adjacent pairs of values contained in 2 packed
@@ -230,10 +221,9 @@ _mm_hadd_epi16(__m128i __a, __m128i __b)
 ///    destination.
 /// \returns A 128-bit vector of [4 x i32] containing the horizontal sums of
 ///    both operands.
-static __inline__ __m128i __DEFAULT_FN_ATTRS
-_mm_hadd_epi32(__m128i __a, __m128i __b)
-{
-    return (__m128i)__builtin_ia32_phaddd128((__v4si)__a, (__v4si)__b);
+static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR
+_mm_hadd_epi32(__m128i __a, __m128i __b) {
+  return (__m128i)__builtin_ia32_phaddd128((__v4si)__a, (__v4si)__b);
 }
 
 /// Horizontally adds the adjacent pairs of values contained in 2 packed
@@ -253,11 +243,10 @@ _mm_hadd_epi32(__m128i __a, __m128i __b)
 ///    destination.
 /// \returns A 64-bit vector of [4 x i16] containing the horizontal sums of both
 ///    operands.
-static __inline__ __m64 __DEFAULT_FN_ATTRS
-_mm_hadd_pi16(__m64 __a, __m64 __b)
-{
-    return __trunc64(__builtin_ia32_phaddw128(
-        (__v8hi)__builtin_shufflevector(__a, __b, 0, 1), (__v8hi){}));
+static __inline__ __m64 __DEFAULT_FN_ATTRS_CONSTEXPR _mm_hadd_pi16(__m64 __a,
+                                                                   __m64 __b) {
+  return __trunc64(__builtin_ia32_phaddw128(
+      (__v8hi)__builtin_shufflevector(__a, __b, 0, 1), (__v8hi){}));
 }
 
 /// Horizontally adds the adjacent pairs of values contained in 2 packed
@@ -277,11 +266,10 @@ _mm_hadd_pi16(__m64 __a, __m64 __b)
 ///    destination.
 /// \returns A 64-bit vector of [2 x i32] containing the horizontal sums of both
 ///    operands.
-static __inline__ __m64 __DEFAULT_FN_ATTRS
-_mm_hadd_pi32(__m64 __a, __m64 __b)
-{
-    return __trunc64(__builtin_ia32_phaddd128(
-        (__v4si)__builtin_shufflevector(__a, __b, 0, 1), (__v4si){}));
+static __inline__ __m64 __DEFAULT_FN_ATTRS_CONSTEXPR _mm_hadd_pi32(__m64 __a,
+                                                                   __m64 __b) {
+  return __trunc64(__builtin_ia32_phaddd128(
+      (__v4si)__builtin_shufflevector(__a, __b, 0, 1), (__v4si){}));
 }
 
 /// Horizontally adds, with saturation, the adjacent pairs of values contained
@@ -304,10 +292,9 @@ _mm_hadd_pi32(__m64 __a, __m64 __b)
 ///    destination.
 /// \returns A 128-bit vector of [8 x i16] containing the horizontal saturated
 ///    sums of both operands.
-static __inline__ __m128i __DEFAULT_FN_ATTRS
-_mm_hadds_epi16(__m128i __a, __m128i __b)
-{
-    return (__m128i)__builtin_ia32_phaddsw128((__v8hi)__a, (__v8hi)__b);
+static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR
+_mm_hadds_epi16(__m128i __a, __m128i __b) {
+  return (__m128i)__builtin_ia32_phaddsw128((__v8hi)__a, (__v8hi)__b);
 }
 
 /// Horizontally adds, with saturation, the adjacent pairs of values contained
@@ -330,11 +317,10 @@ _mm_hadds_epi16(__m128i __a, __m128i __b)
 ///    destination.
 /// \returns A 64-bit vector of [4 x i16] containing the horizontal saturated
 ///    sums of both operands.
-static __inline__ __m64 __DEFAULT_FN_ATTRS
-_mm_hadds_pi16(__m64 __a, __m64 __b)
-{
-    return __trunc64(__builtin_ia32_phaddsw128(
-        (__v8hi)__builtin_shufflevector(__a, __b, 0, 1), (__v8hi){}));
+static __inline__ __m64 __DEFAULT_FN_ATTRS_CONSTEXPR _mm_hadds_pi16(__m64 __a,
+                                                                    __m64 __b) {
+  return __trunc64(__builtin_ia32_phaddsw128(
+      (__v8hi)__builtin_shufflevector(__a, __b, 0, 1), (__v8hi){}));
 }
 
 /// Horizontally subtracts the adjacent pairs of values contained in 2
@@ -354,10 +340,9 @@ _mm_hadds_pi16(__m64 __a, __m64 __b)
 ///    the destination.
 /// \returns A 128-bit vector of [8 x i16] containing the horizontal differences
 ///    of both operands.
-static __inline__ __m128i __DEFAULT_FN_ATTRS
-_mm_hsub_epi16(__m128i __a, __m128i __b)
-{
-    return (__m128i)__builtin_ia32_phsubw128((__v8hi)__a, (__v8hi)__b);
+static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR
+_mm_hsub_epi16(__m128i __a, __m128i __b) {
+  return (__m128i)__builtin_ia32_phsubw128((__v8hi)__a, (__v8hi)__b);
 }
 
 /// Horizontally subtracts the adjacent pairs of values contained in 2
@@ -377,10 +362,9 @@ _mm_hsub_epi16(__m128i __a, __m128i __b)
 ///    the destination.
 /// \returns A 128-bit vector of [4 x i32] containing the horizontal differences
 ///    of both operands.
-static __inline__ __m128i __DEFAULT_FN_ATTRS
-_mm_hsub_epi32(__m128i __a, __m128i __b)
-{
-    return (__m128i)__builtin_ia32_phsubd128((__v4si)__a, (__v4si)__b);
+static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR
+_mm_hsub_epi32(__m128i __a, __m128i __b) {
+  return (__m128i)__builtin_ia32_phsubd128((__v4si)__a, (__v4si)__b);
 }
 
 /// Horizontally subtracts the adjacent pairs of values contained in 2
@@ -400,11 +384,10 @@ _mm_hsub_epi32(__m128i __a, __m128i __b)
 ///    the destination.
 /// \returns A 64-bit vector of [4 x i16] containing the horizontal differences
 ///    of both operands.
-static __inline__ __m64 __DEFAULT_FN_ATTRS
-_mm_hsub_pi16(__m64 __a, __m64 __b)
-{
-    return __trunc64(__builtin_ia32_phsubw128(
-        (__v8hi)__builtin_shufflevector(__a, __b, 0, 1), (__v8hi){}));
+static __inline__ __m64 __DEFAULT_FN_ATTRS_CONSTEXPR _mm_hsub_pi16(__m64 __a,
+                                                                   __m64 __b) {
+  return __trunc64(__builtin_ia32_phsubw128(
+      (__v8hi)__builtin_shufflevector(__a, __b, 0, 1), (__v8hi){}));
 }
 
 /// Horizontally subtracts the adjacent pairs of values contained in 2
@@ -424,11 +407,10 @@ _mm_hsub_pi16(__m64 __a, __m64 __b)
 ///    the destination.
 /// \returns A 64-bit vector of [2 x i32] containing the horizontal differences
 ///    of both operands.
-static __inline__ __m64 __DEFAULT_FN_ATTRS
-_mm_hsub_pi32(__m64 __a, __m64 __b)
-{
-    return __trunc64(__builtin_ia32_phsubd128(
-        (__v4si)__builtin_shufflevector(__a, __b, 0, 1), (__v4si){}));
+static __inline__ __m64 __DEFAULT_FN_ATTRS_CONSTEXPR _mm_hsub_pi32(__m64 __a,
+                                                                   __m64 __b) {
+  return __trunc64(__builtin_ia32_phsubd128(
+      (__v4si)__builtin_shufflevector(__a, __b, 0, 1), (__v4si){}));
 }
 
 /// Horizontally subtracts, with saturation, the adjacent pairs of values
@@ -451,10 +433,9 @@ _mm_hsub_pi32(__m64 __a, __m64 __b)
 ///    the destination.
 /// \returns A 128-bit vector of [8 x i16] containing the horizontal saturated
 ///    differences of both operands.
-static __inline__ __m128i __DEFAULT_FN_ATTRS
-_mm_hsubs_epi16(__m128i __a, __m128i __b)
-{
-    return (__m128i)__builtin_ia32_phsubsw128((__v8hi)__a, (__v8hi)__b);
+static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR
+_mm_hsubs_epi16(__m128i __a, __m128i __b) {
+  return (__m128i)__builtin_ia32_phsubsw128((__v8hi)__a, (__v8hi)__b);
 }
 
 /// Horizontally subtracts, with saturation, the adjacent pairs of values
@@ -477,11 +458,10 @@ _mm_hsubs_epi16(__m128i __a, __m128i __b)
 ///    the destination.
 /// \returns A 64-bit vector of [4 x i16] containing the horizontal saturated
 ///    differences of both operands.
-static __inline__ __m64 __DEFAULT_FN_ATTRS
-_mm_hsubs_pi16(__m64 __a, __m64 __b)
-{
-    return __trunc64(__builtin_ia32_phsubsw128(
-        (__v8hi)__builtin_shufflevector(__a, __b, 0, 1), (__v8hi){}));
+static __inline__ __m64 __DEFAULT_FN_ATTRS_CONSTEXPR _mm_hsubs_pi16(__m64 __a,
+                                                                    __m64 __b) {
+  return __trunc64(__builtin_ia32_phsubsw128(
+      (__v8hi)__builtin_shufflevector(__a, __b, 0, 1), (__v8hi){}));
 }
 
 /// Multiplies corresponding pairs of packed 8-bit unsigned integer
@@ -512,10 +492,9 @@ _mm_hsubs_pi16(__m64 __a, __m64 __b)
 ///    \a R5 := (\a __a10 * \a __b10) + (\a __a11 * \a __b11) \n
 ///    \a R6 := (\a __a12 * \a __b12) + (\a __a13 * \a __b13) \n
 ///    \a R7 := (\a __a14 * \a __b14) + (\a __a15 * \a __b15)
-static __inline__ __m128i __DEFAULT_FN_ATTRS
-_mm_maddubs_epi16(__m128i __a, __m128i __b)
-{
-    return (__m128i)__builtin_ia32_pmaddubsw128((__v16qi)__a, (__v16qi)__b);
+static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR
+_mm_maddubs_epi16(__m128i __a, __m128i __b) {
+  return (__m128i)__builtin_ia32_pmaddubsw128((__v16qi)__a, (__v16qi)__b);
 }
 
 /// Multiplies corresponding pairs of packed 8-bit unsigned integer
@@ -542,11 +521,10 @@ _mm_maddubs_epi16(__m128i __a, __m128i __b)
 ///    \a R1 := (\a __a2 * \a __b2) + (\a __a3 * \a __b3) \n
 ///    \a R2 := (\a __a4 * \a __b4) + (\a __a5 * \a __b5) \n
 ///    \a R3 := (\a __a6 * \a __b6) + (\a __a7 * \a __b7)
-static __inline__ __m64 __DEFAULT_FN_ATTRS
-_mm_maddubs_pi16(__m64 __a, __m64 __b)
-{
-    return __trunc64(__builtin_ia32_pmaddubsw128((__v16qi)__anyext128(__a),
-                                                 (__v16qi)__anyext128(__b)));
+static __inline__ __m64 __DEFAULT_FN_ATTRS_CONSTEXPR
+_mm_maddubs_pi16(__m64 __a, __m64 __b) {
+  return __trunc64(__builtin_ia32_pmaddubsw128((__v16qi)__zext128(__a),
+                                               (__v16qi)__zext128(__b)));
 }
 
 /// Multiplies packed 16-bit signed integer values, truncates the 32-bit
@@ -563,10 +541,9 @@ _mm_maddubs_pi16(__m64 __a, __m64 __b)
 ///    A 128-bit vector of [8 x i16] containing one of the source operands.
 /// \returns A 128-bit vector of [8 x i16] containing the rounded and scaled
 ///    products of both operands.
-static __inline__ __m128i __DEFAULT_FN_ATTRS
-_mm_mulhrs_epi16(__m128i __a, __m128i __b)
-{
-    return (__m128i)__builtin_ia32_pmulhrsw128((__v8hi)__a, (__v8hi)__b);
+static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR
+_mm_mulhrs_epi16(__m128i __a, __m128i __b) {
+  return (__m128i)__builtin_ia32_pmulhrsw128((__v8hi)__a, (__v8hi)__b);
 }
 
 /// Multiplies packed 16-bit signed integer values, truncates the 32-bit
@@ -583,11 +560,10 @@ _mm_mulhrs_epi16(__m128i __a, __m128i __b)
 ///    A 64-bit vector of [4 x i16] containing one of the source operands.
 /// \returns A 64-bit vector of [4 x i16] containing the rounded and scaled
 ///    products of both operands.
-static __inline__ __m64 __DEFAULT_FN_ATTRS
-_mm_mulhrs_pi16(__m64 __a, __m64 __b)
-{
-    return __trunc64(__builtin_ia32_pmulhrsw128((__v8hi)__anyext128(__a),
-                                                (__v8hi)__anyext128(__b)));
+static __inline__ __m64 __DEFAULT_FN_ATTRS_CONSTEXPR
+_mm_mulhrs_pi16(__m64 __a, __m64 __b) {
+  return __trunc64(__builtin_ia32_pmulhrsw128((__v8hi)__zext128(__a),
+                                              (__v8hi)__zext128(__b)));
 }
 
 /// Copies the 8-bit integers from a 128-bit integer vector to the
@@ -610,10 +586,9 @@ _mm_mulhrs_pi16(__m64 __a, __m64 __b)
 ///    Bits [6:4] Reserved.  \n
 ///    Bits [3:0] select the source byte to be copied.
 /// \returns A 128-bit integer vector containing the copied or cleared values.
-static __inline__ __m128i __DEFAULT_FN_ATTRS
-_mm_shuffle_epi8(__m128i __a, __m128i __b)
-{
-    return (__m128i)__builtin_ia32_pshufb128((__v16qi)__a, (__v16qi)__b);
+static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR
+_mm_shuffle_epi8(__m128i __a, __m128i __b) {
+  return (__m128i)__builtin_ia32_pshufb128((__v16qi)__a, (__v16qi)__b);
 }
 
 /// Copies the 8-bit integers from a 64-bit integer vector to the
@@ -635,13 +610,12 @@ _mm_shuffle_epi8(__m128i __a, __m128i __b)
 ///    destination. \n
 ///    Bits [2:0] select the source byte to be copied.
 /// \returns A 64-bit integer vector containing the copied or cleared values.
-static __inline__ __m64 __DEFAULT_FN_ATTRS
-_mm_shuffle_pi8(__m64 __a, __m64 __b)
-{
-    return __trunc64(__builtin_ia32_pshufb128(
-        (__v16qi)__builtin_shufflevector(
-            (__v2si)(__a), __extension__ (__v2si){}, 0, 1, 0, 1),
-        (__v16qi)__anyext128(__b)));
+static __inline__ __m64 __DEFAULT_FN_ATTRS_CONSTEXPR
+_mm_shuffle_pi8(__m64 __a, __m64 __b) {
+  return __trunc64(__builtin_ia32_pshufb128(
+      (__v16qi)__builtin_shufflevector((__v2si)(__a), __extension__(__v2si){},
+                                       0, 1, 0, 1),
+      (__v16qi)__zext128(__b)));
 }
 
 /// For each 8-bit integer in the first source operand, perform one of
@@ -664,10 +638,9 @@ _mm_shuffle_pi8(__m64 __a, __m64 __b)
 ///    A 128-bit integer vector containing control bytes corresponding to
 ///    positions in the destination.
 /// \returns A 128-bit integer vector containing the resultant values.
-static __inline__ __m128i __DEFAULT_FN_ATTRS
-_mm_sign_epi8(__m128i __a, __m128i __b)
-{
-    return (__m128i)__builtin_ia32_psignb128((__v16qi)__a, (__v16qi)__b);
+static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR
+_mm_sign_epi8(__m128i __a, __m128i __b) {
+  return (__m128i)__builtin_ia32_psignb128((__v16qi)__a, (__v16qi)__b);
 }
 
 /// For each 16-bit integer in the first source operand, perform one of
@@ -690,10 +663,9 @@ _mm_sign_epi8(__m128i __a, __m128i __b)
 ///    A 128-bit integer vector containing control words corresponding to
 ///    positions in the destination.
 /// \returns A 128-bit integer vector containing the resultant values.
-static __inline__ __m128i __DEFAULT_FN_ATTRS
-_mm_sign_epi16(__m128i __a, __m128i __b)
-{
-    return (__m128i)__builtin_ia32_psignw128((__v8hi)__a, (__v8hi)__b);
+static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR
+_mm_sign_epi16(__m128i __a, __m128i __b) {
+  return (__m128i)__builtin_ia32_psignw128((__v8hi)__a, (__v8hi)__b);
 }
 
 /// For each 32-bit integer in the first source operand, perform one of
@@ -716,10 +688,9 @@ _mm_sign_epi16(__m128i __a, __m128i __b)
 ///    A 128-bit integer vector containing control doublewords corresponding to
 ///    positions in the destination.
 /// \returns A 128-bit integer vector containing the resultant values.
-static __inline__ __m128i __DEFAULT_FN_ATTRS
-_mm_sign_epi32(__m128i __a, __m128i __b)
-{
-    return (__m128i)__builtin_ia32_psignd128((__v4si)__a, (__v4si)__b);
+static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR
+_mm_sign_epi32(__m128i __a, __m128i __b) {
+  return (__m128i)__builtin_ia32_psignd128((__v4si)__a, (__v4si)__b);
 }
 
 /// For each 8-bit integer in the first source operand, perform one of
@@ -742,11 +713,10 @@ _mm_sign_epi32(__m128i __a, __m128i __b)
 ///    A 64-bit integer vector containing control bytes corresponding to
 ///    positions in the destination.
 /// \returns A 64-bit integer vector containing the resultant values.
-static __inline__ __m64 __DEFAULT_FN_ATTRS
-_mm_sign_pi8(__m64 __a, __m64 __b)
-{
-    return __trunc64(__builtin_ia32_psignb128((__v16qi)__anyext128(__a),
-                                              (__v16qi)__anyext128(__b)));
+static __inline__ __m64 __DEFAULT_FN_ATTRS_CONSTEXPR _mm_sign_pi8(__m64 __a,
+                                                                  __m64 __b) {
+  return __trunc64(__builtin_ia32_psignb128((__v16qi)__zext128(__a),
+                                            (__v16qi)__zext128(__b)));
 }
 
 /// For each 16-bit integer in the first source operand, perform one of
@@ -769,11 +739,10 @@ _mm_sign_pi8(__m64 __a, __m64 __b)
 ///    A 64-bit integer vector containing control words corresponding to
 ///    positions in the destination.
 /// \returns A 64-bit integer vector containing the resultant values.
-static __inline__ __m64 __DEFAULT_FN_ATTRS
-_mm_sign_pi16(__m64 __a, __m64 __b)
-{
-    return __trunc64(__builtin_ia32_psignw128((__v8hi)__anyext128(__a),
-                                              (__v8hi)__anyext128(__b)));
+static __inline__ __m64 __DEFAULT_FN_ATTRS_CONSTEXPR _mm_sign_pi16(__m64 __a,
+                                                                   __m64 __b) {
+  return __trunc64(
+      __builtin_ia32_psignw128((__v8hi)__zext128(__a), (__v8hi)__zext128(__b)));
 }
 
 /// For each 32-bit integer in the first source operand, perform one of
@@ -796,15 +765,15 @@ _mm_sign_pi16(__m64 __a, __m64 __b)
 ///    A 64-bit integer vector containing two control doublewords corresponding
 ///    to positions in the destination.
 /// \returns A 64-bit integer vector containing the resultant values.
-static __inline__ __m64 __DEFAULT_FN_ATTRS
-_mm_sign_pi32(__m64 __a, __m64 __b)
-{
-    return __trunc64(__builtin_ia32_psignd128((__v4si)__anyext128(__a),
-                                              (__v4si)__anyext128(__b)));
+static __inline__ __m64 __DEFAULT_FN_ATTRS_CONSTEXPR _mm_sign_pi32(__m64 __a,
+                                                                   __m64 __b) {
+  return __trunc64(
+      __builtin_ia32_psignd128((__v4si)__zext128(__a), (__v4si)__zext128(__b)));
 }
 
-#undef __anyext128
+#undef __zext128
 #undef __trunc64
 #undef __DEFAULT_FN_ATTRS
+#undef __DEFAULT_FN_ATTRS_CONSTEXPR
 
 #endif /* __TMMINTRIN_H */
diff --git a/lib/include/vaesintrin.h b/lib/include/vaesintrin.h
index d7c162f5c0..5194ca6c50 100644
--- a/lib/include/vaesintrin.h
+++ b/lib/include/vaesintrin.h
@@ -19,8 +19,7 @@
 
 /* Default attributes for ZMM forms. */
 #define __DEFAULT_FN_ATTRS_F                                                   \
-  __attribute__((__always_inline__, __nodebug__,                               \
-                 __target__("avx512f,evex512,vaes"),                           \
+  __attribute__((__always_inline__, __nodebug__, __target__("avx512f,vaes"),   \
                  __min_vector_width__(512)))
 
 static __inline__ __m256i __DEFAULT_FN_ATTRS
diff --git a/lib/include/xmmintrin.h b/lib/include/xmmintrin.h
index 6a64369773..ab0f0c1690 100644
--- a/lib/include/xmmintrin.h
+++ b/lib/include/xmmintrin.h
@@ -16,7 +16,6 @@
 
 #include <mmintrin.h>
 
-typedef int __v4si __attribute__((__vector_size__(16)));
 typedef float __v4sf __attribute__((__vector_size__(16)));
 typedef float __m128 __attribute__((__vector_size__(16), __aligned__(16)));
 
@@ -24,6 +23,8 @@ typedef float __m128_u __attribute__((__vector_size__(16), __aligned__(1)));
 
 /* Unsigned types */
 typedef unsigned int __v4su __attribute__((__vector_size__(16)));
+typedef unsigned short __v8hu __attribute__((__vector_size__(16)));
+typedef unsigned char __v16qu __attribute__((__vector_size__(16)));
 
 /* This header should only be included in a hosted environment as it depends on
  * a standard library to provide allocation routines. */
@@ -32,21 +33,12 @@ typedef unsigned int __v4su __attribute__((__vector_size__(16)));
 #endif
 
 /* Define the default attributes for the functions in this file. */
-#if defined(__EVEX512__) && !defined(__AVX10_1_512__)
-#define __DEFAULT_FN_ATTRS                                                     \
-  __attribute__((__always_inline__, __nodebug__, __target__("sse,no-evex512"), \
-                 __min_vector_width__(128)))
-#define __DEFAULT_FN_ATTRS_SSE2                                                \
-  __attribute__((__always_inline__, __nodebug__,                               \
-                 __target__("sse2,no-evex512"), __min_vector_width__(128)))
-#else
 #define __DEFAULT_FN_ATTRS                                                     \
   __attribute__((__always_inline__, __nodebug__, __target__("sse"),            \
                  __min_vector_width__(128)))
 #define __DEFAULT_FN_ATTRS_SSE2                                                \
   __attribute__((__always_inline__, __nodebug__, __target__("sse2"),           \
                  __min_vector_width__(128)))
-#endif
 
 #if defined(__cplusplus) && (__cplusplus >= 201103L)
 #define __DEFAULT_FN_ATTRS_CONSTEXPR __DEFAULT_FN_ATTRS constexpr
@@ -239,10 +231,9 @@ _mm_div_ps(__m128 __a, __m128 __b) {
 ///    used in the calculation.
 /// \returns A 128-bit vector of [4 x float] containing the square root of the
 ///    value in the low-order bits of the operand.
-static __inline__ __m128 __DEFAULT_FN_ATTRS
-_mm_sqrt_ss(__m128 __a)
-{
-  return (__m128)__builtin_ia32_sqrtss((__v4sf)__a);
+static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_sqrt_ss(__m128 __a) {
+  __a[0] = __builtin_elementwise_sqrt(__a[0]);
+  return __a;
 }
 
 /// Calculates the square roots of the values stored in a 128-bit vector
@@ -256,10 +247,8 @@ _mm_sqrt_ss(__m128 __a)
 ///    A 128-bit vector of [4 x float].
 /// \returns A 128-bit vector of [4 x float] containing the square roots of the
 ///    values in the operand.
-static __inline__ __m128 __DEFAULT_FN_ATTRS
-_mm_sqrt_ps(__m128 __a)
-{
-  return __builtin_ia32_sqrtps((__v4sf)__a);
+static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_sqrt_ps(__m128 __a) {
+  return __builtin_elementwise_sqrt(__a);
 }
 
 /// Calculates the approximate reciprocal of the value stored in the
@@ -352,9 +341,7 @@ _mm_rsqrt_ps(__m128 __a)
 /// \returns A 128-bit vector of [4 x float] whose lower 32 bits contain the
 ///    minimum value between both operands. The upper 96 bits are copied from
 ///    the upper 96 bits of the first source operand.
-static __inline__ __m128 __DEFAULT_FN_ATTRS
-_mm_min_ss(__m128 __a, __m128 __b)
-{
+static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_min_ss(__m128 __a, __m128 __b) {
   return __builtin_ia32_minss((__v4sf)__a, (__v4sf)__b);
 }
 
@@ -373,9 +360,8 @@ _mm_min_ss(__m128 __a, __m128 __b)
 ///    A 128-bit vector of [4 x float] containing one of the operands.
 /// \returns A 128-bit vector of [4 x float] containing the minimum values
 ///    between both operands.
-static __inline__ __m128 __DEFAULT_FN_ATTRS
-_mm_min_ps(__m128 __a, __m128 __b)
-{
+static __inline__ __m128 __DEFAULT_FN_ATTRS_CONSTEXPR _mm_min_ps(__m128 __a,
+                                                                 __m128 __b) {
   return __builtin_ia32_minps((__v4sf)__a, (__v4sf)__b);
 }
 
@@ -398,9 +384,7 @@ _mm_min_ps(__m128 __a, __m128 __b)
 /// \returns A 128-bit vector of [4 x float] whose lower 32 bits contain the
 ///    maximum value between both operands. The upper 96 bits are copied from
 ///    the upper 96 bits of the first source operand.
-static __inline__ __m128 __DEFAULT_FN_ATTRS
-_mm_max_ss(__m128 __a, __m128 __b)
-{
+static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_max_ss(__m128 __a, __m128 __b) {
   return __builtin_ia32_maxss((__v4sf)__a, (__v4sf)__b);
 }
 
@@ -419,9 +403,8 @@ _mm_max_ss(__m128 __a, __m128 __b)
 ///    A 128-bit vector of [4 x float] containing one of the operands.
 /// \returns A 128-bit vector of [4 x float] containing the maximum values
 ///    between both operands.
-static __inline__ __m128 __DEFAULT_FN_ATTRS
-_mm_max_ps(__m128 __a, __m128 __b)
-{
+static __inline__ __m128 __DEFAULT_FN_ATTRS_CONSTEXPR _mm_max_ps(__m128 __a,
+                                                                 __m128 __b) {
   return __builtin_ia32_maxps((__v4sf)__a, (__v4sf)__b);
 }
 
@@ -1688,7 +1671,7 @@ _mm_cvtsi64_ss(__m128 __a, long long __b) {
 /// \returns A 128-bit vector of [4 x float] whose lower 64 bits contain the
 ///    converted value of the second operand. The upper 64 bits are copied from
 ///    the upper 64 bits of the first operand.
-static __inline__ __m128 __DEFAULT_FN_ATTRS_SSE2
+static __inline__ __m128 __DEFAULT_FN_ATTRS_SSE2_CONSTEXPR
 _mm_cvtpi32_ps(__m128 __a, __m64 __b)
 {
   return (__m128)__builtin_shufflevector(
@@ -1714,7 +1697,7 @@ _mm_cvtpi32_ps(__m128 __a, __m64 __b)
 /// \returns A 128-bit vector of [4 x float] whose lower 64 bits contain the
 ///    converted value from the second operand. The upper 64 bits are copied
 ///    from the upper 64 bits of the first operand.
-static __inline__ __m128 __DEFAULT_FN_ATTRS_SSE2
+static __inline__ __m128 __DEFAULT_FN_ATTRS_SSE2_CONSTEXPR
 _mm_cvt_pi2ps(__m128 __a, __m64 __b)
 {
   return _mm_cvtpi32_ps(__a, __b);
@@ -2353,9 +2336,8 @@ void _mm_sfence(void);
 /// \param __b
 ///    A 64-bit integer vector containing one of the source operands.
 /// \returns A 64-bit integer vector containing the comparison results.
-static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2
-_mm_max_pi16(__m64 __a, __m64 __b)
-{
+static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2_CONSTEXPR
+_mm_max_pi16(__m64 __a, __m64 __b) {
   return (__m64)__builtin_elementwise_max((__v4hi)__a, (__v4hi)__b);
 }
 
@@ -2372,9 +2354,8 @@ _mm_max_pi16(__m64 __a, __m64 __b)
 /// \param __b
 ///    A 64-bit integer vector containing one of the source operands.
 /// \returns A 64-bit integer vector containing the comparison results.
-static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2
-_mm_max_pu8(__m64 __a, __m64 __b)
-{
+static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2_CONSTEXPR
+_mm_max_pu8(__m64 __a, __m64 __b) {
   return (__m64)__builtin_elementwise_max((__v8qu)__a, (__v8qu)__b);
 }
 
@@ -2391,9 +2372,8 @@ _mm_max_pu8(__m64 __a, __m64 __b)
 /// \param __b
 ///    A 64-bit integer vector containing one of the source operands.
 /// \returns A 64-bit integer vector containing the comparison results.
-static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2
-_mm_min_pi16(__m64 __a, __m64 __b)
-{
+static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2_CONSTEXPR
+_mm_min_pi16(__m64 __a, __m64 __b) {
   return (__m64)__builtin_elementwise_min((__v4hi)__a, (__v4hi)__b);
 }
 
@@ -2410,9 +2390,8 @@ _mm_min_pi16(__m64 __a, __m64 __b)
 /// \param __b
 ///    A 64-bit integer vector containing one of the source operands.
 /// \returns A 64-bit integer vector containing the comparison results.
-static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2
-_mm_min_pu8(__m64 __a, __m64 __b)
-{
+static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2_CONSTEXPR
+_mm_min_pu8(__m64 __a, __m64 __b) {
   return (__m64)__builtin_elementwise_min((__v8qu)__a, (__v8qu)__b);
 }
 
@@ -2428,9 +2407,8 @@ _mm_min_pu8(__m64 __a, __m64 __b)
 ///    A 64-bit integer vector containing the values with bits to be extracted.
 /// \returns The most significant bit from each 8-bit element in \a __a,
 ///    written to bits [7:0].
-static __inline__ int __DEFAULT_FN_ATTRS_SSE2
-_mm_movemask_pi8(__m64 __a)
-{
+static __inline__ int __DEFAULT_FN_ATTRS_SSE2_CONSTEXPR
+_mm_movemask_pi8(__m64 __a) {
   return __builtin_ia32_pmovmskb128((__v16qi)__zext128(__a));
 }
 
@@ -2447,11 +2425,11 @@ _mm_movemask_pi8(__m64 __a)
 /// \param __b
 ///    A 64-bit integer vector containing one of the source operands.
 /// \returns A 64-bit integer vector containing the products of both operands.
-static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2
+static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2_CONSTEXPR
 _mm_mulhi_pu16(__m64 __a, __m64 __b)
 {
-  return __trunc64(__builtin_ia32_pmulhuw128((__v8hi)__anyext128(__a),
-                                             (__v8hi)__anyext128(__b)));
+  return __trunc64(__builtin_ia32_pmulhuw128((__v8hu)__zext128(__a),
+                                             (__v8hu)__zext128(__b)));
 }
 
 /// Shuffles the 4 16-bit integers from a 64-bit integer vector to the
@@ -2530,8 +2508,8 @@ _mm_maskmove_si64(__m64 __d, __m64 __n, char *__p)
     // If there's a risk of spurious trap due to a 128-bit write, back up the
     // pointer by 8 bytes and shift values in registers to match.
     __p -= 8;
-    __d128 = __builtin_ia32_pslldqi128_byteshift((__v2di)__d128, 8);
-    __n128 = __builtin_ia32_pslldqi128_byteshift((__v2di)__n128, 8);
+    __d128 = (__m128i)__builtin_ia32_pslldqi128_byteshift((__v16qi)__d128, 8);
+    __n128 = (__m128i)__builtin_ia32_pslldqi128_byteshift((__v16qi)__n128, 8);
   }
 
   __builtin_ia32_maskmovdqu((__v16qi)__d128, (__v16qi)__n128, __p);
@@ -2550,11 +2528,10 @@ _mm_maskmove_si64(__m64 __d, __m64 __n, char *__p)
 /// \param __b
 ///    A 64-bit integer vector containing one of the source operands.
 /// \returns A 64-bit integer vector containing the averages of both operands.
-static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2
-_mm_avg_pu8(__m64 __a, __m64 __b)
-{
-  return __trunc64(__builtin_ia32_pavgb128((__v16qi)__anyext128(__a),
-                                           (__v16qi)__anyext128(__b)));
+static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2_CONSTEXPR
+_mm_avg_pu8(__m64 __a, __m64 __b) {
+  return __trunc64(__builtin_ia32_pavgb128((__v16qu)__zext128(__a),
+                                           (__v16qu)__zext128(__b)));
 }
 
 /// Computes the rounded averages of the packed unsigned 16-bit integer
@@ -2570,11 +2547,10 @@ _mm_avg_pu8(__m64 __a, __m64 __b)
 /// \param __b
 ///    A 64-bit integer vector containing one of the source operands.
 /// \returns A 64-bit integer vector containing the averages of both operands.
-static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2
-_mm_avg_pu16(__m64 __a, __m64 __b)
-{
-  return __trunc64(__builtin_ia32_pavgw128((__v8hi)__anyext128(__a),
-                                           (__v8hi)__anyext128(__b)));
+static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2_CONSTEXPR
+_mm_avg_pu16(__m64 __a, __m64 __b) {
+  return __trunc64(
+      __builtin_ia32_pavgw128((__v8hu)__zext128(__a), (__v8hu)__zext128(__b)));
 }
 
 /// Subtracts the corresponding 8-bit unsigned integer values of the two
@@ -2873,7 +2849,7 @@ _mm_movelh_ps(__m128 __a, __m128 __b) {
 ///    from the corresponding elements in this operand.
 /// \returns A 128-bit vector of [4 x float] containing the copied and converted
 ///    values from the operand.
-static __inline__ __m128 __DEFAULT_FN_ATTRS_SSE2
+static __inline__ __m128 __DEFAULT_FN_ATTRS_SSE2_CONSTEXPR
 _mm_cvtpi16_ps(__m64 __a)
 {
   return __builtin_convertvector((__v4hi)__a, __v4sf);
@@ -2891,7 +2867,7 @@ _mm_cvtpi16_ps(__m64 __a)
 ///    destination are copied from the corresponding elements in this operand.
 /// \returns A 128-bit vector of [4 x float] containing the copied and converted
 ///    values from the operand.
-static __inline__ __m128 __DEFAULT_FN_ATTRS_SSE2
+static __inline__ __m128 __DEFAULT_FN_ATTRS_SSE2_CONSTEXPR
 _mm_cvtpu16_ps(__m64 __a)
 {
   return __builtin_convertvector((__v4hu)__a, __v4sf);
@@ -2909,7 +2885,7 @@ _mm_cvtpu16_ps(__m64 __a)
 ///    from the corresponding lower 4 elements in this operand.
 /// \returns A 128-bit vector of [4 x float] containing the copied and converted
 ///    values from the operand.
-static __inline__ __m128 __DEFAULT_FN_ATTRS_SSE2
+static __inline__ __m128 __DEFAULT_FN_ATTRS_SSE2_CONSTEXPR
 _mm_cvtpi8_ps(__m64 __a)
 {
   return __builtin_convertvector(
@@ -2930,7 +2906,7 @@ _mm_cvtpi8_ps(__m64 __a)
 ///    operand.
 /// \returns A 128-bit vector of [4 x float] containing the copied and converted
 ///    values from the source operand.
-static __inline__ __m128 __DEFAULT_FN_ATTRS_SSE2
+static __inline__ __m128 __DEFAULT_FN_ATTRS_SSE2_CONSTEXPR
 _mm_cvtpu8_ps(__m64 __a)
 {
   return __builtin_convertvector(
@@ -2954,7 +2930,7 @@ _mm_cvtpu8_ps(__m64 __a)
 /// \returns A 128-bit vector of [4 x float] whose lower 64 bits contain the
 ///    copied and converted values from the first operand. The upper 64 bits
 ///    contain the copied and converted values from the second operand.
-static __inline__ __m128 __DEFAULT_FN_ATTRS_SSE2
+static __inline__ __m128 __DEFAULT_FN_ATTRS_SSE2_CONSTEXPR
 _mm_cvtpi32x2_ps(__m64 __a, __m64 __b)
 {
   return __builtin_convertvector(
@@ -3029,9 +3005,7 @@ _mm_cvtps_pi8(__m128 __a)
 /// \returns A 32-bit integer value. Bits [3:0] contain the sign bits from each
 ///    single-precision floating-point element of the parameter. Bits [31:4] are
 ///    set to zero.
-static __inline__ int __DEFAULT_FN_ATTRS
-_mm_movemask_ps(__m128 __a)
-{
+static __inline__ int __DEFAULT_FN_ATTRS_CONSTEXPR _mm_movemask_ps(__m128 __a) {
   return __builtin_ia32_movmskps((__v4sf)__a);
 }
 
diff --git a/lib/include/xopintrin.h b/lib/include/xopintrin.h
index 976cdf4902..aba632f941 100644
--- a/lib/include/xopintrin.h
+++ b/lib/include/xopintrin.h
@@ -20,6 +20,14 @@
 #define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("xop"), __min_vector_width__(128)))
 #define __DEFAULT_FN_ATTRS256 __attribute__((__always_inline__, __nodebug__, __target__("xop"), __min_vector_width__(256)))
 
+#if defined(__cplusplus) && (__cplusplus >= 201103L)
+#define __DEFAULT_FN_ATTRS_CONSTEXPR __DEFAULT_FN_ATTRS constexpr
+#define __DEFAULT_FN_ATTRS256_CONSTEXPR __DEFAULT_FN_ATTRS256 constexpr
+#else
+#define __DEFAULT_FN_ATTRS_CONSTEXPR __DEFAULT_FN_ATTRS
+#define __DEFAULT_FN_ATTRS256_CONSTEXPR __DEFAULT_FN_ATTRS256
+#endif
+
 static __inline__ __m128i __DEFAULT_FN_ATTRS
 _mm_maccs_epi16(__m128i __A, __m128i __B, __m128i __C)
 {
@@ -182,13 +190,13 @@ _mm_hsubq_epi32(__m128i __A)
   return (__m128i)__builtin_ia32_vphsubdq((__v4si)__A);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR
 _mm_cmov_si128(__m128i __A, __m128i __B, __m128i __C)
 {
   return (__m128i)(((__v2du)__A & (__v2du)__C) | ((__v2du)__B & ~(__v2du)__C));
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
 _mm256_cmov_si256(__m256i __A, __m256i __B, __m256i __C)
 {
   return (__m256i)(((__v4du)__A & (__v4du)__C) | ((__v4du)__B & ~(__v4du)__C));
@@ -200,28 +208,28 @@ _mm_perm_epi8(__m128i __A, __m128i __B, __m128i __C)
   return (__m128i)__builtin_ia32_vpperm((__v16qi)__A, (__v16qi)__B, (__v16qi)__C);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR
 _mm_rot_epi8(__m128i __A, __m128i __B)
 {
-  return (__m128i)__builtin_ia32_vprotb((__v16qi)__A, (__v16qi)__B);
+  return (__m128i)__builtin_elementwise_fshl((__v16qu)__A, (__v16qu)__A, (__v16qu)__B);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR
 _mm_rot_epi16(__m128i __A, __m128i __B)
 {
-  return (__m128i)__builtin_ia32_vprotw((__v8hi)__A, (__v8hi)__B);
+  return (__m128i)__builtin_elementwise_fshl((__v8hu)__A, (__v8hu)__A, (__v8hu)__B);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR
 _mm_rot_epi32(__m128i __A, __m128i __B)
 {
-  return (__m128i)__builtin_ia32_vprotd((__v4si)__A, (__v4si)__B);
+  return (__m128i)__builtin_elementwise_fshl((__v4su)__A, (__v4su)__A, (__v4su)__B);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR
 _mm_rot_epi64(__m128i __A, __m128i __B)
 {
-  return (__m128i)__builtin_ia32_vprotq((__v2di)__A, (__v2di)__B);
+  return (__m128i)__builtin_elementwise_fshl((__v2du)__A, (__v2du)__A, (__v2du)__B);
 }
 
 #define _mm_roti_epi8(A, N) \
@@ -766,5 +774,7 @@ _mm256_frcz_pd(__m256d __A)
 
 #undef __DEFAULT_FN_ATTRS
 #undef __DEFAULT_FN_ATTRS256
+#undef __DEFAULT_FN_ATTRS_CONSTEXPR
+#undef __DEFAULT_FN_ATTRS256_CONSTEXPR
 
 #endif /* __XOPINTRIN_H */
diff --git a/lib/libcxx/include/__algorithm/all_of.h b/lib/libcxx/include/__algorithm/all_of.h
index 6acc117fc4..9bdb20a0d7 100644
--- a/lib/libcxx/include/__algorithm/all_of.h
+++ b/lib/libcxx/include/__algorithm/all_of.h
@@ -10,24 +10,28 @@
 #ifndef _LIBCPP___ALGORITHM_ALL_OF_H
 #define _LIBCPP___ALGORITHM_ALL_OF_H
 
+#include <__algorithm/any_of.h>
 #include <__config>
 #include <__functional/identity.h>
 #include <__type_traits/invoke.h>
+#include <__utility/forward.h>
+#include <__utility/move.h>
 
 #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
 #  pragma GCC system_header
 #endif
 
+_LIBCPP_PUSH_MACROS
+#include <__undef_macros>
+
 _LIBCPP_BEGIN_NAMESPACE_STD
 
 template <class _Iter, class _Sent, class _Proj, class _Pred>
 _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 bool
 __all_of(_Iter __first, _Sent __last, _Pred& __pred, _Proj& __proj) {
-  for (; __first != __last; ++__first) {
-    if (!std::__invoke(__pred, std::__invoke(__proj, *__first)))
-      return false;
-  }
-  return true;
+  using _Ref          = decltype(std::__invoke(__proj, *__first));
+  auto __negated_pred = [&__pred](_Ref __arg) -> bool { return !std::__invoke(__pred, std::forward<_Ref>(__arg)); };
+  return !std::__any_of(std::move(__first), std::move(__last), __negated_pred, __proj);
 }
 
 template <class _InputIterator, class _Predicate>
@@ -39,4 +43,6 @@ all_of(_InputIterator __first, _InputIterator __last, _Predicate __pred) {
 
 _LIBCPP_END_NAMESPACE_STD
 
+_LIBCPP_POP_MACROS
+
 #endif // _LIBCPP___ALGORITHM_ALL_OF_H
diff --git a/lib/libcxx/include/__algorithm/comp.h b/lib/libcxx/include/__algorithm/comp.h
index ab3c598418..38e2fb9f5e 100644
--- a/lib/libcxx/include/__algorithm/comp.h
+++ b/lib/libcxx/include/__algorithm/comp.h
@@ -11,6 +11,7 @@
 
 #include <__config>
 #include <__type_traits/desugars_to.h>
+#include <__type_traits/is_generic_transparent_comparator.h>
 #include <__type_traits/is_integral.h>
 
 #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
@@ -48,6 +49,9 @@ inline const bool __desugars_to_v<__less_tag, __less<>, _Tp, _Tp> = true;
 template <class _Tp>
 inline const bool __desugars_to_v<__totally_ordered_less_tag, __less<>, _Tp, _Tp> = is_integral<_Tp>::value;
 
+template <>
+inline const bool __is_generic_transparent_comparator_v<__less<> > = true;
+
 _LIBCPP_END_NAMESPACE_STD
 
 #endif // _LIBCPP___ALGORITHM_COMP_H
diff --git a/lib/libcxx/include/__algorithm/copy.h b/lib/libcxx/include/__algorithm/copy.h
index ea98031df1..344a53e516 100644
--- a/lib/libcxx/include/__algorithm/copy.h
+++ b/lib/libcxx/include/__algorithm/copy.h
@@ -12,11 +12,10 @@
 #include <__algorithm/copy_move_common.h>
 #include <__algorithm/for_each_segment.h>
 #include <__algorithm/min.h>
+#include <__algorithm/specialized_algorithms.h>
 #include <__config>
-#include <__fwd/bit_reference.h>
 #include <__iterator/iterator_traits.h>
 #include <__iterator/segmented_iterator.h>
-#include <__memory/pointer_traits.h>
 #include <__type_traits/common_type.h>
 #include <__type_traits/enable_if.h>
 #include <__utility/move.h>
@@ -38,124 +37,14 @@ copy(_InputIterator __first, _InputIterator __last, _OutputIterator __result);
 template <class _InIter, class _Sent, class _OutIter>
 inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 pair<_InIter, _OutIter> __copy(_InIter, _Sent, _OutIter);
 
-template <class _Cp, bool _IsConst>
-_LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI __bit_iterator<_Cp, false> __copy_aligned(
-    __bit_iterator<_Cp, _IsConst> __first, __bit_iterator<_Cp, _IsConst> __last, __bit_iterator<_Cp, false> __result) {
-  using _In             = __bit_iterator<_Cp, _IsConst>;
-  using difference_type = typename _In::difference_type;
-  using __storage_type  = typename _In::__storage_type;
-
-  const int __bits_per_word = _In::__bits_per_word;
-  difference_type __n       = __last - __first;
-  if (__n > 0) {
-    // do first word
-    if (__first.__ctz_ != 0) {
-      unsigned __clz       = __bits_per_word - __first.__ctz_;
-      difference_type __dn = std::min(static_cast<difference_type>(__clz), __n);
-      __n -= __dn;
-      __storage_type __m = std::__middle_mask<__storage_type>(__clz - __dn, __first.__ctz_);
-      __storage_type __b = *__first.__seg_ & __m;
-      *__result.__seg_ &= ~__m;
-      *__result.__seg_ |= __b;
-      __result.__seg_ += (__dn + __result.__ctz_) / __bits_per_word;
-      __result.__ctz_ = static_cast<unsigned>((__dn + __result.__ctz_) % __bits_per_word);
-      ++__first.__seg_;
-      // __first.__ctz_ = 0;
-    }
-    // __first.__ctz_ == 0;
-    // do middle words
-    __storage_type __nw = __n / __bits_per_word;
-    std::copy(std::__to_address(__first.__seg_),
-              std::__to_address(__first.__seg_ + __nw),
-              std::__to_address(__result.__seg_));
-    __n -= __nw * __bits_per_word;
-    __result.__seg_ += __nw;
-    // do last word
-    if (__n > 0) {
-      __first.__seg_ += __nw;
-      __storage_type __m = std::__trailing_mask<__storage_type>(__bits_per_word - __n);
-      __storage_type __b = *__first.__seg_ & __m;
-      *__result.__seg_ &= ~__m;
-      *__result.__seg_ |= __b;
-      __result.__ctz_ = static_cast<unsigned>(__n);
-    }
-  }
-  return __result;
-}
-
-template <class _Cp, bool _IsConst>
-_LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI __bit_iterator<_Cp, false> __copy_unaligned(
-    __bit_iterator<_Cp, _IsConst> __first, __bit_iterator<_Cp, _IsConst> __last, __bit_iterator<_Cp, false> __result) {
-  using _In             = __bit_iterator<_Cp, _IsConst>;
-  using difference_type = typename _In::difference_type;
-  using __storage_type  = typename _In::__storage_type;
-
-  const int __bits_per_word = _In::__bits_per_word;
-  difference_type __n       = __last - __first;
-  if (__n > 0) {
-    // do first word
-    if (__first.__ctz_ != 0) {
-      unsigned __clz_f     = __bits_per_word - __first.__ctz_;
-      difference_type __dn = std::min(static_cast<difference_type>(__clz_f), __n);
-      __n -= __dn;
-      __storage_type __m   = std::__middle_mask<__storage_type>(__clz_f - __dn, __first.__ctz_);
-      __storage_type __b   = *__first.__seg_ & __m;
-      unsigned __clz_r     = __bits_per_word - __result.__ctz_;
-      __storage_type __ddn = std::min<__storage_type>(__dn, __clz_r);
-      __m                  = std::__middle_mask<__storage_type>(__clz_r - __ddn, __result.__ctz_);
-      *__result.__seg_ &= ~__m;
-      if (__result.__ctz_ > __first.__ctz_)
-        *__result.__seg_ |= __b << (__result.__ctz_ - __first.__ctz_);
-      else
-        *__result.__seg_ |= __b >> (__first.__ctz_ - __result.__ctz_);
-      __result.__seg_ += (__ddn + __result.__ctz_) / __bits_per_word;
-      __result.__ctz_ = static_cast<unsigned>((__ddn + __result.__ctz_) % __bits_per_word);
-      __dn -= __ddn;
-      if (__dn > 0) {
-        __m = std::__trailing_mask<__storage_type>(__bits_per_word - __dn);
-        *__result.__seg_ &= ~__m;
-        *__result.__seg_ |= __b >> (__first.__ctz_ + __ddn);
-        __result.__ctz_ = static_cast<unsigned>(__dn);
-      }
-      ++__first.__seg_;
-      // __first.__ctz_ = 0;
-    }
-    // __first.__ctz_ == 0;
-    // do middle words
-    unsigned __clz_r   = __bits_per_word - __result.__ctz_;
-    __storage_type __m = std::__leading_mask<__storage_type>(__result.__ctz_);
-    for (; __n >= __bits_per_word; __n -= __bits_per_word, ++__first.__seg_) {
-      __storage_type __b = *__first.__seg_;
-      *__result.__seg_ &= ~__m;
-      *__result.__seg_ |= __b << __result.__ctz_;
-      ++__result.__seg_;
-      *__result.__seg_ &= __m;
-      *__result.__seg_ |= __b >> __clz_r;
-    }
-    // do last word
-    if (__n > 0) {
-      __m                 = std::__trailing_mask<__storage_type>(__bits_per_word - __n);
-      __storage_type __b  = *__first.__seg_ & __m;
-      __storage_type __dn = std::min(__n, static_cast<difference_type>(__clz_r));
-      __m                 = std::__middle_mask<__storage_type>(__clz_r - __dn, __result.__ctz_);
-      *__result.__seg_ &= ~__m;
-      *__result.__seg_ |= __b << __result.__ctz_;
-      __result.__seg_ += (__dn + __result.__ctz_) / __bits_per_word;
-      __result.__ctz_ = static_cast<unsigned>((__dn + __result.__ctz_) % __bits_per_word);
-      __n -= __dn;
-      if (__n > 0) {
-        __m = std::__trailing_mask<__storage_type>(__bits_per_word - __n);
-        *__result.__seg_ &= ~__m;
-        *__result.__seg_ |= __b >> __dn;
-        __result.__ctz_ = static_cast<unsigned>(__n);
-      }
-    }
-  }
-  return __result;
-}
-
 struct __copy_impl {
-  template <class _InIter, class _Sent, class _OutIter>
+  template <class _InIter,
+            class _Sent,
+            class _OutIter,
+            __enable_if_t<!__specialized_algorithm<_Algorithm::__copy,
+                                                   __iterator_pair<_InIter, _Sent>,
+                                                   __single_iterator<_OutIter> >::__has_algorithm,
+                          int> = 0>
   _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 pair<_InIter, _OutIter>
   operator()(_InIter __first, _Sent __last, _OutIter __result) const {
     while (__first != __last) {
@@ -167,37 +56,39 @@ struct __copy_impl {
     return std::make_pair(std::move(__first), std::move(__result));
   }
 
-  template <class _InIter, class _OutIter>
-  struct _CopySegment {
-    using _Traits _LIBCPP_NODEBUG = __segmented_iterator_traits<_InIter>;
+  template <class _InIter,
+            class _Sent,
+            class _OutIter,
+            __enable_if_t<__specialized_algorithm<_Algorithm::__copy,
+                                                  __iterator_pair<_InIter, _Sent>,
+                                                  __single_iterator<_OutIter> >::__has_algorithm,
+                          int> = 0>
+  _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 static pair<_InIter, _OutIter>
+  operator()(_InIter __first, _Sent __last, _OutIter __result) {
+    return __specialized_algorithm<_Algorithm::__copy, __iterator_pair<_InIter, _Sent>, __single_iterator<_OutIter> >()(
+        std::move(__first), std::move(__last), std::move(__result));
+  }
 
-    _OutIter& __result_;
-
-    _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 explicit _CopySegment(_OutIter& __result)
-        : __result_(__result) {}
-
-    _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 void
-    operator()(typename _Traits::__local_iterator __lfirst, typename _Traits::__local_iterator __llast) {
-      __result_ = std::__copy(__lfirst, __llast, std::move(__result_)).second;
-    }
-  };
-
-  template <class _InIter, class _OutIter, __enable_if_t<__is_segmented_iterator<_InIter>::value, int> = 0>
+  template <class _InIter, class _OutIter, __enable_if_t<__is_segmented_iterator_v<_InIter>, int> = 0>
   _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 pair<_InIter, _OutIter>
   operator()(_InIter __first, _InIter __last, _OutIter __result) const {
-    std::__for_each_segment(__first, __last, _CopySegment<_InIter, _OutIter>(__result));
+    using __local_iterator = typename __segmented_iterator_traits<_InIter>::__local_iterator;
+    std::__for_each_segment(__first, __last, [&__result](__local_iterator __lfirst, __local_iterator __llast) {
+      __result = std::__copy(std::move(__lfirst), std::move(__llast), std::move(__result)).second;
+    });
     return std::make_pair(__last, std::move(__result));
   }
 
   template <class _InIter,
             class _OutIter,
             __enable_if_t<__has_random_access_iterator_category<_InIter>::value &&
-                              !__is_segmented_iterator<_InIter>::value && __is_segmented_iterator<_OutIter>::value,
+                              !__is_segmented_iterator_v<_InIter> && __is_segmented_iterator_v<_OutIter>,
                           int> = 0>
   _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 pair<_InIter, _OutIter>
   operator()(_InIter __first, _InIter __last, _OutIter __result) const {
     using _Traits = __segmented_iterator_traits<_OutIter>;
-    using _DiffT  = typename common_type<__iter_diff_t<_InIter>, __iter_diff_t<_OutIter> >::type;
+    using _DiffT =
+        typename common_type<__iterator_difference_type<_InIter>, __iterator_difference_type<_OutIter> >::type;
 
     if (__first == __last)
       return std::make_pair(std::move(__first), std::move(__result));
@@ -217,16 +108,6 @@ struct __copy_impl {
     }
   }
 
-  template <class _Cp, bool _IsConst>
-  _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 pair<__bit_iterator<_Cp, _IsConst>, __bit_iterator<_Cp, false> >
-  operator()(__bit_iterator<_Cp, _IsConst> __first,
-             __bit_iterator<_Cp, _IsConst> __last,
-             __bit_iterator<_Cp, false> __result) const {
-    if (__first.__ctz_ == __result.__ctz_)
-      return std::make_pair(__last, std::__copy_aligned(__first, __last, __result));
-    return std::make_pair(__last, std::__copy_unaligned(__first, __last, __result));
-  }
-
   // At this point, the iterators have been unwrapped so any `contiguous_iterator` has been unwrapped to a pointer.
   template <class _In, class _Out, __enable_if_t<__can_lower_copy_assignment_to_memmove<_In, _Out>::value, int> = 0>
   _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 pair<_In*, _Out*>
diff --git a/lib/libcxx/include/__algorithm/copy_backward.h b/lib/libcxx/include/__algorithm/copy_backward.h
index 9f890645a4..8758d2c9e7 100644
--- a/lib/libcxx/include/__algorithm/copy_backward.h
+++ b/lib/libcxx/include/__algorithm/copy_backward.h
@@ -11,6 +11,7 @@
 
 #include <__algorithm/copy_move_common.h>
 #include <__algorithm/copy_n.h>
+#include <__algorithm/for_each_segment.h>
 #include <__algorithm/iterator_operations.h>
 #include <__algorithm/min.h>
 #include <__config>
@@ -170,37 +171,20 @@ struct __copy_backward_impl {
     return std::make_pair(std::move(__original_last_iter), std::move(__result));
   }
 
-  template <class _InIter, class _OutIter, __enable_if_t<__is_segmented_iterator<_InIter>::value, int> = 0>
+  template <class _InIter, class _OutIter, __enable_if_t<__is_segmented_iterator_v<_InIter>, int> = 0>
   _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 pair<_InIter, _OutIter>
   operator()(_InIter __first, _InIter __last, _OutIter __result) const {
-    using _Traits = __segmented_iterator_traits<_InIter>;
-    auto __sfirst = _Traits::__segment(__first);
-    auto __slast  = _Traits::__segment(__last);
-    if (__sfirst == __slast) {
-      auto __iters =
-          std::__copy_backward<_AlgPolicy>(_Traits::__local(__first), _Traits::__local(__last), std::move(__result));
-      return std::make_pair(__last, __iters.second);
-    }
-
-    __result =
-        std::__copy_backward<_AlgPolicy>(_Traits::__begin(__slast), _Traits::__local(__last), std::move(__result))
-            .second;
-    --__slast;
-    while (__sfirst != __slast) {
-      __result =
-          std::__copy_backward<_AlgPolicy>(_Traits::__begin(__slast), _Traits::__end(__slast), std::move(__result))
-              .second;
-      --__slast;
-    }
-    __result = std::__copy_backward<_AlgPolicy>(_Traits::__local(__first), _Traits::__end(__slast), std::move(__result))
-                   .second;
+    using __local_iterator = typename __segmented_iterator_traits<_InIter>::__local_iterator;
+    std::__for_each_segment_backward(__first, __last, [&__result](__local_iterator __lfirst, __local_iterator __llast) {
+      __result = std::__copy_backward<_AlgPolicy>(std::move(__lfirst), std::move(__llast), std::move(__result)).second;
+    });
     return std::make_pair(__last, std::move(__result));
   }
 
   template <class _InIter,
             class _OutIter,
             __enable_if_t<__has_random_access_iterator_category<_InIter>::value &&
-                              !__is_segmented_iterator<_InIter>::value && __is_segmented_iterator<_OutIter>::value,
+                              !__is_segmented_iterator_v<_InIter> && __is_segmented_iterator_v<_OutIter>,
                           int> = 0>
   _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 pair<_InIter, _OutIter>
   operator()(_InIter __first, _InIter __last, _OutIter __result) const {
@@ -214,7 +198,8 @@ struct __copy_backward_impl {
 
     auto __local_last = _Traits::__local(__result);
     while (true) {
-      using _DiffT = typename common_type<__iter_diff_t<_InIter>, __iter_diff_t<_OutIter> >::type;
+      using _DiffT =
+          typename common_type<__iterator_difference_type<_InIter>, __iterator_difference_type<_OutIter> >::type;
 
       auto __local_first = _Traits::__begin(__segment_iterator);
       auto __size        = std::min<_DiffT>(__local_last - __local_first, __last - __first);
diff --git a/lib/libcxx/include/__algorithm/copy_n.h b/lib/libcxx/include/__algorithm/copy_n.h
index f93f39203a..56fb44811f 100644
--- a/lib/libcxx/include/__algorithm/copy_n.h
+++ b/lib/libcxx/include/__algorithm/copy_n.h
@@ -10,31 +10,63 @@
 #define _LIBCPP___ALGORITHM_COPY_N_H
 
 #include <__algorithm/copy.h>
+#include <__algorithm/iterator_operations.h>
 #include <__config>
 #include <__iterator/iterator_traits.h>
 #include <__type_traits/enable_if.h>
 #include <__utility/convert_to_integral.h>
+#include <__utility/move.h>
+#include <__utility/pair.h>
 
 #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
 #  pragma GCC system_header
 #endif
 
+_LIBCPP_PUSH_MACROS
+#include <__undef_macros>
+
 _LIBCPP_BEGIN_NAMESPACE_STD
 
+template <class _AlgPolicy,
+          class _InIter,
+          class _OutIter,
+          __enable_if_t<__has_random_access_iterator_category<_InIter>::value, int> = 0>
+_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 pair<_InIter, _OutIter>
+__copy_n(_InIter __first, typename _IterOps<_AlgPolicy>::template __difference_type<_InIter> __n, _OutIter __result) {
+  return std::__copy(__first, __first + __n, std::move(__result));
+}
+
+template <class _AlgPolicy,
+          class _InIter,
+          class _OutIter,
+          __enable_if_t<!__has_random_access_iterator_category<_InIter>::value, int> = 0>
+_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 pair<_InIter, _OutIter>
+__copy_n(_InIter __first, typename _IterOps<_AlgPolicy>::template __difference_type<_InIter> __n, _OutIter __result) {
+  while (__n != 0) {
+    *__result = *__first;
+    ++__first;
+    ++__result;
+    --__n;
+  }
+  return std::make_pair(std::move(__first), std::move(__result));
+}
+
+// The InputIterator case is handled specially here because it's been written in a way to avoid incrementing __first
+// if not absolutely required. This was done to allow its use with istream_iterator and we want to avoid breaking
+// people, at least currently.
+// See https://github.com/llvm/llvm-project/commit/99847d2bf132854fffa019bab19818768102ccad
 template <class _InputIterator,
           class _Size,
           class _OutputIterator,
-          __enable_if_t<__has_input_iterator_category<_InputIterator>::value &&
-                            !__has_random_access_iterator_category<_InputIterator>::value,
-                        int> = 0>
-inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 _OutputIterator
-copy_n(_InputIterator __first, _Size __orig_n, _OutputIterator __result) {
-  typedef decltype(std::__convert_to_integral(__orig_n)) _IntegralSize;
-  _IntegralSize __n = __orig_n;
-  if (__n > 0) {
+          __enable_if_t<__has_exactly_input_iterator_category<_InputIterator>::value, int> = 0>
+_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 _OutputIterator
+copy_n(_InputIterator __first, _Size __n, _OutputIterator __result) {
+  using _IntegralSize       = decltype(std::__convert_to_integral(__n));
+  _IntegralSize __converted = __n;
+  if (__converted > 0) {
     *__result = *__first;
     ++__result;
-    for (--__n; __n > 0; --__n) {
+    for (--__converted; __converted > 0; --__converted) {
       ++__first;
       *__result = *__first;
       ++__result;
@@ -46,15 +78,17 @@ copy_n(_InputIterator __first, _Size __orig_n, _OutputIterator __result) {
 template <class _InputIterator,
           class _Size,
           class _OutputIterator,
-          __enable_if_t<__has_random_access_iterator_category<_InputIterator>::value, int> = 0>
-inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 _OutputIterator
-copy_n(_InputIterator __first, _Size __orig_n, _OutputIterator __result) {
-  typedef typename iterator_traits<_InputIterator>::difference_type difference_type;
-  typedef decltype(std::__convert_to_integral(__orig_n)) _IntegralSize;
-  _IntegralSize __n = __orig_n;
-  return std::copy(__first, __first + difference_type(__n), __result);
+          __enable_if_t<!__has_exactly_input_iterator_category<_InputIterator>::value, int> = 0>
+_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 _OutputIterator
+copy_n(_InputIterator __first, _Size __n, _OutputIterator __result) {
+  using _IntegralSize       = decltype(std::__convert_to_integral(__n));
+  _IntegralSize __converted = __n;
+  return std::__copy_n<_ClassicAlgPolicy>(__first, __iterator_difference_type<_InputIterator>(__converted), __result)
+      .second;
 }
 
 _LIBCPP_END_NAMESPACE_STD
 
+_LIBCPP_POP_MACROS
+
 #endif // _LIBCPP___ALGORITHM_COPY_N_H
diff --git a/lib/libcxx/include/__algorithm/count.h b/lib/libcxx/include/__algorithm/count.h
index 0cbe9b6e61..8529d110a3 100644
--- a/lib/libcxx/include/__algorithm/count.h
+++ b/lib/libcxx/include/__algorithm/count.h
@@ -72,7 +72,7 @@ __count_bool(__bit_iterator<_Cp, _IsConst> __first, typename __size_difference_t
 }
 
 template <class, class _Cp, bool _IsConst, class _Tp, class _Proj, __enable_if_t<__is_identity<_Proj>::value, int> = 0>
-_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 __iter_diff_t<__bit_iterator<_Cp, _IsConst> >
+_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 __iterator_difference_type<__bit_iterator<_Cp, _IsConst> >
 __count(__bit_iterator<_Cp, _IsConst> __first, __bit_iterator<_Cp, _IsConst> __last, const _Tp& __value, _Proj&) {
   if (__value)
     return std::__count_bool<true>(
@@ -82,7 +82,7 @@ __count(__bit_iterator<_Cp, _IsConst> __first, __bit_iterator<_Cp, _IsConst> __l
 }
 
 template <class _InputIterator, class _Tp>
-[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 __iter_diff_t<_InputIterator>
+[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 __iterator_difference_type<_InputIterator>
 count(_InputIterator __first, _InputIterator __last, const _Tp& __value) {
   __identity __proj;
   return std::__count<_ClassicAlgPolicy>(__first, __last, __value, __proj);
diff --git a/lib/libcxx/include/__algorithm/equal.h b/lib/libcxx/include/__algorithm/equal.h
index 5a8c9504ed..957cc29759 100644
--- a/lib/libcxx/include/__algorithm/equal.h
+++ b/lib/libcxx/include/__algorithm/equal.h
@@ -160,22 +160,28 @@ template <class _Cp,
           bool _IsConst1,
           bool _IsConst2,
           class _BinaryPredicate,
-          __enable_if_t<__desugars_to_v<__equal_tag, _BinaryPredicate, bool, bool>, int> = 0>
+          class _Proj1,
+          class _Proj2,
+          __enable_if_t<__is_identity<_Proj1>::value && __is_identity<_Proj2>::value &&
+                            __desugars_to_v<__equal_tag, _BinaryPredicate, bool, bool>,
+                        int> = 0>
 [[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 bool __equal_iter_impl(
     __bit_iterator<_Cp, _IsConst1> __first1,
     __bit_iterator<_Cp, _IsConst1> __last1,
     __bit_iterator<_Cp, _IsConst2> __first2,
-    _BinaryPredicate) {
+    _BinaryPredicate,
+    _Proj1&,
+    _Proj2&) {
   if (__first1.__ctz_ == __first2.__ctz_)
     return std::__equal_aligned(__first1, __last1, __first2);
   return std::__equal_unaligned(__first1, __last1, __first2);
 }
 
-template <class _InputIterator1, class _InputIterator2, class _BinaryPredicate>
+template <class _InIter1, class _Sent1, class _InIter2, class _Pred, class _Proj1, class _Proj2>
 [[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 bool __equal_iter_impl(
-    _InputIterator1 __first1, _InputIterator1 __last1, _InputIterator2 __first2, _BinaryPredicate& __pred) {
+    _InIter1 __first1, _Sent1 __last1, _InIter2 __first2, _Pred& __pred, _Proj1& __proj1, _Proj2& __proj2) {
   for (; __first1 != __last1; ++__first1, (void)++__first2)
-    if (!__pred(*__first1, *__first2))
+    if (!std::__invoke(__pred, std::__invoke(__proj1, *__first1), std::__invoke(__proj2, *__first2)))
       return false;
   return true;
 }
@@ -183,19 +189,23 @@ template <class _InputIterator1, class _InputIterator2, class _BinaryPredicate>
 template <class _Tp,
           class _Up,
           class _BinaryPredicate,
-          __enable_if_t<__desugars_to_v<__equal_tag, _BinaryPredicate, _Tp, _Up> && !is_volatile<_Tp>::value &&
-                            !is_volatile<_Up>::value && __libcpp_is_trivially_equality_comparable<_Tp, _Up>::value,
+          class _Proj1,
+          class _Proj2,
+          __enable_if_t<__is_identity<_Proj1>::value && __is_identity<_Proj2>::value &&
+                            __desugars_to_v<__equal_tag, _BinaryPredicate, _Tp, _Up> && !is_volatile<_Tp>::value &&
+                            !is_volatile<_Up>::value && __is_trivially_equality_comparable_v<_Tp, _Up>,
                         int> = 0>
 [[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 bool
-__equal_iter_impl(_Tp* __first1, _Tp* __last1, _Up* __first2, _BinaryPredicate&) {
+__equal_iter_impl(_Tp* __first1, _Tp* __last1, _Up* __first2, _BinaryPredicate&, _Proj1&, _Proj2&) {
   return std::__constexpr_memcmp_equal(__first1, __first2, __element_count(__last1 - __first1));
 }
 
 template <class _InputIterator1, class _InputIterator2, class _BinaryPredicate>
 [[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 bool
 equal(_InputIterator1 __first1, _InputIterator1 __last1, _InputIterator2 __first2, _BinaryPredicate __pred) {
+  __identity __proj;
   return std::__equal_iter_impl(
-      std::__unwrap_iter(__first1), std::__unwrap_iter(__last1), std::__unwrap_iter(__first2), __pred);
+      std::__unwrap_iter(__first1), std::__unwrap_iter(__last1), std::__unwrap_iter(__first2), __pred, __proj, __proj);
 }
 
 template <class _InputIterator1, class _InputIterator2>
@@ -206,52 +216,28 @@ equal(_InputIterator1 __first1, _InputIterator1 __last1, _InputIterator2 __first
 
 #if _LIBCPP_STD_VER >= 14
 
-template <class _Iter1, class _Sent1, class _Iter2, class _Sent2, class _Pred, class _Proj1, class _Proj2>
+template <bool __known_equal_length,
+          class _Iter1,
+          class _Sent1,
+          class _Iter2,
+          class _Sent2,
+          class _Pred,
+          class _Proj1,
+          class _Proj2>
 [[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 bool __equal_impl(
     _Iter1 __first1, _Sent1 __last1, _Iter2 __first2, _Sent2 __last2, _Pred& __comp, _Proj1& __proj1, _Proj2& __proj2) {
-  while (__first1 != __last1 && __first2 != __last2) {
-    if (!std::__invoke(__comp, std::__invoke(__proj1, *__first1), std::__invoke(__proj2, *__first2)))
-      return false;
-    ++__first1;
-    ++__first2;
+  if constexpr (__known_equal_length) {
+    return std::__equal_iter_impl(
+        std::move(__first1), std::move(__last1), std::move(__first2), __comp, __proj1, __proj2);
+  } else {
+    while (__first1 != __last1 && __first2 != __last2) {
+      if (!std::__invoke(__comp, std::__invoke(__proj1, *__first1), std::__invoke(__proj2, *__first2)))
+        return false;
+      ++__first1;
+      ++__first2;
+    }
+    return __first1 == __last1 && __first2 == __last2;
   }
-  return __first1 == __last1 && __first2 == __last2;
-}
-
-template <class _Tp,
-          class _Up,
-          class _Pred,
-          class _Proj1,
-          class _Proj2,
-          __enable_if_t<__desugars_to_v<__equal_tag, _Pred, _Tp, _Up> && __is_identity<_Proj1>::value &&
-                            __is_identity<_Proj2>::value && !is_volatile<_Tp>::value && !is_volatile<_Up>::value &&
-                            __libcpp_is_trivially_equality_comparable<_Tp, _Up>::value,
-                        int> = 0>
-[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 bool
-__equal_impl(_Tp* __first1, _Tp* __last1, _Up* __first2, _Up*, _Pred&, _Proj1&, _Proj2&) {
-  return std::__constexpr_memcmp_equal(__first1, __first2, __element_count(__last1 - __first1));
-}
-
-template <class _Cp,
-          bool _IsConst1,
-          bool _IsConst2,
-          class _Pred,
-          class _Proj1,
-          class _Proj2,
-          __enable_if_t<__desugars_to_v<__equal_tag, _Pred, bool, bool> && __is_identity<_Proj1>::value &&
-                            __is_identity<_Proj2>::value,
-                        int> = 0>
-[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 bool __equal_impl(
-    __bit_iterator<_Cp, _IsConst1> __first1,
-    __bit_iterator<_Cp, _IsConst1> __last1,
-    __bit_iterator<_Cp, _IsConst2> __first2,
-    __bit_iterator<_Cp, _IsConst2>,
-    _Pred&,
-    _Proj1&,
-    _Proj2&) {
-  if (__first1.__ctz_ == __first2.__ctz_)
-    return std::__equal_aligned(__first1, __last1, __first2);
-  return std::__equal_unaligned(__first1, __last1, __first2);
 }
 
 template <class _InputIterator1, class _InputIterator2, class _BinaryPredicate>
@@ -261,13 +247,15 @@ equal(_InputIterator1 __first1,
       _InputIterator2 __first2,
       _InputIterator2 __last2,
       _BinaryPredicate __pred) {
-  if constexpr (__has_random_access_iterator_category<_InputIterator1>::value &&
-                __has_random_access_iterator_category<_InputIterator2>::value) {
+  static constexpr bool __both_random_access =
+      __has_random_access_iterator_category<_InputIterator1>::value &&
+      __has_random_access_iterator_category<_InputIterator2>::value;
+  if constexpr (__both_random_access) {
     if (std::distance(__first1, __last1) != std::distance(__first2, __last2))
       return false;
   }
   __identity __proj;
-  return std::__equal_impl(
+  return std::__equal_impl<__both_random_access>(
       std::__unwrap_iter(__first1),
       std::__unwrap_iter(__last1),
       std::__unwrap_iter(__first2),
diff --git a/lib/libcxx/include/__algorithm/fill.h b/lib/libcxx/include/__algorithm/fill.h
index 1ce3eadb01..37732cc22a 100644
--- a/lib/libcxx/include/__algorithm/fill.h
+++ b/lib/libcxx/include/__algorithm/fill.h
@@ -10,8 +10,12 @@
 #define _LIBCPP___ALGORITHM_FILL_H
 
 #include <__algorithm/fill_n.h>
+#include <__algorithm/for_each_segment.h>
 #include <__config>
 #include <__iterator/iterator_traits.h>
+#include <__iterator/segmented_iterator.h>
+#include <__type_traits/enable_if.h>
+#include <__type_traits/is_same.h>
 
 #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
 #  pragma GCC system_header
@@ -21,23 +25,37 @@ _LIBCPP_BEGIN_NAMESPACE_STD
 
 // fill isn't specialized for std::memset, because the compiler already optimizes the loop to a call to std::memset.
 
-template <class _ForwardIterator, class _Tp>
-inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 void
-__fill(_ForwardIterator __first, _ForwardIterator __last, const _Tp& __value, forward_iterator_tag) {
+template <class _ForwardIterator, class _Sentinel, class _Tp>
+inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 _ForwardIterator
+__fill(_ForwardIterator __first, _Sentinel __last, const _Tp& __value) {
+#ifndef _LIBCPP_CXX03_LANG
+  if constexpr (is_same<_ForwardIterator, _Sentinel>::value && __is_segmented_iterator_v<_ForwardIterator>) {
+    using __local_iterator_t = typename __segmented_iterator_traits<_ForwardIterator>::__local_iterator;
+    std::__for_each_segment(__first, __last, [&](__local_iterator_t __lfirst, __local_iterator_t __llast) {
+      std::__fill(__lfirst, __llast, __value);
+    });
+    return __last;
+  }
+#endif
   for (; __first != __last; ++__first)
     *__first = __value;
+  return __first;
 }
 
-template <class _RandomAccessIterator, class _Tp>
-inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 void
-__fill(_RandomAccessIterator __first, _RandomAccessIterator __last, const _Tp& __value, random_access_iterator_tag) {
-  std::fill_n(__first, __last - __first, __value);
+template <class _RandomAccessIterator,
+          class _Tp,
+          __enable_if_t<__has_random_access_iterator_category<_RandomAccessIterator>::value &&
+                            !__is_segmented_iterator_v<_RandomAccessIterator>,
+                        int> = 0>
+inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 _RandomAccessIterator
+__fill(_RandomAccessIterator __first, _RandomAccessIterator __last, const _Tp& __value) {
+  return std::__fill_n(__first, __last - __first, __value);
 }
 
 template <class _ForwardIterator, class _Tp>
 inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 void
 fill(_ForwardIterator __first, _ForwardIterator __last, const _Tp& __value) {
-  std::__fill(__first, __last, __value, typename iterator_traits<_ForwardIterator>::iterator_category());
+  std::__fill(__first, __last, __value);
 }
 
 _LIBCPP_END_NAMESPACE_STD
diff --git a/lib/libcxx/include/__algorithm/fill_n.h b/lib/libcxx/include/__algorithm/fill_n.h
index 0da78e1f38..3d06ea4f08 100644
--- a/lib/libcxx/include/__algorithm/fill_n.h
+++ b/lib/libcxx/include/__algorithm/fill_n.h
@@ -9,11 +9,14 @@
 #ifndef _LIBCPP___ALGORITHM_FILL_N_H
 #define _LIBCPP___ALGORITHM_FILL_N_H
 
-#include <__algorithm/min.h>
+#include <__algorithm/for_each_n_segment.h>
+#include <__algorithm/specialized_algorithms.h>
 #include <__config>
-#include <__fwd/bit_reference.h>
-#include <__memory/pointer_traits.h>
+#include <__iterator/iterator_traits.h>
+#include <__iterator/segmented_iterator.h>
+#include <__type_traits/enable_if.h>
 #include <__utility/convert_to_integral.h>
+#include <__utility/move.h>
 
 #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
 #  pragma GCC system_header
@@ -26,56 +29,39 @@ _LIBCPP_BEGIN_NAMESPACE_STD
 
 // fill_n isn't specialized for std::memset, because the compiler already optimizes the loop to a call to std::memset.
 
-template <class _OutputIterator, class _Size, class _Tp>
-inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 _OutputIterator
-__fill_n(_OutputIterator __first, _Size __n, const _Tp& __value);
-
-template <bool _FillVal, class _Cp>
-_LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI void
-__fill_n_bool(__bit_iterator<_Cp, false> __first, typename __size_difference_type_traits<_Cp>::size_type __n) {
-  using _It            = __bit_iterator<_Cp, false>;
-  using __storage_type = typename _It::__storage_type;
-
-  const int __bits_per_word = _It::__bits_per_word;
-  // do first partial word
-  if (__first.__ctz_ != 0) {
-    __storage_type __clz_f = static_cast<__storage_type>(__bits_per_word - __first.__ctz_);
-    __storage_type __dn    = std::min(__clz_f, __n);
-    std::__fill_masked_range(std::__to_address(__first.__seg_), __clz_f - __dn, __first.__ctz_, _FillVal);
-    __n -= __dn;
-    ++__first.__seg_;
-  }
-  // do middle whole words
-  __storage_type __nw = __n / __bits_per_word;
-  std::__fill_n(std::__to_address(__first.__seg_), __nw, _FillVal ? static_cast<__storage_type>(-1) : 0);
-  __n -= __nw * __bits_per_word;
-  // do last partial word
-  if (__n > 0) {
-    __first.__seg_ += __nw;
-    std::__fill_masked_range(std::__to_address(__first.__seg_), __bits_per_word - __n, 0u, _FillVal);
-  }
-}
-
-template <class _Cp, class _Size>
-inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 __bit_iterator<_Cp, false>
-__fill_n(__bit_iterator<_Cp, false> __first, _Size __n, const bool& __value) {
-  if (__n > 0) {
-    if (__value)
-      std::__fill_n_bool<true>(__first, __n);
-    else
-      std::__fill_n_bool<false>(__first, __n);
-  }
-  return __first + __n;
-}
-
-template <class _OutputIterator, class _Size, class _Tp>
+template <
+    class _OutputIterator,
+    class _Size,
+    class _Tp,
+    __enable_if_t<!__specialized_algorithm<_Algorithm::__fill_n, __single_iterator<_OutputIterator> >::__has_algorithm,
+                  int> = 0>
 inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 _OutputIterator
 __fill_n(_OutputIterator __first, _Size __n, const _Tp& __value) {
+#ifndef _LIBCPP_CXX03_LANG
+  if constexpr (__is_segmented_iterator_v<_OutputIterator>) {
+    using __local_iterator = typename __segmented_iterator_traits<_OutputIterator>::__local_iterator;
+    if constexpr (__has_random_access_iterator_category<__local_iterator>::value) {
+      return std::__for_each_n_segment(__first, __n, [&](__local_iterator __lfirst, __local_iterator __llast) {
+        std::__fill_n(__lfirst, __llast - __lfirst, __value);
+      });
+    }
+  }
+#endif
   for (; __n > 0; ++__first, (void)--__n)
     *__first = __value;
   return __first;
 }
 
+template <class _OutIter,
+          class _Size,
+          class _Tp,
+          __enable_if_t<__specialized_algorithm<_Algorithm::__fill_n, __single_iterator<_OutIter> >::__has_algorithm,
+                        int> = 0>
+_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 _OutIter __fill_n(_OutIter __first, _Size __n, const _Tp& __value) {
+  return __specialized_algorithm<_Algorithm::__fill_n, __single_iterator<_OutIter> >()(
+      std::move(__first), __n, __value);
+}
+
 template <class _OutputIterator, class _Size, class _Tp>
 inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 _OutputIterator
 fill_n(_OutputIterator __first, _Size __n, const _Tp& __value) {
diff --git a/lib/libcxx/include/__algorithm/find.h b/lib/libcxx/include/__algorithm/find.h
index a7d9374b3a..852bc2da3e 100644
--- a/lib/libcxx/include/__algorithm/find.h
+++ b/lib/libcxx/include/__algorithm/find.h
@@ -12,16 +12,19 @@
 
 #include <__algorithm/find_segment_if.h>
 #include <__algorithm/min.h>
+#include <__algorithm/simd_utils.h>
 #include <__algorithm/unwrap_iter.h>
 #include <__bit/countr.h>
 #include <__bit/invert_if.h>
 #include <__config>
+#include <__cstddef/size_t.h>
 #include <__functional/identity.h>
 #include <__fwd/bit_reference.h>
 #include <__iterator/segmented_iterator.h>
 #include <__string/constexpr_c_functions.h>
 #include <__type_traits/enable_if.h>
 #include <__type_traits/invoke.h>
+#include <__type_traits/is_constant_evaluated.h>
 #include <__type_traits/is_equality_comparable.h>
 #include <__type_traits/is_integral.h>
 #include <__type_traits/is_signed.h>
@@ -44,46 +47,108 @@ _LIBCPP_BEGIN_NAMESPACE_STD
 // generic implementation
 template <class _Iter, class _Sent, class _Tp, class _Proj>
 _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 _Iter
-__find(_Iter __first, _Sent __last, const _Tp& __value, _Proj& __proj) {
+__find_loop(_Iter __first, _Sent __last, const _Tp& __value, _Proj& __proj) {
   for (; __first != __last; ++__first)
     if (std::__invoke(__proj, *__first) == __value)
       break;
   return __first;
 }
 
+template <class _Iter, class _Sent, class _Tp, class _Proj>
+_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 _Iter
+__find(_Iter __first, _Sent __last, const _Tp& __value, _Proj& __proj) {
+  return std::__find_loop(std::move(__first), std::move(__last), __value, __proj);
+}
+
+#if _LIBCPP_VECTORIZE_ALGORITHMS
+template <class _Tp, class _Up>
+[[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI
+_LIBCPP_CONSTEXPR_SINCE_CXX14 _Tp* __find_vectorized(_Tp* __first, _Tp* __last, _Up __value) {
+  if (!__libcpp_is_constant_evaluated()) {
+    constexpr size_t __unroll_count = 4;
+    constexpr size_t __vec_size     = __native_vector_size<_Tp>;
+    using __vec                     = __simd_vector<_Tp, __vec_size>;
+
+    auto __orig_first = __first;
+
+    auto __values = static_cast<__simd_vector<_Tp, __vec_size>>(__value); // broadcast the value
+    while (static_cast<size_t>(__last - __first) >= __unroll_count * __vec_size) [[__unlikely__]] {
+      __vec __lhs[__unroll_count];
+
+      for (size_t __i = 0; __i != __unroll_count; ++__i)
+        __lhs[__i] = std::__load_vector<__vec>(__first + __i * __vec_size);
+
+      for (size_t __i = 0; __i != __unroll_count; ++__i) {
+        if (auto __cmp_res = __lhs[__i] == __values; std::__any_of(__cmp_res)) {
+          auto __offset = __i * __vec_size + std::__find_first_set(__cmp_res);
+          return __first + __offset;
+        }
+      }
+
+      __first += __unroll_count * __vec_size;
+    }
+
+    // check the remaining 0-3 vectors
+    while (static_cast<size_t>(__last - __first) >= __vec_size) {
+      if (auto __cmp_res = std::__load_vector<__vec>(__first) == __values; std::__any_of(__cmp_res)) {
+        return __first + std::__find_first_set(__cmp_res);
+      }
+      __first += __vec_size;
+    }
+
+    if (__last - __first == 0)
+      return __first;
+
+    // Check if we can load elements in front of the current pointer. If that's the case load a vector at
+    // (last - vector_size) to check the remaining elements
+    if (static_cast<size_t>(__first - __orig_first) >= __vec_size) {
+      __first = __last - __vec_size;
+      return __first + std::__find_first_set(std::__load_vector<__vec>(__first) == __values);
+    }
+  }
+
+  __identity __proj;
+  return std::__find_loop(__first, __last, __value, __proj);
+}
+#endif
+
+#ifndef _LIBCPP_CXX03_LANG
 // trivially equality comparable implementations
 template <class _Tp,
           class _Up,
           class _Proj,
-          __enable_if_t<__is_identity<_Proj>::value && __libcpp_is_trivially_equality_comparable<_Tp, _Up>::value &&
-                            sizeof(_Tp) == 1,
-                        int> = 0>
+          __enable_if_t<__is_identity<_Proj>::value && __is_trivially_equality_comparable_v<_Tp, _Up>, int> = 0>
 _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 _Tp* __find(_Tp* __first, _Tp* __last, const _Up& __value, _Proj&) {
-  if (auto __ret = std::__constexpr_memchr(__first, __value, __last - __first))
-    return __ret;
-  return __last;
+  if constexpr (sizeof(_Tp) == 1) {
+    if (auto __ret = std::__constexpr_memchr(__first, __value, __last - __first))
+      return __ret;
+    return __last;
+  }
+#  if _LIBCPP_HAS_WIDE_CHARACTERS
+  else if constexpr (sizeof(_Tp) == sizeof(wchar_t) && _LIBCPP_ALIGNOF(_Tp) >= _LIBCPP_ALIGNOF(wchar_t)) {
+    if (auto __ret = std::__constexpr_wmemchr(__first, __value, __last - __first))
+      return __ret;
+    return __last;
+  }
+#  endif
+#  if _LIBCPP_VECTORIZE_ALGORITHMS
+  else if constexpr (is_integral<_Tp>::value) {
+    return std::__find_vectorized(__first, __last, __value);
+  }
+#  endif
+  else {
+    __identity __proj;
+    return std::__find_loop(__first, __last, __value, __proj);
+  }
 }
-
-#if _LIBCPP_HAS_WIDE_CHARACTERS
-template <class _Tp,
-          class _Up,
-          class _Proj,
-          __enable_if_t<__is_identity<_Proj>::value && __libcpp_is_trivially_equality_comparable<_Tp, _Up>::value &&
-                            sizeof(_Tp) == sizeof(wchar_t) && _LIBCPP_ALIGNOF(_Tp) >= _LIBCPP_ALIGNOF(wchar_t),
-                        int> = 0>
-_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 _Tp* __find(_Tp* __first, _Tp* __last, const _Up& __value, _Proj&) {
-  if (auto __ret = std::__constexpr_wmemchr(__first, __value, __last - __first))
-    return __ret;
-  return __last;
-}
-#endif // _LIBCPP_HAS_WIDE_CHARACTERS
+#endif
 
 // TODO: This should also be possible to get right with different signedness
 // cast integral types to allow vectorization
 template <class _Tp,
           class _Up,
           class _Proj,
-          __enable_if_t<__is_identity<_Proj>::value && !__libcpp_is_trivially_equality_comparable<_Tp, _Up>::value &&
+          __enable_if_t<__is_identity<_Proj>::value && !__is_trivially_equality_comparable_v<_Tp, _Up> &&
                             is_integral<_Tp>::value && is_integral<_Up>::value &&
                             is_signed<_Tp>::value == is_signed<_Up>::value,
                         int> = 0>
@@ -143,31 +208,23 @@ __find(__bit_iterator<_Cp, _IsConst> __first, __bit_iterator<_Cp, _IsConst> __la
 
 // segmented iterator implementation
 
-template <class>
-struct __find_segment;
-
 template <class _SegmentedIterator,
           class _Tp,
           class _Proj,
-          __enable_if_t<__is_segmented_iterator<_SegmentedIterator>::value, int> = 0>
+          __enable_if_t<__is_segmented_iterator_v<_SegmentedIterator>, int> = 0>
 _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 _SegmentedIterator
 __find(_SegmentedIterator __first, _SegmentedIterator __last, const _Tp& __value, _Proj& __proj) {
-  return std::__find_segment_if(std::move(__first), std::move(__last), __find_segment<_Tp>(__value), __proj);
+  using __local_iterator = typename __segmented_iterator_traits<_SegmentedIterator>::__local_iterator;
+  return std::__find_segment_if(
+      std::move(__first),
+      std::move(__last),
+      [&__value](__local_iterator __lfirst, __local_iterator __llast, _Proj& __lproj) {
+        return std::__rewrap_iter(
+            __lfirst, std::__find(std::__unwrap_iter(__lfirst), std::__unwrap_iter(__llast), __value, __lproj));
+      },
+      __proj);
 }
 
-template <class _Tp>
-struct __find_segment {
-  const _Tp& __value_;
-
-  _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR __find_segment(const _Tp& __value) : __value_(__value) {}
-
-  template <class _InputIterator, class _Proj>
-  _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR _InputIterator
-  operator()(_InputIterator __first, _InputIterator __last, _Proj& __proj) const {
-    return std::__find(__first, __last, __value_, __proj);
-  }
-};
-
 // public API
 template <class _InputIterator, class _Tp>
 [[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 _InputIterator
diff --git a/lib/libcxx/include/__algorithm/find_end.h b/lib/libcxx/include/__algorithm/find_end.h
index 86b4a3e2e3..84b43e31a3 100644
--- a/lib/libcxx/include/__algorithm/find_end.h
+++ b/lib/libcxx/include/__algorithm/find_end.h
@@ -76,6 +76,111 @@ _LIBCPP_HIDE_FROM_ABI inline _LIBCPP_CONSTEXPR_SINCE_CXX14 pair<_Iter1, _Iter1>
   }
 }
 
+template <class _AlgPolicy,
+          class _Pred,
+          class _Iter1,
+          class _Sent1,
+          class _Iter2,
+          class _Sent2,
+          class _Proj1,
+          class _Proj2>
+_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 pair<_Iter1, _Iter1> __find_end_impl(
+    _Iter1 __first1,
+    _Sent1 __sent1,
+    _Iter2 __first2,
+    _Sent2 __sent2,
+    _Pred& __pred,
+    _Proj1& __proj1,
+    _Proj2& __proj2,
+    bidirectional_iterator_tag,
+    bidirectional_iterator_tag) {
+  auto __last1 = _IterOps<_AlgPolicy>::next(__first1, __sent1);
+  auto __last2 = _IterOps<_AlgPolicy>::next(__first2, __sent2);
+  // modeled after search algorithm (in reverse)
+  if (__first2 == __last2)
+    return std::make_pair(__last1, __last1); // Everything matches an empty sequence
+  _Iter1 __l1 = __last1;
+  _Iter2 __l2 = __last2;
+  --__l2;
+  while (true) {
+    // Find last element in sequence 1 that matches *(__last2-1), with a minimum of loop checks
+    while (true) {
+      if (__first1 == __l1) // return __last1 if no element matches *(__last2-1)
+        return std::make_pair(__last1, __last1);
+      if (std::__invoke(__pred, std::__invoke(__proj1, *--__l1), std::__invoke(__proj2, *__l2)))
+        break;
+    }
+    // *__l1 matches *__l2, now match elements before here
+    _Iter1 __match_last = __l1;
+    _Iter1 __m1         = __l1;
+    _Iter2 __m2         = __l2;
+    while (true) {
+      if (__m2 == __first2) // If pattern exhausted, __m1 is the answer (works for 1 element pattern)
+        return std::make_pair(__m1, ++__match_last);
+      if (__m1 == __first1) // Otherwise if source exhausted, pattern not found
+        return std::make_pair(__last1, __last1);
+
+      // if there is a mismatch, restart with a new __l1
+      if (!std::__invoke(__pred, std::__invoke(__proj1, *--__m1), std::__invoke(__proj2, *--__m2))) {
+        break;
+      } // else there is a match, check next elements
+    }
+  }
+}
+
+template <class _AlgPolicy,
+          class _Pred,
+          class _Iter1,
+          class _Sent1,
+          class _Iter2,
+          class _Sent2,
+          class _Proj1,
+          class _Proj2>
+_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 pair<_Iter1, _Iter1> __find_end_impl(
+    _Iter1 __first1,
+    _Sent1 __sent1,
+    _Iter2 __first2,
+    _Sent2 __sent2,
+    _Pred& __pred,
+    _Proj1& __proj1,
+    _Proj2& __proj2,
+    random_access_iterator_tag,
+    random_access_iterator_tag) {
+  typedef typename iterator_traits<_Iter1>::difference_type _D1;
+  auto __last1 = _IterOps<_AlgPolicy>::next(__first1, __sent1);
+  auto __last2 = _IterOps<_AlgPolicy>::next(__first2, __sent2);
+  // Take advantage of knowing source and pattern lengths.  Stop short when source is smaller than pattern
+  auto __len2 = __last2 - __first2;
+  if (__len2 == 0)
+    return std::make_pair(__last1, __last1);
+  auto __len1 = __last1 - __first1;
+  if (__len1 < __len2)
+    return std::make_pair(__last1, __last1);
+  const _Iter1 __s = __first1 + _D1(__len2 - 1); // End of pattern match can't go before here
+  _Iter1 __l1      = __last1;
+  _Iter2 __l2      = __last2;
+  --__l2;
+  while (true) {
+    while (true) {
+      if (__s == __l1)
+        return std::make_pair(__last1, __last1);
+      if (std::__invoke(__pred, std::__invoke(__proj1, *--__l1), std::__invoke(__proj2, *__l2)))
+        break;
+    }
+    _Iter1 __last_match = __l1;
+    _Iter1 __m1         = __l1;
+    _Iter2 __m2         = __l2;
+    while (true) {
+      if (__m2 == __first2)
+        return std::make_pair(__m1, ++__last_match);
+      // no need to check range on __m1 because __s guarantees we have enough source
+      if (!std::__invoke(__pred, std::__invoke(__proj1, *--__m1), std::__invoke(__proj2, *--__m2))) {
+        break;
+      }
+    }
+  }
+}
+
 template <class _ForwardIterator1, class _ForwardIterator2, class _BinaryPredicate>
 [[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 _ForwardIterator1 __find_end_classic(
     _ForwardIterator1 __first1,
diff --git a/lib/libcxx/include/__algorithm/for_each.h b/lib/libcxx/include/__algorithm/for_each.h
index 4167eec350..85fedce3d9 100644
--- a/lib/libcxx/include/__algorithm/for_each.h
+++ b/lib/libcxx/include/__algorithm/for_each.h
@@ -11,45 +11,41 @@
 #define _LIBCPP___ALGORITHM_FOR_EACH_H
 
 #include <__algorithm/for_each_segment.h>
+#include <__algorithm/specialized_algorithms.h>
 #include <__config>
 #include <__functional/identity.h>
 #include <__iterator/segmented_iterator.h>
-#include <__type_traits/enable_if.h>
 #include <__type_traits/invoke.h>
-#include <__utility/move.h>
+#include <__type_traits/is_same.h>
 
 #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
 #  pragma GCC system_header
 #endif
 
-_LIBCPP_PUSH_MACROS
-#include <__undef_macros>
-
 _LIBCPP_BEGIN_NAMESPACE_STD
 
 template <class _InputIterator, class _Sent, class _Func, class _Proj>
 _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 _InputIterator
-__for_each(_InputIterator __first, _Sent __last, _Func& __f, _Proj& __proj) {
+__for_each(_InputIterator __first, _Sent __last, _Func& __func, _Proj& __proj) {
+#ifndef _LIBCPP_CXX03_LANG
+  if constexpr (using _SpecialAlg =
+                    __specialized_algorithm<_Algorithm::__for_each, __iterator_pair<_InputIterator, _Sent>>;
+                _SpecialAlg::__has_algorithm) {
+    _SpecialAlg()(__first, __last, __func, __proj);
+    return __last;
+  } else if constexpr (is_same<_InputIterator, _Sent>::value && __is_segmented_iterator_v<_InputIterator>) {
+    using __local_iterator_t = typename __segmented_iterator_traits<_InputIterator>::__local_iterator;
+    std::__for_each_segment(__first, __last, [&](__local_iterator_t __lfirst, __local_iterator_t __llast) {
+      std::__for_each(__lfirst, __llast, __func, __proj);
+    });
+    return __last;
+  }
+#endif
   for (; __first != __last; ++__first)
-    std::__invoke(__f, std::__invoke(__proj, *__first));
+    std::__invoke(__func, std::__invoke(__proj, *__first));
   return __first;
 }
 
-#ifndef _LIBCPP_CXX03_LANG
-template <class _SegmentedIterator,
-          class _Func,
-          class _Proj,
-          __enable_if_t<__is_segmented_iterator<_SegmentedIterator>::value, int> = 0>
-_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 _SegmentedIterator
-__for_each(_SegmentedIterator __first, _SegmentedIterator __last, _Func& __func, _Proj& __proj) {
-  using __local_iterator_t = typename __segmented_iterator_traits<_SegmentedIterator>::__local_iterator;
-  std::__for_each_segment(__first, __last, [&](__local_iterator_t __lfirst, __local_iterator_t __llast) {
-    std::__for_each(__lfirst, __llast, __func, __proj);
-  });
-  return __last;
-}
-#endif // !_LIBCPP_CXX03_LANG
-
 template <class _InputIterator, class _Func>
 _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 _Func
 for_each(_InputIterator __first, _InputIterator __last, _Func __f) {
@@ -60,6 +56,4 @@ for_each(_InputIterator __first, _InputIterator __last, _Func __f) {
 
 _LIBCPP_END_NAMESPACE_STD
 
-_LIBCPP_POP_MACROS
-
 #endif // _LIBCPP___ALGORITHM_FOR_EACH_H
diff --git a/lib/libcxx/include/__algorithm/for_each_n.h b/lib/libcxx/include/__algorithm/for_each_n.h
index 9a6c6bb517..72c7adb093 100644
--- a/lib/libcxx/include/__algorithm/for_each_n.h
+++ b/lib/libcxx/include/__algorithm/for_each_n.h
@@ -16,10 +16,7 @@
 #include <__functional/identity.h>
 #include <__iterator/iterator_traits.h>
 #include <__iterator/segmented_iterator.h>
-#include <__type_traits/disjunction.h>
-#include <__type_traits/enable_if.h>
 #include <__type_traits/invoke.h>
-#include <__type_traits/negation.h>
 #include <__utility/convert_to_integral.h>
 #include <__utility/move.h>
 
@@ -32,57 +29,33 @@ _LIBCPP_PUSH_MACROS
 
 _LIBCPP_BEGIN_NAMESPACE_STD
 
-template <class _InputIterator,
-          class _Size,
-          class _Func,
-          class _Proj,
-          __enable_if_t<!__has_random_access_iterator_category<_InputIterator>::value &&
-                            _Or< _Not<__is_segmented_iterator<_InputIterator> >,
-                                 _Not<__has_random_access_local_iterator<_InputIterator> > >::value,
-                        int> = 0>
+template <class _InputIterator, class _Size, class _Func, class _Proj>
 _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 _InputIterator
 __for_each_n(_InputIterator __first, _Size __orig_n, _Func& __f, _Proj& __proj) {
   typedef decltype(std::__convert_to_integral(__orig_n)) _IntegralSize;
   _IntegralSize __n = __orig_n;
-  while (__n > 0) {
-    std::__invoke(__f, std::__invoke(__proj, *__first));
-    ++__first;
-    --__n;
-  }
-  return std::move(__first);
-}
-
-template <class _RandIter,
-          class _Size,
-          class _Func,
-          class _Proj,
-          __enable_if_t<__has_random_access_iterator_category<_RandIter>::value, int> = 0>
-_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 _RandIter
-__for_each_n(_RandIter __first, _Size __orig_n, _Func& __f, _Proj& __proj) {
-  typename std::iterator_traits<_RandIter>::difference_type __n = __orig_n;
-  auto __last                                                   = __first + __n;
-  std::__for_each(__first, __last, __f, __proj);
-  return __last;
-}
 
 #ifndef _LIBCPP_CXX03_LANG
-template <class _SegmentedIterator,
-          class _Size,
-          class _Func,
-          class _Proj,
-          __enable_if_t<!__has_random_access_iterator_category<_SegmentedIterator>::value &&
-                            __is_segmented_iterator<_SegmentedIterator>::value &&
-                            __has_random_access_iterator_category<
-                                typename __segmented_iterator_traits<_SegmentedIterator>::__local_iterator>::value,
-                        int> = 0>
-_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 _SegmentedIterator
-__for_each_n(_SegmentedIterator __first, _Size __orig_n, _Func& __f, _Proj& __proj) {
-  using __local_iterator_t = typename __segmented_iterator_traits<_SegmentedIterator>::__local_iterator;
-  return std::__for_each_n_segment(__first, __orig_n, [&](__local_iterator_t __lfirst, __local_iterator_t __llast) {
-    std::__for_each(__lfirst, __llast, __f, __proj);
-  });
+  if constexpr (__is_segmented_iterator_v<_InputIterator>) {
+    using __local_iterator = typename __segmented_iterator_traits<_InputIterator>::__local_iterator;
+    if constexpr (__has_random_access_iterator_category<__local_iterator>::value) {
+      return std::__for_each_n_segment(__first, __orig_n, [&](__local_iterator __lfirst, __local_iterator __llast) {
+        std::__for_each(__lfirst, __llast, __f, __proj);
+      });
+    } else {
+      return std::__for_each(__first, __first + __n, __f, __proj);
+    }
+  } else
+#endif
+  {
+    while (__n > 0) {
+      std::__invoke(__f, std::__invoke(__proj, *__first));
+      ++__first;
+      --__n;
+    }
+    return std::move(__first);
+  }
 }
-#endif // !_LIBCPP_CXX03_LANG
 
 #if _LIBCPP_STD_VER >= 17
 
diff --git a/lib/libcxx/include/__algorithm/for_each_n_segment.h b/lib/libcxx/include/__algorithm/for_each_n_segment.h
index 1b522fb373..a433df5d09 100644
--- a/lib/libcxx/include/__algorithm/for_each_n_segment.h
+++ b/lib/libcxx/include/__algorithm/for_each_n_segment.h
@@ -27,7 +27,7 @@ _LIBCPP_BEGIN_NAMESPACE_STD
 template <class _SegmentedIterator, class _Size, class _Functor>
 _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 _SegmentedIterator
 __for_each_n_segment(_SegmentedIterator __first, _Size __orig_n, _Functor __func) {
-  static_assert(__is_segmented_iterator<_SegmentedIterator>::value &&
+  static_assert(__is_segmented_iterator_v<_SegmentedIterator> &&
                     __has_random_access_iterator_category<
                         typename __segmented_iterator_traits<_SegmentedIterator>::__local_iterator>::value,
                 "__for_each_n_segment only works with segmented iterators with random-access local iterators");
diff --git a/lib/libcxx/include/__algorithm/for_each_segment.h b/lib/libcxx/include/__algorithm/for_each_segment.h
index 93aa8259b2..c02436c9aa 100644
--- a/lib/libcxx/include/__algorithm/for_each_segment.h
+++ b/lib/libcxx/include/__algorithm/for_each_segment.h
@@ -48,6 +48,32 @@ __for_each_segment(_SegmentedIterator __first, _SegmentedIterator __last, _Funct
   __func(_Traits::__begin(__sfirst), _Traits::__local(__last));
 }
 
+template <class _SegmentedIterator, class _Functor>
+_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 void
+__for_each_segment_backward(_SegmentedIterator __first, _SegmentedIterator __last, _Functor __func) {
+  using _Traits = __segmented_iterator_traits<_SegmentedIterator>;
+
+  auto __sfirst = _Traits::__segment(__first);
+  auto __slast  = _Traits::__segment(__last);
+
+  // We are in a single segment, so we might not be at the beginning or end
+  if (__sfirst == __slast) {
+    __func(_Traits::__local(__first), _Traits::__local(__last));
+    return;
+  }
+
+  // We have more than one segment. Iterate over the last segment, since we might not start at the end
+  __func(_Traits::__begin(__slast), _Traits::__local(__last));
+  --__slast;
+  // iterate over the segments which are guaranteed to be completely in the range
+  while (__sfirst != __slast) {
+    __func(_Traits::__begin(__slast), _Traits::__end(__slast));
+    --__slast;
+  }
+  // iterate over the first segment
+  __func(_Traits::__local(__first), _Traits::__end(__slast));
+}
+
 _LIBCPP_END_NAMESPACE_STD
 
 #endif // _LIBCPP___ALGORITHM_FOR_EACH_SEGMENT_H
diff --git a/lib/libcxx/include/__algorithm/generate.h b/lib/libcxx/include/__algorithm/generate.h
index c95b527402..c4cd75cd0a 100644
--- a/lib/libcxx/include/__algorithm/generate.h
+++ b/lib/libcxx/include/__algorithm/generate.h
@@ -9,7 +9,9 @@
 #ifndef _LIBCPP___ALGORITHM_GENERATE_H
 #define _LIBCPP___ALGORITHM_GENERATE_H
 
+#include <__algorithm/for_each.h>
 #include <__config>
+#include <__utility/forward.h>
 
 #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
 #  pragma GCC system_header
@@ -20,8 +22,8 @@ _LIBCPP_BEGIN_NAMESPACE_STD
 template <class _ForwardIterator, class _Generator>
 inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 void
 generate(_ForwardIterator __first, _ForwardIterator __last, _Generator __gen) {
-  for (; __first != __last; ++__first)
-    *__first = __gen();
+  using __iter_ref = decltype(*__first);
+  std::for_each(__first, __last, [&](__iter_ref __element) { std::forward<__iter_ref>(__element) = __gen(); });
 }
 
 _LIBCPP_END_NAMESPACE_STD
diff --git a/lib/libcxx/include/__algorithm/generate_n.h b/lib/libcxx/include/__algorithm/generate_n.h
index f36403fd0f..23899e49e0 100644
--- a/lib/libcxx/include/__algorithm/generate_n.h
+++ b/lib/libcxx/include/__algorithm/generate_n.h
@@ -9,25 +9,38 @@
 #ifndef _LIBCPP___ALGORITHM_GENERATE_N_H
 #define _LIBCPP___ALGORITHM_GENERATE_N_H
 
+#include <__algorithm/for_each_n.h>
 #include <__config>
-#include <__utility/convert_to_integral.h>
+#include <__functional/identity.h>
+#include <__utility/forward.h>
+#include <__utility/move.h>
 
 #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
 #  pragma GCC system_header
 #endif
 
+_LIBCPP_PUSH_MACROS
+#include <__undef_macros>
+
 _LIBCPP_BEGIN_NAMESPACE_STD
 
+template <class _OutputIterator, class _Size, class _Generator>
+inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 _OutputIterator
+__generate_n(_OutputIterator __first, _Size __orig_n, _Generator& __gen) {
+  using __iter_ref = decltype(*__first);
+  __identity __proj;
+  auto __f = [&](__iter_ref __element) { std::forward<__iter_ref>(__element) = __gen(); };
+  return std::__for_each_n(std::move(__first), __orig_n, __f, __proj);
+}
+
 template <class _OutputIterator, class _Size, class _Generator>
 inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 _OutputIterator
 generate_n(_OutputIterator __first, _Size __orig_n, _Generator __gen) {
-  typedef decltype(std::__convert_to_integral(__orig_n)) _IntegralSize;
-  _IntegralSize __n = __orig_n;
-  for (; __n > 0; ++__first, (void)--__n)
-    *__first = __gen();
-  return __first;
+  return std::__generate_n(std::move(__first), __orig_n, __gen);
 }
 
 _LIBCPP_END_NAMESPACE_STD
 
+_LIBCPP_POP_MACROS
+
 #endif // _LIBCPP___ALGORITHM_GENERATE_N_H
diff --git a/lib/libcxx/include/__algorithm/is_permutation.h b/lib/libcxx/include/__algorithm/is_permutation.h
index 1afb11596b..86f469c279 100644
--- a/lib/libcxx/include/__algorithm/is_permutation.h
+++ b/lib/libcxx/include/__algorithm/is_permutation.h
@@ -78,7 +78,7 @@ _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 bool __is_permutation_impl(
     _Pred&& __pred,
     _Proj1&& __proj1,
     _Proj2&& __proj2) {
-  using _D1 = __iter_diff_t<_Iter1>;
+  using _D1 = __iterator_difference_type<_Iter1>;
 
   for (auto __i = __first1; __i != __last1; ++__i) {
     //  Have we already counted the number of *__i in [f1, l1)?
@@ -126,7 +126,7 @@ template <class _AlgPolicy, class _ForwardIterator1, class _Sentinel1, class _Fo
     return true;
 
   //  __first1 != __last1 && *__first1 != *__first2
-  using _D1 = __iter_diff_t<_ForwardIterator1>;
+  using _D1 = __iterator_difference_type<_ForwardIterator1>;
   _D1 __l1  = _IterOps<_AlgPolicy>::distance(__first1, __last1);
   if (__l1 == _D1(1))
     return false;
@@ -173,10 +173,10 @@ _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 bool __is_permutation(
   if (__first2 == __last2) // Second range is shorter
     return false;
 
-  using _D1 = __iter_diff_t<_Iter1>;
+  using _D1 = __iterator_difference_type<_Iter1>;
   _D1 __l1  = _IterOps<_AlgPolicy>::distance(__first1, __last1);
 
-  using _D2 = __iter_diff_t<_Iter2>;
+  using _D2 = __iterator_difference_type<_Iter2>;
   _D2 __l2  = _IterOps<_AlgPolicy>::distance(__first2, __last2);
   if (__l1 != __l2)
     return false;
diff --git a/lib/libcxx/include/__algorithm/iterator_operations.h b/lib/libcxx/include/__algorithm/iterator_operations.h
index e5c89c1e67..1aa2f8d160 100644
--- a/lib/libcxx/include/__algorithm/iterator_operations.h
+++ b/lib/libcxx/include/__algorithm/iterator_operations.h
@@ -219,6 +219,9 @@ private:
 template <class _AlgPolicy, class _Iter>
 using __policy_iter_diff_t _LIBCPP_NODEBUG = typename _IterOps<_AlgPolicy>::template __difference_type<_Iter>;
 
+template <class _AlgPolicy, class _Iter>
+using __policy_value_type _LIBCPP_NODEBUG = typename _IterOps<_AlgPolicy>::template __value_type<_Iter>;
+
 _LIBCPP_END_NAMESPACE_STD
 
 _LIBCPP_POP_MACROS
diff --git a/lib/libcxx/include/__algorithm/lexicographical_compare.h b/lib/libcxx/include/__algorithm/lexicographical_compare.h
index ebe7e3b56a..a12add69d4 100644
--- a/lib/libcxx/include/__algorithm/lexicographical_compare.h
+++ b/lib/libcxx/include/__algorithm/lexicographical_compare.h
@@ -66,8 +66,8 @@ template <class _Tp,
           class _Proj2,
           class _Comp,
           __enable_if_t<__desugars_to_v<__totally_ordered_less_tag, _Comp, _Tp, _Tp> && !is_volatile<_Tp>::value &&
-                            __libcpp_is_trivially_equality_comparable<_Tp, _Tp>::value &&
-                            __is_identity<_Proj1>::value && __is_identity<_Proj2>::value,
+                            __is_trivially_equality_comparable_v<_Tp, _Tp> && __is_identity<_Proj1>::value &&
+                            __is_identity<_Proj2>::value,
                         int> = 0>
 _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 bool
 __lexicographical_compare(_Tp* __first1, _Tp* __last1, _Tp* __first2, _Tp* __last2, _Comp&, _Proj1&, _Proj2&) {
diff --git a/lib/libcxx/include/__algorithm/lexicographical_compare_three_way.h b/lib/libcxx/include/__algorithm/lexicographical_compare_three_way.h
index a5872e90cf..442223e79e 100644
--- a/lib/libcxx/include/__algorithm/lexicographical_compare_three_way.h
+++ b/lib/libcxx/include/__algorithm/lexicographical_compare_three_way.h
@@ -37,13 +37,13 @@ template <class _InputIterator1, class _InputIterator2, class _Cmp>
 _LIBCPP_HIDE_FROM_ABI constexpr auto __lexicographical_compare_three_way_fast_path(
     _InputIterator1 __first1, _InputIterator1 __last1, _InputIterator2 __first2, _InputIterator2 __last2, _Cmp& __comp)
     -> decltype(__comp(*__first1, *__first2)) {
-  static_assert(
-      signed_integral<__iter_diff_t<_InputIterator1>>, "Using a non-integral difference_type is undefined behavior.");
-  static_assert(
-      signed_integral<__iter_diff_t<_InputIterator2>>, "Using a non-integral difference_type is undefined behavior.");
+  static_assert(signed_integral<__iterator_difference_type<_InputIterator1>>,
+                "Using a non-integral difference_type is undefined behavior.");
+  static_assert(signed_integral<__iterator_difference_type<_InputIterator2>>,
+                "Using a non-integral difference_type is undefined behavior.");
 
-  using _Len1   = __iter_diff_t<_InputIterator1>;
-  using _Len2   = __iter_diff_t<_InputIterator2>;
+  using _Len1   = __iterator_difference_type<_InputIterator1>;
+  using _Len2   = __iterator_difference_type<_InputIterator2>;
   using _Common = common_type_t<_Len1, _Len2>;
 
   _Len1 __len1      = __last1 - __first1;
diff --git a/lib/libcxx/include/__algorithm/make_heap.h b/lib/libcxx/include/__algorithm/make_heap.h
index e8f0cdb273..f98a0d2f89 100644
--- a/lib/libcxx/include/__algorithm/make_heap.h
+++ b/lib/libcxx/include/__algorithm/make_heap.h
@@ -12,9 +12,11 @@
 #include <__algorithm/comp.h>
 #include <__algorithm/comp_ref_type.h>
 #include <__algorithm/iterator_operations.h>
+#include <__algorithm/push_heap.h>
 #include <__algorithm/sift_down.h>
 #include <__config>
 #include <__iterator/iterator_traits.h>
+#include <__type_traits/is_arithmetic.h>
 #include <__utility/move.h>
 
 #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
@@ -31,13 +33,23 @@ inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 void
 __make_heap(_RandomAccessIterator __first, _RandomAccessIterator __last, _Compare&& __comp) {
   __comp_ref_type<_Compare> __comp_ref = __comp;
 
-  using difference_type = typename iterator_traits<_RandomAccessIterator>::difference_type;
-  difference_type __n   = __last - __first;
+  using __diff_t     = __iterator_difference_type<_RandomAccessIterator>;
+  const __diff_t __n = __last - __first;
+
+  const bool __assume_both_children = is_arithmetic<__iterator_value_type<_RandomAccessIterator> >::value;
+
+  // While it would be correct to always assume we have both children, in practice we observed this to be a performance
+  // improvement only for arithmetic types.
+  const __diff_t __sift_down_n = __assume_both_children ? ((__n & 1) ? __n : __n - 1) : __n;
+
   if (__n > 1) {
     // start from the first parent, there is no need to consider children
-    for (difference_type __start = (__n - 2) / 2; __start >= 0; --__start) {
-      std::__sift_down<_AlgPolicy>(__first, __comp_ref, __n, __first + __start);
+
+    for (__diff_t __start = (__sift_down_n - 2) / 2; __start >= 0; --__start) {
+      std::__sift_down<_AlgPolicy, __assume_both_children>(__first, __comp_ref, __sift_down_n, __start);
     }
+    if _LIBCPP_CONSTEXPR (__assume_both_children)
+      std::__sift_up<_AlgPolicy>(__first, __last, __comp, __n);
   }
 }
 
diff --git a/lib/libcxx/include/__algorithm/mismatch.h b/lib/libcxx/include/__algorithm/mismatch.h
index a6836792c0..7111cd9398 100644
--- a/lib/libcxx/include/__algorithm/mismatch.h
+++ b/lib/libcxx/include/__algorithm/mismatch.h
@@ -60,7 +60,7 @@ __mismatch(_Iter1 __first1, _Sent1 __last1, _Iter2 __first2, _Pred& __pred, _Pro
 template <class _Iter>
 [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 pair<_Iter, _Iter>
 __mismatch_vectorized(_Iter __first1, _Iter __last1, _Iter __first2) {
-  using __value_type              = __iter_value_type<_Iter>;
+  using __value_type              = __iterator_value_type<_Iter>;
   constexpr size_t __unroll_count = 4;
   constexpr size_t __vec_size     = __native_vector_size<__value_type>;
   using __vec                     = __simd_vector<__value_type, __vec_size>;
@@ -136,7 +136,7 @@ template <class _Tp,
           class _Proj2,
           __enable_if_t<!is_integral<_Tp>::value && __desugars_to_v<__equal_tag, _Pred, _Tp, _Tp> &&
                             __is_identity<_Proj1>::value && __is_identity<_Proj2>::value &&
-                            __can_map_to_integer_v<_Tp> && __libcpp_is_trivially_equality_comparable<_Tp, _Tp>::value,
+                            __can_map_to_integer_v<_Tp> && __is_trivially_equality_comparable_v<_Tp, _Tp>,
                         int> = 0>
 [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 pair<_Tp*, _Tp*>
 __mismatch(_Tp* __first1, _Tp* __last1, _Tp* __first2, _Pred& __pred, _Proj1& __proj1, _Proj2& __proj2) {
diff --git a/lib/libcxx/include/__algorithm/move.h b/lib/libcxx/include/__algorithm/move.h
index a3320e9f19..ddadfa778f 100644
--- a/lib/libcxx/include/__algorithm/move.h
+++ b/lib/libcxx/include/__algorithm/move.h
@@ -50,37 +50,26 @@ struct __move_impl {
     return std::make_pair(std::move(__first), std::move(__result));
   }
 
-  template <class _InIter, class _OutIter>
-  struct _MoveSegment {
-    using _Traits _LIBCPP_NODEBUG = __segmented_iterator_traits<_InIter>;
-
-    _OutIter& __result_;
-
-    _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 explicit _MoveSegment(_OutIter& __result)
-        : __result_(__result) {}
-
-    _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 void
-    operator()(typename _Traits::__local_iterator __lfirst, typename _Traits::__local_iterator __llast) {
-      __result_ = std::__move<_AlgPolicy>(__lfirst, __llast, std::move(__result_)).second;
-    }
-  };
-
-  template <class _InIter, class _OutIter, __enable_if_t<__is_segmented_iterator<_InIter>::value, int> = 0>
+  template <class _InIter, class _OutIter, __enable_if_t<__is_segmented_iterator_v<_InIter>, int> = 0>
   _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 pair<_InIter, _OutIter>
   operator()(_InIter __first, _InIter __last, _OutIter __result) const {
-    std::__for_each_segment(__first, __last, _MoveSegment<_InIter, _OutIter>(__result));
+    using __local_iterator = typename __segmented_iterator_traits<_InIter>::__local_iterator;
+    std::__for_each_segment(__first, __last, [&__result](__local_iterator __lfirst, __local_iterator __llast) {
+      __result = std::__move<_AlgPolicy>(__lfirst, __llast, std::move(__result)).second;
+    });
     return std::make_pair(__last, std::move(__result));
   }
 
   template <class _InIter,
             class _OutIter,
             __enable_if_t<__has_random_access_iterator_category<_InIter>::value &&
-                              !__is_segmented_iterator<_InIter>::value && __is_segmented_iterator<_OutIter>::value,
+                              !__is_segmented_iterator_v<_InIter> && __is_segmented_iterator_v<_OutIter>,
                           int> = 0>
   _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 pair<_InIter, _OutIter>
   operator()(_InIter __first, _InIter __last, _OutIter __result) const {
     using _Traits = __segmented_iterator_traits<_OutIter>;
-    using _DiffT  = typename common_type<__iter_diff_t<_InIter>, __iter_diff_t<_OutIter> >::type;
+    using _DiffT =
+        typename common_type<__iterator_difference_type<_InIter>, __iterator_difference_type<_OutIter> >::type;
 
     if (__first == __last)
       return std::make_pair(std::move(__first), std::move(__result));
diff --git a/lib/libcxx/include/__algorithm/move_backward.h b/lib/libcxx/include/__algorithm/move_backward.h
index 14482fee18..43b72057a5 100644
--- a/lib/libcxx/include/__algorithm/move_backward.h
+++ b/lib/libcxx/include/__algorithm/move_backward.h
@@ -11,6 +11,7 @@
 
 #include <__algorithm/copy_backward.h>
 #include <__algorithm/copy_move_common.h>
+#include <__algorithm/for_each_segment.h>
 #include <__algorithm/iterator_operations.h>
 #include <__algorithm/min.h>
 #include <__config>
@@ -51,42 +52,26 @@ struct __move_backward_impl {
     return std::make_pair(std::move(__original_last_iter), std::move(__result));
   }
 
-  template <class _InIter, class _OutIter, __enable_if_t<__is_segmented_iterator<_InIter>::value, int> = 0>
+  template <class _InIter, class _OutIter, __enable_if_t<__is_segmented_iterator_v<_InIter>, int> = 0>
   _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 pair<_InIter, _OutIter>
   operator()(_InIter __first, _InIter __last, _OutIter __result) const {
-    using _Traits = __segmented_iterator_traits<_InIter>;
-    auto __sfirst = _Traits::__segment(__first);
-    auto __slast  = _Traits::__segment(__last);
-    if (__sfirst == __slast) {
-      auto __iters =
-          std::__move_backward<_AlgPolicy>(_Traits::__local(__first), _Traits::__local(__last), std::move(__result));
-      return std::make_pair(__last, __iters.second);
-    }
-
-    __result =
-        std::__move_backward<_AlgPolicy>(_Traits::__begin(__slast), _Traits::__local(__last), std::move(__result))
-            .second;
-    --__slast;
-    while (__sfirst != __slast) {
-      __result =
-          std::__move_backward<_AlgPolicy>(_Traits::__begin(__slast), _Traits::__end(__slast), std::move(__result))
-              .second;
-      --__slast;
-    }
-    __result = std::__move_backward<_AlgPolicy>(_Traits::__local(__first), _Traits::__end(__slast), std::move(__result))
-                   .second;
+    using __local_iterator = typename __segmented_iterator_traits<_InIter>::__local_iterator;
+    std::__for_each_segment_backward(__first, __last, [&__result](__local_iterator __lfirst, __local_iterator __llast) {
+      __result = std::__move_backward<_AlgPolicy>(std::move(__lfirst), std::move(__llast), std::move(__result)).second;
+    });
     return std::make_pair(__last, std::move(__result));
   }
 
   template <class _InIter,
             class _OutIter,
             __enable_if_t<__has_random_access_iterator_category<_InIter>::value &&
-                              !__is_segmented_iterator<_InIter>::value && __is_segmented_iterator<_OutIter>::value,
+                              !__is_segmented_iterator_v<_InIter> && __is_segmented_iterator_v<_OutIter>,
                           int> = 0>
   _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 pair<_InIter, _OutIter>
   operator()(_InIter __first, _InIter __last, _OutIter __result) const {
     using _Traits = __segmented_iterator_traits<_OutIter>;
-    using _DiffT  = typename common_type<__iter_diff_t<_InIter>, __iter_diff_t<_OutIter> >::type;
+    using _DiffT =
+        typename common_type<__iterator_difference_type<_InIter>, __iterator_difference_type<_OutIter> >::type;
 
     // When the range contains no elements, __result might not be a valid iterator
     if (__first == __last)
diff --git a/lib/libcxx/include/__algorithm/none_of.h b/lib/libcxx/include/__algorithm/none_of.h
index e6bd197622..1e1c8d1aad 100644
--- a/lib/libcxx/include/__algorithm/none_of.h
+++ b/lib/libcxx/include/__algorithm/none_of.h
@@ -10,7 +10,9 @@
 #ifndef _LIBCPP___ALGORITHM_NONE_OF_H
 #define _LIBCPP___ALGORITHM_NONE_OF_H
 
+#include <__algorithm/any_of.h>
 #include <__config>
+#include <__functional/identity.h>
 
 #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
 #  pragma GCC system_header
@@ -21,10 +23,8 @@ _LIBCPP_BEGIN_NAMESPACE_STD
 template <class _InputIterator, class _Predicate>
 [[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 bool
 none_of(_InputIterator __first, _InputIterator __last, _Predicate __pred) {
-  for (; __first != __last; ++__first)
-    if (__pred(*__first))
-      return false;
-  return true;
+  __identity __proj;
+  return !std::__any_of(__first, __last, __pred, __proj);
 }
 
 _LIBCPP_END_NAMESPACE_STD
diff --git a/lib/libcxx/include/__algorithm/partial_sort.h b/lib/libcxx/include/__algorithm/partial_sort.h
index 7f8d0c4914..4b39ae0cf2 100644
--- a/lib/libcxx/include/__algorithm/partial_sort.h
+++ b/lib/libcxx/include/__algorithm/partial_sort.h
@@ -45,7 +45,7 @@ _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 _RandomAccessIterator __part
   for (; __i != __last; ++__i) {
     if (__comp(*__i, *__first)) {
       _IterOps<_AlgPolicy>::iter_swap(__i, __first);
-      std::__sift_down<_AlgPolicy>(__first, __comp, __len, __first);
+      std::__sift_down<_AlgPolicy, false>(__first, __comp, __len, 0);
     }
   }
   std::__sort_heap<_AlgPolicy>(std::move(__first), std::move(__middle), __comp);
diff --git a/lib/libcxx/include/__algorithm/partial_sort_copy.h b/lib/libcxx/include/__algorithm/partial_sort_copy.h
index 172f53b290..2230dfc9cc 100644
--- a/lib/libcxx/include/__algorithm/partial_sort_copy.h
+++ b/lib/libcxx/include/__algorithm/partial_sort_copy.h
@@ -60,7 +60,7 @@ _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 pair<_InputIterator, _Random
     for (; __first != __last; ++__first)
       if (std::__invoke(__comp, std::__invoke(__proj1, *__first), std::__invoke(__proj2, *__result_first))) {
         *__result_first = *__first;
-        std::__sift_down<_AlgPolicy>(__result_first, __projected_comp, __len, __result_first);
+        std::__sift_down<_AlgPolicy, false>(__result_first, __projected_comp, __len, 0);
       }
     std::__sort_heap<_AlgPolicy>(__result_first, __r, __projected_comp);
   }
diff --git a/lib/libcxx/include/__algorithm/pstl.h b/lib/libcxx/include/__algorithm/pstl.h
index aa7b49de93..7169dd85df 100644
--- a/lib/libcxx/include/__algorithm/pstl.h
+++ b/lib/libcxx/include/__algorithm/pstl.h
@@ -115,7 +115,7 @@ template <class _ExecutionPolicy,
           class _Predicate,
           class _RawPolicy                                    = __remove_cvref_t<_ExecutionPolicy>,
           enable_if_t<is_execution_policy_v<_RawPolicy>, int> = 0>
-_LIBCPP_HIDE_FROM_ABI __iter_diff_t<_ForwardIterator>
+[[nodiscard]] _LIBCPP_HIDE_FROM_ABI __iterator_difference_type<_ForwardIterator>
 count_if(_ExecutionPolicy&& __policy, _ForwardIterator __first, _ForwardIterator __last, _Predicate __pred) {
   _LIBCPP_REQUIRE_CPP17_FORWARD_ITERATOR(
       _ForwardIterator, "count_if(first, last, pred) requires [first, last) to be ForwardIterators");
@@ -129,7 +129,7 @@ template <class _ExecutionPolicy,
           class _Tp,
           class _RawPolicy                                    = __remove_cvref_t<_ExecutionPolicy>,
           enable_if_t<is_execution_policy_v<_RawPolicy>, int> = 0>
-_LIBCPP_HIDE_FROM_ABI __iter_diff_t<_ForwardIterator>
+[[nodiscard]] _LIBCPP_HIDE_FROM_ABI __iterator_difference_type<_ForwardIterator>
 count(_ExecutionPolicy&& __policy, _ForwardIterator __first, _ForwardIterator __last, const _Tp& __value) {
   _LIBCPP_REQUIRE_CPP17_FORWARD_ITERATOR(
       _ForwardIterator, "count(first, last, val) requires [first, last) to be ForwardIterators");
@@ -144,7 +144,7 @@ template <class _ExecutionPolicy,
           class _Pred,
           class _RawPolicy                                    = __remove_cvref_t<_ExecutionPolicy>,
           enable_if_t<is_execution_policy_v<_RawPolicy>, int> = 0>
-_LIBCPP_HIDE_FROM_ABI bool
+[[nodiscard]] _LIBCPP_HIDE_FROM_ABI bool
 equal(_ExecutionPolicy&& __policy,
       _ForwardIterator1 __first1,
       _ForwardIterator1 __last1,
@@ -166,7 +166,7 @@ template <class _ExecutionPolicy,
           class _ForwardIterator2,
           class _RawPolicy                                    = __remove_cvref_t<_ExecutionPolicy>,
           enable_if_t<is_execution_policy_v<_RawPolicy>, int> = 0>
-_LIBCPP_HIDE_FROM_ABI bool
+[[nodiscard]] _LIBCPP_HIDE_FROM_ABI bool
 equal(_ExecutionPolicy&& __policy, _ForwardIterator1 __first1, _ForwardIterator1 __last1, _ForwardIterator2 __first2) {
   _LIBCPP_REQUIRE_CPP17_FORWARD_ITERATOR(_ForwardIterator1, "equal requires ForwardIterators");
   _LIBCPP_REQUIRE_CPP17_FORWARD_ITERATOR(_ForwardIterator2, "equal requires ForwardIterators");
@@ -185,7 +185,7 @@ template <class _ExecutionPolicy,
           class _Pred,
           class _RawPolicy                                    = __remove_cvref_t<_ExecutionPolicy>,
           enable_if_t<is_execution_policy_v<_RawPolicy>, int> = 0>
-_LIBCPP_HIDE_FROM_ABI bool
+[[nodiscard]] _LIBCPP_HIDE_FROM_ABI bool
 equal(_ExecutionPolicy&& __policy,
       _ForwardIterator1 __first1,
       _ForwardIterator1 __last1,
@@ -209,7 +209,7 @@ template <class _ExecutionPolicy,
           class _ForwardIterator2,
           class _RawPolicy                                    = __remove_cvref_t<_ExecutionPolicy>,
           enable_if_t<is_execution_policy_v<_RawPolicy>, int> = 0>
-_LIBCPP_HIDE_FROM_ABI bool
+[[nodiscard]] _LIBCPP_HIDE_FROM_ABI bool
 equal(_ExecutionPolicy&& __policy,
       _ForwardIterator1 __first1,
       _ForwardIterator1 __last1,
@@ -259,7 +259,7 @@ template <class _ExecutionPolicy,
           class _Predicate,
           class _RawPolicy                                    = __remove_cvref_t<_ExecutionPolicy>,
           enable_if_t<is_execution_policy_v<_RawPolicy>, int> = 0>
-_LIBCPP_HIDE_FROM_ABI _ForwardIterator
+[[nodiscard]] _LIBCPP_HIDE_FROM_ABI _ForwardIterator
 find_if(_ExecutionPolicy&& __policy, _ForwardIterator __first, _ForwardIterator __last, _Predicate __pred) {
   _LIBCPP_REQUIRE_CPP17_FORWARD_ITERATOR(_ForwardIterator, "find_if requires ForwardIterators");
   using _Implementation = __pstl::__dispatch<__pstl::__find_if, __pstl::__current_configuration, _RawPolicy>;
@@ -272,7 +272,7 @@ template <class _ExecutionPolicy,
           class _Predicate,
           class _RawPolicy                                    = __remove_cvref_t<_ExecutionPolicy>,
           enable_if_t<is_execution_policy_v<_RawPolicy>, int> = 0>
-_LIBCPP_HIDE_FROM_ABI _ForwardIterator
+[[nodiscard]] _LIBCPP_HIDE_FROM_ABI _ForwardIterator
 find_if_not(_ExecutionPolicy&& __policy, _ForwardIterator __first, _ForwardIterator __last, _Predicate __pred) {
   _LIBCPP_REQUIRE_CPP17_FORWARD_ITERATOR(_ForwardIterator, "find_if_not requires ForwardIterators");
   using _Implementation = __pstl::__dispatch<__pstl::__find_if_not, __pstl::__current_configuration, _RawPolicy>;
@@ -285,7 +285,7 @@ template <class _ExecutionPolicy,
           class _Tp,
           class _RawPolicy                                    = __remove_cvref_t<_ExecutionPolicy>,
           enable_if_t<is_execution_policy_v<_RawPolicy>, int> = 0>
-_LIBCPP_HIDE_FROM_ABI _ForwardIterator
+[[nodiscard]] _LIBCPP_HIDE_FROM_ABI _ForwardIterator
 find(_ExecutionPolicy&& __policy, _ForwardIterator __first, _ForwardIterator __last, const _Tp& __value) {
   _LIBCPP_REQUIRE_CPP17_FORWARD_ITERATOR(_ForwardIterator, "find requires ForwardIterators");
   using _Implementation = __pstl::__dispatch<__pstl::__find, __pstl::__current_configuration, _RawPolicy>;
diff --git a/lib/libcxx/include/__algorithm/radix_sort.h b/lib/libcxx/include/__algorithm/radix_sort.h
index 055d8a0765..5549a69f5e 100644
--- a/lib/libcxx/include/__algorithm/radix_sort.h
+++ b/lib/libcxx/include/__algorithm/radix_sort.h
@@ -72,14 +72,14 @@ _LIBCPP_BEGIN_NAMESPACE_STD
 #if _LIBCPP_STD_VER >= 14
 
 template <class _InputIterator, class _OutputIterator>
-_LIBCPP_HIDE_FROM_ABI constexpr pair<_OutputIterator, __iter_value_type<_InputIterator>>
+_LIBCPP_HIDE_FROM_ABI constexpr pair<_OutputIterator, __iterator_value_type<_InputIterator>>
 __partial_sum_max(_InputIterator __first, _InputIterator __last, _OutputIterator __result) {
   if (__first == __last)
     return {__result, 0};
 
-  auto __max                              = *__first;
-  __iter_value_type<_InputIterator> __sum = *__first;
-  *__result                               = __sum;
+  auto __max                                  = *__first;
+  __iterator_value_type<_InputIterator> __sum = *__first;
+  *__result                                   = __sum;
 
   while (++__first != __last) {
     if (__max < *__first) {
@@ -124,7 +124,7 @@ _LIBCPP_HIDE_FROM_ABI constexpr auto __nth_radix(size_t __radix_number, _Radix _
 template <class _ForwardIterator, class _Map, class _RandomAccessIterator>
 _LIBCPP_HIDE_FROM_ABI constexpr void
 __collect(_ForwardIterator __first, _ForwardIterator __last, _Map __map, _RandomAccessIterator __counters) {
-  using __value_type = __iter_value_type<_ForwardIterator>;
+  using __value_type = __iterator_value_type<_ForwardIterator>;
   using __traits     = __counting_sort_traits<__value_type, _Map>;
 
   std::for_each(__first, __last, [&__counters, &__map](const auto& __preimage) { ++__counters[__map(__preimage)]; });
@@ -160,7 +160,7 @@ _LIBCPP_HIDE_FROM_ABI constexpr bool __collect_impl(
     _RandomAccessIterator1 __counters,
     _RandomAccessIterator2 __maximums,
     index_sequence<_Radices...>) {
-  using __value_type                 = __iter_value_type<_ForwardIterator>;
+  using __value_type                 = __iterator_value_type<_ForwardIterator>;
   constexpr auto __radix_value_range = __radix_sort_traits<__value_type, _Map, _Radix>::__radix_value_range;
 
   auto __previous  = numeric_limits<__invoke_result_t<_Map, __value_type>>::min();
@@ -189,7 +189,7 @@ __collect(_ForwardIterator __first,
           _Radix __radix,
           _RandomAccessIterator1 __counters,
           _RandomAccessIterator2 __maximums) {
-  using __value_type           = __iter_value_type<_ForwardIterator>;
+  using __value_type           = __iterator_value_type<_ForwardIterator>;
   constexpr auto __radix_count = __radix_sort_traits<__value_type, _Map, _Radix>::__radix_count;
   return std::__collect_impl(
       __first, __last, __map, __radix, __counters, __maximums, make_index_sequence<__radix_count>());
@@ -213,10 +213,10 @@ _LIBCPP_HIDE_FROM_ABI constexpr void __dispose_backward(
 template <class _ForwardIterator, class _RandomAccessIterator, class _Map>
 _LIBCPP_HIDE_FROM_ABI constexpr _RandomAccessIterator
 __counting_sort_impl(_ForwardIterator __first, _ForwardIterator __last, _RandomAccessIterator __result, _Map __map) {
-  using __value_type = __iter_value_type<_ForwardIterator>;
+  using __value_type = __iterator_value_type<_ForwardIterator>;
   using __traits     = __counting_sort_traits<__value_type, _Map>;
 
-  __iter_diff_t<_RandomAccessIterator> __counters[__traits::__value_range + 1] = {0};
+  __iterator_difference_type<_RandomAccessIterator> __counters[__traits::__value_range + 1] = {0};
 
   std::__collect(__first, __last, __map, std::next(std::begin(__counters)));
   std::__dispose(__first, __last, __result, __map, std::begin(__counters));
@@ -224,12 +224,13 @@ __counting_sort_impl(_ForwardIterator __first, _ForwardIterator __last, _RandomA
   return __result + __counters[__traits::__value_range];
 }
 
-template <class _RandomAccessIterator1,
-          class _RandomAccessIterator2,
-          class _Map,
-          class _Radix,
-          enable_if_t< __radix_sort_traits<__iter_value_type<_RandomAccessIterator1>, _Map, _Radix>::__radix_count == 1,
-                       int> = 0>
+template <
+    class _RandomAccessIterator1,
+    class _RandomAccessIterator2,
+    class _Map,
+    class _Radix,
+    enable_if_t<__radix_sort_traits<__iterator_value_type<_RandomAccessIterator1>, _Map, _Radix>::__radix_count == 1,
+                int> = 0>
 _LIBCPP_HIDE_FROM_ABI constexpr void __radix_sort_impl(
     _RandomAccessIterator1 __first,
     _RandomAccessIterator1 __last,
@@ -243,24 +244,25 @@ _LIBCPP_HIDE_FROM_ABI constexpr void __radix_sort_impl(
   std::move(__buffer, __buffer_end, __first);
 }
 
-template <
-    class _RandomAccessIterator1,
-    class _RandomAccessIterator2,
-    class _Map,
-    class _Radix,
-    enable_if_t< __radix_sort_traits<__iter_value_type<_RandomAccessIterator1>, _Map, _Radix>::__radix_count % 2 == 0,
-                 int> = 0 >
+template <class _RandomAccessIterator1,
+          class _RandomAccessIterator2,
+          class _Map,
+          class _Radix,
+          enable_if_t<
+              __radix_sort_traits<__iterator_value_type<_RandomAccessIterator1>, _Map, _Radix>::__radix_count % 2 == 0,
+              int> = 0>
 _LIBCPP_HIDE_FROM_ABI constexpr void __radix_sort_impl(
     _RandomAccessIterator1 __first,
     _RandomAccessIterator1 __last,
     _RandomAccessIterator2 __buffer_begin,
     _Map __map,
     _Radix __radix) {
-  using __value_type = __iter_value_type<_RandomAccessIterator1>;
+  using __value_type = __iterator_value_type<_RandomAccessIterator1>;
   using __traits     = __radix_sort_traits<__value_type, _Map, _Radix>;
 
-  __iter_diff_t<_RandomAccessIterator1> __counters[__traits::__radix_count][__traits::__radix_value_range] = {{0}};
-  __iter_diff_t<_RandomAccessIterator1> __maximums[__traits::__radix_count]                                = {0};
+  __iterator_difference_type<_RandomAccessIterator1>
+      __counters[__traits::__radix_count][__traits::__radix_value_range]                 = {{0}};
+  __iterator_difference_type<_RandomAccessIterator1> __maximums[__traits::__radix_count] = {0};
   const auto __is_sorted = std::__collect(__first, __last, __map, __radix, __counters, __maximums);
   if (!__is_sorted) {
     const auto __range_size = std::distance(__first, __last);
diff --git a/lib/libcxx/include/__algorithm/ranges_copy_n.h b/lib/libcxx/include/__algorithm/ranges_copy_n.h
index 1fbc61674e..6bee4c3e7c 100644
--- a/lib/libcxx/include/__algorithm/ranges_copy_n.h
+++ b/lib/libcxx/include/__algorithm/ranges_copy_n.h
@@ -9,16 +9,12 @@
 #ifndef _LIBCPP___ALGORITHM_RANGES_COPY_N_H
 #define _LIBCPP___ALGORITHM_RANGES_COPY_N_H
 
-#include <__algorithm/copy.h>
+#include <__algorithm/copy_n.h>
 #include <__algorithm/in_out_result.h>
 #include <__algorithm/iterator_operations.h>
-#include <__algorithm/ranges_copy.h>
 #include <__config>
-#include <__functional/identity.h>
 #include <__iterator/concepts.h>
 #include <__iterator/incrementable_traits.h>
-#include <__iterator/unreachable_sentinel.h>
-#include <__iterator/wrap_iter.h>
 #include <__utility/move.h>
 
 #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
@@ -37,32 +33,13 @@ namespace ranges {
 template <class _Ip, class _Op>
 using copy_n_result = in_out_result<_Ip, _Op>;
 
-// TODO: Merge this with copy_n
 struct __copy_n {
-  template <class _InIter, class _DiffType, class _OutIter>
-  _LIBCPP_HIDE_FROM_ABI constexpr static copy_n_result<_InIter, _OutIter>
-  __go(_InIter __first, _DiffType __n, _OutIter __result) {
-    while (__n != 0) {
-      *__result = *__first;
-      ++__first;
-      ++__result;
-      --__n;
-    }
-    return {std::move(__first), std::move(__result)};
-  }
-
-  template <random_access_iterator _InIter, class _DiffType, random_access_iterator _OutIter>
-  _LIBCPP_HIDE_FROM_ABI constexpr static copy_n_result<_InIter, _OutIter>
-  __go(_InIter __first, _DiffType __n, _OutIter __result) {
-    auto __ret = std::__copy(__first, __first + __n, __result);
-    return {__ret.first, __ret.second};
-  }
-
   template <input_iterator _Ip, weakly_incrementable _Op>
     requires indirectly_copyable<_Ip, _Op>
   _LIBCPP_HIDE_FROM_ABI constexpr copy_n_result<_Ip, _Op>
   operator()(_Ip __first, iter_difference_t<_Ip> __n, _Op __result) const {
-    return __go(std::move(__first), __n, std::move(__result));
+    auto __res = std::__copy_n<_RangeAlgPolicy>(std::move(__first), __n, std::move(__result));
+    return {std::move(__res.first), std::move(__res.second)};
   }
 };
 
diff --git a/lib/libcxx/include/__algorithm/ranges_equal.h b/lib/libcxx/include/__algorithm/ranges_equal.h
index c26d13f002..8eb2fc1017 100644
--- a/lib/libcxx/include/__algorithm/ranges_equal.h
+++ b/lib/libcxx/include/__algorithm/ranges_equal.h
@@ -13,13 +13,12 @@
 #include <__algorithm/unwrap_range.h>
 #include <__config>
 #include <__functional/identity.h>
-#include <__functional/invoke.h>
 #include <__functional/ranges_operations.h>
 #include <__iterator/concepts.h>
-#include <__iterator/distance.h>
 #include <__iterator/indirectly_comparable.h>
 #include <__ranges/access.h>
 #include <__ranges/concepts.h>
+#include <__ranges/size.h>
 #include <__utility/move.h>
 
 #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
@@ -51,20 +50,17 @@ struct __equal {
       _Pred __pred   = {},
       _Proj1 __proj1 = {},
       _Proj2 __proj2 = {}) const {
-    if constexpr (sized_sentinel_for<_Sent1, _Iter1> && sized_sentinel_for<_Sent2, _Iter2>) {
+    static constexpr bool __both_sized = sized_sentinel_for<_Sent1, _Iter1> && sized_sentinel_for<_Sent2, _Iter2>;
+    if constexpr (__both_sized) {
       if (__last1 - __first1 != __last2 - __first2)
         return false;
     }
-    auto __unwrapped1 = std::__unwrap_range(std::move(__first1), std::move(__last1));
-    auto __unwrapped2 = std::__unwrap_range(std::move(__first2), std::move(__last2));
-    return std::__equal_impl(
-        std::move(__unwrapped1.first),
-        std::move(__unwrapped1.second),
-        std::move(__unwrapped2.first),
-        std::move(__unwrapped2.second),
-        __pred,
-        __proj1,
-        __proj2);
+
+    auto [__ufirst1, __ulast1] = std::__unwrap_range(std::move(__first1), std::move(__last1));
+    auto [__ufirst2, __ulast2] = std::__unwrap_range(std::move(__first2), std::move(__last2));
+
+    return std::__equal_impl<__both_sized>(
+        std::move(__ufirst1), std::move(__ulast1), std::move(__ufirst2), std::move(__ulast2), __pred, __proj1, __proj2);
   }
 
   template <input_range _Range1,
@@ -75,21 +71,16 @@ struct __equal {
     requires indirectly_comparable<iterator_t<_Range1>, iterator_t<_Range2>, _Pred, _Proj1, _Proj2>
   [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr bool operator()(
       _Range1&& __range1, _Range2&& __range2, _Pred __pred = {}, _Proj1 __proj1 = {}, _Proj2 __proj2 = {}) const {
-    if constexpr (sized_range<_Range1> && sized_range<_Range2>) {
-      if (ranges::distance(__range1) != ranges::distance(__range2))
+    static constexpr bool __both_sized = sized_range<_Range1> && sized_range<_Range2>;
+    if constexpr (__both_sized) {
+      if (ranges::size(__range1) != ranges::size(__range2))
         return false;
     }
-    auto __unwrapped1 = std::__unwrap_range(ranges::begin(__range1), ranges::end(__range1));
-    auto __unwrapped2 = std::__unwrap_range(ranges::begin(__range2), ranges::end(__range2));
-    return std::__equal_impl(
-        std::move(__unwrapped1.first),
-        std::move(__unwrapped1.second),
-        std::move(__unwrapped2.first),
-        std::move(__unwrapped2.second),
-        __pred,
-        __proj1,
-        __proj2);
-    return false;
+
+    auto [__ufirst1, __ulast1] = std::__unwrap_range(ranges::begin(__range1), ranges::end(__range1));
+    auto [__ufirst2, __ulast2] = std::__unwrap_range(ranges::begin(__range2), ranges::end(__range2));
+    return std::__equal_impl<__both_sized>(
+        std::move(__ufirst1), std::move(__ulast1), std::move(__ufirst2), std::move(__ulast2), __pred, __proj1, __proj2);
   }
 };
 
diff --git a/lib/libcxx/include/__algorithm/ranges_fill.h b/lib/libcxx/include/__algorithm/ranges_fill.h
index c248009f98..814ae6363f 100644
--- a/lib/libcxx/include/__algorithm/ranges_fill.h
+++ b/lib/libcxx/include/__algorithm/ranges_fill.h
@@ -9,12 +9,14 @@
 #ifndef _LIBCPP___ALGORITHM_RANGES_FILL_H
 #define _LIBCPP___ALGORITHM_RANGES_FILL_H
 
-#include <__algorithm/ranges_fill_n.h>
+#include <__algorithm/fill.h>
+#include <__algorithm/fill_n.h>
 #include <__config>
 #include <__iterator/concepts.h>
 #include <__ranges/access.h>
 #include <__ranges/concepts.h>
 #include <__ranges/dangling.h>
+#include <__utility/move.h>
 
 #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
 #  pragma GCC system_header
@@ -31,12 +33,11 @@ namespace ranges {
 struct __fill {
   template <class _Type, output_iterator<const _Type&> _Iter, sentinel_for<_Iter> _Sent>
   _LIBCPP_HIDE_FROM_ABI constexpr _Iter operator()(_Iter __first, _Sent __last, const _Type& __value) const {
-    if constexpr (random_access_iterator<_Iter> && sized_sentinel_for<_Sent, _Iter>) {
-      return ranges::fill_n(__first, __last - __first, __value);
+    if constexpr (sized_sentinel_for<_Sent, _Iter>) {
+      auto __n = __last - __first;
+      return std::__fill_n(std::move(__first), __n, __value);
     } else {
-      for (; __first != __last; ++__first)
-        *__first = __value;
-      return __first;
+      return std::__fill(std::move(__first), std::move(__last), __value);
     }
   }
 
diff --git a/lib/libcxx/include/__algorithm/ranges_for_each.h b/lib/libcxx/include/__algorithm/ranges_for_each.h
index e9c84e8583..7a547fb269 100644
--- a/lib/libcxx/include/__algorithm/ranges_for_each.h
+++ b/lib/libcxx/include/__algorithm/ranges_for_each.h
@@ -12,6 +12,7 @@
 #include <__algorithm/for_each.h>
 #include <__algorithm/for_each_n.h>
 #include <__algorithm/in_fun_result.h>
+#include <__algorithm/specialized_algorithms.h>
 #include <__concepts/assignable.h>
 #include <__config>
 #include <__functional/identity.h>
@@ -20,6 +21,7 @@
 #include <__ranges/access.h>
 #include <__ranges/concepts.h>
 #include <__ranges/dangling.h>
+#include <__type_traits/remove_cvref.h>
 #include <__utility/move.h>
 
 #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
@@ -71,7 +73,15 @@ public:
             indirectly_unary_invocable<projected<iterator_t<_Range>, _Proj>> _Func>
   _LIBCPP_HIDE_FROM_ABI constexpr for_each_result<borrowed_iterator_t<_Range>, _Func>
   operator()(_Range&& __range, _Func __func, _Proj __proj = {}) const {
-    return __for_each_impl(ranges::begin(__range), ranges::end(__range), __func, __proj);
+    using _SpecialAlg = __specialized_algorithm<_Algorithm::__for_each, __single_range<remove_cvref_t<_Range>>>;
+    if constexpr (_SpecialAlg::__has_algorithm) {
+      // __func is moved into the specialized algorithm, so the result must carry the functor it hands back
+      // (__func2), not the moved-from __func.
+      auto [__iter, __func2] = _SpecialAlg()(__range, std::move(__func), std::move(__proj));
+      return {std::move(__iter), std::move(__func2)};
+    } else {
+      return __for_each_impl(ranges::begin(__range), ranges::end(__range), __func, __proj);
+    }
   }
 };
 
diff --git a/lib/libcxx/include/__algorithm/ranges_generate_n.h b/lib/libcxx/include/__algorithm/ranges_generate_n.h
index a318994d0e..0cc9ce7b11 100644
--- a/lib/libcxx/include/__algorithm/ranges_generate_n.h
+++ b/lib/libcxx/include/__algorithm/ranges_generate_n.h
@@ -9,6 +9,7 @@
 #ifndef _LIBCPP___ALGORITHM_RANGES_GENERATE_N_H
 #define _LIBCPP___ALGORITHM_RANGES_GENERATE_N_H
 
+#include <__algorithm/generate_n.h>
 #include <__concepts/constructible.h>
 #include <__concepts/invocable.h>
 #include <__config>
@@ -38,12 +39,7 @@ struct __generate_n {
     requires invocable<_Func&> && indirectly_writable<_OutIter, invoke_result_t<_Func&>>
   _LIBCPP_HIDE_FROM_ABI constexpr _OutIter
   operator()(_OutIter __first, iter_difference_t<_OutIter> __n, _Func __gen) const {
-    for (; __n > 0; --__n) {
-      *__first = __gen();
-      ++__first;
-    }
-
-    return __first;
+    return std::__generate_n(std::move(__first), __n, __gen);
   }
 };
 
diff --git a/lib/libcxx/include/__algorithm/ranges_search_n.h b/lib/libcxx/include/__algorithm/ranges_search_n.h
index 81b568c096..746bfcc3d1 100644
--- a/lib/libcxx/include/__algorithm/ranges_search_n.h
+++ b/lib/libcxx/include/__algorithm/ranges_search_n.h
@@ -54,8 +54,8 @@ struct __search_n {
       }
 
       if constexpr (random_access_iterator<_Iter1>) {
-        auto __ret = std::__search_n_random_access_impl<_RangeAlgPolicy>(
-            __first, __last, __count, __value, __pred, __proj, __size);
+        auto __ret =
+            std::__search_n_random_access_impl<_RangeAlgPolicy>(__first, __count, __value, __pred, __proj, __size);
         return {std::move(__ret.first), std::move(__ret.second)};
       }
     }
diff --git a/lib/libcxx/include/__algorithm/rotate.h b/lib/libcxx/include/__algorithm/rotate.h
index c676980f0c..b6d9eb3b2d 100644
--- a/lib/libcxx/include/__algorithm/rotate.h
+++ b/lib/libcxx/include/__algorithm/rotate.h
@@ -12,16 +12,13 @@
 #include <__algorithm/copy.h>
 #include <__algorithm/copy_backward.h>
 #include <__algorithm/iterator_operations.h>
+#include <__algorithm/min.h>
 #include <__algorithm/move.h>
 #include <__algorithm/move_backward.h>
 #include <__algorithm/swap_ranges.h>
 #include <__config>
-#include <__cstddef/size_t.h>
 #include <__fwd/bit_reference.h>
 #include <__iterator/iterator_traits.h>
-#include <__memory/construct_at.h>
-#include <__memory/pointer_traits.h>
-#include <__type_traits/is_constant_evaluated.h>
 #include <__type_traits/is_trivially_assignable.h>
 #include <__utility/move.h>
 #include <__utility/pair.h>
@@ -89,46 +86,32 @@ __rotate_forward(_ForwardIterator __first, _ForwardIterator __middle, _ForwardIt
   return __r;
 }
 
-template <typename _Integral>
-inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX17 _Integral __algo_gcd(_Integral __x, _Integral __y) {
-  do {
-    _Integral __t = __x % __y;
-    __x           = __y;
-    __y           = __t;
-  } while (__y);
-  return __x;
-}
+template <class _AlgPolicy, class _Iter, class _Sent>
+_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX17 _Iter
+__rotate_random_access(_Iter __first, _Iter __middle, _Sent __sent) {
+  auto __left  = _IterOps<_AlgPolicy>::distance(__first, __middle);
+  auto __right = _IterOps<_AlgPolicy>::distance(__middle, __sent);
+  auto __last  = __first + __right;
 
-template <class _AlgPolicy, typename _RandomAccessIterator>
-_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX17 _RandomAccessIterator
-__rotate_gcd(_RandomAccessIterator __first, _RandomAccessIterator __middle, _RandomAccessIterator __last) {
-  typedef typename iterator_traits<_RandomAccessIterator>::difference_type difference_type;
-  typedef typename iterator_traits<_RandomAccessIterator>::value_type value_type;
-  using _Ops = _IterOps<_AlgPolicy>;
+  auto __min_len = std::min(__left, __right);
 
-  const difference_type __m1 = __middle - __first;
-  const difference_type __m2 = _Ops::distance(__middle, __last);
-  if (__m1 == __m2) {
-    std::__swap_ranges<_AlgPolicy>(__first, __middle, __middle, __last);
-    return __middle;
+  while (__min_len > 0) {
+    if (__left <= __right) {
+      do {
+        std::__swap_ranges<_AlgPolicy>(__first, __first + __left, __first + __left);
+        __first += __left;
+        __right -= __left;
+      } while (__left <= __right);
+      __min_len = __right;
+    } else {
+      do {
+        std::__swap_ranges<_AlgPolicy>(__first + (__left - __right), __first + __left, __first + __left);
+        __left -= __right;
+      } while (__left > __right);
+      __min_len = __left;
+    }
   }
-  const difference_type __g = std::__algo_gcd(__m1, __m2);
-  for (_RandomAccessIterator __p = __first + __g; __p != __first;) {
-    value_type __t(_Ops::__iter_move(--__p));
-    _RandomAccessIterator __p1 = __p;
-    _RandomAccessIterator __p2 = __p1 + __m1;
-    do {
-      *__p1                     = _Ops::__iter_move(__p2);
-      __p1                      = __p2;
-      const difference_type __d = _Ops::distance(__p2, __last);
-      if (__m1 < __d)
-        __p2 += __m1;
-      else
-        __p2 = __first + (__m1 - __d);
-    } while (__p2 != __p);
-    *__p1 = std::move(__t);
-  }
-  return __first + __m2;
+  return __last;
 }
 
 template <class _AlgPolicy, class _ForwardIterator>
@@ -170,7 +153,7 @@ inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 _RandomAccessIterator
       return std::__rotate_left<_AlgPolicy>(__first, __last);
     if (_IterOps<_AlgPolicy>::next(__middle) == __last)
       return std::__rotate_right<_AlgPolicy>(__first, __last);
-    return std::__rotate_gcd<_AlgPolicy>(__first, __middle, __last);
+    return std::__rotate_random_access<_AlgPolicy>(__first, __middle, __last);
   }
   return std::__rotate_forward<_AlgPolicy>(__first, __middle, __last);
 }
diff --git a/lib/libcxx/include/__algorithm/search_n.h b/lib/libcxx/include/__algorithm/search_n.h
index 38474e1b23..0962542e13 100644
--- a/lib/libcxx/include/__algorithm/search_n.h
+++ b/lib/libcxx/include/__algorithm/search_n.h
@@ -14,11 +14,7 @@
 #include <__algorithm/iterator_operations.h>
 #include <__config>
 #include <__functional/identity.h>
-#include <__iterator/advance.h>
-#include <__iterator/concepts.h>
-#include <__iterator/distance.h>
 #include <__iterator/iterator_traits.h>
-#include <__ranges/concepts.h>
 #include <__type_traits/enable_if.h>
 #include <__type_traits/invoke.h>
 #include <__type_traits/is_callable.h>
@@ -68,44 +64,60 @@ _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 pair<_Iter, _Iter> __search_
   }
 }
 
-template <class _AlgPolicy, class _Pred, class _Iter, class _Sent, class _SizeT, class _Type, class _Proj, class _DiffT>
+// Scans [__first, __last) backwards and returns the start of the longest suffix whose elements all satisfy __pred.
+template <class _RAIter, class _Pred, class _Proj, class _ValueT>
+_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 _RAIter
+__find_longest_suffix(_RAIter __first, _RAIter __last, const _ValueT& __value, _Pred& __pred, _Proj& __proj) {
+  while (__first != __last) {
+    if (!std::__invoke(__pred, std::__invoke(__proj, *--__last), __value)) {
+      return ++__last;
+    }
+  }
+  return __first;
+}
+
+template <class _AlgPolicy, class _Pred, class _Iter, class _SizeT, class _Type, class _Proj, class _DiffT>
 _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 std::pair<_Iter, _Iter> __search_n_random_access_impl(
-    _Iter __first, _Sent __last, _SizeT __count, const _Type& __value, _Pred& __pred, _Proj& __proj, _DiffT __size1) {
-  using difference_type = typename iterator_traits<_Iter>::difference_type;
+    _Iter __first, _SizeT __count_in, const _Type& __value, _Pred& __pred, _Proj& __proj, _DiffT __size) {
+  auto __last  = __first + __size;
+  auto __count = static_cast<_DiffT>(__count_in);
+
   if (__count == 0)
     return std::make_pair(__first, __first);
-  if (__size1 < static_cast<_DiffT>(__count)) {
-    _IterOps<_AlgPolicy>::__advance_to(__first, __last);
-    return std::make_pair(__first, __first);
-  }
+  if (__size < __count)
+    return std::make_pair(__last, __last);
+
+  // [__match_start, __match_start + __count) is the subrange which we currently check whether it only contains matching
+  // elements. This subrange is returned in case all the elements match.
+  // [__match_start, __matched_until) is the longest subrange where all elements are known to match at any given point
+  // in time.
+  // [__matched_until, __match_start + __count) is the subrange where we don't know whether the elements match.
+
+  // This algorithm tries to expand the subrange [__match_start, __matched_until) into a range of sufficient length.
+  // When we fail to do that because we find a mismatching element, we move it forward to the beginning of the next
+  // consecutive sequence that is not known not to match.
+
+  const _Iter __try_match_until = __last - __count;
+  _Iter __match_start           = __first;
+  _Iter __matched_until         = __first;
 
-  const auto __s = __first + __size1 - difference_type(__count - 1); // Start of pattern match can't go beyond here
   while (true) {
-    // Find first element in sequence that matchs __value, with a mininum of loop checks
-    while (true) {
-      if (__first >= __s) { // return __last if no element matches __value
-        _IterOps<_AlgPolicy>::__advance_to(__first, __last);
-        return std::make_pair(__first, __first);
-      }
-      if (std::__invoke(__pred, std::__invoke(__proj, *__first), __value))
-        break;
-      ++__first;
-    }
-    // *__first matches __value_, now match elements after here
-    auto __m = __first;
-    _SizeT __c(0);
-    while (true) {
-      if (++__c == __count) // If pattern exhausted, __first is the answer (works for 1 element pattern)
-        return std::make_pair(__first, __first + _DiffT(__count));
-      ++__m; // no need to check range on __m because __s guarantees we have enough source
+    // There's no chance of expanding the subrange into a sequence of sufficient length, since we don't have enough
+    // elements in the haystack anymore.
+    if (__match_start > __try_match_until)
+      return std::make_pair(__last, __last);
 
-      // if there is a mismatch, restart with a new __first
-      if (!std::__invoke(__pred, std::__invoke(__proj, *__m), __value)) {
-        __first = __m;
-        ++__first;
-        break;
-      } // else there is a match, check next elements
-    }
+    auto __mismatch = std::__find_longest_suffix(__matched_until, __match_start + __count, __value, __pred, __proj);
+
+    // If all elements in [__matched_until, __match_start + __count) match, we know that
+    // [__match_start, __match_start + __count) is a full sequence of matching elements, so we're done.
+    if (__mismatch == __matched_until)
+      return std::make_pair(__match_start, __match_start + __count);
+
+    // Otherwise, we have to move the [__match_start, __matched_until) subrange forward past the point where we know for
+    // sure a match is impossible.
+    __matched_until = __match_start + __count;
+    __match_start   = __mismatch;
   }
 }
 
@@ -119,7 +131,7 @@ template <class _Iter,
 _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 pair<_Iter, _Iter>
 __search_n_impl(_Iter __first, _Sent __last, _DiffT __count, const _Type& __value, _Pred& __pred, _Proj& __proj) {
   return std::__search_n_random_access_impl<_ClassicAlgPolicy>(
-      __first, __last, __count, __value, __pred, __proj, __last - __first);
+      __first, __count, __value, __pred, __proj, __last - __first);
 }
 
 template <class _Iter1,
diff --git a/lib/libcxx/include/__algorithm/sift_down.h b/lib/libcxx/include/__algorithm/sift_down.h
index 42803e3063..f827754575 100644
--- a/lib/libcxx/include/__algorithm/sift_down.h
+++ b/lib/libcxx/include/__algorithm/sift_down.h
@@ -24,59 +24,60 @@ _LIBCPP_PUSH_MACROS
 
 _LIBCPP_BEGIN_NAMESPACE_STD
 
-template <class _AlgPolicy, class _Compare, class _RandomAccessIterator>
+template <class _AlgPolicy, bool __assume_both_children, class _Compare, class _RandomAccessIterator>
 _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 void
 __sift_down(_RandomAccessIterator __first,
             _Compare&& __comp,
-            typename iterator_traits<_RandomAccessIterator>::difference_type __len,
-            _RandomAccessIterator __start) {
+            __iterator_difference_type<_RandomAccessIterator> __len,
+            __iterator_difference_type<_RandomAccessIterator> __start) {
   using _Ops = _IterOps<_AlgPolicy>;
 
   typedef typename iterator_traits<_RandomAccessIterator>::difference_type difference_type;
   typedef typename iterator_traits<_RandomAccessIterator>::value_type value_type;
   // left-child of __start is at 2 * __start + 1
   // right-child of __start is at 2 * __start + 2
-  difference_type __child = __start - __first;
+  difference_type __child = __start;
 
   if (__len < 2 || (__len - 2) / 2 < __child)
     return;
 
-  __child                         = 2 * __child + 1;
-  _RandomAccessIterator __child_i = __first + __child;
+  __child = 2 * __child + 1;
 
-  if ((__child + 1) < __len && __comp(*__child_i, *(__child_i + difference_type(1)))) {
+  if _LIBCPP_CONSTEXPR (__assume_both_children) {
+    // right-child exists and is greater than left-child
+    __child += __comp(__first[__child], __first[__child + 1]);
+  } else if ((__child + 1) < __len && __comp(__first[__child], __first[__child + 1])) {
     // right-child exists and is greater than left-child
-    ++__child_i;
     ++__child;
   }
 
   // check if we are in heap-order
-  if (__comp(*__child_i, *__start))
+  if (__comp(__first[__child], __first[__start]))
     // we are, __start is larger than its largest child
     return;
 
-  value_type __top(_Ops::__iter_move(__start));
+  value_type __top(_Ops::__iter_move(__first + __start));
   do {
     // we are not in heap-order, swap the parent with its largest child
-    *__start = _Ops::__iter_move(__child_i);
-    __start  = __child_i;
+    __first[__start] = _Ops::__iter_move(__first + __child);
+    __start          = __child;
 
     if ((__len - 2) / 2 < __child)
       break;
 
     // recompute the child based off of the updated parent
-    __child   = 2 * __child + 1;
-    __child_i = __first + __child;
+    __child = 2 * __child + 1;
 
-    if ((__child + 1) < __len && __comp(*__child_i, *(__child_i + difference_type(1)))) {
+    if _LIBCPP_CONSTEXPR (__assume_both_children) {
+      __child += __comp(__first[__child], __first[__child + 1]);
+    } else if ((__child + 1) < __len && __comp(__first[__child], __first[__child + 1])) {
       // right-child exists and is greater than left-child
-      ++__child_i;
       ++__child;
     }
 
     // check if we are in heap-order
-  } while (!__comp(*__child_i, __top));
-  *__start = std::move(__top);
+  } while (!__comp(__first[__child], __top));
+  __first[__start] = std::move(__top);
 }
 
 template <class _AlgPolicy, class _Compare, class _RandomAccessIterator>
diff --git a/lib/libcxx/include/__algorithm/simd_utils.h b/lib/libcxx/include/__algorithm/simd_utils.h
index 47942a09e6..f73c9ea4b6 100644
--- a/lib/libcxx/include/__algorithm/simd_utils.h
+++ b/lib/libcxx/include/__algorithm/simd_utils.h
@@ -26,9 +26,7 @@ _LIBCPP_PUSH_MACROS
 #include <__undef_macros>
 
 // TODO: Find out how altivec changes things and allow vectorizations there too.
-// TODO: Simplify this condition once we stop building with AppleClang 15 in the CI.
-#if _LIBCPP_STD_VER >= 14 && defined(_LIBCPP_COMPILER_CLANG_BASED) && !defined(__ALTIVEC__) &&                         \
-    !(defined(_LIBCPP_APPLE_CLANG_VER) && _LIBCPP_APPLE_CLANG_VER < 1600)
+#if _LIBCPP_STD_VER >= 14 && defined(_LIBCPP_COMPILER_CLANG_BASED) && !defined(__ALTIVEC__)
 #  define _LIBCPP_HAS_ALGORITHM_VECTOR_UTILS 1
 #else
 #  define _LIBCPP_HAS_ALGORITHM_VECTOR_UTILS 0
@@ -116,16 +114,47 @@ template <class _VecT, class _Iter>
   }(make_index_sequence<__simd_vector_size_v<_VecT>>{});
 }
 
+// Load the first _Np elements, zero the rest
+_LIBCPP_DIAGNOSTIC_PUSH
+_LIBCPP_CLANG_DIAGNOSTIC_IGNORED("-Wpsabi")
+template <class _VecT, size_t _Np, class _Iter>
+[[__nodiscard__]] _LIBCPP_ALWAYS_INLINE _LIBCPP_HIDE_FROM_ABI _VecT __partial_load(_Iter __iter) noexcept {
+  return [=]<size_t... _LoadIndices, size_t... _ZeroIndices>(
+             index_sequence<_LoadIndices...>, index_sequence<_ZeroIndices...>) _LIBCPP_ALWAYS_INLINE noexcept {
+    return _VecT{__iter[_LoadIndices]..., ((void)_ZeroIndices, 0)...};
+  }(make_index_sequence<_Np>{}, make_index_sequence<__simd_vector_size_v<_VecT> - _Np>{});
+}
+
+// Create a vector where every element is __val
+template <class _VecT>
+[[__nodiscard__]] _LIBCPP_ALWAYS_INLINE _LIBCPP_HIDE_FROM_ABI _VecT
+__broadcast(__simd_vector_underlying_type_t<_VecT> __val) {
+  return [&]<std::size_t... _Indices>(index_sequence<_Indices...>) {
+    return _VecT{((void)_Indices, __val)...};
+  }(make_index_sequence<__simd_vector_size_v<_VecT>>());
+}
+_LIBCPP_DIAGNOSTIC_POP
+
+template <class _Tp, size_t _Np>
+[[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI bool __any_of(__simd_vector<_Tp, _Np> __vec) noexcept {
+  return __builtin_reduce_or(__builtin_convertvector(__vec, __simd_vector<bool, _Np>));
+}
+
 template <class _Tp, size_t _Np>
 [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI bool __all_of(__simd_vector<_Tp, _Np> __vec) noexcept {
   return __builtin_reduce_and(__builtin_convertvector(__vec, __simd_vector<bool, _Np>));
 }
 
+template <class _Tp, size_t _Np>
+[[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI bool __none_of(__simd_vector<_Tp, _Np> __vec) noexcept {
+  return !__builtin_reduce_or(__builtin_convertvector(__vec, __simd_vector<bool, _Np>));
+}
+
 template <class _Tp, size_t _Np>
 [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI size_t __find_first_set(__simd_vector<_Tp, _Np> __vec) noexcept {
   using __mask_vec = __simd_vector<bool, _Np>;
 
-  // This has MSan disabled du to https://github.com/llvm/llvm-project/issues/85876
+  // This has MSan disabled due to https://llvm.org/PR85876
   auto __impl = [&]<class _MaskT>(_MaskT) _LIBCPP_NO_SANITIZE("memory") noexcept {
 #  if defined(_LIBCPP_BIG_ENDIAN)
     return std::min<size_t>(
diff --git a/lib/libcxx/include/__algorithm/specialized_algorithms.h b/lib/libcxx/include/__algorithm/specialized_algorithms.h
new file mode 100644
index 0000000000..7cb94c015f
--- /dev/null
+++ b/lib/libcxx/include/__algorithm/specialized_algorithms.h
@@ -0,0 +1,54 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef _LIBCPP___ALGORITHM_SPECIALIZED_ALGORITHMS_H
+#define _LIBCPP___ALGORITHM_SPECIALIZED_ALGORITHMS_H
+
+#include <__config>
+
+#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
+#  pragma GCC system_header
+#endif
+
+_LIBCPP_BEGIN_NAMESPACE_STD
+
+namespace _Algorithm {
+struct __copy {};
+struct __fill_n {};
+struct __for_each {};
+} // namespace _Algorithm
+
+template <class>
+struct __single_iterator;
+
+template <class, class>
+struct __iterator_pair;
+
+template <class>
+struct __single_range;
+
+// This struct allows specializing algorithms for specific arguments. This is useful when we know a more efficient
+// algorithm implementation for e.g. library-defined iterators. _Alg is one of the tags defined inside the _Algorithm
+// namespace above. _Ranges is an essentially arbitrary subset of the arguments to the algorithm that are used for
+// dispatching. This set is specific to the algorithm: look at each algorithm to see which arguments they use for
+// dispatching to specialized algorithms.
+//
+// A specialization of `__specialized_algorithm` has to define `__has_algorithm` to true for the specialized algorithm
+// to be used. This is intended for cases where iterators can do generic unwrapping and forward to a different
+// specialization of `__specialized_algorithm`.
+//
+// If __has_algorithm is true, there has to be an operator() which will get called with the actual arguments to the
+// algorithm.
+template <class _Alg, class... _Ranges>
+struct __specialized_algorithm {
+  static const bool __has_algorithm = false;
+};
+
+_LIBCPP_END_NAMESPACE_STD
+
+#endif // _LIBCPP___ALGORITHM_SPECIALIZED_ALGORITHMS_H
diff --git a/lib/libcxx/include/__algorithm/stable_sort.h b/lib/libcxx/include/__algorithm/stable_sort.h
index 1ca66f6a51..64c8080834 100644
--- a/lib/libcxx/include/__algorithm/stable_sort.h
+++ b/lib/libcxx/include/__algorithm/stable_sort.h
@@ -247,7 +247,7 @@ _LIBCPP_CONSTEXPR_SINCE_CXX26 void __stable_sort(
   constexpr auto __default_comp = __desugars_to_v<__less_tag, _Compare, value_type, value_type >;
   constexpr auto __radix_sortable =
       __is_ordered_integer_representable_v<value_type> &&
-      is_same_v< value_type&, __iter_reference<_RandomAccessIterator>>;
+      is_same_v< value_type&, __iterator_reference<_RandomAccessIterator>>;
   if constexpr (__default_comp && __radix_sortable) {
     if (__len <= __buff_size && __len >= static_cast<difference_type>(std::__radix_sort_min_bound<value_type>()) &&
         __len <= static_cast<difference_type>(std::__radix_sort_max_bound<value_type>())) {
diff --git a/lib/libcxx/include/__assertion_handler b/lib/libcxx/include/__assertion_handler
index f115658f9f..d352405e90 100644
--- a/lib/libcxx/include/__assertion_handler
+++ b/lib/libcxx/include/__assertion_handler
@@ -16,6 +16,7 @@
 #  include <__cxx03/__verbose_trap>
 #else
 #  include <__config>
+#  include <__log_hardening_failure>
 #  include <__verbose_abort>
 #  include <__verbose_trap>
 #endif
@@ -24,14 +25,40 @@
 #  pragma GCC system_header
 #endif
 
-#if _LIBCPP_HARDENING_MODE == _LIBCPP_HARDENING_MODE_DEBUG
+#if __cplusplus < 201103L && defined(_LIBCPP_USE_FROZEN_CXX03_HEADERS)
 
-#  define _LIBCPP_ASSERTION_HANDLER(message) _LIBCPP_VERBOSE_ABORT("%s", message)
+// Keep the old implementation that doesn't support assertion semantics for backward compatibility with the frozen C++03
+// mode.
+#  if _LIBCPP_HARDENING_MODE == _LIBCPP_HARDENING_MODE_DEBUG
+#    define _LIBCPP_ASSERTION_HANDLER(message) _LIBCPP_VERBOSE_ABORT("%s", message)
+#  else
+#    define _LIBCPP_ASSERTION_HANDLER(message) _LIBCPP_VERBOSE_TRAP(message)
+#  endif // _LIBCPP_HARDENING_MODE == _LIBCPP_HARDENING_MODE_DEBUG
 
 #else
 
-#  define _LIBCPP_ASSERTION_HANDLER(message) _LIBCPP_VERBOSE_TRAP(message)
+#  if _LIBCPP_ASSERTION_SEMANTIC == _LIBCPP_ASSERTION_SEMANTIC_IGNORE
+#    define _LIBCPP_ASSERTION_HANDLER(message) ((void)0)
 
-#endif // _LIBCPP_HARDENING_MODE == _LIBCPP_HARDENING_MODE_DEBUG
+#  elif _LIBCPP_ASSERTION_SEMANTIC == _LIBCPP_ASSERTION_SEMANTIC_OBSERVE
+#    define _LIBCPP_ASSERTION_HANDLER(message) _LIBCPP_LOG_HARDENING_FAILURE(message)
+
+#  elif _LIBCPP_ASSERTION_SEMANTIC == _LIBCPP_ASSERTION_SEMANTIC_QUICK_ENFORCE
+#    define _LIBCPP_ASSERTION_HANDLER(message) _LIBCPP_VERBOSE_TRAP(message)
+
+#  elif _LIBCPP_ASSERTION_SEMANTIC == _LIBCPP_ASSERTION_SEMANTIC_ENFORCE
+#    define _LIBCPP_ASSERTION_HANDLER(message) _LIBCPP_VERBOSE_ABORT("%s", message)
+
+#  else
+
+#    error _LIBCPP_ASSERTION_SEMANTIC must be set to one of the following values: \
+_LIBCPP_ASSERTION_SEMANTIC_IGNORE, \
+_LIBCPP_ASSERTION_SEMANTIC_OBSERVE, \
+_LIBCPP_ASSERTION_SEMANTIC_QUICK_ENFORCE, \
+_LIBCPP_ASSERTION_SEMANTIC_ENFORCE
+
+#  endif // _LIBCPP_ASSERTION_SEMANTIC == _LIBCPP_ASSERTION_SEMANTIC_IGNORE
+
+#endif // __cplusplus < 201103L && defined(_LIBCPP_USE_FROZEN_CXX03_HEADERS)
 
 #endif // _LIBCPP___ASSERTION_HANDLER
diff --git a/lib/libcxx/include/__atomic/atomic.h b/lib/libcxx/include/__atomic/atomic.h
index eead49dde6..02528cd964 100644
--- a/lib/libcxx/include/__atomic/atomic.h
+++ b/lib/libcxx/include/__atomic/atomic.h
@@ -10,7 +10,9 @@
 #define _LIBCPP___ATOMIC_ATOMIC_H
 
 #include <__atomic/atomic_sync.h>
+#include <__atomic/atomic_waitable_traits.h>
 #include <__atomic/check_memory_order.h>
+#include <__atomic/floating_point_helper.h>
 #include <__atomic/is_always_lock_free.h>
 #include <__atomic/memory_order.h>
 #include <__atomic/support.h>
@@ -47,10 +49,10 @@ struct __atomic_base // false
   static constexpr bool is_always_lock_free = __libcpp_is_always_lock_free<__cxx_atomic_impl<_Tp> >::__value;
 #endif
 
-  _LIBCPP_HIDE_FROM_ABI bool is_lock_free() const volatile _NOEXCEPT {
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI bool is_lock_free() const volatile _NOEXCEPT {
     return __cxx_atomic_is_lock_free(sizeof(__cxx_atomic_impl<_Tp>));
   }
-  _LIBCPP_HIDE_FROM_ABI bool is_lock_free() const _NOEXCEPT {
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI bool is_lock_free() const _NOEXCEPT {
     return static_cast<__atomic_base const volatile*>(this)->is_lock_free();
   }
   _LIBCPP_HIDE_FROM_ABI void store(_Tp __d, memory_order __m = memory_order_seq_cst) volatile _NOEXCEPT
@@ -61,11 +63,11 @@ struct __atomic_base // false
       _LIBCPP_CHECK_STORE_MEMORY_ORDER(__m) {
     std::__cxx_atomic_store(std::addressof(__a_), __d, __m);
   }
-  _LIBCPP_HIDE_FROM_ABI _Tp load(memory_order __m = memory_order_seq_cst) const volatile _NOEXCEPT
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI _Tp load(memory_order __m = memory_order_seq_cst) const volatile _NOEXCEPT
       _LIBCPP_CHECK_LOAD_MEMORY_ORDER(__m) {
     return std::__cxx_atomic_load(std::addressof(__a_), __m);
   }
-  _LIBCPP_HIDE_FROM_ABI _Tp load(memory_order __m = memory_order_seq_cst) const _NOEXCEPT
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI _Tp load(memory_order __m = memory_order_seq_cst) const _NOEXCEPT
       _LIBCPP_CHECK_LOAD_MEMORY_ORDER(__m) {
     return std::__cxx_atomic_load(std::addressof(__a_), __m);
   }
@@ -113,22 +115,16 @@ struct __atomic_base // false
   }
 
 #if _LIBCPP_STD_VER >= 20
-  _LIBCPP_AVAILABILITY_SYNC _LIBCPP_HIDE_FROM_ABI void wait(_Tp __v, memory_order __m = memory_order_seq_cst) const
-      volatile _NOEXCEPT {
+  _LIBCPP_HIDE_FROM_ABI void wait(_Tp __v, memory_order __m = memory_order_seq_cst) const volatile _NOEXCEPT {
     std::__atomic_wait(*this, __v, __m);
   }
-  _LIBCPP_AVAILABILITY_SYNC _LIBCPP_HIDE_FROM_ABI void
-  wait(_Tp __v, memory_order __m = memory_order_seq_cst) const _NOEXCEPT {
+  _LIBCPP_HIDE_FROM_ABI void wait(_Tp __v, memory_order __m = memory_order_seq_cst) const _NOEXCEPT {
     std::__atomic_wait(*this, __v, __m);
   }
-  _LIBCPP_AVAILABILITY_SYNC _LIBCPP_HIDE_FROM_ABI void notify_one() volatile _NOEXCEPT {
-    std::__atomic_notify_one(*this);
-  }
-  _LIBCPP_AVAILABILITY_SYNC _LIBCPP_HIDE_FROM_ABI void notify_one() _NOEXCEPT { std::__atomic_notify_one(*this); }
-  _LIBCPP_AVAILABILITY_SYNC _LIBCPP_HIDE_FROM_ABI void notify_all() volatile _NOEXCEPT {
-    std::__atomic_notify_all(*this);
-  }
-  _LIBCPP_AVAILABILITY_SYNC _LIBCPP_HIDE_FROM_ABI void notify_all() _NOEXCEPT { std::__atomic_notify_all(*this); }
+  _LIBCPP_HIDE_FROM_ABI void notify_one() volatile _NOEXCEPT { std::__atomic_notify_one(*this); }
+  _LIBCPP_HIDE_FROM_ABI void notify_one() _NOEXCEPT { std::__atomic_notify_one(*this); }
+  _LIBCPP_HIDE_FROM_ABI void notify_all() volatile _NOEXCEPT { std::__atomic_notify_all(*this); }
+  _LIBCPP_HIDE_FROM_ABI void notify_all() _NOEXCEPT { std::__atomic_notify_all(*this); }
 #endif //  _LIBCPP_STD_VER >= 20
 
 #if _LIBCPP_STD_VER >= 20
@@ -205,12 +201,15 @@ struct __atomic_base<_Tp, true> : public __atomic_base<_Tp, false> {
   _LIBCPP_HIDE_FROM_ABI _Tp operator^=(_Tp __op) _NOEXCEPT { return fetch_xor(__op) ^ __op; }
 };
 
+#if _LIBCPP_STD_VER >= 20
 // Here we need _IsIntegral because the default template argument is not enough
 // e.g  __atomic_base<int> is __atomic_base<int, true>, which inherits from
 // __atomic_base<int, false> and the caller of the wait function is
 // __atomic_base<int, false>. So specializing __atomic_base<_Tp> does not work
 template <class _Tp, bool _IsIntegral>
 struct __atomic_waitable_traits<__atomic_base<_Tp, _IsIntegral> > {
+  using __value_type _LIBCPP_NODEBUG = _Tp;
+
   static _LIBCPP_HIDE_FROM_ABI _Tp __atomic_load(const __atomic_base<_Tp, _IsIntegral>& __a, memory_order __order) {
     return __a.load(__order);
   }
@@ -231,6 +230,8 @@ struct __atomic_waitable_traits<__atomic_base<_Tp, _IsIntegral> > {
   }
 };
 
+#endif // _LIBCPP_STD_VER >= 20
+
 template <typename _Tp>
 struct __check_atomic_mandates {
   using type _LIBCPP_NODEBUG = _Tp;
@@ -324,50 +325,26 @@ struct atomic<_Tp*> : public __atomic_base<_Tp*> {
   atomic& operator=(const atomic&) volatile = delete;
 };
 
+#if _LIBCPP_STD_VER >= 20
 template <class _Tp>
 struct __atomic_waitable_traits<atomic<_Tp> > : __atomic_waitable_traits<__atomic_base<_Tp> > {};
 
-#if _LIBCPP_STD_VER >= 20
 template <class _Tp>
   requires is_floating_point_v<_Tp>
 struct atomic<_Tp> : __atomic_base<_Tp> {
 private:
-  _LIBCPP_HIDE_FROM_ABI static constexpr bool __is_fp80_long_double() {
-    // Only x87-fp80 long double has 64-bit mantissa
-    return __LDBL_MANT_DIG__ == 64 && std::is_same_v<_Tp, long double>;
-  }
-
-  _LIBCPP_HIDE_FROM_ABI static constexpr bool __has_rmw_builtin() {
-#  ifndef _LIBCPP_COMPILER_CLANG_BASED
-    return false;
-#  else
-    // The builtin __cxx_atomic_fetch_add errors during compilation for
-    // long double on platforms with fp80 format.
-    // For more details, see
-    // lib/Sema/SemaChecking.cpp function IsAllowedValueType
-    // LLVM Parser does not allow atomicrmw with x86_fp80 type.
-    // if (ValType->isSpecificBuiltinType(BuiltinType::LongDouble) &&
-    //    &Context.getTargetInfo().getLongDoubleFormat() ==
-    //        &llvm::APFloat::x87DoubleExtended())
-    // For more info
-    // https://github.com/llvm/llvm-project/issues/68602
-    // https://reviews.llvm.org/D53965
-    return !__is_fp80_long_double();
-#  endif
-  }
-
   template <class _This, class _Operation, class _BuiltinOp>
   _LIBCPP_HIDE_FROM_ABI static _Tp
   __rmw_op(_This&& __self, _Tp __operand, memory_order __m, _Operation __operation, _BuiltinOp __builtin_op) {
-    if constexpr (__has_rmw_builtin()) {
+    if constexpr (std::__has_rmw_builtin<_Tp>()) {
       return __builtin_op(std::addressof(std::forward<_This>(__self).__a_), __operand, __m);
     } else {
       _Tp __old = __self.load(memory_order_relaxed);
       _Tp __new = __operation(__old, __operand);
       while (!__self.compare_exchange_weak(__old, __new, __m, memory_order_relaxed)) {
 #  ifdef _LIBCPP_COMPILER_CLANG_BASED
-        if constexpr (__is_fp80_long_double()) {
-          // https://github.com/llvm/llvm-project/issues/47978
+        if constexpr (std::__is_fp80_long_double<_Tp>()) {
+          // https://llvm.org/PR47978
           // clang bug: __old is not updated on failure for atomic<long double>::compare_exchange_weak
           // Note __old = __self.load(memory_order_relaxed) will not work
           std::__cxx_atomic_load_inplace(std::addressof(__self.__a_), std::addressof(__old), memory_order_relaxed);
@@ -462,12 +439,12 @@ public:
 // atomic_is_lock_free
 
 template <class _Tp>
-_LIBCPP_HIDE_FROM_ABI bool atomic_is_lock_free(const volatile atomic<_Tp>* __o) _NOEXCEPT {
+[[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI bool atomic_is_lock_free(const volatile atomic<_Tp>* __o) _NOEXCEPT {
   return __o->is_lock_free();
 }
 
 template <class _Tp>
-_LIBCPP_HIDE_FROM_ABI bool atomic_is_lock_free(const atomic<_Tp>* __o) _NOEXCEPT {
+[[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI bool atomic_is_lock_free(const atomic<_Tp>* __o) _NOEXCEPT {
   return __o->is_lock_free();
 }
 
@@ -516,25 +493,25 @@ atomic_store_explicit(atomic<_Tp>* __o, typename atomic<_Tp>::value_type __d, me
 // atomic_load
 
 template <class _Tp>
-_LIBCPP_HIDE_FROM_ABI _Tp atomic_load(const volatile atomic<_Tp>* __o) _NOEXCEPT {
+[[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI _Tp atomic_load(const volatile atomic<_Tp>* __o) _NOEXCEPT {
   return __o->load();
 }
 
 template <class _Tp>
-_LIBCPP_HIDE_FROM_ABI _Tp atomic_load(const atomic<_Tp>* __o) _NOEXCEPT {
+[[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI _Tp atomic_load(const atomic<_Tp>* __o) _NOEXCEPT {
   return __o->load();
 }
 
 // atomic_load_explicit
 
 template <class _Tp>
-_LIBCPP_HIDE_FROM_ABI _Tp atomic_load_explicit(const volatile atomic<_Tp>* __o, memory_order __m) _NOEXCEPT
-    _LIBCPP_CHECK_LOAD_MEMORY_ORDER(__m) {
+[[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI _Tp
+atomic_load_explicit(const volatile atomic<_Tp>* __o, memory_order __m) _NOEXCEPT _LIBCPP_CHECK_LOAD_MEMORY_ORDER(__m) {
   return __o->load(__m);
 }
 
 template <class _Tp>
-_LIBCPP_HIDE_FROM_ABI _Tp atomic_load_explicit(const atomic<_Tp>* __o, memory_order __m) _NOEXCEPT
+[[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI _Tp atomic_load_explicit(const atomic<_Tp>* __o, memory_order __m) _NOEXCEPT
     _LIBCPP_CHECK_LOAD_MEMORY_ORDER(__m) {
   return __o->load(__m);
 }
@@ -642,28 +619,27 @@ _LIBCPP_HIDE_FROM_ABI bool atomic_compare_exchange_strong_explicit(
 // atomic_wait
 
 template <class _Tp>
-_LIBCPP_AVAILABILITY_SYNC _LIBCPP_HIDE_FROM_ABI void
+_LIBCPP_HIDE_FROM_ABI void
 atomic_wait(const volatile atomic<_Tp>* __o, typename atomic<_Tp>::value_type __v) _NOEXCEPT {
   return __o->wait(__v);
 }
 
 template <class _Tp>
-_LIBCPP_AVAILABILITY_SYNC _LIBCPP_HIDE_FROM_ABI void
-atomic_wait(const atomic<_Tp>* __o, typename atomic<_Tp>::value_type __v) _NOEXCEPT {
+_LIBCPP_HIDE_FROM_ABI void atomic_wait(const atomic<_Tp>* __o, typename atomic<_Tp>::value_type __v) _NOEXCEPT {
   return __o->wait(__v);
 }
 
 // atomic_wait_explicit
 
 template <class _Tp>
-_LIBCPP_AVAILABILITY_SYNC _LIBCPP_HIDE_FROM_ABI void
+_LIBCPP_HIDE_FROM_ABI void
 atomic_wait_explicit(const volatile atomic<_Tp>* __o, typename atomic<_Tp>::value_type __v, memory_order __m) _NOEXCEPT
     _LIBCPP_CHECK_LOAD_MEMORY_ORDER(__m) {
   return __o->wait(__v, __m);
 }
 
 template <class _Tp>
-_LIBCPP_AVAILABILITY_SYNC _LIBCPP_HIDE_FROM_ABI void
+_LIBCPP_HIDE_FROM_ABI void
 atomic_wait_explicit(const atomic<_Tp>* __o, typename atomic<_Tp>::value_type __v, memory_order __m) _NOEXCEPT
     _LIBCPP_CHECK_LOAD_MEMORY_ORDER(__m) {
   return __o->wait(__v, __m);
@@ -672,22 +648,22 @@ atomic_wait_explicit(const atomic<_Tp>* __o, typename atomic<_Tp>::value_type __
 // atomic_notify_one
 
 template <class _Tp>
-_LIBCPP_AVAILABILITY_SYNC _LIBCPP_HIDE_FROM_ABI void atomic_notify_one(volatile atomic<_Tp>* __o) _NOEXCEPT {
+_LIBCPP_HIDE_FROM_ABI void atomic_notify_one(volatile atomic<_Tp>* __o) _NOEXCEPT {
   __o->notify_one();
 }
 template <class _Tp>
-_LIBCPP_AVAILABILITY_SYNC _LIBCPP_HIDE_FROM_ABI void atomic_notify_one(atomic<_Tp>* __o) _NOEXCEPT {
+_LIBCPP_HIDE_FROM_ABI void atomic_notify_one(atomic<_Tp>* __o) _NOEXCEPT {
   __o->notify_one();
 }
 
 // atomic_notify_all
 
 template <class _Tp>
-_LIBCPP_AVAILABILITY_SYNC _LIBCPP_HIDE_FROM_ABI void atomic_notify_all(volatile atomic<_Tp>* __o) _NOEXCEPT {
+_LIBCPP_HIDE_FROM_ABI void atomic_notify_all(volatile atomic<_Tp>* __o) _NOEXCEPT {
   __o->notify_all();
 }
 template <class _Tp>
-_LIBCPP_AVAILABILITY_SYNC _LIBCPP_HIDE_FROM_ABI void atomic_notify_all(atomic<_Tp>* __o) _NOEXCEPT {
+_LIBCPP_HIDE_FROM_ABI void atomic_notify_all(atomic<_Tp>* __o) _NOEXCEPT {
   __o->notify_all();
 }
 
diff --git a/lib/libcxx/include/__atomic/atomic_flag.h b/lib/libcxx/include/__atomic/atomic_flag.h
index 5cc6fb0c55..42864c869d 100644
--- a/lib/libcxx/include/__atomic/atomic_flag.h
+++ b/lib/libcxx/include/__atomic/atomic_flag.h
@@ -10,6 +10,7 @@
 #define _LIBCPP___ATOMIC_ATOMIC_FLAG_H
 
 #include <__atomic/atomic_sync.h>
+#include <__atomic/atomic_waitable_traits.h>
 #include <__atomic/contention_t.h>
 #include <__atomic/memory_order.h>
 #include <__atomic/support.h>
@@ -49,22 +50,16 @@ struct atomic_flag {
   }
 
 #if _LIBCPP_STD_VER >= 20
-  _LIBCPP_AVAILABILITY_SYNC _LIBCPP_HIDE_FROM_ABI void wait(bool __v, memory_order __m = memory_order_seq_cst) const
-      volatile _NOEXCEPT {
+  _LIBCPP_HIDE_FROM_ABI void wait(bool __v, memory_order __m = memory_order_seq_cst) const volatile _NOEXCEPT {
     std::__atomic_wait(*this, _LIBCPP_ATOMIC_FLAG_TYPE(__v), __m);
   }
-  _LIBCPP_AVAILABILITY_SYNC _LIBCPP_HIDE_FROM_ABI void
-  wait(bool __v, memory_order __m = memory_order_seq_cst) const _NOEXCEPT {
+  _LIBCPP_HIDE_FROM_ABI void wait(bool __v, memory_order __m = memory_order_seq_cst) const _NOEXCEPT {
     std::__atomic_wait(*this, _LIBCPP_ATOMIC_FLAG_TYPE(__v), __m);
   }
-  _LIBCPP_AVAILABILITY_SYNC _LIBCPP_HIDE_FROM_ABI void notify_one() volatile _NOEXCEPT {
-    std::__atomic_notify_one(*this);
-  }
-  _LIBCPP_AVAILABILITY_SYNC _LIBCPP_HIDE_FROM_ABI void notify_one() _NOEXCEPT { std::__atomic_notify_one(*this); }
-  _LIBCPP_AVAILABILITY_SYNC _LIBCPP_HIDE_FROM_ABI void notify_all() volatile _NOEXCEPT {
-    std::__atomic_notify_all(*this);
-  }
-  _LIBCPP_AVAILABILITY_SYNC _LIBCPP_HIDE_FROM_ABI void notify_all() _NOEXCEPT { std::__atomic_notify_all(*this); }
+  _LIBCPP_HIDE_FROM_ABI void notify_one() volatile _NOEXCEPT { std::__atomic_notify_one(*this); }
+  _LIBCPP_HIDE_FROM_ABI void notify_one() _NOEXCEPT { std::__atomic_notify_one(*this); }
+  _LIBCPP_HIDE_FROM_ABI void notify_all() volatile _NOEXCEPT { std::__atomic_notify_all(*this); }
+  _LIBCPP_HIDE_FROM_ABI void notify_all() _NOEXCEPT { std::__atomic_notify_all(*this); }
 #endif
 
 #if _LIBCPP_STD_VER >= 20
@@ -80,8 +75,11 @@ struct atomic_flag {
   atomic_flag& operator=(const atomic_flag&) volatile = delete;
 };
 
+#if _LIBCPP_STD_VER >= 20
 template <>
 struct __atomic_waitable_traits<atomic_flag> {
+  using __value_type _LIBCPP_NODEBUG = _LIBCPP_ATOMIC_FLAG_TYPE;
+
   static _LIBCPP_HIDE_FROM_ABI _LIBCPP_ATOMIC_FLAG_TYPE __atomic_load(const atomic_flag& __a, memory_order __order) {
     return std::__cxx_atomic_load(&__a.__a_, __order);
   }
@@ -101,6 +99,7 @@ struct __atomic_waitable_traits<atomic_flag> {
     return std::addressof(__a.__a_);
   }
 };
+#endif // _LIBCPP_STD_VER >= 20
 
 inline _LIBCPP_HIDE_FROM_ABI bool atomic_flag_test(const volatile atomic_flag* __o) _NOEXCEPT { return __o->test(); }
 
@@ -143,43 +142,26 @@ inline _LIBCPP_HIDE_FROM_ABI void atomic_flag_clear_explicit(atomic_flag* __o, m
 }
 
 #if _LIBCPP_STD_VER >= 20
-inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_AVAILABILITY_SYNC void
-atomic_flag_wait(const volatile atomic_flag* __o, bool __v) _NOEXCEPT {
+inline _LIBCPP_HIDE_FROM_ABI void atomic_flag_wait(const volatile atomic_flag* __o, bool __v) _NOEXCEPT {
   __o->wait(__v);
 }
 
-inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_AVAILABILITY_SYNC void
-atomic_flag_wait(const atomic_flag* __o, bool __v) _NOEXCEPT {
-  __o->wait(__v);
-}
+inline _LIBCPP_HIDE_FROM_ABI void atomic_flag_wait(const atomic_flag* __o, bool __v) _NOEXCEPT { __o->wait(__v); }
 
-inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_AVAILABILITY_SYNC void
+inline _LIBCPP_HIDE_FROM_ABI void
 atomic_flag_wait_explicit(const volatile atomic_flag* __o, bool __v, memory_order __m) _NOEXCEPT {
   __o->wait(__v, __m);
 }
 
-inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_AVAILABILITY_SYNC void
+inline _LIBCPP_HIDE_FROM_ABI void
 atomic_flag_wait_explicit(const atomic_flag* __o, bool __v, memory_order __m) _NOEXCEPT {
   __o->wait(__v, __m);
 }
 
-inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_AVAILABILITY_SYNC void
-atomic_flag_notify_one(volatile atomic_flag* __o) _NOEXCEPT {
-  __o->notify_one();
-}
-
-inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_AVAILABILITY_SYNC void atomic_flag_notify_one(atomic_flag* __o) _NOEXCEPT {
-  __o->notify_one();
-}
-
-inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_AVAILABILITY_SYNC void
-atomic_flag_notify_all(volatile atomic_flag* __o) _NOEXCEPT {
-  __o->notify_all();
-}
-
-inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_AVAILABILITY_SYNC void atomic_flag_notify_all(atomic_flag* __o) _NOEXCEPT {
-  __o->notify_all();
-}
+inline _LIBCPP_HIDE_FROM_ABI void atomic_flag_notify_one(volatile atomic_flag* __o) _NOEXCEPT { __o->notify_one(); }
+inline _LIBCPP_HIDE_FROM_ABI void atomic_flag_notify_one(atomic_flag* __o) _NOEXCEPT { __o->notify_one(); }
+inline _LIBCPP_HIDE_FROM_ABI void atomic_flag_notify_all(volatile atomic_flag* __o) _NOEXCEPT { __o->notify_all(); }
+inline _LIBCPP_HIDE_FROM_ABI void atomic_flag_notify_all(atomic_flag* __o) _NOEXCEPT { __o->notify_all(); }
 #endif // _LIBCPP_STD_VER >= 20
 
 _LIBCPP_END_NAMESPACE_STD
diff --git a/lib/libcxx/include/__atomic/atomic_ref.h b/lib/libcxx/include/__atomic/atomic_ref.h
index b5493662c5..69edbfe6ec 100644
--- a/lib/libcxx/include/__atomic/atomic_ref.h
+++ b/lib/libcxx/include/__atomic/atomic_ref.h
@@ -19,7 +19,9 @@
 
 #include <__assert>
 #include <__atomic/atomic_sync.h>
+#include <__atomic/atomic_waitable_traits.h>
 #include <__atomic/check_memory_order.h>
+#include <__atomic/floating_point_helper.h>
 #include <__atomic/memory_order.h>
 #include <__atomic/to_gcc_order.h>
 #include <__concepts/arithmetic.h>
@@ -121,7 +123,9 @@ public:
   static constexpr bool is_always_lock_free =
       __atomic_always_lock_free(sizeof(_Tp), std::addressof(__get_aligner_instance<required_alignment>::__instance));
 
-  _LIBCPP_HIDE_FROM_ABI bool is_lock_free() const noexcept { return __atomic_is_lock_free(sizeof(_Tp), __ptr_); }
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI bool is_lock_free() const noexcept {
+    return __atomic_is_lock_free(sizeof(_Tp), __ptr_);
+  }
 
   _LIBCPP_HIDE_FROM_ABI void store(_Tp __desired, memory_order __order = memory_order::seq_cst) const noexcept
       _LIBCPP_CHECK_STORE_MEMORY_ORDER(__order) {
@@ -136,7 +140,7 @@ public:
     return __desired;
   }
 
-  _LIBCPP_HIDE_FROM_ABI _Tp load(memory_order __order = memory_order::seq_cst) const noexcept
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI _Tp load(memory_order __order = memory_order::seq_cst) const noexcept
       _LIBCPP_CHECK_LOAD_MEMORY_ORDER(__order) {
     _LIBCPP_ASSERT_ARGUMENT_WITHIN_DOMAIN(
         __order == memory_order::relaxed || __order == memory_order::consume || __order == memory_order::acquire ||
@@ -219,6 +223,9 @@ public:
   }
   _LIBCPP_HIDE_FROM_ABI void notify_one() const noexcept { std::__atomic_notify_one(*this); }
   _LIBCPP_HIDE_FROM_ABI void notify_all() const noexcept { std::__atomic_notify_all(*this); }
+#  if _LIBCPP_STD_VER >= 26
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr _Tp* address() const noexcept { return __ptr_; }
+#  endif
 
 protected:
   using _Aligned_Tp [[__gnu__::__aligned__(required_alignment), __gnu__::__nodebug__]] = _Tp;
@@ -229,6 +236,8 @@ protected:
 
 template <class _Tp>
 struct __atomic_waitable_traits<__atomic_ref_base<_Tp>> {
+  using __value_type _LIBCPP_NODEBUG = _Tp;
+
   static _LIBCPP_HIDE_FROM_ABI _Tp __atomic_load(const __atomic_ref_base<_Tp>& __a, memory_order __order) {
     return __a.load(__order);
   }
@@ -322,20 +331,28 @@ struct atomic_ref<_Tp> : public __atomic_ref_base<_Tp> {
   atomic_ref& operator=(const atomic_ref&) = delete;
 
   _LIBCPP_HIDE_FROM_ABI _Tp fetch_add(_Tp __arg, memory_order __order = memory_order_seq_cst) const noexcept {
-    _Tp __old = this->load(memory_order_relaxed);
-    _Tp __new = __old + __arg;
-    while (!this->compare_exchange_weak(__old, __new, __order, memory_order_relaxed)) {
-      __new = __old + __arg;
+    if constexpr (std::__has_rmw_builtin<_Tp>()) {
+      return __atomic_fetch_add(this->__ptr_, __arg, std::__to_gcc_order(__order));
+    } else {
+      _Tp __old = this->load(memory_order_relaxed);
+      _Tp __new = __old + __arg;
+      while (!this->compare_exchange_weak(__old, __new, __order, memory_order_relaxed)) {
+        __new = __old + __arg;
+      }
+      return __old;
     }
-    return __old;
   }
   _LIBCPP_HIDE_FROM_ABI _Tp fetch_sub(_Tp __arg, memory_order __order = memory_order_seq_cst) const noexcept {
-    _Tp __old = this->load(memory_order_relaxed);
-    _Tp __new = __old - __arg;
-    while (!this->compare_exchange_weak(__old, __new, __order, memory_order_relaxed)) {
-      __new = __old - __arg;
+    if constexpr (std::__has_rmw_builtin<_Tp>()) {
+      return __atomic_fetch_sub(this->__ptr_, __arg, std::__to_gcc_order(__order));
+    } else {
+      _Tp __old = this->load(memory_order_relaxed);
+      _Tp __new = __old - __arg;
+      while (!this->compare_exchange_weak(__old, __new, __order, memory_order_relaxed)) {
+        __new = __old - __arg;
+      }
+      return __old;
     }
-    return __old;
   }
 
   _LIBCPP_HIDE_FROM_ABI _Tp operator+=(_Tp __arg) const noexcept { return fetch_add(__arg) + __arg; }
diff --git a/lib/libcxx/include/__atomic/atomic_sync.h b/lib/libcxx/include/__atomic/atomic_sync.h
index 0dae448d64..1234cdea50 100644
--- a/lib/libcxx/include/__atomic/atomic_sync.h
+++ b/lib/libcxx/include/__atomic/atomic_sync.h
@@ -9,6 +9,7 @@
 #ifndef _LIBCPP___ATOMIC_ATOMIC_SYNC_H
 #define _LIBCPP___ATOMIC_ATOMIC_SYNC_H
 
+#include <__atomic/atomic_waitable_traits.h>
 #include <__atomic/contention_t.h>
 #include <__atomic/memory_order.h>
 #include <__atomic/to_gcc_order.h>
@@ -19,6 +20,7 @@
 #include <__type_traits/conjunction.h>
 #include <__type_traits/decay.h>
 #include <__type_traits/invoke.h>
+#include <__type_traits/is_same.h>
 #include <__type_traits/void_t.h>
 #include <__utility/declval.h>
 #include <cstring>
@@ -29,50 +31,89 @@
 
 _LIBCPP_BEGIN_NAMESPACE_STD
 
-// The customisation points to enable the following functions:
-// - __atomic_wait
-// - __atomic_wait_unless
-// - __atomic_notify_one
-// - __atomic_notify_all
-// Note that std::atomic<T>::wait was back-ported to C++03
-// The below implementations look ugly to support C++03
-template <class _Tp, class = void>
-struct __atomic_waitable_traits {
-  template <class _AtomicWaitable>
-  static void __atomic_load(_AtomicWaitable&&, memory_order) = delete;
-
-  template <class _AtomicWaitable>
-  static void __atomic_contention_address(_AtomicWaitable&&) = delete;
-};
-
-template <class _Tp, class = void>
-struct __atomic_waitable : false_type {};
-
-template <class _Tp>
-struct __atomic_waitable< _Tp,
-                          __void_t<decltype(__atomic_waitable_traits<__decay_t<_Tp> >::__atomic_load(
-                                       std::declval<const _Tp&>(), std::declval<memory_order>())),
-                                   decltype(__atomic_waitable_traits<__decay_t<_Tp> >::__atomic_contention_address(
-                                       std::declval<const _Tp&>()))> > : true_type {};
-
 #if _LIBCPP_STD_VER >= 20
 #  if _LIBCPP_HAS_THREADS
 
-_LIBCPP_AVAILABILITY_SYNC _LIBCPP_EXPORTED_FROM_ABI void __cxx_atomic_notify_one(void const volatile*) _NOEXCEPT;
-_LIBCPP_AVAILABILITY_SYNC _LIBCPP_EXPORTED_FROM_ABI void __cxx_atomic_notify_all(void const volatile*) _NOEXCEPT;
-_LIBCPP_AVAILABILITY_SYNC _LIBCPP_EXPORTED_FROM_ABI __cxx_contention_t
-__libcpp_atomic_monitor(void const volatile*) _NOEXCEPT;
-_LIBCPP_AVAILABILITY_SYNC _LIBCPP_EXPORTED_FROM_ABI void
-__libcpp_atomic_wait(void const volatile*, __cxx_contention_t) _NOEXCEPT;
+#    if !_LIBCPP_AVAILABILITY_HAS_NEW_SYNC
 
-_LIBCPP_AVAILABILITY_SYNC _LIBCPP_EXPORTED_FROM_ABI void
-__cxx_atomic_notify_one(__cxx_atomic_contention_t const volatile*) _NOEXCEPT;
-_LIBCPP_AVAILABILITY_SYNC _LIBCPP_EXPORTED_FROM_ABI void
-__cxx_atomic_notify_all(__cxx_atomic_contention_t const volatile*) _NOEXCEPT;
-_LIBCPP_AVAILABILITY_SYNC _LIBCPP_EXPORTED_FROM_ABI __cxx_contention_t
+// old dylib interface kept for backwards compatibility
+_LIBCPP_EXPORTED_FROM_ABI void __cxx_atomic_notify_one(void const volatile*) _NOEXCEPT;
+_LIBCPP_EXPORTED_FROM_ABI void __cxx_atomic_notify_all(void const volatile*) _NOEXCEPT;
+_LIBCPP_EXPORTED_FROM_ABI __cxx_contention_t __libcpp_atomic_monitor(void const volatile*) _NOEXCEPT;
+_LIBCPP_EXPORTED_FROM_ABI void __libcpp_atomic_wait(void const volatile*, __cxx_contention_t) _NOEXCEPT;
+
+_LIBCPP_EXPORTED_FROM_ABI void __cxx_atomic_notify_one(__cxx_atomic_contention_t const volatile*) _NOEXCEPT;
+_LIBCPP_EXPORTED_FROM_ABI void __cxx_atomic_notify_all(__cxx_atomic_contention_t const volatile*) _NOEXCEPT;
+_LIBCPP_EXPORTED_FROM_ABI __cxx_contention_t
 __libcpp_atomic_monitor(__cxx_atomic_contention_t const volatile*) _NOEXCEPT;
-_LIBCPP_AVAILABILITY_SYNC _LIBCPP_EXPORTED_FROM_ABI void
+_LIBCPP_EXPORTED_FROM_ABI void
 __libcpp_atomic_wait(__cxx_atomic_contention_t const volatile*, __cxx_contention_t) _NOEXCEPT;
+#    endif // !_LIBCPP_AVAILABILITY_HAS_NEW_SYNC
+
+// new dylib interface
+
+// return the global contention state's current value for the address
+_LIBCPP_AVAILABILITY_NEW_SYNC _LIBCPP_EXPORTED_FROM_ABI __cxx_contention_t
+__atomic_monitor_global(void const* __address) _NOEXCEPT;
+
+// wait on the global contention state to be changed from the given value for the address
+_LIBCPP_AVAILABILITY_NEW_SYNC _LIBCPP_EXPORTED_FROM_ABI void
+__atomic_wait_global_table(void const* __address, __cxx_contention_t __monitor_value) _NOEXCEPT;
+
+// notify one waiter waiting on the global contention state for the address
+_LIBCPP_AVAILABILITY_NEW_SYNC _LIBCPP_EXPORTED_FROM_ABI void __atomic_notify_one_global_table(void const*) _NOEXCEPT;
+
+// notify all waiters waiting on the global contention state for the address
+_LIBCPP_AVAILABILITY_NEW_SYNC _LIBCPP_EXPORTED_FROM_ABI void __atomic_notify_all_global_table(void const*) _NOEXCEPT;
+
+// wait on the address directly with the native platform wait
+template <std::size_t _Size>
+_LIBCPP_AVAILABILITY_NEW_SYNC _LIBCPP_EXPORTED_FROM_ABI void
+__atomic_wait_native(void const* __address, void const* __old_value) _NOEXCEPT;
+
+// notify one waiter waiting on the address directly with the native platform wait
+template <std::size_t _Size>
+_LIBCPP_AVAILABILITY_NEW_SYNC _LIBCPP_EXPORTED_FROM_ABI void __atomic_notify_one_native(const void*) _NOEXCEPT;
+
+// notify all waiters waiting on the address directly with the native platform wait
+template <std::size_t _Size>
+_LIBCPP_AVAILABILITY_NEW_SYNC _LIBCPP_EXPORTED_FROM_ABI void __atomic_notify_all_native(const void*) _NOEXCEPT;
+
+#    if _LIBCPP_AVAILABILITY_HAS_NEW_SYNC
+
+template <class _AtomicWaitable, class _Poll>
+struct __atomic_wait_backoff_impl {
+  const _AtomicWaitable& __a_;
+  _Poll __poll_;
+  memory_order __order_;
+
+  using __waitable_traits _LIBCPP_NODEBUG = __atomic_waitable_traits<__decay_t<_AtomicWaitable> >;
+  using __value_type _LIBCPP_NODEBUG      = typename __waitable_traits::__value_type;
+
+  _LIBCPP_HIDE_FROM_ABI __backoff_results operator()(chrono::nanoseconds __elapsed) const {
+    if (__elapsed > chrono::microseconds(4)) {
+      auto __contention_address = const_cast<const void*>(
+          static_cast<const volatile void*>(__waitable_traits::__atomic_contention_address(__a_)));
+
+      if constexpr (__has_native_atomic_wait<__value_type>) {
+        auto __atomic_value = __waitable_traits::__atomic_load(__a_, __order_);
+        if (__poll_(__atomic_value))
+          return __backoff_results::__poll_success;
+        std::__atomic_wait_native<sizeof(__value_type)>(__contention_address, std::addressof(__atomic_value));
+      } else {
+        __cxx_contention_t __monitor_val = std::__atomic_monitor_global(__contention_address);
+        auto __atomic_value              = __waitable_traits::__atomic_load(__a_, __order_);
+        if (__poll_(__atomic_value))
+          return __backoff_results::__poll_success;
+        std::__atomic_wait_global_table(__contention_address, __monitor_val);
+      }
+    } else {
+    } // poll
+    return __backoff_results::__continue_poll;
+  }
+};
+
+#    else // _LIBCPP_AVAILABILITY_HAS_NEW_SYNC
 
 template <class _AtomicWaitable, class _Poll>
 struct __atomic_wait_backoff_impl {
@@ -82,7 +123,6 @@ struct __atomic_wait_backoff_impl {
 
   using __waitable_traits _LIBCPP_NODEBUG = __atomic_waitable_traits<__decay_t<_AtomicWaitable> >;
 
-  _LIBCPP_AVAILABILITY_SYNC
   _LIBCPP_HIDE_FROM_ABI bool
   __update_monitor_val_and_poll(__cxx_atomic_contention_t const volatile*, __cxx_contention_t& __monitor_val) const {
     // In case the contention type happens to be __cxx_atomic_contention_t, i.e. __cxx_atomic_impl<int64_t>,
@@ -95,7 +135,6 @@ struct __atomic_wait_backoff_impl {
     return __poll_(__monitor_val);
   }
 
-  _LIBCPP_AVAILABILITY_SYNC
   _LIBCPP_HIDE_FROM_ABI bool
   __update_monitor_val_and_poll(void const volatile* __contention_address, __cxx_contention_t& __monitor_val) const {
     // In case the contention type is anything else, platform wait is monitoring a __cxx_atomic_contention_t
@@ -105,20 +144,21 @@ struct __atomic_wait_backoff_impl {
     return __poll_(__current_val);
   }
 
-  _LIBCPP_AVAILABILITY_SYNC
-  _LIBCPP_HIDE_FROM_ABI bool operator()(chrono::nanoseconds __elapsed) const {
+  _LIBCPP_HIDE_FROM_ABI __backoff_results operator()(chrono::nanoseconds __elapsed) const {
     if (__elapsed > chrono::microseconds(4)) {
       auto __contention_address = __waitable_traits::__atomic_contention_address(__a_);
       __cxx_contention_t __monitor_val;
       if (__update_monitor_val_and_poll(__contention_address, __monitor_val))
-        return true;
+        return __backoff_results::__poll_success;
       std::__libcpp_atomic_wait(__contention_address, __monitor_val);
     } else {
     } // poll
-    return false;
+    return __backoff_results::__continue_poll;
   }
 };
 
+#    endif // _LIBCPP_AVAILABILITY_HAS_NEW_SYNC
+
 // The semantics of this function are similar to `atomic`'s
 // `.wait(T old, std::memory_order order)`, but instead of having a hardcoded
 // predicate (is the loaded value unequal to `old`?), the predicate function is
@@ -128,9 +168,8 @@ struct __atomic_wait_backoff_impl {
 // `false`, it must set the argument to its current understanding of the atomic
 // value. The predicate function must not return `false` spuriously.
 template <class _AtomicWaitable, class _Poll>
-_LIBCPP_AVAILABILITY_SYNC _LIBCPP_HIDE_FROM_ABI void
-__atomic_wait_unless(const _AtomicWaitable& __a, memory_order __order, _Poll&& __poll) {
-  static_assert(__atomic_waitable<_AtomicWaitable>::value, "");
+_LIBCPP_HIDE_FROM_ABI void __atomic_wait_unless(const _AtomicWaitable& __a, memory_order __order, _Poll&& __poll) {
+  static_assert(__atomic_waitable<_AtomicWaitable>);
   __atomic_wait_backoff_impl<_AtomicWaitable, __decay_t<_Poll> > __backoff_fn = {__a, __poll, __order};
   std::__libcpp_thread_poll_with_backoff(
       /* poll */
@@ -141,18 +180,52 @@ __atomic_wait_unless(const _AtomicWaitable& __a, memory_order __order, _Poll&& _
       /* backoff */ __backoff_fn);
 }
 
+#    if _LIBCPP_AVAILABILITY_HAS_NEW_SYNC
+
 template <class _AtomicWaitable>
-_LIBCPP_AVAILABILITY_SYNC _LIBCPP_HIDE_FROM_ABI void __atomic_notify_one(const _AtomicWaitable& __a) {
-  static_assert(__atomic_waitable<_AtomicWaitable>::value, "");
+_LIBCPP_HIDE_FROM_ABI void __atomic_notify_one(const _AtomicWaitable& __a) {
+  static_assert(__atomic_waitable<_AtomicWaitable>);
+  using __value_type _LIBCPP_NODEBUG = typename __atomic_waitable_traits<__decay_t<_AtomicWaitable> >::__value_type;
+  using __waitable_traits _LIBCPP_NODEBUG = __atomic_waitable_traits<__decay_t<_AtomicWaitable> >;
+  auto __contention_address =
+      const_cast<const void*>(static_cast<const volatile void*>(__waitable_traits::__atomic_contention_address(__a)));
+  if constexpr (__has_native_atomic_wait<__value_type>) {
+    std::__atomic_notify_one_native<sizeof(__value_type)>(__contention_address);
+  } else {
+    std::__atomic_notify_one_global_table(__contention_address);
+  }
+}
+
+template <class _AtomicWaitable>
+_LIBCPP_HIDE_FROM_ABI void __atomic_notify_all(const _AtomicWaitable& __a) {
+  static_assert(__atomic_waitable<_AtomicWaitable>);
+  using __value_type _LIBCPP_NODEBUG = typename __atomic_waitable_traits<__decay_t<_AtomicWaitable> >::__value_type;
+  using __waitable_traits _LIBCPP_NODEBUG = __atomic_waitable_traits<__decay_t<_AtomicWaitable> >;
+  auto __contention_address =
+      const_cast<const void*>(static_cast<const volatile void*>(__waitable_traits::__atomic_contention_address(__a)));
+  if constexpr (__has_native_atomic_wait<__value_type>) {
+    std::__atomic_notify_all_native<sizeof(__value_type)>(__contention_address);
+  } else {
+    std::__atomic_notify_all_global_table(__contention_address);
+  }
+}
+
+#    else // _LIBCPP_AVAILABILITY_HAS_NEW_SYNC
+
+template <class _AtomicWaitable>
+_LIBCPP_HIDE_FROM_ABI void __atomic_notify_one(const _AtomicWaitable& __a) {
+  static_assert(__atomic_waitable<_AtomicWaitable>);
   std::__cxx_atomic_notify_one(__atomic_waitable_traits<__decay_t<_AtomicWaitable> >::__atomic_contention_address(__a));
 }
 
 template <class _AtomicWaitable>
-_LIBCPP_AVAILABILITY_SYNC _LIBCPP_HIDE_FROM_ABI void __atomic_notify_all(const _AtomicWaitable& __a) {
-  static_assert(__atomic_waitable<_AtomicWaitable>::value, "");
+_LIBCPP_HIDE_FROM_ABI void __atomic_notify_all(const _AtomicWaitable& __a) {
+  static_assert(__atomic_waitable<_AtomicWaitable>);
   std::__cxx_atomic_notify_all(__atomic_waitable_traits<__decay_t<_AtomicWaitable> >::__atomic_contention_address(__a));
 }
 
+#    endif // _LIBCPP_AVAILABILITY_HAS_NEW_SYNC
+
 #  else // _LIBCPP_HAS_THREADS
 
 template <class _AtomicWaitable, class _Poll>
@@ -180,9 +253,8 @@ _LIBCPP_HIDE_FROM_ABI bool __cxx_nonatomic_compare_equal(_Tp const& __lhs, _Tp c
 }
 
 template <class _AtomicWaitable, class _Tp>
-_LIBCPP_AVAILABILITY_SYNC _LIBCPP_HIDE_FROM_ABI void
-__atomic_wait(_AtomicWaitable& __a, _Tp __val, memory_order __order) {
-  static_assert(__atomic_waitable<_AtomicWaitable>::value, "");
+_LIBCPP_HIDE_FROM_ABI void __atomic_wait(_AtomicWaitable& __a, _Tp __val, memory_order __order) {
+  static_assert(__atomic_waitable<_AtomicWaitable>);
   std::__atomic_wait_unless(__a, __order, [&](_Tp const& __current) {
     return !std::__cxx_nonatomic_compare_equal(__current, __val);
   });
diff --git a/lib/libcxx/include/__atomic/atomic_sync_timed.h b/lib/libcxx/include/__atomic/atomic_sync_timed.h
new file mode 100644
index 0000000000..7daff73db7
--- /dev/null
+++ b/lib/libcxx/include/__atomic/atomic_sync_timed.h
@@ -0,0 +1,144 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef _LIBCPP___ATOMIC_ATOMIC_SYNC_TIMED_H
+#define _LIBCPP___ATOMIC_ATOMIC_SYNC_TIMED_H
+
+#include <__atomic/atomic_waitable_traits.h>
+#include <__atomic/contention_t.h>
+#include <__atomic/memory_order.h>
+#include <__atomic/to_gcc_order.h>
+#include <__chrono/duration.h>
+#include <__config>
+#include <__memory/addressof.h>
+#include <__thread/poll_with_backoff.h>
+#include <__thread/timed_backoff_policy.h>
+#include <__type_traits/conjunction.h>
+#include <__type_traits/decay.h>
+#include <__type_traits/has_unique_object_representation.h>
+#include <__type_traits/invoke.h>
+#include <__type_traits/is_same.h>
+#include <__type_traits/is_trivially_copyable.h>
+#include <__type_traits/void_t.h>
+#include <__utility/declval.h>
+#include <cstdint>
+#include <cstring>
+
+#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
+#  pragma GCC system_header
+#endif
+
+_LIBCPP_BEGIN_NAMESPACE_STD
+
+#if _LIBCPP_STD_VER >= 20
+#  if _LIBCPP_HAS_THREADS && _LIBCPP_AVAILABILITY_HAS_NEW_SYNC
+
+_LIBCPP_AVAILABILITY_NEW_SYNC
+_LIBCPP_EXPORTED_FROM_ABI __cxx_contention_t __atomic_monitor_global(void const* __address) _NOEXCEPT;
+
+// wait (with a timeout) for the address's global contention state to change from the given value
+_LIBCPP_AVAILABILITY_NEW_SYNC _LIBCPP_EXPORTED_FROM_ABI void __atomic_wait_global_table_with_timeout(
+    void const* __address, __cxx_contention_t __monitor_value, uint64_t __timeout_ns) _NOEXCEPT;
+
+// wait on the address directly with the native platform wait
+template <std::size_t _Size>
+_LIBCPP_AVAILABILITY_NEW_SYNC _LIBCPP_EXPORTED_FROM_ABI void
+__atomic_wait_native_with_timeout(void const* __address, void const* __old_value, uint64_t __timeout_ns) _NOEXCEPT;
+
+template <class _AtomicWaitable, class _Poll, class _Rep, class _Period>
+struct __atomic_wait_timed_backoff_impl {
+  const _AtomicWaitable& __a_;
+  _Poll __poll_;
+  memory_order __order_;
+  chrono::duration<_Rep, _Period> __rel_time_;
+
+  using __waitable_traits _LIBCPP_NODEBUG = __atomic_waitable_traits<__decay_t<_AtomicWaitable> >;
+  using __value_type _LIBCPP_NODEBUG      = typename __waitable_traits::__value_type;
+
+  _LIBCPP_HIDE_FROM_ABI __backoff_results operator()(chrono::nanoseconds __elapsed) const {
+    if (__elapsed > chrono::microseconds(4)) {
+      auto __contention_address = const_cast<const void*>(
+          static_cast<const volatile void*>(__waitable_traits::__atomic_contention_address(__a_)));
+
+      uint64_t __timeout_ns =
+          static_cast<uint64_t>((chrono::duration_cast<chrono::nanoseconds>(__rel_time_) - __elapsed).count());
+
+      if constexpr (__has_native_atomic_wait<__value_type>) {
+        auto __atomic_value = __waitable_traits::__atomic_load(__a_, __order_);
+        if (__poll_(__atomic_value))
+          return __backoff_results::__poll_success;
+        std::__atomic_wait_native_with_timeout<sizeof(__value_type)>(
+            __contention_address, std::addressof(__atomic_value), __timeout_ns);
+      } else {
+        __cxx_contention_t __monitor_val = std::__atomic_monitor_global(__contention_address);
+        auto __atomic_value              = __waitable_traits::__atomic_load(__a_, __order_);
+        if (__poll_(__atomic_value))
+          return __backoff_results::__poll_success;
+        std::__atomic_wait_global_table_with_timeout(__contention_address, __monitor_val, __timeout_ns);
+      }
+    } else {
+    } // poll
+    return __backoff_results::__continue_poll;
+  }
+};
+
+// The semantics of this function are similar to `atomic`'s
+// `.wait(T old, std::memory_order order)` with a timeout, but instead of having a hardcoded
+// predicate (is the loaded value unequal to `old`?), the predicate function is
+// specified as an argument. The loaded value is given as an in-out argument to
+// the predicate. If the predicate function returns `true`,
+// `__atomic_wait_unless_with_timeout` will return. If the predicate function returns
+// `false`, it must set the argument to its current understanding of the atomic
+// value. The predicate function must not return `false` spuriously.
+template <class _AtomicWaitable, class _Poll, class _Rep, class _Period>
+_LIBCPP_HIDE_FROM_ABI bool __atomic_wait_unless_with_timeout(
+    const _AtomicWaitable& __a,
+    memory_order __order,
+    _Poll&& __poll,
+    chrono::duration<_Rep, _Period> const& __rel_time) {
+  static_assert(__atomic_waitable<_AtomicWaitable>, "");
+  __atomic_wait_timed_backoff_impl<_AtomicWaitable, __decay_t<_Poll>, _Rep, _Period> __backoff_fn = {
+      __a, __poll, __order, __rel_time};
+  auto __poll_result = std::__libcpp_thread_poll_with_backoff(
+      /* poll */
+      [&]() {
+        auto __current_val = __atomic_waitable_traits<__decay_t<_AtomicWaitable> >::__atomic_load(__a, __order);
+        return __poll(__current_val);
+      },
+      /* backoff */ __backoff_fn,
+      __rel_time);
+
+  return __poll_result == __poll_with_backoff_results::__poll_success;
+}
+
+#  elif _LIBCPP_HAS_THREADS // _LIBCPP_HAS_THREADS && _LIBCPP_AVAILABILITY_HAS_NEW_SYNC
+
+template <class _AtomicWaitable, class _Poll, class _Rep, class _Period>
+_LIBCPP_HIDE_FROM_ABI bool __atomic_wait_unless_with_timeout(
+    const _AtomicWaitable& __a,
+    memory_order __order,
+    _Poll&& __poll,
+    chrono::duration<_Rep, _Period> const& __rel_time) {
+  auto __res = std::__libcpp_thread_poll_with_backoff(
+      /* poll */
+      [&]() {
+        auto __current_val = __atomic_waitable_traits<__decay_t<_AtomicWaitable> >::__atomic_load(__a, __order);
+        return __poll(__current_val);
+      },
+      /* backoff */ __libcpp_timed_backoff_policy(),
+      __rel_time);
+  return __res == __poll_with_backoff_results::__poll_success;
+}
+
+#  endif // _LIBCPP_HAS_THREADS && _LIBCPP_AVAILABILITY_HAS_NEW_SYNC
+
+#endif // C++20
+
+_LIBCPP_END_NAMESPACE_STD
+
+#endif // _LIBCPP___ATOMIC_ATOMIC_SYNC_TIMED_H
diff --git a/lib/libcxx/include/__atomic/atomic_waitable_traits.h b/lib/libcxx/include/__atomic/atomic_waitable_traits.h
new file mode 100644
index 0000000000..849c33122b
--- /dev/null
+++ b/lib/libcxx/include/__atomic/atomic_waitable_traits.h
@@ -0,0 +1,103 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef _LIBCPP___ATOMIC_ATOMIC_WAITABLE_TRAITS_H
+#define _LIBCPP___ATOMIC_ATOMIC_WAITABLE_TRAITS_H
+
+#include <__atomic/contention_t.h>
+#include <__atomic/memory_order.h>
+#include <__config>
+#include <__type_traits/decay.h>
+#include <__type_traits/has_unique_object_representation.h>
+#include <__type_traits/is_same.h>
+#include <__type_traits/is_trivially_copyable.h>
+#include <__type_traits/void_t.h>
+#include <__utility/declval.h>
+#include <cstring>
+
+#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
+#  pragma GCC system_header
+#endif
+
+_LIBCPP_BEGIN_NAMESPACE_STD
+
+#if _LIBCPP_STD_VER >= 20
+
+// The customisation points to enable the following functions:
+// - __atomic_wait
+// - __atomic_wait_unless
+// - __atomic_notify_one
+// - __atomic_notify_all
+template <class _Tp, class = void>
+struct __atomic_waitable_traits {
+  using __value_type _LIBCPP_NODEBUG = void;
+
+  template <class _AtomicWaitable>
+  static void __atomic_load(_AtomicWaitable&&, memory_order) = delete;
+
+  template <class _AtomicWaitable>
+  static void __atomic_contention_address(_AtomicWaitable&&) = delete;
+};
+
+template <class _Tp>
+concept __atomic_waitable = requires(const _Tp __t, memory_order __order) {
+  typename __atomic_waitable_traits<__decay_t<_Tp> >::__value_type;
+  { __atomic_waitable_traits<__decay_t<_Tp> >::__atomic_load(__t, __order) };
+  { __atomic_waitable_traits<__decay_t<_Tp> >::__atomic_contention_address(__t) };
+};
+
+#  ifdef __linux__
+#    define _LIBCPP_NATIVE_PLATFORM_WAIT_SIZES(_APPLY) _APPLY(4)
+#  elif defined(__APPLE__)
+#    define _LIBCPP_NATIVE_PLATFORM_WAIT_SIZES(_APPLY)                                                                 \
+      _APPLY(4)                                                                                                        \
+      _APPLY(8)
+#  elif defined(__FreeBSD__) && __SIZEOF_LONG__ == 8
+#    define _LIBCPP_NATIVE_PLATFORM_WAIT_SIZES(_APPLY) _APPLY(8)
+#  elif defined(_WIN32)
+#    define _LIBCPP_NATIVE_PLATFORM_WAIT_SIZES(_APPLY) _APPLY(8)
+#  else
+#    define _LIBCPP_NATIVE_PLATFORM_WAIT_SIZES(_APPLY) _APPLY(sizeof(__cxx_contention_t))
+#  endif // __linux__
+
+// Concept that defines which types are supported natively by the platform's wait.
+
+#  if defined(_LIBCPP_ABI_ATOMIC_WAIT_NATIVE_BY_SIZE)
+
+template <class _Tp>
+_LIBCPP_HIDE_FROM_ABI constexpr bool __has_native_atomic_wait_impl() {
+  if (alignof(_Tp) % sizeof(_Tp) != 0)
+    return false;
+  switch (sizeof(_Tp)) {
+#    define _LIBCPP_MAKE_CASE(n)                                                                                       \
+    case n:                                                                                                            \
+      return true;
+    _LIBCPP_NATIVE_PLATFORM_WAIT_SIZES(_LIBCPP_MAKE_CASE)
+  default:
+    return false;
+#    undef _LIBCPP_MAKE_CASE
+  };
+}
+
+template <class _Tp>
+concept __has_native_atomic_wait =
+    has_unique_object_representations_v<_Tp> && is_trivially_copyable_v<_Tp> &&
+    std::__has_native_atomic_wait_impl<_Tp>();
+
+#  else // _LIBCPP_ABI_ATOMIC_WAIT_NATIVE_BY_SIZE
+
+template <class _Tp>
+concept __has_native_atomic_wait = is_same_v<_Tp, __cxx_contention_t>;
+
+#  endif // _LIBCPP_ABI_ATOMIC_WAIT_NATIVE_BY_SIZE
+
+#endif // C++20
+
+_LIBCPP_END_NAMESPACE_STD
+
+#endif // _LIBCPP___ATOMIC_ATOMIC_WAITABLE_TRAITS_H
diff --git a/lib/libcxx/include/__atomic/contention_t.h b/lib/libcxx/include/__atomic/contention_t.h
index 5b42a0125f..b7e370439e 100644
--- a/lib/libcxx/include/__atomic/contention_t.h
+++ b/lib/libcxx/include/__atomic/contention_t.h
@@ -19,11 +19,35 @@
 
 _LIBCPP_BEGIN_NAMESPACE_STD
 
-#if defined(__linux__) || (defined(_AIX) && !defined(__64BIT__))
+// The original definition of `__cxx_contention_t` seemed a bit arbitrary.
+// When we enable the _LIBCPP_ABI_ATOMIC_WAIT_NATIVE_BY_SIZE ABI,
+// use definitions that are based on what the underlying platform supports
+// instead.
+#if defined(_LIBCPP_ABI_ATOMIC_WAIT_NATIVE_BY_SIZE)
+
+#  ifdef __linux__
 using __cxx_contention_t _LIBCPP_NODEBUG = int32_t;
-#else
+#  elif defined(__APPLE__)
 using __cxx_contention_t _LIBCPP_NODEBUG = int64_t;
-#endif // __linux__ || (_AIX && !__64BIT__)
+#  elif defined(__FreeBSD__) && __SIZEOF_LONG__ == 8
+using __cxx_contention_t _LIBCPP_NODEBUG = int64_t;
+#  elif defined(_AIX) && !defined(__64BIT__)
+using __cxx_contention_t _LIBCPP_NODEBUG = int32_t;
+#  elif defined(_WIN32)
+using __cxx_contention_t _LIBCPP_NODEBUG = int64_t;
+#  else
+using __cxx_contention_t _LIBCPP_NODEBUG = int64_t;
+#  endif // __linux__
+
+#else // _LIBCPP_ABI_ATOMIC_WAIT_NATIVE_BY_SIZE
+
+#  if defined(__linux__) || (defined(_AIX) && !defined(__64BIT__))
+using __cxx_contention_t _LIBCPP_NODEBUG = int32_t;
+#  else
+using __cxx_contention_t _LIBCPP_NODEBUG = int64_t;
+#  endif // __linux__ || (_AIX && !__64BIT__)
+
+#endif // _LIBCPP_ABI_ATOMIC_WAIT_NATIVE_BY_SIZE
 
 using __cxx_atomic_contention_t _LIBCPP_NODEBUG = __cxx_atomic_impl<__cxx_contention_t>;
 
diff --git a/lib/libcxx/include/__atomic/floating_point_helper.h b/lib/libcxx/include/__atomic/floating_point_helper.h
new file mode 100644
index 0000000000..8762ec234b
--- /dev/null
+++ b/lib/libcxx/include/__atomic/floating_point_helper.h
@@ -0,0 +1,55 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef _LIBCPP___ATOMIC_FLOATING_POINT_HELPER_H
+#define _LIBCPP___ATOMIC_FLOATING_POINT_HELPER_H
+
+#include <__config>
+#include <__type_traits/is_floating_point.h>
+#include <__type_traits/is_same.h>
+
+#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
+#  pragma GCC system_header
+#endif
+
+_LIBCPP_BEGIN_NAMESPACE_STD
+
+#if _LIBCPP_STD_VER >= 20
+
+template <class _Tp>
+_LIBCPP_HIDE_FROM_ABI constexpr bool __is_fp80_long_double() {
+  // Only x87-fp80 long double has 64-bit mantissa
+  return __LDBL_MANT_DIG__ == 64 && std::is_same_v<_Tp, long double>;
+}
+
+template <class _Tp>
+_LIBCPP_HIDE_FROM_ABI constexpr bool __has_rmw_builtin() {
+  static_assert(std::is_floating_point_v<_Tp>);
+#  ifndef _LIBCPP_COMPILER_CLANG_BASED
+  return false;
+#  else
+  // The builtin __cxx_atomic_fetch_add errors during compilation for
+  // long double on platforms with fp80 format.
+  // For more details, see
+  // lib/Sema/SemaChecking.cpp function IsAllowedValueType
+  // LLVM Parser does not allow atomicrmw with x86_fp80 type.
+  // if (ValType->isSpecificBuiltinType(BuiltinType::LongDouble) &&
+  //    &Context.getTargetInfo().getLongDoubleFormat() ==
+  //        &llvm::APFloat::x87DoubleExtended())
+  // For more info
+  // https://llvm.org/PR68602
+  // https://reviews.llvm.org/D53965
+  return !std::__is_fp80_long_double<_Tp>();
+#  endif
+}
+
+#endif // _LIBCPP_STD_VER >= 20
+
+_LIBCPP_END_NAMESPACE_STD
+
+#endif // _LIBCPP___ATOMIC_FLOATING_POINT_HELPER_H
diff --git a/lib/libcxx/include/__bit/countl.h b/lib/libcxx/include/__bit/countl.h
index 0759140208..c95828f58d 100644
--- a/lib/libcxx/include/__bit/countl.h
+++ b/lib/libcxx/include/__bit/countl.h
@@ -24,7 +24,6 @@ _LIBCPP_BEGIN_NAMESPACE_STD
 
 template <class _Tp>
 _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 int __countl_zero(_Tp __t) _NOEXCEPT {
-  static_assert(__is_unsigned_integer_v<_Tp>, "__countl_zero requires an unsigned integer type");
   return __builtin_clzg(__t, numeric_limits<_Tp>::digits);
 }
 
@@ -37,7 +36,7 @@ template <__unsigned_integer _Tp>
 
 template <__unsigned_integer _Tp>
 [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr int countl_one(_Tp __t) noexcept {
-  return __t != numeric_limits<_Tp>::max() ? std::countl_zero(static_cast<_Tp>(~__t)) : numeric_limits<_Tp>::digits;
+  return std::countl_zero(static_cast<_Tp>(~__t));
 }
 
 #endif // _LIBCPP_STD_VER >= 20
diff --git a/lib/libcxx/include/__bit/countr.h b/lib/libcxx/include/__bit/countr.h
index f6c98695d3..16f689d6da 100644
--- a/lib/libcxx/include/__bit/countr.h
+++ b/lib/libcxx/include/__bit/countr.h
@@ -24,7 +24,6 @@ _LIBCPP_BEGIN_NAMESPACE_STD
 
 template <class _Tp>
 [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR int __countr_zero(_Tp __t) _NOEXCEPT {
-  static_assert(__is_unsigned_integer_v<_Tp>, "__countr_zero only works with unsigned types");
   return __builtin_ctzg(__t, numeric_limits<_Tp>::digits);
 }
 
@@ -37,7 +36,7 @@ template <__unsigned_integer _Tp>
 
 template <__unsigned_integer _Tp>
 [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr int countr_one(_Tp __t) noexcept {
-  return __t != numeric_limits<_Tp>::max() ? std::countr_zero(static_cast<_Tp>(~__t)) : numeric_limits<_Tp>::digits;
+  return std::countr_zero(static_cast<_Tp>(~__t));
 }
 
 #endif // _LIBCPP_STD_VER >= 20
diff --git a/lib/libcxx/include/__bit/has_single_bit.h b/lib/libcxx/include/__bit/has_single_bit.h
index b43e69323e..c49c518f2b 100644
--- a/lib/libcxx/include/__bit/has_single_bit.h
+++ b/lib/libcxx/include/__bit/has_single_bit.h
@@ -25,7 +25,7 @@ _LIBCPP_BEGIN_NAMESPACE_STD
 
 template <__unsigned_integer _Tp>
 [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr bool has_single_bit(_Tp __t) noexcept {
-  return __t != 0 && (((__t & (__t - 1)) == 0));
+  return __builtin_popcountg(__t) == 1;
 }
 
 _LIBCPP_END_NAMESPACE_STD
diff --git a/lib/libcxx/include/__bit/popcount.h b/lib/libcxx/include/__bit/popcount.h
index 8d9ba09938..d104c8e8f0 100644
--- a/lib/libcxx/include/__bit/popcount.h
+++ b/lib/libcxx/include/__bit/popcount.h
@@ -23,7 +23,6 @@ _LIBCPP_BEGIN_NAMESPACE_STD
 
 template <class _Tp>
 [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR int __popcount(_Tp __t) _NOEXCEPT {
-  static_assert(__is_unsigned_integer_v<_Tp>, "__popcount only works with unsigned types");
   return __builtin_popcountg(__t);
 }
 
diff --git a/lib/libcxx/include/__bit/rotate.h b/lib/libcxx/include/__bit/rotate.h
index c6f34bdaf6..fde9058887 100644
--- a/lib/libcxx/include/__bit/rotate.h
+++ b/lib/libcxx/include/__bit/rotate.h
@@ -22,46 +22,35 @@ _LIBCPP_BEGIN_NAMESPACE_STD
 // Writing two full functions for rotl and rotr makes it easier for the compiler
 // to optimize the code. On x86 this function becomes the ROL instruction and
 // the rotr function becomes the ROR instruction.
-template <class _Tp>
-_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 _Tp __rotl(_Tp __x, int __s) _NOEXCEPT {
-  static_assert(__is_unsigned_integer_v<_Tp>, "__rotl requires an unsigned integer type");
-  const int __n = numeric_limits<_Tp>::digits;
-  int __r       = __s % __n;
-
-  if (__r == 0)
-    return __x;
-
-  if (__r > 0)
-    return (__x << __r) | (__x >> (__n - __r));
-
-  return (__x >> -__r) | (__x << (__n + __r));
-}
-
-template <class _Tp>
-_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 _Tp __rotr(_Tp __x, int __s) _NOEXCEPT {
-  static_assert(__is_unsigned_integer_v<_Tp>, "__rotr requires an unsigned integer type");
-  const int __n = numeric_limits<_Tp>::digits;
-  int __r       = __s % __n;
-
-  if (__r == 0)
-    return __x;
-
-  if (__r > 0)
-    return (__x >> __r) | (__x << (__n - __r));
-
-  return (__x << -__r) | (__x >> (__n + __r));
-}
 
 #if _LIBCPP_STD_VER >= 20
 
 template <__unsigned_integer _Tp>
 [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr _Tp rotl(_Tp __t, int __cnt) noexcept {
-  return std::__rotl(__t, __cnt);
+  const int __n = numeric_limits<_Tp>::digits;
+  int __r       = __cnt % __n;
+
+  if (__r == 0)
+    return __t;
+
+  if (__r > 0)
+    return (__t << __r) | (__t >> (__n - __r));
+
+  return (__t >> -__r) | (__t << (__n + __r));
 }
 
 template <__unsigned_integer _Tp>
 [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr _Tp rotr(_Tp __t, int __cnt) noexcept {
-  return std::__rotr(__t, __cnt);
+  const int __n = numeric_limits<_Tp>::digits;
+  int __r       = __cnt % __n;
+
+  if (__r == 0)
+    return __t;
+
+  if (__r > 0)
+    return (__t >> __r) | (__t << (__n - __r));
+
+  return (__t << -__r) | (__t >> (__n + __r));
 }
 
 #endif // _LIBCPP_STD_VER >= 20
diff --git a/lib/libcxx/include/__bit_reference b/lib/libcxx/include/__bit_reference
index a3e6defd40..8daf3a2baa 100644
--- a/lib/libcxx/include/__bit_reference
+++ b/lib/libcxx/include/__bit_reference
@@ -15,8 +15,10 @@
 #include <__algorithm/copy_backward.h>
 #include <__algorithm/copy_n.h>
 #include <__algorithm/equal.h>
+#include <__algorithm/fill_n.h>
 #include <__algorithm/min.h>
 #include <__algorithm/rotate.h>
+#include <__algorithm/specialized_algorithms.h>
 #include <__algorithm/swap_ranges.h>
 #include <__assert>
 #include <__bit/countr.h>
@@ -137,7 +139,7 @@ public:
   _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 operator bool() const _NOEXCEPT {
     return static_cast<bool>(*__seg_ & __mask_);
   }
-  _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 bool operator~() const _NOEXCEPT {
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 bool operator~() const _NOEXCEPT {
     return !static_cast<bool>(*this);
   }
 
@@ -307,6 +309,15 @@ public:
   {
   }
 
+#ifdef _LIBCPP_ABI_TRIVIALLY_COPYABLE_BIT_ITERATOR
+  template <bool _IsConstDep = _IsConst, __enable_if_t<_IsConstDep, int> = 0>
+  _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 __bit_iterator(const __bit_iterator<_Cp, false>& __it) _NOEXCEPT
+      : __seg_(__it.__seg_),
+        __ctz_(__it.__ctz_) {}
+
+  _LIBCPP_HIDE_FROM_ABI __bit_iterator(const __bit_iterator&)            = default;
+  _LIBCPP_HIDE_FROM_ABI __bit_iterator& operator=(const __bit_iterator&) = default;
+#else
   // When _IsConst=false, this is the copy constructor.
   // It is non-trivial. Making it trivial would break ABI.
   // When _IsConst=true, this is a converting constructor;
@@ -327,6 +338,7 @@ public:
     __ctz_ = __it.__ctz_;
     return *this;
   }
+#endif
 
   _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 reference operator*() const _NOEXCEPT {
     _LIBCPP_ASSERT_INTERNAL(__ctz_ < __bits_per_word, "Dereferencing an invalid __bit_iterator.");
@@ -467,20 +479,6 @@ private:
   template <class _Dp>
   friend struct __bit_array;
 
-  template <bool _FillVal, class _Dp>
-  _LIBCPP_CONSTEXPR_SINCE_CXX20 friend void
-  __fill_n_bool(__bit_iterator<_Dp, false> __first, typename __size_difference_type_traits<_Dp>::size_type __n);
-
-  template <class _Dp, bool _IC>
-  _LIBCPP_CONSTEXPR_SINCE_CXX20 friend __bit_iterator<_Dp, false> __copy_aligned(
-      __bit_iterator<_Dp, _IC> __first, __bit_iterator<_Dp, _IC> __last, __bit_iterator<_Dp, false> __result);
-  template <class _Dp, bool _IC>
-  _LIBCPP_CONSTEXPR_SINCE_CXX20 friend __bit_iterator<_Dp, false> __copy_unaligned(
-      __bit_iterator<_Dp, _IC> __first, __bit_iterator<_Dp, _IC> __last, __bit_iterator<_Dp, false> __result);
-  template <class _Dp, bool _IC>
-  _LIBCPP_CONSTEXPR_SINCE_CXX20 friend pair<__bit_iterator<_Dp, _IC>, __bit_iterator<_Dp, false> >
-  __copy_impl::operator()(
-      __bit_iterator<_Dp, _IC> __first, __bit_iterator<_Dp, _IC> __last, __bit_iterator<_Dp, false> __result) const;
   template <class _Dp, bool _IC>
   _LIBCPP_CONSTEXPR_SINCE_CXX20 friend __bit_iterator<_Dp, false> __copy_backward_aligned(
       __bit_iterator<_Dp, _IC> __first, __bit_iterator<_Dp, _IC> __last, __bit_iterator<_Dp, false> __result);
@@ -511,10 +509,20 @@ private:
             bool _IsConst1,
             bool _IsConst2,
             class _BinaryPredicate,
-            __enable_if_t<__desugars_to_v<__equal_tag, _BinaryPredicate, bool, bool>, int> >
+            class _Proj1,
+            class _Proj2,
+            __enable_if_t<__is_identity<_Proj1>::value && __is_identity<_Proj2>::value &&
+                              __desugars_to_v<__equal_tag, _BinaryPredicate, bool, bool>,
+                          int> >
   _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 friend bool __equal_iter_impl(
-      __bit_iterator<_Dp, _IsConst1>, __bit_iterator<_Dp, _IsConst1>, __bit_iterator<_Dp, _IsConst2>, _BinaryPredicate);
-  template <class _Dp,
+      __bit_iterator<_Dp, _IsConst1>,
+      __bit_iterator<_Dp, _IsConst1>,
+      __bit_iterator<_Dp, _IsConst2>,
+      _BinaryPredicate,
+      _Proj1&,
+      _Proj2&);
+  template <bool,
+            class _Dp,
             bool _IsConst1,
             bool _IsConst2,
             class _Pred,
@@ -537,6 +545,187 @@ private:
   template <bool _ToCount, class _Dp, bool _IC>
   friend typename __bit_iterator<_Dp, _IC>::difference_type _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20
   __count_bool(__bit_iterator<_Dp, _IC>, typename __size_difference_type_traits<_Dp>::size_type);
+
+  template <class, class...>
+  friend struct __specialized_algorithm;
+};
+
+template <class _Cp> // fill_n for mutable bit iterators: stores whole words where possible, masks the partial ends
+struct __specialized_algorithm<_Algorithm::__fill_n, __single_iterator<__bit_iterator<_Cp, false> > > {
+  static const bool __has_algorithm = true;
+
+  template <bool _FillVal> // _FillVal is the bit value to store, fixed at compile time
+  _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 static void
+  __impl(__bit_iterator<_Cp, false> __first, typename __size_difference_type_traits<_Cp>::size_type __n) {
+    using _It            = __bit_iterator<_Cp, false>;
+    using __storage_type = typename _It::__storage_type;
+
+    const int __bits_per_word = _It::__bits_per_word;
+    // Leading partial word: __first does not start on a word boundary.
+    if (__first.__ctz_ != 0) {
+      __storage_type __clz_f = static_cast<__storage_type>(__bits_per_word - __first.__ctz_); // bits from __ctz_ up
+      __storage_type __dn    = std::min(__clz_f, __n); // bits handled in this word
+      std::__fill_masked_range(std::__to_address(__first.__seg_), __clz_f - __dn, __first.__ctz_, _FillVal);
+      __n -= __dn;
+      ++__first.__seg_; // continue with the next storage word
+    }
+    // Whole words: store all-ones or all-zeros into __nw consecutive words.
+    __storage_type __nw = __n / __bits_per_word;
+    std::__fill_n(std::__to_address(__first.__seg_), __nw, _FillVal ? static_cast<__storage_type>(-1) : 0);
+    __n -= __nw * __bits_per_word;
+    // Trailing partial word: fewer than __bits_per_word bits remain.
+    if (__n > 0) {
+      __first.__seg_ += __nw;
+      std::__fill_masked_range(std::__to_address(__first.__seg_), __bits_per_word - __n, 0u, _FillVal);
+    }
+  }
+
+  template <class _Size, class _Tp> // __value is tested as a bool to select __impl<true> or __impl<false>
+  _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 static __bit_iterator<_Cp, false>
+  operator()(__bit_iterator<_Cp, false> __first, _Size __n, const _Tp& __value) {
+    if (__n > 0) { // a zero or negative count writes nothing
+      if (__value)
+        __impl<true>(__first, __n); // __impl takes __first by value, so the local __first is unchanged
+      else
+        __impl<false>(__first, __n);
+    }
+    return __n > 0 ? __first + __n : __first; // std::fill_n returns first when the count is not positive
+  }
+};
+
+template <class _Cp, bool _IsConst> // copy from (possibly const) bit iterators into mutable bit iterators
+struct __specialized_algorithm<_Algorithm::__copy,
+                               __iterator_pair<__bit_iterator<_Cp, _IsConst>, __bit_iterator<_Cp, _IsConst> >,
+                               __single_iterator<__bit_iterator<_Cp, false> > > {
+  static const bool __has_algorithm = true;
+
+  _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 static __bit_iterator<_Cp, false>
+  __aligned_impl(__bit_iterator<_Cp, _IsConst> __first, // operator() calls this when both bit offsets are equal
+                 __bit_iterator<_Cp, _IsConst> __last,
+                 __bit_iterator<_Cp, false> __result) {
+    using _In             = __bit_iterator<_Cp, _IsConst>;
+    using difference_type = typename _In::difference_type;
+    using __storage_type  = typename _In::__storage_type;
+
+    const int __bits_per_word = _In::__bits_per_word;
+    difference_type __n       = __last - __first; // number of bits to copy
+    if (__n > 0) {
+      // Leading partial source word (source does not start on a word boundary).
+      if (__first.__ctz_ != 0) {
+        unsigned __clz       = __bits_per_word - __first.__ctz_; // bits from __ctz_ to the end of the word
+        difference_type __dn = std::min(static_cast<difference_type>(__clz), __n); // bits taken from this word
+        __n -= __dn;
+        __storage_type __m = std::__middle_mask<__storage_type>(__clz - __dn, __first.__ctz_);
+        __storage_type __b = *__first.__seg_ & __m;
+        *__result.__seg_ &= ~__m; // clear the masked destination bits, then merge the source bits
+        *__result.__seg_ |= __b;
+        __result.__seg_ += (__dn + __result.__ctz_) / __bits_per_word; // advance the destination by __dn bits
+        __result.__ctz_ = static_cast<unsigned>((__dn + __result.__ctz_) % __bits_per_word);
+        ++__first.__seg_;
+        // __first.__ctz_ is logically 0 from here on; the local copy is not updated because it is not read again.
+      }
+      // The source is now at a word boundary (or __n is already 0).
+      // Whole words: copied directly as storage words.
+      __storage_type __nw = __n / __bits_per_word;
+      std::copy(std::__to_address(__first.__seg_),
+                std::__to_address(__first.__seg_ + __nw),
+                std::__to_address(__result.__seg_));
+      __n -= __nw * __bits_per_word;
+      __result.__seg_ += __nw;
+      // Trailing partial word: fewer than __bits_per_word bits remain.
+      if (__n > 0) {
+        __first.__seg_ += __nw;
+        __storage_type __m = std::__trailing_mask<__storage_type>(__bits_per_word - __n);
+        __storage_type __b = *__first.__seg_ & __m;
+        *__result.__seg_ &= ~__m;
+        *__result.__seg_ |= __b;
+        __result.__ctz_ = static_cast<unsigned>(__n); // the result ends __n bits into this word
+      }
+    }
+    return __result; // one past the last bit written (unchanged when __n <= 0)
+  }
+
+  _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 static __bit_iterator<_Cp, false>
+  __unaligned_impl(__bit_iterator<_Cp, _IsConst> __first, // operator() calls this when the bit offsets differ
+                   __bit_iterator<_Cp, _IsConst> __last,
+                   __bit_iterator<_Cp, false> __result) {
+    using _In             = __bit_iterator<_Cp, _IsConst>;
+    using difference_type = typename _In::difference_type;
+    using __storage_type  = typename _In::__storage_type;
+
+    const int __bits_per_word = _In::__bits_per_word;
+    difference_type __n       = __last - __first; // number of bits to copy
+    if (__n > 0) {
+      // Leading partial source word; its bits may straddle two destination words.
+      if (__first.__ctz_ != 0) {
+        unsigned __clz_f     = __bits_per_word - __first.__ctz_; // bits left in the source word
+        difference_type __dn = std::min(static_cast<difference_type>(__clz_f), __n); // bits taken from it
+        __n -= __dn;
+        __storage_type __m   = std::__middle_mask<__storage_type>(__clz_f - __dn, __first.__ctz_);
+        __storage_type __b   = *__first.__seg_ & __m;
+        unsigned __clz_r     = __bits_per_word - __result.__ctz_; // bits left in the destination word
+        __storage_type __ddn = std::min<__storage_type>(__dn, __clz_r); // part that fits in this destination word
+        __m                  = std::__middle_mask<__storage_type>(__clz_r - __ddn, __result.__ctz_);
+        *__result.__seg_ &= ~__m;
+        if (__result.__ctz_ > __first.__ctz_) // shift the source bits to the destination bit offset
+          *__result.__seg_ |= __b << (__result.__ctz_ - __first.__ctz_);
+        else
+          *__result.__seg_ |= __b >> (__first.__ctz_ - __result.__ctz_);
+        __result.__seg_ += (__ddn + __result.__ctz_) / __bits_per_word;
+        __result.__ctz_ = static_cast<unsigned>((__ddn + __result.__ctz_) % __bits_per_word);
+        __dn -= __ddn;
+        if (__dn > 0) { // the rest goes to the low end of the next destination word
+          __m = std::__trailing_mask<__storage_type>(__bits_per_word - __dn);
+          *__result.__seg_ &= ~__m;
+          *__result.__seg_ |= __b >> (__first.__ctz_ + __ddn);
+          __result.__ctz_ = static_cast<unsigned>(__dn);
+        }
+        ++__first.__seg_;
+        // __first.__ctz_ is logically 0 from here on; the local copy is not updated because it is not read again.
+      }
+      // The source is now at a word boundary (or __n is already 0).
+      // Whole source words: each one is split across two adjacent destination words.
+      unsigned __clz_r   = __bits_per_word - __result.__ctz_;
+      __storage_type __m = std::__leading_mask<__storage_type>(__result.__ctz_);
+      for (; __n >= __bits_per_word; __n -= __bits_per_word, ++__first.__seg_) {
+        __storage_type __b = *__first.__seg_;
+        *__result.__seg_ &= ~__m;
+        *__result.__seg_ |= __b << __result.__ctz_; // low source bits go into the current destination word
+        ++__result.__seg_;
+        *__result.__seg_ &= __m;
+        *__result.__seg_ |= __b >> __clz_r; // remaining high source bits go into the next destination word
+      }
+      // Trailing partial source word (source offset 0); may again straddle two destination words.
+      if (__n > 0) {
+        __m                 = std::__trailing_mask<__storage_type>(__bits_per_word - __n);
+        __storage_type __b  = *__first.__seg_ & __m;
+        __storage_type __dn = std::min(__n, static_cast<difference_type>(__clz_r)); // part that fits in this word
+        __m                 = std::__middle_mask<__storage_type>(__clz_r - __dn, __result.__ctz_);
+        *__result.__seg_ &= ~__m;
+        *__result.__seg_ |= __b << __result.__ctz_;
+        __result.__seg_ += (__dn + __result.__ctz_) / __bits_per_word;
+        __result.__ctz_ = static_cast<unsigned>((__dn + __result.__ctz_) % __bits_per_word);
+        __n -= __dn;
+        if (__n > 0) { // the rest goes to the low end of the next destination word
+          __m = std::__trailing_mask<__storage_type>(__bits_per_word - __n);
+          *__result.__seg_ &= ~__m;
+          *__result.__seg_ |= __b >> __dn;
+          __result.__ctz_ = static_cast<unsigned>(__n);
+        }
+      }
+    }
+    return __result; // one past the last bit written (unchanged when __n <= 0)
+  }
+
+  _LIBCPP_HIDE_FROM_ABI
+  _LIBCPP_CONSTEXPR_SINCE_CXX20 static pair<__bit_iterator<_Cp, _IsConst>, __bit_iterator<_Cp, false> >
+  operator()(__bit_iterator<_Cp, _IsConst> __first, // returns {__last, end of the written output}
+             __bit_iterator<_Cp, _IsConst> __last,
+             __bit_iterator<_Cp, false> __result) {
+    if (__first.__ctz_ == __result.__ctz_) // same bit offset in source and destination: no shifting needed
+      return std::make_pair(__last, __aligned_impl(__first, __last, __result));
+    return std::make_pair(__last, __unaligned_impl(__first, __last, __result));
+  }
 };
 
 _LIBCPP_END_NAMESPACE_STD
diff --git a/lib/libcxx/include/__charconv/from_chars_integral.h b/lib/libcxx/include/__charconv/from_chars_integral.h
index c1f033b37b..903e892cab 100644
--- a/lib/libcxx/include/__charconv/from_chars_integral.h
+++ b/lib/libcxx/include/__charconv/from_chars_integral.h
@@ -18,8 +18,8 @@
 #include <__memory/addressof.h>
 #include <__system_error/errc.h>
 #include <__type_traits/enable_if.h>
-#include <__type_traits/integral_constant.h>
 #include <__type_traits/is_integral.h>
+#include <__type_traits/is_signed.h>
 #include <__type_traits/is_unsigned.h>
 #include <__type_traits/make_unsigned.h>
 #include <limits>
diff --git a/lib/libcxx/include/__charconv/from_chars_result.h b/lib/libcxx/include/__charconv/from_chars_result.h
index a7bfd6530a..b4ecea3d11 100644
--- a/lib/libcxx/include/__charconv/from_chars_result.h
+++ b/lib/libcxx/include/__charconv/from_chars_result.h
@@ -21,7 +21,7 @@ _LIBCPP_BEGIN_NAMESPACE_STD
 
 #if _LIBCPP_STD_VER >= 17
 
-struct _LIBCPP_EXPORTED_FROM_ABI from_chars_result {
+struct from_chars_result {
   const char* ptr;
   errc ec;
 #  if _LIBCPP_STD_VER >= 20
diff --git a/lib/libcxx/include/__charconv/to_chars_integral.h b/lib/libcxx/include/__charconv/to_chars_integral.h
index f10cc35668..6d42513926 100644
--- a/lib/libcxx/include/__charconv/to_chars_integral.h
+++ b/lib/libcxx/include/__charconv/to_chars_integral.h
@@ -24,6 +24,7 @@
 #include <__type_traits/integral_constant.h>
 #include <__type_traits/is_integral.h>
 #include <__type_traits/is_same.h>
+#include <__type_traits/is_signed.h>
 #include <__type_traits/make_32_64_or_128_bit.h>
 #include <__type_traits/make_unsigned.h>
 #include <__utility/unreachable.h>
diff --git a/lib/libcxx/include/__charconv/to_chars_result.h b/lib/libcxx/include/__charconv/to_chars_result.h
index 41dea4ab14..a3cd4e28d0 100644
--- a/lib/libcxx/include/__charconv/to_chars_result.h
+++ b/lib/libcxx/include/__charconv/to_chars_result.h
@@ -21,7 +21,7 @@ _LIBCPP_BEGIN_NAMESPACE_STD
 
 #if _LIBCPP_STD_VER >= 17
 
-struct _LIBCPP_EXPORTED_FROM_ABI to_chars_result {
+struct to_chars_result {
   char* ptr;
   errc ec;
 #  if _LIBCPP_STD_VER >= 20
diff --git a/lib/libcxx/include/__charconv/traits.h b/lib/libcxx/include/__charconv/traits.h
index 9fd0092ca7..b8c840d1eb 100644
--- a/lib/libcxx/include/__charconv/traits.h
+++ b/lib/libcxx/include/__charconv/traits.h
@@ -113,31 +113,10 @@ struct _LIBCPP_HIDDEN __traits_base<_Tp, __enable_if_t<sizeof(_Tp) == sizeof(__u
 };
 #  endif
 
-template <typename _Tp>
-inline _LIBCPP_CONSTEXPR_SINCE_CXX23 _LIBCPP_HIDE_FROM_ABI bool
-__mul_overflowed(unsigned char __a, _Tp __b, unsigned char& __r) {
-  auto __c = __a * __b;
-  __r      = __c;
-  return __c > numeric_limits<unsigned char>::max();
-}
-
-template <typename _Tp>
-inline _LIBCPP_CONSTEXPR_SINCE_CXX23 _LIBCPP_HIDE_FROM_ABI bool
-__mul_overflowed(unsigned short __a, _Tp __b, unsigned short& __r) {
-  auto __c = __a * __b;
-  __r      = __c;
-  return __c > numeric_limits<unsigned short>::max();
-}
-
-template <typename _Tp>
-inline _LIBCPP_CONSTEXPR_SINCE_CXX23 _LIBCPP_HIDE_FROM_ABI bool __mul_overflowed(_Tp __a, _Tp __b, _Tp& __r) {
-  static_assert(is_unsigned<_Tp>::value, "");
-  return __builtin_mul_overflow(__a, __b, std::addressof(__r));
-}
-
 template <typename _Tp, typename _Up>
-inline _LIBCPP_HIDE_FROM_ABI bool _LIBCPP_CONSTEXPR_SINCE_CXX23 __mul_overflowed(_Tp __a, _Up __b, _Tp& __r) {
-  return __itoa::__mul_overflowed(__a, static_cast<_Tp>(__b), __r);
+_LIBCPP_HIDE_FROM_ABI bool _LIBCPP_CONSTEXPR_SINCE_CXX23 __mul_overflowed(_Tp __a, _Up __b, _Tp& __r) {
+  static_assert(is_unsigned<_Tp>::value); // only unsigned _Tp is accepted
+  return __builtin_mul_overflow(__a, static_cast<_Tp>(__b), std::addressof(__r)); // __b is converted to _Tp first
 }
 
 template <typename _Tp>
diff --git a/lib/libcxx/include/__chrono/day.h b/lib/libcxx/include/__chrono/day.h
index f5b14689a7..46822c5991 100644
--- a/lib/libcxx/include/__chrono/day.h
+++ b/lib/libcxx/include/__chrono/day.h
@@ -13,6 +13,8 @@
 #include <__chrono/duration.h>
 #include <__compare/ordering.h>
 #include <__config>
+#include <__cstddef/size_t.h>
+#include <__functional/hash.h>
 
 #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
 #  pragma GCC system_header
@@ -92,6 +94,15 @@ _LIBCPP_HIDE_FROM_ABI inline constexpr day& day::operator-=(const days& __dd) no
 
 } // namespace chrono
 
+#  if _LIBCPP_STD_VER >= 26
+
+template <>
+struct hash<chrono::day> { // C++26 only (see the enclosing #if): the hash is the day converted to unsigned
+  _LIBCPP_HIDE_FROM_ABI static size_t operator()(const chrono::day& __d) noexcept { return static_cast<unsigned>(__d); }
+};
+
+#  endif // _LIBCPP_STD_VER >= 26
+
 _LIBCPP_END_NAMESPACE_STD
 
 #endif // _LIBCPP_STD_VER >= 20
diff --git a/lib/libcxx/include/__chrono/duration.h b/lib/libcxx/include/__chrono/duration.h
index 57fa64d650..9313fc797e 100644
--- a/lib/libcxx/include/__chrono/duration.h
+++ b/lib/libcxx/include/__chrono/duration.h
@@ -13,6 +13,8 @@
 #include <__compare/ordering.h>
 #include <__compare/three_way_comparable.h>
 #include <__config>
+#include <__cstddef/size_t.h>
+#include <__functional/hash.h>
 #include <__type_traits/common_type.h>
 #include <__type_traits/enable_if.h>
 #include <__type_traits/is_convertible.h>
@@ -102,7 +104,8 @@ struct __duration_cast<_FromDuration, _ToDuration, _Period, false, false> {
 };
 
 template <class _ToDuration, class _Rep, class _Period, __enable_if_t<__is_duration_v<_ToDuration>, int> = 0>
-inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR _ToDuration duration_cast(const duration<_Rep, _Period>& __fd) {
+[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR _ToDuration
+duration_cast(const duration<_Rep, _Period>& __fd) { // enabled only for duration targets; forwards to __duration_cast
   return __duration_cast<duration<_Rep, _Period>, _ToDuration>()(__fd);
 }
 
@@ -117,14 +120,18 @@ inline constexpr bool treat_as_floating_point_v = treat_as_floating_point<_Rep>:
 template <class _Rep>
 struct duration_values {
 public:
-  _LIBCPP_HIDE_FROM_ABI static _LIBCPP_CONSTEXPR _Rep zero() _NOEXCEPT { return _Rep(0); }
-  _LIBCPP_HIDE_FROM_ABI static _LIBCPP_CONSTEXPR _Rep max() _NOEXCEPT { return numeric_limits<_Rep>::max(); }
-  _LIBCPP_HIDE_FROM_ABI static _LIBCPP_CONSTEXPR _Rep min() _NOEXCEPT { return numeric_limits<_Rep>::lowest(); }
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI static _LIBCPP_CONSTEXPR _Rep zero() _NOEXCEPT { return _Rep(0); }
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI static _LIBCPP_CONSTEXPR _Rep max() _NOEXCEPT {
+    return numeric_limits<_Rep>::max(); // largest value of the representation type
+  }
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI static _LIBCPP_CONSTEXPR _Rep min() _NOEXCEPT {
+    return numeric_limits<_Rep>::lowest(); // lowest(), not numeric_limits::min()
+  }
 };
 
 #if _LIBCPP_STD_VER >= 17
 template <class _ToDuration, class _Rep, class _Period, enable_if_t<__is_duration_v<_ToDuration>, int> = 0>
-inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR _ToDuration floor(const duration<_Rep, _Period>& __d) {
+[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR _ToDuration floor(const duration<_Rep, _Period>& __d) {
   _ToDuration __t = chrono::duration_cast<_ToDuration>(__d);
   if (__t > __d)
     __t = __t - _ToDuration{1};
@@ -132,7 +139,7 @@ inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR _ToDuration floor(const duration<
 }
 
 template <class _ToDuration, class _Rep, class _Period, enable_if_t<__is_duration_v<_ToDuration>, int> = 0>
-inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR _ToDuration ceil(const duration<_Rep, _Period>& __d) {
+[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR _ToDuration ceil(const duration<_Rep, _Period>& __d) {
   _ToDuration __t = chrono::duration_cast<_ToDuration>(__d);
   if (__t < __d)
     __t = __t + _ToDuration{1};
@@ -140,7 +147,7 @@ inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR _ToDuration ceil(const duration<_
 }
 
 template <class _ToDuration, class _Rep, class _Period, enable_if_t<__is_duration_v<_ToDuration>, int> = 0>
-inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR _ToDuration round(const duration<_Rep, _Period>& __d) {
+[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR _ToDuration round(const duration<_Rep, _Period>& __d) {
   _ToDuration __lower = chrono::floor<_ToDuration>(__d);
   _ToDuration __upper = __lower + _ToDuration{1};
   auto __lower_diff   = __d - __lower;
@@ -220,14 +227,14 @@ public:
 
   // observer
 
-  _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR rep count() const { return __rep_; }
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR rep count() const { return __rep_; } // stored tick count
 
   // arithmetic
 
-  _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR typename common_type<duration>::type operator+() const {
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR typename common_type<duration>::type operator+() const { // copy
     return typename common_type<duration>::type(*this);
   }
-  _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR typename common_type<duration>::type operator-() const {
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR typename common_type<duration>::type operator-() const { // negated
     return typename common_type<duration>::type(-__rep_);
   }
   _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX17 duration& operator++() {
@@ -269,13 +276,13 @@ public:
 
   // special values
 
-  _LIBCPP_HIDE_FROM_ABI static _LIBCPP_CONSTEXPR duration zero() _NOEXCEPT {
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI static _LIBCPP_CONSTEXPR duration zero() _NOEXCEPT { // from duration_values
     return duration(duration_values<rep>::zero());
   }
-  _LIBCPP_HIDE_FROM_ABI static _LIBCPP_CONSTEXPR duration min() _NOEXCEPT {
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI static _LIBCPP_CONSTEXPR duration min() _NOEXCEPT { // from duration_values
     return duration(duration_values<rep>::min());
   }
-  _LIBCPP_HIDE_FROM_ABI static _LIBCPP_CONSTEXPR duration max() _NOEXCEPT {
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI static _LIBCPP_CONSTEXPR duration max() _NOEXCEPT { // from duration_values
     return duration(duration_values<rep>::max());
   }
 };
@@ -389,7 +396,7 @@ operator<=>(const duration<_Rep1, _Period1>& __lhs, const duration<_Rep2, _Perio
 // Duration +
 
 template <class _Rep1, class _Period1, class _Rep2, class _Period2>
-inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR
+[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR
 typename common_type<duration<_Rep1, _Period1>, duration<_Rep2, _Period2> >::type
 operator+(const duration<_Rep1, _Period1>& __lhs, const duration<_Rep2, _Period2>& __rhs) {
   typedef typename common_type<duration<_Rep1, _Period1>, duration<_Rep2, _Period2> >::type _Cd;
@@ -399,7 +406,7 @@ operator+(const duration<_Rep1, _Period1>& __lhs, const duration<_Rep2, _Period2
 // Duration -
 
 template <class _Rep1, class _Period1, class _Rep2, class _Period2>
-inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR
+[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR
 typename common_type<duration<_Rep1, _Period1>, duration<_Rep2, _Period2> >::type
 operator-(const duration<_Rep1, _Period1>& __lhs, const duration<_Rep2, _Period2>& __rhs) {
   typedef typename common_type<duration<_Rep1, _Period1>, duration<_Rep2, _Period2> >::type _Cd;
@@ -412,7 +419,8 @@ template <class _Rep1,
           class _Period,
           class _Rep2,
           __enable_if_t<is_convertible<const _Rep2&, typename common_type<_Rep1, _Rep2>::type>::value, int> = 0>
-inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR duration<typename common_type<_Rep1, _Rep2>::type, _Period>
+[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI
+_LIBCPP_CONSTEXPR duration<typename common_type<_Rep1, _Rep2>::type, _Period>
 operator*(const duration<_Rep1, _Period>& __d, const _Rep2& __s) {
   typedef typename common_type<_Rep1, _Rep2>::type _Cr;
   typedef duration<_Cr, _Period> _Cd;
@@ -423,7 +431,8 @@ template <class _Rep1,
           class _Period,
           class _Rep2,
           __enable_if_t<is_convertible<const _Rep1&, typename common_type<_Rep1, _Rep2>::type>::value, int> = 0>
-inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR duration<typename common_type<_Rep1, _Rep2>::type, _Period>
+[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI
+_LIBCPP_CONSTEXPR duration<typename common_type<_Rep1, _Rep2>::type, _Period>
 operator*(const _Rep1& __s, const duration<_Rep2, _Period>& __d) {
   return __d * __s;
 }
@@ -436,7 +445,8 @@ template <class _Rep1,
           __enable_if_t<!__is_duration_v<_Rep2> &&
                             is_convertible<const _Rep2&, typename common_type<_Rep1, _Rep2>::type>::value,
                         int> = 0>
-inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR duration<typename common_type<_Rep1, _Rep2>::type, _Period>
+[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI
+_LIBCPP_CONSTEXPR duration<typename common_type<_Rep1, _Rep2>::type, _Period>
 operator/(const duration<_Rep1, _Period>& __d, const _Rep2& __s) {
   typedef typename common_type<_Rep1, _Rep2>::type _Cr;
   typedef duration<_Cr, _Period> _Cd;
@@ -444,7 +454,7 @@ operator/(const duration<_Rep1, _Period>& __d, const _Rep2& __s) {
 }
 
 template <class _Rep1, class _Period1, class _Rep2, class _Period2>
-inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR typename common_type<_Rep1, _Rep2>::type
+[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR typename common_type<_Rep1, _Rep2>::type
 operator/(const duration<_Rep1, _Period1>& __lhs, const duration<_Rep2, _Period2>& __rhs) {
   typedef typename common_type<duration<_Rep1, _Period1>, duration<_Rep2, _Period2> >::type _Ct;
   return _Ct(__lhs).count() / _Ct(__rhs).count();
@@ -458,7 +468,8 @@ template <class _Rep1,
           __enable_if_t<!__is_duration_v<_Rep2> &&
                             is_convertible<const _Rep2&, typename common_type<_Rep1, _Rep2>::type>::value,
                         int> = 0>
-inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR duration<typename common_type<_Rep1, _Rep2>::type, _Period>
+[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI
+_LIBCPP_CONSTEXPR duration<typename common_type<_Rep1, _Rep2>::type, _Period>
 operator%(const duration<_Rep1, _Period>& __d, const _Rep2& __s) {
   typedef typename common_type<_Rep1, _Rep2>::type _Cr;
   typedef duration<_Cr, _Period> _Cd;
@@ -466,7 +477,7 @@ operator%(const duration<_Rep1, _Period>& __d, const _Rep2& __s) {
 }
 
 template <class _Rep1, class _Period1, class _Rep2, class _Period2>
-inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR
+[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR
 typename common_type<duration<_Rep1, _Period1>, duration<_Rep2, _Period2> >::type
 operator%(const duration<_Rep1, _Period1>& __lhs, const duration<_Rep2, _Period2>& __rhs) {
   typedef typename common_type<_Rep1, _Rep2>::type _Cr;
@@ -481,51 +492,53 @@ operator%(const duration<_Rep1, _Period1>& __lhs, const duration<_Rep2, _Period2
 inline namespace literals {
 inline namespace chrono_literals {
 
-_LIBCPP_HIDE_FROM_ABI constexpr chrono::hours operator""h(unsigned long long __h) {
+[[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI constexpr chrono::hours operator""h(unsigned long long __h) { // integral hours
   return chrono::hours(static_cast<chrono::hours::rep>(__h));
 }
 
-_LIBCPP_HIDE_FROM_ABI constexpr chrono::duration<long double, ratio<3600, 1>> operator""h(long double __h) {
+[[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI constexpr chrono::duration<long double, ratio<3600, 1>>
+operator""h(long double __h) { // floating-point hours keep a long double representation
   return chrono::duration<long double, ratio<3600, 1>>(__h);
 }
 
-_LIBCPP_HIDE_FROM_ABI constexpr chrono::minutes operator""min(unsigned long long __m) {
+[[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI constexpr chrono::minutes operator""min(unsigned long long __m) { // integral minutes
   return chrono::minutes(static_cast<chrono::minutes::rep>(__m));
 }
 
-_LIBCPP_HIDE_FROM_ABI constexpr chrono::duration<long double, ratio<60, 1>> operator""min(long double __m) {
+[[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI constexpr chrono::duration<long double, ratio<60, 1>>
+operator""min(long double __m) { // floating-point minutes keep a long double representation
   return chrono::duration<long double, ratio<60, 1>>(__m);
 }
 
-_LIBCPP_HIDE_FROM_ABI constexpr chrono::seconds operator""s(unsigned long long __s) {
+[[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI constexpr chrono::seconds operator""s(unsigned long long __s) { // integral seconds
   return chrono::seconds(static_cast<chrono::seconds::rep>(__s));
 }
 
-_LIBCPP_HIDE_FROM_ABI constexpr chrono::duration<long double> operator""s(long double __s) {
+[[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI constexpr chrono::duration<long double> operator""s(long double __s) { // fp seconds
   return chrono::duration<long double>(__s);
 }
 
-_LIBCPP_HIDE_FROM_ABI constexpr chrono::milliseconds operator""ms(unsigned long long __ms) {
+[[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI constexpr chrono::milliseconds operator""ms(unsigned long long __ms) { // integral ms
   return chrono::milliseconds(static_cast<chrono::milliseconds::rep>(__ms));
 }
 
-_LIBCPP_HIDE_FROM_ABI constexpr chrono::duration<long double, milli> operator""ms(long double __ms) {
+[[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI constexpr chrono::duration<long double, milli> operator""ms(long double __ms) { // fp ms
   return chrono::duration<long double, milli>(__ms);
 }
 
-_LIBCPP_HIDE_FROM_ABI constexpr chrono::microseconds operator""us(unsigned long long __us) {
+[[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI constexpr chrono::microseconds operator""us(unsigned long long __us) { // integral us
   return chrono::microseconds(static_cast<chrono::microseconds::rep>(__us));
 }
 
-_LIBCPP_HIDE_FROM_ABI constexpr chrono::duration<long double, micro> operator""us(long double __us) {
+[[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI constexpr chrono::duration<long double, micro> operator""us(long double __us) { // fp us
   return chrono::duration<long double, micro>(__us);
 }
 
-_LIBCPP_HIDE_FROM_ABI constexpr chrono::nanoseconds operator""ns(unsigned long long __ns) {
+[[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI constexpr chrono::nanoseconds operator""ns(unsigned long long __ns) { // integral ns
   return chrono::nanoseconds(static_cast<chrono::nanoseconds::rep>(__ns));
 }
 
-_LIBCPP_HIDE_FROM_ABI constexpr chrono::duration<long double, nano> operator""ns(long double __ns) {
+[[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI constexpr chrono::duration<long double, nano> operator""ns(long double __ns) { // fp ns
   return chrono::duration<long double, nano>(__ns);
 }
 
@@ -538,6 +551,18 @@ using namespace literals::chrono_literals;
 
 #endif // _LIBCPP_STD_VER >= 14
 
+#if _LIBCPP_STD_VER >= 26
+
+template <class _Rep, class _Period>
+  requires __has_enabled_hash<_Rep>::value // presumably: only when hash<_Rep> is itself usable - confirm
+struct hash<chrono::duration<_Rep, _Period>> {
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI static size_t operator()(const chrono::duration<_Rep, _Period>& __d) {
+    return hash<_Rep>{}(__d.count()); // hashes the tick count only; _Period does not contribute
+  }
+};
+
+#endif // _LIBCPP_STD_VER >= 26
+
 _LIBCPP_END_NAMESPACE_STD
 
 _LIBCPP_POP_MACROS
diff --git a/lib/libcxx/include/__chrono/file_clock.h b/lib/libcxx/include/__chrono/file_clock.h
index b4b7e9dc14..968f652f79 100644
--- a/lib/libcxx/include/__chrono/file_clock.h
+++ b/lib/libcxx/include/__chrono/file_clock.h
@@ -60,16 +60,18 @@ struct _FilesystemClock {
 
   _LIBCPP_EXPORTED_FROM_ABI static _LIBCPP_CONSTEXPR_SINCE_CXX14 const bool is_steady = false;
 
-  _LIBCPP_AVAILABILITY_FILESYSTEM_LIBRARY _LIBCPP_EXPORTED_FROM_ABI static time_point now() noexcept;
+  [[__nodiscard__]] _LIBCPP_EXPORTED_FROM_ABI static time_point now() noexcept;
 
 #  if _LIBCPP_STD_VER >= 20
   template <class _Duration>
+  [[nodiscard]] // to_sys: same time_since_epoch(), returned as a sys_time
   _LIBCPP_HIDE_FROM_ABI static chrono::sys_time<_Duration> to_sys(const chrono::file_time<_Duration>& __t) {
     return chrono::sys_time<_Duration>(__t.time_since_epoch());
   }
 
   template <class _Duration>
-  _LIBCPP_HIDE_FROM_ABI static chrono::file_time<_Duration> from_sys(const chrono::sys_time<_Duration>& __t) {
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI static chrono::file_time<_Duration>
+  from_sys(const chrono::sys_time<_Duration>& __t) { // same time_since_epoch(), returned as a file_time
     return chrono::file_time<_Duration>(__t.time_since_epoch());
   }
 #  endif // _LIBCPP_STD_VER >= 20
diff --git a/lib/libcxx/include/__chrono/is_clock.h b/lib/libcxx/include/__chrono/is_clock.h
new file mode 100644
index 0000000000..e63b8485d0
--- /dev/null
+++ b/lib/libcxx/include/__chrono/is_clock.h
@@ -0,0 +1,72 @@
+// -*- C++ -*-
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef _LIBCPP___CHRONO_IS_CLOCK_H
+#define _LIBCPP___CHRONO_IS_CLOCK_H
+
+#include <__config>
+
+#include <__chrono/duration.h>
+#include <__chrono/time_point.h>
+#include <__concepts/same_as.h>
+#include <__type_traits/integral_constant.h>
+#include <__type_traits/is_arithmetic.h>
+#include <__type_traits/is_class.h>
+#include <__type_traits/is_union.h>
+#include <ratio>
+
+#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
+#  pragma GCC system_header
+#endif
+
+#if _LIBCPP_STD_VER >= 20
+
+_LIBCPP_BEGIN_NAMESPACE_STD
+
+namespace chrono {
+
+// Helper to check that _Tp::time_point has the form time_point<_, typename _Tp::duration>.
+template <class _TimePoint, class _ClockType>
+inline constexpr bool __is_valid_clock_time_point_v = false; // primary template: any other type is rejected
+
+template <class _TimePointClock, class _ClockType>
+inline constexpr bool // accepted: a time_point of any clock whose duration is exactly _ClockType::duration
+    __is_valid_clock_time_point_v<time_point<_TimePointClock, typename _ClockType::duration>, _ClockType> = true;
+
+// True when _Tp has the nested types and static members checked below, following Cpp17Clock in [time.clock.req].
+template <class _Tp>
+_LIBCPP_NO_SPECIALIZATIONS inline constexpr bool is_clock_v = requires {
+  typename _Tp::rep; // rep must exist and be an arithmetic, class or union type
+  requires is_arithmetic_v<typename _Tp::rep> || is_class_v<typename _Tp::rep> || is_union_v<typename _Tp::rep>;
+
+  typename _Tp::period; // period must exist and satisfy __is_ratio_v
+  requires __is_ratio_v<typename _Tp::period>;
+
+  typename _Tp::duration; // duration must be exactly duration<rep, period>
+  requires same_as<typename _Tp::duration, duration<typename _Tp::rep, typename _Tp::period>>;
+
+  typename _Tp::time_point; // time_point must be a time_point using _Tp::duration (the clock parameter is free)
+  requires __is_valid_clock_time_point_v<typename _Tp::time_point, _Tp>;
+
+  _Tp::is_steady; // must name a const bool lvalue
+  requires same_as<decltype((_Tp::is_steady)), const bool&>; // decltype((x)) yields a reference for an lvalue
+
+  _Tp::now(); // must be callable without an object and return exactly _Tp::time_point
+  requires same_as<decltype(_Tp::now()), typename _Tp::time_point>;
+};
+
+template <class _Tp> // class-template form of is_clock_v: derives from bool_constant<is_clock_v<_Tp>>
+struct _LIBCPP_NO_SPECIALIZATIONS is_clock : bool_constant<is_clock_v<_Tp>> {};
+
+} // namespace chrono
+
+_LIBCPP_END_NAMESPACE_STD
+
+#endif // _LIBCPP_STD_VER >= 20
+#endif // _LIBCPP___CHRONO_IS_CLOCK_H
diff --git a/lib/libcxx/include/__chrono/leap_second.h b/lib/libcxx/include/__chrono/leap_second.h
index 1857bef803..9e9df6b595 100644
--- a/lib/libcxx/include/__chrono/leap_second.h
+++ b/lib/libcxx/include/__chrono/leap_second.h
@@ -22,6 +22,8 @@
 #  include <__compare/ordering.h>
 #  include <__compare/three_way_comparable.h>
 #  include <__config>
+#  include <__cstddef/size_t.h>
+#  include <__functional/hash.h>
 #  include <__utility/private_constructor_tag.h>
 
 #  if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
@@ -122,6 +124,17 @@ private:
 
 } // namespace chrono
 
+#    if _LIBCPP_STD_VER >= 26
+
+template <>
+struct hash<chrono::leap_second> { // C++26 only (see the enclosing #if)
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI static size_t operator()(const chrono::leap_second& __lp) noexcept {
+    // Combines the hash of date() with the hash of value() via std::__hash_combine.
+    return std::__hash_combine(hash<chrono::sys_seconds>{}(__lp.date()), hash<chrono::seconds>{}(__lp.value()));
+  }
+};
+
+#    endif // _LIBCPP_STD_VER >= 26
+
 #  endif // _LIBCPP_STD_VER >= 20
 
 _LIBCPP_END_NAMESPACE_STD
diff --git a/lib/libcxx/include/__chrono/month.h b/lib/libcxx/include/__chrono/month.h
index 77c67d0954..669ac66360 100644
--- a/lib/libcxx/include/__chrono/month.h
+++ b/lib/libcxx/include/__chrono/month.h
@@ -13,6 +13,8 @@
 #include <__chrono/duration.h>
 #include <__compare/ordering.h>
 #include <__config>
+#include <__cstddef/size_t.h>
+#include <__functional/hash.h>
 
 #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
 #  pragma GCC system_header
@@ -108,6 +110,17 @@ inline constexpr month December{12};
 
 } // namespace chrono
 
+#  if _LIBCPP_STD_VER >= 26
+
+template <>
+struct hash<chrono::month> {
+  _LIBCPP_HIDE_FROM_ABI static size_t operator()(const chrono::month& __m) noexcept {
+    return static_cast<unsigned>(__m);
+  }
+};
+
+#  endif // _LIBCPP_STD_VER >= 26
+
 _LIBCPP_END_NAMESPACE_STD
 
 #endif // _LIBCPP_STD_VER >= 20
diff --git a/lib/libcxx/include/__chrono/month_weekday.h b/lib/libcxx/include/__chrono/month_weekday.h
index 7919879655..edb7d38606 100644
--- a/lib/libcxx/include/__chrono/month_weekday.h
+++ b/lib/libcxx/include/__chrono/month_weekday.h
@@ -13,6 +13,8 @@
 #include <__chrono/month.h>
 #include <__chrono/weekday.h>
 #include <__config>
+#include <__cstddef/size_t.h>
+#include <__functional/hash.h>
 
 #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
 #  pragma GCC system_header
@@ -98,6 +100,26 @@ _LIBCPP_HIDE_FROM_ABI inline constexpr month_weekday_last operator/(const weekda
 }
 } // namespace chrono
 
+#  if _LIBCPP_STD_VER >= 26
+
+template <>
+struct hash<chrono::month_weekday> {
+  _LIBCPP_HIDE_FROM_ABI static size_t operator()(const chrono::month_weekday& __mw) noexcept {
+    return std::__hash_combine(
+        hash<chrono::month>{}(__mw.month()), hash<chrono::weekday_indexed>{}(__mw.weekday_indexed()));
+  }
+};
+
+template <>
+struct hash<chrono::month_weekday_last> {
+  _LIBCPP_HIDE_FROM_ABI static size_t operator()(const chrono::month_weekday_last& __mwl) noexcept {
+    return std::__hash_combine(
+        hash<chrono::month>{}(__mwl.month()), hash<chrono::weekday_last>{}(__mwl.weekday_last()));
+  }
+};
+
+#  endif // _LIBCPP_STD_VER >= 26
+
 _LIBCPP_END_NAMESPACE_STD
 
 #endif // _LIBCPP_STD_VER >= 20
diff --git a/lib/libcxx/include/__chrono/monthday.h b/lib/libcxx/include/__chrono/monthday.h
index 57712cf0b6..2a7262be09 100644
--- a/lib/libcxx/include/__chrono/monthday.h
+++ b/lib/libcxx/include/__chrono/monthday.h
@@ -15,6 +15,8 @@
 #include <__chrono/month.h>
 #include <__compare/ordering.h>
 #include <__config>
+#include <__cstddef/size_t.h>
+#include <__functional/hash.h>
 
 #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
 #  pragma GCC system_header
@@ -126,6 +128,24 @@ _LIBCPP_HIDE_FROM_ABI inline constexpr month_day_last operator/(last_spec, int _
 
 } // namespace chrono
 
+#  if _LIBCPP_STD_VER >= 26
+
+template <>
+struct hash<chrono::month_day> {
+  _LIBCPP_HIDE_FROM_ABI static size_t operator()(const chrono::month_day& __md) noexcept {
+    return std::__hash_combine(hash<chrono::month>{}(__md.month()), hash<chrono::day>{}(__md.day()));
+  }
+};
+
+template <>
+struct hash<chrono::month_day_last> {
+  _LIBCPP_HIDE_FROM_ABI static size_t operator()(const chrono::month_day_last& __mdl) noexcept {
+    return hash<chrono::month>{}(__mdl.month());
+  }
+};
+
+#  endif // _LIBCPP_STD_VER >= 26
+
 _LIBCPP_END_NAMESPACE_STD
 
 #endif // _LIBCPP_STD_VER >= 20
diff --git a/lib/libcxx/include/__chrono/steady_clock.h b/lib/libcxx/include/__chrono/steady_clock.h
index 1b247b2c28..8e68c9a3c2 100644
--- a/lib/libcxx/include/__chrono/steady_clock.h
+++ b/lib/libcxx/include/__chrono/steady_clock.h
@@ -31,7 +31,7 @@ public:
   typedef chrono::time_point<steady_clock, duration> time_point;
   static _LIBCPP_CONSTEXPR_SINCE_CXX14 const bool is_steady = true;
 
-  static time_point now() _NOEXCEPT;
+  [[__nodiscard__]] static time_point now() _NOEXCEPT;
 };
 #endif
 
diff --git a/lib/libcxx/include/__chrono/system_clock.h b/lib/libcxx/include/__chrono/system_clock.h
index 5a9eb65bda..e3ef75ae50 100644
--- a/lib/libcxx/include/__chrono/system_clock.h
+++ b/lib/libcxx/include/__chrono/system_clock.h
@@ -31,9 +31,9 @@ public:
   typedef chrono::time_point<system_clock> time_point;
   static _LIBCPP_CONSTEXPR_SINCE_CXX14 const bool is_steady = false;
 
-  static time_point now() _NOEXCEPT;
-  static time_t to_time_t(const time_point& __t) _NOEXCEPT;
-  static time_point from_time_t(time_t __t) _NOEXCEPT;
+  [[__nodiscard__]] static time_point now() _NOEXCEPT;
+  [[__nodiscard__]] static time_t to_time_t(const time_point& __t) _NOEXCEPT;
+  [[__nodiscard__]] static time_point from_time_t(time_t __t) _NOEXCEPT;
 };
 
 #if _LIBCPP_STD_VER >= 20
diff --git a/lib/libcxx/include/__chrono/time_point.h b/lib/libcxx/include/__chrono/time_point.h
index fc4408d23d..d393e34cdb 100644
--- a/lib/libcxx/include/__chrono/time_point.h
+++ b/lib/libcxx/include/__chrono/time_point.h
@@ -14,6 +14,8 @@
 #include <__compare/ordering.h>
 #include <__compare/three_way_comparable.h>
 #include <__config>
+#include <__cstddef/size_t.h>
+#include <__functional/hash.h>
 #include <__type_traits/common_type.h>
 #include <__type_traits/enable_if.h>
 #include <__type_traits/is_convertible.h>
@@ -54,7 +56,9 @@ public:
 
   // observer
 
-  _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 duration time_since_epoch() const { return __d_; }
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 duration time_since_epoch() const {
+    return __d_;
+  }
 
   // arithmetic
 
@@ -82,8 +86,12 @@ public:
 
   // special values
 
-  _LIBCPP_HIDE_FROM_ABI static _LIBCPP_CONSTEXPR time_point min() _NOEXCEPT { return time_point(duration::min()); }
-  _LIBCPP_HIDE_FROM_ABI static _LIBCPP_CONSTEXPR time_point max() _NOEXCEPT { return time_point(duration::max()); }
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI static _LIBCPP_CONSTEXPR time_point min() _NOEXCEPT {
+    return time_point(duration::min());
+  }
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI static _LIBCPP_CONSTEXPR time_point max() _NOEXCEPT {
+    return time_point(duration::max());
+  }
 };
 
 } // namespace chrono
@@ -95,30 +103,33 @@ struct common_type<chrono::time_point<_Clock, _Duration1>, chrono::time_point<_C
 
 namespace chrono {
 
-template <class _ToDuration, class _Clock, class _Duration>
-inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 time_point<_Clock, _ToDuration>
+template <class _ToDuration, class _Clock, class _Duration, __enable_if_t<__is_duration_v<_ToDuration>, int> = 0>
+[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 time_point<_Clock, _ToDuration>
 time_point_cast(const time_point<_Clock, _Duration>& __t) {
   return time_point<_Clock, _ToDuration>(chrono::duration_cast<_ToDuration>(__t.time_since_epoch()));
 }
 
 #if _LIBCPP_STD_VER >= 17
 template <class _ToDuration, class _Clock, class _Duration, enable_if_t<__is_duration_v<_ToDuration>, int> = 0>
-inline _LIBCPP_HIDE_FROM_ABI constexpr time_point<_Clock, _ToDuration> floor(const time_point<_Clock, _Duration>& __t) {
+[[nodiscard]] inline
+    _LIBCPP_HIDE_FROM_ABI constexpr time_point<_Clock, _ToDuration> floor(const time_point<_Clock, _Duration>& __t) {
   return time_point<_Clock, _ToDuration>{chrono::floor<_ToDuration>(__t.time_since_epoch())};
 }
 
 template <class _ToDuration, class _Clock, class _Duration, enable_if_t<__is_duration_v<_ToDuration>, int> = 0>
-inline _LIBCPP_HIDE_FROM_ABI constexpr time_point<_Clock, _ToDuration> ceil(const time_point<_Clock, _Duration>& __t) {
+[[nodiscard]] inline _LIBCPP_HIDE_FROM_ABI constexpr time_point<_Clock, _ToDuration>
+ceil(const time_point<_Clock, _Duration>& __t) {
   return time_point<_Clock, _ToDuration>{chrono::ceil<_ToDuration>(__t.time_since_epoch())};
 }
 
 template <class _ToDuration, class _Clock, class _Duration, enable_if_t<__is_duration_v<_ToDuration>, int> = 0>
-inline _LIBCPP_HIDE_FROM_ABI constexpr time_point<_Clock, _ToDuration> round(const time_point<_Clock, _Duration>& __t) {
+[[nodiscard]] inline _LIBCPP_HIDE_FROM_ABI constexpr time_point<_Clock, _ToDuration>
+round(const time_point<_Clock, _Duration>& __t) {
   return time_point<_Clock, _ToDuration>{chrono::round<_ToDuration>(__t.time_since_epoch())};
 }
 
 template <class _Rep, class _Period, enable_if_t<numeric_limits<_Rep>::is_signed, int> = 0>
-inline _LIBCPP_HIDE_FROM_ABI constexpr duration<_Rep, _Period> abs(duration<_Rep, _Period> __d) {
+[[nodiscard]] inline _LIBCPP_HIDE_FROM_ABI constexpr duration<_Rep, _Period> abs(duration<_Rep, _Period> __d) {
   return __d >= __d.zero() ? +__d : -__d;
 }
 #endif // _LIBCPP_STD_VER >= 17
@@ -188,7 +199,7 @@ operator<=>(const time_point<_Clock, _Duration1>& __lhs, const time_point<_Clock
 // time_point operator+(time_point x, duration y);
 
 template <class _Clock, class _Duration1, class _Rep2, class _Period2>
-inline _LIBCPP_HIDE_FROM_ABI
+[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI
 _LIBCPP_CONSTEXPR_SINCE_CXX14 time_point<_Clock, typename common_type<_Duration1, duration<_Rep2, _Period2> >::type>
 operator+(const time_point<_Clock, _Duration1>& __lhs, const duration<_Rep2, _Period2>& __rhs) {
   typedef time_point<_Clock, typename common_type<_Duration1, duration<_Rep2, _Period2> >::type> _Tr;
@@ -198,7 +209,7 @@ operator+(const time_point<_Clock, _Duration1>& __lhs, const duration<_Rep2, _Pe
 // time_point operator+(duration x, time_point y);
 
 template <class _Rep1, class _Period1, class _Clock, class _Duration2>
-inline _LIBCPP_HIDE_FROM_ABI
+[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI
 _LIBCPP_CONSTEXPR_SINCE_CXX14 time_point<_Clock, typename common_type<duration<_Rep1, _Period1>, _Duration2>::type>
 operator+(const duration<_Rep1, _Period1>& __lhs, const time_point<_Clock, _Duration2>& __rhs) {
   return __rhs + __lhs;
@@ -207,7 +218,7 @@ operator+(const duration<_Rep1, _Period1>& __lhs, const time_point<_Clock, _Dura
 // time_point operator-(time_point x, duration y);
 
 template <class _Clock, class _Duration1, class _Rep2, class _Period2>
-inline _LIBCPP_HIDE_FROM_ABI
+[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI
 _LIBCPP_CONSTEXPR_SINCE_CXX14 time_point<_Clock, typename common_type<_Duration1, duration<_Rep2, _Period2> >::type>
 operator-(const time_point<_Clock, _Duration1>& __lhs, const duration<_Rep2, _Period2>& __rhs) {
   typedef time_point<_Clock, typename common_type<_Duration1, duration<_Rep2, _Period2> >::type> _Ret;
@@ -217,13 +228,26 @@ operator-(const time_point<_Clock, _Duration1>& __lhs, const duration<_Rep2, _Pe
 // duration operator-(time_point x, time_point y);
 
 template <class _Clock, class _Duration1, class _Duration2>
-inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 typename common_type<_Duration1, _Duration2>::type
+[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14
+typename common_type<_Duration1, _Duration2>::type
 operator-(const time_point<_Clock, _Duration1>& __lhs, const time_point<_Clock, _Duration2>& __rhs) {
   return __lhs.time_since_epoch() - __rhs.time_since_epoch();
 }
 
 } // namespace chrono
 
+#if _LIBCPP_STD_VER >= 26
+
+template <class _Clock, class _Duration>
+  requires __has_enabled_hash<_Duration>::value
+struct hash<chrono::time_point<_Clock, _Duration>> {
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI static size_t operator()(const chrono::time_point<_Clock, _Duration>& __tp) {
+    return hash<_Duration>{}(__tp.time_since_epoch());
+  }
+};
+
+#endif // _LIBCPP_STD_VER >= 26
+
 _LIBCPP_END_NAMESPACE_STD
 
 _LIBCPP_POP_MACROS
diff --git a/lib/libcxx/include/__chrono/weekday.h b/lib/libcxx/include/__chrono/weekday.h
index 728cbb8446..143803495d 100644
--- a/lib/libcxx/include/__chrono/weekday.h
+++ b/lib/libcxx/include/__chrono/weekday.h
@@ -15,6 +15,8 @@
 #include <__chrono/system_clock.h>
 #include <__chrono/time_point.h>
 #include <__config>
+#include <__cstddef/size_t.h>
+#include <__functional/hash.h>
 
 #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
 #  pragma GCC system_header
@@ -160,6 +162,29 @@ inline constexpr weekday Saturday{6};
 
 } // namespace chrono
 
+#  if _LIBCPP_STD_VER >= 26
+
+template <>
+struct hash<chrono::weekday> {
+  _LIBCPP_HIDE_FROM_ABI static size_t operator()(const chrono::weekday& __w) noexcept { return __w.c_encoding(); }
+};
+
+template <>
+struct hash<chrono::weekday_indexed> {
+  _LIBCPP_HIDE_FROM_ABI static size_t operator()(const chrono::weekday_indexed& __wi) noexcept {
+    return std::__hash_combine(hash<chrono::weekday>{}(__wi.weekday()), __wi.index());
+  }
+};
+
+template <>
+struct hash<chrono::weekday_last> {
+  _LIBCPP_HIDE_FROM_ABI static size_t operator()(const chrono::weekday_last& __wl) noexcept {
+    return hash<chrono::weekday>{}(__wl.weekday());
+  }
+};
+
+#  endif // _LIBCPP_STD_VER >= 26
+
 _LIBCPP_END_NAMESPACE_STD
 
 #endif // _LIBCPP_STD_VER >= 20
diff --git a/lib/libcxx/include/__chrono/year.h b/lib/libcxx/include/__chrono/year.h
index 2ae5180cb8..aaef38acd9 100644
--- a/lib/libcxx/include/__chrono/year.h
+++ b/lib/libcxx/include/__chrono/year.h
@@ -13,6 +13,8 @@
 #include <__chrono/duration.h>
 #include <__compare/ordering.h>
 #include <__config>
+#include <__cstddef/size_t.h>
+#include <__functional/hash.h>
 #include <limits>
 
 #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
@@ -109,6 +111,15 @@ _LIBCPP_HIDE_FROM_ABI constexpr bool year::ok() const noexcept {
 
 } // namespace chrono
 
+#  if _LIBCPP_STD_VER >= 26
+
+template <>
+struct hash<chrono::year> {
+  _LIBCPP_HIDE_FROM_ABI static size_t operator()(const chrono::year& __y) noexcept { return static_cast<int>(__y); }
+};
+
+#  endif // _LIBCPP_STD_VER >= 26
+
 _LIBCPP_END_NAMESPACE_STD
 
 #endif // _LIBCPP_STD_VER >= 20
diff --git a/lib/libcxx/include/__chrono/year_month.h b/lib/libcxx/include/__chrono/year_month.h
index cf9234bdb4..e36091c021 100644
--- a/lib/libcxx/include/__chrono/year_month.h
+++ b/lib/libcxx/include/__chrono/year_month.h
@@ -15,6 +15,8 @@
 #include <__chrono/year.h>
 #include <__compare/ordering.h>
 #include <__config>
+#include <__cstddef/size_t.h>
+#include <__functional/hash.h>
 
 #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
 #  pragma GCC system_header
@@ -116,6 +118,17 @@ _LIBCPP_HIDE_FROM_ABI inline constexpr year_month& year_month::operator-=(const
 
 } // namespace chrono
 
+#  if _LIBCPP_STD_VER >= 26
+
+template <>
+struct hash<chrono::year_month> {
+  _LIBCPP_HIDE_FROM_ABI static size_t operator()(const chrono::year_month& __ym) noexcept {
+    return std::__hash_combine(hash<chrono::year>{}(__ym.year()), hash<chrono::month>{}(__ym.month()));
+  }
+};
+
+#  endif // _LIBCPP_STD_VER >= 26
+
 _LIBCPP_END_NAMESPACE_STD
 
 #endif // _LIBCPP_STD_VER >= 20
diff --git a/lib/libcxx/include/__chrono/year_month_day.h b/lib/libcxx/include/__chrono/year_month_day.h
index a0510a14f4..0a2aaedd60 100644
--- a/lib/libcxx/include/__chrono/year_month_day.h
+++ b/lib/libcxx/include/__chrono/year_month_day.h
@@ -21,6 +21,8 @@
 #include <__chrono/year_month.h>
 #include <__compare/ordering.h>
 #include <__config>
+#include <__cstddef/size_t.h>
+#include <__functional/hash.h>
 #include <limits>
 
 #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
@@ -330,6 +332,27 @@ _LIBCPP_HIDE_FROM_ABI inline constexpr bool year_month_day::ok() const noexcept
 
 } // namespace chrono
 
+#  if _LIBCPP_STD_VER >= 26
+
+template <>
+struct hash<chrono::year_month_day> {
+  _LIBCPP_HIDE_FROM_ABI static size_t operator()(const chrono::year_month_day& __ymd) noexcept {
+    return std::__hash_combine(
+        hash<chrono::year>{}(__ymd.year()),
+        std::__hash_combine(hash<chrono::month>{}(__ymd.month()), hash<chrono::day>{}(__ymd.day())));
+  }
+};
+
+template <>
+struct hash<chrono::year_month_day_last> {
+  _LIBCPP_HIDE_FROM_ABI static size_t operator()(const chrono::year_month_day_last& __ymdl) noexcept {
+    return std::__hash_combine(
+        hash<chrono::year>{}(__ymdl.year()), hash<chrono::month_day_last>{}(__ymdl.month_day_last()));
+  }
+};
+
+#  endif // _LIBCPP_STD_VER >= 26
+
 _LIBCPP_END_NAMESPACE_STD
 
 #endif // _LIBCPP_STD_VER >= 20
diff --git a/lib/libcxx/include/__chrono/year_month_weekday.h b/lib/libcxx/include/__chrono/year_month_weekday.h
index 0c3dd494c8..6ed1e21fe9 100644
--- a/lib/libcxx/include/__chrono/year_month_weekday.h
+++ b/lib/libcxx/include/__chrono/year_month_weekday.h
@@ -22,6 +22,8 @@
 #include <__chrono/year_month.h>
 #include <__chrono/year_month_day.h>
 #include <__config>
+#include <__cstddef/size_t.h>
+#include <__functional/hash.h>
 
 #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
 #  pragma GCC system_header
@@ -280,6 +282,30 @@ year_month_weekday_last::operator-=(const years& __dy) noexcept {
 
 } // namespace chrono
 
+#  if _LIBCPP_STD_VER >= 26
+
+template <>
+struct hash<chrono::year_month_weekday> {
+  _LIBCPP_HIDE_FROM_ABI static size_t operator()(const chrono::year_month_weekday& __ymw) noexcept {
+    return std::__hash_combine(
+        hash<chrono::year>{}(__ymw.year()),
+        std::__hash_combine(
+            hash<chrono::month>{}(__ymw.month()), hash<chrono::weekday_indexed>{}(__ymw.weekday_indexed())));
+  }
+};
+
+template <>
+struct hash<chrono::year_month_weekday_last> {
+  _LIBCPP_HIDE_FROM_ABI static size_t operator()(const chrono::year_month_weekday_last& __ymwl) noexcept {
+    return std::__hash_combine(
+        hash<chrono::year>{}(__ymwl.year()),
+        std::__hash_combine(
+            hash<chrono::month>{}(__ymwl.month()), hash<chrono::weekday_last>{}(__ymwl.weekday_last())));
+  }
+};
+
+#  endif // _LIBCPP_STD_VER >= 26
+
 _LIBCPP_END_NAMESPACE_STD
 
 #endif // _LIBCPP_STD_VER >= 20
diff --git a/lib/libcxx/include/__chrono/zoned_time.h b/lib/libcxx/include/__chrono/zoned_time.h
index 8db687a422..e63528e57d 100644
--- a/lib/libcxx/include/__chrono/zoned_time.h
+++ b/lib/libcxx/include/__chrono/zoned_time.h
@@ -24,6 +24,8 @@
 #  include <__chrono/tzdb_list.h>
 #  include <__concepts/constructible.h>
 #  include <__config>
+#  include <__cstddef/size_t.h>
+#  include <__functional/hash.h>
 #  include <__type_traits/common_type.h>
 #  include <__type_traits/conditional.h>
 #  include <__type_traits/remove_cvref.h>
@@ -216,6 +218,20 @@ operator==(const zoned_time<_Duration1, _TimeZonePtr>& __lhs, const zoned_time<_
 
 } // namespace chrono
 
+#    if _LIBCPP_STD_VER >= 26
+
+template <class _Duration, class _TimeZonePtr>
+  requires __has_enabled_hash<_Duration>::value && __has_enabled_hash<_TimeZonePtr>::value
+struct hash<chrono::zoned_time<_Duration, _TimeZonePtr>> {
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI static size_t
+  operator()(const chrono::zoned_time<_Duration, _TimeZonePtr>& __zt) {
+    return std::__hash_combine(
+        hash<chrono::sys_time<_Duration>>{}(__zt.get_sys_time()), hash<_TimeZonePtr>{}(__zt.get_time_zone()));
+  }
+};
+
+#    endif // _LIBCPP_STD_VER >= 26
+
 #  endif // _LIBCPP_STD_VER >= 20 && _LIBCPP_HAS_TIME_ZONE_DATABASE && _LIBCPP_HAS_FILESYSTEM &&
          // _LIBCPP_HAS_LOCALIZATION
 
diff --git a/lib/libcxx/include/__compare/is_eq.h b/lib/libcxx/include/__compare/is_eq.h
index 9a82df1ebe..ee4d11bc7c 100644
--- a/lib/libcxx/include/__compare/is_eq.h
+++ b/lib/libcxx/include/__compare/is_eq.h
@@ -20,12 +20,12 @@ _LIBCPP_BEGIN_NAMESPACE_STD
 
 #if _LIBCPP_STD_VER >= 20
 
-_LIBCPP_HIDE_FROM_ABI inline constexpr bool is_eq(partial_ordering __c) noexcept { return __c == 0; }
-_LIBCPP_HIDE_FROM_ABI inline constexpr bool is_neq(partial_ordering __c) noexcept { return __c != 0; }
-_LIBCPP_HIDE_FROM_ABI inline constexpr bool is_lt(partial_ordering __c) noexcept { return __c < 0; }
-_LIBCPP_HIDE_FROM_ABI inline constexpr bool is_lteq(partial_ordering __c) noexcept { return __c <= 0; }
-_LIBCPP_HIDE_FROM_ABI inline constexpr bool is_gt(partial_ordering __c) noexcept { return __c > 0; }
-_LIBCPP_HIDE_FROM_ABI inline constexpr bool is_gteq(partial_ordering __c) noexcept { return __c >= 0; }
+[[nodiscard]] _LIBCPP_HIDE_FROM_ABI inline constexpr bool is_eq(partial_ordering __c) noexcept { return __c == 0; }
+[[nodiscard]] _LIBCPP_HIDE_FROM_ABI inline constexpr bool is_neq(partial_ordering __c) noexcept { return __c != 0; }
+[[nodiscard]] _LIBCPP_HIDE_FROM_ABI inline constexpr bool is_lt(partial_ordering __c) noexcept { return __c < 0; }
+[[nodiscard]] _LIBCPP_HIDE_FROM_ABI inline constexpr bool is_lteq(partial_ordering __c) noexcept { return __c <= 0; }
+[[nodiscard]] _LIBCPP_HIDE_FROM_ABI inline constexpr bool is_gt(partial_ordering __c) noexcept { return __c > 0; }
+[[nodiscard]] _LIBCPP_HIDE_FROM_ABI inline constexpr bool is_gteq(partial_ordering __c) noexcept { return __c >= 0; }
 
 #endif // _LIBCPP_STD_VER >= 20
 
diff --git a/lib/libcxx/include/__compare/strong_order.h b/lib/libcxx/include/__compare/strong_order.h
index 8c363b5638..ba6de44643 100644
--- a/lib/libcxx/include/__compare/strong_order.h
+++ b/lib/libcxx/include/__compare/strong_order.h
@@ -13,7 +13,6 @@
 #include <__compare/compare_three_way.h>
 #include <__compare/ordering.h>
 #include <__config>
-#include <__math/exponential_functions.h>
 #include <__math/traits.h>
 #include <__type_traits/conditional.h>
 #include <__type_traits/decay.h>
@@ -53,38 +52,21 @@ struct __fn {
   template <class _Tp, class _Up, class _Dp = decay_t<_Tp>>
     requires is_same_v<_Dp, decay_t<_Up>> && is_floating_point_v<_Dp>
   _LIBCPP_HIDE_FROM_ABI static constexpr strong_ordering __go(_Tp&& __t, _Up&& __u, __priority_tag<1>) noexcept {
-    if constexpr (numeric_limits<_Dp>::is_iec559 && sizeof(_Dp) == sizeof(int32_t)) {
-      int32_t __rx = std::bit_cast<int32_t>(__t);
-      int32_t __ry = std::bit_cast<int32_t>(__u);
-      __rx         = (__rx < 0) ? (numeric_limits<int32_t>::min() - __rx - 1) : __rx;
-      __ry         = (__ry < 0) ? (numeric_limits<int32_t>::min() - __ry - 1) : __ry;
-      return (__rx <=> __ry);
-    } else if constexpr (numeric_limits<_Dp>::is_iec559 && sizeof(_Dp) == sizeof(int64_t)) {
-      int64_t __rx = std::bit_cast<int64_t>(__t);
-      int64_t __ry = std::bit_cast<int64_t>(__u);
-      __rx         = (__rx < 0) ? (numeric_limits<int64_t>::min() - __rx - 1) : __rx;
-      __ry         = (__ry < 0) ? (numeric_limits<int64_t>::min() - __ry - 1) : __ry;
+    if constexpr (numeric_limits<_Dp>::is_iec559 &&
+                  (sizeof(_Dp) == sizeof(int32_t) || sizeof(_Dp) == sizeof(int64_t))) {
+      using _IntT = conditional_t<sizeof(_Dp) == sizeof(int32_t), int32_t, int64_t>;
+      _IntT __rx  = std::bit_cast<_IntT>(__t);
+      _IntT __ry  = std::bit_cast<_IntT>(__u);
+      __rx        = (__rx < 0) ? (numeric_limits<_IntT>::min() - __rx - 1) : __rx;
+      __ry        = (__ry < 0) ? (numeric_limits<_IntT>::min() - __ry - 1) : __ry;
       return (__rx <=> __ry);
     } else if (__t < __u) {
       return strong_ordering::less;
     } else if (__t > __u) {
       return strong_ordering::greater;
     } else if (__t == __u) {
-      if constexpr (numeric_limits<_Dp>::radix == 2) {
-        return __math::signbit(__u) <=> __math::signbit(__t);
-      } else {
-        // This is bullet 3 of the IEEE754 algorithm, relevant
-        // only for decimal floating-point;
-        // see https://stackoverflow.com/questions/69068075/
-        if (__t == 0 || __math::isinf(__t)) {
-          return __math::signbit(__u) <=> __math::signbit(__t);
-        } else {
-          int __texp, __uexp;
-          (void)__math::frexp(__t, &__texp);
-          (void)__math::frexp(__u, &__uexp);
-          return (__t < 0) ? (__texp <=> __uexp) : (__uexp <=> __texp);
-        }
-      }
+      static_assert(numeric_limits<_Dp>::radix == 2, "floating point type with a radix other than 2?");
+      return __math::signbit(__u) <=> __math::signbit(__t);
     } else {
       // They're unordered, so one of them must be a NAN.
       // The order is -QNAN, -SNAN, numbers, +SNAN, +QNAN.
@@ -93,9 +75,9 @@ struct __fn {
       bool __t_is_negative = __math::signbit(__t);
       bool __u_is_negative = __math::signbit(__u);
       using _IntType =
-          conditional_t< sizeof(__t) == sizeof(int32_t),
-                         int32_t,
-                         conditional_t< sizeof(__t) == sizeof(int64_t), int64_t, void> >;
+          conditional_t<sizeof(__t) == sizeof(int32_t),
+                        int32_t,
+                        conditional_t<sizeof(__t) == sizeof(int64_t), int64_t, void>>;
       if constexpr (is_same_v<_IntType, void>) {
         static_assert(sizeof(_Dp) == 0, "std::strong_order is unimplemented for this floating-point type");
       } else if (__t_is_nan && __u_is_nan) {
diff --git a/lib/libcxx/include/__compare/three_way_comparable.h b/lib/libcxx/include/__compare/three_way_comparable.h
index 7a44ea9158..ad6d05a681 100644
--- a/lib/libcxx/include/__compare/three_way_comparable.h
+++ b/lib/libcxx/include/__compare/three_way_comparable.h
@@ -12,6 +12,7 @@
 #include <__compare/common_comparison_category.h>
 #include <__compare/ordering.h>
 #include <__concepts/common_reference_with.h>
+#include <__concepts/comparison_common_type.h>
 #include <__concepts/equality_comparable.h>
 #include <__concepts/same_as.h>
 #include <__concepts/totally_ordered.h>
@@ -39,8 +40,7 @@ concept three_way_comparable =
 
 template <class _Tp, class _Up, class _Cat = partial_ordering>
 concept three_way_comparable_with =
-    three_way_comparable<_Tp, _Cat> && three_way_comparable<_Up, _Cat> &&
-    common_reference_with<__make_const_lvalue_ref<_Tp>, __make_const_lvalue_ref<_Up>> &&
+    three_way_comparable<_Tp, _Cat> && three_way_comparable<_Up, _Cat> && __comparison_common_type_with<_Tp, _Up> &&
     three_way_comparable<common_reference_t<__make_const_lvalue_ref<_Tp>, __make_const_lvalue_ref<_Up>>, _Cat> &&
     __weakly_equality_comparable_with<_Tp, _Up> && __partially_ordered_with<_Tp, _Up> &&
     requires(__make_const_lvalue_ref<_Tp> __t, __make_const_lvalue_ref<_Up> __u) {
diff --git a/lib/libcxx/include/__concepts/comparison_common_type.h b/lib/libcxx/include/__concepts/comparison_common_type.h
new file mode 100644
index 0000000000..3f0d770511
--- /dev/null
+++ b/lib/libcxx/include/__concepts/comparison_common_type.h
@@ -0,0 +1,40 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef _LIBCPP___CONCEPTS_COMPARISON_COMMON_TYPE_H
+#define _LIBCPP___CONCEPTS_COMPARISON_COMMON_TYPE_H
+
+#include <__concepts/convertible_to.h>
+#include <__concepts/same_as.h>
+#include <__config>
+#include <__type_traits/common_reference.h>
+#include <__type_traits/remove_cvref.h>
+
+#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
+#  pragma GCC system_header
+#endif
+
+_LIBCPP_BEGIN_NAMESPACE_STD
+
+#if _LIBCPP_STD_VER >= 20
+
+template <class _Tp, class _Up, class _CommonRef = common_reference_t<const _Tp&, const _Up&>>
+concept __comparison_common_type_with_impl =
+    same_as<common_reference_t<const _Tp&, const _Up&>, common_reference_t<const _Up&, const _Tp&>> && requires {
+      requires convertible_to<const _Tp&, const _CommonRef&> || convertible_to<_Tp, const _CommonRef&>;
+      requires convertible_to<const _Up&, const _CommonRef&> || convertible_to<_Up, const _CommonRef&>;
+    };
+
+template <class _Tp, class _Up>
+concept __comparison_common_type_with = __comparison_common_type_with_impl<remove_cvref_t<_Tp>, remove_cvref_t<_Up>>;
+
+#endif // _LIBCPP_STD_VER >= 20
+
+_LIBCPP_END_NAMESPACE_STD
+
+#endif // _LIBCPP___CONCEPTS_COMPARISON_COMMON_TYPE_H
diff --git a/lib/libcxx/include/__concepts/equality_comparable.h b/lib/libcxx/include/__concepts/equality_comparable.h
index 278fc76409..56fc6f8b68 100644
--- a/lib/libcxx/include/__concepts/equality_comparable.h
+++ b/lib/libcxx/include/__concepts/equality_comparable.h
@@ -11,6 +11,7 @@
 
 #include <__concepts/boolean_testable.h>
 #include <__concepts/common_reference_with.h>
+#include <__concepts/comparison_common_type.h>
 #include <__config>
 #include <__type_traits/common_reference.h>
 #include <__type_traits/make_const_lvalue_ref.h>
@@ -41,7 +42,7 @@ concept equality_comparable = __weakly_equality_comparable_with<_Tp, _Tp>;
 template <class _Tp, class _Up>
 concept equality_comparable_with =
     equality_comparable<_Tp> && equality_comparable<_Up> &&
-    common_reference_with<__make_const_lvalue_ref<_Tp>, __make_const_lvalue_ref<_Up>> &&
+    __comparison_common_type_with<_Tp, _Up> &&
     equality_comparable<
         common_reference_t<
             __make_const_lvalue_ref<_Tp>,
diff --git a/lib/libcxx/include/__condition_variable/condition_variable.h b/lib/libcxx/include/__condition_variable/condition_variable.h
index 1e8edd5dcb..b7151930e9 100644
--- a/lib/libcxx/include/__condition_variable/condition_variable.h
+++ b/lib/libcxx/include/__condition_variable/condition_variable.h
@@ -170,7 +170,7 @@ public:
   wait_for(unique_lock<mutex>& __lk, const chrono::duration<_Rep, _Period>& __d, _Predicate __pred);
 
   typedef __libcpp_condvar_t* native_handle_type;
-  _LIBCPP_HIDE_FROM_ABI native_handle_type native_handle() { return &__cv_; }
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI native_handle_type native_handle() { return &__cv_; }
 
 private:
   void
diff --git a/lib/libcxx/include/__config b/lib/libcxx/include/__config
index 03538ff78e..de32af8f76 100644
--- a/lib/libcxx/include/__config
+++ b/lib/libcxx/include/__config
@@ -14,6 +14,8 @@
 #include <__configuration/abi.h>
 #include <__configuration/availability.h>
 #include <__configuration/compiler.h>
+#include <__configuration/experimental.h>
+#include <__configuration/hardening.h>
 #include <__configuration/language.h>
 #include <__configuration/platform.h>
 
@@ -28,7 +30,7 @@
 // _LIBCPP_VERSION represents the version of libc++, which matches the version of LLVM.
 // Given a LLVM release LLVM XX.YY.ZZ (e.g. LLVM 17.0.1 == 17.00.01), _LIBCPP_VERSION is
 // defined to XXYYZZ.
-#  define _LIBCPP_VERSION 210100
+#  define _LIBCPP_VERSION 220104
 
 #  define _LIBCPP_CONCAT_IMPL(_X, _Y) _X##_Y
 #  define _LIBCPP_CONCAT(_X, _Y) _LIBCPP_CONCAT_IMPL(_X, _Y)
@@ -38,195 +40,6 @@
 #    define _LIBCPP_FREESTANDING
 #  endif
 
-// NOLINTNEXTLINE(libcpp-cpp-version-check)
-#  if __cplusplus < 201103L
-#    define _LIBCPP_CXX03_LANG
-#  endif
-
-#  if __has_feature(experimental_library)
-#    ifndef _LIBCPP_ENABLE_EXPERIMENTAL
-#      define _LIBCPP_ENABLE_EXPERIMENTAL
-#    endif
-#  endif
-
-// Incomplete features get their own specific disabling flags. This makes it
-// easier to grep for target specific flags once the feature is complete.
-#  if defined(_LIBCPP_ENABLE_EXPERIMENTAL) || defined(_LIBCPP_BUILDING_LIBRARY)
-#    define _LIBCPP_HAS_EXPERIMENTAL_LIBRARY 1
-#  else
-#    define _LIBCPP_HAS_EXPERIMENTAL_LIBRARY 0
-#  endif
-
-#  define _LIBCPP_HAS_EXPERIMENTAL_PSTL _LIBCPP_HAS_EXPERIMENTAL_LIBRARY
-#  define _LIBCPP_HAS_EXPERIMENTAL_TZDB _LIBCPP_HAS_EXPERIMENTAL_LIBRARY
-#  define _LIBCPP_HAS_EXPERIMENTAL_SYNCSTREAM _LIBCPP_HAS_EXPERIMENTAL_LIBRARY
-#  define _LIBCPP_HAS_EXPERIMENTAL_HARDENING_OBSERVE_SEMANTIC _LIBCPP_HAS_EXPERIMENTAL_LIBRARY
-
-// HARDENING {
-
-// TODO(LLVM 23): Remove this. We're making these an error to catch folks who might not have migrated.
-//       Since hardening went through several changes (many of which impacted user-facing macros),
-//       we're keeping these checks around for a bit longer than usual. Failure to properly configure
-//       hardening results in checks being dropped silently, which is a pretty big deal.
-#  if defined(_LIBCPP_ENABLE_ASSERTIONS)
-#    error "_LIBCPP_ENABLE_ASSERTIONS has been removed, please use _LIBCPP_HARDENING_MODE=<mode> instead (see docs)"
-#  endif
-#  if defined(_LIBCPP_ENABLE_HARDENED_MODE)
-#    error "_LIBCPP_ENABLE_HARDENED_MODE has been removed, please use _LIBCPP_HARDENING_MODE=<mode> instead (see docs)"
-#  endif
-#  if defined(_LIBCPP_ENABLE_SAFE_MODE)
-#    error "_LIBCPP_ENABLE_SAFE_MODE has been removed, please use _LIBCPP_HARDENING_MODE=<mode> instead (see docs)"
-#  endif
-#  if defined(_LIBCPP_ENABLE_DEBUG_MODE)
-#    error "_LIBCPP_ENABLE_DEBUG_MODE has been removed, please use _LIBCPP_HARDENING_MODE=<mode> instead (see docs)"
-#  endif
-
-// The library provides the macro `_LIBCPP_HARDENING_MODE` which can be set to one of the following values:
-//
-// - `_LIBCPP_HARDENING_MODE_NONE`;
-// - `_LIBCPP_HARDENING_MODE_FAST`;
-// - `_LIBCPP_HARDENING_MODE_EXTENSIVE`;
-// - `_LIBCPP_HARDENING_MODE_DEBUG`.
-//
-// These values have the following effects:
-//
-// - `_LIBCPP_HARDENING_MODE_NONE` -- sets the hardening mode to "none" which disables all runtime hardening checks;
-//
-// - `_LIBCPP_HARDENING_MODE_FAST` -- sets that hardening mode to "fast". The fast mode enables security-critical checks
-//   that can be done with relatively little runtime overhead in constant time;
-//
-// - `_LIBCPP_HARDENING_MODE_EXTENSIVE` -- sets the hardening mode to "extensive". The extensive mode is a superset of
-//   the fast mode that additionally enables checks that are relatively cheap and prevent common types of logic errors
-//   but are not necessarily security-critical;
-//
-// - `_LIBCPP_HARDENING_MODE_DEBUG` -- sets the hardening mode to "debug". The debug mode is a superset of the extensive
-//   mode and enables all checks available in the library, including internal assertions. Checks that are part of the
-//   debug mode can be very expensive and thus the debug mode is intended to be used for testing, not in production.
-
-// Inside the library, assertions are categorized so they can be cherry-picked based on the chosen hardening mode. These
-// macros are only for internal use -- users should only pick one of the high-level hardening modes described above.
-//
-// - `_LIBCPP_ASSERT_VALID_INPUT_RANGE` -- checks that ranges (whether expressed as an iterator pair, an iterator and
-//   a sentinel, an iterator and a count, or a `std::range`) given as input to library functions are valid:
-//   - the sentinel is reachable from the begin iterator;
-//   - TODO(hardening): both iterators refer to the same container.
-//
-// - `_LIBCPP_ASSERT_VALID_ELEMENT_ACCESS` -- checks that any attempts to access a container element, whether through
-//   the container object or through an iterator, are valid and do not attempt to go out of bounds or otherwise access
-//   a non-existent element. For iterator checks to work, bounded iterators must be enabled in the ABI. Types like
-//   `optional` and `function` are considered one-element containers for the purposes of this check.
-//
-// - `_LIBCPP_ASSERT_NON_NULL` -- checks that the pointer being dereferenced is not null. On most modern platforms zero
-//   address does not refer to an actual location in memory, so a null pointer dereference would not compromize the
-//   memory security of a program (however, it is still undefined behavior that can result in strange errors due to
-//   compiler optimizations).
-//
-// - `_LIBCPP_ASSERT_NON_OVERLAPPING_RANGES` -- for functions that take several ranges as arguments, checks that the
-//   given ranges do not overlap.
-//
-// - `_LIBCPP_ASSERT_VALID_DEALLOCATION` -- checks that an attempt to deallocate memory is valid (e.g. the given object
-//   was allocated by the given allocator). Violating this category typically results in a memory leak.
-//
-// - `_LIBCPP_ASSERT_VALID_EXTERNAL_API_CALL` -- checks that a call to an external API doesn't fail in
-//   an unexpected manner. This includes triggering documented cases of undefined behavior in an external library (like
-//   attempting to unlock an unlocked mutex in pthreads). Any API external to the library falls under this category
-//   (from system calls to compiler intrinsics). We generally don't expect these failures to compromize memory safety or
-//   otherwise create an immediate security issue.
-//
-// - `_LIBCPP_ASSERT_COMPATIBLE_ALLOCATOR` -- checks any operations that exchange nodes between containers to make sure
-//   the containers have compatible allocators.
-//
-// - `_LIBCPP_ASSERT_ARGUMENT_WITHIN_DOMAIN` -- checks that the given argument is within the domain of valid arguments
-//   for the function. Violating this typically produces an incorrect result (e.g. the clamp algorithm returns the
-//   original value without clamping it due to incorrect functors) or puts an object into an invalid state (e.g.
-//   a string view where only a subset of elements is possible to access). This category is for assertions violating
-//   which doesn't cause any immediate issues in the library -- whatever the consequences are, they will happen in the
-//   user code.
-//
-// - `_LIBCPP_ASSERT_PEDANTIC` -- checks prerequisites which are imposed by the Standard, but violating which happens to
-//   be benign in our implementation.
-//
-// - `_LIBCPP_ASSERT_SEMANTIC_REQUIREMENT` -- checks that the given argument satisfies the semantic requirements imposed
-//   by the Standard. Typically, there is no simple way to completely prove that a semantic requirement is satisfied;
-//   thus, this would often be a heuristic check and it might be quite expensive.
-//
-// - `_LIBCPP_ASSERT_INTERNAL` -- checks that internal invariants of the library hold. These assertions don't depend on
-//   user input.
-//
-// - `_LIBCPP_ASSERT_UNCATEGORIZED` -- for assertions that haven't been properly classified yet.
-
-// clang-format off
-#  define _LIBCPP_HARDENING_MODE_NONE      (1 << 1)
-#  define _LIBCPP_HARDENING_MODE_FAST      (1 << 2)
-#  define _LIBCPP_HARDENING_MODE_EXTENSIVE (1 << 4) // Deliberately not ordered.
-#  define _LIBCPP_HARDENING_MODE_DEBUG     (1 << 3)
-// clang-format on
-
-#  ifndef _LIBCPP_HARDENING_MODE
-
-#    ifndef _LIBCPP_HARDENING_MODE_DEFAULT
-#      error _LIBCPP_HARDENING_MODE_DEFAULT is not defined. This definition should be set at configuration time in the \
-`__config_site` header, please make sure your installation of libc++ is not broken.
-#    endif
-
-#    define _LIBCPP_HARDENING_MODE _LIBCPP_HARDENING_MODE_DEFAULT
-#  endif
-
-#  if _LIBCPP_HARDENING_MODE != _LIBCPP_HARDENING_MODE_NONE &&                                                         \
-      _LIBCPP_HARDENING_MODE != _LIBCPP_HARDENING_MODE_FAST &&                                                         \
-      _LIBCPP_HARDENING_MODE != _LIBCPP_HARDENING_MODE_EXTENSIVE &&                                                    \
-      _LIBCPP_HARDENING_MODE != _LIBCPP_HARDENING_MODE_DEBUG
-#    error _LIBCPP_HARDENING_MODE must be set to one of the following values: \
-_LIBCPP_HARDENING_MODE_NONE, \
-_LIBCPP_HARDENING_MODE_FAST, \
-_LIBCPP_HARDENING_MODE_EXTENSIVE, \
-_LIBCPP_HARDENING_MODE_DEBUG
-#  endif
-
-// Hardening assertion semantics generally mirror the evaluation semantics of C++26 Contracts:
-// - `ignore` evaluates the assertion but doesn't do anything if it fails (note that it differs from the Contracts
-//   `ignore` semantic which wouldn't evaluate the assertion at all);
-// - `observe` logs an error (indicating, if possible, that the error is fatal) and continues execution;
-// - `quick-enforce` terminates the program as fast as possible (via trapping);
-// - `enforce` logs an error and then terminates the program.
-//
-// Notes:
-// - Continuing execution after a hardening check fails results in undefined behavior; the `observe` semantic is meant
-//   to make adopting hardening easier but should not be used outside of this scenario;
-// - C++26 wording for Library Hardening precludes a conforming Hardened implementation from using the Contracts
-//   `ignore` semantic when evaluating hardened preconditions in the Library. Libc++ allows using this semantic for
-//   hardened preconditions, however, be aware that using `ignore` does not produce a conforming "Hardened"
-//   implementation, unlike the other semantics above.
-// clang-format off
-#  define _LIBCPP_ASSERTION_SEMANTIC_IGNORE        (1 << 1)
-#  define _LIBCPP_ASSERTION_SEMANTIC_OBSERVE       (1 << 2)
-#  define _LIBCPP_ASSERTION_SEMANTIC_QUICK_ENFORCE (1 << 3)
-#  define _LIBCPP_ASSERTION_SEMANTIC_ENFORCE       (1 << 4)
-// clang-format on
-
-// Allow users to define an arbitrary assertion semantic; otherwise, use the default mapping from modes to semantics.
-// The default is for production-capable modes to use `quick-enforce` (i.e., trap) and for the `debug` mode to use
-// `enforce` (i.e., log and abort).
-#  ifndef _LIBCPP_ASSERTION_SEMANTIC
-
-#    if _LIBCPP_HARDENING_MODE == _LIBCPP_HARDENING_MODE_DEBUG
-#      define _LIBCPP_ASSERTION_SEMANTIC _LIBCPP_ASSERTION_SEMANTIC_ENFORCE
-#    else
-#      define _LIBCPP_ASSERTION_SEMANTIC _LIBCPP_ASSERTION_SEMANTIC_QUICK_ENFORCE
-#    endif
-
-#  else
-#    if !_LIBCPP_HAS_EXPERIMENTAL_LIBRARY
-#      error "Assertion semantics are an experimental feature."
-#    endif
-#    if defined(_LIBCPP_CXX03_LANG)
-#      error "Assertion semantics are not available in the C++03 mode."
-#    endif
-
-#  endif // _LIBCPP_ASSERTION_SEMANTIC
-
-// } HARDENING
-
 #  define _LIBCPP_TOSTRING2(x) #x
 #  define _LIBCPP_TOSTRING(x) _LIBCPP_TOSTRING2(x)
 
@@ -320,13 +133,6 @@ _LIBCPP_HARDENING_MODE_DEBUG
 //      When this option is used, the token passed to `std::random_device`'s
 //      constructor *must* be "/dev/urandom" -- anything else is an error.
 //
-// _LIBCPP_USING_NACL_RANDOM
-//      NaCl's sandbox (which PNaCl also runs in) doesn't allow filesystem access,
-//      including accesses to the special files under `/dev`. This implementation
-//      uses the NaCL syscall `nacl_secure_random_init()` to get entropy.
-//      When this option is used, the token passed to `std::random_device`'s
-//      constructor *must* be "/dev/urandom" -- anything else is an error.
-//
 // _LIBCPP_USING_WIN32_RANDOM
 //      Use rand_s(), for use on Windows.
 //      When this option is used, the token passed to `std::random_device`'s
@@ -338,8 +144,6 @@ _LIBCPP_HARDENING_MODE_DEBUG
 #    define _LIBCPP_USING_GETENTROPY
 #  elif defined(__Fuchsia__)
 #    define _LIBCPP_USING_FUCHSIA_CPRNG
-#  elif defined(__native_client__)
-#    define _LIBCPP_USING_NACL_RANDOM
 #  elif defined(_LIBCPP_WIN32API)
 #    define _LIBCPP_USING_WIN32_RANDOM
 #  else
@@ -348,7 +152,7 @@ _LIBCPP_HARDENING_MODE_DEBUG
 
 #  ifndef _LIBCPP_CXX03_LANG
 
-#    define _LIBCPP_ALIGNOF(_Tp) alignof(_Tp)
+#    define _LIBCPP_ALIGNOF(...) alignof(__VA_ARGS__)
 #    define _ALIGNAS_TYPE(x) alignas(x)
 #    define _ALIGNAS(x) alignas(x)
 #    define _NOEXCEPT noexcept
@@ -357,7 +161,7 @@ _LIBCPP_HARDENING_MODE_DEBUG
 
 #  else
 
-#    define _LIBCPP_ALIGNOF(_Tp) _Alignof(_Tp)
+#    define _LIBCPP_ALIGNOF(...) _Alignof(__VA_ARGS__)
 #    define _ALIGNAS_TYPE(x) __attribute__((__aligned__(_LIBCPP_ALIGNOF(x))))
 #    define _ALIGNAS(x) __attribute__((__aligned__(x)))
 #    define nullptr __nullptr
@@ -471,6 +275,12 @@ typedef __char32_t char32_t;
 #    define _LIBCPP_GCC_DIAGNOSTIC_IGNORED(str)
 #  endif
 
+// Macros to enter and leave a state where deprecation warnings are suppressed.
+#  define _LIBCPP_SUPPRESS_DEPRECATED_PUSH                                                                             \
+    _LIBCPP_DIAGNOSTIC_PUSH _LIBCPP_CLANG_DIAGNOSTIC_IGNORED("-Wdeprecated")                                           \
+        _LIBCPP_GCC_DIAGNOSTIC_IGNORED("-Wdeprecated-declarations")
+#  define _LIBCPP_SUPPRESS_DEPRECATED_POP _LIBCPP_DIAGNOSTIC_POP
+
 #  if _LIBCPP_HARDENING_MODE == _LIBCPP_HARDENING_MODE_FAST
 #    define _LIBCPP_HARDENING_SIG f
 #  elif _LIBCPP_HARDENING_MODE == _LIBCPP_HARDENING_MODE_EXTENSIVE
@@ -481,6 +291,16 @@ typedef __char32_t char32_t;
 #    define _LIBCPP_HARDENING_SIG n // "none"
 #  endif
 
+#  if _LIBCPP_ASSERTION_SEMANTIC == _LIBCPP_ASSERTION_SEMANTIC_OBSERVE
+#    define _LIBCPP_ASSERTION_SEMANTIC_SIG o
+#  elif _LIBCPP_ASSERTION_SEMANTIC == _LIBCPP_ASSERTION_SEMANTIC_QUICK_ENFORCE
+#    define _LIBCPP_ASSERTION_SEMANTIC_SIG q
+#  elif _LIBCPP_ASSERTION_SEMANTIC == _LIBCPP_ASSERTION_SEMANTIC_ENFORCE
+#    define _LIBCPP_ASSERTION_SEMANTIC_SIG e
+#  else
+#    define _LIBCPP_ASSERTION_SEMANTIC_SIG i // `ignore`
+#  endif
+
 #  if !_LIBCPP_HAS_EXCEPTIONS
 #    define _LIBCPP_EXCEPTIONS_SIG n
 #  else
@@ -488,7 +308,9 @@ typedef __char32_t char32_t;
 #  endif
 
 #  define _LIBCPP_ODR_SIGNATURE                                                                                        \
-    _LIBCPP_CONCAT(_LIBCPP_CONCAT(_LIBCPP_HARDENING_SIG, _LIBCPP_EXCEPTIONS_SIG), _LIBCPP_VERSION)
+    _LIBCPP_CONCAT(                                                                                                    \
+        _LIBCPP_CONCAT(_LIBCPP_CONCAT(_LIBCPP_HARDENING_SIG, _LIBCPP_ASSERTION_SEMANTIC_SIG), _LIBCPP_EXCEPTIONS_SIG), \
+        _LIBCPP_VERSION)
 
 // This macro marks a symbol as being hidden from libc++'s ABI. This is achieved
 // on two levels:
@@ -550,16 +372,6 @@ typedef __char32_t char32_t;
 #  endif
 #  define _LIBCPP_HIDE_FROM_ABI_VIRTUAL _LIBCPP_HIDDEN _LIBCPP_EXCLUDE_FROM_EXPLICIT_INSTANTIATION
 
-#  ifdef _LIBCPP_BUILDING_LIBRARY
-#    if _LIBCPP_ABI_VERSION > 1
-#      define _LIBCPP_HIDE_FROM_ABI_AFTER_V1 _LIBCPP_HIDE_FROM_ABI
-#    else
-#      define _LIBCPP_HIDE_FROM_ABI_AFTER_V1
-#    endif
-#  else
-#    define _LIBCPP_HIDE_FROM_ABI_AFTER_V1 _LIBCPP_HIDE_FROM_ABI
-#  endif
-
 // Clang modules take a significant compile time hit when pushing and popping diagnostics.
 // Since all the headers are marked as system headers unless _LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER is defined, we can
 // simply disable this pushing and popping when _LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER isn't defined.
@@ -676,27 +488,6 @@ typedef __char32_t char32_t;
 #    endif
 #  endif
 
-// It is not yet possible to use aligned_alloc() on all Apple platforms since
-// 10.15 was the first version to ship an implementation of aligned_alloc().
-#  if defined(__APPLE__)
-#    if (defined(__ENVIRONMENT_MAC_OS_X_VERSION_MIN_REQUIRED__) &&                                                     \
-         __ENVIRONMENT_MAC_OS_X_VERSION_MIN_REQUIRED__ < 101500) ||                                                    \
-        (defined(__ENVIRONMENT_IPHONE_OS_VERSION_MIN_REQUIRED__) &&                                                    \
-         __ENVIRONMENT_IPHONE_OS_VERSION_MIN_REQUIRED__ < 130000) ||                                                   \
-        (defined(__ENVIRONMENT_WATCH_OS_VERSION_MIN_REQUIRED__) &&                                                     \
-         __ENVIRONMENT_WATCH_OS_VERSION_MIN_REQUIRED__ < 60000) ||                                                     \
-        (defined(__ENVIRONMENT_TV_OS_VERSION_MIN_REQUIRED__) && __ENVIRONMENT_TV_OS_VERSION_MIN_REQUIRED__ < 130000)
-#      define _LIBCPP_HAS_C11_ALIGNED_ALLOC 0
-#    else
-#      define _LIBCPP_HAS_C11_ALIGNED_ALLOC 1
-#    endif
-#  elif defined(__ANDROID__) && __ANDROID_API__ < 28
-// Android only provides aligned_alloc when targeting API 28 or higher.
-#    define _LIBCPP_HAS_C11_ALIGNED_ALLOC 0
-#  else
-#    define _LIBCPP_HAS_C11_ALIGNED_ALLOC 1
-#  endif
-
 #  if defined(__APPLE__) || defined(__FreeBSD__)
 #    define _LIBCPP_WCTYPE_IS_MASK
 #  endif
@@ -727,6 +518,15 @@ typedef __char32_t char32_t;
 #    define _LIBCPP_DEPRECATED_(m)
 #  endif
 
+// FIXME: using `#warning` causes diagnostics from system headers which include deprecated headers. This can only be
+// enabled again once https://github.com/llvm/llvm-project/pull/168041 (or a similar feature) has landed, since that
+// allows suppression in system headers.
+#  if defined(__DEPRECATED) && __DEPRECATED && !defined(_LIBCPP_DISABLE_DEPRECATION_WARNINGS) && 0
+#    define _LIBCPP_DIAGNOSE_DEPRECATED_HEADERS 1
+#  else
+#    define _LIBCPP_DIAGNOSE_DEPRECATED_HEADERS 0
+#  endif
+
 #  if !defined(_LIBCPP_CXX03_LANG)
 #    define _LIBCPP_DEPRECATED_IN_CXX11 _LIBCPP_DEPRECATED
 #  else
@@ -771,17 +571,6 @@ typedef __char32_t char32_t;
 #    define _LIBCPP_DEPRECATED_WITH_CHAR8_T
 #  endif
 
-// Macros to enter and leave a state where deprecation warnings are suppressed.
-#  if defined(_LIBCPP_COMPILER_CLANG_BASED) || defined(_LIBCPP_COMPILER_GCC)
-#    define _LIBCPP_SUPPRESS_DEPRECATED_PUSH                                                                           \
-      _Pragma("GCC diagnostic push") _Pragma("GCC diagnostic ignored \"-Wdeprecated\"")                                \
-          _Pragma("GCC diagnostic ignored \"-Wdeprecated-declarations\"")
-#    define _LIBCPP_SUPPRESS_DEPRECATED_POP _Pragma("GCC diagnostic pop")
-#  else
-#    define _LIBCPP_SUPPRESS_DEPRECATED_PUSH
-#    define _LIBCPP_SUPPRESS_DEPRECATED_POP
-#  endif
-
 #  if _LIBCPP_STD_VER <= 11
 #    define _LIBCPP_EXPLICIT_SINCE_CXX14
 #  else
@@ -861,18 +650,10 @@ typedef __char32_t char32_t;
 #    endif // _LIBCPP_HAS_THREAD_API
 #  endif   // _LIBCPP_HAS_THREADS
 
-#  if _LIBCPP_HAS_THREAD_API_PTHREAD
-#    if defined(__ANDROID__) && __ANDROID_API__ >= 30
-#      define _LIBCPP_HAS_COND_CLOCKWAIT 1
-#    elif defined(_LIBCPP_GLIBC_PREREQ)
-#      if _LIBCPP_GLIBC_PREREQ(2, 30)
-#        define _LIBCPP_HAS_COND_CLOCKWAIT 1
-#      else
-#        define _LIBCPP_HAS_COND_CLOCKWAIT 0
-#      endif
-#    else
-#      define _LIBCPP_HAS_COND_CLOCKWAIT 0
-#    endif
+#  if !_LIBCPP_HAS_THREAD_API_PTHREAD
+#    define _LIBCPP_HAS_COND_CLOCKWAIT 0
+#  elif (defined(__ANDROID__) && __ANDROID_API__ >= 30) || _LIBCPP_GLIBC_PREREQ(2, 30)
+#    define _LIBCPP_HAS_COND_CLOCKWAIT 1
 #  else
 #    define _LIBCPP_HAS_COND_CLOCKWAIT 0
 #  endif
@@ -951,8 +732,8 @@ typedef __char32_t char32_t;
 #    endif
 #  endif
 
-#  if defined(__FreeBSD__) && defined(__clang__) && __has_attribute(__no_thread_safety_analysis__)
-#    define _LIBCPP_NO_THREAD_SAFETY_ANALYSIS __attribute__((__no_thread_safety_analysis__))
+#  if __has_cpp_attribute(_Clang::__no_thread_safety_analysis__)
+#    define _LIBCPP_NO_THREAD_SAFETY_ANALYSIS [[_Clang::__no_thread_safety_analysis__]]
 #  else
 #    define _LIBCPP_NO_THREAD_SAFETY_ANALYSIS
 #  endif
@@ -1038,12 +819,8 @@ typedef __char32_t char32_t;
 // the latter depends on internal GNU libc details that are not appropriate
 // to depend on here, so any declarations present when __cpp_char8_t is not
 // defined are ignored.
-#  if defined(_LIBCPP_GLIBC_PREREQ)
-#    if _LIBCPP_GLIBC_PREREQ(2, 36) && defined(__cpp_char8_t)
-#      define _LIBCPP_HAS_C8RTOMB_MBRTOC8 1
-#    else
-#      define _LIBCPP_HAS_C8RTOMB_MBRTOC8 0
-#    endif
+#  if _LIBCPP_GLIBC_PREREQ(2, 36) && defined(__cpp_char8_t)
+#    define _LIBCPP_HAS_C8RTOMB_MBRTOC8 1
 #  else
 #    define _LIBCPP_HAS_C8RTOMB_MBRTOC8 0
 #  endif
@@ -1067,8 +844,7 @@ typedef __char32_t char32_t;
 #    define _LIBCPP_CTAD_SUPPORTED_FOR_TYPE(_ClassName) static_assert(true, "")
 #  endif
 
-// TODO(LLVM 22): Remove the workaround
-#  if defined(__OBJC__) && (!defined(_LIBCPP_CLANG_VER) || _LIBCPP_CLANG_VER < 2001)
+#  if defined(__OBJC__) && defined(_LIBCPP_APPLE_CLANG_VER)
 #    define _LIBCPP_WORKAROUND_OBJCXX_COMPILER_INTRINSICS
 #  endif
 
@@ -1153,12 +929,33 @@ typedef __char32_t char32_t;
 #    define _LIBCPP_DIAGNOSE_WARNING(...)
 #  endif
 
+#  if __has_attribute(__diagnose_if__) && !defined(_LIBCPP_APPLE_CLANG_VER) &&                                         \
+      (!defined(_LIBCPP_CLANG_VER) || _LIBCPP_CLANG_VER >= 2001)
+#    define _LIBCPP_DIAGNOSE_IF(...) __attribute__((__diagnose_if__(__VA_ARGS__)))
+#  else
+#    define _LIBCPP_DIAGNOSE_IF(...)
+#  endif
+
+#  define _LIBCPP_DIAGNOSE_NULLPTR_IF(condition, condition_description)                                                \
+    _LIBCPP_DIAGNOSE_IF(                                                                                               \
+        condition,                                                                                                     \
+        "null passed to callee that requires a non-null argument" condition_description,                               \
+        "warning",                                                                                                     \
+        "nonnull")
+
 #  if __has_cpp_attribute(_Clang::__lifetimebound__)
 #    define _LIBCPP_LIFETIMEBOUND [[_Clang::__lifetimebound__]]
 #  else
 #    define _LIBCPP_LIFETIMEBOUND
 #  endif
 
+// This is to work around https://llvm.org/PR156809
+#  ifndef _LIBCPP_CXX03_LANG
+#    define _LIBCPP_CTOR_LIFETIMEBOUND _LIBCPP_LIFETIMEBOUND
+#  else
+#    define _LIBCPP_CTOR_LIFETIMEBOUND
+#  endif
+
 #  if __has_cpp_attribute(_Clang::__noescape__)
 #    define _LIBCPP_NOESCAPE [[_Clang::__noescape__]]
 #  else
@@ -1172,12 +969,6 @@ typedef __char32_t char32_t;
 #    define _LIBCPP_NO_SPECIALIZATIONS
 #  endif
 
-#  if __has_cpp_attribute(_Clang::__standalone_debug__)
-#    define _LIBCPP_STANDALONE_DEBUG [[_Clang::__standalone_debug__]]
-#  else
-#    define _LIBCPP_STANDALONE_DEBUG
-#  endif
-
 #  if __has_cpp_attribute(_Clang::__preferred_name__)
 #    define _LIBCPP_PREFERRED_NAME(x) [[_Clang::__preferred_name__(x)]]
 #  else
@@ -1257,14 +1048,6 @@ typedef __char32_t char32_t;
 #    define _LIBCPP_DIAGNOSE_NULLPTR
 #  endif
 
-// TODO(LLVM 22): Remove this macro once LLVM19 support ends. __cpp_explicit_this_parameter has been set in LLVM20.
-// Clang-18 has support for deducing this, but it does not set the FTM.
-#  if defined(__cpp_explicit_this_parameter) || (defined(_LIBCPP_CLANG_VER) && _LIBCPP_CLANG_VER >= 1800)
-#    define _LIBCPP_HAS_EXPLICIT_THIS_PARAMETER 1
-#  else
-#    define _LIBCPP_HAS_EXPLICIT_THIS_PARAMETER 0
-#  endif
-
 #endif // __cplusplus
 
 #endif // _LIBCPP___CONFIG
diff --git a/lib/libcxx/include/__configuration/abi.h b/lib/libcxx/include/__configuration/abi.h
index 2f7c548465..d9623df71d 100644
--- a/lib/libcxx/include/__configuration/abi.h
+++ b/lib/libcxx/include/__configuration/abi.h
@@ -61,19 +61,9 @@
 // According to the Standard, `bitset::operator[] const` returns bool
 #  define _LIBCPP_ABI_BITSET_VECTOR_BOOL_CONST_SUBSCRIPT_RETURN_BOOL
 
-// In LLVM 20, we've changed to take these ABI breaks unconditionally. These flags only exist in case someone is running
-// into the static_asserts we added to catch the ABI break and don't care that it is one.
-// TODO(LLVM 22): Remove these flags
-#  define _LIBCPP_ABI_LIST_REMOVE_NODE_POINTER_UB
-#  define _LIBCPP_ABI_TREE_REMOVE_NODE_POINTER_UB
-#  define _LIBCPP_ABI_FIX_UNORDERED_NODE_POINTER_UB
-#  define _LIBCPP_ABI_FORWARD_LIST_REMOVE_NODE_POINTER_UB
-
 // These flags are documented in ABIGuarantees.rst
 #  define _LIBCPP_ABI_ALTERNATE_STRING_LAYOUT
-#  define _LIBCPP_ABI_DO_NOT_EXPORT_BASIC_STRING_COMMON
-#  define _LIBCPP_ABI_DO_NOT_EXPORT_VECTOR_BASE_COMMON
-#  define _LIBCPP_ABI_DO_NOT_EXPORT_TO_CHARS_BASE_10
+#  define _LIBCPP_ABI_ATOMIC_WAIT_NATIVE_BY_SIZE
 #  define _LIBCPP_ABI_ENABLE_SHARED_PTR_TRIVIAL_ABI
 #  define _LIBCPP_ABI_ENABLE_UNIQUE_PTR_TRIVIAL_ABI
 #  define _LIBCPP_ABI_FIX_CITYHASH_IMPLEMENTATION
@@ -84,25 +74,16 @@
 #  define _LIBCPP_ABI_NO_FILESYSTEM_INLINE_NAMESPACE
 #  define _LIBCPP_ABI_NO_ITERATOR_BASES
 #  define _LIBCPP_ABI_NO_RANDOM_DEVICE_COMPATIBILITY_LAYOUT
+#  define _LIBCPP_ABI_NO_REVERSE_ITERATOR_SECOND_MEMBER
 #  define _LIBCPP_ABI_OPTIMIZED_FUNCTION
 #  define _LIBCPP_ABI_REGEX_CONSTANTS_NONZERO
 #  define _LIBCPP_ABI_STRING_OPTIMIZED_EXTERNAL_INSTANTIATION
 #  define _LIBCPP_ABI_USE_WRAP_ITER_IN_STD_ARRAY
 #  define _LIBCPP_ABI_USE_WRAP_ITER_IN_STD_STRING_VIEW
 #  define _LIBCPP_ABI_VARIANT_INDEX_TYPE_OPTIMIZATION
+#  define _LIBCPP_ABI_TRIVIALLY_COPYABLE_BIT_ITERATOR
 
 #elif _LIBCPP_ABI_VERSION == 1
-#  if !(defined(_LIBCPP_OBJECT_FORMAT_COFF) || defined(_LIBCPP_OBJECT_FORMAT_XCOFF))
-// Enable compiling copies of now inline methods into the dylib to support
-// applications compiled against older libraries. This is unnecessary with
-// COFF dllexport semantics, since dllexport forces a non-inline definition
-// of inline functions to be emitted anyway. Our own non-inline copy would
-// conflict with the dllexport-emitted copy, so we disable it. For XCOFF,
-// the linker will take issue with the symbols in the shared object if the
-// weak inline methods get visibility (such as from -fvisibility-inlines-hidden),
-// so disable it.
-#    define _LIBCPP_DEPRECATED_ABI_LEGACY_LIBRARY_DEFINITIONS_FOR_INLINE_FUNCTIONS
-#  endif
 // Feature macros for disabling pre ABI v1 features. All of these options
 // are deprecated.
 #  if defined(__FreeBSD__)
@@ -110,11 +91,20 @@
 #  endif
 #endif
 
+// TODO(LLVM 22): Remove this check
+#if defined(_LIBCPP_ABI_NO_ITERATOR_BASES) && !defined(_LIBCPP_ABI_NO_REVERSE_ITERATOR_SECOND_MEMBER)
+#  ifndef _LIBCPP_ONLY_NO_ITERATOR_BASES
+#    error "You probably want to define _LIBCPP_ABI_NO_REVERSE_ITERATOR_SECOND_MEMBER. This has been split out from"   \
+ " _LIBCPP_ABI_NO_ITERATOR_BASES to allow only removing the second iterator member, since they aren't really related." \
+ "If you actually want this ABI configuration, please define _LIBCPP_ONLY_NO_ITERATOR_BASES instead."
+#  endif
+#endif
+
 // We had some bugs where we use [[no_unique_address]] together with construct_at,
 // which causes UB as the call on construct_at could write to overlapping subobjects
 //
-// https://github.com/llvm/llvm-project/issues/70506
-// https://github.com/llvm/llvm-project/issues/70494
+// https://llvm.org/PR70506
+// https://llvm.org/PR70494
 //
 // To fix the bug we had to change the ABI of some classes to remove [[no_unique_address]] under certain conditions.
 // The macro below is used for all classes whose ABI have changed as part of fixing these bugs.
diff --git a/lib/libcxx/include/__configuration/availability.h b/lib/libcxx/include/__configuration/availability.h
index ae58e36b50..40e11b3314 100644
--- a/lib/libcxx/include/__configuration/availability.h
+++ b/lib/libcxx/include/__configuration/availability.h
@@ -17,62 +17,17 @@
 #  pragma GCC system_header
 #endif
 
-// Libc++ is shipped by various vendors. In particular, it is used as a system
-// library on macOS, iOS and other Apple platforms. In order for users to be
-// able to compile a binary that is intended to be deployed to an older version
-// of a platform, Clang provides availability attributes [1]. These attributes
-// can be placed on declarations and are used to describe the life cycle of a
-// symbol in the library.
-//
-// The main goal is to ensure a compile-time error if a symbol that hasn't been
-// introduced in a previously released library is used in a program that targets
-// that previously released library. Normally, this would be a load-time error
-// when one tries to launch the program against the older library.
-//
-// For example, the filesystem library was introduced in the dylib in LLVM 9.
-// On Apple platforms, this corresponds to macOS 10.15. If a user compiles on
-// a macOS 10.15 host but targets macOS 10.13 with their program, the compiler
-// would normally not complain (because the required declarations are in the
-// headers), but the dynamic loader would fail to find the symbols when actually
-// trying to launch the program on macOS 10.13. To turn this into a compile-time
-// issue instead, declarations are annotated with when they were introduced, and
-// the compiler can produce a diagnostic if the program references something that
-// isn't available on the deployment target.
-//
-// This mechanism is general in nature, and any vendor can add their markup to
-// the library (see below). Whenever a new feature is added that requires support
-// in the shared library, two macros are added below to allow marking the feature
-// as unavailable:
-// 1. A macro named `_LIBCPP_AVAILABILITY_HAS_<feature>` which must be defined
-//    to `_LIBCPP_INTRODUCED_IN_<version>` for the appropriate LLVM version.
-// 2. A macro named `_LIBCPP_AVAILABILITY_<feature>`, which must be defined to
-//    `_LIBCPP_INTRODUCED_IN_<version>_MARKUP` for the appropriate LLVM version.
-//
-// When vendors decide to ship the feature as part of their shared library, they
-// can update the `_LIBCPP_INTRODUCED_IN_<version>` macro (and the markup counterpart)
-// based on the platform version they shipped that version of LLVM in. The library
-// will then use this markup to provide an optimal user experience on these platforms.
-//
-// Furthermore, many features in the standard library have corresponding
-// feature-test macros. The `_LIBCPP_AVAILABILITY_HAS_<feature>` macros
-// are checked by the corresponding feature-test macros generated by
-// generate_feature_test_macro_components.py to ensure that the library
-// doesn't announce a feature as being implemented if it is unavailable on
-// the deployment target.
-//
-// Note that this mechanism is disabled by default in the "upstream" libc++.
-// Availability annotations are only meaningful when shipping libc++ inside
-// a platform (i.e. as a system library), and so vendors that want them should
-// turn those annotations on at CMake configuration time.
-//
-// [1]: https://clang.llvm.org/docs/AttributeReference.html#availability
+// This file defines a framework that can be used by vendors to encode the version of an operating system that various
+// features of libc++ have been shipped in. This is primarily intended to allow safely deploying an executable built
+// with a new version of the library on a platform containing an older version of the built library.
+// Detailed documentation for this can be found at https://libcxx.llvm.org/VendorDocumentation.html#availability-markup
 
 // Availability markup is disabled when building the library, or when a non-Clang
 // compiler is used because only Clang supports the necessary attributes.
 //
 // We also allow users to force-disable availability markup via the `_LIBCPP_DISABLE_AVAILABILITY`
 // macro because that is the only way to work around a Clang bug related to availability
-// attributes: https://github.com/llvm/llvm-project/issues/134151.
+// attributes: https://llvm.org/PR134151.
 // Once that bug has been fixed, we should remove the macro.
 #if defined(_LIBCPP_BUILDING_LIBRARY) || defined(_LIBCXXABI_BUILDING_LIBRARY) ||                                       \
     !defined(_LIBCPP_COMPILER_CLANG_BASED) || defined(_LIBCPP_DISABLE_AVAILABILITY)
@@ -84,6 +39,9 @@
 // in all versions of the library are available.
 #if !_LIBCPP_HAS_VENDOR_AVAILABILITY_ANNOTATIONS
 
+#  define _LIBCPP_INTRODUCED_IN_LLVM_22 1
+#  define _LIBCPP_INTRODUCED_IN_LLVM_22_ATTRIBUTE /* nothing */
+
 #  define _LIBCPP_INTRODUCED_IN_LLVM_21 1
 #  define _LIBCPP_INTRODUCED_IN_LLVM_21_ATTRIBUTE /* nothing */
 
@@ -108,32 +66,55 @@
 #  define _LIBCPP_INTRODUCED_IN_LLVM_12 1
 #  define _LIBCPP_INTRODUCED_IN_LLVM_12_ATTRIBUTE /* nothing */
 
-#  define _LIBCPP_INTRODUCED_IN_LLVM_11 1
-#  define _LIBCPP_INTRODUCED_IN_LLVM_11_ATTRIBUTE /* nothing */
-
-#  define _LIBCPP_INTRODUCED_IN_LLVM_9 1
-#  define _LIBCPP_INTRODUCED_IN_LLVM_9_ATTRIBUTE      /* nothing */
-#  define _LIBCPP_INTRODUCED_IN_LLVM_9_ATTRIBUTE_PUSH /* nothing */
-#  define _LIBCPP_INTRODUCED_IN_LLVM_9_ATTRIBUTE_POP  /* nothing */
-
 #elif defined(__APPLE__)
 
 // clang-format off
 
+// LLVM 22
+// TODO: Fill this in
+#  define _LIBCPP_INTRODUCED_IN_LLVM_22 0
+#  define _LIBCPP_INTRODUCED_IN_LLVM_22_ATTRIBUTE __attribute__((unavailable))
+
 // LLVM 21
 // TODO: Fill this in
 #  define _LIBCPP_INTRODUCED_IN_LLVM_21 0
 #  define _LIBCPP_INTRODUCED_IN_LLVM_21_ATTRIBUTE __attribute__((unavailable))
 
 // LLVM 20
-// TODO: Fill this in
-#  define _LIBCPP_INTRODUCED_IN_LLVM_20 0
-#  define _LIBCPP_INTRODUCED_IN_LLVM_20_ATTRIBUTE __attribute__((unavailable))
+//
+// Note that versions for most Apple OSes were bumped forward and aligned in that release.
+#  if (defined(__ENVIRONMENT_MAC_OS_X_VERSION_MIN_REQUIRED__) && __ENVIRONMENT_MAC_OS_X_VERSION_MIN_REQUIRED__ < 260000) ||       \
+      (defined(__ENVIRONMENT_IPHONE_OS_VERSION_MIN_REQUIRED__) && __ENVIRONMENT_IPHONE_OS_VERSION_MIN_REQUIRED__ < 260000) ||     \
+      (defined(__ENVIRONMENT_TV_OS_VERSION_MIN_REQUIRED__) && __ENVIRONMENT_TV_OS_VERSION_MIN_REQUIRED__ < 260000) ||             \
+      (defined(__ENVIRONMENT_WATCH_OS_VERSION_MIN_REQUIRED__) && __ENVIRONMENT_WATCH_OS_VERSION_MIN_REQUIRED__ < 260000) ||       \
+      (defined(__ENVIRONMENT_BRIDGE_OS_VERSION_MIN_REQUIRED__) && __ENVIRONMENT_BRIDGE_OS_VERSION_MIN_REQUIRED__ < 100000)
+#    define _LIBCPP_INTRODUCED_IN_LLVM_20 0
+#  else
+#    define _LIBCPP_INTRODUCED_IN_LLVM_20 1
+#  endif
+#  define _LIBCPP_INTRODUCED_IN_LLVM_20_ATTRIBUTE                                                                 \
+    __attribute__((availability(macos, strict, introduced = 26.0)))                                               \
+    __attribute__((availability(ios, strict, introduced = 26.0)))                                                 \
+    __attribute__((availability(tvos, strict, introduced = 26.0)))                                                \
+    __attribute__((availability(watchos, strict, introduced = 26.0)))                                             \
+    __attribute__((availability(bridgeos, strict, introduced = 10.0)))
 
 // LLVM 19
-// TODO: Fill this in
-#  define _LIBCPP_INTRODUCED_IN_LLVM_19 0
-#  define _LIBCPP_INTRODUCED_IN_LLVM_19_ATTRIBUTE __attribute__((unavailable))
+#  if (defined(__ENVIRONMENT_MAC_OS_X_VERSION_MIN_REQUIRED__) && __ENVIRONMENT_MAC_OS_X_VERSION_MIN_REQUIRED__ < 150400) ||       \
+      (defined(__ENVIRONMENT_IPHONE_OS_VERSION_MIN_REQUIRED__) && __ENVIRONMENT_IPHONE_OS_VERSION_MIN_REQUIRED__ < 180400) ||     \
+      (defined(__ENVIRONMENT_TV_OS_VERSION_MIN_REQUIRED__) && __ENVIRONMENT_TV_OS_VERSION_MIN_REQUIRED__ < 180400) ||             \
+      (defined(__ENVIRONMENT_WATCH_OS_VERSION_MIN_REQUIRED__) && __ENVIRONMENT_WATCH_OS_VERSION_MIN_REQUIRED__ < 110400) ||       \
+      (defined(__ENVIRONMENT_BRIDGE_OS_VERSION_MIN_REQUIRED__) && __ENVIRONMENT_BRIDGE_OS_VERSION_MIN_REQUIRED__ < 90400)
+#    define _LIBCPP_INTRODUCED_IN_LLVM_19 0
+#  else
+#    define _LIBCPP_INTRODUCED_IN_LLVM_19 1
+#  endif
+#  define _LIBCPP_INTRODUCED_IN_LLVM_19_ATTRIBUTE                                                                 \
+    __attribute__((availability(macos, strict, introduced = 15.4)))                                               \
+    __attribute__((availability(ios, strict, introduced = 18.4)))                                                 \
+    __attribute__((availability(tvos, strict, introduced = 18.4)))                                                \
+    __attribute__((availability(watchos, strict, introduced = 11.4)))                                             \
+    __attribute__((availability(bridgeos, strict, introduced = 9.4)))
 
 // LLVM 18
 #  if (defined(__ENVIRONMENT_MAC_OS_X_VERSION_MIN_REQUIRED__) && __ENVIRONMENT_MAC_OS_X_VERSION_MIN_REQUIRED__ < 150000) ||       \
@@ -215,47 +196,13 @@
     __attribute__((availability(bridgeos, strict, introduced = 6.0)))                                             \
     __attribute__((availability(driverkit, strict, introduced = 21.3)))
 
-// LLVM 11
-#  if (defined(__ENVIRONMENT_MAC_OS_X_VERSION_MIN_REQUIRED__) && __ENVIRONMENT_MAC_OS_X_VERSION_MIN_REQUIRED__ < 110000) ||   \
-      (defined(__ENVIRONMENT_IPHONE_OS_VERSION_MIN_REQUIRED__) && __ENVIRONMENT_IPHONE_OS_VERSION_MIN_REQUIRED__ < 140000) || \
-      (defined(__ENVIRONMENT_TV_OS_VERSION_MIN_REQUIRED__) && __ENVIRONMENT_TV_OS_VERSION_MIN_REQUIRED__ < 140000) ||         \
-      (defined(__ENVIRONMENT_WATCH_OS_VERSION_MIN_REQUIRED__) && __ENVIRONMENT_WATCH_OS_VERSION_MIN_REQUIRED__ < 70000)
-#    define _LIBCPP_INTRODUCED_IN_LLVM_11 0
-#  else
-#    define _LIBCPP_INTRODUCED_IN_LLVM_11 1
+#  if (defined(__ENVIRONMENT_MAC_OS_X_VERSION_MIN_REQUIRED__)  && __ENVIRONMENT_MAC_OS_X_VERSION_MIN_REQUIRED__  < 110000) || \
+      (defined(__ENVIRONMENT_IPHONE_OS_VERSION_MIN_REQUIRED__) && __ENVIRONMENT_IPHONE_OS_VERSION_MIN_REQUIRED__ < 150000) || \
+      (defined(__ENVIRONMENT_TV_OS_VERSION_MIN_REQUIRED__)     && __ENVIRONMENT_TV_OS_VERSION_MIN_REQUIRED__     < 150000) || \
+      (defined(__ENVIRONMENT_WATCH_OS_VERSION_MIN_REQUIRED__)  && __ENVIRONMENT_WATCH_OS_VERSION_MIN_REQUIRED__  < 80000)  || \
+      (defined(__ENVIRONMENT_DRIVERKIT_VERSION_MIN_REQUIRED__) && __ENVIRONMENT_DRIVERKIT_VERSION_MIN_REQUIRED__ < 200000)
+#  warning "The selected platform is no longer supported by libc++."
 #  endif
-#  define _LIBCPP_INTRODUCED_IN_LLVM_11_ATTRIBUTE                                                                 \
-    __attribute__((availability(macos, strict, introduced = 11.0)))                                               \
-    __attribute__((availability(ios, strict, introduced = 14.0)))                                                 \
-    __attribute__((availability(tvos, strict, introduced = 14.0)))                                                \
-    __attribute__((availability(watchos, strict, introduced = 7.0)))
-
-// LLVM 9
-#  if (defined(__ENVIRONMENT_MAC_OS_X_VERSION_MIN_REQUIRED__) && __ENVIRONMENT_MAC_OS_X_VERSION_MIN_REQUIRED__ < 101500) ||   \
-      (defined(__ENVIRONMENT_IPHONE_OS_VERSION_MIN_REQUIRED__) && __ENVIRONMENT_IPHONE_OS_VERSION_MIN_REQUIRED__ < 130000) || \
-      (defined(__ENVIRONMENT_TV_OS_VERSION_MIN_REQUIRED__) && __ENVIRONMENT_TV_OS_VERSION_MIN_REQUIRED__ < 130000) ||         \
-      (defined(__ENVIRONMENT_WATCH_OS_VERSION_MIN_REQUIRED__) && __ENVIRONMENT_WATCH_OS_VERSION_MIN_REQUIRED__ < 60000)
-#    define _LIBCPP_INTRODUCED_IN_LLVM_9 0
-#  else
-#    define _LIBCPP_INTRODUCED_IN_LLVM_9 1
-#  endif
-#  define _LIBCPP_INTRODUCED_IN_LLVM_9_ATTRIBUTE                                                                  \
-    __attribute__((availability(macos, strict, introduced = 10.15)))                                              \
-    __attribute__((availability(ios, strict, introduced = 13.0)))                                                 \
-    __attribute__((availability(tvos, strict, introduced = 13.0)))                                                \
-    __attribute__((availability(watchos, strict, introduced = 6.0)))
-#  define _LIBCPP_INTRODUCED_IN_LLVM_9_ATTRIBUTE_PUSH                                                                            \
-    _Pragma("clang attribute push(__attribute__((availability(macos,strict,introduced=10.15))), apply_to=any(function,record))") \
-    _Pragma("clang attribute push(__attribute__((availability(ios,strict,introduced=13.0))), apply_to=any(function,record))")    \
-    _Pragma("clang attribute push(__attribute__((availability(tvos,strict,introduced=13.0))), apply_to=any(function,record))")   \
-    _Pragma("clang attribute push(__attribute__((availability(watchos,strict,introduced=6.0))), apply_to=any(function,record))")
-#  define _LIBCPP_INTRODUCED_IN_LLVM_9_ATTRIBUTE_POP                                                                    \
-    _Pragma("clang attribute pop") \
-    _Pragma("clang attribute pop") \
-    _Pragma("clang attribute pop") \
-    _Pragma("clang attribute pop")
-
-// clang-format on
 
 #else
 
@@ -266,19 +213,12 @@
 
 #endif
 
-// These macros control the availability of all parts of <filesystem> that
-// depend on something in the dylib.
-#define _LIBCPP_AVAILABILITY_HAS_FILESYSTEM_LIBRARY _LIBCPP_INTRODUCED_IN_LLVM_9
-#define _LIBCPP_AVAILABILITY_FILESYSTEM_LIBRARY _LIBCPP_INTRODUCED_IN_LLVM_9_ATTRIBUTE
-#define _LIBCPP_AVAILABILITY_FILESYSTEM_LIBRARY_PUSH _LIBCPP_INTRODUCED_IN_LLVM_9_ATTRIBUTE_PUSH
-#define _LIBCPP_AVAILABILITY_FILESYSTEM_LIBRARY_POP _LIBCPP_INTRODUCED_IN_LLVM_9_ATTRIBUTE_POP
-
-// This controls the availability of the C++20 synchronization library,
-// which requires shared library support for various operations
-// (see libcxx/src/atomic.cpp). This includes <barier>, <latch>,
-// <semaphore>, and notification functions on std::atomic.
-#define _LIBCPP_AVAILABILITY_HAS_SYNC _LIBCPP_INTRODUCED_IN_LLVM_11
-#define _LIBCPP_AVAILABILITY_SYNC _LIBCPP_INTRODUCED_IN_LLVM_11_ATTRIBUTE
+// This controls the availability of the new implementation of std::atomic's
+// wait, notify_one and notify_all. The new implementation uses
+// the native atomic wait/notify operations on platforms that support them
+// based on the size of the atomic type, instead of the type itself.
+#define _LIBCPP_AVAILABILITY_HAS_NEW_SYNC _LIBCPP_INTRODUCED_IN_LLVM_22
+#define _LIBCPP_AVAILABILITY_NEW_SYNC _LIBCPP_INTRODUCED_IN_LLVM_22_ATTRIBUTE
 
 // Enable additional explicit instantiations of iostreams components. This
 // reduces the number of weak definitions generated in programs that use
@@ -307,7 +247,7 @@
 // This controls the availability of the C++17 std::pmr library,
 // which is implemented in large part in the built library.
 //
-// TODO: Enable std::pmr markup once https://github.com/llvm/llvm-project/issues/40340 has been fixed
+// TODO: Enable std::pmr markup once https://llvm.org/PR40340 has been fixed
 //       Until then, it is possible for folks to try to use `std::pmr` when back-deploying to targets that don't support
 //       it and it'll be a load-time error, but we don't have a good alternative because the library won't compile if we
 //       use availability annotations until that bug has been fixed.
@@ -364,4 +304,11 @@
 #  define _LIBCPP_AVAILABILITY_INIT_PRIMARY_EXCEPTION
 #endif
 
+// Only define a bunch of symbols in the dylib if we need to be compatible with LLVM 7 headers or older
+#  if defined(_LIBCPP_BUILDING_LIBRARY) && _LIBCPP_AVAILABILITY_MINIMUM_HEADER_VERSION < 8
+#    define _LIBCPP_HIDE_FROM_ABI_SINCE_LLVM8
+#  else
+#    define _LIBCPP_HIDE_FROM_ABI_SINCE_LLVM8 _LIBCPP_HIDE_FROM_ABI
+#  endif
+
 #endif // _LIBCPP___CONFIGURATION_AVAILABILITY_H
diff --git a/lib/libcxx/include/__configuration/compiler.h b/lib/libcxx/include/__configuration/compiler.h
index 54025c5b22..302b7ced67 100644
--- a/lib/libcxx/include/__configuration/compiler.h
+++ b/lib/libcxx/include/__configuration/compiler.h
@@ -33,16 +33,16 @@
 // Warn if a compiler version is used that is not supported anymore
 // LLVM RELEASE Update the minimum compiler versions
 #  if defined(_LIBCPP_CLANG_VER)
-#    if _LIBCPP_CLANG_VER < 1900
-#      warning "Libc++ only supports Clang 19 and later"
+#    if _LIBCPP_CLANG_VER < 2001
+#      warning "Libc++ only supports Clang 20 and later"
 #    endif
 #  elif defined(_LIBCPP_APPLE_CLANG_VER)
-#    if _LIBCPP_APPLE_CLANG_VER < 1500
-#      warning "Libc++ only supports AppleClang 15 and later"
+#    if _LIBCPP_APPLE_CLANG_VER < 1700
+#      warning "Libc++ only supports AppleClang 26 and later"
 #    endif
 #  elif defined(_LIBCPP_GCC_VER)
-#    if _LIBCPP_GCC_VER < 1400
-#      warning "Libc++ only supports GCC 14 and later"
+#    if _LIBCPP_GCC_VER < 1500
+#      warning "Libc++ only supports GCC 15 and later"
 #    endif
 #  endif
 
diff --git a/lib/libcxx/include/__configuration/experimental.h b/lib/libcxx/include/__configuration/experimental.h
new file mode 100644
index 0000000000..c688b017da
--- /dev/null
+++ b/lib/libcxx/include/__configuration/experimental.h
@@ -0,0 +1,38 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef _LIBCPP___CONFIGURATION_EXPERIMENTAL_H
+#define _LIBCPP___CONFIGURATION_EXPERIMENTAL_H
+
+/* zig patch: instead of including __config_site, zig adds -D flags when compiling */
+
+#ifndef _LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER
+#  pragma GCC system_header
+#endif
+
+#if __has_feature(experimental_library)
+#  ifndef _LIBCPP_ENABLE_EXPERIMENTAL
+#    define _LIBCPP_ENABLE_EXPERIMENTAL
+#  endif
+#endif
+
+// Incomplete features get their own specific disabling flags. This makes it
+// easier to grep for target specific flags once the feature is complete.
+#if defined(_LIBCPP_ENABLE_EXPERIMENTAL) || defined(_LIBCPP_BUILDING_LIBRARY)
+#  define _LIBCPP_HAS_EXPERIMENTAL_LIBRARY 1
+#else
+#  define _LIBCPP_HAS_EXPERIMENTAL_LIBRARY 0
+#endif
+
+#define _LIBCPP_HAS_EXPERIMENTAL_PSTL _LIBCPP_HAS_EXPERIMENTAL_LIBRARY
+#define _LIBCPP_HAS_EXPERIMENTAL_TZDB _LIBCPP_HAS_EXPERIMENTAL_LIBRARY
+#define _LIBCPP_HAS_EXPERIMENTAL_SYNCSTREAM _LIBCPP_HAS_EXPERIMENTAL_LIBRARY
+#define _LIBCPP_HAS_EXPERIMENTAL_HARDENING_OBSERVE_SEMANTIC _LIBCPP_HAS_EXPERIMENTAL_LIBRARY
+#define _LIBCPP_HAS_EXPERIMENTAL_OPTIONAL_ITERATOR _LIBCPP_HAS_EXPERIMENTAL_LIBRARY
+
+#endif // _LIBCPP___CONFIGURATION_EXPERIMENTAL_H
diff --git a/lib/libcxx/include/__configuration/hardening.h b/lib/libcxx/include/__configuration/hardening.h
new file mode 100644
index 0000000000..bd172d99ac
--- /dev/null
+++ b/lib/libcxx/include/__configuration/hardening.h
@@ -0,0 +1,215 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef _LIBCPP___CONFIGURATION_HARDENING_H
+#define _LIBCPP___CONFIGURATION_HARDENING_H
+
+/* zig patch: instead of including __config_site, zig adds -D flags when compiling */
+#include <__configuration/experimental.h>
+#include <__configuration/language.h>
+
+#ifndef _LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER
+#  pragma GCC system_header
+#endif
+
+// TODO(LLVM 23): Remove this. We're making these an error to catch folks who might not have migrated.
+//       Since hardening went through several changes (many of which impacted user-facing macros),
+//       we're keeping these checks around for a bit longer than usual. Failure to properly configure
+//       hardening results in checks being dropped silently, which is a pretty big deal.
+#if defined(_LIBCPP_ENABLE_ASSERTIONS)
+#  error "_LIBCPP_ENABLE_ASSERTIONS has been removed, please use _LIBCPP_HARDENING_MODE=<mode> instead (see docs)"
+#endif
+#if defined(_LIBCPP_ENABLE_HARDENED_MODE)
+#  error "_LIBCPP_ENABLE_HARDENED_MODE has been removed, please use _LIBCPP_HARDENING_MODE=<mode> instead (see docs)"
+#endif
+#if defined(_LIBCPP_ENABLE_SAFE_MODE)
+#  error "_LIBCPP_ENABLE_SAFE_MODE has been removed, please use _LIBCPP_HARDENING_MODE=<mode> instead (see docs)"
+#endif
+#if defined(_LIBCPP_ENABLE_DEBUG_MODE)
+#  error "_LIBCPP_ENABLE_DEBUG_MODE has been removed, please use _LIBCPP_HARDENING_MODE=<mode> instead (see docs)"
+#endif
+
+// The library provides the macro `_LIBCPP_HARDENING_MODE` which can be set to one of the following values:
+//
+// - `_LIBCPP_HARDENING_MODE_NONE`;
+// - `_LIBCPP_HARDENING_MODE_FAST`;
+// - `_LIBCPP_HARDENING_MODE_EXTENSIVE`;
+// - `_LIBCPP_HARDENING_MODE_DEBUG`.
+//
+// These values have the following effects:
+//
+// - `_LIBCPP_HARDENING_MODE_NONE` -- sets the hardening mode to "none" which disables all runtime hardening checks;
+//
+// - `_LIBCPP_HARDENING_MODE_FAST` -- sets the hardening mode to "fast". The fast mode enables security-critical checks
+//   that can be done with relatively little runtime overhead in constant time;
+//
+// - `_LIBCPP_HARDENING_MODE_EXTENSIVE` -- sets the hardening mode to "extensive". The extensive mode is a superset of
+//   the fast mode that additionally enables checks that are relatively cheap and prevent common types of logic errors
+//   but are not necessarily security-critical;
+//
+// - `_LIBCPP_HARDENING_MODE_DEBUG` -- sets the hardening mode to "debug". The debug mode is a superset of the extensive
+//   mode and enables all checks available in the library, including internal assertions. Checks that are part of the
+//   debug mode can be very expensive and thus the debug mode is intended to be used for testing, not in production.
+
+// Inside the library, assertions are categorized so they can be cherry-picked based on the chosen hardening mode. These
+// macros are only for internal use -- users should only pick one of the high-level hardening modes described above.
+//
+// - `_LIBCPP_ASSERT_VALID_INPUT_RANGE` -- checks that ranges (whether expressed as an iterator pair, an iterator and
+//   a sentinel, an iterator and a count, or a `std::range`) given as input to library functions are valid:
+//   - the sentinel is reachable from the begin iterator;
+//   - TODO(hardening): both iterators refer to the same container.
+//
+// - `_LIBCPP_ASSERT_VALID_ELEMENT_ACCESS` -- checks that any attempts to access a container element, whether through
+//   the container object or through an iterator, are valid and do not attempt to go out of bounds or otherwise access
+//   a non-existent element. For iterator checks to work, bounded iterators must be enabled in the ABI. Types like
+//   `optional` and `function` are considered one-element containers for the purposes of this check.
+//
+// - `_LIBCPP_ASSERT_NON_NULL` -- checks that the pointer being dereferenced is not null. On most modern platforms zero
+//   address does not refer to an actual location in memory, so a null pointer dereference would not compromise the
+//   memory security of a program (however, it is still undefined behavior that can result in strange errors due to
+//   compiler optimizations).
+//
+// - `_LIBCPP_ASSERT_NON_OVERLAPPING_RANGES` -- for functions that take several ranges as arguments, checks that the
+//   given ranges do not overlap.
+//
+// - `_LIBCPP_ASSERT_VALID_DEALLOCATION` -- checks that an attempt to deallocate memory is valid (e.g. the given object
+//   was allocated by the given allocator). Violating this category typically results in a memory leak.
+//
+// - `_LIBCPP_ASSERT_VALID_EXTERNAL_API_CALL` -- checks that a call to an external API doesn't fail in
+//   an unexpected manner. This includes triggering documented cases of undefined behavior in an external library (like
+//   attempting to unlock an unlocked mutex in pthreads). Any API external to the library falls under this category
+//   (from system calls to compiler intrinsics). We generally don't expect these failures to compromise memory safety or
+//   otherwise create an immediate security issue.
+//
+// - `_LIBCPP_ASSERT_COMPATIBLE_ALLOCATOR` -- checks any operations that exchange nodes between containers to make sure
+//   the containers have compatible allocators.
+//
+// - `_LIBCPP_ASSERT_ARGUMENT_WITHIN_DOMAIN` -- checks that the given argument is within the domain of valid arguments
+//   for the function. Violating this typically produces an incorrect result (e.g. the clamp algorithm returns the
+//   original value without clamping it due to incorrect functors) or puts an object into an invalid state (e.g.
+//   a string view where only a subset of elements is possible to access). This category is for assertions violating
+//   which doesn't cause any immediate issues in the library -- whatever the consequences are, they will happen in the
+//   user code.
+//
+// - `_LIBCPP_ASSERT_PEDANTIC` -- checks prerequisites which are imposed by the Standard, but violating which happens to
+//   be benign in our implementation.
+//
+// - `_LIBCPP_ASSERT_SEMANTIC_REQUIREMENT` -- checks that the given argument satisfies the semantic requirements imposed
+//   by the Standard. Typically, there is no simple way to completely prove that a semantic requirement is satisfied;
+//   thus, this would often be a heuristic check and it might be quite expensive.
+//
+// - `_LIBCPP_ASSERT_INTERNAL` -- checks that internal invariants of the library hold. These assertions don't depend on
+//   user input.
+//
+// - `_LIBCPP_ASSERT_UNCATEGORIZED` -- for assertions that haven't been properly classified yet.
+
+// clang-format off
+#  define _LIBCPP_HARDENING_MODE_NONE      (1 << 1)
+#  define _LIBCPP_HARDENING_MODE_FAST      (1 << 2)
+#  define _LIBCPP_HARDENING_MODE_EXTENSIVE (1 << 4) // Deliberately not ordered.
+#  define _LIBCPP_HARDENING_MODE_DEBUG     (1 << 3)
+// clang-format on
+
+#ifndef _LIBCPP_HARDENING_MODE
+
+#  ifndef _LIBCPP_HARDENING_MODE_DEFAULT
+#    error _LIBCPP_HARDENING_MODE_DEFAULT is not defined. This definition should be set at configuration time in the \
+`__config_site` header, please make sure your installation of libc++ is not broken.
+#  endif
+
+#  define _LIBCPP_HARDENING_MODE _LIBCPP_HARDENING_MODE_DEFAULT
+#endif
+
+#if _LIBCPP_HARDENING_MODE != _LIBCPP_HARDENING_MODE_NONE && _LIBCPP_HARDENING_MODE != _LIBCPP_HARDENING_MODE_FAST &&  \
+    _LIBCPP_HARDENING_MODE != _LIBCPP_HARDENING_MODE_EXTENSIVE &&                                                      \
+    _LIBCPP_HARDENING_MODE != _LIBCPP_HARDENING_MODE_DEBUG
+#  error _LIBCPP_HARDENING_MODE must be set to one of the following values: \
+_LIBCPP_HARDENING_MODE_NONE, \
+_LIBCPP_HARDENING_MODE_FAST, \
+_LIBCPP_HARDENING_MODE_EXTENSIVE, \
+_LIBCPP_HARDENING_MODE_DEBUG
+#endif
+
+// The library provides the macro `_LIBCPP_ASSERTION_SEMANTIC` for configuring the assertion semantic used by hardening;
+// it can be set to one of the following values:
+//
+// - `_LIBCPP_ASSERTION_SEMANTIC_IGNORE`;
+// - `_LIBCPP_ASSERTION_SEMANTIC_OBSERVE`;
+// - `_LIBCPP_ASSERTION_SEMANTIC_QUICK_ENFORCE`;
+// - `_LIBCPP_ASSERTION_SEMANTIC_ENFORCE`.
+//
+// libc++ assertion semantics generally mirror the evaluation semantics of C++26 Contracts:
+// - `ignore` evaluates the assertion but doesn't do anything if it fails (note that it differs from the Contracts
+//   `ignore` semantic which wouldn't evaluate the assertion at all);
+// - `observe` logs an error (indicating, if possible, that the error is fatal) and continues execution;
+// - `quick-enforce` terminates the program as fast as possible (via trapping);
+// - `enforce` logs an error and then terminates the program.
+//
+// Additionally, a special `hardening-dependent` value selects the assertion semantic based on the hardening mode in
+// effect: the production-capable modes (`fast` and `extensive`) map to `quick-enforce` and the `debug` mode maps to
+// `enforce`. The `hardening-dependent` semantic cannot be selected explicitly, it is only used when no assertion
+// semantic is provided by the user _and_ the library's default semantic is configured to be dependent on hardening.
+//
+// Notes:
+// - Continuing execution after a hardening check fails results in undefined behavior; the `observe` semantic is meant
+//   to make adopting hardening easier but should not be used outside of this scenario;
+// - C++26 wording for Library Hardening precludes a conforming Hardened implementation from using the Contracts
+//   `ignore` semantic when evaluating hardened preconditions in the Library. Libc++ allows using this semantic for
+//   hardened preconditions, however, be aware that using `ignore` does not produce a conforming "Hardened"
+//   implementation, unlike the other semantics above.
+// clang-format off
+#  define _LIBCPP_ASSERTION_SEMANTIC_HARDENING_DEPENDENT (1 << 1)
+#  define _LIBCPP_ASSERTION_SEMANTIC_IGNORE              (1 << 2)
+#  define _LIBCPP_ASSERTION_SEMANTIC_OBSERVE             (1 << 3)
+#  define _LIBCPP_ASSERTION_SEMANTIC_QUICK_ENFORCE       (1 << 4)
+#  define _LIBCPP_ASSERTION_SEMANTIC_ENFORCE             (1 << 5)
+// clang-format on
+
+// If the user attempts to configure the assertion semantic, check that it is allowed in the current environment.
+#if defined(_LIBCPP_ASSERTION_SEMANTIC)
+#  if !_LIBCPP_HAS_EXPERIMENTAL_LIBRARY
+#    error "Assertion semantics are an experimental feature."
+#  endif
+#  if defined(_LIBCPP_CXX03_LANG)
+#    error "Assertion semantics are not available in the C++03 mode."
+#  endif
+#endif // defined(_LIBCPP_ASSERTION_SEMANTIC)
+
+// User-provided semantic takes top priority -- don't override if set.
+#ifndef _LIBCPP_ASSERTION_SEMANTIC
+
+#  ifndef _LIBCPP_ASSERTION_SEMANTIC_DEFAULT
+#    error _LIBCPP_ASSERTION_SEMANTIC_DEFAULT is not defined. This definition should be set at configuration time in \
+the `__config_site` header, please make sure your installation of libc++ is not broken.
+#  endif
+
+#  if _LIBCPP_ASSERTION_SEMANTIC_DEFAULT != _LIBCPP_ASSERTION_SEMANTIC_HARDENING_DEPENDENT
+#    define _LIBCPP_ASSERTION_SEMANTIC _LIBCPP_ASSERTION_SEMANTIC_DEFAULT
+#  else
+#    if _LIBCPP_HARDENING_MODE == _LIBCPP_HARDENING_MODE_DEBUG
+#      define _LIBCPP_ASSERTION_SEMANTIC _LIBCPP_ASSERTION_SEMANTIC_ENFORCE
+#    else
+#      define _LIBCPP_ASSERTION_SEMANTIC _LIBCPP_ASSERTION_SEMANTIC_QUICK_ENFORCE
+#    endif
+#  endif // _LIBCPP_ASSERTION_SEMANTIC_DEFAULT != _LIBCPP_ASSERTION_SEMANTIC_HARDENING_DEPENDENT
+
+#endif // #ifndef _LIBCPP_ASSERTION_SEMANTIC
+
+// Finally, validate the selected semantic (in case the user tries setting it to an incorrect value):
+#if _LIBCPP_ASSERTION_SEMANTIC != _LIBCPP_ASSERTION_SEMANTIC_IGNORE &&                                                 \
+    _LIBCPP_ASSERTION_SEMANTIC != _LIBCPP_ASSERTION_SEMANTIC_OBSERVE &&                                                \
+    _LIBCPP_ASSERTION_SEMANTIC != _LIBCPP_ASSERTION_SEMANTIC_QUICK_ENFORCE &&                                          \
+    _LIBCPP_ASSERTION_SEMANTIC != _LIBCPP_ASSERTION_SEMANTIC_ENFORCE
+#  error _LIBCPP_ASSERTION_SEMANTIC must be set to one of the following values: \
+_LIBCPP_ASSERTION_SEMANTIC_IGNORE, \
+_LIBCPP_ASSERTION_SEMANTIC_OBSERVE, \
+_LIBCPP_ASSERTION_SEMANTIC_QUICK_ENFORCE, \
+_LIBCPP_ASSERTION_SEMANTIC_ENFORCE
+#endif
+
+#endif // _LIBCPP___CONFIGURATION_HARDENING_H
diff --git a/lib/libcxx/include/__configuration/language.h b/lib/libcxx/include/__configuration/language.h
index 6cf5805f2b..6fef4f396b 100644
--- a/lib/libcxx/include/__configuration/language.h
+++ b/lib/libcxx/include/__configuration/language.h
@@ -18,6 +18,9 @@
 
 // NOLINTBEGIN(libcpp-cpp-version-check)
 #ifdef __cplusplus
+#  if __cplusplus < 201103L
+#    define _LIBCPP_CXX03_LANG
+#  endif
 #  if __cplusplus <= 201103L
 #    define _LIBCPP_STD_VER 11
 #  elif __cplusplus <= 201402L
diff --git a/lib/libcxx/include/__configuration/platform.h b/lib/libcxx/include/__configuration/platform.h
index 1a83b0dc27..7492346fa1 100644
--- a/lib/libcxx/include/__configuration/platform.h
+++ b/lib/libcxx/include/__configuration/platform.h
@@ -31,22 +31,15 @@
 #endif
 
 // Need to detect which libc we're using if we're on Linux.
-#if defined(__linux__) || defined(__AMDGPU__) || defined(__NVPTX__)
-#  if __has_include(<features.h>)
-#    include <features.h>
-#    if defined(__GLIBC_PREREQ)
-#      define _LIBCPP_GLIBC_PREREQ(a, b) __GLIBC_PREREQ(a, b)
-#    else
-#      define _LIBCPP_GLIBC_PREREQ(a, b) 0
-#    endif // defined(__GLIBC_PREREQ)
-#  endif
-#endif
-
-// This is required in order for _NEWLIB_VERSION to be defined in places where we use it.
-// TODO: We shouldn't be including arbitrarily-named headers from libc++ since this can break valid
-//       user code. Move code paths that need _NEWLIB_VERSION to another customization mechanism.
-#if __has_include(<picolibc.h>)
-#  include <picolibc.h>
+#if (defined(__linux__) || defined(__AMDGPU__) || defined(__NVPTX__)) && __has_include(<features.h>)
+#  include <features.h>
+#  if defined(__GLIBC_PREREQ)
+#    define _LIBCPP_GLIBC_PREREQ(a, b) __GLIBC_PREREQ(a, b)
+#  else
+#    define _LIBCPP_GLIBC_PREREQ(a, b) 0
+#  endif // defined(__GLIBC_PREREQ)
+#else
+#  define _LIBCPP_GLIBC_PREREQ(a, b) 0
 #endif
 
 #ifndef __BYTE_ORDER__
diff --git a/lib/libcxx/include/__coroutine/coroutine_handle.h b/lib/libcxx/include/__coroutine/coroutine_handle.h
index b7add25851..b26a650748 100644
--- a/lib/libcxx/include/__coroutine/coroutine_handle.h
+++ b/lib/libcxx/include/__coroutine/coroutine_handle.h
@@ -44,9 +44,9 @@ public:
   }
 
   // [coroutine.handle.export.import], export/import
-  _LIBCPP_HIDE_FROM_ABI constexpr void* address() const noexcept { return __handle_; }
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr void* address() const noexcept { return __handle_; }
 
-  _LIBCPP_HIDE_FROM_ABI static constexpr coroutine_handle from_address(void* __addr) noexcept {
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI static constexpr coroutine_handle from_address(void* __addr) noexcept {
     coroutine_handle __tmp;
     __tmp.__handle_ = __addr;
     return __tmp;
@@ -55,7 +55,7 @@ public:
   // [coroutine.handle.observers], observers
   _LIBCPP_HIDE_FROM_ABI constexpr explicit operator bool() const noexcept { return __handle_ != nullptr; }
 
-  _LIBCPP_HIDE_FROM_ABI bool done() const {
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI bool done() const {
     _LIBCPP_ASSERT_VALID_EXTERNAL_API_CALL(__is_suspended(), "done() can be called only on suspended coroutines");
     return __builtin_coro_done(__handle_);
   }
@@ -100,7 +100,7 @@ public:
 
   _LIBCPP_HIDE_FROM_ABI constexpr coroutine_handle(nullptr_t) noexcept {}
 
-  _LIBCPP_HIDE_FROM_ABI static coroutine_handle from_promise(_Promise& __promise) {
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI static coroutine_handle from_promise(_Promise& __promise) {
     using _RawPromise = __remove_cv_t<_Promise>;
     coroutine_handle __tmp;
     __tmp.__handle_ =
@@ -114,9 +114,9 @@ public:
   }
 
   // [coroutine.handle.export.import], export/import
-  _LIBCPP_HIDE_FROM_ABI constexpr void* address() const noexcept { return __handle_; }
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr void* address() const noexcept { return __handle_; }
 
-  _LIBCPP_HIDE_FROM_ABI static constexpr coroutine_handle from_address(void* __addr) noexcept {
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI static constexpr coroutine_handle from_address(void* __addr) noexcept {
     coroutine_handle __tmp;
     __tmp.__handle_ = __addr;
     return __tmp;
@@ -130,7 +130,7 @@ public:
   // [coroutine.handle.observers], observers
   _LIBCPP_HIDE_FROM_ABI constexpr explicit operator bool() const noexcept { return __handle_ != nullptr; }
 
-  _LIBCPP_HIDE_FROM_ABI bool done() const {
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI bool done() const {
     _LIBCPP_ASSERT_VALID_EXTERNAL_API_CALL(__is_suspended(), "done() can be called only on suspended coroutines");
     return __builtin_coro_done(__handle_);
   }
@@ -150,7 +150,7 @@ public:
   }
 
   // [coroutine.handle.promise], promise access
-  _LIBCPP_HIDE_FROM_ABI _Promise& promise() const {
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI _Promise& promise() const {
     return *static_cast<_Promise*>(__builtin_coro_promise(this->__handle_, alignof(_Promise), false));
   }
 
@@ -165,7 +165,7 @@ private:
 // [coroutine.handle.hash]
 template <class _Tp>
 struct hash<coroutine_handle<_Tp>> {
-  _LIBCPP_HIDE_FROM_ABI size_t operator()(const coroutine_handle<_Tp>& __v) const noexcept {
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI size_t operator()(const coroutine_handle<_Tp>& __v) const noexcept {
     return hash<void*>()(__v.address());
   }
 };
diff --git a/lib/libcxx/include/__coroutine/noop_coroutine_handle.h b/lib/libcxx/include/__coroutine/noop_coroutine_handle.h
index 2b2838b6bf..b9c54d3b42 100644
--- a/lib/libcxx/include/__coroutine/noop_coroutine_handle.h
+++ b/lib/libcxx/include/__coroutine/noop_coroutine_handle.h
@@ -20,8 +20,6 @@
 
 _LIBCPP_BEGIN_NAMESPACE_STD
 
-#  if __has_builtin(__builtin_coro_noop) || defined(_LIBCPP_COMPILER_GCC)
-
 // [coroutine.noop]
 // [coroutine.promise.noop]
 struct noop_coroutine_promise {};
@@ -37,7 +35,7 @@ public:
 
   // [coroutine.handle.noop.observers], observers
   _LIBCPP_HIDE_FROM_ABI constexpr explicit operator bool() const noexcept { return true; }
-  _LIBCPP_HIDE_FROM_ABI constexpr bool done() const noexcept { return false; }
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr bool done() const noexcept { return false; }
 
   // [coroutine.handle.noop.resumption], resumption
   _LIBCPP_HIDE_FROM_ABI constexpr void operator()() const noexcept {}
@@ -45,23 +43,23 @@ public:
   _LIBCPP_HIDE_FROM_ABI constexpr void destroy() const noexcept {}
 
   // [coroutine.handle.noop.promise], promise access
-  _LIBCPP_HIDE_FROM_ABI noop_coroutine_promise& promise() const noexcept {
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI noop_coroutine_promise& promise() const noexcept {
     return *static_cast<noop_coroutine_promise*>(
         __builtin_coro_promise(this->__handle_, alignof(noop_coroutine_promise), false));
   }
 
   // [coroutine.handle.noop.address], address
-  _LIBCPP_HIDE_FROM_ABI constexpr void* address() const noexcept { return __handle_; }
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr void* address() const noexcept { return __handle_; }
 
 private:
   _LIBCPP_HIDE_FROM_ABI friend coroutine_handle<noop_coroutine_promise> noop_coroutine() noexcept;
 
-#    if __has_builtin(__builtin_coro_noop)
+#  if __has_builtin(__builtin_coro_noop)
   _LIBCPP_HIDE_FROM_ABI coroutine_handle() noexcept { this->__handle_ = __builtin_coro_noop(); }
 
   void* __handle_ = nullptr;
 
-#    elif defined(_LIBCPP_COMPILER_GCC)
+#  elif defined(_LIBCPP_COMPILER_GCC)
   // GCC doesn't implement __builtin_coro_noop().
   // Construct the coroutine frame manually instead.
   struct __noop_coroutine_frame_ty_ {
@@ -78,19 +76,19 @@ private:
 
   _LIBCPP_HIDE_FROM_ABI coroutine_handle() noexcept = default;
 
-#    endif // __has_builtin(__builtin_coro_noop)
+#  endif // __has_builtin(__builtin_coro_noop)
 };
 
 using noop_coroutine_handle = coroutine_handle<noop_coroutine_promise>;
 
-#    if defined(_LIBCPP_COMPILER_GCC)
+#  if defined(_LIBCPP_COMPILER_GCC)
 inline noop_coroutine_handle::__noop_coroutine_frame_ty_ noop_coroutine_handle::__noop_coroutine_frame_{};
-#    endif
+#  endif
 
 // [coroutine.noop.coroutine]
-inline _LIBCPP_HIDE_FROM_ABI noop_coroutine_handle noop_coroutine() noexcept { return noop_coroutine_handle(); }
-
-#  endif // __has_builtin(__builtin_coro_noop) || defined(_LIBCPP_COMPILER_GCC)
+[[nodiscard]] inline _LIBCPP_HIDE_FROM_ABI noop_coroutine_handle noop_coroutine() noexcept {
+  return noop_coroutine_handle();
+}
 
 _LIBCPP_END_NAMESPACE_STD
 
diff --git a/lib/libcxx/include/__debug_utils/strict_weak_ordering_check.h b/lib/libcxx/include/__debug_utils/strict_weak_ordering_check.h
index 3a9d887284..3724ca95c5 100644
--- a/lib/libcxx/include/__debug_utils/strict_weak_ordering_check.h
+++ b/lib/libcxx/include/__debug_utils/strict_weak_ordering_check.h
@@ -27,7 +27,7 @@ template <class _RandomAccessIterator, class _Comp>
 _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 void
 __check_strict_weak_ordering_sorted(_RandomAccessIterator __first, _RandomAccessIterator __last, _Comp& __comp) {
 #if _LIBCPP_HARDENING_MODE == _LIBCPP_HARDENING_MODE_DEBUG
-  using __diff_t  = __iter_diff_t<_RandomAccessIterator>;
+  using __diff_t  = __iterator_difference_type<_RandomAccessIterator>;
   using _Comp_ref = __comp_ref_type<_Comp>;
   if (!__libcpp_is_constant_evaluated()) {
     // Check if the range is actually sorted.
diff --git a/lib/libcxx/include/__exception/exception.h b/lib/libcxx/include/__exception/exception.h
index f7dab6e83a..ddc34b0fa8 100644
--- a/lib/libcxx/include/__exception/exception.h
+++ b/lib/libcxx/include/__exception/exception.h
@@ -48,13 +48,15 @@ public:
     __data_._DoFree = true;
   }
 
-  exception(exception const&) _NOEXCEPT {}
+  exception(exception const&) _NOEXCEPT : __data_() {}
 
   exception& operator=(exception const&) _NOEXCEPT { return *this; }
 
   virtual ~exception() _NOEXCEPT {}
 
-  virtual char const* what() const _NOEXCEPT { return __data_._What ? __data_._What : "Unknown exception"; }
+  [[__nodiscard__]] virtual char const* what() const _NOEXCEPT {
+    return __data_._What ? __data_._What : "Unknown exception";
+  }
 
 private:
   __std_exception_data __data_;
@@ -76,7 +78,7 @@ public:
   _LIBCPP_HIDE_FROM_ABI exception& operator=(const exception&) _NOEXCEPT = default;
 
   virtual ~exception() _NOEXCEPT;
-  virtual const char* what() const _NOEXCEPT;
+  [[__nodiscard__]] virtual const char* what() const _NOEXCEPT;
 };
 
 class _LIBCPP_EXPORTED_FROM_ABI bad_exception : public exception {
@@ -85,7 +87,7 @@ public:
   _LIBCPP_HIDE_FROM_ABI bad_exception(const bad_exception&) _NOEXCEPT            = default;
   _LIBCPP_HIDE_FROM_ABI bad_exception& operator=(const bad_exception&) _NOEXCEPT = default;
   ~bad_exception() _NOEXCEPT override;
-  const char* what() const _NOEXCEPT override;
+  [[__nodiscard__]] const char* what() const _NOEXCEPT override;
 };
 #endif // !_LIBCPP_ABI_VCRUNTIME
 
diff --git a/lib/libcxx/include/__exception/exception_ptr.h b/lib/libcxx/include/__exception/exception_ptr.h
index 796fa924be..92ff5c701e 100644
--- a/lib/libcxx/include/__exception/exception_ptr.h
+++ b/lib/libcxx/include/__exception/exception_ptr.h
@@ -11,18 +11,24 @@
 
 #include <__config>
 #include <__cstddef/nullptr_t.h>
+#include <__cstddef/size_t.h>
 #include <__exception/operations.h>
 #include <__memory/addressof.h>
 #include <__memory/construct_at.h>
 #include <__type_traits/decay.h>
 #include <__type_traits/is_pointer.h>
-#include <cstdlib>
+#include <__utility/move.h>
+#include <__utility/swap.h>
+#include <__verbose_abort>
 #include <typeinfo>
 
 #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
 #  pragma GCC system_header
 #endif
 
+_LIBCPP_PUSH_MACROS
+#include <__undef_macros>
+
 #ifndef _LIBCPP_ABI_MICROSOFT
 
 #  if _LIBCPP_AVAILABILITY_HAS_INIT_PRIMARY_EXCEPTION
@@ -30,7 +36,7 @@
 namespace __cxxabiv1 {
 
 extern "C" {
-_LIBCPP_OVERRIDABLE_FUNC_VIS void* __cxa_allocate_exception(size_t) throw();
+_LIBCPP_OVERRIDABLE_FUNC_VIS void* __cxa_allocate_exception(std::size_t) throw();
 _LIBCPP_OVERRIDABLE_FUNC_VIS void __cxa_free_exception(void*) throw();
 
 struct __cxa_exception;
@@ -57,6 +63,8 @@ _LIBCPP_BEGIN_UNVERSIONED_NAMESPACE_STD
 
 #ifndef _LIBCPP_ABI_MICROSOFT
 
+inline _LIBCPP_HIDE_FROM_ABI void swap(exception_ptr& __x, exception_ptr& __y) _NOEXCEPT;
+
 class _LIBCPP_EXPORTED_FROM_ABI exception_ptr {
   void* __ptr_;
 
@@ -67,15 +75,21 @@ class _LIBCPP_EXPORTED_FROM_ABI exception_ptr {
 
 public:
   // exception_ptr is basically a COW string so it is trivially relocatable.
-  // It is also replaceable because assignment has normal value semantics.
   using __trivially_relocatable _LIBCPP_NODEBUG = exception_ptr;
-  using __replaceable _LIBCPP_NODEBUG           = exception_ptr;
 
   _LIBCPP_HIDE_FROM_ABI exception_ptr() _NOEXCEPT : __ptr_() {}
   _LIBCPP_HIDE_FROM_ABI exception_ptr(nullptr_t) _NOEXCEPT : __ptr_() {}
 
   exception_ptr(const exception_ptr&) _NOEXCEPT;
+  _LIBCPP_HIDE_FROM_ABI exception_ptr(exception_ptr&& __other) _NOEXCEPT : __ptr_(__other.__ptr_) {
+    __other.__ptr_ = nullptr;
+  }
   exception_ptr& operator=(const exception_ptr&) _NOEXCEPT;
+  _LIBCPP_HIDE_FROM_ABI exception_ptr& operator=(exception_ptr&& __other) _NOEXCEPT {
+    exception_ptr __tmp(std::move(__other));
+    std::swap(__tmp, *this);
+    return *this;
+  }
   ~exception_ptr() _NOEXCEPT;
 
   _LIBCPP_HIDE_FROM_ABI explicit operator bool() const _NOEXCEPT { return __ptr_ != nullptr; }
@@ -88,10 +102,16 @@ public:
     return !(__x == __y);
   }
 
+  friend _LIBCPP_HIDE_FROM_ABI void swap(exception_ptr& __x, exception_ptr& __y) _NOEXCEPT;
+
   friend _LIBCPP_EXPORTED_FROM_ABI exception_ptr current_exception() _NOEXCEPT;
   friend _LIBCPP_EXPORTED_FROM_ABI void rethrow_exception(exception_ptr);
 };
 
+inline _LIBCPP_HIDE_FROM_ABI void swap(exception_ptr& __x, exception_ptr& __y) _NOEXCEPT {
+  std::swap(__x.__ptr_, __y.__ptr_);
+}
+
 #  if _LIBCPP_HAS_EXCEPTIONS
 #    if _LIBCPP_AVAILABILITY_HAS_INIT_PRIMARY_EXCEPTION
 template <class _Ep>
@@ -153,7 +173,7 @@ _LIBCPP_HIDE_FROM_ABI exception_ptr make_exception_ptr(_Ep __e) _NOEXCEPT {
 #  else  // !_LIBCPP_HAS_EXCEPTIONS
 template <class _Ep>
 _LIBCPP_HIDE_FROM_ABI exception_ptr make_exception_ptr(_Ep) _NOEXCEPT {
-  std::abort();
+  _LIBCPP_VERBOSE_ABORT("make_exception_ptr was called in -fno-exceptions mode");
 }
 #  endif // _LIBCPP_HAS_EXCEPTIONS
 
@@ -201,4 +221,6 @@ _LIBCPP_HIDE_FROM_ABI exception_ptr make_exception_ptr(_Ep __e) _NOEXCEPT {
 #endif // _LIBCPP_ABI_MICROSOFT
 _LIBCPP_END_UNVERSIONED_NAMESPACE_STD
 
+_LIBCPP_POP_MACROS
+
 #endif // _LIBCPP___EXCEPTION_EXCEPTION_PTR_H
diff --git a/lib/libcxx/include/__exception/nested_exception.h b/lib/libcxx/include/__exception/nested_exception.h
index 90b14158d5..dd84efbccd 100644
--- a/lib/libcxx/include/__exception/nested_exception.h
+++ b/lib/libcxx/include/__exception/nested_exception.h
@@ -40,7 +40,7 @@ public:
 
   // access functions
   [[__noreturn__]] void rethrow_nested() const;
-  _LIBCPP_HIDE_FROM_ABI exception_ptr nested_ptr() const _NOEXCEPT { return __ptr_; }
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI exception_ptr nested_ptr() const _NOEXCEPT { return __ptr_; }
 };
 
 template <class _Tp>
@@ -73,7 +73,7 @@ template <class _Tp>
   __throw_with_nested<_Tp,
                       _Up,
                       is_class<_Up>::value && !is_base_of<nested_exception, _Up>::value &&
-                          !__libcpp_is_final<_Up>::value>::__do_throw(std::forward<_Tp>(__t));
+                          !__is_final_v<_Up> >::__do_throw(std::forward<_Tp>(__t));
 #else
   ((void)__t);
   // FIXME: Make this abort
diff --git a/lib/libcxx/include/__exception/operations.h b/lib/libcxx/include/__exception/operations.h
index 29d5c698a9..2b93ad260c 100644
--- a/lib/libcxx/include/__exception/operations.h
+++ b/lib/libcxx/include/__exception/operations.h
@@ -20,22 +20,22 @@ _LIBCPP_BEGIN_UNVERSIONED_NAMESPACE_STD
     defined(_LIBCPP_BUILDING_LIBRARY)
 using unexpected_handler = void (*)();
 _LIBCPP_EXPORTED_FROM_ABI unexpected_handler set_unexpected(unexpected_handler) _NOEXCEPT;
-_LIBCPP_EXPORTED_FROM_ABI unexpected_handler get_unexpected() _NOEXCEPT;
+[[__nodiscard__]] _LIBCPP_EXPORTED_FROM_ABI unexpected_handler get_unexpected() _NOEXCEPT;
 [[__noreturn__]] _LIBCPP_EXPORTED_FROM_ABI void unexpected();
 #endif
 
 using terminate_handler = void (*)();
 _LIBCPP_EXPORTED_FROM_ABI terminate_handler set_terminate(terminate_handler) _NOEXCEPT;
-_LIBCPP_EXPORTED_FROM_ABI terminate_handler get_terminate() _NOEXCEPT;
+[[__nodiscard__]] _LIBCPP_EXPORTED_FROM_ABI terminate_handler get_terminate() _NOEXCEPT;
 
 #if _LIBCPP_STD_VER <= 17 || defined(_LIBCPP_ENABLE_CXX20_REMOVED_UNCAUGHT_EXCEPTION)
-_LIBCPP_EXPORTED_FROM_ABI _LIBCPP_DEPRECATED_IN_CXX17 bool uncaught_exception() _NOEXCEPT;
+[[__nodiscard__]] _LIBCPP_EXPORTED_FROM_ABI _LIBCPP_DEPRECATED_IN_CXX17 bool uncaught_exception() _NOEXCEPT;
 #endif // _LIBCPP_STD_VER <= 17 || defined(_LIBCPP_ENABLE_CXX20_REMOVED_UNCAUGHT_EXCEPTION)
-_LIBCPP_EXPORTED_FROM_ABI int uncaught_exceptions() _NOEXCEPT;
+[[__nodiscard__]] _LIBCPP_EXPORTED_FROM_ABI int uncaught_exceptions() _NOEXCEPT;
 
 class _LIBCPP_EXPORTED_FROM_ABI exception_ptr;
 
-_LIBCPP_EXPORTED_FROM_ABI exception_ptr current_exception() _NOEXCEPT;
+[[__nodiscard__]] _LIBCPP_EXPORTED_FROM_ABI exception_ptr current_exception() _NOEXCEPT;
 [[__noreturn__]] _LIBCPP_EXPORTED_FROM_ABI void rethrow_exception(exception_ptr);
 _LIBCPP_END_UNVERSIONED_NAMESPACE_STD
 
diff --git a/lib/libcxx/include/__expected/bad_expected_access.h b/lib/libcxx/include/__expected/bad_expected_access.h
index 1b734389e8..b1958101d5 100644
--- a/lib/libcxx/include/__expected/bad_expected_access.h
+++ b/lib/libcxx/include/__expected/bad_expected_access.h
@@ -43,9 +43,11 @@ protected:
 
 public:
 #  if _LIBCPP_AVAILABILITY_HAS_BAD_EXPECTED_ACCESS_KEY_FUNCTION
-  const char* what() const noexcept override;
+  [[nodiscard]] const char* what() const noexcept override;
 #  else
-  _LIBCPP_HIDE_FROM_ABI_VIRTUAL const char* what() const noexcept override { return "bad access to std::expected"; }
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI_VIRTUAL const char* what() const noexcept override {
+    return "bad access to std::expected";
+  }
 #  endif
 };
 _LIBCPP_DIAGNOSTIC_POP
@@ -55,10 +57,10 @@ class bad_expected_access : public bad_expected_access<void> {
 public:
   _LIBCPP_HIDE_FROM_ABI explicit bad_expected_access(_Err __e) : __unex_(std::move(__e)) {}
 
-  _LIBCPP_HIDE_FROM_ABI _Err& error() & noexcept { return __unex_; }
-  _LIBCPP_HIDE_FROM_ABI const _Err& error() const& noexcept { return __unex_; }
-  _LIBCPP_HIDE_FROM_ABI _Err&& error() && noexcept { return std::move(__unex_); }
-  _LIBCPP_HIDE_FROM_ABI const _Err&& error() const&& noexcept { return std::move(__unex_); }
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI _Err& error() & noexcept { return __unex_; }
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI const _Err& error() const& noexcept { return __unex_; }
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI _Err&& error() && noexcept { return std::move(__unex_); }
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI const _Err&& error() const&& noexcept { return std::move(__unex_); }
 
 private:
   _Err __unex_;
diff --git a/lib/libcxx/include/__expected/expected.h b/lib/libcxx/include/__expected/expected.h
index 0f446b8707..24ae33d4e3 100644
--- a/lib/libcxx/include/__expected/expected.h
+++ b/lib/libcxx/include/__expected/expected.h
@@ -30,7 +30,6 @@
 #include <__type_traits/is_nothrow_assignable.h>
 #include <__type_traits/is_nothrow_constructible.h>
 #include <__type_traits/is_reference.h>
-#include <__type_traits/is_replaceable.h>
 #include <__type_traits/is_same.h>
 #include <__type_traits/is_swappable.h>
 #include <__type_traits/is_trivially_constructible.h>
@@ -472,8 +471,6 @@ public:
       __conditional_t<__libcpp_is_trivially_relocatable<_Tp>::value && __libcpp_is_trivially_relocatable<_Err>::value,
                       expected,
                       void>;
-  using __replaceable _LIBCPP_NODEBUG =
-      __conditional_t<__is_replaceable_v<_Tp> && __is_replaceable_v<_Err>, expected, void>;
 
   template <class _Up>
   using rebind = expected<_Up, error_type>;
@@ -555,9 +552,10 @@ public:
           is_nothrow_constructible_v<_Tp, _Up> && is_nothrow_constructible_v<_Err, _OtherErr>) // strengthened
       : __base(__other.__has_val(), std::move(__other.__union())) {}
 
-  template <class _Up = _Tp>
+  template <class _Up = remove_cv_t<_Tp>>
     requires(!is_same_v<remove_cvref_t<_Up>, in_place_t> && !is_same_v<expected, remove_cvref_t<_Up>> &&
-             is_constructible_v<_Tp, _Up> && !__is_std_unexpected<remove_cvref_t<_Up>>::value &&
+             !is_same_v<remove_cvref_t<_Up>, unexpect_t> && is_constructible_v<_Tp, _Up> &&
+             !__is_std_unexpected<remove_cvref_t<_Up>>::value &&
              (!is_same_v<remove_cv_t<_Tp>, bool> || !__is_std_expected<remove_cvref_t<_Up>>::value))
   _LIBCPP_HIDE_FROM_ABI constexpr explicit(!is_convertible_v<_Up, _Tp>)
       expected(_Up&& __u) noexcept(is_nothrow_constructible_v<_Tp, _Up>) // strengthened
@@ -668,7 +666,7 @@ public:
     return *this;
   }
 
-  template <class _Up = _Tp>
+  template <class _Up = remove_cv_t<_Tp>>
   _LIBCPP_HIDE_FROM_ABI constexpr expected& operator=(_Up&& __v)
     requires(!is_same_v<expected, remove_cvref_t<_Up>> && !__is_std_unexpected<remove_cvref_t<_Up>>::value &&
              is_constructible_v<_Tp, _Up> && is_assignable_v<_Tp&, _Up> &&
@@ -800,25 +798,25 @@ public:
     return std::addressof(this->__val());
   }
 
-  _LIBCPP_HIDE_FROM_ABI constexpr const _Tp& operator*() const& noexcept {
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr const _Tp& operator*() const& noexcept {
     _LIBCPP_ASSERT_VALID_ELEMENT_ACCESS(
         this->__has_val(), "expected::operator* requires the expected to contain a value");
     return this->__val();
   }
 
-  _LIBCPP_HIDE_FROM_ABI constexpr _Tp& operator*() & noexcept {
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr _Tp& operator*() & noexcept {
     _LIBCPP_ASSERT_VALID_ELEMENT_ACCESS(
         this->__has_val(), "expected::operator* requires the expected to contain a value");
     return this->__val();
   }
 
-  _LIBCPP_HIDE_FROM_ABI constexpr const _Tp&& operator*() const&& noexcept {
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr const _Tp&& operator*() const&& noexcept {
     _LIBCPP_ASSERT_VALID_ELEMENT_ACCESS(
         this->__has_val(), "expected::operator* requires the expected to contain a value");
     return std::move(this->__val());
   }
 
-  _LIBCPP_HIDE_FROM_ABI constexpr _Tp&& operator*() && noexcept {
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr _Tp&& operator*() && noexcept {
     _LIBCPP_ASSERT_VALID_ELEMENT_ACCESS(
         this->__has_val(), "expected::operator* requires the expected to contain a value");
     return std::move(this->__val());
@@ -826,9 +824,9 @@ public:
 
   _LIBCPP_HIDE_FROM_ABI constexpr explicit operator bool() const noexcept { return this->__has_val(); }
 
-  _LIBCPP_HIDE_FROM_ABI constexpr bool has_value() const noexcept { return this->__has_val(); }
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr bool has_value() const noexcept { return this->__has_val(); }
 
-  _LIBCPP_HIDE_FROM_ABI constexpr const _Tp& value() const& {
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr const _Tp& value() const& {
     static_assert(is_copy_constructible_v<_Err>, "error_type has to be copy constructible");
     if (!this->__has_val()) {
       std::__throw_bad_expected_access<_Err>(std::as_const(error()));
@@ -836,7 +834,7 @@ public:
     return this->__val();
   }
 
-  _LIBCPP_HIDE_FROM_ABI constexpr _Tp& value() & {
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr _Tp& value() & {
     static_assert(is_copy_constructible_v<_Err>, "error_type has to be copy constructible");
     if (!this->__has_val()) {
       std::__throw_bad_expected_access<_Err>(std::as_const(error()));
@@ -844,7 +842,7 @@ public:
     return this->__val();
   }
 
-  _LIBCPP_HIDE_FROM_ABI constexpr const _Tp&& value() const&& {
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr const _Tp&& value() const&& {
     static_assert(is_copy_constructible_v<_Err> && is_constructible_v<_Err, decltype(std::move(error()))>,
                   "error_type has to be both copy constructible and constructible from decltype(std::move(error()))");
     if (!this->__has_val()) {
@@ -853,7 +851,7 @@ public:
     return std::move(this->__val());
   }
 
-  _LIBCPP_HIDE_FROM_ABI constexpr _Tp&& value() && {
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr _Tp&& value() && {
     static_assert(is_copy_constructible_v<_Err> && is_constructible_v<_Err, decltype(std::move(error()))>,
                   "error_type has to be both copy constructible and constructible from decltype(std::move(error()))");
     if (!this->__has_val()) {
@@ -862,46 +860,46 @@ public:
     return std::move(this->__val());
   }
 
-  _LIBCPP_HIDE_FROM_ABI constexpr const _Err& error() const& noexcept {
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr const _Err& error() const& noexcept {
     _LIBCPP_ASSERT_VALID_ELEMENT_ACCESS(
         !this->__has_val(), "expected::error requires the expected to contain an error");
     return this->__unex();
   }
 
-  _LIBCPP_HIDE_FROM_ABI constexpr _Err& error() & noexcept {
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr _Err& error() & noexcept {
     _LIBCPP_ASSERT_VALID_ELEMENT_ACCESS(
         !this->__has_val(), "expected::error requires the expected to contain an error");
     return this->__unex();
   }
 
-  _LIBCPP_HIDE_FROM_ABI constexpr const _Err&& error() const&& noexcept {
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr const _Err&& error() const&& noexcept {
     _LIBCPP_ASSERT_VALID_ELEMENT_ACCESS(
         !this->__has_val(), "expected::error requires the expected to contain an error");
     return std::move(this->__unex());
   }
 
-  _LIBCPP_HIDE_FROM_ABI constexpr _Err&& error() && noexcept {
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr _Err&& error() && noexcept {
     _LIBCPP_ASSERT_VALID_ELEMENT_ACCESS(
         !this->__has_val(), "expected::error requires the expected to contain an error");
     return std::move(this->__unex());
   }
 
-  template <class _Up>
-  _LIBCPP_HIDE_FROM_ABI constexpr _Tp value_or(_Up&& __v) const& {
+  template <class _Up = remove_cv_t<_Tp>>
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr _Tp value_or(_Up&& __v) const& {
     static_assert(is_copy_constructible_v<_Tp>, "value_type has to be copy constructible");
     static_assert(is_convertible_v<_Up, _Tp>, "argument has to be convertible to value_type");
     return this->__has_val() ? this->__val() : static_cast<_Tp>(std::forward<_Up>(__v));
   }
 
-  template <class _Up>
-  _LIBCPP_HIDE_FROM_ABI constexpr _Tp value_or(_Up&& __v) && {
+  template <class _Up = remove_cv_t<_Tp>>
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr _Tp value_or(_Up&& __v) && {
     static_assert(is_move_constructible_v<_Tp>, "value_type has to be move constructible");
     static_assert(is_convertible_v<_Up, _Tp>, "argument has to be convertible to value_type");
     return this->__has_val() ? std::move(this->__val()) : static_cast<_Tp>(std::forward<_Up>(__v));
   }
 
   template <class _Up = _Err>
-  _LIBCPP_HIDE_FROM_ABI constexpr _Err error_or(_Up&& __error) const& {
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr _Err error_or(_Up&& __error) const& {
     static_assert(is_copy_constructible_v<_Err>, "error_type has to be copy constructible");
     static_assert(is_convertible_v<_Up, _Err>, "argument has to be convertible to error_type");
     if (has_value())
@@ -910,7 +908,7 @@ public:
   }
 
   template <class _Up = _Err>
-  _LIBCPP_HIDE_FROM_ABI constexpr _Err error_or(_Up&& __error) && {
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr _Err error_or(_Up&& __error) && {
     static_assert(is_move_constructible_v<_Err>, "error_type has to be move constructible");
     static_assert(is_convertible_v<_Up, _Err>, "argument has to be convertible to error_type");
     if (has_value())
@@ -921,7 +919,7 @@ public:
   // [expected.void.monadic], monadic
   template <class _Func>
     requires is_constructible_v<_Err, _Err&>
-  _LIBCPP_HIDE_FROM_ABI constexpr auto and_then(_Func&& __f) & {
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr auto and_then(_Func&& __f) & {
     using _Up = remove_cvref_t<invoke_result_t<_Func, _Tp&>>;
     static_assert(__is_std_expected<_Up>::value, "The result of f(value()) must be a specialization of std::expected");
     static_assert(is_same_v<typename _Up::error_type, _Err>,
@@ -934,7 +932,7 @@ public:
 
   template <class _Func>
     requires is_constructible_v<_Err, const _Err&>
-  _LIBCPP_HIDE_FROM_ABI constexpr auto and_then(_Func&& __f) const& {
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr auto and_then(_Func&& __f) const& {
     using _Up = remove_cvref_t<invoke_result_t<_Func, const _Tp&>>;
     static_assert(__is_std_expected<_Up>::value, "The result of f(value()) must be a specialization of std::expected");
     static_assert(is_same_v<typename _Up::error_type, _Err>,
@@ -947,7 +945,7 @@ public:
 
   template <class _Func>
     requires is_constructible_v<_Err, _Err&&>
-  _LIBCPP_HIDE_FROM_ABI constexpr auto and_then(_Func&& __f) && {
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr auto and_then(_Func&& __f) && {
     using _Up = remove_cvref_t<invoke_result_t<_Func, _Tp&&>>;
     static_assert(
         __is_std_expected<_Up>::value, "The result of f(std::move(value())) must be a specialization of std::expected");
@@ -961,7 +959,7 @@ public:
 
   template <class _Func>
     requires is_constructible_v<_Err, const _Err&&>
-  _LIBCPP_HIDE_FROM_ABI constexpr auto and_then(_Func&& __f) const&& {
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr auto and_then(_Func&& __f) const&& {
     using _Up = remove_cvref_t<invoke_result_t<_Func, const _Tp&&>>;
     static_assert(
         __is_std_expected<_Up>::value, "The result of f(std::move(value())) must be a specialization of std::expected");
@@ -975,7 +973,7 @@ public:
 
   template <class _Func>
     requires is_constructible_v<_Tp, _Tp&>
-  _LIBCPP_HIDE_FROM_ABI constexpr auto or_else(_Func&& __f) & {
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr auto or_else(_Func&& __f) & {
     using _Gp = remove_cvref_t<invoke_result_t<_Func, _Err&>>;
     static_assert(__is_std_expected<_Gp>::value, "The result of f(error()) must be a specialization of std::expected");
     static_assert(is_same_v<typename _Gp::value_type, _Tp>,
@@ -988,7 +986,7 @@ public:
 
   template <class _Func>
     requires is_constructible_v<_Tp, const _Tp&>
-  _LIBCPP_HIDE_FROM_ABI constexpr auto or_else(_Func&& __f) const& {
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr auto or_else(_Func&& __f) const& {
     using _Gp = remove_cvref_t<invoke_result_t<_Func, const _Err&>>;
     static_assert(__is_std_expected<_Gp>::value, "The result of f(error()) must be a specialization of std::expected");
     static_assert(is_same_v<typename _Gp::value_type, _Tp>,
@@ -1001,7 +999,7 @@ public:
 
   template <class _Func>
     requires is_constructible_v<_Tp, _Tp&&>
-  _LIBCPP_HIDE_FROM_ABI constexpr auto or_else(_Func&& __f) && {
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr auto or_else(_Func&& __f) && {
     using _Gp = remove_cvref_t<invoke_result_t<_Func, _Err&&>>;
     static_assert(
         __is_std_expected<_Gp>::value, "The result of f(std::move(error())) must be a specialization of std::expected");
@@ -1015,7 +1013,7 @@ public:
 
   template <class _Func>
     requires is_constructible_v<_Tp, const _Tp&&>
-  _LIBCPP_HIDE_FROM_ABI constexpr auto or_else(_Func&& __f) const&& {
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr auto or_else(_Func&& __f) const&& {
     using _Gp = remove_cvref_t<invoke_result_t<_Func, const _Err&&>>;
     static_assert(
         __is_std_expected<_Gp>::value, "The result of f(std::move(error())) must be a specialization of std::expected");
@@ -1029,7 +1027,7 @@ public:
 
   template <class _Func>
     requires is_constructible_v<_Err, _Err&>
-  _LIBCPP_HIDE_FROM_ABI constexpr auto transform(_Func&& __f) & {
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr auto transform(_Func&& __f) & {
     using _Up = remove_cv_t<invoke_result_t<_Func, _Tp&>>;
     if (!has_value()) {
       return expected<_Up, _Err>(unexpect, error());
@@ -1045,7 +1043,7 @@ public:
 
   template <class _Func>
     requires is_constructible_v<_Err, const _Err&>
-  _LIBCPP_HIDE_FROM_ABI constexpr auto transform(_Func&& __f) const& {
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr auto transform(_Func&& __f) const& {
     using _Up = remove_cv_t<invoke_result_t<_Func, const _Tp&>>;
     if (!has_value()) {
       return expected<_Up, _Err>(unexpect, error());
@@ -1061,7 +1059,7 @@ public:
 
   template <class _Func>
     requires is_constructible_v<_Err, _Err&&>
-  _LIBCPP_HIDE_FROM_ABI constexpr auto transform(_Func&& __f) && {
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr auto transform(_Func&& __f) && {
     using _Up = remove_cv_t<invoke_result_t<_Func, _Tp&&>>;
     if (!has_value()) {
       return expected<_Up, _Err>(unexpect, std::move(error()));
@@ -1077,7 +1075,7 @@ public:
 
   template <class _Func>
     requires is_constructible_v<_Err, const _Err&&>
-  _LIBCPP_HIDE_FROM_ABI constexpr auto transform(_Func&& __f) const&& {
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr auto transform(_Func&& __f) const&& {
     using _Up = remove_cv_t<invoke_result_t<_Func, const _Tp&&>>;
     if (!has_value()) {
       return expected<_Up, _Err>(unexpect, std::move(error()));
@@ -1093,7 +1091,7 @@ public:
 
   template <class _Func>
     requires is_constructible_v<_Tp, _Tp&>
-  _LIBCPP_HIDE_FROM_ABI constexpr auto transform_error(_Func&& __f) & {
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr auto transform_error(_Func&& __f) & {
     using _Gp = remove_cv_t<invoke_result_t<_Func, _Err&>>;
     static_assert(__valid_std_unexpected<_Gp>::value,
                   "The result of f(error()) must be a valid template argument for unexpected");
@@ -1105,7 +1103,7 @@ public:
 
   template <class _Func>
     requires is_constructible_v<_Tp, const _Tp&>
-  _LIBCPP_HIDE_FROM_ABI constexpr auto transform_error(_Func&& __f) const& {
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr auto transform_error(_Func&& __f) const& {
     using _Gp = remove_cv_t<invoke_result_t<_Func, const _Err&>>;
     static_assert(__valid_std_unexpected<_Gp>::value,
                   "The result of f(error()) must be a valid template argument for unexpected");
@@ -1117,7 +1115,7 @@ public:
 
   template <class _Func>
     requires is_constructible_v<_Tp, _Tp&&>
-  _LIBCPP_HIDE_FROM_ABI constexpr auto transform_error(_Func&& __f) && {
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr auto transform_error(_Func&& __f) && {
     using _Gp = remove_cv_t<invoke_result_t<_Func, _Err&&>>;
     static_assert(__valid_std_unexpected<_Gp>::value,
                   "The result of f(std::move(error())) must be a valid template argument for unexpected");
@@ -1130,7 +1128,7 @@ public:
 
   template <class _Func>
     requires is_constructible_v<_Tp, const _Tp&&>
-  _LIBCPP_HIDE_FROM_ABI constexpr auto transform_error(_Func&& __f) const&& {
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr auto transform_error(_Func&& __f) const&& {
     using _Gp = remove_cv_t<invoke_result_t<_Func, const _Err&&>>;
     static_assert(__valid_std_unexpected<_Gp>::value,
                   "The result of f(std::move(error())) must be a valid template argument for unexpected");
@@ -1597,7 +1595,7 @@ public:
   // [expected.void.obs], observers
   _LIBCPP_HIDE_FROM_ABI constexpr explicit operator bool() const noexcept { return this->__has_val(); }
 
-  _LIBCPP_HIDE_FROM_ABI constexpr bool has_value() const noexcept { return this->__has_val(); }
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr bool has_value() const noexcept { return this->__has_val(); }
 
   _LIBCPP_HIDE_FROM_ABI constexpr void operator*() const noexcept {
     _LIBCPP_ASSERT_VALID_ELEMENT_ACCESS(
@@ -1618,32 +1616,32 @@ public:
     }
   }
 
-  _LIBCPP_HIDE_FROM_ABI constexpr const _Err& error() const& noexcept {
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr const _Err& error() const& noexcept {
     _LIBCPP_ASSERT_VALID_ELEMENT_ACCESS(
         !this->__has_val(), "expected::error requires the expected to contain an error");
     return this->__unex();
   }
 
-  _LIBCPP_HIDE_FROM_ABI constexpr _Err& error() & noexcept {
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr _Err& error() & noexcept {
     _LIBCPP_ASSERT_VALID_ELEMENT_ACCESS(
         !this->__has_val(), "expected::error requires the expected to contain an error");
     return this->__unex();
   }
 
-  _LIBCPP_HIDE_FROM_ABI constexpr const _Err&& error() const&& noexcept {
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr const _Err&& error() const&& noexcept {
     _LIBCPP_ASSERT_VALID_ELEMENT_ACCESS(
         !this->__has_val(), "expected::error requires the expected to contain an error");
     return std::move(this->__unex());
   }
 
-  _LIBCPP_HIDE_FROM_ABI constexpr _Err&& error() && noexcept {
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr _Err&& error() && noexcept {
     _LIBCPP_ASSERT_VALID_ELEMENT_ACCESS(
         !this->__has_val(), "expected::error requires the expected to contain an error");
     return std::move(this->__unex());
   }
 
   template <class _Up = _Err>
-  _LIBCPP_HIDE_FROM_ABI constexpr _Err error_or(_Up&& __error) const& {
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr _Err error_or(_Up&& __error) const& {
     static_assert(is_copy_constructible_v<_Err>, "error_type has to be copy constructible");
     static_assert(is_convertible_v<_Up, _Err>, "argument has to be convertible to error_type");
     if (has_value()) {
@@ -1653,7 +1651,7 @@ public:
   }
 
   template <class _Up = _Err>
-  _LIBCPP_HIDE_FROM_ABI constexpr _Err error_or(_Up&& __error) && {
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr _Err error_or(_Up&& __error) && {
     static_assert(is_move_constructible_v<_Err>, "error_type has to be move constructible");
     static_assert(is_convertible_v<_Up, _Err>, "argument has to be convertible to error_type");
     if (has_value()) {
@@ -1665,7 +1663,7 @@ public:
   // [expected.void.monadic], monadic
   template <class _Func>
     requires is_constructible_v<_Err, _Err&>
-  _LIBCPP_HIDE_FROM_ABI constexpr auto and_then(_Func&& __f) & {
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr auto and_then(_Func&& __f) & {
     using _Up = remove_cvref_t<invoke_result_t<_Func>>;
     static_assert(__is_std_expected<_Up>::value, "The result of f() must be a specialization of std::expected");
     static_assert(
@@ -1678,7 +1676,7 @@ public:
 
   template <class _Func>
     requires is_constructible_v<_Err, const _Err&>
-  _LIBCPP_HIDE_FROM_ABI constexpr auto and_then(_Func&& __f) const& {
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr auto and_then(_Func&& __f) const& {
     using _Up = remove_cvref_t<invoke_result_t<_Func>>;
     static_assert(__is_std_expected<_Up>::value, "The result of f() must be a specialization of std::expected");
     static_assert(
@@ -1691,7 +1689,7 @@ public:
 
   template <class _Func>
     requires is_constructible_v<_Err, _Err&&>
-  _LIBCPP_HIDE_FROM_ABI constexpr auto and_then(_Func&& __f) && {
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr auto and_then(_Func&& __f) && {
     using _Up = remove_cvref_t<invoke_result_t<_Func>>;
     static_assert(__is_std_expected<_Up>::value, "The result of f() must be a specialization of std::expected");
     static_assert(
@@ -1704,7 +1702,7 @@ public:
 
   template <class _Func>
     requires is_constructible_v<_Err, const _Err&&>
-  _LIBCPP_HIDE_FROM_ABI constexpr auto and_then(_Func&& __f) const&& {
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr auto and_then(_Func&& __f) const&& {
     using _Up = remove_cvref_t<invoke_result_t<_Func>>;
     static_assert(__is_std_expected<_Up>::value, "The result of f() must be a specialization of std::expected");
     static_assert(
@@ -1716,7 +1714,7 @@ public:
   }
 
   template <class _Func>
-  _LIBCPP_HIDE_FROM_ABI constexpr auto or_else(_Func&& __f) & {
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr auto or_else(_Func&& __f) & {
     using _Gp = remove_cvref_t<invoke_result_t<_Func, _Err&>>;
     static_assert(__is_std_expected<_Gp>::value, "The result of f(error()) must be a specialization of std::expected");
     static_assert(is_same_v<typename _Gp::value_type, _Tp>,
@@ -1728,7 +1726,7 @@ public:
   }
 
   template <class _Func>
-  _LIBCPP_HIDE_FROM_ABI constexpr auto or_else(_Func&& __f) const& {
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr auto or_else(_Func&& __f) const& {
     using _Gp = remove_cvref_t<invoke_result_t<_Func, const _Err&>>;
     static_assert(__is_std_expected<_Gp>::value, "The result of f(error()) must be a specialization of std::expected");
     static_assert(is_same_v<typename _Gp::value_type, _Tp>,
@@ -1740,7 +1738,7 @@ public:
   }
 
   template <class _Func>
-  _LIBCPP_HIDE_FROM_ABI constexpr auto or_else(_Func&& __f) && {
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr auto or_else(_Func&& __f) && {
     using _Gp = remove_cvref_t<invoke_result_t<_Func, _Err&&>>;
     static_assert(
         __is_std_expected<_Gp>::value, "The result of f(std::move(error())) must be a specialization of std::expected");
@@ -1753,7 +1751,7 @@ public:
   }
 
   template <class _Func>
-  _LIBCPP_HIDE_FROM_ABI constexpr auto or_else(_Func&& __f) const&& {
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr auto or_else(_Func&& __f) const&& {
     using _Gp = remove_cvref_t<invoke_result_t<_Func, const _Err&&>>;
     static_assert(
         __is_std_expected<_Gp>::value, "The result of f(std::move(error())) must be a specialization of std::expected");
@@ -1767,7 +1765,7 @@ public:
 
   template <class _Func>
     requires is_constructible_v<_Err, _Err&>
-  _LIBCPP_HIDE_FROM_ABI constexpr auto transform(_Func&& __f) & {
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr auto transform(_Func&& __f) & {
     using _Up = remove_cv_t<invoke_result_t<_Func>>;
     if (!has_value()) {
       return expected<_Up, _Err>(unexpect, error());
@@ -1782,7 +1780,7 @@ public:
 
   template <class _Func>
     requires is_constructible_v<_Err, const _Err&>
-  _LIBCPP_HIDE_FROM_ABI constexpr auto transform(_Func&& __f) const& {
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr auto transform(_Func&& __f) const& {
     using _Up = remove_cv_t<invoke_result_t<_Func>>;
     if (!has_value()) {
       return expected<_Up, _Err>(unexpect, error());
@@ -1797,7 +1795,7 @@ public:
 
   template <class _Func>
     requires is_constructible_v<_Err, _Err&&>
-  _LIBCPP_HIDE_FROM_ABI constexpr auto transform(_Func&& __f) && {
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr auto transform(_Func&& __f) && {
     using _Up = remove_cv_t<invoke_result_t<_Func>>;
     if (!has_value()) {
       return expected<_Up, _Err>(unexpect, std::move(error()));
@@ -1812,7 +1810,7 @@ public:
 
   template <class _Func>
     requires is_constructible_v<_Err, const _Err&&>
-  _LIBCPP_HIDE_FROM_ABI constexpr auto transform(_Func&& __f) const&& {
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr auto transform(_Func&& __f) const&& {
     using _Up = remove_cv_t<invoke_result_t<_Func>>;
     if (!has_value()) {
       return expected<_Up, _Err>(unexpect, std::move(error()));
@@ -1826,7 +1824,7 @@ public:
   }
 
   template <class _Func>
-  _LIBCPP_HIDE_FROM_ABI constexpr auto transform_error(_Func&& __f) & {
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr auto transform_error(_Func&& __f) & {
     using _Gp = remove_cv_t<invoke_result_t<_Func, _Err&>>;
     static_assert(__valid_std_unexpected<_Gp>::value,
                   "The result of f(error()) must be a valid template argument for unexpected");
@@ -1837,7 +1835,7 @@ public:
   }
 
   template <class _Func>
-  _LIBCPP_HIDE_FROM_ABI constexpr auto transform_error(_Func&& __f) const& {
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr auto transform_error(_Func&& __f) const& {
     using _Gp = remove_cv_t<invoke_result_t<_Func, const _Err&>>;
     static_assert(__valid_std_unexpected<_Gp>::value,
                   "The result of f(error()) must be a valid template argument for unexpected");
@@ -1848,7 +1846,7 @@ public:
   }
 
   template <class _Func>
-  _LIBCPP_HIDE_FROM_ABI constexpr auto transform_error(_Func&& __f) && {
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr auto transform_error(_Func&& __f) && {
     using _Gp = remove_cv_t<invoke_result_t<_Func, _Err&&>>;
     static_assert(__valid_std_unexpected<_Gp>::value,
                   "The result of f(std::move(error())) must be a valid template argument for unexpected");
@@ -1860,7 +1858,7 @@ public:
   }
 
   template <class _Func>
-  _LIBCPP_HIDE_FROM_ABI constexpr auto transform_error(_Func&& __f) const&& {
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr auto transform_error(_Func&& __f) const&& {
     using _Gp = remove_cv_t<invoke_result_t<_Func, const _Err&&>>;
     static_assert(__valid_std_unexpected<_Gp>::value,
                   "The result of f(std::move(error())) must be a valid template argument for unexpected");
diff --git a/lib/libcxx/include/__expected/unexpected.h b/lib/libcxx/include/__expected/unexpected.h
index 6904889b8c..fc4f52ce14 100644
--- a/lib/libcxx/include/__expected/unexpected.h
+++ b/lib/libcxx/include/__expected/unexpected.h
@@ -89,10 +89,10 @@ public:
   _LIBCPP_HIDE_FROM_ABI constexpr unexpected& operator=(const unexpected&) = default;
   _LIBCPP_HIDE_FROM_ABI constexpr unexpected& operator=(unexpected&&)      = default;
 
-  _LIBCPP_HIDE_FROM_ABI constexpr const _Err& error() const& noexcept { return __unex_; }
-  _LIBCPP_HIDE_FROM_ABI constexpr _Err& error() & noexcept { return __unex_; }
-  _LIBCPP_HIDE_FROM_ABI constexpr const _Err&& error() const&& noexcept { return std::move(__unex_); }
-  _LIBCPP_HIDE_FROM_ABI constexpr _Err&& error() && noexcept { return std::move(__unex_); }
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr const _Err& error() const& noexcept { return __unex_; }
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr _Err& error() & noexcept { return __unex_; }
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr const _Err&& error() const&& noexcept { return std::move(__unex_); }
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr _Err&& error() && noexcept { return std::move(__unex_); }
 
   _LIBCPP_HIDE_FROM_ABI constexpr void swap(unexpected& __other) noexcept(is_nothrow_swappable_v<_Err>) {
     static_assert(is_swappable_v<_Err>, "unexpected::swap requires is_swappable_v<E> to be true");
diff --git a/lib/libcxx/include/__filesystem/copy_options.h b/lib/libcxx/include/__filesystem/copy_options.h
index 097eebe611..d9039a6492 100644
--- a/lib/libcxx/include/__filesystem/copy_options.h
+++ b/lib/libcxx/include/__filesystem/copy_options.h
@@ -34,19 +34,19 @@ enum class copy_options : unsigned short {
   __in_recursive_copy = 512,
 };
 
-_LIBCPP_HIDE_FROM_ABI inline constexpr copy_options operator&(copy_options __lhs, copy_options __rhs) {
+[[nodiscard]] _LIBCPP_HIDE_FROM_ABI inline constexpr copy_options operator&(copy_options __lhs, copy_options __rhs) {
   return static_cast<copy_options>(static_cast<unsigned short>(__lhs) & static_cast<unsigned short>(__rhs));
 }
 
-_LIBCPP_HIDE_FROM_ABI inline constexpr copy_options operator|(copy_options __lhs, copy_options __rhs) {
+[[nodiscard]] _LIBCPP_HIDE_FROM_ABI inline constexpr copy_options operator|(copy_options __lhs, copy_options __rhs) {
   return static_cast<copy_options>(static_cast<unsigned short>(__lhs) | static_cast<unsigned short>(__rhs));
 }
 
-_LIBCPP_HIDE_FROM_ABI inline constexpr copy_options operator^(copy_options __lhs, copy_options __rhs) {
+[[nodiscard]] _LIBCPP_HIDE_FROM_ABI inline constexpr copy_options operator^(copy_options __lhs, copy_options __rhs) {
   return static_cast<copy_options>(static_cast<unsigned short>(__lhs) ^ static_cast<unsigned short>(__rhs));
 }
 
-_LIBCPP_HIDE_FROM_ABI inline constexpr copy_options operator~(copy_options __lhs) {
+[[nodiscard]] _LIBCPP_HIDE_FROM_ABI inline constexpr copy_options operator~(copy_options __lhs) {
   return static_cast<copy_options>(~static_cast<unsigned short>(__lhs));
 }
 
diff --git a/lib/libcxx/include/__filesystem/directory_entry.h b/lib/libcxx/include/__filesystem/directory_entry.h
index 5f236cf264..fab400b439 100644
--- a/lib/libcxx/include/__filesystem/directory_entry.h
+++ b/lib/libcxx/include/__filesystem/directory_entry.h
@@ -40,8 +40,6 @@ _LIBCPP_PUSH_MACROS
 
 _LIBCPP_BEGIN_NAMESPACE_FILESYSTEM
 
-_LIBCPP_AVAILABILITY_FILESYSTEM_LIBRARY_PUSH
-
 class directory_entry {
   typedef filesystem::path _Path;
 
@@ -89,80 +87,88 @@ public:
 
   _LIBCPP_HIDE_FROM_ABI void refresh(error_code& __ec) noexcept { __refresh(&__ec); }
 
-  _LIBCPP_HIDE_FROM_ABI _Path const& path() const noexcept { return __p_; }
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI _Path const& path() const noexcept { return __p_; }
 
   _LIBCPP_HIDE_FROM_ABI operator const _Path&() const noexcept { return __p_; }
 
-  _LIBCPP_HIDE_FROM_ABI bool exists() const { return filesystem::exists(file_status{__get_ft()}); }
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI bool exists() const { return filesystem::exists(file_status{__get_ft()}); }
 
-  _LIBCPP_HIDE_FROM_ABI bool exists(error_code& __ec) const noexcept {
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI bool exists(error_code& __ec) const noexcept {
     return filesystem::exists(file_status{__get_ft(&__ec)});
   }
 
-  _LIBCPP_HIDE_FROM_ABI bool is_block_file() const { return __get_ft() == file_type::block; }
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI bool is_block_file() const { return __get_ft() == file_type::block; }
 
-  _LIBCPP_HIDE_FROM_ABI bool is_block_file(error_code& __ec) const noexcept {
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI bool is_block_file(error_code& __ec) const noexcept {
     return __get_ft(&__ec) == file_type::block;
   }
 
-  _LIBCPP_HIDE_FROM_ABI bool is_character_file() const { return __get_ft() == file_type::character; }
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI bool is_character_file() const { return __get_ft() == file_type::character; }
 
-  _LIBCPP_HIDE_FROM_ABI bool is_character_file(error_code& __ec) const noexcept {
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI bool is_character_file(error_code& __ec) const noexcept {
     return __get_ft(&__ec) == file_type::character;
   }
 
-  _LIBCPP_HIDE_FROM_ABI bool is_directory() const { return __get_ft() == file_type::directory; }
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI bool is_directory() const { return __get_ft() == file_type::directory; }
 
-  _LIBCPP_HIDE_FROM_ABI bool is_directory(error_code& __ec) const noexcept {
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI bool is_directory(error_code& __ec) const noexcept {
     return __get_ft(&__ec) == file_type::directory;
   }
 
-  _LIBCPP_HIDE_FROM_ABI bool is_fifo() const { return __get_ft() == file_type::fifo; }
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI bool is_fifo() const { return __get_ft() == file_type::fifo; }
 
-  _LIBCPP_HIDE_FROM_ABI bool is_fifo(error_code& __ec) const noexcept { return __get_ft(&__ec) == file_type::fifo; }
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI bool is_fifo(error_code& __ec) const noexcept {
+    return __get_ft(&__ec) == file_type::fifo;
+  }
 
-  _LIBCPP_HIDE_FROM_ABI bool is_other() const { return filesystem::is_other(file_status{__get_ft()}); }
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI bool is_other() const { return filesystem::is_other(file_status{__get_ft()}); }
 
-  _LIBCPP_HIDE_FROM_ABI bool is_other(error_code& __ec) const noexcept {
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI bool is_other(error_code& __ec) const noexcept {
     return filesystem::is_other(file_status{__get_ft(&__ec)});
   }
 
-  _LIBCPP_HIDE_FROM_ABI bool is_regular_file() const { return __get_ft() == file_type::regular; }
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI bool is_regular_file() const { return __get_ft() == file_type::regular; }
 
-  _LIBCPP_HIDE_FROM_ABI bool is_regular_file(error_code& __ec) const noexcept {
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI bool is_regular_file(error_code& __ec) const noexcept {
     return __get_ft(&__ec) == file_type::regular;
   }
 
-  _LIBCPP_HIDE_FROM_ABI bool is_socket() const { return __get_ft() == file_type::socket; }
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI bool is_socket() const { return __get_ft() == file_type::socket; }
 
-  _LIBCPP_HIDE_FROM_ABI bool is_socket(error_code& __ec) const noexcept { return __get_ft(&__ec) == file_type::socket; }
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI bool is_socket(error_code& __ec) const noexcept {
+    return __get_ft(&__ec) == file_type::socket;
+  }
 
-  _LIBCPP_HIDE_FROM_ABI bool is_symlink() const { return __get_sym_ft() == file_type::symlink; }
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI bool is_symlink() const { return __get_sym_ft() == file_type::symlink; }
 
-  _LIBCPP_HIDE_FROM_ABI bool is_symlink(error_code& __ec) const noexcept {
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI bool is_symlink(error_code& __ec) const noexcept {
     return __get_sym_ft(&__ec) == file_type::symlink;
   }
-  _LIBCPP_HIDE_FROM_ABI uintmax_t file_size() const { return __get_size(); }
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI uintmax_t file_size() const { return __get_size(); }
 
-  _LIBCPP_HIDE_FROM_ABI uintmax_t file_size(error_code& __ec) const noexcept { return __get_size(&__ec); }
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI uintmax_t file_size(error_code& __ec) const noexcept { return __get_size(&__ec); }
 
-  _LIBCPP_HIDE_FROM_ABI uintmax_t hard_link_count() const { return __get_nlink(); }
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI uintmax_t hard_link_count() const { return __get_nlink(); }
 
-  _LIBCPP_HIDE_FROM_ABI uintmax_t hard_link_count(error_code& __ec) const noexcept { return __get_nlink(&__ec); }
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI uintmax_t hard_link_count(error_code& __ec) const noexcept {
+    return __get_nlink(&__ec);
+  }
 
-  _LIBCPP_HIDE_FROM_ABI file_time_type last_write_time() const { return __get_write_time(); }
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI file_time_type last_write_time() const { return __get_write_time(); }
 
-  _LIBCPP_HIDE_FROM_ABI file_time_type last_write_time(error_code& __ec) const noexcept {
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI file_time_type last_write_time(error_code& __ec) const noexcept {
     return __get_write_time(&__ec);
   }
 
-  _LIBCPP_HIDE_FROM_ABI file_status status() const { return __get_status(); }
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI file_status status() const { return __get_status(); }
 
-  _LIBCPP_HIDE_FROM_ABI file_status status(error_code& __ec) const noexcept { return __get_status(&__ec); }
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI file_status status(error_code& __ec) const noexcept {
+    return __get_status(&__ec);
+  }
 
-  _LIBCPP_HIDE_FROM_ABI file_status symlink_status() const { return __get_symlink_status(); }
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI file_status symlink_status() const { return __get_symlink_status(); }
 
-  _LIBCPP_HIDE_FROM_ABI file_status symlink_status(error_code& __ec) const noexcept {
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI file_status symlink_status(error_code& __ec) const noexcept {
     return __get_symlink_status(&__ec);
   }
 
@@ -459,8 +465,6 @@ private:
   directory_entry __elem_;
 };
 
-_LIBCPP_AVAILABILITY_FILESYSTEM_LIBRARY_POP
-
 _LIBCPP_END_NAMESPACE_FILESYSTEM
 
 #endif // _LIBCPP_STD_VER >= 17 && _LIBCPP_HAS_FILESYSTEM
diff --git a/lib/libcxx/include/__filesystem/directory_iterator.h b/lib/libcxx/include/__filesystem/directory_iterator.h
index f5085b39eb..b62129807b 100644
--- a/lib/libcxx/include/__filesystem/directory_iterator.h
+++ b/lib/libcxx/include/__filesystem/directory_iterator.h
@@ -34,8 +34,6 @@ _LIBCPP_PUSH_MACROS
 
 _LIBCPP_BEGIN_NAMESPACE_FILESYSTEM
 
-_LIBCPP_AVAILABILITY_FILESYSTEM_LIBRARY_PUSH
-
 class _LIBCPP_HIDDEN __dir_stream;
 class directory_iterator {
 public:
@@ -73,7 +71,7 @@ public:
 
   _LIBCPP_HIDE_FROM_ABI ~directory_iterator() = default;
 
-  _LIBCPP_HIDE_FROM_ABI const directory_entry& operator*() const {
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI const directory_entry& operator*() const {
     // Note: this check duplicates a check in `__dereference()`.
     _LIBCPP_ASSERT_NON_NULL(__imp_, "The end iterator cannot be dereferenced");
     return __dereference();
@@ -123,23 +121,23 @@ operator!=(const directory_iterator& __lhs, const directory_iterator& __rhs) noe
 }
 
 // enable directory_iterator range-based for statements
-inline _LIBCPP_HIDE_FROM_ABI directory_iterator begin(directory_iterator __iter) noexcept { return __iter; }
+[[nodiscard]] inline _LIBCPP_HIDE_FROM_ABI directory_iterator begin(directory_iterator __iter) noexcept {
+  return __iter;
+}
 
-inline _LIBCPP_HIDE_FROM_ABI directory_iterator end(directory_iterator) noexcept { return directory_iterator(); }
-
-_LIBCPP_AVAILABILITY_FILESYSTEM_LIBRARY_POP
+[[nodiscard]] inline _LIBCPP_HIDE_FROM_ABI directory_iterator end(directory_iterator) noexcept {
+  return directory_iterator();
+}
 
 _LIBCPP_END_NAMESPACE_FILESYSTEM
 
 #  if _LIBCPP_STD_VER >= 20
 
 template <>
-_LIBCPP_AVAILABILITY_FILESYSTEM_LIBRARY inline constexpr bool
-    std::ranges::enable_borrowed_range<std::filesystem::directory_iterator> = true;
+inline constexpr bool std::ranges::enable_borrowed_range<std::filesystem::directory_iterator> = true;
 
 template <>
-_LIBCPP_AVAILABILITY_FILESYSTEM_LIBRARY inline constexpr bool
-    std::ranges::enable_view<std::filesystem::directory_iterator> = true;
+inline constexpr bool std::ranges::enable_view<std::filesystem::directory_iterator> = true;
 
 #  endif // _LIBCPP_STD_VER >= 20
 
diff --git a/lib/libcxx/include/__filesystem/directory_options.h b/lib/libcxx/include/__filesystem/directory_options.h
index d0cd3ebfda..11c7d204ea 100644
--- a/lib/libcxx/include/__filesystem/directory_options.h
+++ b/lib/libcxx/include/__filesystem/directory_options.h
@@ -22,19 +22,22 @@ _LIBCPP_BEGIN_NAMESPACE_FILESYSTEM
 
 enum class directory_options : unsigned char { none = 0, follow_directory_symlink = 1, skip_permission_denied = 2 };
 
-_LIBCPP_HIDE_FROM_ABI inline constexpr directory_options operator&(directory_options __lhs, directory_options __rhs) {
+[[nodiscard]] _LIBCPP_HIDE_FROM_ABI inline constexpr directory_options
+operator&(directory_options __lhs, directory_options __rhs) {
   return static_cast<directory_options>(static_cast<unsigned char>(__lhs) & static_cast<unsigned char>(__rhs));
 }
 
-_LIBCPP_HIDE_FROM_ABI inline constexpr directory_options operator|(directory_options __lhs, directory_options __rhs) {
+[[nodiscard]] _LIBCPP_HIDE_FROM_ABI inline constexpr directory_options
+operator|(directory_options __lhs, directory_options __rhs) {
   return static_cast<directory_options>(static_cast<unsigned char>(__lhs) | static_cast<unsigned char>(__rhs));
 }
 
-_LIBCPP_HIDE_FROM_ABI inline constexpr directory_options operator^(directory_options __lhs, directory_options __rhs) {
+[[nodiscard]] _LIBCPP_HIDE_FROM_ABI inline constexpr directory_options
+operator^(directory_options __lhs, directory_options __rhs) {
   return static_cast<directory_options>(static_cast<unsigned char>(__lhs) ^ static_cast<unsigned char>(__rhs));
 }
 
-_LIBCPP_HIDE_FROM_ABI inline constexpr directory_options operator~(directory_options __lhs) {
+[[nodiscard]] _LIBCPP_HIDE_FROM_ABI inline constexpr directory_options operator~(directory_options __lhs) {
   return static_cast<directory_options>(~static_cast<unsigned char>(__lhs));
 }
 
diff --git a/lib/libcxx/include/__filesystem/file_status.h b/lib/libcxx/include/__filesystem/file_status.h
index da316c8b02..746cd0f9a6 100644
--- a/lib/libcxx/include/__filesystem/file_status.h
+++ b/lib/libcxx/include/__filesystem/file_status.h
@@ -22,7 +22,7 @@
 
 _LIBCPP_BEGIN_NAMESPACE_FILESYSTEM
 
-class _LIBCPP_EXPORTED_FROM_ABI file_status {
+class file_status {
 public:
   // constructors
   _LIBCPP_HIDE_FROM_ABI file_status() noexcept : file_status(file_type::none) {}
@@ -38,9 +38,9 @@ public:
   _LIBCPP_HIDE_FROM_ABI file_status& operator=(file_status&&) noexcept      = default;
 
   // observers
-  _LIBCPP_HIDE_FROM_ABI file_type type() const noexcept { return __ft_; }
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI file_type type() const noexcept { return __ft_; }
 
-  _LIBCPP_HIDE_FROM_ABI perms permissions() const noexcept { return __prms_; }
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI perms permissions() const noexcept { return __prms_; }
 
   // modifiers
   _LIBCPP_HIDE_FROM_ABI void type(file_type __ft) noexcept { __ft_ = __ft; }
diff --git a/lib/libcxx/include/__filesystem/filesystem_error.h b/lib/libcxx/include/__filesystem/filesystem_error.h
index 73592bba31..6f1daf866a 100644
--- a/lib/libcxx/include/__filesystem/filesystem_error.h
+++ b/lib/libcxx/include/__filesystem/filesystem_error.h
@@ -27,7 +27,7 @@
 
 _LIBCPP_BEGIN_NAMESPACE_FILESYSTEM
 
-class _LIBCPP_AVAILABILITY_FILESYSTEM_LIBRARY _LIBCPP_EXPORTED_FROM_ABI filesystem_error : public system_error {
+class _LIBCPP_EXPORTED_FROM_ABI filesystem_error : public system_error {
 public:
   _LIBCPP_HIDE_FROM_ABI filesystem_error(const string& __what, error_code __ec)
       : system_error(__ec, __what), __storage_(make_shared<_Storage>(path(), path())) {
@@ -44,15 +44,16 @@ public:
     __create_what(2);
   }
 
-  _LIBCPP_HIDE_FROM_ABI const path& path1() const noexcept { return __storage_->__p1_; }
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI const path& path1() const noexcept { return __storage_->__p1_; }
 
-  _LIBCPP_HIDE_FROM_ABI const path& path2() const noexcept { return __storage_->__p2_; }
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI const path& path2() const noexcept { return __storage_->__p2_; }
 
-  _LIBCPP_HIDE_FROM_ABI filesystem_error(const filesystem_error&) = default;
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI filesystem_error(const filesystem_error&) = default;
   ~filesystem_error() override; // key function
 
-  _LIBCPP_HIDE_FROM_ABI_VIRTUAL
-  const char* what() const noexcept override { return __storage_->__what_.c_str(); }
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI_VIRTUAL const char* what() const noexcept override {
+    return __storage_->__what_.c_str();
+  }
 
   void __create_what(int __num_paths);
 
@@ -69,14 +70,12 @@ private:
 
 #  if _LIBCPP_HAS_EXCEPTIONS
 template <class... _Args>
-[[__noreturn__]] inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_AVAILABILITY_FILESYSTEM_LIBRARY void
-__throw_filesystem_error(_Args&&... __args) {
+[[__noreturn__]] inline _LIBCPP_HIDE_FROM_ABI void __throw_filesystem_error(_Args&&... __args) {
   throw filesystem_error(std::forward<_Args>(__args)...);
 }
 #  else
 template <class... _Args>
-[[__noreturn__]] inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_AVAILABILITY_FILESYSTEM_LIBRARY void
-__throw_filesystem_error(_Args&&...) {
+[[__noreturn__]] inline _LIBCPP_HIDE_FROM_ABI void __throw_filesystem_error(_Args&&...) {
   _LIBCPP_VERBOSE_ABORT("filesystem_error was thrown in -fno-exceptions mode");
 }
 #  endif
diff --git a/lib/libcxx/include/__filesystem/operations.h b/lib/libcxx/include/__filesystem/operations.h
index 29b6c2f798..f536a1a9d4 100644
--- a/lib/libcxx/include/__filesystem/operations.h
+++ b/lib/libcxx/include/__filesystem/operations.h
@@ -31,8 +31,6 @@
 
 _LIBCPP_BEGIN_NAMESPACE_FILESYSTEM
 
-_LIBCPP_AVAILABILITY_FILESYSTEM_LIBRARY_PUSH
-
 _LIBCPP_EXPORTED_FROM_ABI path __absolute(const path&, error_code* __ec = nullptr);
 _LIBCPP_EXPORTED_FROM_ABI path __canonical(const path&, error_code* __ec = nullptr);
 _LIBCPP_EXPORTED_FROM_ABI bool
@@ -70,10 +68,14 @@ _LIBCPP_EXPORTED_FROM_ABI bool __fs_is_empty(const path& __p, error_code* __ec =
 _LIBCPP_EXPORTED_FROM_ABI void __permissions(const path&, perms, perm_options, error_code* = nullptr);
 _LIBCPP_EXPORTED_FROM_ABI space_info __space(const path&, error_code* __ec = nullptr);
 
-inline _LIBCPP_HIDE_FROM_ABI path absolute(const path& __p) { return __absolute(__p); }
-inline _LIBCPP_HIDE_FROM_ABI path absolute(const path& __p, error_code& __ec) { return __absolute(__p, &__ec); }
-inline _LIBCPP_HIDE_FROM_ABI path canonical(const path& __p) { return __canonical(__p); }
-inline _LIBCPP_HIDE_FROM_ABI path canonical(const path& __p, error_code& __ec) { return __canonical(__p, &__ec); }
+[[nodiscard]] inline _LIBCPP_HIDE_FROM_ABI path absolute(const path& __p) { return __absolute(__p); }
+[[nodiscard]] inline _LIBCPP_HIDE_FROM_ABI path absolute(const path& __p, error_code& __ec) {
+  return __absolute(__p, &__ec);
+}
+[[nodiscard]] inline _LIBCPP_HIDE_FROM_ABI path canonical(const path& __p) { return __canonical(__p); }
+[[nodiscard]] inline _LIBCPP_HIDE_FROM_ABI path canonical(const path& __p, error_code& __ec) {
+  return __canonical(__p, &__ec);
+}
 inline _LIBCPP_HIDE_FROM_ABI bool copy_file(const path& __from, const path& __to) {
   return __copy_file(__from, __to, copy_options::none);
 }
@@ -137,85 +139,112 @@ inline _LIBCPP_HIDE_FROM_ABI void create_symlink(const path& __target, const pat
 inline _LIBCPP_HIDE_FROM_ABI void create_symlink(const path& __target, const path& __link, error_code& __ec) noexcept {
   return __create_symlink(__target, __link, &__ec);
 }
-inline _LIBCPP_HIDE_FROM_ABI path current_path() { return __current_path(); }
-inline _LIBCPP_HIDE_FROM_ABI path current_path(error_code& __ec) { return __current_path(&__ec); }
+[[nodiscard]] inline _LIBCPP_HIDE_FROM_ABI path current_path() { return __current_path(); }
+[[nodiscard]] inline _LIBCPP_HIDE_FROM_ABI path current_path(error_code& __ec) { return __current_path(&__ec); }
 inline _LIBCPP_HIDE_FROM_ABI void current_path(const path& __p) { __current_path(__p); }
 inline _LIBCPP_HIDE_FROM_ABI void current_path(const path& __p, error_code& __ec) noexcept {
   __current_path(__p, &__ec);
 }
-inline _LIBCPP_HIDE_FROM_ABI bool equivalent(const path& __p1, const path& __p2) { return __equivalent(__p1, __p2); }
-inline _LIBCPP_HIDE_FROM_ABI bool equivalent(const path& __p1, const path& __p2, error_code& __ec) noexcept {
+[[nodiscard]] inline _LIBCPP_HIDE_FROM_ABI bool equivalent(const path& __p1, const path& __p2) {
+  return __equivalent(__p1, __p2);
+}
+[[nodiscard]] inline _LIBCPP_HIDE_FROM_ABI bool
+equivalent(const path& __p1, const path& __p2, error_code& __ec) noexcept {
   return __equivalent(__p1, __p2, &__ec);
 }
-inline _LIBCPP_HIDE_FROM_ABI bool status_known(file_status __s) noexcept { return __s.type() != file_type::none; }
-inline _LIBCPP_HIDE_FROM_ABI bool exists(file_status __s) noexcept {
+[[nodiscard]] inline _LIBCPP_HIDE_FROM_ABI bool status_known(file_status __s) noexcept {
+  return __s.type() != file_type::none;
+}
+[[nodiscard]] inline _LIBCPP_HIDE_FROM_ABI bool exists(file_status __s) noexcept {
   return status_known(__s) && __s.type() != file_type::not_found;
 }
-inline _LIBCPP_HIDE_FROM_ABI bool exists(const path& __p) { return exists(__status(__p)); }
+[[nodiscard]] inline _LIBCPP_HIDE_FROM_ABI bool exists(const path& __p) { return exists(__status(__p)); }
 
-inline _LIBCPP_HIDE_FROM_ABI bool exists(const path& __p, error_code& __ec) noexcept {
+[[nodiscard]] inline _LIBCPP_HIDE_FROM_ABI bool exists(const path& __p, error_code& __ec) noexcept {
   auto __s = __status(__p, &__ec);
   if (status_known(__s))
     __ec.clear();
   return exists(__s);
 }
 
-inline _LIBCPP_HIDE_FROM_ABI uintmax_t file_size(const path& __p) { return __file_size(__p); }
-inline _LIBCPP_HIDE_FROM_ABI uintmax_t file_size(const path& __p, error_code& __ec) noexcept {
+[[nodiscard]] inline _LIBCPP_HIDE_FROM_ABI uintmax_t file_size(const path& __p) { return __file_size(__p); }
+[[nodiscard]] inline _LIBCPP_HIDE_FROM_ABI uintmax_t file_size(const path& __p, error_code& __ec) noexcept {
   return __file_size(__p, &__ec);
 }
-inline _LIBCPP_HIDE_FROM_ABI uintmax_t hard_link_count(const path& __p) { return __hard_link_count(__p); }
-inline _LIBCPP_HIDE_FROM_ABI uintmax_t hard_link_count(const path& __p, error_code& __ec) noexcept {
+[[nodiscard]] inline _LIBCPP_HIDE_FROM_ABI uintmax_t hard_link_count(const path& __p) { return __hard_link_count(__p); }
+[[nodiscard]] inline _LIBCPP_HIDE_FROM_ABI uintmax_t hard_link_count(const path& __p, error_code& __ec) noexcept {
   return __hard_link_count(__p, &__ec);
 }
-inline _LIBCPP_HIDE_FROM_ABI bool is_block_file(file_status __s) noexcept { return __s.type() == file_type::block; }
-inline _LIBCPP_HIDE_FROM_ABI bool is_block_file(const path& __p) { return is_block_file(__status(__p)); }
-inline _LIBCPP_HIDE_FROM_ABI bool is_block_file(const path& __p, error_code& __ec) noexcept {
+[[nodiscard]] inline _LIBCPP_HIDE_FROM_ABI bool is_block_file(file_status __s) noexcept {
+  return __s.type() == file_type::block;
+}
+[[nodiscard]] inline _LIBCPP_HIDE_FROM_ABI bool is_block_file(const path& __p) { return is_block_file(__status(__p)); }
+[[nodiscard]] inline _LIBCPP_HIDE_FROM_ABI bool is_block_file(const path& __p, error_code& __ec) noexcept {
   return is_block_file(__status(__p, &__ec));
 }
-inline _LIBCPP_HIDE_FROM_ABI bool is_character_file(file_status __s) noexcept {
+[[nodiscard]] inline _LIBCPP_HIDE_FROM_ABI bool is_character_file(file_status __s) noexcept {
   return __s.type() == file_type::character;
 }
-inline _LIBCPP_HIDE_FROM_ABI bool is_character_file(const path& __p) { return is_character_file(__status(__p)); }
-inline _LIBCPP_HIDE_FROM_ABI bool is_character_file(const path& __p, error_code& __ec) noexcept {
+[[nodiscard]] inline _LIBCPP_HIDE_FROM_ABI bool is_character_file(const path& __p) {
+  return is_character_file(__status(__p));
+}
+[[nodiscard]] inline _LIBCPP_HIDE_FROM_ABI bool is_character_file(const path& __p, error_code& __ec) noexcept {
   return is_character_file(__status(__p, &__ec));
 }
-inline _LIBCPP_HIDE_FROM_ABI bool is_directory(file_status __s) noexcept { return __s.type() == file_type::directory; }
-inline _LIBCPP_HIDE_FROM_ABI bool is_directory(const path& __p) { return is_directory(__status(__p)); }
-inline _LIBCPP_HIDE_FROM_ABI bool is_directory(const path& __p, error_code& __ec) noexcept {
+[[nodiscard]] inline _LIBCPP_HIDE_FROM_ABI bool is_directory(file_status __s) noexcept {
+  return __s.type() == file_type::directory;
+}
+[[nodiscard]] inline _LIBCPP_HIDE_FROM_ABI bool is_directory(const path& __p) { return is_directory(__status(__p)); }
+[[nodiscard]] inline _LIBCPP_HIDE_FROM_ABI bool is_directory(const path& __p, error_code& __ec) noexcept {
   return is_directory(__status(__p, &__ec));
 }
-inline _LIBCPP_HIDE_FROM_ABI bool is_empty(const path& __p) { return __fs_is_empty(__p); }
-inline _LIBCPP_HIDE_FROM_ABI bool is_empty(const path& __p, error_code& __ec) { return __fs_is_empty(__p, &__ec); }
-inline _LIBCPP_HIDE_FROM_ABI bool is_fifo(file_status __s) noexcept { return __s.type() == file_type::fifo; }
-inline _LIBCPP_HIDE_FROM_ABI bool is_fifo(const path& __p) { return is_fifo(__status(__p)); }
-inline _LIBCPP_HIDE_FROM_ABI bool is_fifo(const path& __p, error_code& __ec) noexcept {
+[[nodiscard]] inline _LIBCPP_HIDE_FROM_ABI bool is_empty(const path& __p) { return __fs_is_empty(__p); }
+[[nodiscard]] inline _LIBCPP_HIDE_FROM_ABI bool is_empty(const path& __p, error_code& __ec) {
+  return __fs_is_empty(__p, &__ec);
+}
+[[nodiscard]] inline _LIBCPP_HIDE_FROM_ABI bool is_fifo(file_status __s) noexcept {
+  return __s.type() == file_type::fifo;
+}
+[[nodiscard]] inline _LIBCPP_HIDE_FROM_ABI bool is_fifo(const path& __p) { return is_fifo(__status(__p)); }
+[[nodiscard]] inline _LIBCPP_HIDE_FROM_ABI bool is_fifo(const path& __p, error_code& __ec) noexcept {
   return is_fifo(__status(__p, &__ec));
 }
-inline _LIBCPP_HIDE_FROM_ABI bool is_regular_file(file_status __s) noexcept { return __s.type() == file_type::regular; }
-inline _LIBCPP_HIDE_FROM_ABI bool is_regular_file(const path& __p) { return is_regular_file(__status(__p)); }
-inline _LIBCPP_HIDE_FROM_ABI bool is_regular_file(const path& __p, error_code& __ec) noexcept {
+[[nodiscard]] inline _LIBCPP_HIDE_FROM_ABI bool is_regular_file(file_status __s) noexcept {
+  return __s.type() == file_type::regular;
+}
+[[nodiscard]] inline _LIBCPP_HIDE_FROM_ABI bool is_regular_file(const path& __p) {
+  return is_regular_file(__status(__p));
+}
+[[nodiscard]] inline _LIBCPP_HIDE_FROM_ABI bool is_regular_file(const path& __p, error_code& __ec) noexcept {
   return is_regular_file(__status(__p, &__ec));
 }
-inline _LIBCPP_HIDE_FROM_ABI bool is_symlink(file_status __s) noexcept { return __s.type() == file_type::symlink; }
-inline _LIBCPP_HIDE_FROM_ABI bool is_symlink(const path& __p) { return is_symlink(__symlink_status(__p)); }
-inline _LIBCPP_HIDE_FROM_ABI bool is_symlink(const path& __p, error_code& __ec) noexcept {
+[[nodiscard]] inline _LIBCPP_HIDE_FROM_ABI bool is_symlink(file_status __s) noexcept {
+  return __s.type() == file_type::symlink;
+}
+[[nodiscard]] inline _LIBCPP_HIDE_FROM_ABI bool is_symlink(const path& __p) {
+  return is_symlink(__symlink_status(__p));
+}
+[[nodiscard]] inline _LIBCPP_HIDE_FROM_ABI bool is_symlink(const path& __p, error_code& __ec) noexcept {
   return is_symlink(__symlink_status(__p, &__ec));
 }
-inline _LIBCPP_HIDE_FROM_ABI bool is_other(file_status __s) noexcept {
+[[nodiscard]] inline _LIBCPP_HIDE_FROM_ABI bool is_other(file_status __s) noexcept {
   return exists(__s) && !is_regular_file(__s) && !is_directory(__s) && !is_symlink(__s);
 }
-inline _LIBCPP_HIDE_FROM_ABI bool is_other(const path& __p) { return is_other(__status(__p)); }
-inline _LIBCPP_HIDE_FROM_ABI bool is_other(const path& __p, error_code& __ec) noexcept {
+[[nodiscard]] inline _LIBCPP_HIDE_FROM_ABI bool is_other(const path& __p) { return is_other(__status(__p)); }
+[[nodiscard]] inline _LIBCPP_HIDE_FROM_ABI bool is_other(const path& __p, error_code& __ec) noexcept {
   return is_other(__status(__p, &__ec));
 }
-inline _LIBCPP_HIDE_FROM_ABI bool is_socket(file_status __s) noexcept { return __s.type() == file_type::socket; }
-inline _LIBCPP_HIDE_FROM_ABI bool is_socket(const path& __p) { return is_socket(__status(__p)); }
-inline _LIBCPP_HIDE_FROM_ABI bool is_socket(const path& __p, error_code& __ec) noexcept {
+[[nodiscard]] inline _LIBCPP_HIDE_FROM_ABI bool is_socket(file_status __s) noexcept {
+  return __s.type() == file_type::socket;
+}
+[[nodiscard]] inline _LIBCPP_HIDE_FROM_ABI bool is_socket(const path& __p) { return is_socket(__status(__p)); }
+[[nodiscard]] inline _LIBCPP_HIDE_FROM_ABI bool is_socket(const path& __p, error_code& __ec) noexcept {
   return is_socket(__status(__p, &__ec));
 }
-inline _LIBCPP_HIDE_FROM_ABI file_time_type last_write_time(const path& __p) { return __last_write_time(__p); }
-inline _LIBCPP_HIDE_FROM_ABI file_time_type last_write_time(const path& __p, error_code& __ec) noexcept {
+[[nodiscard]] inline _LIBCPP_HIDE_FROM_ABI file_time_type last_write_time(const path& __p) {
+  return __last_write_time(__p);
+}
+[[nodiscard]] inline _LIBCPP_HIDE_FROM_ABI file_time_type last_write_time(const path& __p, error_code& __ec) noexcept {
   return __last_write_time(__p, &__ec);
 }
 inline _LIBCPP_HIDE_FROM_ABI void last_write_time(const path& __p, file_time_type __t) { __last_write_time(__p, __t); }
@@ -233,7 +262,7 @@ inline _LIBCPP_HIDE_FROM_ABI void permissions(const path& __p, perms __prms, per
   __permissions(__p, __prms, __opts, &__ec);
 }
 
-inline _LIBCPP_HIDE_FROM_ABI path proximate(const path& __p, const path& __base, error_code& __ec) {
+[[nodiscard]] inline _LIBCPP_HIDE_FROM_ABI path proximate(const path& __p, const path& __base, error_code& __ec) {
   path __tmp = __weakly_canonical(__p, &__ec);
   if (__ec)
     return {};
@@ -243,16 +272,18 @@ inline _LIBCPP_HIDE_FROM_ABI path proximate(const path& __p, const path& __base,
   return __tmp.lexically_proximate(__tmp_base);
 }
 
-inline _LIBCPP_HIDE_FROM_ABI path proximate(const path& __p, error_code& __ec) {
+[[nodiscard]] inline _LIBCPP_HIDE_FROM_ABI path proximate(const path& __p, error_code& __ec) {
   return proximate(__p, current_path(), __ec);
 }
-inline _LIBCPP_HIDE_FROM_ABI path proximate(const path& __p, const path& __base = current_path()) {
+[[nodiscard]] inline _LIBCPP_HIDE_FROM_ABI path proximate(const path& __p, const path& __base = current_path()) {
   return __weakly_canonical(__p).lexically_proximate(__weakly_canonical(__base));
 }
-inline _LIBCPP_HIDE_FROM_ABI path read_symlink(const path& __p) { return __read_symlink(__p); }
-inline _LIBCPP_HIDE_FROM_ABI path read_symlink(const path& __p, error_code& __ec) { return __read_symlink(__p, &__ec); }
+[[nodiscard]] inline _LIBCPP_HIDE_FROM_ABI path read_symlink(const path& __p) { return __read_symlink(__p); }
+[[nodiscard]] inline _LIBCPP_HIDE_FROM_ABI path read_symlink(const path& __p, error_code& __ec) {
+  return __read_symlink(__p, &__ec);
+}
 
-inline _LIBCPP_HIDE_FROM_ABI path relative(const path& __p, const path& __base, error_code& __ec) {
+[[nodiscard]] inline _LIBCPP_HIDE_FROM_ABI path relative(const path& __p, const path& __base, error_code& __ec) {
   path __tmp = __weakly_canonical(__p, &__ec);
   if (__ec)
     return path();
@@ -262,10 +293,10 @@ inline _LIBCPP_HIDE_FROM_ABI path relative(const path& __p, const path& __base,
   return __tmp.lexically_relative(__tmpbase);
 }
 
-inline _LIBCPP_HIDE_FROM_ABI path relative(const path& __p, error_code& __ec) {
+[[nodiscard]] inline _LIBCPP_HIDE_FROM_ABI path relative(const path& __p, error_code& __ec) {
   return relative(__p, current_path(), __ec);
 }
-inline _LIBCPP_HIDE_FROM_ABI path relative(const path& __p, const path& __base = current_path()) {
+[[nodiscard]] inline _LIBCPP_HIDE_FROM_ABI path relative(const path& __p, const path& __base = current_path()) {
   return __weakly_canonical(__p).lexically_relative(__weakly_canonical(__base));
 }
 inline _LIBCPP_HIDE_FROM_ABI uintmax_t remove_all(const path& __p) { return __remove_all(__p); }
@@ -282,27 +313,27 @@ inline _LIBCPP_HIDE_FROM_ABI void resize_file(const path& __p, uintmax_t __ns) {
 inline _LIBCPP_HIDE_FROM_ABI void resize_file(const path& __p, uintmax_t __ns, error_code& __ec) noexcept {
   return __resize_file(__p, __ns, &__ec);
 }
-inline _LIBCPP_HIDE_FROM_ABI space_info space(const path& __p) { return __space(__p); }
-inline _LIBCPP_HIDE_FROM_ABI space_info space(const path& __p, error_code& __ec) noexcept {
+[[nodiscard]] inline _LIBCPP_HIDE_FROM_ABI space_info space(const path& __p) { return __space(__p); }
+[[nodiscard]] inline _LIBCPP_HIDE_FROM_ABI space_info space(const path& __p, error_code& __ec) noexcept {
   return __space(__p, &__ec);
 }
-inline _LIBCPP_HIDE_FROM_ABI file_status status(const path& __p) { return __status(__p); }
-inline _LIBCPP_HIDE_FROM_ABI file_status status(const path& __p, error_code& __ec) noexcept {
+[[nodiscard]] inline _LIBCPP_HIDE_FROM_ABI file_status status(const path& __p) { return __status(__p); }
+[[nodiscard]] inline _LIBCPP_HIDE_FROM_ABI file_status status(const path& __p, error_code& __ec) noexcept {
   return __status(__p, &__ec);
 }
-inline _LIBCPP_HIDE_FROM_ABI file_status symlink_status(const path& __p) { return __symlink_status(__p); }
-inline _LIBCPP_HIDE_FROM_ABI file_status symlink_status(const path& __p, error_code& __ec) noexcept {
+[[nodiscard]] inline _LIBCPP_HIDE_FROM_ABI file_status symlink_status(const path& __p) { return __symlink_status(__p); }
+[[nodiscard]] inline _LIBCPP_HIDE_FROM_ABI file_status symlink_status(const path& __p, error_code& __ec) noexcept {
   return __symlink_status(__p, &__ec);
 }
-inline _LIBCPP_HIDE_FROM_ABI path temp_directory_path() { return __temp_directory_path(); }
-inline _LIBCPP_HIDE_FROM_ABI path temp_directory_path(error_code& __ec) { return __temp_directory_path(&__ec); }
-inline _LIBCPP_HIDE_FROM_ABI path weakly_canonical(path const& __p) { return __weakly_canonical(__p); }
-inline _LIBCPP_HIDE_FROM_ABI path weakly_canonical(path const& __p, error_code& __ec) {
+[[nodiscard]] inline _LIBCPP_HIDE_FROM_ABI path temp_directory_path() { return __temp_directory_path(); }
+[[nodiscard]] inline _LIBCPP_HIDE_FROM_ABI path temp_directory_path(error_code& __ec) {
+  return __temp_directory_path(&__ec);
+}
+[[nodiscard]] inline _LIBCPP_HIDE_FROM_ABI path weakly_canonical(path const& __p) { return __weakly_canonical(__p); }
+[[nodiscard]] inline _LIBCPP_HIDE_FROM_ABI path weakly_canonical(path const& __p, error_code& __ec) {
   return __weakly_canonical(__p, &__ec);
 }
 
-_LIBCPP_AVAILABILITY_FILESYSTEM_LIBRARY_POP
-
 _LIBCPP_END_NAMESPACE_FILESYSTEM
 
 #endif // _LIBCPP_STD_VER >= 17 && _LIBCPP_HAS_FILESYSTEM
diff --git a/lib/libcxx/include/__filesystem/path.h b/lib/libcxx/include/__filesystem/path.h
index 381e5678a5..4fd3acad4d 100644
--- a/lib/libcxx/include/__filesystem/path.h
+++ b/lib/libcxx/include/__filesystem/path.h
@@ -42,8 +42,6 @@ _LIBCPP_PUSH_MACROS
 
 _LIBCPP_BEGIN_NAMESPACE_FILESYSTEM
 
-_LIBCPP_AVAILABILITY_FILESYSTEM_LIBRARY_PUSH
-
 template <class _Tp>
 struct __can_convert_char {
   static const bool value = false;
@@ -326,6 +324,7 @@ struct _PathCVT<char> {
   }
 };
 
+#    if _LIBCPP_HAS_LOCALIZATION
 template <class _ECharT>
 struct _PathExport {
   typedef __narrow_to_utf8<sizeof(wchar_t) * __CHAR_BIT__> _Narrower;
@@ -366,7 +365,7 @@ struct _PathExport<char16_t> {
   }
 };
 
-#    if _LIBCPP_HAS_CHAR8_T
+#      if _LIBCPP_HAS_CHAR8_T
 template <>
 struct _PathExport<char8_t> {
   typedef __narrow_to_utf8<sizeof(wchar_t) * __CHAR_BIT__> _Narrower;
@@ -376,8 +375,9 @@ struct _PathExport<char8_t> {
     _Narrower()(back_inserter(__dest), __src.data(), __src.data() + __src.size());
   }
 };
-#    endif // _LIBCPP_HAS_CHAR8_T
-#  endif   /* _LIBCPP_WIN32API */
+#      endif // _LIBCPP_HAS_CHAR8_T
+#    endif   // _LIBCPP_HAS_LOCALIZATION
+#  endif     // _LIBCPP_WIN32API
 
 class _LIBCPP_EXPORTED_FROM_ABI path {
   template <class _SourceOrIter, class _Tp = path&>
@@ -667,16 +667,16 @@ public:
   _LIBCPP_HIDE_FROM_ABI void __reserve(size_t __s) { __pn_.reserve(__s); }
 
   // native format observers
-  _LIBCPP_HIDE_FROM_ABI const string_type& native() const noexcept { return __pn_; }
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI const string_type& native() const noexcept { return __pn_; }
 
-  _LIBCPP_HIDE_FROM_ABI const value_type* c_str() const noexcept { return __pn_.c_str(); }
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI const value_type* c_str() const noexcept { return __pn_.c_str(); }
 
   _LIBCPP_HIDE_FROM_ABI operator string_type() const { return __pn_; }
 
 #  if defined(_LIBCPP_WIN32API)
-  _LIBCPP_HIDE_FROM_ABI std::wstring wstring() const { return __pn_; }
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI std::wstring wstring() const { return __pn_; }
 
-  _LIBCPP_HIDE_FROM_ABI std::wstring generic_wstring() const {
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI std::wstring generic_wstring() const {
     std::wstring __s;
     __s.resize(__pn_.size());
     std::replace_copy(__pn_.begin(), __pn_.end(), __s.begin(), '\\', '/');
@@ -685,6 +685,7 @@ public:
 
 #    if _LIBCPP_HAS_LOCALIZATION
   template <class _ECharT, class _Traits = char_traits<_ECharT>, class _Allocator = allocator<_ECharT> >
+  [[nodiscard]]
   _LIBCPP_HIDE_FROM_ABI basic_string<_ECharT, _Traits, _Allocator> string(const _Allocator& __a = _Allocator()) const {
     using _Str = basic_string<_ECharT, _Traits, _Allocator>;
     _Str __s(__a);
@@ -693,8 +694,8 @@ public:
     return __s;
   }
 
-  _LIBCPP_HIDE_FROM_ABI std::string string() const { return string<char>(); }
-  _LIBCPP_HIDE_FROM_ABI __u8_string u8string() const {
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI std::string string() const { return string<char>(); }
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI __u8_string u8string() const {
     using _CVT = __narrow_to_utf8<sizeof(wchar_t) * __CHAR_BIT__>;
     __u8_string __s;
     __s.reserve(__pn_.size());
@@ -702,12 +703,12 @@ public:
     return __s;
   }
 
-  _LIBCPP_HIDE_FROM_ABI std::u16string u16string() const { return string<char16_t>(); }
-  _LIBCPP_HIDE_FROM_ABI std::u32string u32string() const { return string<char32_t>(); }
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI std::u16string u16string() const { return string<char16_t>(); }
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI std::u32string u32string() const { return string<char32_t>(); }
 
   // generic format observers
   template <class _ECharT, class _Traits = char_traits<_ECharT>, class _Allocator = allocator<_ECharT> >
-  _LIBCPP_HIDE_FROM_ABI basic_string<_ECharT, _Traits, _Allocator>
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI basic_string<_ECharT, _Traits, _Allocator>
   generic_string(const _Allocator& __a = _Allocator()) const {
     using _Str = basic_string<_ECharT, _Traits, _Allocator>;
     _Str __s   = string<_ECharT, _Traits, _Allocator>(__a);
@@ -718,10 +719,10 @@ public:
     return __s;
   }
 
-  _LIBCPP_HIDE_FROM_ABI std::string generic_string() const { return generic_string<char>(); }
-  _LIBCPP_HIDE_FROM_ABI std::u16string generic_u16string() const { return generic_string<char16_t>(); }
-  _LIBCPP_HIDE_FROM_ABI std::u32string generic_u32string() const { return generic_string<char32_t>(); }
-  _LIBCPP_HIDE_FROM_ABI __u8_string generic_u8string() const {
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI std::string generic_string() const { return generic_string<char>(); }
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI std::u16string generic_u16string() const { return generic_string<char16_t>(); }
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI std::u32string generic_u32string() const { return generic_string<char32_t>(); }
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI __u8_string generic_u8string() const {
     __u8_string __s = u8string();
     std::replace(__s.begin(), __s.end(), '\\', '/');
     return __s;
@@ -729,15 +730,18 @@ public:
 #    endif // _LIBCPP_HAS_LOCALIZATION
 #  else    /* _LIBCPP_WIN32API */
 
-  _LIBCPP_HIDE_FROM_ABI std::string string() const { return __pn_; }
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI std::string string() const { return __pn_; }
 #    if _LIBCPP_HAS_CHAR8_T
-  _LIBCPP_HIDE_FROM_ABI std::u8string u8string() const { return std::u8string(__pn_.begin(), __pn_.end()); }
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI std::u8string u8string() const {
+    return std::u8string(__pn_.begin(), __pn_.end());
+  }
 #    else
-  _LIBCPP_HIDE_FROM_ABI std::string u8string() const { return __pn_; }
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI std::string u8string() const { return __pn_; }
 #    endif
 
 #    if _LIBCPP_HAS_LOCALIZATION
   template <class _ECharT, class _Traits = char_traits<_ECharT>, class _Allocator = allocator<_ECharT> >
+  [[nodiscard]]
   _LIBCPP_HIDE_FROM_ABI basic_string<_ECharT, _Traits, _Allocator> string(const _Allocator& __a = _Allocator()) const {
     using _CVT = __widen_from_utf8<sizeof(_ECharT) * __CHAR_BIT__>;
     using _Str = basic_string<_ECharT, _Traits, _Allocator>;
@@ -748,32 +752,34 @@ public:
   }
 
 #      if _LIBCPP_HAS_WIDE_CHARACTERS
-  _LIBCPP_HIDE_FROM_ABI std::wstring wstring() const { return string<wchar_t>(); }
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI std::wstring wstring() const { return string<wchar_t>(); }
 #      endif
-  _LIBCPP_HIDE_FROM_ABI std::u16string u16string() const { return string<char16_t>(); }
-  _LIBCPP_HIDE_FROM_ABI std::u32string u32string() const { return string<char32_t>(); }
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI std::u16string u16string() const { return string<char16_t>(); }
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI std::u32string u32string() const { return string<char32_t>(); }
 #    endif // _LIBCPP_HAS_LOCALIZATION
 
   // generic format observers
-  _LIBCPP_HIDE_FROM_ABI std::string generic_string() const { return __pn_; }
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI std::string generic_string() const { return __pn_; }
 #    if _LIBCPP_HAS_CHAR8_T
-  _LIBCPP_HIDE_FROM_ABI std::u8string generic_u8string() const { return std::u8string(__pn_.begin(), __pn_.end()); }
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI std::u8string generic_u8string() const {
+    return std::u8string(__pn_.begin(), __pn_.end());
+  }
 #    else
-  _LIBCPP_HIDE_FROM_ABI std::string generic_u8string() const { return __pn_; }
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI std::string generic_u8string() const { return __pn_; }
 #    endif
 
 #    if _LIBCPP_HAS_LOCALIZATION
   template <class _ECharT, class _Traits = char_traits<_ECharT>, class _Allocator = allocator<_ECharT> >
-  _LIBCPP_HIDE_FROM_ABI basic_string<_ECharT, _Traits, _Allocator>
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI basic_string<_ECharT, _Traits, _Allocator>
   generic_string(const _Allocator& __a = _Allocator()) const {
     return string<_ECharT, _Traits, _Allocator>(__a);
   }
 
 #      if _LIBCPP_HAS_WIDE_CHARACTERS
-  _LIBCPP_HIDE_FROM_ABI std::wstring generic_wstring() const { return string<wchar_t>(); }
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI std::wstring generic_wstring() const { return string<wchar_t>(); }
 #      endif
-  _LIBCPP_HIDE_FROM_ABI std::u16string generic_u16string() const { return string<char16_t>(); }
-  _LIBCPP_HIDE_FROM_ABI std::u32string generic_u32string() const { return string<char32_t>(); }
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI std::u16string generic_u16string() const { return string<char16_t>(); }
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI std::u32string generic_u32string() const { return string<char32_t>(); }
 #    endif // _LIBCPP_HAS_LOCALIZATION
 #  endif   /* !_LIBCPP_WIN32API */
 
@@ -790,40 +796,40 @@ private:
 
 public:
   // compare
-  _LIBCPP_HIDE_FROM_ABI int compare(const path& __p) const noexcept { return __compare(__p.__pn_); }
-  _LIBCPP_HIDE_FROM_ABI int compare(const string_type& __s) const { return __compare(__s); }
-  _LIBCPP_HIDE_FROM_ABI int compare(__string_view __s) const { return __compare(__s); }
-  _LIBCPP_HIDE_FROM_ABI int compare(const value_type* __s) const { return __compare(__s); }
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI int compare(const path& __p) const noexcept { return __compare(__p.__pn_); }
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI int compare(const string_type& __s) const { return __compare(__s); }
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI int compare(__string_view __s) const { return __compare(__s); }
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI int compare(const value_type* __s) const { return __compare(__s); }
 
   // decomposition
-  _LIBCPP_HIDE_FROM_ABI path root_name() const { return string_type(__root_name()); }
-  _LIBCPP_HIDE_FROM_ABI path root_directory() const { return string_type(__root_directory()); }
-  _LIBCPP_HIDE_FROM_ABI path root_path() const {
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI path root_name() const { return string_type(__root_name()); }
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI path root_directory() const { return string_type(__root_directory()); }
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI path root_path() const {
 #  if defined(_LIBCPP_WIN32API)
     return string_type(__root_path_raw());
 #  else
     return root_name().append(string_type(__root_directory()));
 #  endif
   }
-  _LIBCPP_HIDE_FROM_ABI path relative_path() const { return string_type(__relative_path()); }
-  _LIBCPP_HIDE_FROM_ABI path parent_path() const { return string_type(__parent_path()); }
-  _LIBCPP_HIDE_FROM_ABI path filename() const { return string_type(__filename()); }
-  _LIBCPP_HIDE_FROM_ABI path stem() const { return string_type(__stem()); }
-  _LIBCPP_HIDE_FROM_ABI path extension() const { return string_type(__extension()); }
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI path relative_path() const { return string_type(__relative_path()); }
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI path parent_path() const { return string_type(__parent_path()); }
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI path filename() const { return string_type(__filename()); }
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI path stem() const { return string_type(__stem()); }
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI path extension() const { return string_type(__extension()); }
 
   // query
   [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI bool empty() const noexcept { return __pn_.empty(); }
 
-  _LIBCPP_HIDE_FROM_ABI bool has_root_name() const { return !__root_name().empty(); }
-  _LIBCPP_HIDE_FROM_ABI bool has_root_directory() const { return !__root_directory().empty(); }
-  _LIBCPP_HIDE_FROM_ABI bool has_root_path() const { return !__root_path_raw().empty(); }
-  _LIBCPP_HIDE_FROM_ABI bool has_relative_path() const { return !__relative_path().empty(); }
-  _LIBCPP_HIDE_FROM_ABI bool has_parent_path() const { return !__parent_path().empty(); }
-  _LIBCPP_HIDE_FROM_ABI bool has_filename() const { return !__filename().empty(); }
-  _LIBCPP_HIDE_FROM_ABI bool has_stem() const { return !__stem().empty(); }
-  _LIBCPP_HIDE_FROM_ABI bool has_extension() const { return !__extension().empty(); }
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI bool has_root_name() const { return !__root_name().empty(); }
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI bool has_root_directory() const { return !__root_directory().empty(); }
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI bool has_root_path() const { return !__root_path_raw().empty(); }
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI bool has_relative_path() const { return !__relative_path().empty(); }
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI bool has_parent_path() const { return !__parent_path().empty(); }
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI bool has_filename() const { return !__filename().empty(); }
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI bool has_stem() const { return !__stem().empty(); }
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI bool has_extension() const { return !__extension().empty(); }
 
-  _LIBCPP_HIDE_FROM_ABI bool is_absolute() const {
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI bool is_absolute() const {
 #  if defined(_LIBCPP_WIN32API)
     __string_view __root_name_str = __root_name();
     __string_view __root_dir      = __root_directory();
@@ -847,13 +853,13 @@ public:
     return has_root_directory();
 #  endif
   }
-  _LIBCPP_HIDE_FROM_ABI bool is_relative() const { return !is_absolute(); }
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI bool is_relative() const { return !is_absolute(); }
 
   // relative paths
-  path lexically_normal() const;
-  path lexically_relative(const path& __base) const;
+  [[nodiscard]] path lexically_normal() const;
+  [[nodiscard]] path lexically_relative(const path& __base) const;
 
-  _LIBCPP_HIDE_FROM_ABI path lexically_proximate(const path& __base) const {
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI path lexically_proximate(const path& __base) const {
     path __result = this->lexically_relative(__base);
     if (__result.native().empty())
       return *this;
@@ -861,11 +867,11 @@ public:
   }
 
   // iterators
-  class _LIBCPP_EXPORTED_FROM_ABI iterator;
+  class iterator;
   typedef iterator const_iterator;
 
-  iterator begin() const;
-  iterator end() const;
+  [[nodiscard]] iterator begin() const;
+  [[nodiscard]] iterator end() const;
 
 #  if _LIBCPP_HAS_LOCALIZATION
   template <
@@ -908,17 +914,15 @@ private:
 
 inline _LIBCPP_HIDE_FROM_ABI void swap(path& __lhs, path& __rhs) noexcept { __lhs.swap(__rhs); }
 
-_LIBCPP_EXPORTED_FROM_ABI size_t hash_value(const path& __p) noexcept;
-
-_LIBCPP_AVAILABILITY_FILESYSTEM_LIBRARY_POP
+[[nodiscard]] _LIBCPP_EXPORTED_FROM_ABI size_t hash_value(const path& __p) noexcept;
 
 _LIBCPP_END_NAMESPACE_FILESYSTEM
 
 _LIBCPP_BEGIN_NAMESPACE_STD
 
 template <>
-struct _LIBCPP_AVAILABILITY_FILESYSTEM_LIBRARY hash<filesystem::path> : __unary_function<filesystem::path, size_t> {
-  _LIBCPP_HIDE_FROM_ABI size_t operator()(filesystem::path const& __p) const noexcept {
+struct hash<filesystem::path> : __unary_function<filesystem::path, size_t> {
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI size_t operator()(filesystem::path const& __p) const noexcept {
     return filesystem::hash_value(__p);
   }
 };
diff --git a/lib/libcxx/include/__filesystem/path_iterator.h b/lib/libcxx/include/__filesystem/path_iterator.h
index e0f601662d..dd408a76ca 100644
--- a/lib/libcxx/include/__filesystem/path_iterator.h
+++ b/lib/libcxx/include/__filesystem/path_iterator.h
@@ -52,7 +52,7 @@ public:
 
   _LIBCPP_HIDE_FROM_ABI iterator& operator=(const iterator&) = default;
 
-  _LIBCPP_HIDE_FROM_ABI reference operator*() const { return __stashed_elem_; }
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI reference operator*() const { return __stashed_elem_; }
 
   _LIBCPP_HIDE_FROM_ABI pointer operator->() const { return &__stashed_elem_; }
 
@@ -95,12 +95,10 @@ private:
   _ParserState __state_;
 };
 
-_LIBCPP_AVAILABILITY_FILESYSTEM_LIBRARY
 inline _LIBCPP_HIDE_FROM_ABI bool operator==(const path::iterator& __lhs, const path::iterator& __rhs) {
   return __lhs.__path_ptr_ == __rhs.__path_ptr_ && __lhs.__entry_.data() == __rhs.__entry_.data();
 }
 
-_LIBCPP_AVAILABILITY_FILESYSTEM_LIBRARY
 inline _LIBCPP_HIDE_FROM_ABI bool operator!=(const path::iterator& __lhs, const path::iterator& __rhs) {
   return !(__lhs == __rhs);
 }
diff --git a/lib/libcxx/include/__filesystem/perm_options.h b/lib/libcxx/include/__filesystem/perm_options.h
index 64c16ee60a..a2ab733eb2 100644
--- a/lib/libcxx/include/__filesystem/perm_options.h
+++ b/lib/libcxx/include/__filesystem/perm_options.h
@@ -22,19 +22,19 @@ _LIBCPP_BEGIN_NAMESPACE_FILESYSTEM
 
 enum class perm_options : unsigned char { replace = 1, add = 2, remove = 4, nofollow = 8 };
 
-_LIBCPP_HIDE_FROM_ABI inline constexpr perm_options operator&(perm_options __lhs, perm_options __rhs) {
+[[nodiscard]] _LIBCPP_HIDE_FROM_ABI inline constexpr perm_options operator&(perm_options __lhs, perm_options __rhs) {
   return static_cast<perm_options>(static_cast<unsigned>(__lhs) & static_cast<unsigned>(__rhs));
 }
 
-_LIBCPP_HIDE_FROM_ABI inline constexpr perm_options operator|(perm_options __lhs, perm_options __rhs) {
+[[nodiscard]] _LIBCPP_HIDE_FROM_ABI inline constexpr perm_options operator|(perm_options __lhs, perm_options __rhs) {
   return static_cast<perm_options>(static_cast<unsigned>(__lhs) | static_cast<unsigned>(__rhs));
 }
 
-_LIBCPP_HIDE_FROM_ABI inline constexpr perm_options operator^(perm_options __lhs, perm_options __rhs) {
+[[nodiscard]] _LIBCPP_HIDE_FROM_ABI inline constexpr perm_options operator^(perm_options __lhs, perm_options __rhs) {
   return static_cast<perm_options>(static_cast<unsigned>(__lhs) ^ static_cast<unsigned>(__rhs));
 }
 
-_LIBCPP_HIDE_FROM_ABI inline constexpr perm_options operator~(perm_options __lhs) {
+[[nodiscard]] _LIBCPP_HIDE_FROM_ABI inline constexpr perm_options operator~(perm_options __lhs) {
   return static_cast<perm_options>(~static_cast<unsigned>(__lhs));
 }
 
diff --git a/lib/libcxx/include/__filesystem/perms.h b/lib/libcxx/include/__filesystem/perms.h
index 458f1e6e53..042f249e12 100644
--- a/lib/libcxx/include/__filesystem/perms.h
+++ b/lib/libcxx/include/__filesystem/perms.h
@@ -51,19 +51,19 @@ enum class perms : unsigned {
   unknown    = 0xFFFF,
 };
 
-_LIBCPP_HIDE_FROM_ABI inline constexpr perms operator&(perms __lhs, perms __rhs) {
+[[nodiscard]] _LIBCPP_HIDE_FROM_ABI inline constexpr perms operator&(perms __lhs, perms __rhs) {
   return static_cast<perms>(static_cast<unsigned>(__lhs) & static_cast<unsigned>(__rhs));
 }
 
-_LIBCPP_HIDE_FROM_ABI inline constexpr perms operator|(perms __lhs, perms __rhs) {
+[[nodiscard]] _LIBCPP_HIDE_FROM_ABI inline constexpr perms operator|(perms __lhs, perms __rhs) {
   return static_cast<perms>(static_cast<unsigned>(__lhs) | static_cast<unsigned>(__rhs));
 }
 
-_LIBCPP_HIDE_FROM_ABI inline constexpr perms operator^(perms __lhs, perms __rhs) {
+[[nodiscard]] _LIBCPP_HIDE_FROM_ABI inline constexpr perms operator^(perms __lhs, perms __rhs) {
   return static_cast<perms>(static_cast<unsigned>(__lhs) ^ static_cast<unsigned>(__rhs));
 }
 
-_LIBCPP_HIDE_FROM_ABI inline constexpr perms operator~(perms __lhs) {
+[[nodiscard]] _LIBCPP_HIDE_FROM_ABI inline constexpr perms operator~(perms __lhs) {
   return static_cast<perms>(~static_cast<unsigned>(__lhs));
 }
 
diff --git a/lib/libcxx/include/__filesystem/recursive_directory_iterator.h b/lib/libcxx/include/__filesystem/recursive_directory_iterator.h
index ad01a9982b..18165b0031 100644
--- a/lib/libcxx/include/__filesystem/recursive_directory_iterator.h
+++ b/lib/libcxx/include/__filesystem/recursive_directory_iterator.h
@@ -33,8 +33,6 @@ _LIBCPP_PUSH_MACROS
 
 _LIBCPP_BEGIN_NAMESPACE_FILESYSTEM
 
-_LIBCPP_AVAILABILITY_FILESYSTEM_LIBRARY_PUSH
-
 class recursive_directory_iterator {
 public:
   using value_type        = directory_entry;
@@ -73,7 +71,7 @@ public:
 
   _LIBCPP_HIDE_FROM_ABI ~recursive_directory_iterator() = default;
 
-  _LIBCPP_HIDE_FROM_ABI const directory_entry& operator*() const { return __dereference(); }
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI const directory_entry& operator*() const { return __dereference(); }
 
   _LIBCPP_HIDE_FROM_ABI const directory_entry* operator->() const { return &__dereference(); }
 
@@ -87,14 +85,14 @@ public:
 
   _LIBCPP_HIDE_FROM_ABI recursive_directory_iterator& increment(error_code& __ec) { return __increment(&__ec); }
 
-  _LIBCPP_EXPORTED_FROM_ABI directory_options options() const;
-  _LIBCPP_EXPORTED_FROM_ABI int depth() const;
+  [[nodiscard]] _LIBCPP_EXPORTED_FROM_ABI directory_options options() const;
+  [[nodiscard]] _LIBCPP_EXPORTED_FROM_ABI int depth() const;
 
   _LIBCPP_HIDE_FROM_ABI void pop() { __pop(); }
 
   _LIBCPP_HIDE_FROM_ABI void pop(error_code& __ec) { __pop(&__ec); }
 
-  _LIBCPP_HIDE_FROM_ABI bool recursion_pending() const { return __rec_; }
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI bool recursion_pending() const { return __rec_; }
 
   _LIBCPP_HIDE_FROM_ABI void disable_recursion_pending() { __rec_ = false; }
 
@@ -132,27 +130,24 @@ operator!=(const recursive_directory_iterator& __lhs, const recursive_directory_
   return !(__lhs == __rhs);
 }
 // enable recursive_directory_iterator range-based for statements
-inline _LIBCPP_HIDE_FROM_ABI recursive_directory_iterator begin(recursive_directory_iterator __iter) noexcept {
+[[nodiscard]] inline _LIBCPP_HIDE_FROM_ABI recursive_directory_iterator
+begin(recursive_directory_iterator __iter) noexcept {
   return __iter;
 }
 
-inline _LIBCPP_HIDE_FROM_ABI recursive_directory_iterator end(recursive_directory_iterator) noexcept {
+[[nodiscard]] inline _LIBCPP_HIDE_FROM_ABI recursive_directory_iterator end(recursive_directory_iterator) noexcept {
   return recursive_directory_iterator();
 }
 
-_LIBCPP_AVAILABILITY_FILESYSTEM_LIBRARY_POP
-
 _LIBCPP_END_NAMESPACE_FILESYSTEM
 
 #  if _LIBCPP_STD_VER >= 20
 
 template <>
-_LIBCPP_AVAILABILITY_FILESYSTEM_LIBRARY inline constexpr bool
-    std::ranges::enable_borrowed_range<std::filesystem::recursive_directory_iterator> = true;
+inline constexpr bool std::ranges::enable_borrowed_range<std::filesystem::recursive_directory_iterator> = true;
 
 template <>
-_LIBCPP_AVAILABILITY_FILESYSTEM_LIBRARY inline constexpr bool
-    std::ranges::enable_view<std::filesystem::recursive_directory_iterator> = true;
+inline constexpr bool std::ranges::enable_view<std::filesystem::recursive_directory_iterator> = true;
 
 #  endif // _LIBCPP_STD_VER >= 20
 
diff --git a/lib/libcxx/include/__filesystem/space_info.h b/lib/libcxx/include/__filesystem/space_info.h
index 3fa57d3309..28f7ced40d 100644
--- a/lib/libcxx/include/__filesystem/space_info.h
+++ b/lib/libcxx/include/__filesystem/space_info.h
@@ -21,7 +21,7 @@
 
 _LIBCPP_BEGIN_NAMESPACE_FILESYSTEM
 
-struct _LIBCPP_EXPORTED_FROM_ABI space_info {
+struct space_info {
   uintmax_t capacity;
   uintmax_t free;
   uintmax_t available;
diff --git a/lib/libcxx/include/__filesystem/u8path.h b/lib/libcxx/include/__filesystem/u8path.h
index a701425e42..aabd2bbd3c 100644
--- a/lib/libcxx/include/__filesystem/u8path.h
+++ b/lib/libcxx/include/__filesystem/u8path.h
@@ -24,32 +24,32 @@
 
 _LIBCPP_BEGIN_NAMESPACE_FILESYSTEM
 
-_LIBCPP_AVAILABILITY_FILESYSTEM_LIBRARY_PUSH
-
+#  if !defined(_LIBCPP_WIN32API) || _LIBCPP_HAS_LOCALIZATION
 template <class _InputIt, __enable_if_t<__is_pathable<_InputIt>::value, int> = 0>
-_LIBCPP_HIDE_FROM_ABI _LIBCPP_DEPRECATED_WITH_CHAR8_T path u8path(_InputIt __f, _InputIt __l) {
+[[nodiscard]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_DEPRECATED_WITH_CHAR8_T path u8path(_InputIt __f, _InputIt __l) {
   static_assert(
-#  if _LIBCPP_HAS_CHAR8_T
+#    if _LIBCPP_HAS_CHAR8_T
       is_same<typename __is_pathable<_InputIt>::__char_type, char8_t>::value ||
-#  endif
+#    endif
           is_same<typename __is_pathable<_InputIt>::__char_type, char>::value,
       "u8path(Iter, Iter) requires Iter have a value_type of type 'char'"
       " or 'char8_t'");
-#  if defined(_LIBCPP_WIN32API)
+#    if defined(_LIBCPP_WIN32API)
   string __tmp(__f, __l);
   using _CVT = __widen_from_utf8<sizeof(wchar_t) * __CHAR_BIT__>;
   std::wstring __w;
   __w.reserve(__tmp.size());
   _CVT()(back_inserter(__w), __tmp.data(), __tmp.data() + __tmp.size());
   return path(__w);
-#  else
+#    else
   return path(__f, __l);
-#  endif /* !_LIBCPP_WIN32API */
+#    endif // defined(_LIBCPP_WIN32API)
 }
+#  endif // !defined(_LIBCPP_WIN32API) || _LIBCPP_HAS_LOCALIZATION
 
-#  if defined(_LIBCPP_WIN32API)
+#  if defined(_LIBCPP_WIN32API) && _LIBCPP_HAS_LOCALIZATION
 template <class _InputIt, __enable_if_t<__is_pathable<_InputIt>::value, int> = 0>
-_LIBCPP_HIDE_FROM_ABI _LIBCPP_DEPRECATED_WITH_CHAR8_T path u8path(_InputIt __f, _NullSentinel) {
+[[nodiscard]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_DEPRECATED_WITH_CHAR8_T path u8path(_InputIt __f, _NullSentinel) {
   static_assert(
 #    if _LIBCPP_HAS_CHAR8_T
       is_same<typename __is_pathable<_InputIt>::__char_type, char8_t>::value ||
@@ -67,10 +67,10 @@ _LIBCPP_HIDE_FROM_ABI _LIBCPP_DEPRECATED_WITH_CHAR8_T path u8path(_InputIt __f,
   _CVT()(back_inserter(__w), __tmp.data(), __tmp.data() + __tmp.size());
   return path(__w);
 }
-#  endif /* _LIBCPP_WIN32API */
+#  endif // defined(_LIBCPP_WIN32API) && _LIBCPP_HAS_LOCALIZATION
 
 template <class _Source, __enable_if_t<__is_pathable<_Source>::value, int> = 0>
-_LIBCPP_HIDE_FROM_ABI _LIBCPP_DEPRECATED_WITH_CHAR8_T path u8path(const _Source& __s) {
+[[nodiscard]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_DEPRECATED_WITH_CHAR8_T path u8path(const _Source& __s) {
   static_assert(
 #  if _LIBCPP_HAS_CHAR8_T
       is_same<typename __is_pathable<_Source>::__char_type, char8_t>::value ||
@@ -86,8 +86,6 @@ _LIBCPP_HIDE_FROM_ABI _LIBCPP_DEPRECATED_WITH_CHAR8_T path u8path(const _Source&
 #  endif
 }
 
-_LIBCPP_AVAILABILITY_FILESYSTEM_LIBRARY_POP
-
 _LIBCPP_END_NAMESPACE_FILESYSTEM
 
 #endif // _LIBCPP_STD_VER >= 17
diff --git a/lib/libcxx/include/__flat_map/flat_map.h b/lib/libcxx/include/__flat_map/flat_map.h
index bf193f6d3c..50487cada2 100644
--- a/lib/libcxx/include/__flat_map/flat_map.h
+++ b/lib/libcxx/include/__flat_map/flat_map.h
@@ -29,7 +29,6 @@
 #include <__flat_map/key_value_iterator.h>
 #include <__flat_map/sorted_unique.h>
 #include <__flat_map/utils.h>
-#include <__functional/invoke.h>
 #include <__functional/is_transparent.h>
 #include <__functional/operations.h>
 #include <__fwd/memory.h>
@@ -48,7 +47,7 @@
 #include <__ranges/container_compatible_range.h>
 #include <__ranges/drop_view.h>
 #include <__ranges/from_range.h>
-#include <__ranges/ref_view.h>
+#include <__ranges/range_adaptor.h>
 #include <__ranges/size.h>
 #include <__ranges/subrange.h>
 #include <__ranges/zip_view.h>
@@ -410,41 +409,45 @@ public:
   }
 
   // iterators
-  _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 iterator begin() noexcept {
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 iterator begin() noexcept {
     return iterator(__containers_.keys.begin(), __containers_.values.begin());
   }
 
-  _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 const_iterator begin() const noexcept {
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 const_iterator begin() const noexcept {
     return const_iterator(__containers_.keys.begin(), __containers_.values.begin());
   }
 
-  _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 iterator end() noexcept {
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 iterator end() noexcept {
     return iterator(__containers_.keys.end(), __containers_.values.end());
   }
 
-  _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 const_iterator end() const noexcept {
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 const_iterator end() const noexcept {
     return const_iterator(__containers_.keys.end(), __containers_.values.end());
   }
 
-  _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 reverse_iterator rbegin() noexcept {
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 reverse_iterator rbegin() noexcept {
     return reverse_iterator(end());
   }
-  _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 const_reverse_iterator rbegin() const noexcept {
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 const_reverse_iterator rbegin() const noexcept {
     return const_reverse_iterator(end());
   }
-  _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 reverse_iterator rend() noexcept {
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 reverse_iterator rend() noexcept {
     return reverse_iterator(begin());
   }
-  _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 const_reverse_iterator rend() const noexcept {
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 const_reverse_iterator rend() const noexcept {
     return const_reverse_iterator(begin());
   }
 
-  _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 const_iterator cbegin() const noexcept { return begin(); }
-  _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 const_iterator cend() const noexcept { return end(); }
-  _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 const_reverse_iterator crbegin() const noexcept {
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 const_iterator cbegin() const noexcept {
+    return begin();
+  }
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 const_iterator cend() const noexcept {
+    return end();
+  }
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 const_reverse_iterator crbegin() const noexcept {
     return const_reverse_iterator(end());
   }
-  _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 const_reverse_iterator crend() const noexcept {
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 const_reverse_iterator crend() const noexcept {
     return const_reverse_iterator(begin());
   }
 
@@ -453,11 +456,11 @@ public:
     return __containers_.keys.empty();
   }
 
-  _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 size_type size() const noexcept {
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 size_type size() const noexcept {
     return __containers_.keys.size();
   }
 
-  _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 size_type max_size() const noexcept {
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 size_type max_size() const noexcept {
     return std::min<size_type>(__containers_.keys.max_size(), __containers_.values.max_size());
   }
 
@@ -481,7 +484,7 @@ public:
     return try_emplace(std::forward<_Kp>(__x)).first->second;
   }
 
-  _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 mapped_type& at(const key_type& __x) {
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 mapped_type& at(const key_type& __x) {
     auto __it = find(__x);
     if (__it == end()) {
       std::__throw_out_of_range("flat_map::at(const key_type&): Key does not exist");
@@ -489,7 +492,7 @@ public:
     return __it->second;
   }
 
-  _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 const mapped_type& at(const key_type& __x) const {
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 const mapped_type& at(const key_type& __x) const {
     auto __it = find(__x);
     if (__it == end()) {
       std::__throw_out_of_range("flat_map::at(const key_type&) const: Key does not exist");
@@ -499,7 +502,7 @@ public:
 
   template <class _Kp>
     requires __is_compare_transparent
-  _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 mapped_type& at(const _Kp& __x) {
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 mapped_type& at(const _Kp& __x) {
     auto __it = find(__x);
     if (__it == end()) {
       std::__throw_out_of_range("flat_map::at(const K&): Key does not exist");
@@ -509,7 +512,7 @@ public:
 
   template <class _Kp>
     requires __is_compare_transparent
-  _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 const mapped_type& at(const _Kp& __x) const {
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 const mapped_type& at(const _Kp& __x) const {
     auto __it = find(__x);
     if (__it == end()) {
       std::__throw_out_of_range("flat_map::at(const K&) const: Key does not exist");
@@ -589,6 +592,15 @@ public:
     __append_sort_merge_unique</*WasSorted = */ false>(ranges::begin(__range), ranges::end(__range));
   }
 
+  template <_ContainerCompatibleRange<value_type> _Range>
+  _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 void insert_range(sorted_unique_t, _Range&& __range) {
+    if constexpr (ranges::sized_range<_Range>) {
+      __reserve(ranges::size(__range));
+    }
+
+    __append_sort_merge_unique</*WasSorted = */ true>(ranges::begin(__range), ranges::end(__range));
+  }
+
   _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 void insert(initializer_list<value_type> __il) {
     insert(__il.begin(), __il.end());
   }
@@ -597,7 +609,7 @@ public:
     insert(sorted_unique, __il.begin(), __il.end());
   }
 
-  _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 containers extract() && {
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 containers extract() && {
     auto __guard = std::__make_scope_guard([&]() noexcept { clear() /* noexcept */; });
     auto __ret   = std::move(__containers_);
     return __ret;
@@ -738,14 +750,17 @@ public:
     return iterator(std::move(__key_it), std::move(__mapped_it));
   }
 
-  _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 void swap(flat_map& __y) noexcept {
-    // warning: The spec has unconditional noexcept, which means that
-    // if any of the following functions throw an exception,
-    // std::terminate will be called.
-    // This is discussed in P2767, which hasn't been voted on yet.
+  _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 void
+  swap(flat_map& __y) noexcept(is_nothrow_swappable_v<key_container_type> &&
+                               is_nothrow_swappable_v<mapped_container_type> && is_nothrow_swappable_v<key_compare>) {
+    auto __on_failure = std::__make_exception_guard([&]() noexcept {
+      clear() /* noexcept */;
+      __y.clear() /* noexcept */;
+    });
     ranges::swap(__compare_, __y.__compare_);
     ranges::swap(__containers_.keys, __y.__containers_.keys);
     ranges::swap(__containers_.values, __y.__containers_.values);
+    __on_failure.__complete();
   }
 
   _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 void clear() noexcept {
@@ -754,116 +769,121 @@ public:
   }
 
   // observers
-  _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 key_compare key_comp() const { return __compare_; }
-  _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 value_compare value_comp() const {
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 key_compare key_comp() const { return __compare_; }
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 value_compare value_comp() const {
     return value_compare(__compare_);
   }
 
-  _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 const key_container_type& keys() const noexcept {
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 const key_container_type& keys() const noexcept {
     return __containers_.keys;
   }
-  _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 const mapped_container_type& values() const noexcept {
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 const mapped_container_type&
+  values() const noexcept {
     return __containers_.values;
   }
 
   // map operations
-  _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 iterator find(const key_type& __x) {
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 iterator find(const key_type& __x) {
     return __find_impl(*this, __x);
   }
 
-  _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 const_iterator find(const key_type& __x) const {
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 const_iterator find(const key_type& __x) const {
     return __find_impl(*this, __x);
   }
 
   template <class _Kp>
     requires __is_compare_transparent
-  _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 iterator find(const _Kp& __x) {
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 iterator find(const _Kp& __x) {
     return __find_impl(*this, __x);
   }
 
   template <class _Kp>
     requires __is_compare_transparent
-  _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 const_iterator find(const _Kp& __x) const {
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 const_iterator find(const _Kp& __x) const {
     return __find_impl(*this, __x);
   }
 
-  _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 size_type count(const key_type& __x) const {
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 size_type count(const key_type& __x) const {
     return contains(__x) ? 1 : 0;
   }
 
   template <class _Kp>
     requires __is_compare_transparent
-  _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 size_type count(const _Kp& __x) const {
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 size_type count(const _Kp& __x) const {
     return contains(__x) ? 1 : 0;
   }
 
-  _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 bool contains(const key_type& __x) const {
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 bool contains(const key_type& __x) const {
     return find(__x) != end();
   }
 
   template <class _Kp>
     requires __is_compare_transparent
-  _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 bool contains(const _Kp& __x) const {
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 bool contains(const _Kp& __x) const {
     return find(__x) != end();
   }
 
-  _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 iterator lower_bound(const key_type& __x) {
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 iterator lower_bound(const key_type& __x) {
     return __lower_bound<iterator>(*this, __x);
   }
 
-  _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 const_iterator lower_bound(const key_type& __x) const {
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 const_iterator
+  lower_bound(const key_type& __x) const {
     return __lower_bound<const_iterator>(*this, __x);
   }
 
   template <class _Kp>
     requires __is_compare_transparent
-  _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 iterator lower_bound(const _Kp& __x) {
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 iterator lower_bound(const _Kp& __x) {
     return __lower_bound<iterator>(*this, __x);
   }
 
   template <class _Kp>
     requires __is_compare_transparent
-  _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 const_iterator lower_bound(const _Kp& __x) const {
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 const_iterator lower_bound(const _Kp& __x) const {
     return __lower_bound<const_iterator>(*this, __x);
   }
 
-  _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 iterator upper_bound(const key_type& __x) {
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 iterator upper_bound(const key_type& __x) {
     return __upper_bound<iterator>(*this, __x);
   }
 
-  _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 const_iterator upper_bound(const key_type& __x) const {
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 const_iterator
+  upper_bound(const key_type& __x) const {
     return __upper_bound<const_iterator>(*this, __x);
   }
 
   template <class _Kp>
     requires __is_compare_transparent
-  _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 iterator upper_bound(const _Kp& __x) {
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 iterator upper_bound(const _Kp& __x) {
     return __upper_bound<iterator>(*this, __x);
   }
 
   template <class _Kp>
     requires __is_compare_transparent
-  _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 const_iterator upper_bound(const _Kp& __x) const {
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 const_iterator upper_bound(const _Kp& __x) const {
     return __upper_bound<const_iterator>(*this, __x);
   }
 
-  _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 pair<iterator, iterator> equal_range(const key_type& __x) {
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 pair<iterator, iterator>
+  equal_range(const key_type& __x) {
     return __equal_range_impl(*this, __x);
   }
 
-  _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 pair<const_iterator, const_iterator>
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 pair<const_iterator, const_iterator>
   equal_range(const key_type& __x) const {
     return __equal_range_impl(*this, __x);
   }
 
   template <class _Kp>
     requires __is_compare_transparent
-  _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 pair<iterator, iterator> equal_range(const _Kp& __x) {
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 pair<iterator, iterator>
+  equal_range(const _Kp& __x) {
     return __equal_range_impl(*this, __x);
   }
   template <class _Kp>
     requires __is_compare_transparent
-  _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 pair<const_iterator, const_iterator>
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 pair<const_iterator, const_iterator>
   equal_range(const _Kp& __x) const {
     return __equal_range_impl(*this, __x);
   }
@@ -878,7 +898,8 @@ public:
         __x.begin(), __x.end(), __y.begin(), __y.end(), std::__synth_three_way);
   }
 
-  friend _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 void swap(flat_map& __x, flat_map& __y) noexcept {
+  friend _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 void
+  swap(flat_map& __x, flat_map& __y) noexcept(noexcept(__x.swap(__y))) {
     __x.swap(__y);
   }
 
@@ -913,7 +934,7 @@ private:
         __compare_(std::forward<_CompArg>(__comp)...) {}
 
   _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 bool __is_sorted_and_unique(auto&& __key_container) const {
-    auto __greater_or_equal_to = [this](const auto& __x, const auto& __y) { return !__compare_(__x, __y); };
+    auto __greater_or_equal_to = [this](const auto& __x, const auto& __y) -> bool { return !__compare_(__x, __y); };
     return ranges::adjacent_find(__key_container, __greater_or_equal_to) == ranges::end(__key_container);
   }
 
@@ -946,7 +967,7 @@ private:
       auto __zv                  = ranges::views::zip(__containers_.keys, __containers_.values);
       auto __append_start_offset = __containers_.keys.size() - __num_of_appended;
       auto __end                 = __zv.end();
-      auto __compare_key         = [this](const auto& __p1, const auto& __p2) {
+      auto __compare_key         = [this](const auto& __p1, const auto& __p2) -> bool {
         return __compare_(std::get<0>(__p1), std::get<0>(__p2));
       };
       if constexpr (!_WasSorted) {
@@ -1125,8 +1146,7 @@ private:
 };
 
 template <class _KeyContainer, class _MappedContainer, class _Compare = less<typename _KeyContainer::value_type>>
-  requires(!__is_allocator<_Compare>::value && !__is_allocator<_KeyContainer>::value &&
-           !__is_allocator<_MappedContainer>::value &&
+  requires(!__is_allocator_v<_Compare> && !__is_allocator_v<_KeyContainer> && !__is_allocator_v<_MappedContainer> &&
            is_invocable_v<const _Compare&,
                           const typename _KeyContainer::value_type&,
                           const typename _KeyContainer::value_type&>)
@@ -1139,7 +1159,7 @@ flat_map(_KeyContainer, _MappedContainer, _Compare = _Compare())
 
 template <class _KeyContainer, class _MappedContainer, class _Allocator>
   requires(uses_allocator_v<_KeyContainer, _Allocator> && uses_allocator_v<_MappedContainer, _Allocator> &&
-           !__is_allocator<_KeyContainer>::value && !__is_allocator<_MappedContainer>::value)
+           !__is_allocator_v<_KeyContainer> && !__is_allocator_v<_MappedContainer>)
 flat_map(_KeyContainer, _MappedContainer, _Allocator)
     -> flat_map<typename _KeyContainer::value_type,
                 typename _MappedContainer::value_type,
@@ -1148,9 +1168,8 @@ flat_map(_KeyContainer, _MappedContainer, _Allocator)
                 _MappedContainer>;
 
 template <class _KeyContainer, class _MappedContainer, class _Compare, class _Allocator>
-  requires(!__is_allocator<_Compare>::value && !__is_allocator<_KeyContainer>::value &&
-           !__is_allocator<_MappedContainer>::value && uses_allocator_v<_KeyContainer, _Allocator> &&
-           uses_allocator_v<_MappedContainer, _Allocator> &&
+  requires(!__is_allocator_v<_Compare> && !__is_allocator_v<_KeyContainer> && !__is_allocator_v<_MappedContainer> &&
+           uses_allocator_v<_KeyContainer, _Allocator> && uses_allocator_v<_MappedContainer, _Allocator> &&
            is_invocable_v<const _Compare&,
                           const typename _KeyContainer::value_type&,
                           const typename _KeyContainer::value_type&>)
@@ -1162,8 +1181,7 @@ flat_map(_KeyContainer, _MappedContainer, _Compare, _Allocator)
                 _MappedContainer>;
 
 template <class _KeyContainer, class _MappedContainer, class _Compare = less<typename _KeyContainer::value_type>>
-  requires(!__is_allocator<_Compare>::value && !__is_allocator<_KeyContainer>::value &&
-           !__is_allocator<_MappedContainer>::value &&
+  requires(!__is_allocator_v<_Compare> && !__is_allocator_v<_KeyContainer> && !__is_allocator_v<_MappedContainer> &&
            is_invocable_v<const _Compare&,
                           const typename _KeyContainer::value_type&,
                           const typename _KeyContainer::value_type&>)
@@ -1176,7 +1194,7 @@ flat_map(sorted_unique_t, _KeyContainer, _MappedContainer, _Compare = _Compare()
 
 template <class _KeyContainer, class _MappedContainer, class _Allocator>
   requires(uses_allocator_v<_KeyContainer, _Allocator> && uses_allocator_v<_MappedContainer, _Allocator> &&
-           !__is_allocator<_KeyContainer>::value && !__is_allocator<_MappedContainer>::value)
+           !__is_allocator_v<_KeyContainer> && !__is_allocator_v<_MappedContainer>)
 flat_map(sorted_unique_t, _KeyContainer, _MappedContainer, _Allocator)
     -> flat_map<typename _KeyContainer::value_type,
                 typename _MappedContainer::value_type,
@@ -1185,9 +1203,8 @@ flat_map(sorted_unique_t, _KeyContainer, _MappedContainer, _Allocator)
                 _MappedContainer>;
 
 template <class _KeyContainer, class _MappedContainer, class _Compare, class _Allocator>
-  requires(!__is_allocator<_Compare>::value && !__is_allocator<_KeyContainer>::value &&
-           !__is_allocator<_MappedContainer>::value && uses_allocator_v<_KeyContainer, _Allocator> &&
-           uses_allocator_v<_MappedContainer, _Allocator> &&
+  requires(!__is_allocator_v<_Compare> && !__is_allocator_v<_KeyContainer> && !__is_allocator_v<_MappedContainer> &&
+           uses_allocator_v<_KeyContainer, _Allocator> && uses_allocator_v<_MappedContainer, _Allocator> &&
            is_invocable_v<const _Compare&,
                           const typename _KeyContainer::value_type&,
                           const typename _KeyContainer::value_type&>)
@@ -1199,19 +1216,19 @@ flat_map(sorted_unique_t, _KeyContainer, _MappedContainer, _Compare, _Allocator)
                 _MappedContainer>;
 
 template <class _InputIterator, class _Compare = less<__iter_key_type<_InputIterator>>>
-  requires(__has_input_iterator_category<_InputIterator>::value && !__is_allocator<_Compare>::value)
+  requires(__has_input_iterator_category<_InputIterator>::value && !__is_allocator_v<_Compare>)
 flat_map(_InputIterator, _InputIterator, _Compare = _Compare())
     -> flat_map<__iter_key_type<_InputIterator>, __iter_mapped_type<_InputIterator>, _Compare>;
 
 template <class _InputIterator, class _Compare = less<__iter_key_type<_InputIterator>>>
-  requires(__has_input_iterator_category<_InputIterator>::value && !__is_allocator<_Compare>::value)
+  requires(__has_input_iterator_category<_InputIterator>::value && !__is_allocator_v<_Compare>)
 flat_map(sorted_unique_t, _InputIterator, _InputIterator, _Compare = _Compare())
     -> flat_map<__iter_key_type<_InputIterator>, __iter_mapped_type<_InputIterator>, _Compare>;
 
 template <ranges::input_range _Range,
           class _Compare   = less<__range_key_type<_Range>>,
           class _Allocator = allocator<byte>,
-          class            = __enable_if_t<!__is_allocator<_Compare>::value && __is_allocator<_Allocator>::value>>
+          class            = __enable_if_t<!__is_allocator_v<_Compare> && __is_allocator_v<_Allocator>>>
 flat_map(from_range_t, _Range&&, _Compare = _Compare(), _Allocator = _Allocator()) -> flat_map<
     __range_key_type<_Range>,
     __range_mapped_type<_Range>,
@@ -1219,7 +1236,7 @@ flat_map(from_range_t, _Range&&, _Compare = _Compare(), _Allocator = _Allocator(
     vector<__range_key_type<_Range>, __allocator_traits_rebind_t<_Allocator, __range_key_type<_Range>>>,
     vector<__range_mapped_type<_Range>, __allocator_traits_rebind_t<_Allocator, __range_mapped_type<_Range>>>>;
 
-template <ranges::input_range _Range, class _Allocator, class = __enable_if_t<__is_allocator<_Allocator>::value>>
+template <ranges::input_range _Range, class _Allocator, class = __enable_if_t<__is_allocator_v<_Allocator>>>
 flat_map(from_range_t, _Range&&, _Allocator) -> flat_map<
     __range_key_type<_Range>,
     __range_mapped_type<_Range>,
@@ -1228,11 +1245,11 @@ flat_map(from_range_t, _Range&&, _Allocator) -> flat_map<
     vector<__range_mapped_type<_Range>, __allocator_traits_rebind_t<_Allocator, __range_mapped_type<_Range>>>>;
 
 template <class _Key, class _Tp, class _Compare = less<_Key>>
-  requires(!__is_allocator<_Compare>::value)
+  requires(!__is_allocator_v<_Compare>)
 flat_map(initializer_list<pair<_Key, _Tp>>, _Compare = _Compare()) -> flat_map<_Key, _Tp, _Compare>;
 
 template <class _Key, class _Tp, class _Compare = less<_Key>>
-  requires(!__is_allocator<_Compare>::value)
+  requires(!__is_allocator_v<_Compare>)
 flat_map(sorted_unique_t, initializer_list<pair<_Key, _Tp>>, _Compare = _Compare()) -> flat_map<_Key, _Tp, _Compare>;
 
 template <class _Key, class _Tp, class _Compare, class _KeyContainer, class _MappedContainer, class _Allocator>
diff --git a/lib/libcxx/include/__flat_map/flat_multimap.h b/lib/libcxx/include/__flat_map/flat_multimap.h
index 0af6aac00c..72e3b5f216 100644
--- a/lib/libcxx/include/__flat_map/flat_multimap.h
+++ b/lib/libcxx/include/__flat_map/flat_multimap.h
@@ -22,7 +22,6 @@
 #include <__algorithm/upper_bound.h>
 #include <__assert>
 #include <__compare/synth_three_way.h>
-#include <__concepts/convertible_to.h>
 #include <__concepts/swappable.h>
 #include <__config>
 #include <__cstddef/byte.h>
@@ -30,7 +29,6 @@
 #include <__flat_map/key_value_iterator.h>
 #include <__flat_map/sorted_equivalent.h>
 #include <__flat_map/utils.h>
-#include <__functional/invoke.h>
 #include <__functional/is_transparent.h>
 #include <__functional/operations.h>
 #include <__fwd/vector.h>
@@ -47,7 +45,7 @@
 #include <__ranges/container_compatible_range.h>
 #include <__ranges/drop_view.h>
 #include <__ranges/from_range.h>
-#include <__ranges/ref_view.h>
+#include <__ranges/range_adaptor.h>
 #include <__ranges/size.h>
 #include <__ranges/subrange.h>
 #include <__ranges/zip_view.h>
@@ -57,14 +55,12 @@
 #include <__type_traits/is_allocator.h>
 #include <__type_traits/is_nothrow_constructible.h>
 #include <__type_traits/is_same.h>
-#include <__type_traits/maybe_const.h>
 #include <__utility/exception_guard.h>
 #include <__utility/move.h>
 #include <__utility/pair.h>
 #include <__utility/scope_guard.h>
 #include <__vector/vector.h>
 #include <initializer_list>
-#include <stdexcept>
 
 #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
 #  pragma GCC system_header
@@ -114,11 +110,12 @@ public:
   class value_compare {
   private:
     _LIBCPP_NO_UNIQUE_ADDRESS key_compare __comp_;
-    _LIBCPP_HIDE_FROM_ABI value_compare(key_compare __c) : __comp_(__c) {}
+    _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 value_compare(key_compare __c) : __comp_(__c) {}
     friend flat_multimap;
 
   public:
-    _LIBCPP_HIDE_FROM_ABI bool operator()(const_reference __x, const_reference __y) const {
+    _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 bool
+    operator()(const_reference __x, const_reference __y) const {
       return __comp_(__x.first, __y.first);
     }
   };
@@ -137,17 +134,17 @@ private:
 
 public:
   // [flat.map.cons], construct/copy/destroy
-  _LIBCPP_HIDE_FROM_ABI flat_multimap() noexcept(
+  _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 flat_multimap() noexcept(
       is_nothrow_default_constructible_v<_KeyContainer> && is_nothrow_default_constructible_v<_MappedContainer> &&
       is_nothrow_default_constructible_v<_Compare>)
       : __containers_(), __compare_() {}
 
-  _LIBCPP_HIDE_FROM_ABI flat_multimap(const flat_multimap&) = default;
+  _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 flat_multimap(const flat_multimap&) = default;
 
   // The copy/move constructors are not specified in the spec, which means they should be defaulted.
   // However, the move constructor can potentially leave a moved-from object in an inconsistent
   // state if an exception is thrown.
-  _LIBCPP_HIDE_FROM_ABI flat_multimap(flat_multimap&& __other) noexcept(
+  _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 flat_multimap(flat_multimap&& __other) noexcept(
       is_nothrow_move_constructible_v<_KeyContainer> && is_nothrow_move_constructible_v<_MappedContainer> &&
       is_nothrow_move_constructible_v<_Compare>)
 #  if _LIBCPP_HAS_EXCEPTIONS
@@ -168,7 +165,8 @@ public:
 
   template <class _Allocator>
     requires __allocator_ctor_constraint<_Allocator>
-  _LIBCPP_HIDE_FROM_ABI flat_multimap(const flat_multimap& __other, const _Allocator& __alloc)
+  _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26
+  flat_multimap(const flat_multimap& __other, const _Allocator& __alloc)
       : flat_multimap(__ctor_uses_allocator_tag{},
                       __alloc,
                       __other.__containers_.keys,
@@ -177,7 +175,7 @@ public:
 
   template <class _Allocator>
     requires __allocator_ctor_constraint<_Allocator>
-  _LIBCPP_HIDE_FROM_ABI flat_multimap(flat_multimap&& __other, const _Allocator& __alloc)
+  _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 flat_multimap(flat_multimap&& __other, const _Allocator& __alloc)
 #  if _LIBCPP_HAS_EXCEPTIONS
       try
 #  endif // _LIBCPP_HAS_EXCEPTIONS
@@ -194,7 +192,7 @@ public:
 #  endif // _LIBCPP_HAS_EXCEPTIONS
   }
 
-  _LIBCPP_HIDE_FROM_ABI flat_multimap(
+  _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 flat_multimap(
       key_container_type __key_cont, mapped_container_type __mapped_cont, const key_compare& __comp = key_compare())
       : __containers_{.keys = std::move(__key_cont), .values = std::move(__mapped_cont)}, __compare_(__comp) {
     _LIBCPP_ASSERT_VALID_INPUT_RANGE(__containers_.keys.size() == __containers_.values.size(),
@@ -204,7 +202,7 @@ public:
 
   template <class _Allocator>
     requires __allocator_ctor_constraint<_Allocator>
-  _LIBCPP_HIDE_FROM_ABI flat_multimap(
+  _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 flat_multimap(
       const key_container_type& __key_cont, const mapped_container_type& __mapped_cont, const _Allocator& __alloc)
       : flat_multimap(__ctor_uses_allocator_tag{}, __alloc, __key_cont, __mapped_cont) {
     _LIBCPP_ASSERT_VALID_INPUT_RANGE(__containers_.keys.size() == __containers_.values.size(),
@@ -214,22 +212,22 @@ public:
 
   template <class _Allocator>
     requires __allocator_ctor_constraint<_Allocator>
-  _LIBCPP_HIDE_FROM_ABI
-  flat_multimap(const key_container_type& __key_cont,
-                const mapped_container_type& __mapped_cont,
-                const key_compare& __comp,
-                const _Allocator& __alloc)
+  _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 flat_multimap(
+      const key_container_type& __key_cont,
+      const mapped_container_type& __mapped_cont,
+      const key_compare& __comp,
+      const _Allocator& __alloc)
       : flat_multimap(__ctor_uses_allocator_tag{}, __alloc, __key_cont, __mapped_cont, __comp) {
     _LIBCPP_ASSERT_VALID_INPUT_RANGE(__containers_.keys.size() == __containers_.values.size(),
                                      "flat_multimap keys and mapped containers have different size");
     __sort();
   }
 
-  _LIBCPP_HIDE_FROM_ABI
-  flat_multimap(sorted_equivalent_t,
-                key_container_type __key_cont,
-                mapped_container_type __mapped_cont,
-                const key_compare& __comp = key_compare())
+  _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 flat_multimap(
+      sorted_equivalent_t,
+      key_container_type __key_cont,
+      mapped_container_type __mapped_cont,
+      const key_compare& __comp = key_compare())
       : __containers_{.keys = std::move(__key_cont), .values = std::move(__mapped_cont)}, __compare_(__comp) {
     _LIBCPP_ASSERT_VALID_INPUT_RANGE(__containers_.keys.size() == __containers_.values.size(),
                                      "flat_multimap keys and mapped containers have different size");
@@ -238,11 +236,11 @@ public:
 
   template <class _Allocator>
     requires __allocator_ctor_constraint<_Allocator>
-  _LIBCPP_HIDE_FROM_ABI
-  flat_multimap(sorted_equivalent_t,
-                const key_container_type& __key_cont,
-                const mapped_container_type& __mapped_cont,
-                const _Allocator& __alloc)
+  _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 flat_multimap(
+      sorted_equivalent_t,
+      const key_container_type& __key_cont,
+      const mapped_container_type& __mapped_cont,
+      const _Allocator& __alloc)
       : flat_multimap(__ctor_uses_allocator_tag{}, __alloc, __key_cont, __mapped_cont) {
     _LIBCPP_ASSERT_VALID_INPUT_RANGE(__containers_.keys.size() == __containers_.values.size(),
                                      "flat_multimap keys and mapped containers have different size");
@@ -251,33 +249,35 @@ public:
 
   template <class _Allocator>
     requires __allocator_ctor_constraint<_Allocator>
-  _LIBCPP_HIDE_FROM_ABI
-  flat_multimap(sorted_equivalent_t,
-                const key_container_type& __key_cont,
-                const mapped_container_type& __mapped_cont,
-                const key_compare& __comp,
-                const _Allocator& __alloc)
+  _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 flat_multimap(
+      sorted_equivalent_t,
+      const key_container_type& __key_cont,
+      const mapped_container_type& __mapped_cont,
+      const key_compare& __comp,
+      const _Allocator& __alloc)
       : flat_multimap(__ctor_uses_allocator_tag{}, __alloc, __key_cont, __mapped_cont, __comp) {
     _LIBCPP_ASSERT_VALID_INPUT_RANGE(__containers_.keys.size() == __containers_.values.size(),
                                      "flat_multimap keys and mapped containers have different size");
     _LIBCPP_ASSERT_SEMANTIC_REQUIREMENT(__is_sorted(__containers_.keys), "Key container is not sorted");
   }
 
-  _LIBCPP_HIDE_FROM_ABI explicit flat_multimap(const key_compare& __comp) : __containers_(), __compare_(__comp) {}
+  _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 explicit flat_multimap(const key_compare& __comp)
+      : __containers_(), __compare_(__comp) {}
 
   template <class _Allocator>
     requires __allocator_ctor_constraint<_Allocator>
-  _LIBCPP_HIDE_FROM_ABI flat_multimap(const key_compare& __comp, const _Allocator& __alloc)
+  _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26
+  flat_multimap(const key_compare& __comp, const _Allocator& __alloc)
       : flat_multimap(__ctor_uses_allocator_empty_tag{}, __alloc, __comp) {}
 
   template <class _Allocator>
     requires __allocator_ctor_constraint<_Allocator>
-  _LIBCPP_HIDE_FROM_ABI explicit flat_multimap(const _Allocator& __alloc)
+  _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 explicit flat_multimap(const _Allocator& __alloc)
       : flat_multimap(__ctor_uses_allocator_empty_tag{}, __alloc) {}
 
   template <class _InputIterator>
     requires __has_input_iterator_category<_InputIterator>::value
-  _LIBCPP_HIDE_FROM_ABI
+  _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26
   flat_multimap(_InputIterator __first, _InputIterator __last, const key_compare& __comp = key_compare())
       : __containers_(), __compare_(__comp) {
     insert(__first, __last);
@@ -285,7 +285,7 @@ public:
 
   template <class _InputIterator, class _Allocator>
     requires(__has_input_iterator_category<_InputIterator>::value && __allocator_ctor_constraint<_Allocator>)
-  _LIBCPP_HIDE_FROM_ABI
+  _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26
   flat_multimap(_InputIterator __first, _InputIterator __last, const key_compare& __comp, const _Allocator& __alloc)
       : flat_multimap(__ctor_uses_allocator_empty_tag{}, __alloc, __comp) {
     insert(__first, __last);
@@ -293,91 +293,99 @@ public:
 
   template <class _InputIterator, class _Allocator>
     requires(__has_input_iterator_category<_InputIterator>::value && __allocator_ctor_constraint<_Allocator>)
-  _LIBCPP_HIDE_FROM_ABI flat_multimap(_InputIterator __first, _InputIterator __last, const _Allocator& __alloc)
+  _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26
+  flat_multimap(_InputIterator __first, _InputIterator __last, const _Allocator& __alloc)
       : flat_multimap(__ctor_uses_allocator_empty_tag{}, __alloc) {
     insert(__first, __last);
   }
 
   template <_ContainerCompatibleRange<value_type> _Range>
-  _LIBCPP_HIDE_FROM_ABI flat_multimap(from_range_t __fr, _Range&& __rg)
+  _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 flat_multimap(from_range_t __fr, _Range&& __rg)
       : flat_multimap(__fr, std::forward<_Range>(__rg), key_compare()) {}
 
   template <_ContainerCompatibleRange<value_type> _Range, class _Allocator>
     requires __allocator_ctor_constraint<_Allocator>
-  _LIBCPP_HIDE_FROM_ABI flat_multimap(from_range_t, _Range&& __rg, const _Allocator& __alloc)
+  _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26
+  flat_multimap(from_range_t, _Range&& __rg, const _Allocator& __alloc)
       : flat_multimap(__ctor_uses_allocator_empty_tag{}, __alloc) {
     insert_range(std::forward<_Range>(__rg));
   }
 
   template <_ContainerCompatibleRange<value_type> _Range>
-  _LIBCPP_HIDE_FROM_ABI flat_multimap(from_range_t, _Range&& __rg, const key_compare& __comp) : flat_multimap(__comp) {
+  _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26
+  flat_multimap(from_range_t, _Range&& __rg, const key_compare& __comp)
+      : flat_multimap(__comp) {
     insert_range(std::forward<_Range>(__rg));
   }
 
   template <_ContainerCompatibleRange<value_type> _Range, class _Allocator>
     requires __allocator_ctor_constraint<_Allocator>
-  _LIBCPP_HIDE_FROM_ABI flat_multimap(from_range_t, _Range&& __rg, const key_compare& __comp, const _Allocator& __alloc)
+  _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26
+  flat_multimap(from_range_t, _Range&& __rg, const key_compare& __comp, const _Allocator& __alloc)
       : flat_multimap(__ctor_uses_allocator_empty_tag{}, __alloc, __comp) {
     insert_range(std::forward<_Range>(__rg));
   }
 
   template <class _InputIterator>
     requires __has_input_iterator_category<_InputIterator>::value
-  _LIBCPP_HIDE_FROM_ABI flat_multimap(
+  _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 flat_multimap(
       sorted_equivalent_t, _InputIterator __first, _InputIterator __last, const key_compare& __comp = key_compare())
       : __containers_(), __compare_(__comp) {
     insert(sorted_equivalent, __first, __last);
   }
   template <class _InputIterator, class _Allocator>
     requires(__has_input_iterator_category<_InputIterator>::value && __allocator_ctor_constraint<_Allocator>)
-  _LIBCPP_HIDE_FROM_ABI
-  flat_multimap(sorted_equivalent_t,
-                _InputIterator __first,
-                _InputIterator __last,
-                const key_compare& __comp,
-                const _Allocator& __alloc)
+  _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 flat_multimap(
+      sorted_equivalent_t,
+      _InputIterator __first,
+      _InputIterator __last,
+      const key_compare& __comp,
+      const _Allocator& __alloc)
       : flat_multimap(__ctor_uses_allocator_empty_tag{}, __alloc, __comp) {
     insert(sorted_equivalent, __first, __last);
   }
 
   template <class _InputIterator, class _Allocator>
     requires(__has_input_iterator_category<_InputIterator>::value && __allocator_ctor_constraint<_Allocator>)
-  _LIBCPP_HIDE_FROM_ABI
+  _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26
   flat_multimap(sorted_equivalent_t, _InputIterator __first, _InputIterator __last, const _Allocator& __alloc)
       : flat_multimap(__ctor_uses_allocator_empty_tag{}, __alloc) {
     insert(sorted_equivalent, __first, __last);
   }
 
-  _LIBCPP_HIDE_FROM_ABI flat_multimap(initializer_list<value_type> __il, const key_compare& __comp = key_compare())
+  _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26
+  flat_multimap(initializer_list<value_type> __il, const key_compare& __comp = key_compare())
       : flat_multimap(__il.begin(), __il.end(), __comp) {}
 
   template <class _Allocator>
     requires __allocator_ctor_constraint<_Allocator>
-  _LIBCPP_HIDE_FROM_ABI
+  _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26
   flat_multimap(initializer_list<value_type> __il, const key_compare& __comp, const _Allocator& __alloc)
       : flat_multimap(__il.begin(), __il.end(), __comp, __alloc) {}
 
   template <class _Allocator>
     requires __allocator_ctor_constraint<_Allocator>
-  _LIBCPP_HIDE_FROM_ABI flat_multimap(initializer_list<value_type> __il, const _Allocator& __alloc)
+  _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26
+  flat_multimap(initializer_list<value_type> __il, const _Allocator& __alloc)
       : flat_multimap(__il.begin(), __il.end(), __alloc) {}
 
-  _LIBCPP_HIDE_FROM_ABI
+  _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26
   flat_multimap(sorted_equivalent_t, initializer_list<value_type> __il, const key_compare& __comp = key_compare())
       : flat_multimap(sorted_equivalent, __il.begin(), __il.end(), __comp) {}
 
   template <class _Allocator>
     requires __allocator_ctor_constraint<_Allocator>
-  _LIBCPP_HIDE_FROM_ABI flat_multimap(
+  _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 flat_multimap(
       sorted_equivalent_t, initializer_list<value_type> __il, const key_compare& __comp, const _Allocator& __alloc)
       : flat_multimap(sorted_equivalent, __il.begin(), __il.end(), __comp, __alloc) {}
 
   template <class _Allocator>
     requires __allocator_ctor_constraint<_Allocator>
-  _LIBCPP_HIDE_FROM_ABI flat_multimap(sorted_equivalent_t, initializer_list<value_type> __il, const _Allocator& __alloc)
+  _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26
+  flat_multimap(sorted_equivalent_t, initializer_list<value_type> __il, const _Allocator& __alloc)
       : flat_multimap(sorted_equivalent, __il.begin(), __il.end(), __alloc) {}
 
-  _LIBCPP_HIDE_FROM_ABI flat_multimap& operator=(initializer_list<value_type> __il) {
+  _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 flat_multimap& operator=(initializer_list<value_type> __il) {
     clear();
     insert(__il);
     return *this;
@@ -386,9 +394,9 @@ public:
   // copy/move assignment are not specified in the spec (defaulted)
   // but move assignment can potentially leave moved from object in an inconsistent
   // state if an exception is thrown
-  _LIBCPP_HIDE_FROM_ABI flat_multimap& operator=(const flat_multimap&) = default;
+  _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 flat_multimap& operator=(const flat_multimap&) = default;
 
-  _LIBCPP_HIDE_FROM_ABI flat_multimap& operator=(flat_multimap&& __other) noexcept(
+  _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 flat_multimap& operator=(flat_multimap&& __other) noexcept(
       is_nothrow_move_assignable_v<_KeyContainer> && is_nothrow_move_assignable_v<_MappedContainer> &&
       is_nothrow_move_assignable_v<_Compare>) {
     auto __clear_other_guard = std::__make_scope_guard([&]() noexcept { __other.clear() /* noexcept */; });
@@ -400,38 +408,58 @@ public:
   }
 
   // iterators
-  _LIBCPP_HIDE_FROM_ABI iterator begin() noexcept {
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 iterator begin() noexcept {
     return iterator(__containers_.keys.begin(), __containers_.values.begin());
   }
 
-  _LIBCPP_HIDE_FROM_ABI const_iterator begin() const noexcept {
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 const_iterator begin() const noexcept {
     return const_iterator(__containers_.keys.begin(), __containers_.values.begin());
   }
 
-  _LIBCPP_HIDE_FROM_ABI iterator end() noexcept {
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 iterator end() noexcept {
     return iterator(__containers_.keys.end(), __containers_.values.end());
   }
 
-  _LIBCPP_HIDE_FROM_ABI const_iterator end() const noexcept {
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 const_iterator end() const noexcept {
     return const_iterator(__containers_.keys.end(), __containers_.values.end());
   }
 
-  _LIBCPP_HIDE_FROM_ABI reverse_iterator rbegin() noexcept { return reverse_iterator(end()); }
-  _LIBCPP_HIDE_FROM_ABI const_reverse_iterator rbegin() const noexcept { return const_reverse_iterator(end()); }
-  _LIBCPP_HIDE_FROM_ABI reverse_iterator rend() noexcept { return reverse_iterator(begin()); }
-  _LIBCPP_HIDE_FROM_ABI const_reverse_iterator rend() const noexcept { return const_reverse_iterator(begin()); }
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 reverse_iterator rbegin() noexcept {
+    return reverse_iterator(end());
+  }
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 const_reverse_iterator rbegin() const noexcept {
+    return const_reverse_iterator(end());
+  }
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 reverse_iterator rend() noexcept {
+    return reverse_iterator(begin());
+  }
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 const_reverse_iterator rend() const noexcept {
+    return const_reverse_iterator(begin());
+  }
 
-  _LIBCPP_HIDE_FROM_ABI const_iterator cbegin() const noexcept { return begin(); }
-  _LIBCPP_HIDE_FROM_ABI const_iterator cend() const noexcept { return end(); }
-  _LIBCPP_HIDE_FROM_ABI const_reverse_iterator crbegin() const noexcept { return const_reverse_iterator(end()); }
-  _LIBCPP_HIDE_FROM_ABI const_reverse_iterator crend() const noexcept { return const_reverse_iterator(begin()); }
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 const_iterator cbegin() const noexcept {
+    return begin();
+  }
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 const_iterator cend() const noexcept {
+    return end();
+  }
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 const_reverse_iterator crbegin() const noexcept {
+    return const_reverse_iterator(end());
+  }
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 const_reverse_iterator crend() const noexcept {
+    return const_reverse_iterator(begin());
+  }
 
   // [flat.map.capacity], capacity
-  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI bool empty() const noexcept { return __containers_.keys.empty(); }
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 bool empty() const noexcept {
+    return __containers_.keys.empty();
+  }
 
-  _LIBCPP_HIDE_FROM_ABI size_type size() const noexcept { return __containers_.keys.size(); }
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 size_type size() const noexcept {
+    return __containers_.keys.size();
+  }
 
-  _LIBCPP_HIDE_FROM_ABI size_type max_size() const noexcept {
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 size_type max_size() const noexcept {
     return std::min<size_type>(__containers_.keys.max_size(), __containers_.values.max_size());
   }
 
@@ -439,7 +467,7 @@ public:
   template <class... _Args>
     requires is_constructible_v<pair<key_type, mapped_type>, _Args...> && is_move_constructible_v<key_type> &&
              is_move_constructible_v<mapped_type>
-  _LIBCPP_HIDE_FROM_ABI iterator emplace(_Args&&... __args) {
+  _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 iterator emplace(_Args&&... __args) {
     std::pair<key_type, mapped_type> __pair(std::forward<_Args>(__args)...);
     auto __key_it    = std::upper_bound(__containers_.keys.begin(), __containers_.keys.end(), __pair.first, __compare_);
     auto __mapped_it = __corresponding_mapped_it(*this, __key_it);
@@ -450,7 +478,7 @@ public:
 
   template <class... _Args>
     requires is_constructible_v<pair<key_type, mapped_type>, _Args...>
-  _LIBCPP_HIDE_FROM_ABI iterator emplace_hint(const_iterator __hint, _Args&&... __args) {
+  _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 iterator emplace_hint(const_iterator __hint, _Args&&... __args) {
     std::pair<key_type, mapped_type> __pair(std::forward<_Args>(__args)...);
 
     auto __prev_larger  = __hint != cbegin() && __compare_(__pair.first, (__hint - 1)->first);
@@ -490,33 +518,35 @@ public:
         *this, __key_iter, __mapped_iter, std::move(__pair.first), std::move(__pair.second));
   }
 
-  _LIBCPP_HIDE_FROM_ABI iterator insert(const value_type& __x) { return emplace(__x); }
+  _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 iterator insert(const value_type& __x) { return emplace(__x); }
 
-  _LIBCPP_HIDE_FROM_ABI iterator insert(value_type&& __x) { return emplace(std::move(__x)); }
+  _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 iterator insert(value_type&& __x) {
+    return emplace(std::move(__x));
+  }
 
-  _LIBCPP_HIDE_FROM_ABI iterator insert(const_iterator __hint, const value_type& __x) {
+  _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 iterator insert(const_iterator __hint, const value_type& __x) {
     return emplace_hint(__hint, __x);
   }
 
-  _LIBCPP_HIDE_FROM_ABI iterator insert(const_iterator __hint, value_type&& __x) {
+  _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 iterator insert(const_iterator __hint, value_type&& __x) {
     return emplace_hint(__hint, std::move(__x));
   }
 
   template <class _PairLike>
     requires is_constructible_v<pair<key_type, mapped_type>, _PairLike>
-  _LIBCPP_HIDE_FROM_ABI iterator insert(_PairLike&& __x) {
+  _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 iterator insert(_PairLike&& __x) {
     return emplace(std::forward<_PairLike>(__x));
   }
 
   template <class _PairLike>
     requires is_constructible_v<pair<key_type, mapped_type>, _PairLike>
-  _LIBCPP_HIDE_FROM_ABI iterator insert(const_iterator __hint, _PairLike&& __x) {
+  _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 iterator insert(const_iterator __hint, _PairLike&& __x) {
     return emplace_hint(__hint, std::forward<_PairLike>(__x));
   }
 
   template <class _InputIterator>
     requires __has_input_iterator_category<_InputIterator>::value
-  _LIBCPP_HIDE_FROM_ABI void insert(_InputIterator __first, _InputIterator __last) {
+  _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 void insert(_InputIterator __first, _InputIterator __last) {
     if constexpr (sized_sentinel_for<_InputIterator, _InputIterator>) {
       __reserve(__last - __first);
     }
@@ -525,7 +555,8 @@ public:
 
   template <class _InputIterator>
     requires __has_input_iterator_category<_InputIterator>::value
-  _LIBCPP_HIDE_FROM_ABI void insert(sorted_equivalent_t, _InputIterator __first, _InputIterator __last) {
+  _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 void
+  insert(sorted_equivalent_t, _InputIterator __first, _InputIterator __last) {
     if constexpr (sized_sentinel_for<_InputIterator, _InputIterator>) {
       __reserve(__last - __first);
     }
@@ -534,7 +565,7 @@ public:
   }
 
   template <_ContainerCompatibleRange<value_type> _Range>
-  _LIBCPP_HIDE_FROM_ABI void insert_range(_Range&& __range) {
+  _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 void insert_range(_Range&& __range) {
     if constexpr (ranges::sized_range<_Range>) {
       __reserve(ranges::size(__range));
     }
@@ -542,19 +573,32 @@ public:
     __append_sort_merge</*WasSorted = */ false>(ranges::begin(__range), ranges::end(__range));
   }
 
-  _LIBCPP_HIDE_FROM_ABI void insert(initializer_list<value_type> __il) { insert(__il.begin(), __il.end()); }
+  template <_ContainerCompatibleRange<value_type> _Range>
+  _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 void insert_range(sorted_equivalent_t, _Range&& __range) {
+    if constexpr (ranges::sized_range<_Range>) {
+      __reserve(ranges::size(__range));
+    }
 
-  _LIBCPP_HIDE_FROM_ABI void insert(sorted_equivalent_t, initializer_list<value_type> __il) {
+    __append_sort_merge</*WasSorted = */ true>(ranges::begin(__range), ranges::end(__range));
+  }
+
+  _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 void insert(initializer_list<value_type> __il) {
+    insert(__il.begin(), __il.end());
+  }
+
+  _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 void
+  insert(sorted_equivalent_t, initializer_list<value_type> __il) {
     insert(sorted_equivalent, __il.begin(), __il.end());
   }
 
-  _LIBCPP_HIDE_FROM_ABI containers extract() && {
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 containers extract() && {
     auto __guard = std::__make_scope_guard([&]() noexcept { clear() /* noexcept */; });
     auto __ret   = std::move(__containers_);
     return __ret;
   }
 
-  _LIBCPP_HIDE_FROM_ABI void replace(key_container_type&& __key_cont, mapped_container_type&& __mapped_cont) {
+  _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 void
+  replace(key_container_type&& __key_cont, mapped_container_type&& __mapped_cont) {
     _LIBCPP_ASSERT_VALID_INPUT_RANGE(
         __key_cont.size() == __mapped_cont.size(), "flat_multimap keys and mapped containers have different size");
 
@@ -565,15 +609,15 @@ public:
     __guard.__complete();
   }
 
-  _LIBCPP_HIDE_FROM_ABI iterator erase(iterator __position) {
+  _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 iterator erase(iterator __position) {
     return __erase(__position.__key_iter_, __position.__mapped_iter_);
   }
 
-  _LIBCPP_HIDE_FROM_ABI iterator erase(const_iterator __position) {
+  _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 iterator erase(const_iterator __position) {
     return __erase(__position.__key_iter_, __position.__mapped_iter_);
   }
 
-  _LIBCPP_HIDE_FROM_ABI size_type erase(const key_type& __x) {
+  _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 size_type erase(const key_type& __x) {
     auto [__first, __last] = equal_range(__x);
     auto __res             = __last - __first;
     erase(__first, __last);
@@ -583,14 +627,14 @@ public:
   template <class _Kp>
     requires(__is_compare_transparent && !is_convertible_v<_Kp &&, iterator> &&
              !is_convertible_v<_Kp &&, const_iterator>)
-  _LIBCPP_HIDE_FROM_ABI size_type erase(_Kp&& __x) {
+  _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 size_type erase(_Kp&& __x) {
     auto [__first, __last] = equal_range(__x);
     auto __res             = __last - __first;
     erase(__first, __last);
     return __res;
   }
 
-  _LIBCPP_HIDE_FROM_ABI iterator erase(const_iterator __first, const_iterator __last) {
+  _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 iterator erase(const_iterator __first, const_iterator __last) {
     auto __on_failure = std::__make_exception_guard([&]() noexcept { clear() /* noexcept */; });
     auto __key_it     = __containers_.keys.erase(__first.__key_iter_, __last.__key_iter_);
     auto __mapped_it  = __containers_.values.erase(__first.__mapped_iter_, __last.__mapped_iter_);
@@ -598,146 +642,178 @@ public:
     return iterator(std::move(__key_it), std::move(__mapped_it));
   }
 
-  _LIBCPP_HIDE_FROM_ABI void swap(flat_multimap& __y) noexcept {
-    // warning: The spec has unconditional noexcept, which means that
-    // if any of the following functions throw an exception,
-    // std::terminate will be called
+  _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 void swap(flat_multimap& __y) noexcept(
+      is_nothrow_swappable_v<key_container_type> && is_nothrow_swappable_v<mapped_container_type> &&
+      is_nothrow_swappable_v<key_compare>) {
+    auto __on_failure = std::__make_exception_guard([&]() noexcept {
+      clear() /* noexcept */;
+      __y.clear() /* noexcept */;
+    });
     ranges::swap(__compare_, __y.__compare_);
     ranges::swap(__containers_.keys, __y.__containers_.keys);
     ranges::swap(__containers_.values, __y.__containers_.values);
+    __on_failure.__complete();
   }
 
-  _LIBCPP_HIDE_FROM_ABI void clear() noexcept {
+  _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 void clear() noexcept {
     __containers_.keys.clear();
     __containers_.values.clear();
   }
 
   // observers
-  _LIBCPP_HIDE_FROM_ABI key_compare key_comp() const { return __compare_; }
-  _LIBCPP_HIDE_FROM_ABI value_compare value_comp() const { return value_compare(__compare_); }
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 key_compare key_comp() const { return __compare_; }
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 value_compare value_comp() const {
+    return value_compare(__compare_);
+  }
 
-  _LIBCPP_HIDE_FROM_ABI const key_container_type& keys() const noexcept { return __containers_.keys; }
-  _LIBCPP_HIDE_FROM_ABI const mapped_container_type& values() const noexcept { return __containers_.values; }
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 const key_container_type& keys() const noexcept {
+    return __containers_.keys;
+  }
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 const mapped_container_type&
+  values() const noexcept {
+    return __containers_.values;
+  }
 
   // map operations
-  _LIBCPP_HIDE_FROM_ABI iterator find(const key_type& __x) { return __find_impl(*this, __x); }
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 iterator find(const key_type& __x) {
+    return __find_impl(*this, __x);
+  }
 
-  _LIBCPP_HIDE_FROM_ABI const_iterator find(const key_type& __x) const { return __find_impl(*this, __x); }
-
-  template <class _Kp>
-    requires __is_compare_transparent
-  _LIBCPP_HIDE_FROM_ABI iterator find(const _Kp& __x) {
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 const_iterator find(const key_type& __x) const {
     return __find_impl(*this, __x);
   }
 
   template <class _Kp>
     requires __is_compare_transparent
-  _LIBCPP_HIDE_FROM_ABI const_iterator find(const _Kp& __x) const {
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 iterator find(const _Kp& __x) {
     return __find_impl(*this, __x);
   }
 
-  _LIBCPP_HIDE_FROM_ABI size_type count(const key_type& __x) const {
+  template <class _Kp>
+    requires __is_compare_transparent
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 const_iterator find(const _Kp& __x) const {
+    return __find_impl(*this, __x);
+  }
+
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 size_type count(const key_type& __x) const {
     auto [__first, __last] = equal_range(__x);
     return __last - __first;
   }
 
   template <class _Kp>
     requires __is_compare_transparent
-  _LIBCPP_HIDE_FROM_ABI size_type count(const _Kp& __x) const {
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 size_type count(const _Kp& __x) const {
     auto [__first, __last] = equal_range(__x);
     return __last - __first;
   }
 
-  _LIBCPP_HIDE_FROM_ABI bool contains(const key_type& __x) const { return find(__x) != end(); }
-
-  template <class _Kp>
-    requires __is_compare_transparent
-  _LIBCPP_HIDE_FROM_ABI bool contains(const _Kp& __x) const {
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 bool contains(const key_type& __x) const {
     return find(__x) != end();
   }
 
-  _LIBCPP_HIDE_FROM_ABI iterator lower_bound(const key_type& __x) { return __lower_bound<iterator>(*this, __x); }
+  template <class _Kp>
+    requires __is_compare_transparent
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 bool contains(const _Kp& __x) const {
+    return find(__x) != end();
+  }
 
-  _LIBCPP_HIDE_FROM_ABI const_iterator lower_bound(const key_type& __x) const {
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 iterator lower_bound(const key_type& __x) {
+    return __lower_bound<iterator>(*this, __x);
+  }
+
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 const_iterator
+  lower_bound(const key_type& __x) const {
     return __lower_bound<const_iterator>(*this, __x);
   }
 
   template <class _Kp>
     requires __is_compare_transparent
-  _LIBCPP_HIDE_FROM_ABI iterator lower_bound(const _Kp& __x) {
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 iterator lower_bound(const _Kp& __x) {
     return __lower_bound<iterator>(*this, __x);
   }
 
   template <class _Kp>
     requires __is_compare_transparent
-  _LIBCPP_HIDE_FROM_ABI const_iterator lower_bound(const _Kp& __x) const {
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 const_iterator lower_bound(const _Kp& __x) const {
     return __lower_bound<const_iterator>(*this, __x);
   }
 
-  _LIBCPP_HIDE_FROM_ABI iterator upper_bound(const key_type& __x) { return __upper_bound<iterator>(*this, __x); }
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 iterator upper_bound(const key_type& __x) {
+    return __upper_bound<iterator>(*this, __x);
+  }
 
-  _LIBCPP_HIDE_FROM_ABI const_iterator upper_bound(const key_type& __x) const {
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 const_iterator
+  upper_bound(const key_type& __x) const {
     return __upper_bound<const_iterator>(*this, __x);
   }
 
   template <class _Kp>
     requires __is_compare_transparent
-  _LIBCPP_HIDE_FROM_ABI iterator upper_bound(const _Kp& __x) {
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 iterator upper_bound(const _Kp& __x) {
     return __upper_bound<iterator>(*this, __x);
   }
 
   template <class _Kp>
     requires __is_compare_transparent
-  _LIBCPP_HIDE_FROM_ABI const_iterator upper_bound(const _Kp& __x) const {
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 const_iterator upper_bound(const _Kp& __x) const {
     return __upper_bound<const_iterator>(*this, __x);
   }
 
-  _LIBCPP_HIDE_FROM_ABI pair<iterator, iterator> equal_range(const key_type& __x) {
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 pair<iterator, iterator>
+  equal_range(const key_type& __x) {
     return __equal_range_impl(*this, __x);
   }
 
-  _LIBCPP_HIDE_FROM_ABI pair<const_iterator, const_iterator> equal_range(const key_type& __x) const {
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 pair<const_iterator, const_iterator>
+  equal_range(const key_type& __x) const {
     return __equal_range_impl(*this, __x);
   }
 
   template <class _Kp>
     requires __is_compare_transparent
-  _LIBCPP_HIDE_FROM_ABI pair<iterator, iterator> equal_range(const _Kp& __x) {
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 pair<iterator, iterator>
+  equal_range(const _Kp& __x) {
     return __equal_range_impl(*this, __x);
   }
   template <class _Kp>
     requires __is_compare_transparent
-  _LIBCPP_HIDE_FROM_ABI pair<const_iterator, const_iterator> equal_range(const _Kp& __x) const {
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 pair<const_iterator, const_iterator>
+  equal_range(const _Kp& __x) const {
     return __equal_range_impl(*this, __x);
   }
 
-  friend _LIBCPP_HIDE_FROM_ABI bool operator==(const flat_multimap& __x, const flat_multimap& __y) {
+  friend _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 bool
+  operator==(const flat_multimap& __x, const flat_multimap& __y) {
     return ranges::equal(__x, __y);
   }
 
-  friend _LIBCPP_HIDE_FROM_ABI auto operator<=>(const flat_multimap& __x, const flat_multimap& __y) {
+  friend _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 auto
+  operator<=>(const flat_multimap& __x, const flat_multimap& __y) {
     return std::lexicographical_compare_three_way(
         __x.begin(), __x.end(), __y.begin(), __y.end(), std::__synth_three_way);
   }
 
-  friend _LIBCPP_HIDE_FROM_ABI void swap(flat_multimap& __x, flat_multimap& __y) noexcept { __x.swap(__y); }
+  friend _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 void
+  swap(flat_multimap& __x, flat_multimap& __y) noexcept(noexcept(__x.swap(__y))) {
+    __x.swap(__y);
+  }
 
 private:
   struct __ctor_uses_allocator_tag {
-    explicit _LIBCPP_HIDE_FROM_ABI __ctor_uses_allocator_tag() = default;
+    explicit _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 __ctor_uses_allocator_tag() = default;
   };
   struct __ctor_uses_allocator_empty_tag {
-    explicit _LIBCPP_HIDE_FROM_ABI __ctor_uses_allocator_empty_tag() = default;
+    explicit _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 __ctor_uses_allocator_empty_tag() = default;
   };
 
   template <class _Allocator, class _KeyCont, class _MappedCont, class... _CompArg>
     requires __allocator_ctor_constraint<_Allocator>
-  _LIBCPP_HIDE_FROM_ABI
-  flat_multimap(__ctor_uses_allocator_tag,
-                const _Allocator& __alloc,
-                _KeyCont&& __key_cont,
-                _MappedCont&& __mapped_cont,
-                _CompArg&&... __comp)
+  _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 flat_multimap(
+      __ctor_uses_allocator_tag,
+      const _Allocator& __alloc,
+      _KeyCont&& __key_cont,
+      _MappedCont&& __mapped_cont,
+      _CompArg&&... __comp)
       : __containers_{.keys = std::make_obj_using_allocator<key_container_type>(
                           __alloc, std::forward<_KeyCont>(__key_cont)),
                       .values = std::make_obj_using_allocator<mapped_container_type>(
@@ -746,36 +822,39 @@ private:
 
   template <class _Allocator, class... _CompArg>
     requires __allocator_ctor_constraint<_Allocator>
-  _LIBCPP_HIDE_FROM_ABI flat_multimap(__ctor_uses_allocator_empty_tag, const _Allocator& __alloc, _CompArg&&... __comp)
+  _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26
+  flat_multimap(__ctor_uses_allocator_empty_tag, const _Allocator& __alloc, _CompArg&&... __comp)
       : __containers_{.keys   = std::make_obj_using_allocator<key_container_type>(__alloc),
                       .values = std::make_obj_using_allocator<mapped_container_type>(__alloc)},
         __compare_(std::forward<_CompArg>(__comp)...) {}
 
-  _LIBCPP_HIDE_FROM_ABI bool __is_sorted(auto&& __key_container) const {
+  _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 bool __is_sorted(auto&& __key_container) const {
     return ranges::is_sorted(__key_container, __compare_);
   }
 
-  _LIBCPP_HIDE_FROM_ABI void __sort() {
+  _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 void __sort() {
     auto __zv = ranges::views::zip(__containers_.keys, __containers_.values);
     ranges::sort(__zv, __compare_, [](const auto& __p) -> decltype(auto) { return std::get<0>(__p); });
   }
 
   template <class _Self, class _KeyIter>
-  _LIBCPP_HIDE_FROM_ABI static auto __corresponding_mapped_it(_Self&& __self, _KeyIter&& __key_iter) {
+  _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 static auto
+  __corresponding_mapped_it(_Self&& __self, _KeyIter&& __key_iter) {
     return __self.__containers_.values.begin() +
            static_cast<ranges::range_difference_t<mapped_container_type>>(
                ranges::distance(__self.__containers_.keys.begin(), __key_iter));
   }
 
   template <bool _WasSorted, class _InputIterator, class _Sentinel>
-  _LIBCPP_HIDE_FROM_ABI void __append_sort_merge(_InputIterator __first, _Sentinel __last) {
+  _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 void
+  __append_sort_merge(_InputIterator __first, _Sentinel __last) {
     auto __on_failure     = std::__make_exception_guard([&]() noexcept { clear() /* noexcept */; });
     size_t __num_appended = __flat_map_utils::__append(*this, std::move(__first), std::move(__last));
     if (__num_appended != 0) {
       auto __zv                  = ranges::views::zip(__containers_.keys, __containers_.values);
       auto __append_start_offset = __containers_.keys.size() - __num_appended;
       auto __end                 = __zv.end();
-      auto __compare_key         = [this](const auto& __p1, const auto& __p2) {
+      auto __compare_key         = [this](const auto& __p1, const auto& __p2) -> bool {
         return __compare_(std::get<0>(__p1), std::get<0>(__p2));
       };
       if constexpr (!_WasSorted) {
@@ -791,7 +870,7 @@ private:
   }
 
   template <class _Self, class _Kp>
-  _LIBCPP_HIDE_FROM_ABI static auto __find_impl(_Self&& __self, const _Kp& __key) {
+  _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 static auto __find_impl(_Self&& __self, const _Kp& __key) {
     auto __it   = __self.lower_bound(__key);
     auto __last = __self.end();
     if (__it == __last || __self.__compare_(__key, __it->first)) {
@@ -801,7 +880,7 @@ private:
   }
 
   template <class _Self, class _Kp>
-  _LIBCPP_HIDE_FROM_ABI static auto __equal_range_impl(_Self&& __self, const _Kp& __key) {
+  _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 static auto __equal_range_impl(_Self&& __self, const _Kp& __key) {
     auto [__key_first, __key_last] =
         std::equal_range(__self.__containers_.keys.begin(), __self.__containers_.keys.end(), __key, __self.__compare_);
 
@@ -811,7 +890,7 @@ private:
   }
 
   template <class _Res, class _Self, class _Kp>
-  _LIBCPP_HIDE_FROM_ABI static _Res __lower_bound(_Self&& __self, _Kp& __x) {
+  _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 static _Res __lower_bound(_Self&& __self, _Kp& __x) {
     auto __key_iter =
         std::lower_bound(__self.__containers_.keys.begin(), __self.__containers_.keys.end(), __x, __self.__compare_);
     auto __mapped_iter = __corresponding_mapped_it(__self, __key_iter);
@@ -819,14 +898,14 @@ private:
   }
 
   template <class _Res, class _Self, class _Kp>
-  _LIBCPP_HIDE_FROM_ABI static _Res __upper_bound(_Self&& __self, _Kp& __x) {
+  _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 static _Res __upper_bound(_Self&& __self, _Kp& __x) {
     auto __key_iter =
         std::upper_bound(__self.__containers_.keys.begin(), __self.__containers_.keys.end(), __x, __self.__compare_);
     auto __mapped_iter = __corresponding_mapped_it(__self, __key_iter);
     return _Res(std::move(__key_iter), std::move(__mapped_iter));
   }
 
-  _LIBCPP_HIDE_FROM_ABI void __reserve(size_t __size) {
+  _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 void __reserve(size_t __size) {
     if constexpr (__container_traits<_KeyContainer>::__reservable) {
       __containers_.keys.reserve(__size);
     }
@@ -837,7 +916,8 @@ private:
   }
 
   template <class _KIter, class _MIter>
-  _LIBCPP_HIDE_FROM_ABI iterator __erase(_KIter __key_iter_to_remove, _MIter __mapped_iter_to_remove) {
+  _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 iterator
+  __erase(_KIter __key_iter_to_remove, _MIter __mapped_iter_to_remove) {
     auto __on_failure  = std::__make_exception_guard([&]() noexcept { clear() /* noexcept */; });
     auto __key_iter    = __containers_.keys.erase(__key_iter_to_remove);
     auto __mapped_iter = __containers_.values.erase(__mapped_iter_to_remove);
@@ -847,7 +927,8 @@ private:
 
   template <class _Key2, class _Tp2, class _Compare2, class _KeyContainer2, class _MappedContainer2, class _Predicate>
   friend typename flat_multimap<_Key2, _Tp2, _Compare2, _KeyContainer2, _MappedContainer2>::size_type
-  erase_if(flat_multimap<_Key2, _Tp2, _Compare2, _KeyContainer2, _MappedContainer2>&, _Predicate);
+      _LIBCPP_CONSTEXPR_SINCE_CXX26
+      erase_if(flat_multimap<_Key2, _Tp2, _Compare2, _KeyContainer2, _MappedContainer2>&, _Predicate);
 
   friend __flat_map_utils;
 
@@ -855,8 +936,9 @@ private:
   _LIBCPP_NO_UNIQUE_ADDRESS key_compare __compare_;
 
   struct __key_equiv {
-    _LIBCPP_HIDE_FROM_ABI __key_equiv(key_compare __c) : __comp_(__c) {}
-    _LIBCPP_HIDE_FROM_ABI bool operator()(const_reference __x, const_reference __y) const {
+    _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 __key_equiv(key_compare __c) : __comp_(__c) {}
+    _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 bool
+    operator()(const_reference __x, const_reference __y) const {
       return !__comp_(std::get<0>(__x), std::get<0>(__y)) && !__comp_(std::get<0>(__y), std::get<0>(__x));
     }
     key_compare __comp_;
@@ -864,8 +946,7 @@ private:
 };
 
 template <class _KeyContainer, class _MappedContainer, class _Compare = less<typename _KeyContainer::value_type>>
-  requires(!__is_allocator<_Compare>::value && !__is_allocator<_KeyContainer>::value &&
-           !__is_allocator<_MappedContainer>::value &&
+  requires(!__is_allocator_v<_Compare> && !__is_allocator_v<_KeyContainer> && !__is_allocator_v<_MappedContainer> &&
            is_invocable_v<const _Compare&,
                           const typename _KeyContainer::value_type&,
                           const typename _KeyContainer::value_type&>)
@@ -878,7 +959,7 @@ flat_multimap(_KeyContainer, _MappedContainer, _Compare = _Compare())
 
 template <class _KeyContainer, class _MappedContainer, class _Allocator>
   requires(uses_allocator_v<_KeyContainer, _Allocator> && uses_allocator_v<_MappedContainer, _Allocator> &&
-           !__is_allocator<_KeyContainer>::value && !__is_allocator<_MappedContainer>::value)
+           !__is_allocator_v<_KeyContainer> && !__is_allocator_v<_MappedContainer>)
 flat_multimap(_KeyContainer, _MappedContainer, _Allocator)
     -> flat_multimap<typename _KeyContainer::value_type,
                      typename _MappedContainer::value_type,
@@ -887,9 +968,8 @@ flat_multimap(_KeyContainer, _MappedContainer, _Allocator)
                      _MappedContainer>;
 
 template <class _KeyContainer, class _MappedContainer, class _Compare, class _Allocator>
-  requires(!__is_allocator<_Compare>::value && !__is_allocator<_KeyContainer>::value &&
-           !__is_allocator<_MappedContainer>::value && uses_allocator_v<_KeyContainer, _Allocator> &&
-           uses_allocator_v<_MappedContainer, _Allocator> &&
+  requires(!__is_allocator_v<_Compare> && !__is_allocator_v<_KeyContainer> && !__is_allocator_v<_MappedContainer> &&
+           uses_allocator_v<_KeyContainer, _Allocator> && uses_allocator_v<_MappedContainer, _Allocator> &&
            is_invocable_v<const _Compare&,
                           const typename _KeyContainer::value_type&,
                           const typename _KeyContainer::value_type&>)
@@ -901,8 +981,7 @@ flat_multimap(_KeyContainer, _MappedContainer, _Compare, _Allocator)
                      _MappedContainer>;
 
 template <class _KeyContainer, class _MappedContainer, class _Compare = less<typename _KeyContainer::value_type>>
-  requires(!__is_allocator<_Compare>::value && !__is_allocator<_KeyContainer>::value &&
-           !__is_allocator<_MappedContainer>::value &&
+  requires(!__is_allocator_v<_Compare> && !__is_allocator_v<_KeyContainer> && !__is_allocator_v<_MappedContainer> &&
            is_invocable_v<const _Compare&,
                           const typename _KeyContainer::value_type&,
                           const typename _KeyContainer::value_type&>)
@@ -915,7 +994,7 @@ flat_multimap(sorted_equivalent_t, _KeyContainer, _MappedContainer, _Compare = _
 
 template <class _KeyContainer, class _MappedContainer, class _Allocator>
   requires(uses_allocator_v<_KeyContainer, _Allocator> && uses_allocator_v<_MappedContainer, _Allocator> &&
-           !__is_allocator<_KeyContainer>::value && !__is_allocator<_MappedContainer>::value)
+           !__is_allocator_v<_KeyContainer> && !__is_allocator_v<_MappedContainer>)
 flat_multimap(sorted_equivalent_t, _KeyContainer, _MappedContainer, _Allocator)
     -> flat_multimap<typename _KeyContainer::value_type,
                      typename _MappedContainer::value_type,
@@ -924,9 +1003,8 @@ flat_multimap(sorted_equivalent_t, _KeyContainer, _MappedContainer, _Allocator)
                      _MappedContainer>;
 
 template <class _KeyContainer, class _MappedContainer, class _Compare, class _Allocator>
-  requires(!__is_allocator<_Compare>::value && !__is_allocator<_KeyContainer>::value &&
-           !__is_allocator<_MappedContainer>::value && uses_allocator_v<_KeyContainer, _Allocator> &&
-           uses_allocator_v<_MappedContainer, _Allocator> &&
+  requires(!__is_allocator_v<_Compare> && !__is_allocator_v<_KeyContainer> && !__is_allocator_v<_MappedContainer> &&
+           uses_allocator_v<_KeyContainer, _Allocator> && uses_allocator_v<_MappedContainer, _Allocator> &&
            is_invocable_v<const _Compare&,
                           const typename _KeyContainer::value_type&,
                           const typename _KeyContainer::value_type&>)
@@ -938,19 +1016,19 @@ flat_multimap(sorted_equivalent_t, _KeyContainer, _MappedContainer, _Compare, _A
                      _MappedContainer>;
 
 template <class _InputIterator, class _Compare = less<__iter_key_type<_InputIterator>>>
-  requires(__has_input_iterator_category<_InputIterator>::value && !__is_allocator<_Compare>::value)
+  requires(__has_input_iterator_category<_InputIterator>::value && !__is_allocator_v<_Compare>)
 flat_multimap(_InputIterator, _InputIterator, _Compare = _Compare())
     -> flat_multimap<__iter_key_type<_InputIterator>, __iter_mapped_type<_InputIterator>, _Compare>;
 
 template <class _InputIterator, class _Compare = less<__iter_key_type<_InputIterator>>>
-  requires(__has_input_iterator_category<_InputIterator>::value && !__is_allocator<_Compare>::value)
+  requires(__has_input_iterator_category<_InputIterator>::value && !__is_allocator_v<_Compare>)
 flat_multimap(sorted_equivalent_t, _InputIterator, _InputIterator, _Compare = _Compare())
     -> flat_multimap<__iter_key_type<_InputIterator>, __iter_mapped_type<_InputIterator>, _Compare>;
 
 template <ranges::input_range _Range,
           class _Compare   = less<__range_key_type<_Range>>,
           class _Allocator = allocator<byte>,
-          class            = __enable_if_t<!__is_allocator<_Compare>::value && __is_allocator<_Allocator>::value>>
+          class            = __enable_if_t<!__is_allocator_v<_Compare> && __is_allocator_v<_Allocator>>>
 flat_multimap(from_range_t, _Range&&, _Compare = _Compare(), _Allocator = _Allocator()) -> flat_multimap<
     __range_key_type<_Range>,
     __range_mapped_type<_Range>,
@@ -958,7 +1036,7 @@ flat_multimap(from_range_t, _Range&&, _Compare = _Compare(), _Allocator = _Alloc
     vector<__range_key_type<_Range>, __allocator_traits_rebind_t<_Allocator, __range_key_type<_Range>>>,
     vector<__range_mapped_type<_Range>, __allocator_traits_rebind_t<_Allocator, __range_mapped_type<_Range>>>>;
 
-template <ranges::input_range _Range, class _Allocator, class = __enable_if_t<__is_allocator<_Allocator>::value>>
+template <ranges::input_range _Range, class _Allocator, class = __enable_if_t<__is_allocator_v<_Allocator>>>
 flat_multimap(from_range_t, _Range&&, _Allocator) -> flat_multimap<
     __range_key_type<_Range>,
     __range_mapped_type<_Range>,
@@ -967,11 +1045,11 @@ flat_multimap(from_range_t, _Range&&, _Allocator) -> flat_multimap<
     vector<__range_mapped_type<_Range>, __allocator_traits_rebind_t<_Allocator, __range_mapped_type<_Range>>>>;
 
 template <class _Key, class _Tp, class _Compare = less<_Key>>
-  requires(!__is_allocator<_Compare>::value)
+  requires(!__is_allocator_v<_Compare>)
 flat_multimap(initializer_list<pair<_Key, _Tp>>, _Compare = _Compare()) -> flat_multimap<_Key, _Tp, _Compare>;
 
 template <class _Key, class _Tp, class _Compare = less<_Key>>
-  requires(!__is_allocator<_Compare>::value)
+  requires(!__is_allocator_v<_Compare>)
 flat_multimap(sorted_equivalent_t, initializer_list<pair<_Key, _Tp>>, _Compare = _Compare())
     -> flat_multimap<_Key, _Tp, _Compare>;
 
@@ -980,8 +1058,9 @@ struct uses_allocator<flat_multimap<_Key, _Tp, _Compare, _KeyContainer, _MappedC
     : bool_constant<uses_allocator_v<_KeyContainer, _Allocator> && uses_allocator_v<_MappedContainer, _Allocator>> {};
 
 template <class _Key, class _Tp, class _Compare, class _KeyContainer, class _MappedContainer, class _Predicate>
-_LIBCPP_HIDE_FROM_ABI typename flat_multimap<_Key, _Tp, _Compare, _KeyContainer, _MappedContainer>::size_type
-erase_if(flat_multimap<_Key, _Tp, _Compare, _KeyContainer, _MappedContainer>& __flat_multimap, _Predicate __pred) {
+_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26
+    typename flat_multimap<_Key, _Tp, _Compare, _KeyContainer, _MappedContainer>::size_type
+    erase_if(flat_multimap<_Key, _Tp, _Compare, _KeyContainer, _MappedContainer>& __flat_multimap, _Predicate __pred) {
   auto __zv     = ranges::views::zip(__flat_multimap.__containers_.keys, __flat_multimap.__containers_.values);
   auto __first  = __zv.begin();
   auto __last   = __zv.end();
diff --git a/lib/libcxx/include/__flat_map/key_value_iterator.h b/lib/libcxx/include/__flat_map/key_value_iterator.h
index d04a23d1f8..795651a079 100644
--- a/lib/libcxx/include/__flat_map/key_value_iterator.h
+++ b/lib/libcxx/include/__flat_map/key_value_iterator.h
@@ -20,7 +20,6 @@
 #include <__type_traits/conditional.h>
 #include <__utility/forward.h>
 #include <__utility/move.h>
-#include <__utility/pair.h>
 
 #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
 #  pragma GCC system_header
diff --git a/lib/libcxx/include/__flat_map/utils.h b/lib/libcxx/include/__flat_map/utils.h
index 3a05c71566..4b07e388d0 100644
--- a/lib/libcxx/include/__flat_map/utils.h
+++ b/lib/libcxx/include/__flat_map/utils.h
@@ -16,6 +16,7 @@
 #include <__utility/exception_guard.h>
 #include <__utility/forward.h>
 #include <__utility/move.h>
+#include <__vector/container_traits.h>
 
 #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
 #  pragma GCC system_header
diff --git a/lib/libcxx/include/__flat_set/flat_multiset.h b/lib/libcxx/include/__flat_set/flat_multiset.h
index 44d8af05a5..b2de63bc30 100644
--- a/lib/libcxx/include/__flat_set/flat_multiset.h
+++ b/lib/libcxx/include/__flat_set/flat_multiset.h
@@ -13,54 +13,41 @@
 #include <__algorithm/equal_range.h>
 #include <__algorithm/lexicographical_compare_three_way.h>
 #include <__algorithm/lower_bound.h>
-#include <__algorithm/min.h>
 #include <__algorithm/ranges_equal.h>
 #include <__algorithm/ranges_inplace_merge.h>
 #include <__algorithm/ranges_is_sorted.h>
 #include <__algorithm/ranges_sort.h>
-#include <__algorithm/ranges_unique.h>
 #include <__algorithm/remove_if.h>
 #include <__algorithm/upper_bound.h>
 #include <__assert>
 #include <__compare/synth_three_way.h>
-#include <__concepts/convertible_to.h>
 #include <__concepts/swappable.h>
 #include <__config>
-#include <__cstddef/byte.h>
-#include <__cstddef/ptrdiff_t.h>
-#include <__flat_map/key_value_iterator.h>
 #include <__flat_map/sorted_equivalent.h>
 #include <__flat_set/ra_iterator.h>
 #include <__flat_set/utils.h>
-#include <__functional/invoke.h>
 #include <__functional/is_transparent.h>
 #include <__functional/operations.h>
 #include <__fwd/vector.h>
 #include <__iterator/concepts.h>
-#include <__iterator/distance.h>
 #include <__iterator/iterator_traits.h>
 #include <__iterator/prev.h>
-#include <__iterator/ranges_iterator_traits.h>
 #include <__iterator/reverse_iterator.h>
 #include <__memory/allocator_traits.h>
 #include <__memory/uses_allocator.h>
 #include <__memory/uses_allocator_construction.h>
-#include <__ranges/access.h>
 #include <__ranges/concepts.h>
 #include <__ranges/container_compatible_range.h>
 #include <__ranges/drop_view.h>
 #include <__ranges/from_range.h>
-#include <__ranges/ref_view.h>
+#include <__ranges/range_adaptor.h>
 #include <__ranges/size.h>
 #include <__ranges/subrange.h>
-#include <__ranges/zip_view.h>
-#include <__type_traits/conjunction.h>
 #include <__type_traits/container_traits.h>
 #include <__type_traits/invoke.h>
 #include <__type_traits/is_allocator.h>
 #include <__type_traits/is_nothrow_constructible.h>
 #include <__type_traits/is_same.h>
-#include <__type_traits/maybe_const.h>
 #include <__utility/as_const.h>
 #include <__utility/exception_guard.h>
 #include <__utility/move.h>
@@ -108,16 +95,16 @@ public:
 
 public:
   // [flat.multiset.cons], constructors
-  _LIBCPP_HIDE_FROM_ABI flat_multiset() noexcept(is_nothrow_default_constructible_v<_KeyContainer> &&
-                                                 is_nothrow_default_constructible_v<_Compare>)
+  _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 flat_multiset() noexcept(
+      is_nothrow_default_constructible_v<_KeyContainer> && is_nothrow_default_constructible_v<_Compare>)
       : __keys_(), __compare_() {}
 
-  _LIBCPP_HIDE_FROM_ABI flat_multiset(const flat_multiset&) = default;
+  _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 flat_multiset(const flat_multiset&) = default;
 
   // The copy/move constructors are not specified in the spec, which means they should be defaulted.
   // However, the move constructor can potentially leave a moved-from object in an inconsistent
   // state if an exception is thrown.
-  _LIBCPP_HIDE_FROM_ABI flat_multiset(flat_multiset&& __other) noexcept(
+  _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 flat_multiset(flat_multiset&& __other) noexcept(
       is_nothrow_move_constructible_v<_KeyContainer> && is_nothrow_move_constructible_v<_Compare>)
 #  if _LIBCPP_HAS_EXCEPTIONS
       try
@@ -134,14 +121,16 @@ public:
 #  endif // _LIBCPP_HAS_EXCEPTIONS
   }
 
-  _LIBCPP_HIDE_FROM_ABI explicit flat_multiset(const key_compare& __comp) : __keys_(), __compare_(__comp) {}
+  _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 explicit flat_multiset(const key_compare& __comp)
+      : __keys_(), __compare_(__comp) {}
 
-  _LIBCPP_HIDE_FROM_ABI explicit flat_multiset(container_type __keys, const key_compare& __comp = key_compare())
+  _LIBCPP_HIDE_FROM_ABI
+  _LIBCPP_CONSTEXPR_SINCE_CXX26 explicit flat_multiset(container_type __keys, const key_compare& __comp = key_compare())
       : __keys_(std::move(__keys)), __compare_(__comp) {
     ranges::sort(__keys_, __compare_);
   }
 
-  _LIBCPP_HIDE_FROM_ABI
+  _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26
   flat_multiset(sorted_equivalent_t, container_type __keys, const key_compare& __comp = key_compare())
       : __keys_(std::move(__keys)), __compare_(__comp) {
     _LIBCPP_ASSERT_SEMANTIC_REQUIREMENT(ranges::is_sorted(__keys_, __compare_), "Key container is not sorted");
@@ -149,7 +138,7 @@ public:
 
   template <class _InputIterator>
     requires __has_input_iterator_category<_InputIterator>::value
-  _LIBCPP_HIDE_FROM_ABI
+  _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26
   flat_multiset(_InputIterator __first, _InputIterator __last, const key_compare& __comp = key_compare())
       : __keys_(), __compare_(__comp) {
     insert(__first, __last);
@@ -157,48 +146,53 @@ public:
 
   template <class _InputIterator>
     requires __has_input_iterator_category<_InputIterator>::value
-  _LIBCPP_HIDE_FROM_ABI flat_multiset(
+  _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 flat_multiset(
       sorted_equivalent_t, _InputIterator __first, _InputIterator __last, const key_compare& __comp = key_compare())
       : __keys_(__first, __last), __compare_(__comp) {
     _LIBCPP_ASSERT_SEMANTIC_REQUIREMENT(ranges::is_sorted(__keys_, __compare_), "Key container is not sorted");
   }
 
   template <_ContainerCompatibleRange<value_type> _Range>
-  _LIBCPP_HIDE_FROM_ABI flat_multiset(from_range_t __fr, _Range&& __rg)
+  _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 flat_multiset(from_range_t __fr, _Range&& __rg)
       : flat_multiset(__fr, std::forward<_Range>(__rg), key_compare()) {}
 
   template <_ContainerCompatibleRange<value_type> _Range>
-  _LIBCPP_HIDE_FROM_ABI flat_multiset(from_range_t, _Range&& __rg, const key_compare& __comp) : flat_multiset(__comp) {
+  _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26
+  flat_multiset(from_range_t, _Range&& __rg, const key_compare& __comp)
+      : flat_multiset(__comp) {
     insert_range(std::forward<_Range>(__rg));
   }
 
-  _LIBCPP_HIDE_FROM_ABI flat_multiset(initializer_list<value_type> __il, const key_compare& __comp = key_compare())
+  _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26
+  flat_multiset(initializer_list<value_type> __il, const key_compare& __comp = key_compare())
       : flat_multiset(__il.begin(), __il.end(), __comp) {}
 
-  _LIBCPP_HIDE_FROM_ABI
+  _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26
   flat_multiset(sorted_equivalent_t, initializer_list<value_type> __il, const key_compare& __comp = key_compare())
       : flat_multiset(sorted_equivalent, __il.begin(), __il.end(), __comp) {}
 
   template <class _Allocator>
     requires uses_allocator<container_type, _Allocator>::value
-  _LIBCPP_HIDE_FROM_ABI explicit flat_multiset(const _Allocator& __alloc)
+  _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 explicit flat_multiset(const _Allocator& __alloc)
       : __keys_(std::make_obj_using_allocator<container_type>(__alloc)), __compare_() {}
 
   template <class _Allocator>
     requires uses_allocator<container_type, _Allocator>::value
-  _LIBCPP_HIDE_FROM_ABI flat_multiset(const key_compare& __comp, const _Allocator& __alloc)
+  _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26
+  flat_multiset(const key_compare& __comp, const _Allocator& __alloc)
       : __keys_(std::make_obj_using_allocator<container_type>(__alloc)), __compare_(__comp) {}
 
   template <class _Allocator>
     requires uses_allocator<container_type, _Allocator>::value
-  _LIBCPP_HIDE_FROM_ABI flat_multiset(const container_type& __keys, const _Allocator& __alloc)
+  _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26
+  flat_multiset(const container_type& __keys, const _Allocator& __alloc)
       : __keys_(std::make_obj_using_allocator<container_type>(__alloc, __keys)), __compare_() {
     ranges::sort(__keys_, __compare_);
   }
 
   template <class _Allocator>
     requires uses_allocator<container_type, _Allocator>::value
-  _LIBCPP_HIDE_FROM_ABI
+  _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26
   flat_multiset(const container_type& __keys, const key_compare& __comp, const _Allocator& __alloc)
       : __keys_(std::make_obj_using_allocator<container_type>(__alloc, __keys)), __compare_(__comp) {
     ranges::sort(__keys_, __compare_);
@@ -206,14 +200,15 @@ public:
 
   template <class _Allocator>
     requires uses_allocator<container_type, _Allocator>::value
-  _LIBCPP_HIDE_FROM_ABI flat_multiset(sorted_equivalent_t, const container_type& __keys, const _Allocator& __alloc)
+  _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26
+  flat_multiset(sorted_equivalent_t, const container_type& __keys, const _Allocator& __alloc)
       : __keys_(std::make_obj_using_allocator<container_type>(__alloc, __keys)), __compare_() {
     _LIBCPP_ASSERT_SEMANTIC_REQUIREMENT(ranges::is_sorted(__keys_, __compare_), "Key container is not sorted");
   }
 
   template <class _Allocator>
     requires uses_allocator<container_type, _Allocator>::value
-  _LIBCPP_HIDE_FROM_ABI
+  _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26
   flat_multiset(sorted_equivalent_t, const container_type& __keys, const key_compare& __comp, const _Allocator& __alloc)
       : __keys_(std::make_obj_using_allocator<container_type>(__alloc, __keys)), __compare_(__comp) {
     _LIBCPP_ASSERT_SEMANTIC_REQUIREMENT(ranges::is_sorted(__keys_, __compare_), "Key container is not sorted");
@@ -221,13 +216,14 @@ public:
 
   template <class _Allocator>
     requires uses_allocator<container_type, _Allocator>::value
-  _LIBCPP_HIDE_FROM_ABI flat_multiset(const flat_multiset& __other, const _Allocator& __alloc)
+  _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26
+  flat_multiset(const flat_multiset& __other, const _Allocator& __alloc)
       : __keys_(std::make_obj_using_allocator<container_type>(__alloc, __other.__keys_)),
         __compare_(__other.__compare_) {}
 
   template <class _Allocator>
     requires uses_allocator<container_type, _Allocator>::value
-  _LIBCPP_HIDE_FROM_ABI flat_multiset(flat_multiset&& __other, const _Allocator& __alloc)
+  _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 flat_multiset(flat_multiset&& __other, const _Allocator& __alloc)
 #  if _LIBCPP_HAS_EXCEPTIONS
       try
 #  endif // _LIBCPP_HAS_EXCEPTIONS
@@ -243,14 +239,15 @@ public:
 
   template <class _InputIterator, class _Allocator>
     requires(__has_input_iterator_category<_InputIterator>::value && uses_allocator<container_type, _Allocator>::value)
-  _LIBCPP_HIDE_FROM_ABI flat_multiset(_InputIterator __first, _InputIterator __last, const _Allocator& __alloc)
+  _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26
+  flat_multiset(_InputIterator __first, _InputIterator __last, const _Allocator& __alloc)
       : __keys_(std::make_obj_using_allocator<container_type>(__alloc)), __compare_() {
     insert(__first, __last);
   }
 
   template <class _InputIterator, class _Allocator>
     requires(__has_input_iterator_category<_InputIterator>::value && uses_allocator<container_type, _Allocator>::value)
-  _LIBCPP_HIDE_FROM_ABI
+  _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26
   flat_multiset(_InputIterator __first, _InputIterator __last, const key_compare& __comp, const _Allocator& __alloc)
       : __keys_(std::make_obj_using_allocator<container_type>(__alloc)), __compare_(__comp) {
     insert(__first, __last);
@@ -258,7 +255,7 @@ public:
 
   template <class _InputIterator, class _Allocator>
     requires(__has_input_iterator_category<_InputIterator>::value && uses_allocator<container_type, _Allocator>::value)
-  _LIBCPP_HIDE_FROM_ABI
+  _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26
   flat_multiset(sorted_equivalent_t, _InputIterator __first, _InputIterator __last, const _Allocator& __alloc)
       : __keys_(std::make_obj_using_allocator<container_type>(__alloc, __first, __last)), __compare_() {
     _LIBCPP_ASSERT_SEMANTIC_REQUIREMENT(ranges::is_sorted(__keys_, __compare_), "Key container is not sorted");
@@ -266,53 +263,57 @@ public:
 
   template <class _InputIterator, class _Allocator>
     requires(__has_input_iterator_category<_InputIterator>::value && uses_allocator<container_type, _Allocator>::value)
-  _LIBCPP_HIDE_FROM_ABI
-  flat_multiset(sorted_equivalent_t,
-                _InputIterator __first,
-                _InputIterator __last,
-                const key_compare& __comp,
-                const _Allocator& __alloc)
+  _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 flat_multiset(
+      sorted_equivalent_t,
+      _InputIterator __first,
+      _InputIterator __last,
+      const key_compare& __comp,
+      const _Allocator& __alloc)
       : __keys_(std::make_obj_using_allocator<container_type>(__alloc, __first, __last)), __compare_(__comp) {
     _LIBCPP_ASSERT_SEMANTIC_REQUIREMENT(ranges::is_sorted(__keys_, __compare_), "Key container is not sorted");
   }
 
   template <_ContainerCompatibleRange<value_type> _Range, class _Allocator>
     requires uses_allocator<container_type, _Allocator>::value
-  _LIBCPP_HIDE_FROM_ABI flat_multiset(from_range_t, _Range&& __rg, const _Allocator& __alloc)
+  _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26
+  flat_multiset(from_range_t, _Range&& __rg, const _Allocator& __alloc)
       : __keys_(std::make_obj_using_allocator<container_type>(__alloc)), __compare_() {
     insert_range(std::forward<_Range>(__rg));
   }
 
   template <_ContainerCompatibleRange<value_type> _Range, class _Allocator>
     requires uses_allocator<container_type, _Allocator>::value
-  _LIBCPP_HIDE_FROM_ABI flat_multiset(from_range_t, _Range&& __rg, const key_compare& __comp, const _Allocator& __alloc)
+  _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26
+  flat_multiset(from_range_t, _Range&& __rg, const key_compare& __comp, const _Allocator& __alloc)
       : __keys_(std::make_obj_using_allocator<container_type>(__alloc)), __compare_(__comp) {
     insert_range(std::forward<_Range>(__rg));
   }
 
   template <class _Allocator>
     requires uses_allocator<container_type, _Allocator>::value
-  _LIBCPP_HIDE_FROM_ABI flat_multiset(initializer_list<value_type> __il, const _Allocator& __alloc)
+  _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26
+  flat_multiset(initializer_list<value_type> __il, const _Allocator& __alloc)
       : flat_multiset(__il.begin(), __il.end(), __alloc) {}
 
   template <class _Allocator>
     requires uses_allocator<container_type, _Allocator>::value
-  _LIBCPP_HIDE_FROM_ABI
+  _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26
   flat_multiset(initializer_list<value_type> __il, const key_compare& __comp, const _Allocator& __alloc)
       : flat_multiset(__il.begin(), __il.end(), __comp, __alloc) {}
 
   template <class _Allocator>
     requires uses_allocator<container_type, _Allocator>::value
-  _LIBCPP_HIDE_FROM_ABI flat_multiset(sorted_equivalent_t, initializer_list<value_type> __il, const _Allocator& __alloc)
+  _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26
+  flat_multiset(sorted_equivalent_t, initializer_list<value_type> __il, const _Allocator& __alloc)
       : flat_multiset(sorted_equivalent, __il.begin(), __il.end(), __alloc) {}
 
   template <class _Allocator>
     requires uses_allocator<container_type, _Allocator>::value
-  _LIBCPP_HIDE_FROM_ABI flat_multiset(
+  _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 flat_multiset(
       sorted_equivalent_t, initializer_list<value_type> __il, const key_compare& __comp, const _Allocator& __alloc)
       : flat_multiset(sorted_equivalent, __il.begin(), __il.end(), __comp, __alloc) {}
 
-  _LIBCPP_HIDE_FROM_ABI flat_multiset& operator=(initializer_list<value_type> __il) {
+  _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 flat_multiset& operator=(initializer_list<value_type> __il) {
     clear();
     insert(__il);
     return *this;
@@ -321,9 +322,9 @@ public:
   // copy/move assignment are not specified in the spec (defaulted)
   // but move assignment can potentially leave moved from object in an inconsistent
   // state if an exception is thrown
-  _LIBCPP_HIDE_FROM_ABI flat_multiset& operator=(const flat_multiset&) = default;
+  _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 flat_multiset& operator=(const flat_multiset&) = default;
 
-  _LIBCPP_HIDE_FROM_ABI flat_multiset& operator=(flat_multiset&& __other) noexcept(
+  _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 flat_multiset& operator=(flat_multiset&& __other) noexcept(
       is_nothrow_move_assignable_v<_KeyContainer> && is_nothrow_move_assignable_v<_Compare>) {
     auto __clear_other_guard = std::__make_scope_guard([&]() noexcept { __other.clear() /* noexcept */; });
     auto __clear_self_guard  = std::__make_exception_guard([&]() noexcept { clear() /* noexcept */; });
@@ -334,30 +335,60 @@ public:
   }
 
   // iterators
-  _LIBCPP_HIDE_FROM_ABI iterator begin() noexcept { return iterator(std::as_const(__keys_).begin()); }
-  _LIBCPP_HIDE_FROM_ABI const_iterator begin() const noexcept { return const_iterator(__keys_.begin()); }
-  _LIBCPP_HIDE_FROM_ABI iterator end() noexcept { return iterator(std::as_const(__keys_).end()); }
-  _LIBCPP_HIDE_FROM_ABI const_iterator end() const noexcept { return const_iterator(__keys_.end()); }
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 iterator begin() noexcept {
+    return iterator(std::as_const(__keys_).begin());
+  }
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 const_iterator begin() const noexcept {
+    return const_iterator(__keys_.begin());
+  }
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 iterator end() noexcept {
+    return iterator(std::as_const(__keys_).end());
+  }
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 const_iterator end() const noexcept {
+    return const_iterator(__keys_.end());
+  }
 
-  _LIBCPP_HIDE_FROM_ABI reverse_iterator rbegin() noexcept { return reverse_iterator(end()); }
-  _LIBCPP_HIDE_FROM_ABI const_reverse_iterator rbegin() const noexcept { return const_reverse_iterator(end()); }
-  _LIBCPP_HIDE_FROM_ABI reverse_iterator rend() noexcept { return reverse_iterator(begin()); }
-  _LIBCPP_HIDE_FROM_ABI const_reverse_iterator rend() const noexcept { return const_reverse_iterator(begin()); }
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 reverse_iterator rbegin() noexcept {
+    return reverse_iterator(end());
+  }
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 const_reverse_iterator rbegin() const noexcept {
+    return const_reverse_iterator(end());
+  }
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 reverse_iterator rend() noexcept {
+    return reverse_iterator(begin());
+  }
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 const_reverse_iterator rend() const noexcept {
+    return const_reverse_iterator(begin());
+  }
 
-  _LIBCPP_HIDE_FROM_ABI const_iterator cbegin() const noexcept { return begin(); }
-  _LIBCPP_HIDE_FROM_ABI const_iterator cend() const noexcept { return end(); }
-  _LIBCPP_HIDE_FROM_ABI const_reverse_iterator crbegin() const noexcept { return const_reverse_iterator(end()); }
-  _LIBCPP_HIDE_FROM_ABI const_reverse_iterator crend() const noexcept { return const_reverse_iterator(begin()); }
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 const_iterator cbegin() const noexcept {
+    return begin();
+  }
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 const_iterator cend() const noexcept {
+    return end();
+  }
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 const_reverse_iterator crbegin() const noexcept {
+    return const_reverse_iterator(end());
+  }
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 const_reverse_iterator crend() const noexcept {
+    return const_reverse_iterator(begin());
+  }
 
   // capacity
-  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI bool empty() const noexcept { return __keys_.empty(); }
-  _LIBCPP_HIDE_FROM_ABI size_type size() const noexcept { return __keys_.size(); }
-  _LIBCPP_HIDE_FROM_ABI size_type max_size() const noexcept { return __keys_.max_size(); }
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 bool empty() const noexcept {
+    return __keys_.empty();
+  }
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 size_type size() const noexcept {
+    return __keys_.size();
+  }
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 size_type max_size() const noexcept {
+    return __keys_.max_size();
+  }
 
   // [flat.multiset.modifiers], modifiers
   template <class... _Args>
     requires is_constructible_v<value_type, _Args...>
-  _LIBCPP_HIDE_FROM_ABI iterator emplace(_Args&&... __args) {
+  _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 iterator emplace(_Args&&... __args) {
     if constexpr (sizeof...(__args) == 1 && (is_same_v<remove_cvref_t<_Args>, _Key> && ...)) {
       return __emplace(std::forward<_Args>(__args)...);
     } else {
@@ -367,7 +398,7 @@ public:
 
   template <class... _Args>
     requires is_constructible_v<value_type, _Args...>
-  _LIBCPP_HIDE_FROM_ABI iterator emplace_hint(const_iterator __hint, _Args&&... __args) {
+  _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 iterator emplace_hint(const_iterator __hint, _Args&&... __args) {
     if constexpr (sizeof...(__args) == 1 && (is_same_v<remove_cvref_t<_Args>, _Key> && ...)) {
       return __emplace_hint(std::move(__hint), std::forward<_Args>(__args)...);
     } else {
@@ -375,21 +406,23 @@ public:
     }
   }
 
-  _LIBCPP_HIDE_FROM_ABI iterator insert(const value_type& __x) { return emplace(__x); }
+  _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 iterator insert(const value_type& __x) { return emplace(__x); }
 
-  _LIBCPP_HIDE_FROM_ABI iterator insert(value_type&& __x) { return emplace(std::move(__x)); }
+  _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 iterator insert(value_type&& __x) {
+    return emplace(std::move(__x));
+  }
 
-  _LIBCPP_HIDE_FROM_ABI iterator insert(const_iterator __hint, const value_type& __x) {
+  _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 iterator insert(const_iterator __hint, const value_type& __x) {
     return emplace_hint(__hint, __x);
   }
 
-  _LIBCPP_HIDE_FROM_ABI iterator insert(const_iterator __hint, value_type&& __x) {
+  _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 iterator insert(const_iterator __hint, value_type&& __x) {
     return emplace_hint(__hint, std::move(__x));
   }
 
   template <class _InputIterator>
     requires __has_input_iterator_category<_InputIterator>::value
-  _LIBCPP_HIDE_FROM_ABI void insert(_InputIterator __first, _InputIterator __last) {
+  _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 void insert(_InputIterator __first, _InputIterator __last) {
     if constexpr (sized_sentinel_for<_InputIterator, _InputIterator>) {
       __reserve(__last - __first);
     }
@@ -398,7 +431,8 @@ public:
 
   template <class _InputIterator>
     requires __has_input_iterator_category<_InputIterator>::value
-  _LIBCPP_HIDE_FROM_ABI void insert(sorted_equivalent_t, _InputIterator __first, _InputIterator __last) {
+  _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 void
+  insert(sorted_equivalent_t, _InputIterator __first, _InputIterator __last) {
     if constexpr (sized_sentinel_for<_InputIterator, _InputIterator>) {
       __reserve(__last - __first);
     }
@@ -407,7 +441,7 @@ public:
   }
 
   template <_ContainerCompatibleRange<value_type> _Range>
-  _LIBCPP_HIDE_FROM_ABI void insert_range(_Range&& __range) {
+  _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 void insert_range(_Range&& __range) {
     if constexpr (ranges::sized_range<_Range>) {
       __reserve(ranges::size(__range));
     }
@@ -415,26 +449,38 @@ public:
     __append_sort_merge</*WasSorted = */ false>(std::forward<_Range>(__range));
   }
 
-  _LIBCPP_HIDE_FROM_ABI void insert(initializer_list<value_type> __il) { insert(__il.begin(), __il.end()); }
+  template <_ContainerCompatibleRange<value_type> _Range>
+  _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 void insert_range(sorted_equivalent_t, _Range&& __range) {
+    if constexpr (ranges::sized_range<_Range>) {
+      __reserve(ranges::size(__range));
+    }
 
-  _LIBCPP_HIDE_FROM_ABI void insert(sorted_equivalent_t, initializer_list<value_type> __il) {
+    __append_sort_merge</*WasSorted = */ true>(std::forward<_Range>(__range));
+  }
+
+  _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 void insert(initializer_list<value_type> __il) {
+    insert(__il.begin(), __il.end());
+  }
+
+  _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 void
+  insert(sorted_equivalent_t, initializer_list<value_type> __il) {
     insert(sorted_equivalent, __il.begin(), __il.end());
   }
 
-  _LIBCPP_HIDE_FROM_ABI container_type extract() && {
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 container_type extract() && {
     auto __guard = std::__make_scope_guard([&]() noexcept { clear() /* noexcept */; });
     auto __ret   = std::move(__keys_);
     return __ret;
   }
 
-  _LIBCPP_HIDE_FROM_ABI void replace(container_type&& __keys) {
+  _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 void replace(container_type&& __keys) {
     _LIBCPP_ASSERT_SEMANTIC_REQUIREMENT(ranges::is_sorted(__keys, __compare_), "Key container is not sorted");
     auto __guard = std::__make_exception_guard([&]() noexcept { clear() /* noexcept */; });
     __keys_      = std::move(__keys);
     __guard.__complete();
   }
 
-  _LIBCPP_HIDE_FROM_ABI iterator erase(iterator __position) {
+  _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 iterator erase(iterator __position) {
     auto __on_failure = std::__make_exception_guard([&]() noexcept { clear() /* noexcept */; });
     auto __key_iter   = __keys_.erase(__position.__base());
     __on_failure.__complete();
@@ -444,7 +490,7 @@ public:
   // The following overload is the same as the iterator overload
   // iterator erase(const_iterator __position);
 
-  _LIBCPP_HIDE_FROM_ABI size_type erase(const key_type& __x) {
+  _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 size_type erase(const key_type& __x) {
     auto [__first, __last] = equal_range(__x);
     auto __res             = __last - __first;
     erase(__first, __last);
@@ -454,149 +500,170 @@ public:
   template <class _Kp>
     requires(__is_transparent_v<_Compare> && !is_convertible_v<_Kp &&, iterator> &&
              !is_convertible_v<_Kp &&, const_iterator>)
-  _LIBCPP_HIDE_FROM_ABI size_type erase(_Kp&& __x) {
+  _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 size_type erase(_Kp&& __x) {
     auto [__first, __last] = equal_range(__x);
     auto __res             = __last - __first;
     erase(__first, __last);
     return __res;
   }
 
-  _LIBCPP_HIDE_FROM_ABI iterator erase(const_iterator __first, const_iterator __last) {
+  _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 iterator erase(const_iterator __first, const_iterator __last) {
     auto __on_failure = std::__make_exception_guard([&]() noexcept { clear() /* noexcept */; });
     auto __key_it     = __keys_.erase(__first.__base(), __last.__base());
     __on_failure.__complete();
     return iterator(std::move(__key_it));
   }
 
-  _LIBCPP_HIDE_FROM_ABI void swap(flat_multiset& __y) noexcept {
-    // warning: The spec has unconditional noexcept, which means that
-    // if any of the following functions throw an exception,
-    // std::terminate will be called
-    // This is discussed in P3567, which hasn't been voted on yet.
+  _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 void
+  swap(flat_multiset& __y) noexcept(is_nothrow_swappable_v<container_type> && is_nothrow_swappable_v<key_compare>) {
+    auto __on_failure = std::__make_exception_guard([&]() noexcept {
+      clear() /* noexcept */;
+      __y.clear() /* noexcept */;
+    });
     ranges::swap(__compare_, __y.__compare_);
     ranges::swap(__keys_, __y.__keys_);
+    __on_failure.__complete();
   }
 
-  _LIBCPP_HIDE_FROM_ABI void clear() noexcept { __keys_.clear(); }
+  _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 void clear() noexcept { __keys_.clear(); }
 
   // observers
-  _LIBCPP_HIDE_FROM_ABI key_compare key_comp() const { return __compare_; }
-  _LIBCPP_HIDE_FROM_ABI value_compare value_comp() const { return __compare_; }
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 key_compare key_comp() const { return __compare_; }
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 value_compare value_comp() const {
+    return __compare_;
+  }
 
   // map operations
-  _LIBCPP_HIDE_FROM_ABI iterator find(const key_type& __x) { return __find_impl(*this, __x); }
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 iterator find(const key_type& __x) {
+    return __find_impl(*this, __x);
+  }
 
-  _LIBCPP_HIDE_FROM_ABI const_iterator find(const key_type& __x) const { return __find_impl(*this, __x); }
-
-  template <class _Kp>
-    requires __is_transparent_v<_Compare>
-  _LIBCPP_HIDE_FROM_ABI iterator find(const _Kp& __x) {
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 const_iterator find(const key_type& __x) const {
     return __find_impl(*this, __x);
   }
 
   template <class _Kp>
     requires __is_transparent_v<_Compare>
-  _LIBCPP_HIDE_FROM_ABI const_iterator find(const _Kp& __x) const {
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 iterator find(const _Kp& __x) {
     return __find_impl(*this, __x);
   }
 
-  _LIBCPP_HIDE_FROM_ABI size_type count(const key_type& __x) const {
+  template <class _Kp>
+    requires __is_transparent_v<_Compare>
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 const_iterator find(const _Kp& __x) const {
+    return __find_impl(*this, __x);
+  }
+
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 size_type count(const key_type& __x) const {
     auto [__first, __last] = equal_range(__x);
     return __last - __first;
   }
 
   template <class _Kp>
     requires __is_transparent_v<_Compare>
-  _LIBCPP_HIDE_FROM_ABI size_type count(const _Kp& __x) const {
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 size_type count(const _Kp& __x) const {
     auto [__first, __last] = equal_range(__x);
     return __last - __first;
   }
 
-  _LIBCPP_HIDE_FROM_ABI bool contains(const key_type& __x) const { return find(__x) != end(); }
-
-  template <class _Kp>
-    requires __is_transparent_v<_Compare>
-  _LIBCPP_HIDE_FROM_ABI bool contains(const _Kp& __x) const {
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 bool contains(const key_type& __x) const {
     return find(__x) != end();
   }
 
-  _LIBCPP_HIDE_FROM_ABI iterator lower_bound(const key_type& __x) {
+  template <class _Kp>
+    requires __is_transparent_v<_Compare>
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 bool contains(const _Kp& __x) const {
+    return find(__x) != end();
+  }
+
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 iterator lower_bound(const key_type& __x) {
     const auto& __keys = __keys_;
     return iterator(std::lower_bound(__keys.begin(), __keys.end(), __x, __compare_));
   }
 
-  _LIBCPP_HIDE_FROM_ABI const_iterator lower_bound(const key_type& __x) const {
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 const_iterator
+  lower_bound(const key_type& __x) const {
     return const_iterator(std::lower_bound(__keys_.begin(), __keys_.end(), __x, __compare_));
   }
 
   template <class _Kp>
     requires __is_transparent_v<_Compare>
-  _LIBCPP_HIDE_FROM_ABI iterator lower_bound(const _Kp& __x) {
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 iterator lower_bound(const _Kp& __x) {
     const auto& __keys = __keys_;
     return iterator(std::lower_bound(__keys.begin(), __keys.end(), __x, __compare_));
   }
 
   template <class _Kp>
     requires __is_transparent_v<_Compare>
-  _LIBCPP_HIDE_FROM_ABI const_iterator lower_bound(const _Kp& __x) const {
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 const_iterator lower_bound(const _Kp& __x) const {
     return const_iterator(std::lower_bound(__keys_.begin(), __keys_.end(), __x, __compare_));
   }
 
-  _LIBCPP_HIDE_FROM_ABI iterator upper_bound(const key_type& __x) {
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 iterator upper_bound(const key_type& __x) {
     const auto& __keys = __keys_;
     return iterator(std::upper_bound(__keys.begin(), __keys.end(), __x, __compare_));
   }
 
-  _LIBCPP_HIDE_FROM_ABI const_iterator upper_bound(const key_type& __x) const {
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 const_iterator
+  upper_bound(const key_type& __x) const {
     return const_iterator(std::upper_bound(__keys_.begin(), __keys_.end(), __x, __compare_));
   }
 
   template <class _Kp>
     requires __is_transparent_v<_Compare>
-  _LIBCPP_HIDE_FROM_ABI iterator upper_bound(const _Kp& __x) {
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 iterator upper_bound(const _Kp& __x) {
     const auto& __keys = __keys_;
     return iterator(std::upper_bound(__keys.begin(), __keys.end(), __x, __compare_));
   }
 
   template <class _Kp>
     requires __is_transparent_v<_Compare>
-  _LIBCPP_HIDE_FROM_ABI const_iterator upper_bound(const _Kp& __x) const {
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 const_iterator upper_bound(const _Kp& __x) const {
     return const_iterator(std::upper_bound(__keys_.begin(), __keys_.end(), __x, __compare_));
   }
 
-  _LIBCPP_HIDE_FROM_ABI pair<iterator, iterator> equal_range(const key_type& __x) {
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 pair<iterator, iterator>
+  equal_range(const key_type& __x) {
     return __equal_range_impl(*this, __x);
   }
 
-  _LIBCPP_HIDE_FROM_ABI pair<const_iterator, const_iterator> equal_range(const key_type& __x) const {
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 pair<const_iterator, const_iterator>
+  equal_range(const key_type& __x) const {
     return __equal_range_impl(*this, __x);
   }
 
   template <class _Kp>
     requires __is_transparent_v<_Compare>
-  _LIBCPP_HIDE_FROM_ABI pair<iterator, iterator> equal_range(const _Kp& __x) {
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 pair<iterator, iterator>
+  equal_range(const _Kp& __x) {
     return __equal_range_impl(*this, __x);
   }
   template <class _Kp>
     requires __is_transparent_v<_Compare>
-  _LIBCPP_HIDE_FROM_ABI pair<const_iterator, const_iterator> equal_range(const _Kp& __x) const {
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 pair<const_iterator, const_iterator>
+  equal_range(const _Kp& __x) const {
     return __equal_range_impl(*this, __x);
   }
 
-  friend _LIBCPP_HIDE_FROM_ABI bool operator==(const flat_multiset& __x, const flat_multiset& __y) {
+  friend _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 bool
+  operator==(const flat_multiset& __x, const flat_multiset& __y) {
     return ranges::equal(__x, __y);
   }
 
-  friend _LIBCPP_HIDE_FROM_ABI auto operator<=>(const flat_multiset& __x, const flat_multiset& __y) {
+  friend _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 auto
+  operator<=>(const flat_multiset& __x, const flat_multiset& __y) {
     return std::lexicographical_compare_three_way(
         __x.begin(), __x.end(), __y.begin(), __y.end(), std::__synth_three_way);
   }
 
-  friend _LIBCPP_HIDE_FROM_ABI void swap(flat_multiset& __x, flat_multiset& __y) noexcept { __x.swap(__y); }
+  friend _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 void
+  swap(flat_multiset& __x, flat_multiset& __y) noexcept(noexcept(__x.swap(__y))) {
+    __x.swap(__y);
+  }
 
 private:
   template <bool _WasSorted, class... _Args>
-  _LIBCPP_HIDE_FROM_ABI void __append_sort_merge(_Args&&... __args) {
+  _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 void __append_sort_merge(_Args&&... __args) {
     auto __on_failure    = std::__make_exception_guard([&]() noexcept { clear() /* noexcept */; });
     size_type __old_size = size();
     __flat_set_utils::__append(*this, std::forward<_Args>(__args)...);
@@ -604,20 +671,20 @@ private:
       ranges::sort(__keys_.begin() + __old_size, __keys_.end(), __compare_);
     } else {
       _LIBCPP_ASSERT_SEMANTIC_REQUIREMENT(
-          ranges::is_sorted(__keys_ | ranges::views::drop(__old_size)), "Key container is not sorted");
+          ranges::is_sorted(__keys_ | ranges::views::drop(__old_size), __compare_), "Key container is not sorted");
     }
     ranges::inplace_merge(__keys_.begin(), __keys_.begin() + __old_size, __keys_.end(), __compare_);
     __on_failure.__complete();
   }
 
   template <class _Kp>
-  _LIBCPP_HIDE_FROM_ABI iterator __emplace(_Kp&& __key) {
+  _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 iterator __emplace(_Kp&& __key) {
     auto __it = upper_bound(__key);
     return __flat_set_utils::__emplace_exact_pos(*this, __it, std::forward<_Kp>(__key));
   }
 
   template <class _Kp>
-  _LIBCPP_HIDE_FROM_ABI iterator __emplace_hint(const_iterator __hint, _Kp&& __key) {
+  _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 iterator __emplace_hint(const_iterator __hint, _Kp&& __key) {
     auto __prev_larger  = __hint != cbegin() && __compare_(__key, *std::prev(__hint));
     auto __next_smaller = __hint != cend() && __compare_(*__hint, __key);
 
@@ -649,7 +716,7 @@ private:
   }
 
   template <class _Self, class _Kp>
-  _LIBCPP_HIDE_FROM_ABI static auto __find_impl(_Self&& __self, const _Kp& __key) {
+  _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 static auto __find_impl(_Self&& __self, const _Kp& __key) {
     auto __it   = __self.lower_bound(__key);
     auto __last = __self.end();
     if (__it == __last || __self.__compare_(__key, *__it)) {
@@ -659,29 +726,30 @@ private:
   }
 
   template <class _Self, class _Kp>
-  _LIBCPP_HIDE_FROM_ABI static auto __equal_range_impl(_Self&& __self, const _Kp& __key) {
+  _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 static auto __equal_range_impl(_Self&& __self, const _Kp& __key) {
     using __iter = _If<is_const_v<__libcpp_remove_reference_t<_Self>>, const_iterator, iterator>;
     auto [__key_first, __key_last] =
         std::equal_range(__self.__keys_.begin(), __self.__keys_.end(), __key, __self.__compare_);
     return std::make_pair(__iter(__key_first), __iter(__key_last));
   }
 
-  _LIBCPP_HIDE_FROM_ABI void __reserve(size_t __size) {
+  _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 void __reserve(size_t __size) {
     if constexpr (__container_traits<_KeyContainer>::__reservable) {
       __keys_.reserve(__size);
     }
   }
 
   template <class _Key2, class _Compare2, class _KeyContainer2, class _Predicate>
-  friend typename flat_multiset<_Key2, _Compare2, _KeyContainer2>::size_type
+  friend typename flat_multiset<_Key2, _Compare2, _KeyContainer2>::size_type _LIBCPP_CONSTEXPR_SINCE_CXX26
   erase_if(flat_multiset<_Key2, _Compare2, _KeyContainer2>&, _Predicate);
 
   _KeyContainer __keys_;
   _LIBCPP_NO_UNIQUE_ADDRESS key_compare __compare_;
 
   struct __key_equiv {
-    _LIBCPP_HIDE_FROM_ABI __key_equiv(key_compare __c) : __comp_(__c) {}
-    _LIBCPP_HIDE_FROM_ABI bool operator()(const_reference __x, const_reference __y) const {
+    _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 __key_equiv(key_compare __c) : __comp_(__c) {}
+    _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 bool
+    operator()(const_reference __x, const_reference __y) const {
       return !__comp_(std::get<0>(__x), std::get<0>(__y)) && !__comp_(std::get<0>(__y), std::get<0>(__x));
     }
     key_compare __comp_;
@@ -689,7 +757,7 @@ private:
 };
 
 template <class _KeyContainer, class _Compare = less<typename _KeyContainer::value_type>>
-  requires(!__is_allocator<_Compare>::value && !__is_allocator<_KeyContainer>::value &&
+  requires(!__is_allocator_v<_Compare> && !__is_allocator_v<_KeyContainer> &&
            is_invocable_v<const _Compare&,
                           const typename _KeyContainer::value_type&,
                           const typename _KeyContainer::value_type&>)
@@ -697,12 +765,12 @@ flat_multiset(_KeyContainer, _Compare = _Compare())
     -> flat_multiset<typename _KeyContainer::value_type, _Compare, _KeyContainer>;
 
 template <class _KeyContainer, class _Allocator>
-  requires(uses_allocator_v<_KeyContainer, _Allocator> && !__is_allocator<_KeyContainer>::value)
+  requires(uses_allocator_v<_KeyContainer, _Allocator> && !__is_allocator_v<_KeyContainer>)
 flat_multiset(_KeyContainer, _Allocator)
     -> flat_multiset<typename _KeyContainer::value_type, less<typename _KeyContainer::value_type>, _KeyContainer>;
 
 template <class _KeyContainer, class _Compare, class _Allocator>
-  requires(!__is_allocator<_Compare>::value && !__is_allocator<_KeyContainer>::value &&
+  requires(!__is_allocator_v<_Compare> && !__is_allocator_v<_KeyContainer> &&
            uses_allocator_v<_KeyContainer, _Allocator> &&
            is_invocable_v<const _Compare&,
                           const typename _KeyContainer::value_type&,
@@ -711,7 +779,7 @@ flat_multiset(_KeyContainer, _Compare, _Allocator)
     -> flat_multiset<typename _KeyContainer::value_type, _Compare, _KeyContainer>;
 
 template <class _KeyContainer, class _Compare = less<typename _KeyContainer::value_type>>
-  requires(!__is_allocator<_Compare>::value && !__is_allocator<_KeyContainer>::value &&
+  requires(!__is_allocator_v<_Compare> && !__is_allocator_v<_KeyContainer> &&
            is_invocable_v<const _Compare&,
                           const typename _KeyContainer::value_type&,
                           const typename _KeyContainer::value_type&>)
@@ -719,12 +787,12 @@ flat_multiset(sorted_equivalent_t, _KeyContainer, _Compare = _Compare())
     -> flat_multiset<typename _KeyContainer::value_type, _Compare, _KeyContainer>;
 
 template <class _KeyContainer, class _Allocator>
-  requires(uses_allocator_v<_KeyContainer, _Allocator> && !__is_allocator<_KeyContainer>::value)
+  requires(uses_allocator_v<_KeyContainer, _Allocator> && !__is_allocator_v<_KeyContainer>)
 flat_multiset(sorted_equivalent_t, _KeyContainer, _Allocator)
     -> flat_multiset<typename _KeyContainer::value_type, less<typename _KeyContainer::value_type>, _KeyContainer>;
 
 template <class _KeyContainer, class _Compare, class _Allocator>
-  requires(!__is_allocator<_Compare>::value && !__is_allocator<_KeyContainer>::value &&
+  requires(!__is_allocator_v<_Compare> && !__is_allocator_v<_KeyContainer> &&
            uses_allocator_v<_KeyContainer, _Allocator> &&
            is_invocable_v<const _Compare&,
                           const typename _KeyContainer::value_type&,
@@ -732,37 +800,37 @@ template <class _KeyContainer, class _Compare, class _Allocator>
 flat_multiset(sorted_equivalent_t, _KeyContainer, _Compare, _Allocator)
     -> flat_multiset<typename _KeyContainer::value_type, _Compare, _KeyContainer>;
 
-template <class _InputIterator, class _Compare = less<__iter_value_type<_InputIterator>>>
-  requires(__has_input_iterator_category<_InputIterator>::value && !__is_allocator<_Compare>::value)
+template <class _InputIterator, class _Compare = less<__iterator_value_type<_InputIterator>>>
+  requires(__has_input_iterator_category<_InputIterator>::value && !__is_allocator_v<_Compare>)
 flat_multiset(_InputIterator, _InputIterator, _Compare = _Compare())
-    -> flat_multiset<__iter_value_type<_InputIterator>, _Compare>;
+    -> flat_multiset<__iterator_value_type<_InputIterator>, _Compare>;
 
-template <class _InputIterator, class _Compare = less<__iter_value_type<_InputIterator>>>
-  requires(__has_input_iterator_category<_InputIterator>::value && !__is_allocator<_Compare>::value)
+template <class _InputIterator, class _Compare = less<__iterator_value_type<_InputIterator>>>
+  requires(__has_input_iterator_category<_InputIterator>::value && !__is_allocator_v<_Compare>)
 flat_multiset(sorted_equivalent_t, _InputIterator, _InputIterator, _Compare = _Compare())
-    -> flat_multiset<__iter_value_type<_InputIterator>, _Compare>;
+    -> flat_multiset<__iterator_value_type<_InputIterator>, _Compare>;
 
 template <ranges::input_range _Range,
           class _Compare   = less<ranges::range_value_t<_Range>>,
           class _Allocator = allocator<ranges::range_value_t<_Range>>,
-          class            = __enable_if_t<!__is_allocator<_Compare>::value && __is_allocator<_Allocator>::value>>
+          class            = __enable_if_t<!__is_allocator_v<_Compare> && __is_allocator_v<_Allocator>>>
 flat_multiset(from_range_t, _Range&&, _Compare = _Compare(), _Allocator = _Allocator()) -> flat_multiset<
     ranges::range_value_t<_Range>,
     _Compare,
     vector<ranges::range_value_t<_Range>, __allocator_traits_rebind_t<_Allocator, ranges::range_value_t<_Range>>>>;
 
-template <ranges::input_range _Range, class _Allocator, class = __enable_if_t<__is_allocator<_Allocator>::value>>
+template <ranges::input_range _Range, class _Allocator, class = __enable_if_t<__is_allocator_v<_Allocator>>>
 flat_multiset(from_range_t, _Range&&, _Allocator) -> flat_multiset<
     ranges::range_value_t<_Range>,
     less<ranges::range_value_t<_Range>>,
     vector<ranges::range_value_t<_Range>, __allocator_traits_rebind_t<_Allocator, ranges::range_value_t<_Range>>>>;
 
 template <class _Key, class _Compare = less<_Key>>
-  requires(!__is_allocator<_Compare>::value)
+  requires(!__is_allocator_v<_Compare>)
 flat_multiset(initializer_list<_Key>, _Compare = _Compare()) -> flat_multiset<_Key, _Compare>;
 
 template <class _Key, class _Compare = less<_Key>>
-  requires(!__is_allocator<_Compare>::value)
+  requires(!__is_allocator_v<_Compare>)
 flat_multiset(sorted_equivalent_t, initializer_list<_Key>, _Compare = _Compare()) -> flat_multiset<_Key, _Compare>;
 
 template <class _Key, class _Compare, class _KeyContainer, class _Allocator>
@@ -770,7 +838,7 @@ struct uses_allocator<flat_multiset<_Key, _Compare, _KeyContainer>, _Allocator>
     : bool_constant<uses_allocator_v<_KeyContainer, _Allocator> > {};
 
 template <class _Key, class _Compare, class _KeyContainer, class _Predicate>
-_LIBCPP_HIDE_FROM_ABI typename flat_multiset<_Key, _Compare, _KeyContainer>::size_type
+_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 typename flat_multiset<_Key, _Compare, _KeyContainer>::size_type
 erase_if(flat_multiset<_Key, _Compare, _KeyContainer>& __flat_multiset, _Predicate __pred) {
   auto __guard = std::__make_exception_guard([&] { __flat_multiset.clear(); });
   auto __it =
diff --git a/lib/libcxx/include/__flat_set/flat_set.h b/lib/libcxx/include/__flat_set/flat_set.h
index 95cb998459..57c3926e33 100644
--- a/lib/libcxx/include/__flat_set/flat_set.h
+++ b/lib/libcxx/include/__flat_set/flat_set.h
@@ -12,7 +12,6 @@
 
 #include <__algorithm/lexicographical_compare_three_way.h>
 #include <__algorithm/lower_bound.h>
-#include <__algorithm/min.h>
 #include <__algorithm/ranges_adjacent_find.h>
 #include <__algorithm/ranges_equal.h>
 #include <__algorithm/ranges_inplace_merge.h>
@@ -24,20 +23,16 @@
 #include <__compare/synth_three_way.h>
 #include <__concepts/swappable.h>
 #include <__config>
-#include <__cstddef/ptrdiff_t.h>
 #include <__flat_map/sorted_unique.h>
 #include <__flat_set/ra_iterator.h>
 #include <__flat_set/utils.h>
-#include <__functional/invoke.h>
 #include <__functional/is_transparent.h>
 #include <__functional/operations.h>
 #include <__fwd/vector.h>
 #include <__iterator/concepts.h>
-#include <__iterator/distance.h>
 #include <__iterator/iterator_traits.h>
 #include <__iterator/next.h>
 #include <__iterator/prev.h>
-#include <__iterator/ranges_iterator_traits.h>
 #include <__iterator/reverse_iterator.h>
 #include <__memory/allocator_traits.h>
 #include <__memory/uses_allocator.h>
@@ -47,10 +42,7 @@
 #include <__ranges/container_compatible_range.h>
 #include <__ranges/drop_view.h>
 #include <__ranges/from_range.h>
-#include <__ranges/ref_view.h>
 #include <__ranges/size.h>
-#include <__ranges/subrange.h>
-#include <__type_traits/conjunction.h>
 #include <__type_traits/container_traits.h>
 #include <__type_traits/invoke.h>
 #include <__type_traits/is_allocator.h>
@@ -347,38 +339,42 @@ public:
   }
 
   // iterators
-  _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 iterator begin() noexcept {
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 iterator begin() noexcept {
     return iterator(std::as_const(__keys_).begin());
   }
-  _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 const_iterator begin() const noexcept {
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 const_iterator begin() const noexcept {
     return const_iterator(__keys_.begin());
   }
-  _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 iterator end() noexcept {
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 iterator end() noexcept {
     return iterator(std::as_const(__keys_).end());
   }
-  _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 const_iterator end() const noexcept {
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 const_iterator end() const noexcept {
     return const_iterator(__keys_.end());
   }
 
-  _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 reverse_iterator rbegin() noexcept {
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 reverse_iterator rbegin() noexcept {
     return reverse_iterator(end());
   }
-  _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 const_reverse_iterator rbegin() const noexcept {
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 const_reverse_iterator rbegin() const noexcept {
     return const_reverse_iterator(end());
   }
-  _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 reverse_iterator rend() noexcept {
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 reverse_iterator rend() noexcept {
     return reverse_iterator(begin());
   }
-  _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 const_reverse_iterator rend() const noexcept {
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 const_reverse_iterator rend() const noexcept {
     return const_reverse_iterator(begin());
   }
 
-  _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 const_iterator cbegin() const noexcept { return begin(); }
-  _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 const_iterator cend() const noexcept { return end(); }
-  _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 const_reverse_iterator crbegin() const noexcept {
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 const_iterator cbegin() const noexcept {
+    return begin();
+  }
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 const_iterator cend() const noexcept {
+    return end();
+  }
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 const_reverse_iterator crbegin() const noexcept {
     return const_reverse_iterator(end());
   }
-  _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 const_reverse_iterator crend() const noexcept {
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 const_reverse_iterator crend() const noexcept {
     return const_reverse_iterator(begin());
   }
 
@@ -387,9 +383,13 @@ public:
     return __keys_.empty();
   }
 
-  _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 size_type size() const noexcept { return __keys_.size(); }
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 size_type size() const noexcept {
+    return __keys_.size();
+  }
 
-  _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 size_type max_size() const noexcept { return __keys_.max_size(); }
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 size_type max_size() const noexcept {
+    return __keys_.max_size();
+  }
 
   // [flat.set.modifiers], modifiers
   template <class... _Args>
@@ -466,6 +466,15 @@ public:
     __append_sort_merge_unique</*WasSorted = */ false>(std::forward<_Range>(__range));
   }
 
+  template <_ContainerCompatibleRange<value_type> _Range>
+  _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 void insert_range(std::sorted_unique_t, _Range&& __range) {
+    if constexpr (ranges::sized_range<_Range>) {
+      __reserve(ranges::size(__range));
+    }
+
+    __append_sort_merge_unique</*WasSorted = */ true>(std::forward<_Range>(__range));
+  }
+
   _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 void insert(initializer_list<value_type> __il) {
     insert(__il.begin(), __il.end());
   }
@@ -474,7 +483,7 @@ public:
     insert(sorted_unique, __il.begin(), __il.end());
   }
 
-  _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 container_type extract() && {
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 container_type extract() && {
     auto __guard = std::__make_scope_guard([&]() noexcept { clear() /* noexcept */; });
     auto __ret   = std::move(__keys_);
     return __ret;
@@ -524,123 +533,131 @@ public:
     return iterator(std::move(__key_it));
   }
 
-  _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 void swap(flat_set& __y) noexcept {
-    // warning: The spec has unconditional noexcept, which means that
-    // if any of the following functions throw an exception,
-    // std::terminate will be called.
-    // This is discussed in P2767, which hasn't been voted on yet.
+  _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 void
+  swap(flat_set& __y) noexcept(is_nothrow_swappable_v<container_type> && is_nothrow_swappable_v<key_compare>) {
+    auto __on_failure = std::__make_exception_guard([&]() noexcept {
+      clear() /* noexcept */;
+      __y.clear() /* noexcept */;
+    });
     ranges::swap(__compare_, __y.__compare_);
     ranges::swap(__keys_, __y.__keys_);
+    __on_failure.__complete();
   }
 
   _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 void clear() noexcept { __keys_.clear(); }
 
   // observers
-  _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 key_compare key_comp() const { return __compare_; }
-  _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 value_compare value_comp() const { return __compare_; }
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 key_compare key_comp() const { return __compare_; }
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 value_compare value_comp() const {
+    return __compare_;
+  }
 
   // set operations
-  _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 iterator find(const key_type& __x) {
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 iterator find(const key_type& __x) {
     return __find_impl(*this, __x);
   }
 
-  _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 const_iterator find(const key_type& __x) const {
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 const_iterator find(const key_type& __x) const {
     return __find_impl(*this, __x);
   }
 
   template <class _Kp>
     requires __is_transparent_v<_Compare>
-  _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 iterator find(const _Kp& __x) {
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 iterator find(const _Kp& __x) {
     return __find_impl(*this, __x);
   }
 
   template <class _Kp>
     requires __is_transparent_v<_Compare>
-  _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 const_iterator find(const _Kp& __x) const {
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 const_iterator find(const _Kp& __x) const {
     return __find_impl(*this, __x);
   }
 
-  _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 size_type count(const key_type& __x) const {
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 size_type count(const key_type& __x) const {
     return contains(__x) ? 1 : 0;
   }
 
   template <class _Kp>
     requires __is_transparent_v<_Compare>
-  _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 size_type count(const _Kp& __x) const {
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 size_type count(const _Kp& __x) const {
     return contains(__x) ? 1 : 0;
   }
 
-  _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 bool contains(const key_type& __x) const {
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 bool contains(const key_type& __x) const {
     return find(__x) != end();
   }
 
   template <class _Kp>
     requires __is_transparent_v<_Compare>
-  _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 bool contains(const _Kp& __x) const {
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 bool contains(const _Kp& __x) const {
     return find(__x) != end();
   }
 
-  _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 iterator lower_bound(const key_type& __x) {
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 iterator lower_bound(const key_type& __x) {
     const auto& __keys = __keys_;
     return iterator(std::lower_bound(__keys.begin(), __keys.end(), __x, __compare_));
   }
 
-  _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 const_iterator lower_bound(const key_type& __x) const {
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 const_iterator
+  lower_bound(const key_type& __x) const {
     return const_iterator(std::lower_bound(__keys_.begin(), __keys_.end(), __x, __compare_));
   }
 
   template <class _Kp>
     requires __is_transparent_v<_Compare>
-  _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 iterator lower_bound(const _Kp& __x) {
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 iterator lower_bound(const _Kp& __x) {
     const auto& __keys = __keys_;
     return iterator(std::lower_bound(__keys.begin(), __keys.end(), __x, __compare_));
   }
 
   template <class _Kp>
     requires __is_transparent_v<_Compare>
-  _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 const_iterator lower_bound(const _Kp& __x) const {
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 const_iterator lower_bound(const _Kp& __x) const {
     return const_iterator(std::lower_bound(__keys_.begin(), __keys_.end(), __x, __compare_));
   }
 
-  _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 iterator upper_bound(const key_type& __x) {
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 iterator upper_bound(const key_type& __x) {
     const auto& __keys = __keys_;
     return iterator(std::upper_bound(__keys.begin(), __keys.end(), __x, __compare_));
   }
 
-  _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 const_iterator upper_bound(const key_type& __x) const {
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 const_iterator
+  upper_bound(const key_type& __x) const {
     return const_iterator(std::upper_bound(__keys_.begin(), __keys_.end(), __x, __compare_));
   }
 
   template <class _Kp>
     requires __is_transparent_v<_Compare>
-  _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 iterator upper_bound(const _Kp& __x) {
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 iterator upper_bound(const _Kp& __x) {
     const auto& __keys = __keys_;
     return iterator(std::upper_bound(__keys.begin(), __keys.end(), __x, __compare_));
   }
 
   template <class _Kp>
     requires __is_transparent_v<_Compare>
-  _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 const_iterator upper_bound(const _Kp& __x) const {
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 const_iterator upper_bound(const _Kp& __x) const {
     return const_iterator(std::upper_bound(__keys_.begin(), __keys_.end(), __x, __compare_));
   }
 
-  _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 pair<iterator, iterator> equal_range(const key_type& __x) {
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 pair<iterator, iterator>
+  equal_range(const key_type& __x) {
     return __equal_range_impl(*this, __x);
   }
 
-  _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 pair<const_iterator, const_iterator>
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 pair<const_iterator, const_iterator>
   equal_range(const key_type& __x) const {
     return __equal_range_impl(*this, __x);
   }
 
   template <class _Kp>
     requires __is_transparent_v<_Compare>
-  _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 pair<iterator, iterator> equal_range(const _Kp& __x) {
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 pair<iterator, iterator>
+  equal_range(const _Kp& __x) {
     return __equal_range_impl(*this, __x);
   }
   template <class _Kp>
     requires __is_transparent_v<_Compare>
-  _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 pair<const_iterator, const_iterator>
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 pair<const_iterator, const_iterator>
   equal_range(const _Kp& __x) const {
     return __equal_range_impl(*this, __x);
   }
@@ -655,13 +672,14 @@ public:
         __x.begin(), __x.end(), __y.begin(), __y.end(), std::__synth_three_way);
   }
 
-  friend _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 void swap(flat_set& __x, flat_set& __y) noexcept {
+  friend _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 void
+  swap(flat_set& __x, flat_set& __y) noexcept(noexcept(__x.swap(__y))) {
     __x.swap(__y);
   }
 
 private:
   _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 bool __is_sorted_and_unique(auto&& __key_container) const {
-    auto __greater_or_equal_to = [this](const auto& __x, const auto& __y) { return !__compare_(__x, __y); };
+    auto __greater_or_equal_to = [this](const auto& __x, const auto& __y) -> bool { return !__compare_(__x, __y); };
     return ranges::adjacent_find(__key_container, __greater_or_equal_to) == ranges::end(__key_container);
   }
 
@@ -774,19 +792,19 @@ private:
 };
 
 template <class _KeyContainer, class _Compare = less<typename _KeyContainer::value_type>>
-  requires(!__is_allocator<_Compare>::value && !__is_allocator<_KeyContainer>::value &&
+  requires(!__is_allocator_v<_Compare> && !__is_allocator_v<_KeyContainer> &&
            is_invocable_v<const _Compare&,
                           const typename _KeyContainer::value_type&,
                           const typename _KeyContainer::value_type&>)
 flat_set(_KeyContainer, _Compare = _Compare()) -> flat_set<typename _KeyContainer::value_type, _Compare, _KeyContainer>;
 
 template <class _KeyContainer, class _Allocator>
-  requires(uses_allocator_v<_KeyContainer, _Allocator> && !__is_allocator<_KeyContainer>::value)
+  requires(uses_allocator_v<_KeyContainer, _Allocator> && !__is_allocator_v<_KeyContainer>)
 flat_set(_KeyContainer, _Allocator)
     -> flat_set<typename _KeyContainer::value_type, less<typename _KeyContainer::value_type>, _KeyContainer>;
 
 template <class _KeyContainer, class _Compare, class _Allocator>
-  requires(!__is_allocator<_Compare>::value && !__is_allocator<_KeyContainer>::value &&
+  requires(!__is_allocator_v<_Compare> && !__is_allocator_v<_KeyContainer> &&
            uses_allocator_v<_KeyContainer, _Allocator> &&
            is_invocable_v<const _Compare&,
                           const typename _KeyContainer::value_type&,
@@ -794,7 +812,7 @@ template <class _KeyContainer, class _Compare, class _Allocator>
 flat_set(_KeyContainer, _Compare, _Allocator) -> flat_set<typename _KeyContainer::value_type, _Compare, _KeyContainer>;
 
 template <class _KeyContainer, class _Compare = less<typename _KeyContainer::value_type>>
-  requires(!__is_allocator<_Compare>::value && !__is_allocator<_KeyContainer>::value &&
+  requires(!__is_allocator_v<_Compare> && !__is_allocator_v<_KeyContainer> &&
            is_invocable_v<const _Compare&,
                           const typename _KeyContainer::value_type&,
                           const typename _KeyContainer::value_type&>)
@@ -802,12 +820,12 @@ flat_set(sorted_unique_t, _KeyContainer, _Compare = _Compare())
     -> flat_set<typename _KeyContainer::value_type, _Compare, _KeyContainer>;
 
 template <class _KeyContainer, class _Allocator>
-  requires(uses_allocator_v<_KeyContainer, _Allocator> && !__is_allocator<_KeyContainer>::value)
+  requires(uses_allocator_v<_KeyContainer, _Allocator> && !__is_allocator_v<_KeyContainer>)
 flat_set(sorted_unique_t, _KeyContainer, _Allocator)
     -> flat_set<typename _KeyContainer::value_type, less<typename _KeyContainer::value_type>, _KeyContainer>;
 
 template <class _KeyContainer, class _Compare, class _Allocator>
-  requires(!__is_allocator<_Compare>::value && !__is_allocator<_KeyContainer>::value &&
+  requires(!__is_allocator_v<_Compare> && !__is_allocator_v<_KeyContainer> &&
            uses_allocator_v<_KeyContainer, _Allocator> &&
            is_invocable_v<const _Compare&,
                           const typename _KeyContainer::value_type&,
@@ -815,37 +833,37 @@ template <class _KeyContainer, class _Compare, class _Allocator>
 flat_set(sorted_unique_t, _KeyContainer, _Compare, _Allocator)
     -> flat_set<typename _KeyContainer::value_type, _Compare, _KeyContainer>;
 
-template <class _InputIterator, class _Compare = less<__iter_value_type<_InputIterator>>>
-  requires(__has_input_iterator_category<_InputIterator>::value && !__is_allocator<_Compare>::value)
+template <class _InputIterator, class _Compare = less<__iterator_value_type<_InputIterator>>>
+  requires(__has_input_iterator_category<_InputIterator>::value && !__is_allocator_v<_Compare>)
 flat_set(_InputIterator, _InputIterator, _Compare = _Compare())
-    -> flat_set<__iter_value_type<_InputIterator>, _Compare>;
+    -> flat_set<__iterator_value_type<_InputIterator>, _Compare>;
 
-template <class _InputIterator, class _Compare = less<__iter_value_type<_InputIterator>>>
-  requires(__has_input_iterator_category<_InputIterator>::value && !__is_allocator<_Compare>::value)
+template <class _InputIterator, class _Compare = less<__iterator_value_type<_InputIterator>>>
+  requires(__has_input_iterator_category<_InputIterator>::value && !__is_allocator_v<_Compare>)
 flat_set(sorted_unique_t, _InputIterator, _InputIterator, _Compare = _Compare())
-    -> flat_set<__iter_value_type<_InputIterator>, _Compare>;
+    -> flat_set<__iterator_value_type<_InputIterator>, _Compare>;
 
 template <ranges::input_range _Range,
           class _Compare   = less<ranges::range_value_t<_Range>>,
           class _Allocator = allocator<ranges::range_value_t<_Range>>,
-          class            = __enable_if_t<!__is_allocator<_Compare>::value && __is_allocator<_Allocator>::value>>
+          class            = __enable_if_t<!__is_allocator_v<_Compare> && __is_allocator_v<_Allocator>>>
 flat_set(from_range_t, _Range&&, _Compare = _Compare(), _Allocator = _Allocator()) -> flat_set<
     ranges::range_value_t<_Range>,
     _Compare,
     vector<ranges::range_value_t<_Range>, __allocator_traits_rebind_t<_Allocator, ranges::range_value_t<_Range>>>>;
 
-template <ranges::input_range _Range, class _Allocator, class = __enable_if_t<__is_allocator<_Allocator>::value>>
+template <ranges::input_range _Range, class _Allocator, class = __enable_if_t<__is_allocator_v<_Allocator>>>
 flat_set(from_range_t, _Range&&, _Allocator) -> flat_set<
     ranges::range_value_t<_Range>,
     less<ranges::range_value_t<_Range>>,
     vector<ranges::range_value_t<_Range>, __allocator_traits_rebind_t<_Allocator, ranges::range_value_t<_Range>>>>;
 
 template <class _Key, class _Compare = less<_Key>>
-  requires(!__is_allocator<_Compare>::value)
+  requires(!__is_allocator_v<_Compare>)
 flat_set(initializer_list<_Key>, _Compare = _Compare()) -> flat_set<_Key, _Compare>;
 
 template <class _Key, class _Compare = less<_Key>>
-  requires(!__is_allocator<_Compare>::value)
+  requires(!__is_allocator_v<_Compare>)
 flat_set(sorted_unique_t, initializer_list<_Key>, _Compare = _Compare()) -> flat_set<_Key, _Compare>;
 
 template <class _Key, class _Compare, class _KeyContainer, class _Allocator>
diff --git a/lib/libcxx/include/__format/concepts.h b/lib/libcxx/include/__format/concepts.h
index 28297c612d..5b603701c0 100644
--- a/lib/libcxx/include/__format/concepts.h
+++ b/lib/libcxx/include/__format/concepts.h
@@ -15,12 +15,8 @@
 #include <__config>
 #include <__format/format_parse_context.h>
 #include <__fwd/format.h>
-#include <__fwd/tuple.h>
-#include <__tuple/tuple_size.h>
-#include <__type_traits/is_specialization.h>
 #include <__type_traits/remove_const.h>
 #include <__type_traits/remove_reference.h>
-#include <__utility/pair.h>
 
 #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
 #  pragma GCC system_header
@@ -65,16 +61,6 @@ concept __formattable =
 #  if _LIBCPP_STD_VER >= 23
 template <class _Tp, class _CharT>
 concept formattable = __formattable<_Tp, _CharT>;
-
-// [tuple.like] defines a tuple-like exposition only concept. This concept is
-// not related to that. Therefore it uses a different name for the concept.
-//
-// TODO FMT Add a test to validate we fail when using that concept after P2165
-// has been implemented.
-template <class _Tp>
-concept __fmt_pair_like =
-    __is_specialization_v<_Tp, pair> || (__is_specialization_v<_Tp, tuple> && tuple_size_v<_Tp> == 2);
-
 #  endif // _LIBCPP_STD_VER >= 23
 #endif   // _LIBCPP_STD_VER >= 20
 
diff --git a/lib/libcxx/include/__format/extended_grapheme_cluster_table.h b/lib/libcxx/include/__format/extended_grapheme_cluster_table.h
index f76e018df7..6da07862d4 100644
--- a/lib/libcxx/include/__format/extended_grapheme_cluster_table.h
+++ b/lib/libcxx/include/__format/extended_grapheme_cluster_table.h
@@ -61,7 +61,7 @@
 #ifndef _LIBCPP___FORMAT_EXTENDED_GRAPHEME_CLUSTER_TABLE_H
 #define _LIBCPP___FORMAT_EXTENDED_GRAPHEME_CLUSTER_TABLE_H
 
-#include <__algorithm/ranges_upper_bound.h>
+#include <__algorithm/upper_bound.h>
 #include <__config>
 #include <__cstddef/ptrdiff_t.h>
 #include <__iterator/access.h>
@@ -1647,7 +1647,8 @@ _LIBCPP_HIDE_FROM_ABI inline constexpr uint32_t __entries[1501] = {
   // size. Then the upper bound for code point 3 will return the entry after
   // 0x1810. After moving to the previous entry the algorithm arrives at the
   // correct entry.
-  ptrdiff_t __i = std::ranges::upper_bound(__entries, (__code_point << 11) | 0x7ffu) - __entries;
+  ptrdiff_t __i =
+      std::upper_bound(std::begin(__entries), std::end(__entries), (__code_point << 11) | 0x7ffu) - __entries;
   if (__i == 0)
     return __property::__none;
 
diff --git a/lib/libcxx/include/__format/fmt_pair_like.h b/lib/libcxx/include/__format/fmt_pair_like.h
new file mode 100644
index 0000000000..d2f2f54d5a
--- /dev/null
+++ b/lib/libcxx/include/__format/fmt_pair_like.h
@@ -0,0 +1,42 @@
+// -*- C++ -*-
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef _LIBCPP___FORMAT_FMT_PAIR_LIKE_H
+#define _LIBCPP___FORMAT_FMT_PAIR_LIKE_H
+
+#include <__config>
+#include <__fwd/pair.h>
+#include <__fwd/tuple.h>
+#include <__tuple/tuple_size.h>
+#include <__type_traits/is_specialization.h>
+
+#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
+#  pragma GCC system_header
+#endif
+
+_LIBCPP_BEGIN_NAMESPACE_STD
+
+#if _LIBCPP_STD_VER >= 23
+
+// [tuple.like] defines the exposition-only concept tuple-like. __fmt_pair_like is unrelated to that concept and
+// therefore uses a different name.
+//
+// TODO FMT Add a test to validate we fail when using that concept after P2165 has been implemented.
+
+// [format.range.fmtkind]/2.2.1 and [tab:formatter.range.type]:
+// "U is either a specialization of pair or a specialization of tuple such that tuple_size_v<U> is 2."
+template <class _Tp>
+concept __fmt_pair_like =
+    __is_specialization_v<_Tp, pair> || (__is_specialization_v<_Tp, tuple> && tuple_size_v<_Tp> == 2);
+
+#endif // _LIBCPP_STD_VER >= 23
+
+_LIBCPP_END_NAMESPACE_STD
+
+#endif // _LIBCPP___FORMAT_FMT_PAIR_LIKE_H
diff --git a/lib/libcxx/include/__format/format_arg.h b/lib/libcxx/include/__format/format_arg.h
index ed5e76275e..19794f0f08 100644
--- a/lib/libcxx/include/__format/format_arg.h
+++ b/lib/libcxx/include/__format/format_arg.h
@@ -149,7 +149,7 @@ _LIBCPP_HIDE_FROM_ABI decltype(auto) __visit_format_arg(_Visitor&& __vis, basic_
   __libcpp_unreachable();
 }
 
-#  if _LIBCPP_STD_VER >= 26 && _LIBCPP_HAS_EXPLICIT_THIS_PARAMETER
+#  if _LIBCPP_STD_VER >= 26
 
 template <class _Rp, class _Visitor, class _Context>
 _LIBCPP_HIDE_FROM_ABI _Rp __visit_format_arg(_Visitor&& __vis, basic_format_arg<_Context> __arg) {
@@ -200,7 +200,7 @@ _LIBCPP_HIDE_FROM_ABI _Rp __visit_format_arg(_Visitor&& __vis, basic_format_arg<
   __libcpp_unreachable();
 }
 
-#  endif // _LIBCPP_STD_VER >= 26 && _LIBCPP_HAS_EXPLICIT_THIS_PARAMETER
+#  endif // _LIBCPP_STD_VER >= 26
 
 /// Contains the values used in basic_format_arg.
 ///
@@ -285,7 +285,7 @@ public:
 
   _LIBCPP_HIDE_FROM_ABI explicit operator bool() const noexcept { return __type_ != __format::__arg_t::__none; }
 
-#  if _LIBCPP_STD_VER >= 26 && _LIBCPP_HAS_EXPLICIT_THIS_PARAMETER
+#  if _LIBCPP_STD_VER >= 26
 
   // This function is user facing, so it must wrap the non-standard types of
   // the "variant" in a handle to stay conforming. See __arg_t for more details.
@@ -329,7 +329,7 @@ public:
     }
   }
 
-#  endif // _LIBCPP_STD_VER >= 26 && _LIBCPP_HAS_EXPLICIT_THIS_PARAMETER
+#  endif // _LIBCPP_STD_VER >= 26
 
 private:
   using char_type = typename _Context::char_type;
@@ -371,11 +371,8 @@ private:
 // This function is user facing, so it must wrap the non-standard types of
 // the "variant" in a handle to stay conforming. See __arg_t for more details.
 template <class _Visitor, class _Context>
-#  if _LIBCPP_STD_VER >= 26 && _LIBCPP_HAS_EXPLICIT_THIS_PARAMETER
-_LIBCPP_DEPRECATED_IN_CXX26
-#  endif
-    _LIBCPP_HIDE_FROM_ABI decltype(auto)
-    visit_format_arg(_Visitor&& __vis, basic_format_arg<_Context> __arg) {
+_LIBCPP_DEPRECATED_IN_CXX26 _LIBCPP_HIDE_FROM_ABI decltype(auto)
+visit_format_arg(_Visitor&& __vis, basic_format_arg<_Context> __arg) {
   switch (__arg.__type_) {
 #  if _LIBCPP_HAS_INT128
   case __format::__arg_t::__i128: {
@@ -387,7 +384,7 @@ _LIBCPP_DEPRECATED_IN_CXX26
     typename __basic_format_arg_value<_Context>::__handle __h{__arg.__value_.__u128_};
     return std::invoke(std::forward<_Visitor>(__vis), typename basic_format_arg<_Context>::handle{__h});
   }
-#  endif // _LIBCPP_STD_VER >= 26 && _LIBCPP_HAS_EXPLICIT_THIS_PARAMETER
+#  endif // _LIBCPP_HAS_INT128
   default:
     return std::__visit_format_arg(std::forward<_Visitor>(__vis), __arg);
   }
diff --git a/lib/libcxx/include/__format/format_args.h b/lib/libcxx/include/__format/format_args.h
index 9dd7a5ed9c..f1b648a10a 100644
--- a/lib/libcxx/include/__format/format_args.h
+++ b/lib/libcxx/include/__format/format_args.h
@@ -40,7 +40,7 @@ public:
     }
   }
 
-  _LIBCPP_HIDE_FROM_ABI basic_format_arg<_Context> get(size_t __id) const noexcept {
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI basic_format_arg<_Context> get(size_t __id) const noexcept {
     if (__id >= __size_)
       return basic_format_arg<_Context>{};
 
diff --git a/lib/libcxx/include/__format/format_context.h b/lib/libcxx/include/__format/format_context.h
index e672ee7ad0..9732ea9bf7 100644
--- a/lib/libcxx/include/__format/format_context.h
+++ b/lib/libcxx/include/__format/format_context.h
@@ -80,17 +80,17 @@ public:
   template <class _Tp>
   using formatter_type = formatter<_Tp, _CharT>;
 
-  _LIBCPP_HIDE_FROM_ABI basic_format_arg<basic_format_context> arg(size_t __id) const noexcept {
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI basic_format_arg<basic_format_context> arg(size_t __id) const noexcept {
     return __args_.get(__id);
   }
 #  if _LIBCPP_HAS_LOCALIZATION
-  _LIBCPP_HIDE_FROM_ABI std::locale locale() {
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI std::locale locale() {
     if (!__loc_)
       __loc_ = std::locale{};
     return *__loc_;
   }
 #  endif
-  _LIBCPP_HIDE_FROM_ABI iterator out() { return std::move(__out_it_); }
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI iterator out() { return std::move(__out_it_); }
   _LIBCPP_HIDE_FROM_ABI void advance_to(iterator __it) { __out_it_ = std::move(__it); }
 
 private:
@@ -175,13 +175,13 @@ public:
                   __format::__determine_arg_t<basic_format_context, decltype(__arg)>(),
                   __basic_format_arg_value<basic_format_context>(__arg)};
           };
-#  if _LIBCPP_STD_VER >= 26 && _LIBCPP_HAS_EXPLICIT_THIS_PARAMETER
+#  if _LIBCPP_STD_VER >= 26
           return static_cast<_Context*>(__c)->arg(__id).visit(std::move(__visitor));
 #  else
           _LIBCPP_SUPPRESS_DEPRECATED_PUSH
           return std::visit_format_arg(std::move(__visitor), static_cast<_Context*>(__c)->arg(__id));
           _LIBCPP_SUPPRESS_DEPRECATED_POP
-#  endif // _LIBCPP_STD_VER >= 26 && _LIBCPP_HAS_EXPLICIT_THIS_PARAMETER
+#  endif // _LIBCPP_STD_VER >= 26
         }) {
   }
 
diff --git a/lib/libcxx/include/__format/format_parse_context.h b/lib/libcxx/include/__format/format_parse_context.h
index 67b90c7b7e..2eda9d7f1f 100644
--- a/lib/libcxx/include/__format/format_parse_context.h
+++ b/lib/libcxx/include/__format/format_parse_context.h
@@ -41,8 +41,8 @@ public:
   basic_format_parse_context(const basic_format_parse_context&)            = delete;
   basic_format_parse_context& operator=(const basic_format_parse_context&) = delete;
 
-  _LIBCPP_HIDE_FROM_ABI constexpr const_iterator begin() const noexcept { return __begin_; }
-  _LIBCPP_HIDE_FROM_ABI constexpr const_iterator end() const noexcept { return __end_; }
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr const_iterator begin() const noexcept { return __begin_; }
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr const_iterator end() const noexcept { return __end_; }
   _LIBCPP_HIDE_FROM_ABI constexpr void advance_to(const_iterator __it) { __begin_ = __it; }
 
   _LIBCPP_HIDE_FROM_ABI constexpr size_t next_arg_id() {
diff --git a/lib/libcxx/include/__format/formatter_output.h b/lib/libcxx/include/__format/formatter_output.h
index cc74e3858a..63dd7fcacd 100644
--- a/lib/libcxx/include/__format/formatter_output.h
+++ b/lib/libcxx/include/__format/formatter_output.h
@@ -45,7 +45,7 @@ _LIBCPP_BEGIN_NAMESPACE_STD
 
 namespace __formatter {
 
-struct _LIBCPP_EXPORTED_FROM_ABI __padding_size_result {
+struct __padding_size_result {
   size_t __before_;
   size_t __after_;
 };
@@ -151,45 +151,41 @@ _LIBCPP_HIDE_FROM_ABI _OutIt __fill(_OutIt __out_it, size_t __n, _CharT __value)
   }
 }
 
+template <__fmt_char_type _CharT, output_iterator<const _CharT&> _OutIt>
+_LIBCPP_HIDE_FROM_ABI _OutIt __fill(_OutIt __out_it, size_t __n, __format_spec::__code_point<_CharT> __value) {
 #  if _LIBCPP_HAS_UNICODE
-template <__fmt_char_type _CharT, output_iterator<const _CharT&> _OutIt>
-  requires(same_as<_CharT, char>)
-_LIBCPP_HIDE_FROM_ABI _OutIt __fill(_OutIt __out_it, size_t __n, __format_spec::__code_point<_CharT> __value) {
-  std::size_t __bytes = std::countl_one(static_cast<unsigned char>(__value.__data[0]));
-  if (__bytes == 0)
-    return __formatter::__fill(std::move(__out_it), __n, __value.__data[0]);
-
-  for (size_t __i = 0; __i < __n; ++__i)
-    __out_it = __formatter::__copy(
-        std::addressof(__value.__data[0]), std::addressof(__value.__data[0]) + __bytes, std::move(__out_it));
-  return __out_it;
-}
+  if constexpr (same_as<_CharT, char>) {
+    std::size_t __bytes = std::countl_one(static_cast<unsigned char>(__value.__data[0]));
+    if (__bytes == 0)
+      return __formatter::__fill(std::move(__out_it), __n, __value.__data[0]);
 
+    for (size_t __i = 0; __i < __n; ++__i)
+      __out_it = __formatter::__copy(
+          std::addressof(__value.__data[0]), std::addressof(__value.__data[0]) + __bytes, std::move(__out_it));
+    return __out_it;
 #    if _LIBCPP_HAS_WIDE_CHARACTERS
-template <__fmt_char_type _CharT, output_iterator<const _CharT&> _OutIt>
-  requires(same_as<_CharT, wchar_t> && sizeof(wchar_t) == 2)
-_LIBCPP_HIDE_FROM_ABI _OutIt __fill(_OutIt __out_it, size_t __n, __format_spec::__code_point<_CharT> __value) {
-  if (!__unicode::__is_high_surrogate(__value.__data[0]))
-    return __formatter::__fill(std::move(__out_it), __n, __value.__data[0]);
+  } else if constexpr (same_as<_CharT, wchar_t>) {
+    if constexpr (sizeof(wchar_t) == 2) {
+      if (!__unicode::__is_high_surrogate(__value.__data[0]))
+        return __formatter::__fill(std::move(__out_it), __n, __value.__data[0]);
 
-  for (size_t __i = 0; __i < __n; ++__i)
-    __out_it = __formatter::__copy(
-        std::addressof(__value.__data[0]), std::addressof(__value.__data[0]) + 2, std::move(__out_it));
-  return __out_it;
-}
-
-template <__fmt_char_type _CharT, output_iterator<const _CharT&> _OutIt>
-  requires(same_as<_CharT, wchar_t> && sizeof(wchar_t) == 4)
-_LIBCPP_HIDE_FROM_ABI _OutIt __fill(_OutIt __out_it, size_t __n, __format_spec::__code_point<_CharT> __value) {
-  return __formatter::__fill(std::move(__out_it), __n, __value.__data[0]);
-}
+      for (size_t __i = 0; __i < __n; ++__i)
+        __out_it = __formatter::__copy(
+            std::addressof(__value.__data[0]), std::addressof(__value.__data[0]) + 2, std::move(__out_it));
+      return __out_it;
+    } else if constexpr (sizeof(wchar_t) == 4) {
+      return __formatter::__fill(std::move(__out_it), __n, __value.__data[0]);
+    } else {
+      static_assert(false, "expected sizeof(wchar_t) to be 2 or 4");
+    }
 #    endif // _LIBCPP_HAS_WIDE_CHARACTERS
-#  else    // _LIBCPP_HAS_UNICODE
-template <__fmt_char_type _CharT, output_iterator<const _CharT&> _OutIt>
-_LIBCPP_HIDE_FROM_ABI _OutIt __fill(_OutIt __out_it, size_t __n, __format_spec::__code_point<_CharT> __value) {
+  } else {
+    static_assert(false, "Unexpected CharT");
+  }
+#  else  // _LIBCPP_HAS_UNICODE
   return __formatter::__fill(std::move(__out_it), __n, __value.__data[0]);
+#  endif // _LIBCPP_HAS_UNICODE
 }
-#  endif   // _LIBCPP_HAS_UNICODE
 
 /// Writes the input to the output with the required padding.
 ///
diff --git a/lib/libcxx/include/__format/indic_conjunct_break_table.h b/lib/libcxx/include/__format/indic_conjunct_break_table.h
index f48ea62590..d85782d732 100644
--- a/lib/libcxx/include/__format/indic_conjunct_break_table.h
+++ b/lib/libcxx/include/__format/indic_conjunct_break_table.h
@@ -61,7 +61,7 @@
 #ifndef _LIBCPP___FORMAT_INDIC_CONJUNCT_BREAK_TABLE_H
 #define _LIBCPP___FORMAT_INDIC_CONJUNCT_BREAK_TABLE_H
 
-#include <__algorithm/ranges_upper_bound.h>
+#include <__algorithm/upper_bound.h>
 #include <__config>
 #include <__cstddef/ptrdiff_t.h>
 #include <__iterator/access.h>
@@ -531,7 +531,8 @@ _LIBCPP_HIDE_FROM_ABI inline constexpr uint32_t __entries[403] = {
   // size. Then the upper bound for code point 3 will return the entry after
   // 0x1810. After moving to the previous entry the algorithm arrives at the
   // correct entry.
-  ptrdiff_t __i = std::ranges::upper_bound(__entries, (__code_point << 11) | 0x7ffu) - __entries;
+  ptrdiff_t __i =
+      std::upper_bound(std::begin(__entries), std::end(__entries), (__code_point << 11) | 0x7ffu) - __entries;
   if (__i == 0)
     return __property::__none;
 
diff --git a/lib/libcxx/include/__format/range_default_formatter.h b/lib/libcxx/include/__format/range_default_formatter.h
index 7149debb2f..2d2190657b 100644
--- a/lib/libcxx/include/__format/range_default_formatter.h
+++ b/lib/libcxx/include/__format/range_default_formatter.h
@@ -16,10 +16,11 @@
 
 #include <__algorithm/ranges_copy.h>
 #include <__chrono/statically_widen.h>
-#include <__concepts/same_as.h>
 #include <__config>
 #include <__format/concepts.h>
+#include <__format/fmt_pair_like.h>
 #include <__format/formatter.h>
+#include <__format/range_format.h>
 #include <__format/range_formatter.h>
 #include <__iterator/back_insert_iterator.h>
 #include <__ranges/concepts.h>
@@ -42,51 +43,11 @@ concept __const_formattable_range =
 template <class _Rp, class _CharT>
 using __fmt_maybe_const _LIBCPP_NODEBUG = conditional_t<__const_formattable_range<_Rp, _CharT>, const _Rp, _Rp>;
 
-_LIBCPP_DIAGNOSTIC_PUSH
-_LIBCPP_CLANG_DIAGNOSTIC_IGNORED("-Wshadow")
-_LIBCPP_GCC_DIAGNOSTIC_IGNORED("-Wshadow")
-// This shadows map, set, and string.
-enum class range_format { disabled, map, set, sequence, string, debug_string };
-_LIBCPP_DIAGNOSTIC_POP
-
 // There is no definition of this struct, it's purely intended to be used to
 // generate diagnostics.
 template <class _Rp>
 struct __instantiated_the_primary_template_of_format_kind;
 
-template <class _Rp>
-constexpr range_format format_kind = [] {
-  // [format.range.fmtkind]/1
-  // A program that instantiates the primary template of format_kind is ill-formed.
-  static_assert(sizeof(_Rp) != sizeof(_Rp), "create a template specialization of format_kind for your type");
-  return range_format::disabled;
-}();
-
-template <ranges::input_range _Rp>
-  requires same_as<_Rp, remove_cvref_t<_Rp>>
-inline constexpr range_format format_kind<_Rp> = [] {
-  // [format.range.fmtkind]/2
-
-  // 2.1 If same_as<remove_cvref_t<ranges::range_reference_t<R>>, R> is true,
-  // Otherwise format_kind<R> is range_format::disabled.
-  if constexpr (same_as<remove_cvref_t<ranges::range_reference_t<_Rp>>, _Rp>)
-    return range_format::disabled;
-  // 2.2 Otherwise, if the qualified-id R::key_type is valid and denotes a type:
-  else if constexpr (requires { typename _Rp::key_type; }) {
-    // 2.2.1 If the qualified-id R::mapped_type is valid and denotes a type ...
-    if constexpr (requires { typename _Rp::mapped_type; } &&
-                  // 2.2.1 ... If either U is a specialization of pair or U is a specialization
-                  // of tuple and tuple_size_v<U> == 2
-                  __fmt_pair_like<remove_cvref_t<ranges::range_reference_t<_Rp>>>)
-      return range_format::map;
-    else
-      // 2.2.2 Otherwise format_kind<R> is range_format::set.
-      return range_format::set;
-  } else
-    // 2.3 Otherwise, format_kind<R> is range_format::sequence.
-    return range_format::sequence;
-}();
-
 template <range_format _Kp, ranges::input_range _Rp, class _CharT>
 struct __range_default_formatter;
 
diff --git a/lib/libcxx/include/__format/range_format.h b/lib/libcxx/include/__format/range_format.h
new file mode 100644
index 0000000000..fe43923f9d
--- /dev/null
+++ b/lib/libcxx/include/__format/range_format.h
@@ -0,0 +1,71 @@
+// -*- C++ -*-
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef _LIBCPP___FORMAT_RANGE_FORMAT_H
+#define _LIBCPP___FORMAT_RANGE_FORMAT_H
+
+#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
+#  pragma GCC system_header
+#endif
+
+#include <__concepts/same_as.h>
+#include <__config>
+#include <__format/fmt_pair_like.h>
+#include <__ranges/concepts.h>
+#include <__type_traits/remove_cvref.h>
+
+_LIBCPP_BEGIN_NAMESPACE_STD
+
+#if _LIBCPP_STD_VER >= 23
+
+_LIBCPP_DIAGNOSTIC_PUSH
+_LIBCPP_CLANG_DIAGNOSTIC_IGNORED("-Wshadow")
+_LIBCPP_GCC_DIAGNOSTIC_IGNORED("-Wshadow")
+// This shadows map, set, and string.
+enum class range_format { disabled, map, set, sequence, string, debug_string };
+_LIBCPP_DIAGNOSTIC_POP
+
+template <class _Rp>
+constexpr range_format format_kind = [] {
+  // [format.range.fmtkind]/1
+  // A program that instantiates the primary template of format_kind is ill-formed.
+  static_assert(sizeof(_Rp) != sizeof(_Rp), "create a template specialization of format_kind for your type");
+  return range_format::disabled;
+}();
+
+template <ranges::input_range _Rp>
+  requires same_as<_Rp, remove_cvref_t<_Rp>>
+inline constexpr range_format format_kind<_Rp> = [] {
+  // [format.range.fmtkind]/2
+
+  // 2.1 If same_as<remove_cvref_t<ranges::range_reference_t<R>>, R> is true,
+  // format_kind<R> is range_format::disabled.
+  if constexpr (same_as<remove_cvref_t<ranges::range_reference_t<_Rp>>, _Rp>)
+    return range_format::disabled;
+  // 2.2 Otherwise, if the qualified-id R::key_type is valid and denotes a type:
+  else if constexpr (requires { typename _Rp::key_type; }) {
+    // 2.2.1 If the qualified-id R::mapped_type is valid and denotes a type ...
+    if constexpr (requires { typename _Rp::mapped_type; } &&
+                  // 2.2.1 ... If either U is a specialization of pair or U is a specialization
+                  // of tuple and tuple_size_v<U> == 2
+                  __fmt_pair_like<remove_cvref_t<ranges::range_reference_t<_Rp>>>)
+      return range_format::map;
+    else
+      // 2.2.2 Otherwise format_kind<R> is range_format::set.
+      return range_format::set;
+  } else
+    // 2.3 Otherwise, format_kind<R> is range_format::sequence.
+    return range_format::sequence;
+}();
+
+#endif // _LIBCPP_STD_VER >= 23
+
+_LIBCPP_END_NAMESPACE_STD
+
+#endif // _LIBCPP___FORMAT_RANGE_FORMAT_H
diff --git a/lib/libcxx/include/__format/range_formatter.h b/lib/libcxx/include/__format/range_formatter.h
index 0d7fe9970c..06d2b4cb4b 100644
--- a/lib/libcxx/include/__format/range_formatter.h
+++ b/lib/libcxx/include/__format/range_formatter.h
@@ -20,6 +20,7 @@
 #include <__config>
 #include <__format/buffer.h>
 #include <__format/concepts.h>
+#include <__format/fmt_pair_like.h>
 #include <__format/format_context.h>
 #include <__format/format_error.h>
 #include <__format/formatter.h>
diff --git a/lib/libcxx/include/__format/width_estimation_table.h b/lib/libcxx/include/__format/width_estimation_table.h
index 0ea0b4f413..ae10a77a5b 100644
--- a/lib/libcxx/include/__format/width_estimation_table.h
+++ b/lib/libcxx/include/__format/width_estimation_table.h
@@ -61,9 +61,10 @@
 #ifndef _LIBCPP___FORMAT_WIDTH_ESTIMATION_TABLE_H
 #define _LIBCPP___FORMAT_WIDTH_ESTIMATION_TABLE_H
 
-#include <__algorithm/ranges_upper_bound.h>
+#include <__algorithm/upper_bound.h>
 #include <__config>
 #include <__cstddef/ptrdiff_t.h>
+#include <__iterator/access.h>
 #include <cstdint>
 
 #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
@@ -255,7 +256,8 @@ inline constexpr uint32_t __table_upper_bound = 0x0003fffd;
   if (__code_point < (__entries[0] >> 14))
     return 1;
 
-  ptrdiff_t __i = std::ranges::upper_bound(__entries, (__code_point << 14) | 0x3fffu) - __entries;
+  ptrdiff_t __i =
+      std::upper_bound(std::begin(__entries), std::end(__entries), (__code_point << 14) | 0x3fffu) - __entries;
   if (__i == 0)
     return 1;
 
diff --git a/lib/libcxx/include/__functional/bind.h b/lib/libcxx/include/__functional/bind.h
index 596cce03cd..cbe8660b82 100644
--- a/lib/libcxx/include/__functional/bind.h
+++ b/lib/libcxx/include/__functional/bind.h
@@ -81,17 +81,12 @@ inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 _Tp& __mu(reference_w
   return __t.get();
 }
 
-template <class _Ti, class... _Uj, size_t... _Indx>
-inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 __invoke_result_t<_Ti&, _Uj...>
-__mu_expand(_Ti& __ti, tuple<_Uj...>& __uj, __tuple_indices<_Indx...>) {
-  return __ti(std::forward<_Uj>(std::get<_Indx>(__uj))...);
-}
-
 template <class _Ti, class... _Uj, __enable_if_t<is_bind_expression<_Ti>::value, int> = 0>
 inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 __invoke_result_t<_Ti&, _Uj...>
 __mu(_Ti& __ti, tuple<_Uj...>& __uj) {
-  typedef typename __make_tuple_indices<sizeof...(_Uj)>::type __indices;
-  return std::__mu_expand(__ti, __uj, __indices());
+  return [&]<size_t... _Indices>(__index_sequence<_Indices...>) -> __invoke_result_t<_Ti&, _Uj...> {
+    return __ti(std::forward<_Uj>(std::get<_Indices>(__uj))...);
+  }(__index_sequence_for<_Uj...>{});
 }
 
 template <bool _IsPh, class _Ti, class _Uj>
@@ -191,7 +186,7 @@ struct __bind_return<_Fp, const tuple<_BoundArgs...>, _TupleUj, true> {
 
 template <class _Fp, class _BoundArgs, size_t... _Indx, class _Args>
 inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 typename __bind_return<_Fp, _BoundArgs, _Args>::type
-__apply_functor(_Fp& __f, _BoundArgs& __bound_args, __tuple_indices<_Indx...>, _Args&& __args) {
+__apply_functor(_Fp& __f, _BoundArgs& __bound_args, __index_sequence<_Indx...>, _Args&& __args) {
   return std::__invoke(__f, std::__mu(std::get<_Indx>(__bound_args), __args)...);
 }
 
@@ -205,8 +200,6 @@ private:
   _Fd __f_;
   _Td __bound_args_;
 
-  typedef typename __make_tuple_indices<sizeof...(_BoundArgs)>::type __indices;
-
 public:
   template <
       class _Gp,
@@ -219,14 +212,16 @@ public:
   template <class... _Args>
   _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 typename __bind_return<_Fd, _Td, tuple<_Args&&...> >::type
   operator()(_Args&&... __args) {
-    return std::__apply_functor(__f_, __bound_args_, __indices(), tuple<_Args&&...>(std::forward<_Args>(__args)...));
+    return std::__apply_functor(
+        __f_, __bound_args_, __index_sequence_for<_BoundArgs...>(), tuple<_Args&&...>(std::forward<_Args>(__args)...));
   }
 
   template <class... _Args>
   _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20
   typename __bind_return<const _Fd, const _Td, tuple<_Args&&...> >::type
   operator()(_Args&&... __args) const {
-    return std::__apply_functor(__f_, __bound_args_, __indices(), tuple<_Args&&...>(std::forward<_Args>(__args)...));
+    return std::__apply_functor(
+        __f_, __bound_args_, __index_sequence_for<_BoundArgs...>(), tuple<_Args&&...>(std::forward<_Args>(__args)...));
   }
 };
 
@@ -273,14 +268,14 @@ template <class _Rp, class _Fp, class... _BoundArgs>
 struct is_bind_expression<__bind_r<_Rp, _Fp, _BoundArgs...> > : public true_type {};
 
 template <class _Fp, class... _BoundArgs>
-inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 __bind<_Fp, _BoundArgs...>
+[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 __bind<_Fp, _BoundArgs...>
 bind(_Fp&& __f, _BoundArgs&&... __bound_args) {
   typedef __bind<_Fp, _BoundArgs...> type;
   return type(std::forward<_Fp>(__f), std::forward<_BoundArgs>(__bound_args)...);
 }
 
 template <class _Rp, class _Fp, class... _BoundArgs>
-inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 __bind_r<_Rp, _Fp, _BoundArgs...>
+[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 __bind_r<_Rp, _Fp, _BoundArgs...>
 bind(_Fp&& __f, _BoundArgs&&... __bound_args) {
   typedef __bind_r<_Rp, _Fp, _BoundArgs...> type;
   return type(std::forward<_Fp>(__f), std::forward<_BoundArgs>(__bound_args)...);
diff --git a/lib/libcxx/include/__functional/bind_back.h b/lib/libcxx/include/__functional/bind_back.h
index e44768d228..41177144d8 100644
--- a/lib/libcxx/include/__functional/bind_back.h
+++ b/lib/libcxx/include/__functional/bind_back.h
@@ -64,7 +64,7 @@ _LIBCPP_HIDE_FROM_ABI constexpr auto __bind_back(_Fn&& __f, _Args&&... __args) n
 
 #  if _LIBCPP_STD_VER >= 23
 template <class _Fn, class... _Args>
-_LIBCPP_HIDE_FROM_ABI constexpr auto bind_back(_Fn&& __f, _Args&&... __args) {
+[[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr auto bind_back(_Fn&& __f, _Args&&... __args) {
   static_assert(is_constructible_v<decay_t<_Fn>, _Fn>, "bind_back requires decay_t<F> to be constructible from F");
   static_assert(is_move_constructible_v<decay_t<_Fn>>, "bind_back requires decay_t<F> to be move constructible");
   static_assert((is_constructible_v<decay_t<_Args>, _Args> && ...),
diff --git a/lib/libcxx/include/__functional/bind_front.h b/lib/libcxx/include/__functional/bind_front.h
index 87ef3affe8..427accf963 100644
--- a/lib/libcxx/include/__functional/bind_front.h
+++ b/lib/libcxx/include/__functional/bind_front.h
@@ -43,7 +43,7 @@ struct __bind_front_t : __perfect_forward<__bind_front_op, _Fn, _BoundArgs...> {
 template <class _Fn, class... _Args>
   requires is_constructible_v<decay_t<_Fn>, _Fn> && is_move_constructible_v<decay_t<_Fn>> &&
            (is_constructible_v<decay_t<_Args>, _Args> && ...) && (is_move_constructible_v<decay_t<_Args>> && ...)
-_LIBCPP_HIDE_FROM_ABI constexpr auto bind_front(_Fn&& __f, _Args&&... __args) {
+[[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr auto bind_front(_Fn&& __f, _Args&&... __args) {
   return __bind_front_t<decay_t<_Fn>, decay_t<_Args>...>(std::forward<_Fn>(__f), std::forward<_Args>(__args)...);
 }
 
diff --git a/lib/libcxx/include/__functional/function.h b/lib/libcxx/include/__functional/function.h
index dc112ebfd0..121417f90f 100644
--- a/lib/libcxx/include/__functional/function.h
+++ b/lib/libcxx/include/__functional/function.h
@@ -15,16 +15,14 @@
 #include <__cstddef/nullptr_t.h>
 #include <__exception/exception.h>
 #include <__functional/binary_function.h>
-#include <__functional/invoke.h>
 #include <__functional/unary_function.h>
 #include <__memory/addressof.h>
 #include <__type_traits/aligned_storage.h>
 #include <__type_traits/decay.h>
-#include <__type_traits/is_core_convertible.h>
+#include <__type_traits/invoke.h>
 #include <__type_traits/is_scalar.h>
 #include <__type_traits/is_trivially_constructible.h>
 #include <__type_traits/is_trivially_destructible.h>
-#include <__type_traits/is_void.h>
 #include <__type_traits/strip_signature.h>
 #include <__utility/forward.h>
 #include <__utility/move.h>
@@ -95,29 +93,29 @@ template <class _Rp, class _A1, class _A2>
 struct __maybe_derive_from_binary_function<_Rp(_A1, _A2)> : public __binary_function<_A1, _A2, _Rp> {};
 
 template <class _Fp>
-_LIBCPP_HIDE_FROM_ABI bool __not_null(_Fp const&) {
-  return true;
+_LIBCPP_HIDE_FROM_ABI bool __is_null(_Fp const&) {
+  return false;
 }
 
 template <class _Fp>
-_LIBCPP_HIDE_FROM_ABI bool __not_null(_Fp* __ptr) {
-  return __ptr;
+_LIBCPP_HIDE_FROM_ABI bool __is_null(_Fp* __ptr) {
+  return !__ptr;
 }
 
 template <class _Ret, class _Class>
-_LIBCPP_HIDE_FROM_ABI bool __not_null(_Ret _Class::*__ptr) {
-  return __ptr;
+_LIBCPP_HIDE_FROM_ABI bool __is_null(_Ret _Class::* __ptr) {
+  return !__ptr;
 }
 
 template <class _Fp>
-_LIBCPP_HIDE_FROM_ABI bool __not_null(function<_Fp> const& __f) {
-  return !!__f;
+_LIBCPP_HIDE_FROM_ABI bool __is_null(function<_Fp> const& __f) {
+  return !__f;
 }
 
 #  if __has_extension(blocks)
 template <class _Rp, class... _Args>
-_LIBCPP_HIDE_FROM_ABI bool __not_null(_Rp (^__p)(_Args...)) {
-  return __p;
+_LIBCPP_HIDE_FROM_ABI bool __is_null(_Rp (^__p)(_Args...)) {
+  return !__p;
 }
 #  endif
 
@@ -206,12 +204,13 @@ public:
   _LIBCPP_HIDE_FROM_ABI explicit __value_func(_Fp&& __f) : __f_(nullptr) {
     typedef __function::__func<_Fp, _Rp(_ArgTypes...)> _Fun;
 
-    if (__function::__not_null(__f)) {
-      if (sizeof(_Fun) <= sizeof(__buf_) && is_nothrow_copy_constructible<_Fp>::value) {
-        __f_ = ::new (std::addressof(__buf_)) _Fun(std::move(__f));
-      } else {
-        __f_ = new _Fun(std::move(__f));
-      }
+    if (__function::__is_null(__f))
+      return;
+
+    if (sizeof(_Fun) <= sizeof(__buf_) && is_nothrow_copy_constructible<_Fp>::value) {
+      __f_ = ::new (std::addressof(__buf_)) _Fun(std::move(__f));
+    } else {
+      __f_ = new _Fun(std::move(__f));
     }
   }
 
@@ -356,7 +355,31 @@ struct __policy {
   // type.
   template <typename _Fun>
   _LIBCPP_HIDE_FROM_ABI static const __policy* __create() {
-    return __choose_policy<_Fun>(__use_small_storage<_Fun>());
+    if constexpr (__use_small_storage<_Fun>::value) {
+      static constexpr __policy __policy = {
+          nullptr,
+          nullptr,
+          false,
+#  if _LIBCPP_HAS_RTTI
+          &typeid(_Fun)
+#  else
+          nullptr
+#  endif
+      };
+      return &__policy;
+    } else {
+      static constexpr __policy __policy = {
+          std::addressof(__large_clone<_Fun>),
+          std::addressof(__large_destroy<_Fun>),
+          false,
+#  if _LIBCPP_HAS_RTTI
+          &typeid(_Fun)
+#  else
+          nullptr
+#  endif
+      };
+      return &__policy;
+    }
   }
 
   _LIBCPP_HIDE_FROM_ABI static const __policy* __create_empty() {
@@ -384,36 +407,6 @@ private:
   _LIBCPP_HIDE_FROM_ABI static void __large_destroy(void* __s) {
     delete static_cast<_Fun*>(__s);
   }
-
-  template <typename _Fun>
-  _LIBCPP_HIDE_FROM_ABI static const __policy* __choose_policy(/* is_small = */ false_type) {
-    static constexpr __policy __policy = {
-        std::addressof(__large_clone<_Fun>),
-        std::addressof(__large_destroy<_Fun>),
-        false,
-#  if _LIBCPP_HAS_RTTI
-        &typeid(_Fun)
-#  else
-        nullptr
-#  endif
-    };
-    return &__policy;
-  }
-
-  template <typename _Fun>
-  _LIBCPP_HIDE_FROM_ABI static const __policy* __choose_policy(/* is_small = */ true_type) {
-    static constexpr __policy __policy = {
-        nullptr,
-        nullptr,
-        false,
-#  if _LIBCPP_HAS_RTTI
-        &typeid(_Fun)
-#  else
-        nullptr
-#  endif
-    };
-    return &__policy;
-  }
 };
 
 // Used to choose between perfect forwarding or pass-by-value. Pass-by-value is
@@ -455,14 +448,15 @@ public:
 
   template <class _Fp, __enable_if_t<!is_same<__decay_t<_Fp>, __policy_func>::value, int> = 0>
   _LIBCPP_HIDE_FROM_ABI explicit __policy_func(_Fp&& __f) : __policy_(__policy::__create_empty()) {
-    if (__function::__not_null(__f)) {
-      __func_   = __call_func<_Fp>;
-      __policy_ = __policy::__create<_Fp>();
-      if (__use_small_storage<_Fp>()) {
-        ::new ((void*)&__buf_.__small) _Fp(std::move(__f));
-      } else {
-        __buf_.__large = ::new _Fp(std::move(__f));
-      }
+    if (__function::__is_null(__f))
+      return;
+
+    __func_   = __call_func<_Fp>;
+    __policy_ = __policy::__create<_Fp>();
+    if (__use_small_storage<_Fp>()) {
+      ::new ((void*)&__buf_.__small) _Fp(std::move(__f));
+    } else {
+      __buf_.__large = ::new _Fp(std::move(__f));
     }
   }
 
@@ -615,21 +609,9 @@ class function<_Rp(_ArgTypes...)>
 
   __func __f_;
 
-  template <class _Fp,
-            bool = _And<_IsNotSame<__remove_cvref_t<_Fp>, function>, __is_invocable<_Fp, _ArgTypes...> >::value>
-  struct __callable;
   template <class _Fp>
-  struct __callable<_Fp, true> {
-    static const bool value =
-        is_void<_Rp>::value || __is_core_convertible<__invoke_result_t<_Fp, _ArgTypes...>, _Rp>::value;
-  };
-  template <class _Fp>
-  struct __callable<_Fp, false> {
-    static const bool value = false;
-  };
-
-  template <class _Fp>
-  using _EnableIfLValueCallable _LIBCPP_NODEBUG = __enable_if_t<__callable<_Fp&>::value>;
+  using _EnableIfLValueCallable _LIBCPP_NODEBUG = __enable_if_t<
+      _And<_IsNotSame<__remove_cvref_t<_Fp>, function>, __is_invocable_r<_Rp, _Fp&, _ArgTypes...>>::value>;
 
 public:
   typedef _Rp result_type;
@@ -690,11 +672,11 @@ public:
 
 #  if _LIBCPP_HAS_RTTI
   // function target access:
-  _LIBCPP_HIDE_FROM_ABI const std::type_info& target_type() const _NOEXCEPT;
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI const std::type_info& target_type() const _NOEXCEPT;
   template <typename _Tp>
-  _LIBCPP_HIDE_FROM_ABI _Tp* target() _NOEXCEPT;
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI _Tp* target() _NOEXCEPT;
   template <typename _Tp>
-  _LIBCPP_HIDE_FROM_ABI const _Tp* target() const _NOEXCEPT;
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI const _Tp* target() const _NOEXCEPT;
 #  endif // _LIBCPP_HAS_RTTI
 };
 
diff --git a/lib/libcxx/include/__functional/hash.h b/lib/libcxx/include/__functional/hash.h
index 83bbf1b5e2..d81ff1abbd 100644
--- a/lib/libcxx/include/__functional/hash.h
+++ b/lib/libcxx/include/__functional/hash.h
@@ -433,13 +433,10 @@ struct __hash_impl<long double> : __scalar_hash<long double> {
 template <class _Tp>
 struct hash : public __hash_impl<_Tp> {};
 
-#if _LIBCPP_STD_VER >= 17
-
 template <>
 struct hash<nullptr_t> : public __unary_function<nullptr_t, size_t> {
-  _LIBCPP_HIDE_FROM_ABI size_t operator()(nullptr_t) const _NOEXCEPT { return 662607004ull; }
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI size_t operator()(nullptr_t) const _NOEXCEPT { return 662607004ull; }
 };
-#endif
 
 #ifndef _LIBCPP_CXX03_LANG
 template <class _Key, class _Hash>
@@ -452,18 +449,12 @@ template <class _Key, class _Hash = hash<_Key> >
 using __has_enabled_hash _LIBCPP_NODEBUG =
     integral_constant<bool, __check_hash_requirements<_Key, _Hash>::value && is_default_constructible<_Hash>::value >;
 
-#  if _LIBCPP_STD_VER >= 17
 template <class _Type, class>
 using __enable_hash_helper_imp _LIBCPP_NODEBUG = _Type;
 
 template <class _Type, class... _Keys>
 using __enable_hash_helper _LIBCPP_NODEBUG =
     __enable_hash_helper_imp<_Type, __enable_if_t<__all<__has_enabled_hash<_Keys>::value...>::value> >;
-#  else
-template <class _Type, class...>
-using __enable_hash_helper _LIBCPP_NODEBUG = _Type;
-#  endif
-
 #endif // !_LIBCPP_CXX03_LANG
 
 _LIBCPP_END_NAMESPACE_STD
diff --git a/lib/libcxx/include/__functional/identity.h b/lib/libcxx/include/__functional/identity.h
index 1b1c6cf73c..02dde2b4f3 100644
--- a/lib/libcxx/include/__functional/identity.h
+++ b/lib/libcxx/include/__functional/identity.h
@@ -44,7 +44,7 @@ struct __is_identity<reference_wrapper<const __identity> > : true_type {};
 
 struct identity {
   template <class _Tp>
-  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr _Tp&& operator()(_Tp&& __t) const noexcept {
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr _Tp&& operator()(_LIBCPP_LIFETIMEBOUND _Tp&& __t) const noexcept {
     return std::forward<_Tp>(__t);
   }
 
diff --git a/lib/libcxx/include/__functional/is_transparent.h b/lib/libcxx/include/__functional/is_transparent.h
index 567df1a662..c2c6fbce24 100644
--- a/lib/libcxx/include/__functional/is_transparent.h
+++ b/lib/libcxx/include/__functional/is_transparent.h
@@ -29,6 +29,14 @@ inline const bool __is_transparent_v<_Tp, _Key, __void_t<typename _Tp::is_transp
 
 #endif
 
+// Two types are considered transparently comparable if `comparator(key, arg)` is equivalent to `comparator(key,
+// <implicit cast to KeyT>(arg))`.
+//
+// This is different from `__is_transparent_v`, which is only a property of the comparator and doesn't provide
+// additional semantic guarantees.
+template <class _Comparator, class _KeyT, class _Arg, class = void>
+inline const bool __is_transparently_comparable_v = false;
+
 _LIBCPP_END_NAMESPACE_STD
 
 #endif // _LIBCPP___FUNCTIONAL_IS_TRANSPARENT
diff --git a/lib/libcxx/include/__functional/mem_fn.h b/lib/libcxx/include/__functional/mem_fn.h
index 690393988c..1c9340c4f4 100644
--- a/lib/libcxx/include/__functional/mem_fn.h
+++ b/lib/libcxx/include/__functional/mem_fn.h
@@ -43,7 +43,8 @@ public:
 };
 
 template <class _Rp, class _Tp>
-inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 __mem_fn<_Rp _Tp::*> mem_fn(_Rp _Tp::*__pm) _NOEXCEPT {
+[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 __mem_fn<_Rp _Tp::*>
+mem_fn(_Rp _Tp::* __pm) _NOEXCEPT {
   return __mem_fn<_Rp _Tp::*>(__pm);
 }
 
diff --git a/lib/libcxx/include/__functional/operations.h b/lib/libcxx/include/__functional/operations.h
index 7b0ea11db5..c0e719bb58 100644
--- a/lib/libcxx/include/__functional/operations.h
+++ b/lib/libcxx/include/__functional/operations.h
@@ -15,7 +15,9 @@
 #include <__functional/unary_function.h>
 #include <__fwd/functional.h>
 #include <__type_traits/desugars_to.h>
+#include <__type_traits/is_generic_transparent_comparator.h>
 #include <__type_traits/is_integral.h>
+#include <__type_traits/make_transparent.h>
 #include <__utility/forward.h>
 
 #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
@@ -377,6 +379,14 @@ struct less<void> {
   typedef void is_transparent;
 };
 
+template <class _Tp>
+struct __make_transparent<_Tp, less<_Tp> > {
+  using type _LIBCPP_NODEBUG = less<>;
+};
+
+template <>
+inline const bool __is_generic_transparent_comparator_v<less<>> = true;
+
 template <class _Tp, class _Up>
 inline const bool __desugars_to_v<__less_tag, less<>, _Tp, _Up> = true;
 
@@ -466,6 +476,14 @@ struct greater<void> {
 
 template <class _Tp, class _Up>
 inline const bool __desugars_to_v<__greater_tag, greater<>, _Tp, _Up> = true;
+
+template <class _Tp>
+struct __make_transparent<_Tp, greater<_Tp>> {
+  using type _LIBCPP_NODEBUG = greater<>;
+};
+
+template <>
+inline const bool __is_generic_transparent_comparator_v<greater<>> = true;
 #endif
 
 // Logical operations
diff --git a/lib/libcxx/include/__functional/ranges_operations.h b/lib/libcxx/include/__functional/ranges_operations.h
index df95843e7c..dc9da061af 100644
--- a/lib/libcxx/include/__functional/ranges_operations.h
+++ b/lib/libcxx/include/__functional/ranges_operations.h
@@ -14,6 +14,7 @@
 #include <__concepts/totally_ordered.h>
 #include <__config>
 #include <__type_traits/desugars_to.h>
+#include <__type_traits/is_generic_transparent_comparator.h>
 #include <__utility/forward.h>
 
 #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
@@ -108,6 +109,12 @@ inline const bool __desugars_to_v<__less_tag, ranges::less, _Tp, _Up> = true;
 template <class _Tp, class _Up>
 inline const bool __desugars_to_v<__greater_tag, ranges::greater, _Tp, _Up> = true;
 
+template <>
+inline const bool __is_generic_transparent_comparator_v<ranges::less> = true;
+
+template <>
+inline const bool __is_generic_transparent_comparator_v<ranges::greater> = true;
+
 #endif // _LIBCPP_STD_VER >= 20
 
 _LIBCPP_END_NAMESPACE_STD
diff --git a/lib/libcxx/include/__functional/reference_wrapper.h b/lib/libcxx/include/__functional/reference_wrapper.h
index 148703b21d..b1efd9f76d 100644
--- a/lib/libcxx/include/__functional/reference_wrapper.h
+++ b/lib/libcxx/include/__functional/reference_wrapper.h
@@ -58,7 +58,7 @@ public:
 
   // access
   _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 operator type&() const _NOEXCEPT { return *__f_; }
-  _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 type& get() const _NOEXCEPT { return *__f_; }
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 type& get() const _NOEXCEPT { return *__f_; }
 
   // invoke
   template <class... _ArgTypes>
@@ -128,23 +128,25 @@ reference_wrapper(_Tp&) -> reference_wrapper<_Tp>;
 #endif
 
 template <class _Tp>
-inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 reference_wrapper<_Tp> ref(_Tp& __t) _NOEXCEPT {
+[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI
+_LIBCPP_CONSTEXPR_SINCE_CXX20 reference_wrapper<_Tp> ref(_Tp& __t) _NOEXCEPT {
   return reference_wrapper<_Tp>(__t);
 }
 
 template <class _Tp>
-inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 reference_wrapper<_Tp>
+[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 reference_wrapper<_Tp>
 ref(reference_wrapper<_Tp> __t) _NOEXCEPT {
   return __t;
 }
 
 template <class _Tp>
-inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 reference_wrapper<const _Tp> cref(const _Tp& __t) _NOEXCEPT {
+[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 reference_wrapper<const _Tp>
+cref(const _Tp& __t) _NOEXCEPT {
   return reference_wrapper<const _Tp>(__t);
 }
 
 template <class _Tp>
-inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 reference_wrapper<const _Tp>
+[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 reference_wrapper<const _Tp>
 cref(reference_wrapper<_Tp> __t) _NOEXCEPT {
   return __t;
 }
diff --git a/lib/libcxx/include/__functional/weak_result_type.h b/lib/libcxx/include/__functional/weak_result_type.h
index aa462e4d5c..4232bdc69d 100644
--- a/lib/libcxx/include/__functional/weak_result_type.h
+++ b/lib/libcxx/include/__functional/weak_result_type.h
@@ -13,9 +13,9 @@
 #include <__config>
 #include <__functional/binary_function.h>
 #include <__functional/unary_function.h>
-#include <__type_traits/integral_constant.h>
 #include <__type_traits/invoke.h>
 #include <__type_traits/is_same.h>
+#include <__type_traits/void_t.h>
 #include <__utility/declval.h>
 
 #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
@@ -24,50 +24,36 @@
 
 _LIBCPP_BEGIN_NAMESPACE_STD
 
-template <class _Tp>
-struct __has_result_type {
-private:
-  template <class _Up>
-  static false_type __test(...);
-  template <class _Up>
-  static true_type __test(typename _Up::result_type* = 0);
+template <class _Tp, class = void>
+inline const bool __has_result_type_v = false;
 
-public:
-  static const bool value = decltype(__test<_Tp>(0))::value;
-};
+template <class _Tp>
+inline const bool __has_result_type_v<_Tp, __void_t<typename _Tp::result_type*> > = true;
 
 // __weak_result_type
 
 template <class _Tp>
 struct __derives_from_unary_function {
 private:
-  struct __two {
-    char __lx;
-    char __lxx;
-  };
-  static __two __test(...);
+  static void __find_base(...);
   template <class _Ap, class _Rp>
-  static __unary_function<_Ap, _Rp> __test(const volatile __unary_function<_Ap, _Rp>*);
+  static __unary_function<_Ap, _Rp> __find_base(const volatile __unary_function<_Ap, _Rp>*);
 
 public:
-  static const bool value = !is_same<decltype(__test((_Tp*)0)), __two>::value;
-  typedef decltype(__test((_Tp*)0)) type;
+  using type              = decltype(__find_base(static_cast<_Tp*>(nullptr)));
+  static const bool value = !is_same<type, void>::value;
 };
 
 template <class _Tp>
 struct __derives_from_binary_function {
 private:
-  struct __two {
-    char __lx;
-    char __lxx;
-  };
-  static __two __test(...);
+  static void __find_base(...);
   template <class _A1, class _A2, class _Rp>
-  static __binary_function<_A1, _A2, _Rp> __test(const volatile __binary_function<_A1, _A2, _Rp>*);
+  static __binary_function<_A1, _A2, _Rp> __find_base(const volatile __binary_function<_A1, _A2, _Rp>*);
 
 public:
-  static const bool value = !is_same<decltype(__test((_Tp*)0)), __two>::value;
-  typedef decltype(__test((_Tp*)0)) type;
+  using type              = decltype(__find_base(static_cast<_Tp*>(nullptr)));
+  static const bool value = !is_same<type, void>::value;
 };
 
 template <class _Tp, bool = __derives_from_unary_function<_Tp>::value>
@@ -85,7 +71,7 @@ struct __maybe_derive_from_binary_function // bool is true
 template <class _Tp>
 struct __maybe_derive_from_binary_function<_Tp, false> {};
 
-template <class _Tp, bool = __has_result_type<_Tp>::value>
+template <class _Tp, bool = __has_result_type_v<_Tp> >
 struct __weak_result_type_imp // bool is true
     : public __maybe_derive_from_unary_function<_Tp>,
       public __maybe_derive_from_binary_function<_Tp> {
diff --git a/lib/libcxx/include/__fwd/ios.h b/lib/libcxx/include/__fwd/ios.h
index 831624f4b1..fd6738a6b3 100644
--- a/lib/libcxx/include/__fwd/ios.h
+++ b/lib/libcxx/include/__fwd/ios.h
@@ -31,7 +31,7 @@ using wios = basic_ios<wchar_t>;
 template <class _CharT, class _Traits>
 class _LIBCPP_PREFERRED_NAME(ios) _LIBCPP_IF_WIDE_CHARACTERS(_LIBCPP_PREFERRED_NAME(wios)) basic_ios;
 
-#if defined(_NEWLIB_VERSION)
+#if _LIBCPP_LIBC_NEWLIB
 // On newlib, off_t is 'long int'
 using streamoff = long int; // for char_traits in <string>
 #else
diff --git a/lib/libcxx/include/__fwd/tuple.h b/lib/libcxx/include/__fwd/tuple.h
index fb922b29f3..dc96c03e20 100644
--- a/lib/libcxx/include/__fwd/tuple.h
+++ b/lib/libcxx/include/__fwd/tuple.h
@@ -21,11 +21,25 @@ _LIBCPP_BEGIN_NAMESPACE_STD
 template <size_t, class>
 struct tuple_element;
 
+template <size_t _Np, class _Tp>
+using __tuple_element_t _LIBCPP_NODEBUG = typename tuple_element<_Np, _Tp>::type;
+
 #ifndef _LIBCPP_CXX03_LANG
 
 template <class...>
 class tuple;
 
+template <class>
+inline const bool __is_tuple_v = false;
+
+template <class... _Tp>
+inline const bool __is_tuple_v<tuple<_Tp...>> = true;
+
+template <size_t _Ip, class... _Tp>
+struct tuple_element<_Ip, tuple<_Tp...> > {
+  using type _LIBCPP_NODEBUG = __type_pack_element<_Ip, _Tp...>;
+};
+
 template <class>
 struct tuple_size;
 
diff --git a/lib/libcxx/include/__hash_table b/lib/libcxx/include/__hash_table
index 78f2f3bfd2..ef487fb06d 100644
--- a/lib/libcxx/include/__hash_table
+++ b/lib/libcxx/include/__hash_table
@@ -10,6 +10,7 @@
 #ifndef _LIBCPP___HASH_TABLE
 #define _LIBCPP___HASH_TABLE
 
+#include <__algorithm/fill_n.h>
 #include <__algorithm/max.h>
 #include <__algorithm/min.h>
 #include <__assert>
@@ -28,7 +29,6 @@
 #include <__memory/swap_allocator.h>
 #include <__memory/unique_ptr.h>
 #include <__new/launder.h>
-#include <__type_traits/can_extract_key.h>
 #include <__type_traits/copy_cvref.h>
 #include <__type_traits/enable_if.h>
 #include <__type_traits/invoke.h>
@@ -44,7 +44,9 @@
 #include <__utility/forward.h>
 #include <__utility/move.h>
 #include <__utility/pair.h>
+#include <__utility/scope_guard.h>
 #include <__utility/swap.h>
+#include <__utility/try_key_extraction.h>
 #include <limits>
 
 #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
@@ -81,18 +83,6 @@ struct __hash_node_base {
   typedef _NodePtr __node_pointer;
   typedef __node_base_pointer __next_pointer;
 
-// TODO(LLVM 22): Remove this check
-#ifndef _LIBCPP_ABI_FIX_UNORDERED_NODE_POINTER_UB
-  static_assert(sizeof(__node_base_pointer) == sizeof(__node_pointer) && _LIBCPP_ALIGNOF(__node_base_pointer) ==
-                    _LIBCPP_ALIGNOF(__node_pointer),
-                "It looks like you are using std::__hash_table (an implementation detail for the unordered containers) "
-                "with a fancy pointer type that thas a different representation depending on whether it points to a "
-                "__hash_table base pointer or a __hash_table node pointer (both of which are implementation details of "
-                "the standard library). This means that your ABI is being broken between LLVM 19 and LLVM 20. If you "
-                "don't care about your ABI being broken, define the _LIBCPP_ABI_TREE_REMOVE_NODE_POINTER_UB macro to "
-                "silence this diagnostic.");
-#endif
-
   __next_pointer __next_;
 
   _LIBCPP_HIDE_FROM_ABI __next_pointer __ptr() _NOEXCEPT {
@@ -122,6 +112,19 @@ struct __get_hash_node_value_type<__hash_value_type<_Key, _Tp> > {
 template <class _Tp>
 using __get_hash_node_value_type_t _LIBCPP_NODEBUG = typename __get_hash_node_value_type<_Tp>::type;
 
+template <class _Tp>
+struct __get_hash_node_key_type {
+  using type _LIBCPP_NODEBUG = _Tp;
+};
+
+template <class _Key, class _Tp>
+struct __get_hash_node_key_type<__hash_value_type<_Key, _Tp> > {
+  using type _LIBCPP_NODEBUG = _Key;
+};
+
+template <class _Tp>
+using __get_hash_node_key_type_t _LIBCPP_NODEBUG = typename __get_hash_node_key_type<_Tp>::type;
+
 template <class _Tp, class _VoidPtr>
 struct __hash_node : public __hash_node_base< __rebind_pointer_t<_VoidPtr, __hash_node<_Tp, _VoidPtr> > > {
   using __node_value_type _LIBCPP_NODEBUG = __get_hash_node_value_type_t<_Tp>;
@@ -152,7 +155,12 @@ public:
   }
 #endif
 
-  _LIBCPP_HIDE_FROM_ABI explicit __hash_node(__next_pointer __next, size_t __hash) : _Base(__next), __hash_(__hash) {}
+  template <class _Alloc, class... _Args>
+  _LIBCPP_HIDE_FROM_ABI explicit __hash_node(size_t __hash, _Alloc& __na, _Args&&... __args)
+      : _Base(nullptr), __hash_(__hash) {
+    allocator_traits<_Alloc>::construct(__na, std::addressof(__get_value()), std::forward<_Args>(__args)...);
+  }
+
   _LIBCPP_HIDE_FROM_ABI ~__hash_node() {}
 };
 
@@ -182,85 +190,16 @@ class __hash_map_iterator;
 template <class _HashIterator>
 class __hash_map_const_iterator;
 
-template <class _Tp>
-struct __hash_key_value_types {
-  static_assert(!is_reference<_Tp>::value && !is_const<_Tp>::value, "");
-  typedef _Tp key_type;
-  typedef _Tp __node_value_type;
-  typedef _Tp __container_value_type;
-  static const bool __is_map = false;
-
-  _LIBCPP_HIDE_FROM_ABI static key_type const& __get_key(_Tp const& __v) { return __v; }
-  _LIBCPP_HIDE_FROM_ABI static __container_value_type const& __get_value(__node_value_type const& __v) { return __v; }
-  _LIBCPP_HIDE_FROM_ABI static __container_value_type* __get_ptr(__node_value_type& __n) { return std::addressof(__n); }
-  _LIBCPP_HIDE_FROM_ABI static __container_value_type&& __move(__node_value_type& __v) { return std::move(__v); }
-};
-
-template <class _Key, class _Tp>
-struct __hash_key_value_types<__hash_value_type<_Key, _Tp> > {
-  typedef _Key key_type;
-  typedef _Tp mapped_type;
-  typedef __hash_value_type<_Key, _Tp> __node_value_type;
-  typedef pair<const _Key, _Tp> __container_value_type;
-  typedef __container_value_type __map_value_type;
-  static const bool __is_map = true;
-
-  _LIBCPP_HIDE_FROM_ABI static key_type const& __get_key(__container_value_type const& __v) { return __v.first; }
-
-  template <class _Up, __enable_if_t<is_same<__remove_cvref_t<_Up>, __node_value_type>::value, int> = 0>
-  _LIBCPP_HIDE_FROM_ABI static __container_value_type const& __get_value(_Up& __t) {
-    return __t.__get_value();
-  }
-
-  template <class _Up, __enable_if_t<is_same<__remove_cvref_t<_Up>, __container_value_type>::value, int> = 0>
-  _LIBCPP_HIDE_FROM_ABI static __container_value_type const& __get_value(_Up& __t) {
-    return __t;
-  }
-
-  _LIBCPP_HIDE_FROM_ABI static __container_value_type* __get_ptr(__container_value_type& __n) {
-    return std::addressof(__n);
-  }
-  _LIBCPP_HIDE_FROM_ABI static pair<key_type&&, mapped_type&&> __move(__node_value_type& __v) { return __v.__move(); }
-};
-
-template <class _Tp, class _AllocPtr, class _KVTypes = __hash_key_value_types<_Tp>, bool = _KVTypes::__is_map>
-struct __hash_map_pointer_types {};
-
-template <class _Tp, class _AllocPtr, class _KVTypes>
-struct __hash_map_pointer_types<_Tp, _AllocPtr, _KVTypes, true> {
-  typedef typename _KVTypes::__map_value_type _Mv;
-  typedef __rebind_pointer_t<_AllocPtr, _Mv> __map_value_type_pointer;
-  typedef __rebind_pointer_t<_AllocPtr, const _Mv> __const_map_value_type_pointer;
-};
-
 template <class _NodePtr, class _NodeT = typename pointer_traits<_NodePtr>::element_type>
 struct __hash_node_types;
 
 template <class _NodePtr, class _Tp, class _VoidPtr>
-struct __hash_node_types<_NodePtr, __hash_node<_Tp, _VoidPtr> >
-    : public __hash_key_value_types<_Tp>,
-      __hash_map_pointer_types<_Tp, _VoidPtr>
-
-{
-  typedef __hash_key_value_types<_Tp> __base;
-
-public:
-  typedef ptrdiff_t difference_type;
-  typedef size_t size_type;
-
-  typedef __rebind_pointer_t<_NodePtr, void> __void_pointer;
-
+struct __hash_node_types<_NodePtr, __hash_node<_Tp, _VoidPtr> > {
   typedef typename pointer_traits<_NodePtr>::element_type __node_type;
-  typedef _NodePtr __node_pointer;
 
-  typedef __hash_node_base<__node_pointer> __node_base_type;
-  typedef __rebind_pointer_t<_NodePtr, __node_base_type> __node_base_pointer;
-
-  typedef typename __node_base_type::__next_pointer __next_pointer;
+  typedef typename __hash_node_base<_NodePtr>::__next_pointer __next_pointer;
 
   using __node_value_type _LIBCPP_NODEBUG = __get_hash_node_value_type_t<_Tp>;
-  typedef __rebind_pointer_t<_VoidPtr, __node_value_type> __node_value_type_pointer;
-  typedef __rebind_pointer_t<_VoidPtr, const __node_value_type> __const_node_value_type_pointer;
 
 private:
   static_assert(!is_const<__node_type>::value, "_NodePtr should never be a pointer to const");
@@ -281,13 +220,6 @@ struct __hash_node_types_from_iterator<__hash_local_iterator<_NodePtr> > : __has
 template <class _NodePtr>
 struct __hash_node_types_from_iterator<__hash_const_local_iterator<_NodePtr> > : __hash_node_types<_NodePtr> {};
 
-template <class _NodeValueTp, class _VoidPtr>
-struct __make_hash_node_types {
-  typedef __hash_node<_NodeValueTp, _VoidPtr> _NodeTp;
-  typedef __rebind_pointer_t<_VoidPtr, _NodeTp> _NodePtr;
-  typedef __hash_node_types<_NodePtr> type;
-};
-
 template <class _NodePtr>
 class __hash_iterator {
   typedef __hash_node_types<_NodePtr> _NodeTypes;
@@ -299,9 +231,9 @@ class __hash_iterator {
 public:
   typedef forward_iterator_tag iterator_category;
   typedef typename _NodeTypes::__node_value_type value_type;
-  typedef typename _NodeTypes::difference_type difference_type;
+  using difference_type = ptrdiff_t;
   typedef value_type& reference;
-  typedef typename _NodeTypes::__node_value_type_pointer pointer;
+  using pointer = __rebind_pointer_t<_NodePtr, value_type>;
 
   _LIBCPP_HIDE_FROM_ABI __hash_iterator() _NOEXCEPT : __node_(nullptr) {}
 
@@ -366,9 +298,9 @@ public:
 
   typedef forward_iterator_tag iterator_category;
   typedef typename _NodeTypes::__node_value_type value_type;
-  typedef typename _NodeTypes::difference_type difference_type;
+  using difference_type = ptrdiff_t;
   typedef const value_type& reference;
-  typedef typename _NodeTypes::__const_node_value_type_pointer pointer;
+  using pointer = __rebind_pointer_t<_NodePtr, const value_type>;
 
   _LIBCPP_HIDE_FROM_ABI __hash_const_iterator() _NOEXCEPT : __node_(nullptr) {}
 
@@ -431,9 +363,9 @@ class __hash_local_iterator {
 public:
   typedef forward_iterator_tag iterator_category;
   typedef typename _NodeTypes::__node_value_type value_type;
-  typedef typename _NodeTypes::difference_type difference_type;
+  using difference_type = ptrdiff_t;
   typedef value_type& reference;
-  typedef typename _NodeTypes::__node_value_type_pointer pointer;
+  using pointer = __rebind_pointer_t<_NodePtr, value_type>;
 
   _LIBCPP_HIDE_FROM_ABI __hash_local_iterator() _NOEXCEPT : __node_(nullptr) {}
 
@@ -509,9 +441,9 @@ public:
 
   typedef forward_iterator_tag iterator_category;
   typedef typename _NodeTypes::__node_value_type value_type;
-  typedef typename _NodeTypes::difference_type difference_type;
+  using difference_type = ptrdiff_t;
   typedef const value_type& reference;
-  typedef typename _NodeTypes::__const_node_value_type_pointer pointer;
+  using pointer = __rebind_pointer_t<_ConstNodePtr, const value_type>;
 
   _LIBCPP_HIDE_FROM_ABI __hash_const_local_iterator() _NOEXCEPT : __node_(nullptr) {}
 
@@ -617,8 +549,6 @@ public:
   typedef typename __alloc_traits::pointer pointer;
 
 private:
-  typedef __hash_node_types<pointer> _NodeTypes;
-
   allocator_type& __na_;
 
 public:
@@ -633,7 +563,7 @@ public:
 
   _LIBCPP_HIDE_FROM_ABI void operator()(pointer __p) _NOEXCEPT {
     if (__value_constructed) {
-      __alloc_traits::destroy(__na_, _NodeTypes::__get_ptr(__p->__get_value()));
+      __alloc_traits::destroy(__na_, std::addressof(__p->__get_value()));
       std::__destroy_at(std::addressof(*__p));
     }
     if (__p)
@@ -684,18 +614,16 @@ template <class _Tp, class _Hash, class _Equal, class _Alloc>
 class __hash_table {
 public:
   using value_type = __get_hash_node_value_type_t<_Tp>;
+  using key_type   = __get_hash_node_key_type_t<_Tp>;
+
   typedef _Hash hasher;
   typedef _Equal key_equal;
   typedef _Alloc allocator_type;
 
 private:
   typedef allocator_traits<allocator_type> __alloc_traits;
-  typedef typename __make_hash_node_types<_Tp, typename __alloc_traits::void_pointer>::type _NodeTypes;
 
 public:
-  typedef typename _NodeTypes::__node_value_type __node_value_type;
-  typedef typename _NodeTypes::__container_value_type __container_value_type;
-  typedef typename _NodeTypes::key_type key_type;
   typedef value_type& reference;
   typedef const value_type& const_reference;
   typedef typename __alloc_traits::pointer pointer;
@@ -703,22 +631,23 @@ public:
 #ifndef _LIBCPP_ABI_FIX_UNORDERED_CONTAINER_SIZE_TYPE
   typedef typename __alloc_traits::size_type size_type;
 #else
-  typedef typename _NodeTypes::size_type size_type;
+  using size_type = size_t;
 #endif
-  typedef typename _NodeTypes::difference_type difference_type;
+  using difference_type = ptrdiff_t;
 
 public:
   // Create __node
 
-  typedef typename _NodeTypes::__node_type __node;
-  typedef __rebind_alloc<__alloc_traits, __node> __node_allocator;
-  typedef allocator_traits<__node_allocator> __node_traits;
-  typedef typename _NodeTypes::__void_pointer __void_pointer;
-  typedef typename _NodeTypes::__node_pointer __node_pointer;
-  typedef typename _NodeTypes::__node_pointer __node_const_pointer;
-  typedef typename _NodeTypes::__node_base_type __first_node;
-  typedef typename _NodeTypes::__node_base_pointer __node_base_pointer;
-  typedef typename _NodeTypes::__next_pointer __next_pointer;
+  using __void_pointer _LIBCPP_NODEBUG = typename __alloc_traits::void_pointer;
+
+  using __node _LIBCPP_NODEBUG           = __hash_node<_Tp, __void_pointer>;
+  using __node_allocator _LIBCPP_NODEBUG = __rebind_alloc<__alloc_traits, __node>;
+  using __node_traits _LIBCPP_NODEBUG    = allocator_traits<__node_allocator>;
+  using __node_pointer _LIBCPP_NODEBUG   = __rebind_pointer_t<__void_pointer, __node>;
+
+  using __first_node _LIBCPP_NODEBUG        = __hash_node_base<__node_pointer>;
+  using __node_base_pointer _LIBCPP_NODEBUG = __rebind_pointer_t<__void_pointer, __first_node>;
+  using __next_pointer _LIBCPP_NODEBUG      = __node_base_pointer;
 
 private:
   // check for sane allocator pointer rebinding semantics. Rebinding the
@@ -747,6 +676,38 @@ private:
 
   _LIBCPP_HIDE_FROM_ABI size_type& size() _NOEXCEPT { return __size_; }
 
+  _LIBCPP_HIDE_FROM_ABI void
+  __copy_construct(__next_pointer __other_iter, __next_pointer __own_iter, size_t __current_chash) {
+    auto __bucket_count = bucket_count();
+
+    for (; __other_iter; __other_iter = __other_iter->__next_) {
+      __node_holder __new_node = __construct_node_hash(__other_iter->__hash(), __other_iter->__upcast()->__get_value());
+
+      size_t __new_chash = std::__constrain_hash(__new_node->__hash(), __bucket_count);
+      if (__new_chash != __current_chash) {
+        __bucket_list_[__new_chash] = __own_iter;
+        __current_chash             = __new_chash;
+      }
+
+      __own_iter->__next_ = static_cast<__next_pointer>(__new_node.release());
+      __own_iter          = __own_iter->__next_;
+    }
+  }
+
+  _LIBCPP_HIDE_FROM_ABI void __copy_construct(__next_pointer __other_iter) {
+    __next_pointer __own_iter = __first_node_.__ptr();
+    {
+      __node_holder __new_node = __construct_node_hash(__other_iter->__hash(), __other_iter->__upcast()->__get_value());
+      __own_iter->__next_      = static_cast<__next_pointer>(__new_node.release());
+    }
+
+    size_t __current_chash          = std::__constrain_hash(__own_iter->__next_->__hash(), bucket_count());
+    __bucket_list_[__current_chash] = __own_iter;
+    __other_iter                    = __other_iter->__next_;
+    __own_iter                      = __own_iter->__next_;
+    __copy_construct(__other_iter, __own_iter, __current_chash);
+  }
+
 public:
   _LIBCPP_HIDE_FROM_ABI size_type size() const _NOEXCEPT { return __size_; }
 
@@ -811,40 +772,66 @@ public:
   _LIBCPP_HIDE_FROM_ABI iterator __node_insert_multi(__node_pointer __nd);
   _LIBCPP_HIDE_FROM_ABI iterator __node_insert_multi(const_iterator __p, __node_pointer __nd);
 
-  template <class _Key, class... _Args>
-  _LIBCPP_HIDE_FROM_ABI pair<iterator, bool> __emplace_unique_key_args(_Key const& __k, _Args&&... __args);
-
-  template <class... _Args>
-  _LIBCPP_HIDE_FROM_ABI pair<iterator, bool> __emplace_unique_impl(_Args&&... __args);
-
-  template <class _Pp>
-  _LIBCPP_HIDE_FROM_ABI pair<iterator, bool> __emplace_unique(_Pp&& __x) {
-    return __emplace_unique_extract_key(std::forward<_Pp>(__x), __can_extract_key<_Pp, key_type>());
-  }
-
-  template <class _First,
-            class _Second,
-            __enable_if_t<__can_extract_map_key<_First, key_type, __container_value_type>::value, int> = 0>
-  _LIBCPP_HIDE_FROM_ABI pair<iterator, bool> __emplace_unique(_First&& __f, _Second&& __s) {
-    return __emplace_unique_key_args(__f, std::forward<_First>(__f), std::forward<_Second>(__s));
-  }
-
   template <class... _Args>
   _LIBCPP_HIDE_FROM_ABI pair<iterator, bool> __emplace_unique(_Args&&... __args) {
-    return __emplace_unique_impl(std::forward<_Args>(__args)...);
-  }
-
-  template <class _Pp>
-  _LIBCPP_HIDE_FROM_ABI pair<iterator, bool> __emplace_unique_extract_key(_Pp&& __x, __extract_key_fail_tag) {
-    return __emplace_unique_impl(std::forward<_Pp>(__x));
-  }
-  template <class _Pp>
-  _LIBCPP_HIDE_FROM_ABI pair<iterator, bool> __emplace_unique_extract_key(_Pp&& __x, __extract_key_self_tag) {
-    return __emplace_unique_key_args(__x, std::forward<_Pp>(__x));
-  }
-  template <class _Pp>
-  _LIBCPP_HIDE_FROM_ABI pair<iterator, bool> __emplace_unique_extract_key(_Pp&& __x, __extract_key_first_tag) {
-    return __emplace_unique_key_args(__x.first, std::forward<_Pp>(__x));
+    return std::__try_key_extraction<key_type>(
+        [this](const key_type& __key, _Args&&... __args2) {
+          size_t __hash   = hash_function()(__key);
+          size_type __bc  = bucket_count();
+          bool __inserted = false;
+          __next_pointer __nd;
+          size_t __chash;
+          if (__bc != 0) {
+            __chash = std::__constrain_hash(__hash, __bc);
+            __nd    = __bucket_list_[__chash];
+            if (__nd != nullptr) {
+              for (__nd = __nd->__next_;
+                   __nd != nullptr &&
+                   (__nd->__hash() == __hash || std::__constrain_hash(__nd->__hash(), __bc) == __chash);
+                   __nd = __nd->__next_) {
+                if ((__nd->__hash() == __hash) && key_eq()(__nd->__upcast()->__get_value(), __key))
+                  goto __done;
+              }
+            }
+          }
+          {
+            __node_holder __h = __construct_node_hash(__hash, std::forward<_Args>(__args2)...);
+            if (size() + 1 > __bc * max_load_factor() || __bc == 0) { // no buckets yet: rehash before indexing
+              __rehash_unique(std::max<size_type>(2 * __bc + !std::__is_hash_power2(__bc),
+                                                  size_type(__math::ceil(float(size() + 1) / max_load_factor()))));
+              __bc    = bucket_count();
+              __chash = std::__constrain_hash(__hash, __bc);
+            }
+            // insert_after __bucket_list_[__chash], or __first_node if bucket is null
+            __next_pointer __pn = __bucket_list_[__chash];
+            if (__pn == nullptr) {
+              __pn          = __first_node_.__ptr();
+              __h->__next_  = __pn->__next_;
+              __pn->__next_ = __h.get()->__ptr();
+              // fix up __bucket_list_
+              __bucket_list_[__chash] = __pn;
+              if (__h->__next_ != nullptr)
+                __bucket_list_[std::__constrain_hash(__h->__next_->__hash(), __bc)] = __h.get()->__ptr();
+            } else {
+              __h->__next_  = __pn->__next_;
+              __pn->__next_ = static_cast<__next_pointer>(__h.get());
+            }
+            __nd = static_cast<__next_pointer>(__h.release());
+            // increment size
+            ++size();
+            __inserted = true;
+          }
+        __done:
+          return pair<iterator, bool>(iterator(__nd), __inserted);
+        },
+        [this](_Args&&... __args2) {
+          __node_holder __h        = __construct_node(std::forward<_Args>(__args2)...);
+          pair<iterator, bool> __r = __node_insert_unique(__h.get());
+          if (__r.second)
+            __h.release();
+          return __r;
+        },
+        std::forward<_Args>(__args)...);
   }
 
   template <class... _Args>
@@ -854,9 +841,7 @@ public:
 
   template <class _ValueT = _Tp, __enable_if_t<__is_hash_value_type<_ValueT>::value, int> = 0>
   _LIBCPP_HIDE_FROM_ABI void __insert_unique_from_orphaned_node(value_type&& __value) {
-    using __key_type = typename _NodeTypes::key_type;
-
-    __node_holder __h = __construct_node(const_cast<__key_type&&>(__value.first), std::move(__value.second));
+    __node_holder __h = __construct_node(const_cast<key_type&&>(__value.first), std::move(__value.second));
     __node_insert_unique(__h.get());
     __h.release();
   }
@@ -870,9 +855,7 @@ public:
 
   template <class _ValueT = _Tp, __enable_if_t<__is_hash_value_type<_ValueT>::value, int> = 0>
   _LIBCPP_HIDE_FROM_ABI void __insert_multi_from_orphaned_node(value_type&& __value) {
-    using __key_type = typename _NodeTypes::key_type;
-
-    __node_holder __h = __construct_node(const_cast<__key_type&&>(__value.first), std::move(__value.second));
+    __node_holder __h = __construct_node(const_cast<key_type&&>(__value.first), std::move(__value.second));
     __node_insert_multi(__h.get());
     __h.release();
   }
@@ -1017,8 +1000,8 @@ private:
   template <class... _Args>
   _LIBCPP_HIDE_FROM_ABI __node_holder __construct_node(_Args&&... __args);
 
-  template <class _First, class... _Rest>
-  _LIBCPP_HIDE_FROM_ABI __node_holder __construct_node_hash(size_t __hash, _First&& __f, _Rest&&... __rest);
+  template <class... _Args>
+  _LIBCPP_HIDE_FROM_ABI __node_holder __construct_node_hash(size_t __hash, _Args&&... __args);
 
   _LIBCPP_HIDE_FROM_ABI void __copy_assign_alloc(const __hash_table& __u) {
     __copy_assign_alloc(__u, integral_constant<bool, __node_traits::propagate_on_container_copy_assignment::value>());
@@ -1042,17 +1025,29 @@ private:
   }
   _LIBCPP_HIDE_FROM_ABI void __move_assign_alloc(__hash_table&, false_type) _NOEXCEPT {}
 
-  _LIBCPP_HIDE_FROM_ABI void __deallocate_node(__next_pointer __np) _NOEXCEPT;
+  _LIBCPP_HIDE_FROM_ABI void __deallocate_node(__node_pointer __nd) _NOEXCEPT {
+    auto& __alloc = __node_alloc();
+    __node_traits::destroy(__alloc, std::addressof(__nd->__get_value()));
+    std::__destroy_at(std::__to_address(__nd));
+    __node_traits::deallocate(__alloc, __nd, 1);
+  }
+
+  _LIBCPP_HIDE_FROM_ABI void __deallocate_node_list(__next_pointer __np) _NOEXCEPT {
+    while (__np != nullptr) {
+      __next_pointer __next = __np->__next_;
+      __deallocate_node(__np->__upcast());
+      __np = __next;
+    }
+  }
+
   _LIBCPP_HIDE_FROM_ABI __next_pointer __detach() _NOEXCEPT;
 
   template <class _From, class _ValueT = _Tp, __enable_if_t<__is_hash_value_type<_ValueT>::value, int> = 0>
   _LIBCPP_HIDE_FROM_ABI void __assign_value(__get_hash_node_value_type_t<_Tp>& __lhs, _From&& __rhs) {
-    using __key_type = typename _NodeTypes::key_type;
-
     // This is technically UB, since the object was constructed as `const`.
     // Clang doesn't optimize on this currently though.
-    const_cast<__key_type&>(__lhs.first) = const_cast<__copy_cvref_t<_From, __key_type>&&>(__rhs.first);
-    __lhs.second                         = std::forward<_From>(__rhs).second;
+    const_cast<key_type&>(__lhs.first) = const_cast<__copy_cvref_t<_From, key_type>&&>(__rhs.first);
+    __lhs.second                       = std::forward<_From>(__rhs).second;
   }
 
   template <class _From, class _ValueT = _Tp, __enable_if_t<!__is_hash_value_type<_ValueT>::value, int> = 0>
@@ -1101,16 +1096,29 @@ __hash_table<_Tp, _Hash, _Equal, _Alloc>::__hash_table(const allocator_type& __a
       __max_load_factor_(1.0f) {}
 
 template <class _Tp, class _Hash, class _Equal, class _Alloc>
-__hash_table<_Tp, _Hash, _Equal, _Alloc>::__hash_table(const __hash_table& __u)
+__hash_table<_Tp, _Hash, _Equal, _Alloc>::__hash_table(const __hash_table& __other)
     : __bucket_list_(nullptr,
-                     __bucket_list_deleter(allocator_traits<__pointer_allocator>::select_on_container_copy_construction(
-                                               __u.__bucket_list_.get_deleter().__alloc()),
+                     __bucket_list_deleter(__pointer_alloc_traits::select_on_container_copy_construction(
+                                               __other.__bucket_list_.get_deleter().__alloc()),
                                            0)),
-      __node_alloc_(allocator_traits<__node_allocator>::select_on_container_copy_construction(__u.__node_alloc())),
+      __node_alloc_(__node_traits::select_on_container_copy_construction(__other.__node_alloc())),
       __size_(0),
-      __hasher_(__u.hash_function()),
-      __max_load_factor_(__u.__max_load_factor_),
-      __key_eq_(__u.__key_eq_) {}
+      __hasher_(__other.hash_function()),
+      __max_load_factor_(__other.__max_load_factor_),
+      __key_eq_(__other.__key_eq_) {
+  if (__other.size() == 0)
+    return;
+
+  auto& __bucket_list_del = __bucket_list_.get_deleter();
+  auto __bucket_count     = __other.bucket_count();
+  __bucket_list_.reset(__pointer_alloc_traits::allocate(__bucket_list_del.__alloc(), __bucket_count));
+  __bucket_list_del.size() = __bucket_count;
+
+  std::fill_n(__bucket_list_.get(), __bucket_count, nullptr);
+
+  __copy_construct(__other.__first_node_.__next_);
+  __size_ = __other.size();
+}
 
 template <class _Tp, class _Hash, class _Equal, class _Alloc>
 __hash_table<_Tp, _Hash, _Equal, _Alloc>::__hash_table(const __hash_table& __u, const allocator_type& __a)
@@ -1169,7 +1177,7 @@ __hash_table<_Tp, _Hash, _Equal, _Alloc>::~__hash_table() {
   static_assert(is_copy_constructible<hasher>::value, "Hasher must be copy-constructible.");
 #endif
 
-  __deallocate_node(__first_node_.__next_);
+  __deallocate_node_list(__first_node_.__next_);
 }
 
 template <class _Tp, class _Hash, class _Equal, class _Alloc>
@@ -1184,28 +1192,76 @@ void __hash_table<_Tp, _Hash, _Equal, _Alloc>::__copy_assign_alloc(const __hash_
 }
 
 template <class _Tp, class _Hash, class _Equal, class _Alloc>
-__hash_table<_Tp, _Hash, _Equal, _Alloc>& __hash_table<_Tp, _Hash, _Equal, _Alloc>::operator=(const __hash_table& __u) {
-  if (this != std::addressof(__u)) {
-    __copy_assign_alloc(__u);
-    hash_function()   = __u.hash_function();
-    key_eq()          = __u.key_eq();
-    max_load_factor() = __u.max_load_factor();
-    __assign_multi(__u.begin(), __u.end());
-  }
-  return *this;
-}
+__hash_table<_Tp, _Hash, _Equal, _Alloc>&
+__hash_table<_Tp, _Hash, _Equal, _Alloc>::operator=(const __hash_table& __other) {
+  if (this == std::addressof(__other))
+    return *this;
 
-template <class _Tp, class _Hash, class _Equal, class _Alloc>
-void __hash_table<_Tp, _Hash, _Equal, _Alloc>::__deallocate_node(__next_pointer __np) _NOEXCEPT {
-  __node_allocator& __na = __node_alloc();
-  while (__np != nullptr) {
-    __next_pointer __next    = __np->__next_;
-    __node_pointer __real_np = __np->__upcast();
-    __node_traits::destroy(__na, _NodeTypes::__get_ptr(__real_np->__get_value()));
-    std::__destroy_at(std::addressof(*__real_np));
-    __node_traits::deallocate(__na, __real_np, 1);
-    __np = __next;
+  __copy_assign_alloc(__other);
+  hash_function()   = __other.hash_function();
+  key_eq()          = __other.key_eq();
+  max_load_factor() = __other.max_load_factor();
+
+  if (__other.size() == 0) {
+    clear();
+    return *this;
   }
+
+  auto __bucket_count = __other.bucket_count();
+  if (__bucket_count != bucket_count()) {
+    auto& __bucket_list_del = __bucket_list_.get_deleter();
+    __bucket_list_.reset(__pointer_alloc_traits::allocate(__bucket_list_del.__alloc(), __bucket_count));
+    __bucket_list_del.size() = __bucket_count;
+  }
+  std::fill_n(__bucket_list_.get(), __bucket_count, nullptr);
+
+  if (!__first_node_.__next_) {
+    __copy_construct(__other.__first_node_.__next_);
+    __size_ = __other.size();
+    return *this;
+  }
+
+  __next_pointer __other_iter = __other.__first_node_.__next_;
+  __next_pointer __own_iter   = __first_node_.__ptr();
+  {
+    __node_pointer __next = __own_iter->__next_->__upcast();
+    __assign_value(__next->__get_value(), __other_iter->__upcast()->__get_value());
+    __next->__hash_ = __other_iter->__hash();
+  }
+  size_t __current_chash          = std::__constrain_hash(__own_iter->__next_->__hash(), __bucket_count);
+  __bucket_list_[__current_chash] = __own_iter;
+  __other_iter                    = __other_iter->__next_;
+  __own_iter                      = __own_iter->__next_;
+
+  // Go through the nodes of the incoming hash table and copy them into the destination hash table, reusing as many
+  // existing nodes as possible in the destination.
+  while (__other_iter && __own_iter->__next_) {
+    __node_pointer __next = __own_iter->__next_->__upcast();
+    __assign_value(__next->__get_value(), __other_iter->__upcast()->__get_value());
+    __next->__hash_ = __other_iter->__hash();
+
+    size_t __new_chash = std::__constrain_hash(__next->__hash_, __bucket_count);
+    if (__new_chash != __current_chash) {
+      __bucket_list_[__new_chash] = __own_iter;
+      __current_chash             = __new_chash;
+    }
+
+    __other_iter = __other_iter->__next_;
+    __own_iter   = __own_iter->__next_;
+  }
+
+  // At this point we either have consumed the whole incoming hash table, or we don't have any more nodes to reuse in
+  // the destination. Either continue with constructing new nodes, or deallocate the left over nodes.
+  if (__own_iter->__next_) {
+    __deallocate_node_list(__own_iter->__next_);
+    __own_iter->__next_ = nullptr;
+  } else {
+    __copy_construct(__other_iter, __own_iter, __current_chash);
+  }
+
+  __size_ = __other.size();
+
+  return *this;
 }
 
 template <class _Tp, class _Hash, class _Equal, class _Alloc>
@@ -1251,23 +1307,14 @@ void __hash_table<_Tp, _Hash, _Equal, _Alloc>::__move_assign(__hash_table& __u,
     max_load_factor() = __u.max_load_factor();
     if (bucket_count() != 0) {
       __next_pointer __cache = __detach();
-#if _LIBCPP_HAS_EXCEPTIONS
-      try {
-#endif // _LIBCPP_HAS_EXCEPTIONS
-        const_iterator __i = __u.begin();
-        while (__cache != nullptr && __u.size() != 0) {
-          __assign_value(__cache->__upcast()->__get_value(), std::move(__u.remove(__i++)->__get_value()));
-          __next_pointer __next = __cache->__next_;
-          __node_insert_multi(__cache->__upcast());
-          __cache = __next;
-        }
-#if _LIBCPP_HAS_EXCEPTIONS
-      } catch (...) {
-        __deallocate_node(__cache);
-        throw;
+      auto __guard           = std::__make_scope_guard([&] { __deallocate_node_list(__cache); });
+      const_iterator __i     = __u.begin();
+      while (__cache != nullptr && __u.size() != 0) {
+        __assign_value(__cache->__upcast()->__get_value(), std::move(__u.remove(__i++)->__get_value()));
+        __next_pointer __next = __cache->__next_;
+        __node_insert_multi(__cache->__upcast());
+        __cache = __next;
       }
-#endif // _LIBCPP_HAS_EXCEPTIONS
-      __deallocate_node(__cache);
     }
     const_iterator __i = __u.begin();
     while (__u.size() != 0)
@@ -1290,27 +1337,18 @@ template <class _InputIterator>
 void __hash_table<_Tp, _Hash, _Equal, _Alloc>::__assign_unique(_InputIterator __first, _InputIterator __last) {
   typedef iterator_traits<_InputIterator> _ITraits;
   typedef typename _ITraits::value_type _ItValueType;
-  static_assert(is_same<_ItValueType, __container_value_type>::value,
-                "__assign_unique may only be called with the containers value type");
+  static_assert(
+      is_same<_ItValueType, value_type>::value, "__assign_unique may only be called with the containers value type");
 
   if (bucket_count() != 0) {
     __next_pointer __cache = __detach();
-#if _LIBCPP_HAS_EXCEPTIONS
-    try {
-#endif // _LIBCPP_HAS_EXCEPTIONS
-      for (; __cache != nullptr && __first != __last; ++__first) {
-        __assign_value(__cache->__upcast()->__get_value(), *__first);
-        __next_pointer __next = __cache->__next_;
-        __node_insert_unique(__cache->__upcast());
-        __cache = __next;
-      }
-#if _LIBCPP_HAS_EXCEPTIONS
-    } catch (...) {
-      __deallocate_node(__cache);
-      throw;
+    auto __guard           = std::__make_scope_guard([&] { __deallocate_node_list(__cache); });
+    for (; __cache != nullptr && __first != __last; ++__first) {
+      __assign_value(__cache->__upcast()->__get_value(), *__first);
+      __next_pointer __next = __cache->__next_;
+      __node_insert_unique(__cache->__upcast());
+      __cache = __next;
     }
-#endif // _LIBCPP_HAS_EXCEPTIONS
-    __deallocate_node(__cache);
   }
   for (; __first != __last; ++__first)
     __emplace_unique(*__first);
@@ -1321,31 +1359,20 @@ template <class _InputIterator>
 void __hash_table<_Tp, _Hash, _Equal, _Alloc>::__assign_multi(_InputIterator __first, _InputIterator __last) {
   typedef iterator_traits<_InputIterator> _ITraits;
   typedef typename _ITraits::value_type _ItValueType;
-  static_assert(
-      (is_same<_ItValueType, __container_value_type>::value || is_same<_ItValueType, __node_value_type>::value),
-      "__assign_multi may only be called with the containers value type"
-      " or the nodes value type");
+  static_assert(is_same<_ItValueType, value_type>::value,
+                "__assign_multi may only be called with the containers value type or the nodes value type");
   if (bucket_count() != 0) {
     __next_pointer __cache = __detach();
-#if _LIBCPP_HAS_EXCEPTIONS
-    try {
-#endif // _LIBCPP_HAS_EXCEPTIONS
-      for (; __cache != nullptr && __first != __last; ++__first) {
-        __assign_value(__cache->__upcast()->__get_value(), *__first);
-        __next_pointer __next              = __cache->__next_;
-        __node_insert_multi(__cache->__upcast());
-        __cache = __next;
-      }
-#if _LIBCPP_HAS_EXCEPTIONS
-    } catch (...) {
-      __deallocate_node(__cache);
-      throw;
+    auto __guard           = std::__make_scope_guard([&] { __deallocate_node_list(__cache); });
+    for (; __cache != nullptr && __first != __last; ++__first) {
+      __assign_value(__cache->__upcast()->__get_value(), *__first);
+      __next_pointer __next = __cache->__next_;
+      __node_insert_multi(__cache->__upcast());
+      __cache = __next;
     }
-#endif // _LIBCPP_HAS_EXCEPTIONS
-    __deallocate_node(__cache);
   }
   for (; __first != __last; ++__first)
-    __emplace_multi(_NodeTypes::__get_value(*__first));
+    __emplace_multi(*__first);
 }
 
 template <class _Tp, class _Hash, class _Equal, class _Alloc>
@@ -1375,7 +1402,7 @@ __hash_table<_Tp, _Hash, _Equal, _Alloc>::end() const _NOEXCEPT {
 template <class _Tp, class _Hash, class _Equal, class _Alloc>
 void __hash_table<_Tp, _Hash, _Equal, _Alloc>::clear() _NOEXCEPT {
   if (size() > 0) {
-    __deallocate_node(__first_node_.__next_);
+    __deallocate_node_list(__first_node_.__next_);
     __first_node_.__next_ = nullptr;
     size_type __bc        = bucket_count();
     for (size_type __i = 0; __i < __bc; ++__i)
@@ -1561,69 +1588,6 @@ __hash_table<_Tp, _Hash, _Equal, _Alloc>::__node_insert_multi(const_iterator __p
   return __node_insert_multi(__cp);
 }
 
-template <class _Tp, class _Hash, class _Equal, class _Alloc>
-template <class _Key, class... _Args>
-pair<typename __hash_table<_Tp, _Hash, _Equal, _Alloc>::iterator, bool>
-__hash_table<_Tp, _Hash, _Equal, _Alloc>::__emplace_unique_key_args(_Key const& __k, _Args&&... __args) {
-  size_t __hash   = hash_function()(__k);
-  size_type __bc  = bucket_count();
-  bool __inserted = false;
-  __next_pointer __nd;
-  size_t __chash;
-  if (__bc != 0) {
-    __chash = std::__constrain_hash(__hash, __bc);
-    __nd    = __bucket_list_[__chash];
-    if (__nd != nullptr) {
-      for (__nd = __nd->__next_;
-           __nd != nullptr && (__nd->__hash() == __hash || std::__constrain_hash(__nd->__hash(), __bc) == __chash);
-           __nd = __nd->__next_) {
-        if ((__nd->__hash() == __hash) && key_eq()(__nd->__upcast()->__get_value(), __k))
-          goto __done;
-      }
-    }
-  }
-  {
-    __node_holder __h = __construct_node_hash(__hash, std::forward<_Args>(__args)...);
-    if (size() + 1 > __bc * max_load_factor() || __bc == 0) {
-      __rehash_unique(std::max<size_type>(
-          2 * __bc + !std::__is_hash_power2(__bc), size_type(__math::ceil(float(size() + 1) / max_load_factor()))));
-      __bc    = bucket_count();
-      __chash = std::__constrain_hash(__hash, __bc);
-    }
-    // insert_after __bucket_list_[__chash], or __first_node if bucket is null
-    __next_pointer __pn = __bucket_list_[__chash];
-    if (__pn == nullptr) {
-      __pn          = __first_node_.__ptr();
-      __h->__next_  = __pn->__next_;
-      __pn->__next_ = __h.get()->__ptr();
-      // fix up __bucket_list_
-      __bucket_list_[__chash] = __pn;
-      if (__h->__next_ != nullptr)
-        __bucket_list_[std::__constrain_hash(__h->__next_->__hash(), __bc)] = __h.get()->__ptr();
-    } else {
-      __h->__next_  = __pn->__next_;
-      __pn->__next_ = static_cast<__next_pointer>(__h.get());
-    }
-    __nd = static_cast<__next_pointer>(__h.release());
-    // increment size
-    ++size();
-    __inserted = true;
-  }
-__done:
-  return pair<iterator, bool>(iterator(__nd), __inserted);
-}
-
-template <class _Tp, class _Hash, class _Equal, class _Alloc>
-template <class... _Args>
-pair<typename __hash_table<_Tp, _Hash, _Equal, _Alloc>::iterator, bool>
-__hash_table<_Tp, _Hash, _Equal, _Alloc>::__emplace_unique_impl(_Args&&... __args) {
-  __node_holder __h        = __construct_node(std::forward<_Args>(__args)...);
-  pair<iterator, bool> __r = __node_insert_unique(__h.get());
-  if (__r.second)
-    __h.release();
-  return __r;
-}
-
 template <class _Tp, class _Hash, class _Equal, class _Alloc>
 template <class... _Args>
 typename __hash_table<_Tp, _Hash, _Equal, _Alloc>::iterator
@@ -1764,41 +1728,45 @@ void __hash_table<_Tp, _Hash, _Equal, _Alloc>::__rehash(size_type __n) _LIBCPP_D
 
 template <class _Tp, class _Hash, class _Equal, class _Alloc>
 template <bool _UniqueKeys>
-void __hash_table<_Tp, _Hash, _Equal, _Alloc>::__do_rehash(size_type __nbc) {
-  __pointer_allocator& __npa = __bucket_list_.get_deleter().__alloc();
-  __bucket_list_.reset(__nbc > 0 ? __pointer_alloc_traits::allocate(__npa, __nbc) : nullptr);
-  __bucket_list_.get_deleter().size() = __nbc;
-  if (__nbc > 0) {
-    for (size_type __i = 0; __i < __nbc; ++__i)
-      __bucket_list_[__i] = nullptr;
-    __next_pointer __pp = __first_node_.__ptr();
-    __next_pointer __cp = __pp->__next_;
-    if (__cp != nullptr) {
-      size_type __chash       = std::__constrain_hash(__cp->__hash(), __nbc);
-      __bucket_list_[__chash] = __pp;
-      size_type __phash       = __chash;
-      for (__pp = __cp, void(), __cp = __cp->__next_; __cp != nullptr; __cp = __pp->__next_) {
-        __chash = std::__constrain_hash(__cp->__hash(), __nbc);
-        if (__chash == __phash)
-          __pp = __cp;
-        else {
-          if (__bucket_list_[__chash] == nullptr) {
-            __bucket_list_[__chash] = __pp;
-            __pp                    = __cp;
-            __phash                 = __chash;
-          } else {
-            __next_pointer __np = __cp;
-            if _LIBCPP_CONSTEXPR_SINCE_CXX17 (!_UniqueKeys) {
-              for (; __np->__next_ != nullptr &&
-                     key_eq()(__cp->__upcast()->__get_value(), __np->__next_->__upcast()->__get_value());
-                   __np = __np->__next_)
-                ;
-            }
-            __pp->__next_                    = __np->__next_;
-            __np->__next_                    = __bucket_list_[__chash]->__next_;
-            __bucket_list_[__chash]->__next_ = __cp;
-          }
+void __hash_table<_Tp, _Hash, _Equal, _Alloc>::__do_rehash(size_type __bucket_count) {
+  __pointer_allocator& __ptr_alloc = __bucket_list_.get_deleter().__alloc();
+  __bucket_list_.reset(__bucket_count > 0 ? __pointer_alloc_traits::allocate(__ptr_alloc, __bucket_count) : nullptr);
+  __bucket_list_.get_deleter().size() = __bucket_count;
+
+  if (__bucket_count == 0)
+    return;
+
+  for (size_type __i = 0; __i < __bucket_count; ++__i)
+    __bucket_list_[__i] = nullptr;
+  __next_pointer __pp = __first_node_.__ptr();
+  __next_pointer __cp = __pp->__next_;
+
+  if (!__cp)
+    return;
+
+  size_type __chash       = std::__constrain_hash(__cp->__hash(), __bucket_count);
+  __bucket_list_[__chash] = __pp;
+  size_type __phash       = __chash;
+  for (__pp = __cp, void(), __cp = __cp->__next_; __cp != nullptr; __cp = __pp->__next_) {
+    __chash = std::__constrain_hash(__cp->__hash(), __bucket_count);
+    if (__chash == __phash)
+      __pp = __cp;
+    else {
+      if (__bucket_list_[__chash] == nullptr) {
+        __bucket_list_[__chash] = __pp;
+        __pp                    = __cp;
+        __phash                 = __chash;
+      } else {
+        __next_pointer __np = __cp;
+        if _LIBCPP_CONSTEXPR (!_UniqueKeys) {
+          for (; __np->__next_ != nullptr &&
+                 key_eq()(__cp->__upcast()->__get_value(), __np->__next_->__upcast()->__get_value());
+               __np = __np->__next_)
+            ;
         }
+        __pp->__next_                    = __np->__next_;
+        __np->__next_                    = __bucket_list_[__chash]->__next_;
+        __bucket_list_[__chash]->__next_ = __cp;
       }
     }
   }
@@ -1854,16 +1822,13 @@ __hash_table<_Tp, _Hash, _Equal, _Alloc>::__construct_node(_Args&&... __args) {
   __node_allocator& __na = __node_alloc();
   __node_holder __h(__node_traits::allocate(__na, 1), _Dp(__na));
 
-  // Begin the lifetime of the node itself. Note that this doesn't begin the lifetime of the value
-  // held inside the node, since we need to use the allocator's construct() method for that.
+  // Begin the lifetime of the node itself and the value_type contained within.
   //
   // We don't use the allocator's construct() method to construct the node itself since the
   // Cpp17FooInsertable named requirements don't require the allocator's construct() method
   // to work on anything other than the value_type.
-  std::__construct_at(std::addressof(*__h), /* next = */ nullptr, /* hash = */ 0);
+  std::__construct_at(std::addressof(*__h), /* hash = */ 0, __na, std::forward<_Args>(__args)...);
 
-  // Now construct the value_type using the allocator's construct() method.
-  __node_traits::construct(__na, _NodeTypes::__get_ptr(__h->__get_value()), std::forward<_Args>(__args)...);
   __h.get_deleter().__value_constructed = true;
 
   __h->__hash_ = hash_function()(__h->__get_value());
@@ -1871,15 +1836,13 @@ __hash_table<_Tp, _Hash, _Equal, _Alloc>::__construct_node(_Args&&... __args) {
 }
 
 template <class _Tp, class _Hash, class _Equal, class _Alloc>
-template <class _First, class... _Rest>
+template <class... _Args>
 typename __hash_table<_Tp, _Hash, _Equal, _Alloc>::__node_holder
-__hash_table<_Tp, _Hash, _Equal, _Alloc>::__construct_node_hash(size_t __hash, _First&& __f, _Rest&&... __rest) {
-  static_assert(!__is_hash_value_type<_First, _Rest...>::value, "Construct cannot be called with a hash value type");
+__hash_table<_Tp, _Hash, _Equal, _Alloc>::__construct_node_hash(size_t __hash, _Args&&... __args) {
+  static_assert(!__is_hash_value_type<_Args...>::value, "Construct cannot be called with a hash value type");
   __node_allocator& __na = __node_alloc();
   __node_holder __h(__node_traits::allocate(__na, 1), _Dp(__na));
-  std::__construct_at(std::addressof(*__h), /* next = */ nullptr, /* hash = */ __hash);
-  __node_traits::construct(
-      __na, _NodeTypes::__get_ptr(__h->__get_value()), std::forward<_First>(__f), std::forward<_Rest>(__rest)...);
+  std::__construct_at(std::addressof(*__h), /* hash = */ __hash, __na, std::forward<_Args>(__args)...);
   __h.get_deleter().__value_constructed = true;
   return __h;
 }
@@ -1899,12 +1862,63 @@ __hash_table<_Tp, _Hash, _Equal, _Alloc>::erase(const_iterator __p) {
 template <class _Tp, class _Hash, class _Equal, class _Alloc>
 typename __hash_table<_Tp, _Hash, _Equal, _Alloc>::iterator
 __hash_table<_Tp, _Hash, _Equal, _Alloc>::erase(const_iterator __first, const_iterator __last) {
-  for (const_iterator __p = __first; __first != __last; __p = __first) {
-    ++__first;
-    erase(__p);
+  if (__first == __last)
+    return iterator(__last.__node_);
+
+  // current node
+  __next_pointer __current = __first.__node_;
+  size_type __bucket_count = bucket_count();
+  size_t __chash           = std::__constrain_hash(__current->__hash(), __bucket_count);
+  // find previous node
+  __next_pointer __before_first = __bucket_list_[__chash];
+  for (; __before_first->__next_ != __current; __before_first = __before_first->__next_)
+    ;
+
+  __next_pointer __last_node = __last.__node_;
+
+  // If __before_first is in the same bucket (i.e. the first element we erase is not the first in the bucket), clear
+  // this bucket first without re-linking it
+  if (__before_first != __first_node_.__ptr() &&
+      std::__constrain_hash(__before_first->__hash(), __bucket_count) == __chash) {
+    while (__current != __last_node) {
+      auto __next = __current->__next_;
+      __deallocate_node(__current->__upcast());
+      __current = __next;
+      --__size_;
+
+      if (__next) {
+        if (auto __next_chash = std::__constrain_hash(__next->__hash(), __bucket_count); __next_chash != __chash) {
+          __bucket_list_[__next_chash] = __before_first;
+          __chash                      = __next_chash;
+          break;
+        }
+      }
+    }
   }
-  __next_pointer __np = __last.__node_;
-  return iterator(__np);
+
+  while (__current != __last_node) {
+    auto __next = __current->__next_;
+    __deallocate_node(__current->__upcast());
+    __current = __next;
+    --__size_;
+
+    // When switching buckets, set the old bucket to be empty and update the next bucket to have __before_first as its
+    // before-first element
+    if (__next) {
+      if (auto __next_chash = std::__constrain_hash(__next->__hash(), __bucket_count); __next_chash != __chash) {
+        __bucket_list_[__chash]      = nullptr;
+        __bucket_list_[__next_chash] = __before_first;
+        __chash                      = __next_chash;
+      }
+    } else { // When __next is a nullptr we've fully erased the last bucket. Update the bucket list accordingly.
+      __bucket_list_[__chash] = nullptr;
+    }
+  }
+
+  // re-link __before_first with __last
+  __before_first->__next_ = __current;
+
+  return iterator(__last.__node_);
 }
 
 template <class _Tp, class _Hash, class _Equal, class _Alloc>
diff --git a/lib/libcxx/include/__ios/fpos.h b/lib/libcxx/include/__ios/fpos.h
index e5c21b4391..af114421c8 100644
--- a/lib/libcxx/include/__ios/fpos.h
+++ b/lib/libcxx/include/__ios/fpos.h
@@ -30,7 +30,7 @@ public:
 
   _LIBCPP_HIDE_FROM_ABI operator streamoff() const { return __off_; }
 
-  _LIBCPP_HIDE_FROM_ABI _StateT state() const { return __st_; }
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI _StateT state() const { return __st_; }
   _LIBCPP_HIDE_FROM_ABI void state(_StateT __st) { __st_ = __st; }
 
   _LIBCPP_HIDE_FROM_ABI fpos& operator+=(streamoff __off) {
@@ -38,7 +38,7 @@ public:
     return *this;
   }
 
-  _LIBCPP_HIDE_FROM_ABI fpos operator+(streamoff __off) const {
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI fpos operator+(streamoff __off) const {
     fpos __t(*this);
     __t += __off;
     return __t;
@@ -49,7 +49,7 @@ public:
     return *this;
   }
 
-  _LIBCPP_HIDE_FROM_ABI fpos operator-(streamoff __off) const {
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI fpos operator-(streamoff __off) const {
     fpos __t(*this);
     __t -= __off;
     return __t;
@@ -57,7 +57,7 @@ public:
 };
 
 template <class _StateT>
-inline _LIBCPP_HIDE_FROM_ABI streamoff operator-(const fpos<_StateT>& __x, const fpos<_StateT>& __y) {
+[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI streamoff operator-(const fpos<_StateT>& __x, const fpos<_StateT>& __y) {
   return streamoff(__x) - streamoff(__y);
 }
 
diff --git a/lib/libcxx/include/__iterator/back_insert_iterator.h b/lib/libcxx/include/__iterator/back_insert_iterator.h
index 3a11fae4cb..d051c08751 100644
--- a/lib/libcxx/include/__iterator/back_insert_iterator.h
+++ b/lib/libcxx/include/__iterator/back_insert_iterator.h
@@ -26,15 +26,9 @@ _LIBCPP_PUSH_MACROS
 
 _LIBCPP_BEGIN_NAMESPACE_STD
 
-_LIBCPP_SUPPRESS_DEPRECATED_PUSH
 template <class _Container>
 class back_insert_iterator
-#if _LIBCPP_STD_VER <= 14 || !defined(_LIBCPP_ABI_NO_ITERATOR_BASES)
-    : public iterator<output_iterator_tag, void, void, void, void>
-#endif
-{
-  _LIBCPP_SUPPRESS_DEPRECATED_POP
-
+    : public __iterator_base<back_insert_iterator<_Container>, output_iterator_tag, void, void, void, void> {
 protected:
   _Container* container;
 
diff --git a/lib/libcxx/include/__iterator/bounded_iter.h b/lib/libcxx/include/__iterator/bounded_iter.h
index d12750d1f8..d2a0906112 100644
--- a/lib/libcxx/include/__iterator/bounded_iter.h
+++ b/lib/libcxx/include/__iterator/bounded_iter.h
@@ -74,12 +74,12 @@ struct __bounded_iter {
   _LIBCPP_HIDE_FROM_ABI __bounded_iter(__bounded_iter const&) = default;
   _LIBCPP_HIDE_FROM_ABI __bounded_iter(__bounded_iter&&)      = default;
 
-  template < class _OtherIterator,
-             __enable_if_t<
-                 _And< is_convertible<const _OtherIterator&, _Iterator>,
-                       _Or<is_same<reference, __iter_reference<_OtherIterator> >,
-                           is_same<reference, __make_const_lvalue_ref<__iter_reference<_OtherIterator> > > > >::value,
-                 int> = 0>
+  template <class _OtherIterator,
+            __enable_if_t<
+                _And<is_convertible<const _OtherIterator&, _Iterator>,
+                     _Or<is_same<reference, __iterator_reference<_OtherIterator> >,
+                         is_same<reference, __make_const_lvalue_ref<__iterator_reference<_OtherIterator> > > > >::value,
+                int> = 0>
   _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR __bounded_iter(__bounded_iter<_OtherIterator> const& __other) _NOEXCEPT
       : __current_(__other.__current_),
         __begin_(__other.__begin_),
@@ -116,8 +116,7 @@ public:
   // These operations check that the iterator is dereferenceable. Since the class invariant is
   // that the iterator is always within `[begin, end]`, we only need to check it's not pointing to
   // `end`. This is easier for the optimizer because it aligns with the `iter != container.end()`
-  // checks that typical callers already use (see
-  // https://github.com/llvm/llvm-project/issues/78829).
+  // checks that typical callers already use (see https://llvm.org/PR78829).
   _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 reference operator*() const _NOEXCEPT {
     _LIBCPP_ASSERT_VALID_ELEMENT_ACCESS(
         __current_ != __end_, "__bounded_iter::operator*: Attempt to dereference an iterator at the end");
diff --git a/lib/libcxx/include/__iterator/concepts.h b/lib/libcxx/include/__iterator/concepts.h
index 20a1ab4691..3b43920443 100644
--- a/lib/libcxx/include/__iterator/concepts.h
+++ b/lib/libcxx/include/__iterator/concepts.h
@@ -117,15 +117,12 @@ template <class _Tp>
 concept __signed_integer_like = signed_integral<_Tp>;
 
 template <class _Ip>
-concept weakly_incrementable =
-    // TODO: remove this once the clang bug is fixed (bugs.llvm.org/PR48173).
-    !same_as<_Ip, bool> && // Currently, clang does not handle bool correctly.
-    movable<_Ip> && requires(_Ip __i) {
-      typename iter_difference_t<_Ip>;
-      requires __signed_integer_like<iter_difference_t<_Ip>>;
-      { ++__i } -> same_as<_Ip&>; // not required to be equality-preserving
-      __i++;                      // not required to be equality-preserving
-    };
+concept weakly_incrementable = movable<_Ip> && requires(_Ip __i) {
+  typename iter_difference_t<_Ip>;
+  requires __signed_integer_like<iter_difference_t<_Ip>>;
+  { ++__i } -> same_as<_Ip&>; // not required to be equality-preserving
+  __i++;                      // not required to be equality-preserving
+};
 
 // [iterator.concept.inc]
 template <class _Ip>
diff --git a/lib/libcxx/include/__iterator/cpp17_iterator_concepts.h b/lib/libcxx/include/__iterator/cpp17_iterator_concepts.h
index ba3536b686..ecd30d8e11 100644
--- a/lib/libcxx/include/__iterator/cpp17_iterator_concepts.h
+++ b/lib/libcxx/include/__iterator/cpp17_iterator_concepts.h
@@ -68,7 +68,8 @@ concept __cpp17_default_constructible = is_default_constructible_v<_Tp>;
 template <class _Iter>
 concept __cpp17_iterator =
     __cpp17_copy_constructible<_Iter> && __cpp17_copy_assignable<_Iter> && __cpp17_destructible<_Iter> &&
-    (is_signed_v<__iter_diff_t<_Iter>> || is_void_v<__iter_diff_t<_Iter>>) && requires(_Iter __iter) {
+    (is_signed_v<__iterator_difference_type<_Iter>> || is_void_v<__iterator_difference_type<_Iter>>) &&
+    requires(_Iter __iter) {
       { *__iter };
       { ++__iter } -> same_as<_Iter&>;
     };
@@ -81,8 +82,8 @@ concept __cpp17_input_iterator =
       { __lhs != std::as_const(__rhs) } -> __boolean_testable;
       { std::as_const(__lhs) != std::as_const(__rhs) } -> __boolean_testable;
 
-      { *__lhs } -> same_as<__iter_reference<_Iter>>;
-      { *std::as_const(__lhs) } -> same_as<__iter_reference<_Iter>>;
+      { *__lhs } -> same_as<__iterator_reference<_Iter>>;
+      { *std::as_const(__lhs) } -> same_as<__iterator_reference<_Iter>>;
 
       { ++__lhs } -> same_as<_Iter&>;
       { (void)__lhs++ };
@@ -101,19 +102,19 @@ template <class _Iter>
 concept __cpp17_forward_iterator =
     __cpp17_input_iterator<_Iter> && __cpp17_default_constructible<_Iter> && requires(_Iter __iter) {
       { __iter++ } -> convertible_to<const _Iter&>;
-      { *__iter++ } -> same_as<__iter_reference<_Iter>>;
+      { *__iter++ } -> same_as<__iterator_reference<_Iter>>;
     };
 
 template <class _Iter>
 concept __cpp17_bidirectional_iterator = __cpp17_forward_iterator<_Iter> && requires(_Iter __iter) {
   { --__iter } -> same_as<_Iter&>;
   { __iter-- } -> convertible_to<const _Iter&>;
-  { *__iter-- } -> same_as<__iter_reference<_Iter>>;
+  { *__iter-- } -> same_as<__iterator_reference<_Iter>>;
 };
 
 template <class _Iter>
 concept __cpp17_random_access_iterator =
-    __cpp17_bidirectional_iterator<_Iter> && requires(_Iter __iter, __iter_diff_t<_Iter> __n) {
+    __cpp17_bidirectional_iterator<_Iter> && requires(_Iter __iter, __iterator_difference_type<_Iter> __n) {
       { __iter += __n } -> same_as<_Iter&>;
 
       { __iter + __n } -> same_as<_Iter>;
@@ -125,13 +126,13 @@ concept __cpp17_random_access_iterator =
       { __iter - __n } -> same_as<_Iter>;
       { std::as_const(__iter) - __n } -> same_as<_Iter>;
 
-      { __iter - __iter } -> same_as<__iter_diff_t<_Iter>>;
-      { std::as_const(__iter) - __iter } -> same_as<__iter_diff_t<_Iter>>;
-      { __iter - std::as_const(__iter) } -> same_as<__iter_diff_t<_Iter>>;
-      { std::as_const(__iter) - std::as_const(__iter) } -> same_as<__iter_diff_t<_Iter>>;
+      { __iter - __iter } -> same_as<__iterator_difference_type<_Iter>>;
+      { std::as_const(__iter) - __iter } -> same_as<__iterator_difference_type<_Iter>>;
+      { __iter - std::as_const(__iter) } -> same_as<__iterator_difference_type<_Iter>>;
+      { std::as_const(__iter) - std::as_const(__iter) } -> same_as<__iterator_difference_type<_Iter>>;
 
-      { __iter[__n] } -> convertible_to<__iter_reference<_Iter>>;
-      { std::as_const(__iter)[__n] } -> convertible_to<__iter_reference<_Iter>>;
+      { __iter[__n] } -> convertible_to<__iterator_reference<_Iter>>;
+      { std::as_const(__iter)[__n] } -> convertible_to<__iterator_reference<_Iter>>;
 
       { __iter < __iter } -> __boolean_testable;
       { std::as_const(__iter) < __iter } -> __boolean_testable;
diff --git a/lib/libcxx/include/__iterator/distance.h b/lib/libcxx/include/__iterator/distance.h
index 1732aa527f..1a9fbf27f7 100644
--- a/lib/libcxx/include/__iterator/distance.h
+++ b/lib/libcxx/include/__iterator/distance.h
@@ -10,41 +10,66 @@
 #ifndef _LIBCPP___ITERATOR_DISTANCE_H
 #define _LIBCPP___ITERATOR_DISTANCE_H
 
+#include <__algorithm/for_each_segment.h>
+#include <__concepts/same_as.h>
 #include <__config>
 #include <__iterator/concepts.h>
 #include <__iterator/incrementable_traits.h>
 #include <__iterator/iterator_traits.h>
+#include <__iterator/segmented_iterator.h>
 #include <__ranges/access.h>
 #include <__ranges/concepts.h>
 #include <__ranges/size.h>
 #include <__type_traits/decay.h>
+#include <__type_traits/enable_if.h>
 #include <__type_traits/remove_cvref.h>
+#include <__utility/move.h>
 
 #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
 #  pragma GCC system_header
 #endif
 
+_LIBCPP_PUSH_MACROS
+#include <__undef_macros>
+
 _LIBCPP_BEGIN_NAMESPACE_STD
 
-template <class _InputIter>
-inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX17 typename iterator_traits<_InputIter>::difference_type
-__distance(_InputIter __first, _InputIter __last, input_iterator_tag) {
-  typename iterator_traits<_InputIter>::difference_type __r(0);
-  for (; __first != __last; ++__first)
-    ++__r;
-  return __r;
+#if _LIBCPP_STD_VER >= 20
+template <class _Iter>
+using __iter_distance_t _LIBCPP_NODEBUG = std::iter_difference_t<_Iter>;
+#else
+template <class _Iter>
+using __iter_distance_t _LIBCPP_NODEBUG = typename iterator_traits<_Iter>::difference_type;
+#endif
+
+template <class _RandIter, __enable_if_t<__has_random_access_iterator_category<_RandIter>::value, int> = 0>
+inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX17 __iter_distance_t<_RandIter>
+__distance(_RandIter __first, _RandIter __last) {
+  return __last - __first;
 }
 
-template <class _RandIter>
-inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX17 typename iterator_traits<_RandIter>::difference_type
-__distance(_RandIter __first, _RandIter __last, random_access_iterator_tag) {
-  return __last - __first;
+template <class _InputIter, class _Sent>
+inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX17 __iter_distance_t<_InputIter>
+__distance(_InputIter __first, _Sent __last) {
+  __iter_distance_t<_InputIter> __r(0);
+#if _LIBCPP_STD_VER >= 20
+  if constexpr (same_as<_InputIter, _Sent> && __is_segmented_iterator_v<_InputIter>) {
+    std::__for_each_segment(__first, __last, [&__r](auto __lfirst, auto __llast) {
+      __r += std::__distance(__lfirst, __llast);
+    });
+  } else
+#endif
+  {
+    for (; __first != __last; ++__first)
+      ++__r;
+  }
+  return __r;
 }
 
 template <class _InputIter>
 inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX17 typename iterator_traits<_InputIter>::difference_type
 distance(_InputIter __first, _InputIter __last) {
-  return std::__distance(__first, __last, typename iterator_traits<_InputIter>::iterator_category());
+  return std::__distance(__first, __last);
 }
 
 #if _LIBCPP_STD_VER >= 20
@@ -56,12 +81,7 @@ struct __distance {
   template <class _Ip, sentinel_for<_Ip> _Sp>
     requires(!sized_sentinel_for<_Sp, _Ip>)
   _LIBCPP_HIDE_FROM_ABI constexpr iter_difference_t<_Ip> operator()(_Ip __first, _Sp __last) const {
-    iter_difference_t<_Ip> __n = 0;
-    while (__first != __last) {
-      ++__first;
-      ++__n;
-    }
-    return __n;
+    return std::__distance(std::move(__first), std::move(__last));
   }
 
   template <class _Ip, sized_sentinel_for<decay_t<_Ip>> _Sp>
@@ -92,4 +112,6 @@ inline constexpr auto distance = __distance{};
 
 _LIBCPP_END_NAMESPACE_STD
 
+_LIBCPP_POP_MACROS
+
 #endif // _LIBCPP___ITERATOR_DISTANCE_H
diff --git a/lib/libcxx/include/__iterator/front_insert_iterator.h b/lib/libcxx/include/__iterator/front_insert_iterator.h
index d79c4d78b6..2ab5383a1d 100644
--- a/lib/libcxx/include/__iterator/front_insert_iterator.h
+++ b/lib/libcxx/include/__iterator/front_insert_iterator.h
@@ -26,15 +26,9 @@ _LIBCPP_PUSH_MACROS
 
 _LIBCPP_BEGIN_NAMESPACE_STD
 
-_LIBCPP_SUPPRESS_DEPRECATED_PUSH
 template <class _Container>
 class front_insert_iterator
-#if _LIBCPP_STD_VER <= 14 || !defined(_LIBCPP_ABI_NO_ITERATOR_BASES)
-    : public iterator<output_iterator_tag, void, void, void, void>
-#endif
-{
-  _LIBCPP_SUPPRESS_DEPRECATED_POP
-
+    : public __iterator_base<front_insert_iterator<_Container>, output_iterator_tag, void, void, void, void> {
 protected:
   _Container* container;
 
diff --git a/lib/libcxx/include/__iterator/insert_iterator.h b/lib/libcxx/include/__iterator/insert_iterator.h
index 95768cb8e0..6a5818b478 100644
--- a/lib/libcxx/include/__iterator/insert_iterator.h
+++ b/lib/libcxx/include/__iterator/insert_iterator.h
@@ -35,15 +35,9 @@ template <class _Container>
 using __insert_iterator_iter_t _LIBCPP_NODEBUG = typename _Container::iterator;
 #endif
 
-_LIBCPP_SUPPRESS_DEPRECATED_PUSH
 template <class _Container>
 class insert_iterator
-#if _LIBCPP_STD_VER <= 14 || !defined(_LIBCPP_ABI_NO_ITERATOR_BASES)
-    : public iterator<output_iterator_tag, void, void, void, void>
-#endif
-{
-  _LIBCPP_SUPPRESS_DEPRECATED_POP
-
+    : public __iterator_base<insert_iterator<_Container>, output_iterator_tag, void, void, void, void> {
 protected:
   _Container* container;
   __insert_iterator_iter_t<_Container> iter;
diff --git a/lib/libcxx/include/__iterator/istream_iterator.h b/lib/libcxx/include/__iterator/istream_iterator.h
index cdb8056cfe..f4b13f09c7 100644
--- a/lib/libcxx/include/__iterator/istream_iterator.h
+++ b/lib/libcxx/include/__iterator/istream_iterator.h
@@ -25,15 +25,14 @@
 
 _LIBCPP_BEGIN_NAMESPACE_STD
 
-_LIBCPP_SUPPRESS_DEPRECATED_PUSH
 template <class _Tp, class _CharT = char, class _Traits = char_traits<_CharT>, class _Distance = ptrdiff_t>
 class istream_iterator
-#if _LIBCPP_STD_VER <= 14 || !defined(_LIBCPP_ABI_NO_ITERATOR_BASES)
-    : public iterator<input_iterator_tag, _Tp, _Distance, const _Tp*, const _Tp&>
-#endif
-{
-  _LIBCPP_SUPPRESS_DEPRECATED_POP
-
+    : public __iterator_base<istream_iterator<_Tp, _CharT, _Traits, _Distance>,
+                             input_iterator_tag,
+                             _Tp,
+                             _Distance,
+                             const _Tp*,
+                             const _Tp&> {
 public:
   typedef input_iterator_tag iterator_category;
   typedef _Tp value_type;
diff --git a/lib/libcxx/include/__iterator/istreambuf_iterator.h b/lib/libcxx/include/__iterator/istreambuf_iterator.h
index b7b28cd1a0..4fc87a84f0 100644
--- a/lib/libcxx/include/__iterator/istreambuf_iterator.h
+++ b/lib/libcxx/include/__iterator/istreambuf_iterator.h
@@ -25,15 +25,14 @@
 
 _LIBCPP_BEGIN_NAMESPACE_STD
 
-_LIBCPP_SUPPRESS_DEPRECATED_PUSH
 template <class _CharT, class _Traits>
 class istreambuf_iterator
-#if _LIBCPP_STD_VER <= 14 || !defined(_LIBCPP_ABI_NO_ITERATOR_BASES)
-    : public iterator<input_iterator_tag, _CharT, typename _Traits::off_type, _CharT*, _CharT>
-#endif
-{
-  _LIBCPP_SUPPRESS_DEPRECATED_POP
-
+    : public __iterator_base<istreambuf_iterator<_CharT, _Traits>,
+                             input_iterator_tag,
+                             _CharT,
+                             typename _Traits::off_type,
+                             _CharT*,
+                             _CharT> {
 public:
   typedef input_iterator_tag iterator_category;
   typedef _CharT value_type;
diff --git a/lib/libcxx/include/__iterator/iter_move.h b/lib/libcxx/include/__iterator/iter_move.h
index 5cc1615259..a726b6e329 100644
--- a/lib/libcxx/include/__iterator/iter_move.h
+++ b/lib/libcxx/include/__iterator/iter_move.h
@@ -40,7 +40,7 @@ void iter_move() = delete;
 
 template <class _Tp>
 concept __unqualified_iter_move = __class_or_enum<remove_cvref_t<_Tp>> && requires(_Tp&& __t) {
-  // NOLINTNEXTLINE(libcpp-robust-against-adl) iter_swap ADL calls should only be made through ranges::iter_swap
+  // NOLINTNEXTLINE(libcpp-robust-against-adl) iter_move ADL calls should only be made through ranges::iter_move
   iter_move(std::forward<_Tp>(__t));
 };
 
diff --git a/lib/libcxx/include/__iterator/iterator.h b/lib/libcxx/include/__iterator/iterator.h
index d7fcd8c4dd..c599f61797 100644
--- a/lib/libcxx/include/__iterator/iterator.h
+++ b/lib/libcxx/include/__iterator/iterator.h
@@ -28,6 +28,19 @@ struct _LIBCPP_DEPRECATED_IN_CXX17 iterator {
   typedef _Category iterator_category;
 };
 
+_LIBCPP_SUPPRESS_DEPRECATED_PUSH
+#ifdef _LIBCPP_ABI_NO_ITERATOR_BASES
+template <class _Derived>
+struct __no_iterator_base {};
+
+template <class _Derived, class _Category, class _Tp, class _Distance, class _Pointer, class _Reference>
+using __iterator_base _LIBCPP_NODEBUG = __no_iterator_base<_Derived>;
+#else
+template <class _Derived, class _Category, class _Tp, class _Distance, class _Pointer, class _Reference>
+using __iterator_base _LIBCPP_NODEBUG = iterator<_Category, _Tp, _Distance, _Pointer, _Reference>;
+#endif
+_LIBCPP_SUPPRESS_DEPRECATED_POP
+
 _LIBCPP_END_NAMESPACE_STD
 
 #endif // _LIBCPP___ITERATOR_ITERATOR_H
diff --git a/lib/libcxx/include/__iterator/iterator_traits.h b/lib/libcxx/include/__iterator/iterator_traits.h
index f727e8ff36..ebf315a53b 100644
--- a/lib/libcxx/include/__iterator/iterator_traits.h
+++ b/lib/libcxx/include/__iterator/iterator_traits.h
@@ -420,44 +420,43 @@ using __has_exactly_bidirectional_iterator_category _LIBCPP_NODEBUG =
                           !__has_iterator_category_convertible_to<_Tp, random_access_iterator_tag>::value>;
 
 template <class _InputIterator>
-using __iter_value_type _LIBCPP_NODEBUG = typename iterator_traits<_InputIterator>::value_type;
+using __iterator_value_type _LIBCPP_NODEBUG = typename iterator_traits<_InputIterator>::value_type;
 
 #if _LIBCPP_STD_VER >= 23
 template <class _InputIterator>
-using __iter_key_type _LIBCPP_NODEBUG = remove_const_t<tuple_element_t<0, __iter_value_type<_InputIterator>>>;
+using __iter_key_type _LIBCPP_NODEBUG = remove_const_t<tuple_element_t<0, __iterator_value_type<_InputIterator>>>;
 
 template <class _InputIterator>
-using __iter_mapped_type _LIBCPP_NODEBUG = tuple_element_t<1, __iter_value_type<_InputIterator>>;
+using __iter_mapped_type _LIBCPP_NODEBUG = tuple_element_t<1, __iterator_value_type<_InputIterator>>;
 
 template <class _InputIterator>
 using __iter_to_alloc_type _LIBCPP_NODEBUG =
-    pair<const tuple_element_t<0, __iter_value_type<_InputIterator>>,
-         tuple_element_t<1, __iter_value_type<_InputIterator>>>;
+    pair<const tuple_element_t<0, __iterator_value_type<_InputIterator>>,
+         tuple_element_t<1, __iterator_value_type<_InputIterator>>>;
 #else
 template <class _InputIterator>
-using __iter_key_type _LIBCPP_NODEBUG =
-    __remove_const_t<typename iterator_traits<_InputIterator>::value_type::first_type>;
+using __iter_key_type _LIBCPP_NODEBUG = __remove_const_t<typename __iterator_value_type<_InputIterator>::first_type>;
 
 template <class _InputIterator>
-using __iter_mapped_type _LIBCPP_NODEBUG = typename iterator_traits<_InputIterator>::value_type::second_type;
+using __iter_mapped_type _LIBCPP_NODEBUG = typename __iterator_value_type<_InputIterator>::second_type;
 
 template <class _InputIterator>
 using __iter_to_alloc_type _LIBCPP_NODEBUG =
-    pair<const typename iterator_traits<_InputIterator>::value_type::first_type,
-         typename iterator_traits<_InputIterator>::value_type::second_type>;
+    pair<const typename __iterator_value_type<_InputIterator>::first_type,
+         typename __iterator_value_type<_InputIterator>::second_type>;
 #endif // _LIBCPP_STD_VER >= 23
 
 template <class _Iter>
-using __iterator_category_type _LIBCPP_NODEBUG = typename iterator_traits<_Iter>::iterator_category;
+using __iterator_iterator_category _LIBCPP_NODEBUG = typename iterator_traits<_Iter>::iterator_category;
 
 template <class _Iter>
-using __iterator_pointer_type _LIBCPP_NODEBUG = typename iterator_traits<_Iter>::pointer;
+using __iterator_pointer _LIBCPP_NODEBUG = typename iterator_traits<_Iter>::pointer;
 
 template <class _Iter>
-using __iter_diff_t _LIBCPP_NODEBUG = typename iterator_traits<_Iter>::difference_type;
+using __iterator_difference_type _LIBCPP_NODEBUG = typename iterator_traits<_Iter>::difference_type;
 
 template <class _Iter>
-using __iter_reference _LIBCPP_NODEBUG = typename iterator_traits<_Iter>::reference;
+using __iterator_reference _LIBCPP_NODEBUG = typename iterator_traits<_Iter>::reference;
 
 #if _LIBCPP_STD_VER >= 20
 
diff --git a/lib/libcxx/include/__iterator/ostream_iterator.h b/lib/libcxx/include/__iterator/ostream_iterator.h
index 2b459f4628..64e79f010f 100644
--- a/lib/libcxx/include/__iterator/ostream_iterator.h
+++ b/lib/libcxx/include/__iterator/ostream_iterator.h
@@ -24,15 +24,9 @@
 
 _LIBCPP_BEGIN_NAMESPACE_STD
 
-_LIBCPP_SUPPRESS_DEPRECATED_PUSH
 template <class _Tp, class _CharT = char, class _Traits = char_traits<_CharT> >
 class ostream_iterator
-#if _LIBCPP_STD_VER <= 14 || !defined(_LIBCPP_ABI_NO_ITERATOR_BASES)
-    : public iterator<output_iterator_tag, void, void, void, void>
-#endif
-{
-  _LIBCPP_SUPPRESS_DEPRECATED_POP
-
+    : public __iterator_base<ostream_iterator<_Tp, _CharT, _Traits>, output_iterator_tag, void, void, void, void> {
 public:
   typedef output_iterator_tag iterator_category;
   typedef void value_type;
diff --git a/lib/libcxx/include/__iterator/ostreambuf_iterator.h b/lib/libcxx/include/__iterator/ostreambuf_iterator.h
index 7133331a7b..4a3b2fa024 100644
--- a/lib/libcxx/include/__iterator/ostreambuf_iterator.h
+++ b/lib/libcxx/include/__iterator/ostreambuf_iterator.h
@@ -25,15 +25,9 @@
 
 _LIBCPP_BEGIN_NAMESPACE_STD
 
-_LIBCPP_SUPPRESS_DEPRECATED_PUSH
 template <class _CharT, class _Traits>
 class ostreambuf_iterator
-#if _LIBCPP_STD_VER <= 14 || !defined(_LIBCPP_ABI_NO_ITERATOR_BASES)
-    : public iterator<output_iterator_tag, void, void, void, void>
-#endif
-{
-  _LIBCPP_SUPPRESS_DEPRECATED_POP
-
+    : public __iterator_base<ostreambuf_iterator<_CharT, _Traits>, output_iterator_tag, void, void, void, void> {
 public:
   typedef output_iterator_tag iterator_category;
   typedef void value_type;
diff --git a/lib/libcxx/include/__iterator/reverse_iterator.h b/lib/libcxx/include/__iterator/reverse_iterator.h
index 8935e5a8ff..834695dd16 100644
--- a/lib/libcxx/include/__iterator/reverse_iterator.h
+++ b/lib/libcxx/include/__iterator/reverse_iterator.h
@@ -46,21 +46,16 @@
 
 _LIBCPP_BEGIN_NAMESPACE_STD
 
-_LIBCPP_SUPPRESS_DEPRECATED_PUSH
 template <class _Iter>
 class reverse_iterator
-#if _LIBCPP_STD_VER <= 14 || !defined(_LIBCPP_ABI_NO_ITERATOR_BASES)
-    : public iterator<typename iterator_traits<_Iter>::iterator_category,
-                      typename iterator_traits<_Iter>::value_type,
-                      typename iterator_traits<_Iter>::difference_type,
-                      typename iterator_traits<_Iter>::pointer,
-                      typename iterator_traits<_Iter>::reference>
-#endif
-{
-  _LIBCPP_SUPPRESS_DEPRECATED_POP
-
+    : public __iterator_base<reverse_iterator<_Iter>,
+                             typename iterator_traits<_Iter>::iterator_category,
+                             typename iterator_traits<_Iter>::value_type,
+                             typename iterator_traits<_Iter>::difference_type,
+                             typename iterator_traits<_Iter>::pointer,
+                             typename iterator_traits<_Iter>::reference> {
 private:
-#ifndef _LIBCPP_ABI_NO_ITERATOR_BASES
+#ifndef _LIBCPP_ABI_NO_REVERSE_ITERATOR_SECOND_MEMBER
   _Iter __t_; // no longer used as of LWG #2360, not removed due to ABI break
 #endif
 
@@ -91,7 +86,7 @@ public:
   using reference       = typename iterator_traits<_Iter>::reference;
 #endif
 
-#ifndef _LIBCPP_ABI_NO_ITERATOR_BASES
+#ifndef _LIBCPP_ABI_NO_REVERSE_ITERATOR_SECOND_MEMBER
   _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX17 reverse_iterator() : __t_(), current() {}
 
   _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX17 explicit reverse_iterator(_Iter __x) : __t_(__x), current(__x) {}
diff --git a/lib/libcxx/include/__iterator/segmented_iterator.h b/lib/libcxx/include/__iterator/segmented_iterator.h
index af27a7be41..dc56a74013 100644
--- a/lib/libcxx/include/__iterator/segmented_iterator.h
+++ b/lib/libcxx/include/__iterator/segmented_iterator.h
@@ -67,18 +67,13 @@ struct __segmented_iterator_traits;
 */
 
 template <class _Tp, size_t = 0>
-struct __has_specialization : false_type {};
+inline const bool __has_specialization_v = false;
 
 template <class _Tp>
-struct __has_specialization<_Tp, sizeof(_Tp) * 0> : true_type {};
+inline const bool __has_specialization_v<_Tp, sizeof(_Tp) * 0> = true;
 
 template <class _Iterator>
-using __is_segmented_iterator _LIBCPP_NODEBUG = __has_specialization<__segmented_iterator_traits<_Iterator> >;
-
-template <class _SegmentedIterator>
-struct __has_random_access_local_iterator
-    : __has_random_access_iterator_category<
-          typename __segmented_iterator_traits< _SegmentedIterator >::__local_iterator > {};
+inline const bool __is_segmented_iterator_v = __has_specialization_v<__segmented_iterator_traits<_Iterator> >;
 
 _LIBCPP_END_NAMESPACE_STD
 
diff --git a/lib/libcxx/include/__iterator/static_bounded_iter.h b/lib/libcxx/include/__iterator/static_bounded_iter.h
index 8f4fbdf6df..d8fc7d185e 100644
--- a/lib/libcxx/include/__iterator/static_bounded_iter.h
+++ b/lib/libcxx/include/__iterator/static_bounded_iter.h
@@ -99,9 +99,9 @@ struct __static_bounded_iter {
 
   template <class _OtherIterator,
             __enable_if_t<
-                _And< is_convertible<const _OtherIterator&, _Iterator>,
-                      _Or<is_same<reference, __iter_reference<_OtherIterator> >,
-                          is_same<reference, __make_const_lvalue_ref<__iter_reference<_OtherIterator> > > > >::value,
+                _And<is_convertible<const _OtherIterator&, _Iterator>,
+                     _Or<is_same<reference, __iterator_reference<_OtherIterator> >,
+                         is_same<reference, __make_const_lvalue_ref<__iterator_reference<_OtherIterator> > > > >::value,
                 int> = 0>
   _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR
   __static_bounded_iter(__static_bounded_iter<_OtherIterator, _Size> const& __other) _NOEXCEPT
diff --git a/lib/libcxx/include/__iterator/wrap_iter.h b/lib/libcxx/include/__iterator/wrap_iter.h
index 2b5bc489dd..98745f600a 100644
--- a/lib/libcxx/include/__iterator/wrap_iter.h
+++ b/lib/libcxx/include/__iterator/wrap_iter.h
@@ -49,12 +49,12 @@ private:
 
 public:
   _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 __wrap_iter() _NOEXCEPT : __i_() {}
-  template <
-      class _OtherIter,
-      __enable_if_t< _And< is_convertible<const _OtherIter&, _Iter>,
-                           _Or<is_same<reference, __iter_reference<_OtherIter> >,
-                               is_same<reference, __make_const_lvalue_ref<__iter_reference<_OtherIter> > > > >::value,
-                     int> = 0>
+  template <class _OtherIter,
+            __enable_if_t<
+                _And<is_convertible<const _OtherIter&, _Iter>,
+                     _Or<is_same<reference, __iterator_reference<_OtherIter> >,
+                         is_same<reference, __make_const_lvalue_ref<__iterator_reference<_OtherIter> > > > >::value,
+                int> = 0>
   _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 __wrap_iter(const __wrap_iter<_OtherIter>& __u) _NOEXCEPT
       : __i_(__u.__i_) {}
   _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 reference operator*() const _NOEXCEPT { return *__i_; }
@@ -117,6 +117,8 @@ private:
   friend class span;
   template <class _Tp, size_t _Size>
   friend struct array;
+  template <class _Tp, class>
+  friend struct __optional_iterator;
 };
 
 template <class _Iter1>
diff --git a/lib/libcxx/include/__locale b/lib/libcxx/include/__locale
index 757a53951f..5b1787451f 100644
--- a/lib/libcxx/include/__locale
+++ b/lib/libcxx/include/__locale
@@ -57,9 +57,8 @@ _LIBCPP_HIDE_FROM_ABI const _Facet& use_facet(const locale&);
 class _LIBCPP_EXPORTED_FROM_ABI locale {
 public:
   // locale is essentially a shared_ptr that doesn't support weak_ptrs and never got a move constructor,
-  // so it is trivially relocatable. Like shared_ptr, it is also replaceable.
+  // so it is trivially relocatable.
   using __trivially_relocatable _LIBCPP_NODEBUG = locale;
-  using __replaceable _LIBCPP_NODEBUG           = locale;
 
   // types:
   class _LIBCPP_EXPORTED_FROM_ABI facet;
@@ -389,7 +388,7 @@ public:
   static const mask xdigit       = _ISXDIGIT;
   static const mask blank        = _ISBLANK;
   static const mask __regex_word = 0x8000;
-#  elif defined(_NEWLIB_VERSION)
+#  elif _LIBCPP_LIBC_NEWLIB
   // Same type as Newlib's _ctype_ array in newlib/libc/include/ctype.h.
   typedef char mask;
   // In case char is signed, static_cast is needed to avoid warning on
@@ -585,7 +584,7 @@ public:
 #  ifdef _CACHED_RUNES
   static const size_t table_size = _CACHED_RUNES;
 #  else
-  static const size_t table_size = 256; // FIXME: Don't hardcode this.
+  static const size_t table_size = 256;
 #  endif
   _LIBCPP_HIDE_FROM_ABI const mask* table() const _NOEXCEPT { return __tab_; }
   static const mask* classic_table() _NOEXCEPT;
@@ -1478,9 +1477,6 @@ public:
 
 protected:
   ~numpunct_byname() override;
-
-private:
-  void __init(const char*);
 };
 
 #  if _LIBCPP_HAS_WIDE_CHARACTERS
@@ -1495,9 +1491,6 @@ public:
 
 protected:
   ~numpunct_byname() override;
-
-private:
-  void __init(const char*);
 };
 #  endif // _LIBCPP_HAS_WIDE_CHARACTERS
 
diff --git a/lib/libcxx/include/__locale_dir/locale_base_api.h b/lib/libcxx/include/__locale_dir/locale_base_api.h
index 5e6c69e95e..0474c1db35 100644
--- a/lib/libcxx/include/__locale_dir/locale_base_api.h
+++ b/lib/libcxx/include/__locale_dir/locale_base_api.h
@@ -57,15 +57,11 @@
 //  float               __strtof(const char*, char**, __locale_t);
 //  double              __strtod(const char*, char**, __locale_t);
 //  long double         __strtold(const char*, char**, __locale_t);
-//  long long           __strtoll(const char*, char**, __locale_t);
-//  unsigned long long  __strtoull(const char*, char**, __locale_t);
 // }
 //
 // Character manipulation functions
 // --------------------------------
 // namespace __locale {
-//  int     __isdigit(int, __locale_t);  // required by the headers
-//  int     __isxdigit(int, __locale_t); // required by the headers
 //  int     __toupper(int, __locale_t);
 //  int     __tolower(int, __locale_t);
 //  int     __strcoll(const char*, const char*, __locale_t);
@@ -106,7 +102,6 @@
 //
 //  int     __snprintf(char*, size_t, __locale_t, const char*, ...); // required by the headers
 //  int     __asprintf(char**, __locale_t, const char*, ...);        // required by the headers
-//  int     __sscanf(const char*, __locale_t, const char*, ...);     // required by the headers
 // }
 
 #if _LIBCPP_HAS_LOCALIZATION
@@ -115,7 +110,6 @@
 #    include <__locale_dir/support/apple.h>
 #  elif defined(__FreeBSD__)
 #    include <__locale_dir/support/freebsd.h>
-/* zig patch: https://github.com/llvm/llvm-project/pull/143055 */
 #  elif defined(__NetBSD__)
 #    include <__locale_dir/support/netbsd.h>
 #  elif defined(_LIBCPP_MSVCRT_LIKE)
@@ -124,20 +118,20 @@
 #    include <__locale_dir/support/fuchsia.h>
 #  elif defined(__linux__)
 #    include <__locale_dir/support/linux.h>
+#  elif _LIBCPP_LIBC_NEWLIB
+#    include <__locale_dir/support/newlib.h>
+#  elif defined(_AIX)
+#    include <__locale_dir/support/aix.h>
 #  else
 
 // TODO: This is a temporary definition to bridge between the old way we defined the locale base API
 //       (by providing global non-reserved names) and the new API. As we move individual platforms
 //       towards the new way of defining the locale base API, this should disappear since each platform
 //       will define those directly.
-#    if defined(_AIX) || defined(__MVS__)
+#    if defined(__MVS__)
 #      include <__locale_dir/locale_base_api/ibm.h>
-#    elif defined(__ANDROID__)
-#      include <__locale_dir/locale_base_api/android.h>
 #    elif defined(__OpenBSD__)
 #      include <__locale_dir/locale_base_api/openbsd.h>
-#    elif defined(__wasi__) || _LIBCPP_HAS_MUSL_LIBC
-#      include <__locale_dir/locale_base_api/musl.h>
 #    endif
 
 #    include <__locale_dir/locale_base_api/bsd_locale_fallbacks.h>
@@ -197,21 +191,9 @@ inline _LIBCPP_HIDE_FROM_ABI long double __strtold(const char* __nptr, char** __
   return strtold_l(__nptr, __endptr, __loc);
 }
 
-inline _LIBCPP_HIDE_FROM_ABI long long __strtoll(const char* __nptr, char** __endptr, int __base, __locale_t __loc) {
-  return strtoll_l(__nptr, __endptr, __base, __loc);
-}
-
-inline _LIBCPP_HIDE_FROM_ABI unsigned long long
-__strtoull(const char* __nptr, char** __endptr, int __base, __locale_t __loc) {
-  return strtoull_l(__nptr, __endptr, __base, __loc);
-}
-
 //
 // Character manipulation functions
 //
-inline _LIBCPP_HIDE_FROM_ABI int __isdigit(int __ch, __locale_t __loc) { return isdigit_l(__ch, __loc); }
-inline _LIBCPP_HIDE_FROM_ABI int __isxdigit(int __ch, __locale_t __loc) { return isxdigit_l(__ch, __loc); }
-
 #    if defined(_LIBCPP_BUILDING_LIBRARY)
 inline _LIBCPP_HIDE_FROM_ABI int __strcoll(const char* __s1, const char* __s2, __locale_t __loc) {
   return strcoll_l(__s1, __s2, __loc);
@@ -307,11 +289,6 @@ _LIBCPP_HIDE_FROM_ABI _LIBCPP_VARIADIC_ATTRIBUTE_FORMAT(__printf__, 3, 4) int __
     char** __s, __locale_t __loc, const char* __format, _Args&&... __args) {
   return std::__libcpp_asprintf_l(__s, __loc, __format, std::forward<_Args>(__args)...);
 }
-template <class... _Args>
-_LIBCPP_HIDE_FROM_ABI _LIBCPP_VARIADIC_ATTRIBUTE_FORMAT(__scanf__, 3, 4) int __sscanf(
-    const char* __s, __locale_t __loc, const char* __format, _Args&&... __args) {
-  return std::__libcpp_sscanf_l(__s, __loc, __format, std::forward<_Args>(__args)...);
-}
 _LIBCPP_DIAGNOSTIC_POP
 #    undef _LIBCPP_VARIADIC_ATTRIBUTE_FORMAT
 
diff --git a/lib/libcxx/include/__locale_dir/locale_base_api/bsd_locale_fallbacks.h b/lib/libcxx/include/__locale_dir/locale_base_api/bsd_locale_fallbacks.h
index b62a1b737e..8cdbe0cd15 100644
--- a/lib/libcxx/include/__locale_dir/locale_base_api/bsd_locale_fallbacks.h
+++ b/lib/libcxx/include/__locale_dir/locale_base_api/bsd_locale_fallbacks.h
@@ -125,16 +125,6 @@ inline _LIBCPP_ATTRIBUTE_FORMAT(__printf__, 3, 4) int __libcpp_asprintf_l(
   return __res;
 }
 
-inline _LIBCPP_ATTRIBUTE_FORMAT(__scanf__, 3, 4) int __libcpp_sscanf_l(
-    const char* __s, locale_t __l, const char* __format, ...) {
-  va_list __va;
-  va_start(__va, __format);
-  __locale_guard __current(__l);
-  int __res = vsscanf(__s, __format, __va);
-  va_end(__va);
-  return __res;
-}
-
 _LIBCPP_END_NAMESPACE_STD
 
 #endif // _LIBCPP___LOCALE_DIR_LOCALE_BASE_API_BSD_LOCALE_FALLBACKS_H
diff --git a/lib/libcxx/include/__locale_dir/messages.h b/lib/libcxx/include/__locale_dir/messages.h
index c04bf04025..686f472840 100644
--- a/lib/libcxx/include/__locale_dir/messages.h
+++ b/lib/libcxx/include/__locale_dir/messages.h
@@ -22,7 +22,7 @@
 
 #  if defined(__unix__) || (defined(__APPLE__) && defined(__MACH__))
 // Most unix variants have catopen.  These are the specific ones that don't.
-#    if !defined(__BIONIC__) && !defined(_NEWLIB_VERSION) && !defined(__EMSCRIPTEN__)
+#    if !defined(__BIONIC__) && !_LIBCPP_LIBC_NEWLIB && !defined(__EMSCRIPTEN__)
 #      define _LIBCPP_HAS_CATOPEN 1
 #      include <nl_types.h>
 #    else
diff --git a/lib/libcxx/include/__locale_dir/money.h b/lib/libcxx/include/__locale_dir/money.h
index c129666550..12ba38467d 100644
--- a/lib/libcxx/include/__locale_dir/money.h
+++ b/lib/libcxx/include/__locale_dir/money.h
@@ -433,7 +433,7 @@ bool money_get<_CharT, _InputIterator>::__do_get(
           __err |= ios_base::failbit;
           return false;
         }
-        for (++__b; __fd > 0; --__fd, ++__b) {
+        for (++__b; __fd > 0; --__fd, (void)++__b) {
           if (__b == __e || !__ct.is(ctype_base::digit, *__b)) {
             __err |= ios_base::failbit;
             return false;
@@ -451,7 +451,7 @@ bool money_get<_CharT, _InputIterator>::__do_get(
     }
   }
   if (__trailing_sign) {
-    for (unsigned __i = 1; __i < __trailing_sign->size(); ++__i, ++__b) {
+    for (unsigned __i = 1; __i < __trailing_sign->size(); ++__i, (void)++__b) {
       if (__b == __e || *__b != (*__trailing_sign)[__i]) {
         __err |= ios_base::failbit;
         return false;
diff --git a/lib/libcxx/include/__locale_dir/num.h b/lib/libcxx/include/__locale_dir/num.h
index 7ca8ffe348..b7ea02e7cb 100644
--- a/lib/libcxx/include/__locale_dir/num.h
+++ b/lib/libcxx/include/__locale_dir/num.h
@@ -9,8 +9,10 @@
 #ifndef _LIBCPP___LOCALE_DIR_NUM_H
 #define _LIBCPP___LOCALE_DIR_NUM_H
 
+#include <__algorithm/copy.h>
 #include <__algorithm/find.h>
 #include <__algorithm/reverse.h>
+#include <__algorithm/simd_utils.h>
 #include <__charconv/to_chars_integral.h>
 #include <__charconv/traits.h>
 #include <__config>
@@ -22,6 +24,7 @@
 #include <__locale_dir/scan_keyword.h>
 #include <__memory/unique_ptr.h>
 #include <__system_error/errc.h>
+#include <__type_traits/is_signed.h>
 #include <cerrno>
 #include <ios>
 #include <streambuf>
@@ -46,9 +49,9 @@ struct _LIBCPP_EXPORTED_FROM_ABI __num_get_base {
   static int __get_base(ios_base&);
   static const char __src[33]; // "0123456789abcdefABCDEFxX+-pPiInN"
   // count of leading characters in __src used for parsing integers ("012..X+-")
-  static const size_t __int_chr_cnt = 26;
+  static inline const size_t __int_chr_cnt = 26;
   // count of leading characters in __src used for parsing floating-point values ("012..-pP")
-  static const size_t __fp_chr_cnt = 28;
+  static inline const size_t __fp_chr_cnt = 28;
 };
 
 template <class _CharT>
@@ -71,7 +74,8 @@ struct __num_get : protected __num_get_base {
 
   [[__deprecated__("This exists only for ABI compatibility")]] static string
   __stage2_int_prep(ios_base& __iob, _CharT* __atoms, _CharT& __thousands_sep);
-  static int __stage2_int_loop(
+
+  [[__deprecated__("This exists only for ABI compatibility")]] static int __stage2_int_loop(
       _CharT __ct,
       int __base,
       char* __a,
@@ -83,11 +87,24 @@ struct __num_get : protected __num_get_base {
       unsigned*& __g_end,
       _CharT* __atoms);
 
-  _LIBCPP_HIDE_FROM_ABI static string __stage2_int_prep(ios_base& __iob, _CharT& __thousands_sep) {
-    locale __loc                 = __iob.getloc();
-    const numpunct<_CharT>& __np = use_facet<numpunct<_CharT> >(__loc);
-    __thousands_sep              = __np.thousands_sep();
-    return __np.grouping();
+  _LIBCPP_HIDE_FROM_ABI static ptrdiff_t __atoms_offset(const _CharT* __atoms, _CharT __val) {
+    // TODO: Remove the manual vectorization once https://llvm.org/PR168551 is resolved
+#  if _LIBCPP_HAS_ALGORITHM_VECTOR_UTILS
+    if constexpr (is_same<_CharT, char>::value) {
+      // TODO(LLVM 24): This can be removed, since -Wpsabi doesn't warn on [[gnu::always_inline]] functions anymore.
+      _LIBCPP_DIAGNOSTIC_PUSH
+      _LIBCPP_CLANG_DIAGNOSTIC_IGNORED("-Wpsabi")
+      using __vec   = __simd_vector<char, 32>;
+      __vec __chars = std::__broadcast<__vec>(__val);
+      __vec __cmp   = std::__partial_load<__vec, __int_chr_cnt>(__atoms);
+      auto __res    = __chars == __cmp;
+      if (std::__none_of(__res))
+        return __int_chr_cnt;
+      return std::min(__int_chr_cnt, std::__find_first_set(__res));
+      _LIBCPP_DIAGNOSTIC_POP
+    }
+#  endif
+    return std::find(__atoms, __atoms + __int_chr_cnt, __val) - __atoms;
   }
 
   _LIBCPP_HIDE_FROM_ABI const _CharT* __do_widen(ios_base& __iob, _CharT* __atoms) const {
@@ -120,54 +137,6 @@ string __num_get<_CharT>::__stage2_float_prep(
   return __np.grouping();
 }
 
-template <class _CharT>
-int __num_get<_CharT>::__stage2_int_loop(
-    _CharT __ct,
-    int __base,
-    char* __a,
-    char*& __a_end,
-    unsigned& __dc,
-    _CharT __thousands_sep,
-    const string& __grouping,
-    unsigned* __g,
-    unsigned*& __g_end,
-    _CharT* __atoms) {
-  if (__a_end == __a && (__ct == __atoms[24] || __ct == __atoms[25])) {
-    *__a_end++ = __ct == __atoms[24] ? '+' : '-';
-    __dc       = 0;
-    return 0;
-  }
-  if (__grouping.size() != 0 && __ct == __thousands_sep) {
-    if (__g_end - __g < __num_get_buf_sz) {
-      *__g_end++ = __dc;
-      __dc       = 0;
-    }
-    return 0;
-  }
-  ptrdiff_t __f = std::find(__atoms, __atoms + __int_chr_cnt, __ct) - __atoms;
-  if (__f >= 24)
-    return -1;
-  switch (__base) {
-  case 8:
-  case 10:
-    if (__f >= __base)
-      return -1;
-    break;
-  case 16:
-    if (__f < 22)
-      break;
-    if (__a_end != __a && __a_end - __a <= 2 && __a_end[-1] == '0') {
-      __dc       = 0;
-      *__a_end++ = __src[__f];
-      return 0;
-    }
-    return -1;
-  }
-  *__a_end++ = __src[__f];
-  ++__dc;
-  return 0;
-}
-
 template <class _CharT>
 int __num_get<_CharT>::__stage2_float_loop(
     _CharT __ct,
@@ -272,65 +241,6 @@ _LIBCPP_HIDE_FROM_ABI _Tp __num_get_float(const char* __a, const char* __a_end,
   return 0;
 }
 
-template <class _Tp>
-_LIBCPP_HIDE_FROM_ABI _Tp
-__num_get_signed_integral(const char* __a, const char* __a_end, ios_base::iostate& __err, int __base) {
-  if (__a != __a_end) {
-    __libcpp_remove_reference_t<decltype(errno)> __save_errno = errno;
-    errno                                                     = 0;
-    char* __p2;
-    long long __ll = __locale::__strtoll(__a, &__p2, __base, _LIBCPP_GET_C_LOCALE);
-    __libcpp_remove_reference_t<decltype(errno)> __current_errno = errno;
-    if (__current_errno == 0)
-      errno = __save_errno;
-    if (__p2 != __a_end) {
-      __err = ios_base::failbit;
-      return 0;
-    } else if (__current_errno == ERANGE || __ll < numeric_limits<_Tp>::min() || numeric_limits<_Tp>::max() < __ll) {
-      __err = ios_base::failbit;
-      if (__ll > 0)
-        return numeric_limits<_Tp>::max();
-      else
-        return numeric_limits<_Tp>::min();
-    }
-    return static_cast<_Tp>(__ll);
-  }
-  __err = ios_base::failbit;
-  return 0;
-}
-
-template <class _Tp>
-_LIBCPP_HIDE_FROM_ABI _Tp
-__num_get_unsigned_integral(const char* __a, const char* __a_end, ios_base::iostate& __err, int __base) {
-  if (__a != __a_end) {
-    const bool __negate = *__a == '-';
-    if (__negate && ++__a == __a_end) {
-      __err = ios_base::failbit;
-      return 0;
-    }
-    __libcpp_remove_reference_t<decltype(errno)> __save_errno = errno;
-    errno                                                     = 0;
-    char* __p2;
-    unsigned long long __ll = __locale::__strtoull(__a, &__p2, __base, _LIBCPP_GET_C_LOCALE);
-    __libcpp_remove_reference_t<decltype(errno)> __current_errno = errno;
-    if (__current_errno == 0)
-      errno = __save_errno;
-    if (__p2 != __a_end) {
-      __err = ios_base::failbit;
-      return 0;
-    } else if (__current_errno == ERANGE || numeric_limits<_Tp>::max() < __ll) {
-      __err = ios_base::failbit;
-      return numeric_limits<_Tp>::max();
-    }
-    _Tp __res = static_cast<_Tp>(__ll);
-    if (__negate)
-      __res = -__res;
-    return __res;
-  }
-  __err = ios_base::failbit;
-  return 0;
-}
-
 template <class _CharT, class _InputIterator = istreambuf_iterator<_CharT> >
 class num_get : public locale::facet, private __num_get<_CharT> {
 public:
@@ -468,137 +378,196 @@ protected:
     return __b;
   }
 
-  template <class _Signed>
-  _LIBCPP_HIDE_FROM_ABI iter_type
-  __do_get_signed(iter_type __b, iter_type __e, ios_base& __iob, ios_base::iostate& __err, _Signed& __v) const {
-    // Stage 1
-    int __base = this->__get_base(__iob);
-    // Stage 2
-    char_type __thousands_sep;
-    const int __atoms_size = __num_get_base::__int_chr_cnt;
-    char_type __atoms1[__atoms_size];
-    const char_type* __atoms = this->__do_widen(__iob, __atoms1);
-    string __grouping        = this->__stage2_int_prep(__iob, __thousands_sep);
-    string __buf;
-    __buf.resize(__buf.capacity());
-    char* __a     = &__buf[0];
-    char* __a_end = __a;
-    unsigned __g[__num_get_base::__num_get_buf_sz];
-    unsigned* __g_end = __g;
-    unsigned __dc     = 0;
-    for (; __b != __e; ++__b) {
-      if (__a_end == __a + __buf.size()) {
-        size_t __tmp = __buf.size();
-        __buf.resize(2 * __buf.size());
-        __buf.resize(__buf.capacity());
-        __a     = &__buf[0];
-        __a_end = __a + __tmp;
-      }
-      if (this->__stage2_int_loop(
-              *__b,
-              __base,
-              __a,
-              __a_end,
-              __dc,
-              __thousands_sep,
-              __grouping,
-              __g,
-              __g_end,
-              const_cast<char_type*>(__atoms)))
-        break;
-    }
-    if (__grouping.size() != 0 && __g_end - __g < __num_get_base::__num_get_buf_sz)
-      *__g_end++ = __dc;
-    // Stage 3
-    __v = std::__num_get_signed_integral<_Signed>(__a, __a_end, __err, __base);
-    // Digit grouping checked
-    __check_grouping(__grouping, __g, __g_end, __err);
-    // EOF checked
-    if (__b == __e)
-      __err |= ios_base::eofbit;
-    return __b;
-  }
+  template <class _MaybeSigned>
+  iter_type __do_get_integral(
+      iter_type __first, iter_type __last, ios_base& __iob, ios_base::iostate& __err, _MaybeSigned& __v) const {
+    using _Unsigned = __make_unsigned_t<_MaybeSigned>;
 
-  template <class _Unsigned>
-  _LIBCPP_HIDE_FROM_ABI iter_type
-  __do_get_unsigned(iter_type __b, iter_type __e, ios_base& __iob, ios_base::iostate& __err, _Unsigned& __v) const {
     // Stage 1
     int __base = this->__get_base(__iob);
-    // Stage 2
-    char_type __thousands_sep;
-    const int __atoms_size = __num_get_base::__int_chr_cnt;
-    char_type __atoms1[__atoms_size];
-    const char_type* __atoms = this->__do_widen(__iob, __atoms1);
-    string __grouping        = this->__stage2_int_prep(__iob, __thousands_sep);
-    string __buf;
-    __buf.resize(__buf.capacity());
-    char* __a     = &__buf[0];
-    char* __a_end = __a;
+
+    // Stages 2 & 3
+    // These are combined into a single step where we parse the characters and calculate the value in one go instead of
+    // storing the relevant characters first (in an allocated buffer) and parsing them after we have extracted them.
+    // This makes the whole process significantly faster, since we avoid potential allocations and copies.
+
+    const auto& __numpunct    = use_facet<numpunct<_CharT> >(__iob.getloc());
+    char_type __thousands_sep = __numpunct.thousands_sep();
+    string __grouping         = __numpunct.grouping();
+
+    char_type __atoms_buffer[__num_get_base::__int_chr_cnt];
+    const char_type* __atoms = this->__do_widen(__iob, __atoms_buffer);
     unsigned __g[__num_get_base::__num_get_buf_sz];
     unsigned* __g_end = __g;
     unsigned __dc     = 0;
-    for (; __b != __e; ++__b) {
-      if (__a_end == __a + __buf.size()) {
-        size_t __tmp = __buf.size();
-        __buf.resize(2 * __buf.size());
-        __buf.resize(__buf.capacity());
-        __a     = &__buf[0];
-        __a_end = __a + __tmp;
-      }
-      if (this->__stage2_int_loop(
-              *__b,
-              __base,
-              __a,
-              __a_end,
-              __dc,
-              __thousands_sep,
-              __grouping,
-              __g,
-              __g_end,
-              const_cast<char_type*>(__atoms)))
-        break;
+
+    if (__first == __last) {
+      __err |= ios_base::eofbit | ios_base::failbit;
+      __v = 0;
+      return __first;
     }
+
+    // Leading separators each close an empty group; stop at __last so we never dereference the end iterator.
+    while (!__grouping.empty() && __first != __last && *__first == __thousands_sep) {
+      ++__first;
+      if (__g_end - __g < this->__num_get_buf_sz)
+        *__g_end++ = 0;
+    }
+
+    bool __negate = false;
+    // Optional sign: *__first == '+' || *__first == '-' (guarded: the loop above may have consumed all input)
+    if (__first != __last && (*__first == __atoms[24] || *__first == __atoms[25])) {
+      __negate = *__first == __atoms[25];
+      ++__first;
+    }
+    if (__first == __last) {
+      __err |= ios_base::eofbit | ios_base::failbit;
+      __v = 0;
+      return __first;
+    }
+
+    bool __parsed_num = false;
+
+    // If we don't have a pre-set base, figure it out and swallow any prefix
+    if (__base == 0) {
+      // *__first == '0'
+      if (*__first == __atoms[0]) {
+        ++__first;
+        if (__first == __last) {
+          __err |= ios_base::eofbit;
+          __v = 0;
+          return __first;
+        }
+        // __c2 == 'x' || __c2 == 'X'
+        if (auto __c2 = *__first; __c2 == __atoms[22] || __c2 == __atoms[23]) {
+          __base = 16;
+          ++__first;
+        } else {
+          __base       = 8;
+          __parsed_num = true; // We only swallowed '0', so we've started to parse a number
+          ++__dc;              // The swallowed '0' is a digit and counts towards the current group
+        }
+      } else {
+        __base = 10;
+      }
+
+      // If the base has been specified explicitly, try to swallow the appropriate prefix. We only need to do something
+      // special for hex, since decimal has no prefix and octal's prefix is '0', which doesn't change the value that
+      // we'll parse if we don't swallow it.
+    } else if (__base == 16) {
+      // Try to swallow '0x'. *__first == '0'
+      if (*__first == __atoms[0]) {
+        ++__first;
+        if (__first == __last) {
+          __err |= ios_base::eofbit;
+          __v = 0;
+          return __first;
+        }
+        // __c == 'x' || __c == 'X'
+        if (auto __c = *__first; __c == __atoms[22] || __c == __atoms[23]) {
+          ++__first;
+        } else {
+          __parsed_num = true; // We only swallowed '0', so we've started to parse a number
+          ++__dc;              // The swallowed '0' is a digit and counts towards the current group
+        }
+      }
+    }
+
+    // Calculate the actual number
+    _Unsigned __val   = 0;
+    bool __overflowed = false;
+    for (; __first != __last; ++__first) {
+      auto __c = *__first;
+      if (!__grouping.empty() && __c == __thousands_sep) {
+        if (__g_end - __g < this->__num_get_buf_sz) {
+          *__g_end++ = __dc;
+          __dc       = 0;
+        }
+        continue;
+      }
+      auto __offset = this->__atoms_offset(__atoms, __c);
+      if (__offset >= 22) // Not a valid integer character
+        break;
+
+      if (__base == 16 && __offset >= 16)
+        __offset -= 6;
+      if (__offset >= __base)
+        break;
+      // __val = (__val * __base) + __offset
+      __overflowed |= __builtin_mul_overflow(__val, __base, std::addressof(__val)) ||
+                      __builtin_add_overflow(__val, __offset, std::addressof(__val));
+      __parsed_num = true;
+      ++__dc;
+    }
+
+    if (!__parsed_num) {
+      __err |= ios_base::failbit;
+      __v = 0;
+    } else if (__overflowed) {
+      __err |= ios_base::failbit;
+      __v = is_signed<_MaybeSigned>::value && __negate
+              ? numeric_limits<_MaybeSigned>::min()
+              : numeric_limits<_MaybeSigned>::max();
+    } else if (!__negate) {
+      if (__val > static_cast<_Unsigned>(numeric_limits<_MaybeSigned>::max())) {
+        __err |= ios_base::failbit;
+        __v = numeric_limits<_MaybeSigned>::max();
+      } else {
+        __v = __val;
+      }
+    } else if (is_signed<_MaybeSigned>::value) {
+      if (__val > static_cast<_Unsigned>(numeric_limits<_MaybeSigned>::max()) + 1) {
+        __err |= ios_base::failbit;
+        __v = numeric_limits<_MaybeSigned>::min();
+      } else if (__val == static_cast<_Unsigned>(numeric_limits<_MaybeSigned>::max()) + 1) {
+        __v = numeric_limits<_MaybeSigned>::min();
+      } else {
+        __v = -__val;
+      }
+    } else {
+      __v = -__val;
+    }
+
     if (__grouping.size() != 0 && __g_end - __g < __num_get_base::__num_get_buf_sz)
       *__g_end++ = __dc;
-    // Stage 3
-    __v = std::__num_get_unsigned_integral<_Unsigned>(__a, __a_end, __err, __base);
+
     // Digit grouping checked
     __check_grouping(__grouping, __g, __g_end, __err);
     // EOF checked
-    if (__b == __e)
+    if (__first == __last)
       __err |= ios_base::eofbit;
-    return __b;
+    return __first;
   }
 
   virtual iter_type do_get(iter_type __b, iter_type __e, ios_base& __iob, ios_base::iostate& __err, bool& __v) const;
 
   virtual iter_type do_get(iter_type __b, iter_type __e, ios_base& __iob, ios_base::iostate& __err, long& __v) const {
-    return this->__do_get_signed(__b, __e, __iob, __err, __v);
+    return this->__do_get_integral(__b, __e, __iob, __err, __v);
   }
 
   virtual iter_type
   do_get(iter_type __b, iter_type __e, ios_base& __iob, ios_base::iostate& __err, long long& __v) const {
-    return this->__do_get_signed(__b, __e, __iob, __err, __v);
+    return this->__do_get_integral(__b, __e, __iob, __err, __v);
   }
 
   virtual iter_type
   do_get(iter_type __b, iter_type __e, ios_base& __iob, ios_base::iostate& __err, unsigned short& __v) const {
-    return this->__do_get_unsigned(__b, __e, __iob, __err, __v);
+    return this->__do_get_integral(__b, __e, __iob, __err, __v);
   }
 
   virtual iter_type
   do_get(iter_type __b, iter_type __e, ios_base& __iob, ios_base::iostate& __err, unsigned int& __v) const {
-    return this->__do_get_unsigned(__b, __e, __iob, __err, __v);
+    return this->__do_get_integral(__b, __e, __iob, __err, __v);
   }
 
   virtual iter_type
   do_get(iter_type __b, iter_type __e, ios_base& __iob, ios_base::iostate& __err, unsigned long& __v) const {
-    return this->__do_get_unsigned(__b, __e, __iob, __err, __v);
+    return this->__do_get_integral(__b, __e, __iob, __err, __v);
   }
 
   virtual iter_type
   do_get(iter_type __b, iter_type __e, ios_base& __iob, ios_base::iostate& __err, unsigned long long& __v) const {
-    return this->__do_get_unsigned(__b, __e, __iob, __err, __v);
+    return this->__do_get_integral(__b, __e, __iob, __err, __v);
   }
 
   virtual iter_type do_get(iter_type __b, iter_type __e, ios_base& __iob, ios_base::iostate& __err, float& __v) const {
@@ -652,40 +621,13 @@ _InputIterator num_get<_CharT, _InputIterator>::do_get(
 template <class _CharT, class _InputIterator>
 _InputIterator num_get<_CharT, _InputIterator>::do_get(
     iter_type __b, iter_type __e, ios_base& __iob, ios_base::iostate& __err, void*& __v) const {
-  // Stage 1
-  int __base = 16;
-  // Stage 2
-  char_type __atoms[__num_get_base::__int_chr_cnt];
-  char_type __thousands_sep = char_type();
-  string __grouping;
-  std::use_facet<ctype<_CharT> >(__iob.getloc())
-      .widen(__num_get_base::__src, __num_get_base::__src + __num_get_base::__int_chr_cnt, __atoms);
-  string __buf;
-  __buf.resize(__buf.capacity());
-  char* __a     = &__buf[0];
-  char* __a_end = __a;
-  unsigned __g[__num_get_base::__num_get_buf_sz];
-  unsigned* __g_end = __g;
-  unsigned __dc     = 0;
-  for (; __b != __e; ++__b) {
-    if (__a_end == __a + __buf.size()) {
-      size_t __tmp = __buf.size();
-      __buf.resize(2 * __buf.size());
-      __buf.resize(__buf.capacity());
-      __a     = &__buf[0];
-      __a_end = __a + __tmp;
-    }
-    if (this->__stage2_int_loop(*__b, __base, __a, __a_end, __dc, __thousands_sep, __grouping, __g, __g_end, __atoms))
-      break;
-  }
-  // Stage 3
-  __buf.resize(__a_end - __a);
-  if (__locale::__sscanf(__buf.c_str(), _LIBCPP_GET_C_LOCALE, "%p", &__v) != 1)
-    __err = ios_base::failbit;
-  // EOF checked
-  if (__b == __e)
-    __err |= ios_base::eofbit;
-  return __b;
+  auto __flags = __iob.flags();
+  __iob.flags((__flags & ~ios_base::basefield & ~ios_base::uppercase) | ios_base::hex);
+  uintptr_t __ptr;
+  auto __res = __do_get_integral(__b, __e, __iob, __err, __ptr);
+  __iob.flags(__flags);
+  __v = reinterpret_cast<void*>(__ptr);
+  return __res;
 }
 
 extern template class _LIBCPP_EXTERN_TEMPLATE_TYPE_VIS num_get<char>;
@@ -748,6 +690,13 @@ void __num_put<_CharT>::__widen_and_group_int(
     __op = __ob + (__np - __nb);
 }
 
+_LIBCPP_HIDE_FROM_ABI inline bool __isdigit(char __c) { return __c >= '0' && __c <= '9'; }
+
+_LIBCPP_HIDE_FROM_ABI inline bool __isxdigit(char __c) {
+  auto __lower = __c | 0x20;
+  return std::__isdigit(__c) || (__lower >= 'a' && __lower <= 'f');
+}
+
 template <class _CharT>
 void __num_put<_CharT>::__widen_and_group_float(
     char* __nb, char* __np, char* __ne, _CharT* __ob, _CharT*& __op, _CharT*& __oe, const locale& __loc) {
@@ -763,11 +712,11 @@ void __num_put<_CharT>::__widen_and_group_float(
     *__oe++ = __ct.widen(*__nf++);
     *__oe++ = __ct.widen(*__nf++);
     for (__ns = __nf; __ns < __ne; ++__ns)
-      if (!__locale::__isxdigit(*__ns, _LIBCPP_GET_C_LOCALE))
+      if (!std::__isxdigit(*__ns))
         break;
   } else {
     for (__ns = __nf; __ns < __ne; ++__ns)
-      if (!__locale::__isdigit(*__ns, _LIBCPP_GET_C_LOCALE))
+      if (!std::__isdigit(*__ns))
         break;
   }
   if (__grouping.empty()) {
@@ -885,9 +834,7 @@ num_put<_CharT, _OutputIterator>::do_put(iter_type __s, ios_base& __iob, char_ty
   const numpunct<char_type>& __np = std::use_facet<numpunct<char_type> >(__iob.getloc());
   typedef typename numpunct<char_type>::string_type string_type;
   string_type __nm = __v ? __np.truename() : __np.falsename();
-  for (typename string_type::iterator __i = __nm.begin(); __i != __nm.end(); ++__i, ++__s)
-    *__s = *__i;
-  return __s;
+  return std::copy(__nm.begin(), __nm.end(), __s);
 }
 
 template <class _CharT, class _OutputIterator>
diff --git a/lib/libcxx/include/__locale_dir/pad_and_output.h b/lib/libcxx/include/__locale_dir/pad_and_output.h
index a1cb37d078..bdd4d2856d 100644
--- a/lib/libcxx/include/__locale_dir/pad_and_output.h
+++ b/lib/libcxx/include/__locale_dir/pad_and_output.h
@@ -13,6 +13,8 @@
 
 #if _LIBCPP_HAS_LOCALIZATION
 
+#  include <__algorithm/copy.h>
+#  include <__algorithm/fill_n.h>
 #  include <ios>
 
 #  if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
@@ -30,12 +32,9 @@ _LIBCPP_HIDE_FROM_ABI _OutputIterator __pad_and_output(
     __ns -= __sz;
   else
     __ns = 0;
-  for (; __ob < __op; ++__ob, ++__s)
-    *__s = *__ob;
-  for (; __ns; --__ns, ++__s)
-    *__s = __fl;
-  for (; __ob < __oe; ++__ob, ++__s)
-    *__s = *__ob;
+  __s = std::copy(__ob, __op, __s);
+  __s = std::fill_n(__s, __ns, __fl);
+  __s = std::copy(__op, __oe, __s);
   __iob.width(0);
   return __s;
 }
diff --git a/lib/libcxx/include/__locale_dir/support/bsd_like.h b/lib/libcxx/include/__locale_dir/support/bsd_like.h
index 2b03e18920..6f533b4e1e 100644
--- a/lib/libcxx/include/__locale_dir/support/bsd_like.h
+++ b/lib/libcxx/include/__locale_dir/support/bsd_like.h
@@ -24,7 +24,6 @@
 #  include <wctype.h>
 #endif
 
-/* zig patch: https://github.com/llvm/llvm-project/pull/143055 */
 #if __has_include(<xlocale.h>)
 #  include <xlocale.h>
 #endif
@@ -80,22 +79,9 @@ inline _LIBCPP_HIDE_FROM_ABI long double __strtold(const char* __nptr, char** __
   return ::strtold_l(__nptr, __endptr, __loc);
 }
 
-inline _LIBCPP_HIDE_FROM_ABI long long __strtoll(const char* __nptr, char** __endptr, int __base, __locale_t __loc) {
-  return ::strtoll_l(__nptr, __endptr, __base, __loc);
-}
-
-inline _LIBCPP_HIDE_FROM_ABI unsigned long long
-__strtoull(const char* __nptr, char** __endptr, int __base, __locale_t __loc) {
-  return ::strtoull_l(__nptr, __endptr, __base, __loc);
-}
-
 //
 // Character manipulation functions
 //
-inline _LIBCPP_HIDE_FROM_ABI int __isdigit(int __c, __locale_t __loc) { return ::isdigit_l(__c, __loc); }
-
-inline _LIBCPP_HIDE_FROM_ABI int __isxdigit(int __c, __locale_t __loc) { return ::isxdigit_l(__c, __loc); }
-
 #if defined(_LIBCPP_BUILDING_LIBRARY)
 inline _LIBCPP_HIDE_FROM_ABI int __toupper(int __c, __locale_t __loc) { return ::toupper_l(__c, __loc); }
 
@@ -216,12 +202,6 @@ _LIBCPP_HIDE_FROM_ABI _LIBCPP_VARIADIC_ATTRIBUTE_FORMAT(__printf__, 3, 4) int __
     char** __s, __locale_t __loc, const char* __format, _Args&&... __args) {
   return ::asprintf_l(__s, __loc, __format, std::forward<_Args>(__args)...); // non-standard
 }
-
-template <class... _Args>
-_LIBCPP_HIDE_FROM_ABI _LIBCPP_VARIADIC_ATTRIBUTE_FORMAT(__scanf__, 3, 4) int __sscanf(
-    const char* __s, __locale_t __loc, const char* __format, _Args&&... __args) {
-  return ::sscanf_l(__s, __loc, __format, std::forward<_Args>(__args)...);
-}
 _LIBCPP_DIAGNOSTIC_POP
 #undef _LIBCPP_VARIADIC_ATTRIBUTE_FORMAT
 
diff --git a/lib/libcxx/include/__locale_dir/support/fuchsia.h b/lib/libcxx/include/__locale_dir/support/fuchsia.h
index 4b9e63facb..528bfeb0cb 100644
--- a/lib/libcxx/include/__locale_dir/support/fuchsia.h
+++ b/lib/libcxx/include/__locale_dir/support/fuchsia.h
@@ -141,13 +141,6 @@ _LIBCPP_HIDE_FROM_ABI _LIBCPP_VARIADIC_ATTRIBUTE_FORMAT(__printf__, 3, 4) int __
   __locale_guard __current(__loc);
   return ::asprintf(__s, __format, std::forward<_Args>(__args)...); // non-standard
 }
-template <class... _Args>
-_LIBCPP_HIDE_FROM_ABI _LIBCPP_VARIADIC_ATTRIBUTE_FORMAT(__scanf__, 3, 4) int __sscanf(
-    const char* __s, __locale_t __loc, const char* __format, _Args&&... __args) {
-  __locale_guard __current(__loc);
-  return std::sscanf(__s, __format, std::forward<_Args>(__args)...);
-}
-
 _LIBCPP_DIAGNOSTIC_POP
 #undef _LIBCPP_VARIADIC_ATTRIBUTE_FORMAT
 
diff --git a/lib/libcxx/include/__locale_dir/support/linux.h b/lib/libcxx/include/__locale_dir/support/linux.h
index 23bcf44c31..1a589be49b 100644
--- a/lib/libcxx/include/__locale_dir/support/linux.h
+++ b/lib/libcxx/include/__locale_dir/support/linux.h
@@ -94,32 +94,9 @@ inline _LIBCPP_HIDE_FROM_ABI long double __strtold(const char* __nptr, char** __
   return ::strtold_l(__nptr, __endptr, __loc);
 }
 
-inline _LIBCPP_HIDE_FROM_ABI long long __strtoll(const char* __nptr, char** __endptr, int __base, __locale_t __loc) {
-#if !_LIBCPP_HAS_MUSL_LIBC
-  return ::strtoll_l(__nptr, __endptr, __base, __loc);
-#else
-  (void)__loc;
-  return ::strtoll(__nptr, __endptr, __base);
-#endif
-}
-
-inline _LIBCPP_HIDE_FROM_ABI unsigned long long
-__strtoull(const char* __nptr, char** __endptr, int __base, __locale_t __loc) {
-#if !_LIBCPP_HAS_MUSL_LIBC
-  return ::strtoull_l(__nptr, __endptr, __base, __loc);
-#else
-  (void)__loc;
-  return ::strtoull(__nptr, __endptr, __base);
-#endif
-}
-
 //
 // Character manipulation functions
 //
-inline _LIBCPP_HIDE_FROM_ABI int __isdigit(int __c, __locale_t __loc) { return isdigit_l(__c, __loc); }
-
-inline _LIBCPP_HIDE_FROM_ABI int __isxdigit(int __c, __locale_t __loc) { return isxdigit_l(__c, __loc); }
-
 #if defined(_LIBCPP_BUILDING_LIBRARY)
 inline _LIBCPP_HIDE_FROM_ABI int __toupper(int __c, __locale_t __loc) { return toupper_l(__c, __loc); }
 
@@ -261,20 +238,6 @@ inline _LIBCPP_ATTRIBUTE_FORMAT(__printf__, 3, 4) int __asprintf(
   va_end(__va);
   return __res;
 }
-
-#ifndef _LIBCPP_COMPILER_GCC // GCC complains that this can't be always_inline due to C-style varargs
-_LIBCPP_HIDE_FROM_ABI
-#endif
-inline _LIBCPP_ATTRIBUTE_FORMAT(__scanf__, 3, 4) int __sscanf(
-    const char* __s, __locale_t __loc, const char* __format, ...) {
-  va_list __va;
-  va_start(__va, __format);
-  __locale_guard __current(__loc);
-  int __res = std::vsscanf(__s, __format, __va);
-  va_end(__va);
-  return __res;
-}
-
 } // namespace __locale
 _LIBCPP_END_NAMESPACE_STD
 
diff --git a/lib/libcxx/include/__locale_dir/support/netbsd.h b/lib/libcxx/include/__locale_dir/support/netbsd.h
index b1e67ade55..190857f6f8 100644
--- a/lib/libcxx/include/__locale_dir/support/netbsd.h
+++ b/lib/libcxx/include/__locale_dir/support/netbsd.h
@@ -6,8 +6,6 @@
 //
 //===----------------------------------------------------------------------===//
 
-/* zig patch: https://github.com/llvm/llvm-project/pull/143055 */
-
 #ifndef _LIBCPP___LOCALE_DIR_SUPPORT_NETBSD_H
 #define _LIBCPP___LOCALE_DIR_SUPPORT_NETBSD_H
 
diff --git a/lib/libcxx/include/__locale_dir/support/newlib.h b/lib/libcxx/include/__locale_dir/support/newlib.h
new file mode 100644
index 0000000000..05c8a449cf
--- /dev/null
+++ b/lib/libcxx/include/__locale_dir/support/newlib.h
@@ -0,0 +1,243 @@
+//===-----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef _LIBCPP___LOCALE_DIR_SUPPORT_NEWLIB_H
+#define _LIBCPP___LOCALE_DIR_SUPPORT_NEWLIB_H
+
+#include <__config>
+#include <__cstddef/size_t.h>
+#include <__std_mbstate_t.h>
+#include <clocale> // std::lconv
+#include <cstdio>
+#include <cstdlib>
+#include <ctype.h>
+#include <stdarg.h>
+#include <string.h>
+#include <time.h>
+#if _LIBCPP_HAS_WIDE_CHARACTERS
+#  include <cwchar>
+#  include <wctype.h>
+#endif
+
+#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
+#  pragma GCC system_header
+#endif
+
+_LIBCPP_BEGIN_NAMESPACE_STD
+namespace __locale {
+
+struct __locale_guard {
+  _LIBCPP_HIDE_FROM_ABI __locale_guard(locale_t& __loc) : __old_loc_(::uselocale(__loc)) {}
+
+  _LIBCPP_HIDE_FROM_ABI ~__locale_guard() {
+    if (__old_loc_)
+      ::uselocale(__old_loc_);
+  }
+
+  locale_t __old_loc_;
+
+  __locale_guard(__locale_guard const&)            = delete;
+  __locale_guard& operator=(__locale_guard const&) = delete;
+};
+
+//
+// Locale management
+//
+#define _LIBCPP_COLLATE_MASK LC_COLLATE_MASK
+#define _LIBCPP_CTYPE_MASK LC_CTYPE_MASK
+#define _LIBCPP_MONETARY_MASK LC_MONETARY_MASK
+#define _LIBCPP_NUMERIC_MASK LC_NUMERIC_MASK
+#define _LIBCPP_TIME_MASK LC_TIME_MASK
+#define _LIBCPP_MESSAGES_MASK LC_MESSAGES_MASK
+#define _LIBCPP_ALL_MASK LC_ALL_MASK
+#define _LIBCPP_LC_ALL LC_ALL
+
+using __locale_t _LIBCPP_NODEBUG = ::locale_t;
+
+#if defined(_LIBCPP_BUILDING_LIBRARY)
+using __lconv_t _LIBCPP_NODEBUG = std::lconv;
+
+inline _LIBCPP_HIDE_FROM_ABI __locale_t __newlocale(int __category_mask, const char* __locale, __locale_t __base) {
+  return ::newlocale(__category_mask, __locale, __base);
+}
+
+inline _LIBCPP_HIDE_FROM_ABI void __freelocale(__locale_t __loc) { ::freelocale(__loc); }
+
+inline _LIBCPP_HIDE_FROM_ABI char* __setlocale(int __category, char const* __locale) {
+  return ::setlocale(__category, __locale);
+}
+
+inline _LIBCPP_HIDE_FROM_ABI __lconv_t* __localeconv(__locale_t& __loc) {
+  __locale_guard __current(__loc);
+  return std::localeconv();
+}
+#endif // _LIBCPP_BUILDING_LIBRARY
+
+//
+// Strtonum functions
+//
+inline _LIBCPP_HIDE_FROM_ABI float __strtof(const char* __nptr, char** __endptr, __locale_t __loc) {
+  return ::strtof_l(__nptr, __endptr, __loc);
+}
+
+inline _LIBCPP_HIDE_FROM_ABI double __strtod(const char* __nptr, char** __endptr, __locale_t __loc) {
+  return ::strtod_l(__nptr, __endptr, __loc);
+}
+
+inline _LIBCPP_HIDE_FROM_ABI long double __strtold(const char* __nptr, char** __endptr, __locale_t __loc) {
+  return ::strtold_l(__nptr, __endptr, __loc);
+}
+
+//
+// Character manipulation functions
+//
+#if defined(_LIBCPP_BUILDING_LIBRARY)
+inline _LIBCPP_HIDE_FROM_ABI int __toupper(int __c, __locale_t __loc) { return toupper_l(__c, __loc); }
+
+inline _LIBCPP_HIDE_FROM_ABI int __tolower(int __c, __locale_t __loc) { return tolower_l(__c, __loc); }
+
+inline _LIBCPP_HIDE_FROM_ABI int __strcoll(const char* __s1, const char* __s2, __locale_t __loc) {
+  return strcoll_l(__s1, __s2, __loc);
+}
+
+inline _LIBCPP_HIDE_FROM_ABI size_t __strxfrm(char* __dest, const char* __src, size_t __n, __locale_t __loc) {
+  return strxfrm_l(__dest, __src, __n, __loc);
+}
+
+#  if _LIBCPP_HAS_WIDE_CHARACTERS
+inline _LIBCPP_HIDE_FROM_ABI int __iswctype(wint_t __c, wctype_t __type, __locale_t __loc) {
+  return iswctype_l(__c, __type, __loc);
+}
+
+inline _LIBCPP_HIDE_FROM_ABI int __iswspace(wint_t __c, __locale_t __loc) { return iswspace_l(__c, __loc); }
+
+inline _LIBCPP_HIDE_FROM_ABI int __iswprint(wint_t __c, __locale_t __loc) { return iswprint_l(__c, __loc); }
+
+inline _LIBCPP_HIDE_FROM_ABI int __iswcntrl(wint_t __c, __locale_t __loc) { return iswcntrl_l(__c, __loc); }
+
+inline _LIBCPP_HIDE_FROM_ABI int __iswupper(wint_t __c, __locale_t __loc) { return iswupper_l(__c, __loc); }
+
+inline _LIBCPP_HIDE_FROM_ABI int __iswlower(wint_t __c, __locale_t __loc) { return iswlower_l(__c, __loc); }
+
+inline _LIBCPP_HIDE_FROM_ABI int __iswalpha(wint_t __c, __locale_t __loc) { return iswalpha_l(__c, __loc); }
+
+inline _LIBCPP_HIDE_FROM_ABI int __iswblank(wint_t __c, __locale_t __loc) { return iswblank_l(__c, __loc); }
+
+inline _LIBCPP_HIDE_FROM_ABI int __iswdigit(wint_t __c, __locale_t __loc) { return iswdigit_l(__c, __loc); }
+
+inline _LIBCPP_HIDE_FROM_ABI int __iswpunct(wint_t __c, __locale_t __loc) { return iswpunct_l(__c, __loc); }
+
+inline _LIBCPP_HIDE_FROM_ABI int __iswxdigit(wint_t __c, __locale_t __loc) { return iswxdigit_l(__c, __loc); }
+
+inline _LIBCPP_HIDE_FROM_ABI wint_t __towupper(wint_t __c, __locale_t __loc) { return towupper_l(__c, __loc); }
+
+inline _LIBCPP_HIDE_FROM_ABI wint_t __towlower(wint_t __c, __locale_t __loc) { return towlower_l(__c, __loc); }
+
+inline _LIBCPP_HIDE_FROM_ABI int __wcscoll(const wchar_t* __ws1, const wchar_t* __ws2, __locale_t __loc) {
+  return wcscoll_l(__ws1, __ws2, __loc);
+}
+
+inline _LIBCPP_HIDE_FROM_ABI size_t __wcsxfrm(wchar_t* __dest, const wchar_t* __src, size_t __n, __locale_t __loc) {
+  return wcsxfrm_l(__dest, __src, __n, __loc);
+}
+#  endif // _LIBCPP_HAS_WIDE_CHARACTERS
+
+inline _LIBCPP_HIDE_FROM_ABI
+size_t __strftime(char* __s, size_t __max, const char* __format, const struct tm* __tm, __locale_t __loc) {
+  return strftime_l(__s, __max, __format, __tm, __loc);
+}
+
+//
+// Other functions
+//
+inline _LIBCPP_HIDE_FROM_ABI decltype(MB_CUR_MAX) __mb_len_max(__locale_t __loc) {
+  __locale_guard __current(__loc);
+  return MB_CUR_MAX;
+}
+
+#  if _LIBCPP_HAS_WIDE_CHARACTERS
+inline _LIBCPP_HIDE_FROM_ABI wint_t __btowc(int __c, __locale_t __loc) {
+  __locale_guard __current(__loc);
+  return std::btowc(__c);
+}
+
+inline _LIBCPP_HIDE_FROM_ABI int __wctob(wint_t __c, __locale_t __loc) {
+  __locale_guard __current(__loc);
+  return std::wctob(__c);
+}
+
+inline _LIBCPP_HIDE_FROM_ABI size_t
+__wcsnrtombs(char* __dest, const wchar_t** __src, size_t __nwc, size_t __len, mbstate_t* __ps, __locale_t __loc) {
+  __locale_guard __current(__loc);
+  return ::wcsnrtombs(__dest, __src, __nwc, __len, __ps); // non-standard
+}
+
+inline _LIBCPP_HIDE_FROM_ABI size_t __wcrtomb(char* __s, wchar_t __wc, mbstate_t* __ps, __locale_t __loc) {
+  __locale_guard __current(__loc);
+  return std::wcrtomb(__s, __wc, __ps);
+}
+
+inline _LIBCPP_HIDE_FROM_ABI size_t
+__mbsnrtowcs(wchar_t* __dest, const char** __src, size_t __nms, size_t __len, mbstate_t* __ps, __locale_t __loc) {
+  __locale_guard __current(__loc);
+  return ::mbsnrtowcs(__dest, __src, __nms, __len, __ps); // non-standard
+}
+
+inline _LIBCPP_HIDE_FROM_ABI size_t
+__mbrtowc(wchar_t* __pwc, const char* __s, size_t __n, mbstate_t* __ps, __locale_t __loc) {
+  __locale_guard __current(__loc);
+  return std::mbrtowc(__pwc, __s, __n, __ps);
+}
+
+inline _LIBCPP_HIDE_FROM_ABI int __mbtowc(wchar_t* __pwc, const char* __pmb, size_t __max, __locale_t __loc) {
+  __locale_guard __current(__loc);
+  return std::mbtowc(__pwc, __pmb, __max);
+}
+
+inline _LIBCPP_HIDE_FROM_ABI size_t __mbrlen(const char* __s, size_t __n, mbstate_t* __ps, __locale_t __loc) {
+  __locale_guard __current(__loc);
+  return std::mbrlen(__s, __n, __ps);
+}
+
+inline _LIBCPP_HIDE_FROM_ABI size_t
+__mbsrtowcs(wchar_t* __dest, const char** __src, size_t __len, mbstate_t* __ps, __locale_t __loc) {
+  __locale_guard __current(__loc);
+  return std::mbsrtowcs(__dest, __src, __len, __ps);
+}
+#  endif // _LIBCPP_HAS_WIDE_CHARACTERS
+#endif   // _LIBCPP_BUILDING_LIBRARY
+
+#ifndef _LIBCPP_COMPILER_GCC // GCC complains that this can't be always_inline due to C-style varargs
+_LIBCPP_HIDE_FROM_ABI
+#endif
+inline _LIBCPP_ATTRIBUTE_FORMAT(__printf__, 4, 5) int __snprintf(
+    char* __s, size_t __n, __locale_t __loc, const char* __format, ...) {
+  va_list __va;
+  va_start(__va, __format);
+  __locale_guard __current(__loc);
+  int __res = std::vsnprintf(__s, __n, __format, __va);
+  va_end(__va);
+  return __res;
+}
+
+#ifndef _LIBCPP_COMPILER_GCC // GCC complains that this can't be always_inline due to C-style varargs
+_LIBCPP_HIDE_FROM_ABI
+#endif
+inline _LIBCPP_ATTRIBUTE_FORMAT(__printf__, 3, 4) int __asprintf(
+    char** __s, __locale_t __loc, const char* __format, ...) {
+  va_list __va;
+  va_start(__va, __format);
+  __locale_guard __current(__loc);
+  int __res = ::vasprintf(__s, __format, __va); // non-standard
+  va_end(__va);
+  return __res;
+}
+} // namespace __locale
+_LIBCPP_END_NAMESPACE_STD
+
+#endif // _LIBCPP___LOCALE_DIR_SUPPORT_NEWLIB_H
diff --git a/lib/libcxx/include/__locale_dir/support/no_locale/characters.h b/lib/libcxx/include/__locale_dir/support/no_locale/characters.h
index 1281b8bd13..73eba3ec54 100644
--- a/lib/libcxx/include/__locale_dir/support/no_locale/characters.h
+++ b/lib/libcxx/include/__locale_dir/support/no_locale/characters.h
@@ -29,10 +29,6 @@ namespace __locale {
 //
 // Character manipulation functions
 //
-inline _LIBCPP_HIDE_FROM_ABI int __isdigit(int __c, __locale_t) { return std::isdigit(__c); }
-
-inline _LIBCPP_HIDE_FROM_ABI int __isxdigit(int __c, __locale_t) { return std::isxdigit(__c); }
-
 #if defined(_LIBCPP_BUILDING_LIBRARY)
 inline _LIBCPP_HIDE_FROM_ABI int __toupper(int __c, __locale_t) { return std::toupper(__c); }
 
diff --git a/lib/libcxx/include/__locale_dir/support/no_locale/strtonum.h b/lib/libcxx/include/__locale_dir/support/no_locale/strtonum.h
index 0e7a32993e..59544e10e4 100644
--- a/lib/libcxx/include/__locale_dir/support/no_locale/strtonum.h
+++ b/lib/libcxx/include/__locale_dir/support/no_locale/strtonum.h
@@ -34,15 +34,6 @@ inline _LIBCPP_HIDE_FROM_ABI long double __strtold(const char* __nptr, char** __
   return std::strtold(__nptr, __endptr);
 }
 
-inline _LIBCPP_HIDE_FROM_ABI long long __strtoll(const char* __nptr, char** __endptr, int __base, __locale_t) {
-  return std::strtoll(__nptr, __endptr, __base);
-}
-
-inline _LIBCPP_HIDE_FROM_ABI unsigned long long
-__strtoull(const char* __nptr, char** __endptr, int __base, __locale_t) {
-  return std::strtoull(__nptr, __endptr, __base);
-}
-
 } // namespace __locale
 _LIBCPP_END_NAMESPACE_STD
 
diff --git a/lib/libcxx/include/__locale_dir/support/windows.h b/lib/libcxx/include/__locale_dir/support/windows.h
index 0df8709f11..644ef68adf 100644
--- a/lib/libcxx/include/__locale_dir/support/windows.h
+++ b/lib/libcxx/include/__locale_dir/support/windows.h
@@ -186,21 +186,9 @@ inline _LIBCPP_HIDE_FROM_ABI double __strtod(const char* __nptr, char** __endptr
   return ::_strtod_l(__nptr, __endptr, __loc);
 }
 
-inline _LIBCPP_HIDE_FROM_ABI long long __strtoll(const char* __nptr, char** __endptr, int __base, __locale_t __loc) {
-  return ::_strtoi64_l(__nptr, __endptr, __base, __loc);
-}
-inline _LIBCPP_HIDE_FROM_ABI unsigned long long
-__strtoull(const char* __nptr, char** __endptr, int __base, __locale_t __loc) {
-  return ::_strtoui64_l(__nptr, __endptr, __base, __loc);
-}
-
 //
 // Character manipulation functions
 //
-inline _LIBCPP_HIDE_FROM_ABI int __isdigit(int __c, __locale_t __loc) { return _isdigit_l(__c, __loc); }
-
-inline _LIBCPP_HIDE_FROM_ABI int __isxdigit(int __c, __locale_t __loc) { return _isxdigit_l(__c, __loc); }
-
 #if defined(_LIBCPP_BUILDING_LIBRARY)
 inline _LIBCPP_HIDE_FROM_ABI int __toupper(int __c, __locale_t __loc) { return ::_toupper_l(__c, __loc); }
 
@@ -280,23 +268,6 @@ _LIBCPP_EXPORTED_FROM_ABI _LIBCPP_ATTRIBUTE_FORMAT(__printf__, 4, 5) int __snpri
 _LIBCPP_EXPORTED_FROM_ABI
 _LIBCPP_ATTRIBUTE_FORMAT(__printf__, 3, 4) int __asprintf(char** __ret, __locale_t __loc, const char* __format, ...);
 
-_LIBCPP_DIAGNOSTIC_PUSH
-_LIBCPP_CLANG_DIAGNOSTIC_IGNORED("-Wgcc-compat")
-_LIBCPP_GCC_DIAGNOSTIC_IGNORED("-Wformat-nonliteral") // GCC doesn't support [[gnu::format]] on variadic templates
-#ifdef _LIBCPP_COMPILER_CLANG_BASED
-#  define _LIBCPP_VARIADIC_ATTRIBUTE_FORMAT(...) _LIBCPP_ATTRIBUTE_FORMAT(__VA_ARGS__)
-#else
-#  define _LIBCPP_VARIADIC_ATTRIBUTE_FORMAT(...) /* nothing */
-#endif
-
-template <class... _Args>
-_LIBCPP_HIDE_FROM_ABI _LIBCPP_VARIADIC_ATTRIBUTE_FORMAT(__scanf__, 3, 4) int __sscanf(
-    const char* __dest, __locale_t __loc, const char* __format, _Args&&... __args) {
-  return ::_sscanf_l(__dest, __format, __loc, std::forward<_Args>(__args)...);
-}
-_LIBCPP_DIAGNOSTIC_POP
-#undef _LIBCPP_VARIADIC_ATTRIBUTE_FORMAT
-
 #if defined(_LIBCPP_BUILDING_LIBRARY)
 struct __locale_guard {
   _LIBCPP_HIDE_FROM_ABI __locale_guard(__locale_t __l) : __status(_configthreadlocale(_ENABLE_PER_THREAD_LOCALE)) {
diff --git a/lib/libcxx/include/__locale_dir/time.h b/lib/libcxx/include/__locale_dir/time.h
index 5f60d5f36b..78698e9651 100644
--- a/lib/libcxx/include/__locale_dir/time.h
+++ b/lib/libcxx/include/__locale_dir/time.h
@@ -601,17 +601,13 @@ private:
     template <>                                                                                                        \
     _LIBCPP_EXPORTED_FROM_ABI __time_get_storage<_CharT>::__time_get_storage(const string&);                           \
     template <>                                                                                                        \
-    _LIBCPP_EXPORTED_FROM_ABI void __time_get_storage<_CharT>::init(const ctype<_CharT>&);                             \
+    void __time_get_storage<_CharT>::init(const ctype<_CharT>&);                                                       \
     template <>                                                                                                        \
-    _LIBCPP_EXPORTED_FROM_ABI __time_get_storage<_CharT>::string_type __time_get_storage<_CharT>::__analyze(           \
-        char, const ctype<_CharT>&);                                                                                   \
+    __time_get_storage<_CharT>::string_type __time_get_storage<_CharT>::__analyze(char, const ctype<_CharT>&);         \
     extern template _LIBCPP_EXPORTED_FROM_ABI time_base::dateorder __time_get_storage<_CharT>::__do_date_order()       \
         const;                                                                                                         \
     extern template _LIBCPP_EXPORTED_FROM_ABI __time_get_storage<_CharT>::__time_get_storage(const char*);             \
-    extern template _LIBCPP_EXPORTED_FROM_ABI __time_get_storage<_CharT>::__time_get_storage(const string&);           \
-    extern template _LIBCPP_EXPORTED_FROM_ABI void __time_get_storage<_CharT>::init(const ctype<_CharT>&);             \
-    extern template _LIBCPP_EXPORTED_FROM_ABI __time_get_storage<_CharT>::string_type                                  \
-    __time_get_storage<_CharT>::__analyze(char, const ctype<_CharT>&);
+    extern template _LIBCPP_EXPORTED_FROM_ABI __time_get_storage<_CharT>::__time_get_storage(const string&);
 
 _LIBCPP_TIME_GET_STORAGE_EXPLICIT_INSTANTIATION(char)
 #  if _LIBCPP_HAS_WIDE_CHARACTERS
diff --git a/lib/libcxx/include/__math/hypot.h b/lib/libcxx/include/__math/hypot.h
index 8e8c35b4a4..2b12d7be21 100644
--- a/lib/libcxx/include/__math/hypot.h
+++ b/lib/libcxx/include/__math/hypot.h
@@ -53,7 +53,7 @@ inline _LIBCPP_HIDE_FROM_ABI __promote_t<_A1, _A2> hypot(_A1 __x, _A2 __y) _NOEX
 // Computes the three-dimensional hypotenuse: `std::hypot(x,y,z)`.
 // The naive implementation might over-/underflow which is why this implementation is more involved:
 //    If the square of an argument might run into issues, we scale the arguments appropriately.
-// See https://github.com/llvm/llvm-project/issues/92782 for a detailed discussion and summary.
+// See https://llvm.org/PR92782 for a detailed discussion and summary.
 template <class _Real>
 _LIBCPP_HIDE_FROM_ABI _Real __hypot(_Real __x, _Real __y, _Real __z) {
   // Factors needed to determine if over-/underflow might happen
diff --git a/lib/libcxx/include/__math/logarithms.h b/lib/libcxx/include/__math/logarithms.h
index 5f5f943977..7343d6a84a 100644
--- a/lib/libcxx/include/__math/logarithms.h
+++ b/lib/libcxx/include/__math/logarithms.h
@@ -58,7 +58,7 @@ inline _LIBCPP_HIDE_FROM_ABI double log10(_A1 __x) _NOEXCEPT {
 inline _LIBCPP_HIDE_FROM_ABI int ilogb(float __x) _NOEXCEPT { return __builtin_ilogbf(__x); }
 
 template <class = int>
-_LIBCPP_HIDE_FROM_ABI double ilogb(double __x) _NOEXCEPT {
+_LIBCPP_HIDE_FROM_ABI int ilogb(double __x) _NOEXCEPT {
   return __builtin_ilogb(__x);
 }
 
diff --git a/lib/libcxx/include/__math/traits.h b/lib/libcxx/include/__math/traits.h
index 4a6e58c6da..ff22cee730 100644
--- a/lib/libcxx/include/__math/traits.h
+++ b/lib/libcxx/include/__math/traits.h
@@ -25,33 +25,26 @@ namespace __math {
 
 // signbit
 
-// TODO(LLVM 22): Remove conditional once support for Clang 19 is dropped.
-#if defined(_LIBCPP_COMPILER_GCC) || __has_constexpr_builtin(__builtin_signbit)
-#  define _LIBCPP_SIGNBIT_CONSTEXPR _LIBCPP_CONSTEXPR_SINCE_CXX23
-#else
-#  define _LIBCPP_SIGNBIT_CONSTEXPR
-#endif
-
 // The universal C runtime (UCRT) in the WinSDK provides floating point overloads
 // for std::signbit(). By defining our overloads as templates, we can work around
 // this issue as templates are less preferred than non-template functions.
 template <class = void>
-[[__nodiscard__]] inline _LIBCPP_SIGNBIT_CONSTEXPR _LIBCPP_HIDE_FROM_ABI bool signbit(float __x) _NOEXCEPT {
+[[__nodiscard__]] inline _LIBCPP_CONSTEXPR_SINCE_CXX23 _LIBCPP_HIDE_FROM_ABI bool signbit(float __x) _NOEXCEPT {
   return __builtin_signbit(__x);
 }
 
 template <class = void>
-[[__nodiscard__]] inline _LIBCPP_SIGNBIT_CONSTEXPR _LIBCPP_HIDE_FROM_ABI bool signbit(double __x) _NOEXCEPT {
+[[__nodiscard__]] inline _LIBCPP_CONSTEXPR_SINCE_CXX23 _LIBCPP_HIDE_FROM_ABI bool signbit(double __x) _NOEXCEPT {
   return __builtin_signbit(__x);
 }
 
 template <class = void>
-[[__nodiscard__]] inline _LIBCPP_SIGNBIT_CONSTEXPR _LIBCPP_HIDE_FROM_ABI bool signbit(long double __x) _NOEXCEPT {
+[[__nodiscard__]] inline _LIBCPP_CONSTEXPR_SINCE_CXX23 _LIBCPP_HIDE_FROM_ABI bool signbit(long double __x) _NOEXCEPT {
   return __builtin_signbit(__x);
 }
 
 template <class _A1, __enable_if_t<is_integral<_A1>::value, int> = 0>
-[[__nodiscard__]] inline _LIBCPP_SIGNBIT_CONSTEXPR _LIBCPP_HIDE_FROM_ABI bool signbit(_A1 __x) _NOEXCEPT {
+[[__nodiscard__]] inline _LIBCPP_CONSTEXPR_SINCE_CXX23 _LIBCPP_HIDE_FROM_ABI bool signbit(_A1 __x) _NOEXCEPT {
   return __x < 0;
 }
 
@@ -189,6 +182,82 @@ template <class _A1, class _A2, __enable_if_t<is_arithmetic<_A1>::value && is_ar
   return __builtin_isunordered((type)__x, (type)__y);
 }
 
+// MS UCRT incorrectly defines some functions in a way not working with integer types. Until C++20, this was worked
+// around by -fdelayed-template-parsing. Since C++20, we can use standard feature "requires" instead.
+
+// TODO: Remove the workaround once UCRT fixes these functions. Note that this doesn't seem planned as of 2025-07 per
+// https://developercommunity.visualstudio.com/t/10294165.
+
+#if defined(_LIBCPP_MSVCRT) && _LIBCPP_STD_VER >= 20
+namespace __ucrt {
+template <class _A1>
+  requires is_integral_v<_A1>
+[[nodiscard]] inline _LIBCPP_CONSTEXPR_SINCE_CXX23 _LIBCPP_HIDE_FROM_ABI bool isfinite(_A1) noexcept {
+  return true;
+}
+
+template <class _A1>
+  requires is_integral_v<_A1>
+[[nodiscard]] inline _LIBCPP_CONSTEXPR_SINCE_CXX23 _LIBCPP_HIDE_FROM_ABI bool isinf(_A1) noexcept {
+  return false;
+}
+
+template <class _A1>
+  requires is_integral_v<_A1>
+[[nodiscard]] inline _LIBCPP_CONSTEXPR_SINCE_CXX23 _LIBCPP_HIDE_FROM_ABI bool isnan(_A1) noexcept {
+  return false;
+}
+
+template <class _A1>
+  requires is_integral_v<_A1>
+[[nodiscard]] inline _LIBCPP_CONSTEXPR_SINCE_CXX23 _LIBCPP_HIDE_FROM_ABI bool isnormal(_A1 __x) noexcept {
+  return __x != 0;
+}
+
+template <class _A1, class _A2>
+  requires is_arithmetic_v<_A1> && is_arithmetic_v<_A2>
+[[nodiscard]] inline _LIBCPP_HIDE_FROM_ABI bool isgreater(_A1 __x, _A2 __y) noexcept {
+  using type = __promote_t<_A1, _A2>;
+  return __builtin_isgreater((type)__x, (type)__y);
+}
+
+template <class _A1, class _A2>
+  requires is_arithmetic_v<_A1> && is_arithmetic_v<_A2>
+[[nodiscard]] inline _LIBCPP_HIDE_FROM_ABI bool isgreaterequal(_A1 __x, _A2 __y) noexcept {
+  using type = __promote_t<_A1, _A2>;
+  return __builtin_isgreaterequal((type)__x, (type)__y);
+}
+
+template <class _A1, class _A2>
+  requires is_arithmetic_v<_A1> && is_arithmetic_v<_A2>
+[[nodiscard]] inline _LIBCPP_HIDE_FROM_ABI bool isless(_A1 __x, _A2 __y) noexcept {
+  using type = __promote_t<_A1, _A2>;
+  return __builtin_isless((type)__x, (type)__y);
+}
+
+template <class _A1, class _A2>
+  requires is_arithmetic_v<_A1> && is_arithmetic_v<_A2>
+[[nodiscard]] inline _LIBCPP_HIDE_FROM_ABI bool islessequal(_A1 __x, _A2 __y) noexcept {
+  using type = __promote_t<_A1, _A2>;
+  return __builtin_islessequal((type)__x, (type)__y);
+}
+
+template <class _A1, class _A2>
+  requires is_arithmetic_v<_A1> && is_arithmetic_v<_A2>
+[[nodiscard]] inline _LIBCPP_HIDE_FROM_ABI bool islessgreater(_A1 __x, _A2 __y) noexcept {
+  using type = __promote_t<_A1, _A2>;
+  return __builtin_islessgreater((type)__x, (type)__y);
+}
+
+template <class _A1, class _A2>
+  requires is_arithmetic_v<_A1> && is_arithmetic_v<_A2>
+[[nodiscard]] inline _LIBCPP_HIDE_FROM_ABI bool isunordered(_A1 __x, _A2 __y) noexcept {
+  using type = __promote_t<_A1, _A2>;
+  return __builtin_isunordered((type)__x, (type)__y);
+}
+} // namespace __ucrt
+#endif
+
 } // namespace __math
 
 _LIBCPP_END_NAMESPACE_STD
diff --git a/lib/libcxx/include/__mdspan/extents.h b/lib/libcxx/include/__mdspan/extents.h
index 99b54badf8..d16bbd2af4 100644
--- a/lib/libcxx/include/__mdspan/extents.h
+++ b/lib/libcxx/include/__mdspan/extents.h
@@ -25,6 +25,7 @@
 #include <__type_traits/integer_traits.h>
 #include <__type_traits/is_convertible.h>
 #include <__type_traits/is_nothrow_constructible.h>
+#include <__type_traits/is_signed.h>
 #include <__type_traits/make_unsigned.h>
 #include <__utility/integer_sequence.h>
 #include <__utility/unreachable.h>
@@ -298,11 +299,13 @@ private:
 
 public:
   // [mdspan.extents.obs], observers of multidimensional index space
-  _LIBCPP_HIDE_FROM_ABI static constexpr rank_type rank() noexcept { return __rank_; }
-  _LIBCPP_HIDE_FROM_ABI static constexpr rank_type rank_dynamic() noexcept { return __rank_dynamic_; }
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI static constexpr rank_type rank() noexcept { return __rank_; }
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI static constexpr rank_type rank_dynamic() noexcept { return __rank_dynamic_; }
 
-  _LIBCPP_HIDE_FROM_ABI constexpr index_type extent(rank_type __r) const noexcept { return __vals_.__value(__r); }
-  _LIBCPP_HIDE_FROM_ABI static constexpr size_t static_extent(rank_type __r) noexcept {
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr index_type extent(rank_type __r) const noexcept {
+    return __vals_.__value(__r);
+  }
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI static constexpr size_t static_extent(rank_type __r) noexcept {
     return _Values::__static_value(__r);
   }
 
diff --git a/lib/libcxx/include/__mdspan/layout_stride.h b/lib/libcxx/include/__mdspan/layout_stride.h
index 9d77d71bc3..eb22475756 100644
--- a/lib/libcxx/include/__mdspan/layout_stride.h
+++ b/lib/libcxx/include/__mdspan/layout_stride.h
@@ -272,11 +272,10 @@ public:
       return [&]<size_t... _Pos>(index_sequence<_Pos...>) {
         if ((__extents_.extent(_Pos) * ... * 1) == 0)
           return static_cast<index_type>(0);
-        else
-          return static_cast<index_type>(
-              static_cast<index_type>(1) +
-              (((__extents_.extent(_Pos) - static_cast<index_type>(1)) * __strides_[_Pos]) + ... +
-               static_cast<index_type>(0)));
+
+        return static_cast<index_type>(
+            static_cast<index_type>(1) + (((__extents_.extent(_Pos) - static_cast<index_type>(1)) * __strides_[_Pos]) +
+                                          ... + static_cast<index_type>(0)));
       }(make_index_sequence<__rank_>());
     }
   }
diff --git a/lib/libcxx/include/__mdspan/mdspan.h b/lib/libcxx/include/__mdspan/mdspan.h
index c0f2767819..449baea43f 100644
--- a/lib/libcxx/include/__mdspan/mdspan.h
+++ b/lib/libcxx/include/__mdspan/mdspan.h
@@ -87,16 +87,17 @@ public:
   using data_handle_type = typename accessor_type::data_handle_type;
   using reference        = typename accessor_type::reference;
 
-  _LIBCPP_HIDE_FROM_ABI static constexpr rank_type rank() noexcept { return extents_type::rank(); }
-  _LIBCPP_HIDE_FROM_ABI static constexpr rank_type rank_dynamic() noexcept { return extents_type::rank_dynamic(); }
-  _LIBCPP_HIDE_FROM_ABI static constexpr size_t static_extent(rank_type __r) noexcept {
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI static constexpr rank_type rank() noexcept { return extents_type::rank(); }
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI static constexpr rank_type rank_dynamic() noexcept {
+    return extents_type::rank_dynamic();
+  }
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI static constexpr size_t static_extent(rank_type __r) noexcept {
     return extents_type::static_extent(__r);
   }
-  _LIBCPP_HIDE_FROM_ABI constexpr index_type extent(rank_type __r) const noexcept {
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr index_type extent(rank_type __r) const noexcept {
     return __map_.extents().extent(__r);
-  };
+  }
 
-public:
   //--------------------------------------------------------------------------------
   // [mdspan.mdspan.cons], mdspan constructors, assignment, and destructor
 
@@ -185,7 +186,7 @@ public:
     requires((is_convertible_v<_OtherIndexTypes, index_type> && ...) &&
              (is_nothrow_constructible_v<index_type, _OtherIndexTypes> && ...) &&
              (sizeof...(_OtherIndexTypes) == rank()))
-  _LIBCPP_HIDE_FROM_ABI constexpr reference operator[](_OtherIndexTypes... __indices) const {
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr reference operator[](_OtherIndexTypes... __indices) const {
     // Note the standard layouts would also check this, but user provided ones may not, so we
     // check the precondition here
     _LIBCPP_ASSERT_VALID_ELEMENT_ACCESS(__mdspan_detail::__is_multidimensional_index_in(extents(), __indices...),
@@ -196,7 +197,8 @@ public:
   template <class _OtherIndexType>
     requires(is_convertible_v<const _OtherIndexType&, index_type> &&
              is_nothrow_constructible_v<index_type, const _OtherIndexType&>)
-  _LIBCPP_HIDE_FROM_ABI constexpr reference operator[](const array< _OtherIndexType, rank()>& __indices) const {
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr reference
+  operator[](const array< _OtherIndexType, rank()>& __indices) const {
     return __acc_.access(__ptr_, [&]<size_t... _Idxs>(index_sequence<_Idxs...>) {
       return __map_(__indices[_Idxs]...);
     }(make_index_sequence<rank()>()));
@@ -205,13 +207,13 @@ public:
   template <class _OtherIndexType>
     requires(is_convertible_v<const _OtherIndexType&, index_type> &&
              is_nothrow_constructible_v<index_type, const _OtherIndexType&>)
-  _LIBCPP_HIDE_FROM_ABI constexpr reference operator[](span<_OtherIndexType, rank()> __indices) const {
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr reference operator[](span<_OtherIndexType, rank()> __indices) const {
     return __acc_.access(__ptr_, [&]<size_t... _Idxs>(index_sequence<_Idxs...>) {
       return __map_(__indices[_Idxs]...);
     }(make_index_sequence<rank()>()));
   }
 
-  _LIBCPP_HIDE_FROM_ABI constexpr size_type size() const noexcept {
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr size_type size() const noexcept {
     // Could leave this as only checked in debug mode: semantically size() is never
     // guaranteed to be related to any accessible range
     _LIBCPP_ASSERT_UNCATEGORIZED(
@@ -237,24 +239,28 @@ public:
     swap(__x.__acc_, __y.__acc_);
   }
 
-  _LIBCPP_HIDE_FROM_ABI constexpr const extents_type& extents() const noexcept { return __map_.extents(); };
-  _LIBCPP_HIDE_FROM_ABI constexpr const data_handle_type& data_handle() const noexcept { return __ptr_; };
-  _LIBCPP_HIDE_FROM_ABI constexpr const mapping_type& mapping() const noexcept { return __map_; };
-  _LIBCPP_HIDE_FROM_ABI constexpr const accessor_type& accessor() const noexcept { return __acc_; };
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr const extents_type& extents() const noexcept {
+    return __map_.extents();
+  }
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr const data_handle_type& data_handle() const noexcept { return __ptr_; }
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr const mapping_type& mapping() const noexcept { return __map_; }
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr const accessor_type& accessor() const noexcept { return __acc_; }
 
   // per LWG-4021 "mdspan::is_always_meow() should be noexcept"
-  _LIBCPP_HIDE_FROM_ABI static constexpr bool is_always_unique() noexcept { return mapping_type::is_always_unique(); };
-  _LIBCPP_HIDE_FROM_ABI static constexpr bool is_always_exhaustive() noexcept {
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI static constexpr bool is_always_unique() noexcept {
+    return mapping_type::is_always_unique();
+  }
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI static constexpr bool is_always_exhaustive() noexcept {
     return mapping_type::is_always_exhaustive();
-  };
-  _LIBCPP_HIDE_FROM_ABI static constexpr bool is_always_strided() noexcept {
+  }
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI static constexpr bool is_always_strided() noexcept {
     return mapping_type::is_always_strided();
-  };
+  }
 
-  _LIBCPP_HIDE_FROM_ABI constexpr bool is_unique() const { return __map_.is_unique(); };
-  _LIBCPP_HIDE_FROM_ABI constexpr bool is_exhaustive() const { return __map_.is_exhaustive(); };
-  _LIBCPP_HIDE_FROM_ABI constexpr bool is_strided() const { return __map_.is_strided(); };
-  _LIBCPP_HIDE_FROM_ABI constexpr index_type stride(rank_type __r) const { return __map_.stride(__r); };
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr bool is_unique() const { return __map_.is_unique(); }
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr bool is_exhaustive() const { return __map_.is_exhaustive(); }
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr bool is_strided() const { return __map_.is_strided(); }
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr index_type stride(rank_type __r) const { return __map_.stride(__r); }
 
 private:
   _LIBCPP_NO_UNIQUE_ADDRESS data_handle_type __ptr_{};
diff --git a/lib/libcxx/include/__memory/addressof.h b/lib/libcxx/include/__memory/addressof.h
index 667071dfc6..52ec94a529 100644
--- a/lib/libcxx/include/__memory/addressof.h
+++ b/lib/libcxx/include/__memory/addressof.h
@@ -19,7 +19,8 @@
 _LIBCPP_BEGIN_NAMESPACE_STD
 
 template <class _Tp>
-inline _LIBCPP_CONSTEXPR_SINCE_CXX17 _LIBCPP_NO_CFI _LIBCPP_HIDE_FROM_ABI _Tp* addressof(_Tp& __x) _NOEXCEPT {
+[[__nodiscard__]] inline _LIBCPP_CONSTEXPR_SINCE_CXX17 _LIBCPP_NO_CFI _LIBCPP_HIDE_FROM_ABI _Tp*
+addressof(_Tp& __x) _NOEXCEPT {
   return __builtin_addressof(__x);
 }
 
@@ -27,24 +28,25 @@ inline _LIBCPP_CONSTEXPR_SINCE_CXX17 _LIBCPP_NO_CFI _LIBCPP_HIDE_FROM_ABI _Tp* a
 // Objective-C++ Automatic Reference Counting uses qualified pointers
 // that require special addressof() signatures.
 template <class _Tp>
-inline _LIBCPP_HIDE_FROM_ABI __strong _Tp* addressof(__strong _Tp& __x) _NOEXCEPT {
+[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI __strong _Tp* addressof(__strong _Tp& __x) _NOEXCEPT {
   return &__x;
 }
 
 #  if __has_feature(objc_arc_weak)
 template <class _Tp>
-inline _LIBCPP_HIDE_FROM_ABI __weak _Tp* addressof(__weak _Tp& __x) _NOEXCEPT {
+[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI __weak _Tp* addressof(__weak _Tp& __x) _NOEXCEPT {
   return &__x;
 }
 #  endif
 
 template <class _Tp>
-inline _LIBCPP_HIDE_FROM_ABI __autoreleasing _Tp* addressof(__autoreleasing _Tp& __x) _NOEXCEPT {
+[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI __autoreleasing _Tp* addressof(__autoreleasing _Tp& __x) _NOEXCEPT {
   return &__x;
 }
 
 template <class _Tp>
-inline _LIBCPP_HIDE_FROM_ABI __unsafe_unretained _Tp* addressof(__unsafe_unretained _Tp& __x) _NOEXCEPT {
+[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI __unsafe_unretained _Tp*
+addressof(__unsafe_unretained _Tp& __x) _NOEXCEPT {
   return &__x;
 }
 #endif
diff --git a/lib/libcxx/include/__memory/align.h b/lib/libcxx/include/__memory/align.h
index 402eac3380..47a7a2d274 100644
--- a/lib/libcxx/include/__memory/align.h
+++ b/lib/libcxx/include/__memory/align.h
@@ -11,6 +11,7 @@
 
 #include <__config>
 #include <__cstddef/size_t.h>
+#include <cstdint>
 
 #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
 #  pragma GCC system_header
@@ -18,7 +19,23 @@
 
 _LIBCPP_BEGIN_NAMESPACE_STD
 
-_LIBCPP_EXPORTED_FROM_ABI void* align(size_t __align, size_t __sz, void*& __ptr, size_t& __space);
+inline namespace __align_inline {
+_LIBCPP_HIDE_FROM_ABI inline void* align(size_t __align, size_t __sz, void*& __ptr, size_t& __space) {
+  void* __r = nullptr;
+  if (__sz <= __space) {
+    char* __p1 = static_cast<char*>(__ptr);
+    char* __p2 = reinterpret_cast<char*>(reinterpret_cast<uintptr_t>(__p1 + (__align - 1)) & -__align);
+    size_t __d = static_cast<size_t>(__p2 - __p1);
+    if (__d <= __space - __sz) {
+      __r   = __p2;
+      __ptr = __r;
+      __space -= __d;
+    }
+  }
+  return __r;
+}
+
+} // namespace __align_inline
 
 _LIBCPP_END_NAMESPACE_STD
 
diff --git a/lib/libcxx/include/__memory/allocate_at_least.h b/lib/libcxx/include/__memory/allocate_at_least.h
index 9b5a8bcbd4..72140d0de2 100644
--- a/lib/libcxx/include/__memory/allocate_at_least.h
+++ b/lib/libcxx/include/__memory/allocate_at_least.h
@@ -19,26 +19,31 @@
 
 _LIBCPP_BEGIN_NAMESPACE_STD
 
+template <class _Pointer, class _SizeT = size_t>
+struct __allocation_result {
+  _Pointer ptr;
+  _SizeT count;
+
+  _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR __allocation_result(_Pointer __ptr, _SizeT __count)
+      : ptr(__ptr), count(__count) {}
+};
+_LIBCPP_CTAD_SUPPORTED_FOR_TYPE(__allocation_result);
+
 #if _LIBCPP_STD_VER >= 23
 
 template <class _Alloc>
 [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr auto __allocate_at_least(_Alloc& __alloc, size_t __n) {
-  return std::allocator_traits<_Alloc>::allocate_at_least(__alloc, __n);
+  auto __res = std::allocator_traits<_Alloc>::allocate_at_least(__alloc, __n);
+  return __allocation_result{__res.ptr, __res.count};
 }
 
 #else
 
-template <class _Pointer>
-struct __allocation_result {
-  _Pointer ptr;
-  size_t count;
-};
-
-template <class _Alloc>
+template <class _Alloc, class _Traits = allocator_traits<_Alloc> >
 [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI
-_LIBCPP_CONSTEXPR __allocation_result<typename allocator_traits<_Alloc>::pointer>
+_LIBCPP_CONSTEXPR __allocation_result<typename _Traits::pointer, typename _Traits::size_type>
 __allocate_at_least(_Alloc& __alloc, size_t __n) {
-  return {__alloc.allocate(__n), __n};
+  return __allocation_result<typename _Traits::pointer, typename _Traits::size_type>(__alloc.allocate(__n), __n);
 }
 
 #endif // _LIBCPP_STD_VER >= 23
diff --git a/lib/libcxx/include/__memory/allocator.h b/lib/libcxx/include/__memory/allocator.h
index 52f4122a9b..609b305a12 100644
--- a/lib/libcxx/include/__memory/allocator.h
+++ b/lib/libcxx/include/__memory/allocator.h
@@ -14,7 +14,6 @@
 #include <__cstddef/ptrdiff_t.h>
 #include <__cstddef/size_t.h>
 #include <__memory/addressof.h>
-#include <__memory/allocate_at_least.h>
 #include <__memory/allocator_traits.h>
 #include <__new/allocate.h>
 #include <__new/exceptions.h>
@@ -51,33 +50,21 @@ public:
 };
 #endif // _LIBCPP_STD_VER <= 17
 
-// This class provides a non-trivial default constructor to the class that derives from it
-// if the condition is satisfied.
-//
-// The second template parameter exists to allow giving a unique type to __non_trivial_if,
-// which makes it possible to avoid breaking the ABI when making this a base class of an
-// existing class. Without that, imagine we have classes D1 and D2, both of which used to
-// have no base classes, but which now derive from __non_trivial_if. The layout of a class
-// that inherits from both D1 and D2 will change because the two __non_trivial_if base
-// classes are not allowed to share the same address.
-//
-// By making those __non_trivial_if base classes unique, we work around this problem and
-// it is safe to start deriving from __non_trivial_if in existing classes.
-template <bool _Cond, class _Unique>
-struct __non_trivial_if {};
+template <bool, class _Unique>
+struct __non_trivially_default_constructible_if {};
 
 template <class _Unique>
-struct __non_trivial_if<true, _Unique> {
-  _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR __non_trivial_if() _NOEXCEPT {}
+struct __non_trivially_default_constructible_if<true, _Unique> {
+  _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR __non_trivially_default_constructible_if() {}
 };
 
-// allocator
-//
-// Note: For ABI compatibility between C++20 and previous standards, we make
-//       allocator<void> trivial in C++20.
-
 template <class _Tp>
-class allocator : private __non_trivial_if<!is_void<_Tp>::value, allocator<_Tp> > {
+class allocator
+// TODO(LLVM 24): Remove the opt-out
+#ifdef _LIBCPP_DEPRECATED_ABI_NON_TRIVIAL_ALLOCATOR
+    : __non_trivially_default_constructible_if<!is_void<_Tp>::value, allocator<_Tp> >
+#endif
+{
   static_assert(!is_const<_Tp>::value, "std::allocator does not support const types");
   static_assert(!is_volatile<_Tp>::value, "std::allocator does not support volatile types");
 
@@ -133,10 +120,11 @@ public:
     typedef allocator<_Up> other;
   };
 
-  _LIBCPP_DEPRECATED_IN_CXX17 _LIBCPP_HIDE_FROM_ABI pointer address(reference __x) const _NOEXCEPT {
+  [[__nodiscard__]] _LIBCPP_DEPRECATED_IN_CXX17 _LIBCPP_HIDE_FROM_ABI pointer address(reference __x) const _NOEXCEPT {
     return std::addressof(__x);
   }
-  _LIBCPP_DEPRECATED_IN_CXX17 _LIBCPP_HIDE_FROM_ABI const_pointer address(const_reference __x) const _NOEXCEPT {
+  [[__nodiscard__]] _LIBCPP_DEPRECATED_IN_CXX17 _LIBCPP_HIDE_FROM_ABI const_pointer
+  address(const_reference __x) const _NOEXCEPT {
     return std::addressof(__x);
   }
 
@@ -144,7 +132,7 @@ public:
     return allocate(__n);
   }
 
-  _LIBCPP_DEPRECATED_IN_CXX17 _LIBCPP_HIDE_FROM_ABI size_type max_size() const _NOEXCEPT {
+  [[__nodiscard__]] _LIBCPP_DEPRECATED_IN_CXX17 _LIBCPP_HIDE_FROM_ABI size_type max_size() const _NOEXCEPT {
     return size_type(~0) / sizeof(_Tp);
   }
 
diff --git a/lib/libcxx/include/__memory/allocator_traits.h b/lib/libcxx/include/__memory/allocator_traits.h
index 46c247f704..b38d7293a3 100644
--- a/lib/libcxx/include/__memory/allocator_traits.h
+++ b/lib/libcxx/include/__memory/allocator_traits.h
@@ -314,23 +314,25 @@ struct allocator_traits {
   }
 
   template <class _Ap = _Alloc, __enable_if_t<__has_max_size_v<const _Ap>, int> = 0>
-  _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 static size_type max_size(const allocator_type& __a) _NOEXCEPT {
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 static size_type
+  max_size(const allocator_type& __a) _NOEXCEPT {
     _LIBCPP_SUPPRESS_DEPRECATED_PUSH
     return __a.max_size();
     _LIBCPP_SUPPRESS_DEPRECATED_POP
   }
   template <class _Ap = _Alloc, __enable_if_t<!__has_max_size_v<const _Ap>, int> = 0>
-  _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 static size_type max_size(const allocator_type&) _NOEXCEPT {
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 static size_type
+  max_size(const allocator_type&) _NOEXCEPT {
     return numeric_limits<size_type>::max() / sizeof(value_type);
   }
 
   template <class _Ap = _Alloc, __enable_if_t<__has_select_on_container_copy_construction_v<const _Ap>, int> = 0>
-  _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 static allocator_type
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 static allocator_type
   select_on_container_copy_construction(const allocator_type& __a) {
     return __a.select_on_container_copy_construction();
   }
   template <class _Ap = _Alloc, __enable_if_t<!__has_select_on_container_copy_construction_v<const _Ap>, int> = 0>
-  _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 static allocator_type
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 static allocator_type
   select_on_container_copy_construction(const allocator_type& __a) {
     return __a;
   }
diff --git a/lib/libcxx/include/__memory/compressed_pair.h b/lib/libcxx/include/__memory/compressed_pair.h
index 29e503931b..f1f1c92045 100644
--- a/lib/libcxx/include/__memory/compressed_pair.h
+++ b/lib/libcxx/include/__memory/compressed_pair.h
@@ -28,8 +28,8 @@ _LIBCPP_BEGIN_NAMESPACE_STD
 // understand how it works).                                                                                          //
 // ================================================================================================================== //
 
-// The first member is aligned to the alignment of the second member to force padding in front of the compressed pair
-// in case there are members before it.
+// On GCC, the first member is aligned to the alignment of the second member to force padding in front of the compressed
+// pair in case there are members before it.
 //
 // For example:
 // (assuming x86-64 linux)
@@ -52,7 +52,10 @@ _LIBCPP_BEGIN_NAMESPACE_STD
 //
 // Furthermore, that alignment must be the same as what was used in the old __compressed_pair layout, so we must
 // handle reference types specially since alignof(T&) == alignof(T).
-// See https://github.com/llvm/llvm-project/issues/118559.
+// See https://llvm.org/PR118559.
+//
+// On Clang, this is unnecessary, since we use anonymous structs instead, which automatically handle the alignment
+// correctly.
 
 #ifndef _LIBCPP_ABI_NO_COMPRESSED_PAIR_PADDING
 
@@ -64,7 +67,7 @@ inline const size_t __compressed_pair_alignment<_Tp&> = _LIBCPP_ALIGNOF(void*);
 
 template <class _ToPad>
 inline const bool __is_reference_or_unpadded_object =
-    (is_empty<_ToPad>::value && !__libcpp_is_final<_ToPad>::value) || sizeof(_ToPad) == __datasizeof_v<_ToPad>;
+    (is_empty<_ToPad>::value && !__is_final_v<_ToPad>) || sizeof(_ToPad) == __datasizeof_v<_ToPad>;
 
 template <class _Tp>
 inline const bool __is_reference_or_unpadded_object<_Tp&> = true;
@@ -80,6 +83,10 @@ class __compressed_pair_padding {
 template <class _ToPad>
 class __compressed_pair_padding<_ToPad, true> {};
 
+#  define _LIBCPP_COMPRESSED_ELEMENT(T1, Initializer1)                                                                 \
+    _LIBCPP_NO_UNIQUE_ADDRESS T1 Initializer1;                                                                         \
+    _LIBCPP_NO_UNIQUE_ADDRESS ::std::__compressed_pair_padding<T1> _LIBCPP_CONCAT3(__padding_, __LINE__, _)
+
 // TODO: Fix the ABI for GCC as well once https://gcc.gnu.org/bugzilla/show_bug.cgi?id=121637 is fixed
 #  ifdef _LIBCPP_COMPILER_GCC
 #    define _LIBCPP_COMPRESSED_PAIR(T1, Initializer1, T2, Initializer2)                                                \
@@ -100,8 +107,7 @@ class __compressed_pair_padding<_ToPad, true> {};
 #  else
 #    define _LIBCPP_COMPRESSED_PAIR(T1, Initializer1, T2, Initializer2)                                                \
       struct {                                                                                                         \
-        _LIBCPP_NO_UNIQUE_ADDRESS                                                                                      \
-        __attribute__((__aligned__(::std::__compressed_pair_alignment<T2>))) T1 Initializer1;                          \
+        _LIBCPP_NO_UNIQUE_ADDRESS T1 Initializer1;                                                                     \
         _LIBCPP_NO_UNIQUE_ADDRESS ::std::__compressed_pair_padding<T1> _LIBCPP_CONCAT3(__padding1_, __LINE__, _);      \
         _LIBCPP_NO_UNIQUE_ADDRESS T2 Initializer2;                                                                     \
         _LIBCPP_NO_UNIQUE_ADDRESS ::std::__compressed_pair_padding<T2> _LIBCPP_CONCAT3(__padding2_, __LINE__, _);      \
@@ -109,9 +115,7 @@ class __compressed_pair_padding<_ToPad, true> {};
 
 #    define _LIBCPP_COMPRESSED_TRIPLE(T1, Initializer1, T2, Initializer2, T3, Initializer3)                            \
       struct {                                                                                                         \
-        _LIBCPP_NO_UNIQUE_ADDRESS                                                                                      \
-        __attribute__((__aligned__(::std::__compressed_pair_alignment<T2>),                                            \
-                       __aligned__(::std::__compressed_pair_alignment<T3>))) T1 Initializer1;                          \
+        _LIBCPP_NO_UNIQUE_ADDRESS T1 Initializer1;                                                                     \
         _LIBCPP_NO_UNIQUE_ADDRESS ::std::__compressed_pair_padding<T1> _LIBCPP_CONCAT3(__padding1_, __LINE__, _);      \
         _LIBCPP_NO_UNIQUE_ADDRESS T2 Initializer2;                                                                     \
         _LIBCPP_NO_UNIQUE_ADDRESS ::std::__compressed_pair_padding<T2> _LIBCPP_CONCAT3(__padding2_, __LINE__, _);      \
@@ -121,6 +125,8 @@ class __compressed_pair_padding<_ToPad, true> {};
 #  endif
 
 #else
+#  define _LIBCPP_COMPRESSED_ELEMENT(T1, Initializer1) _LIBCPP_NO_UNIQUE_ADDRESS T1 Initializer1
+
 #  define _LIBCPP_COMPRESSED_PAIR(T1, Name1, T2, Name2)                                                                \
     _LIBCPP_NO_UNIQUE_ADDRESS T1 Name1;                                                                                \
     _LIBCPP_NO_UNIQUE_ADDRESS T2 Name2
diff --git a/lib/libcxx/include/__memory/construct_at.h b/lib/libcxx/include/__memory/construct_at.h
index b64e64b5a2..5378c03aba 100644
--- a/lib/libcxx/include/__memory/construct_at.h
+++ b/lib/libcxx/include/__memory/construct_at.h
@@ -14,7 +14,6 @@
 #include <__config>
 #include <__memory/addressof.h>
 #include <__new/placement_new_delete.h>
-#include <__type_traits/enable_if.h>
 #include <__type_traits/is_array.h>
 #include <__utility/declval.h>
 #include <__utility/forward.h>
@@ -33,7 +32,7 @@ _LIBCPP_BEGIN_NAMESPACE_STD
 #if _LIBCPP_STD_VER >= 20
 
 template <class _Tp, class... _Args, class = decltype(::new(std::declval<void*>()) _Tp(std::declval<_Args>()...))>
-_LIBCPP_HIDE_FROM_ABI constexpr _Tp* construct_at(_Tp* __location, _Args&&... __args) {
+_LIBCPP_HIDE_FROM_ABI constexpr _Tp* construct_at(_Tp* _LIBCPP_DIAGNOSE_NULLPTR __location, _Args&&... __args) {
   _LIBCPP_ASSERT_NON_NULL(__location != nullptr, "null pointer given to construct_at");
   return ::new (static_cast<void*>(__location)) _Tp(std::forward<_Args>(__args)...);
 }
@@ -55,35 +54,25 @@ _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 _Tp* __construct_at(_Tp* __l
 // The internal functions are available regardless of the language version (with the exception of the `__destroy_at`
 // taking an array).
 
-template <class _Tp, __enable_if_t<!is_array<_Tp>::value, int> = 0>
+template <class _Tp>
 _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 void __destroy_at(_Tp* __loc) {
   _LIBCPP_ASSERT_NON_NULL(__loc != nullptr, "null pointer given to destroy_at");
-  __loc->~_Tp();
-}
-
 #if _LIBCPP_STD_VER >= 20
-template <class _Tp, __enable_if_t<is_array<_Tp>::value, int> = 0>
-_LIBCPP_HIDE_FROM_ABI constexpr void __destroy_at(_Tp* __loc) {
-  _LIBCPP_ASSERT_NON_NULL(__loc != nullptr, "null pointer given to destroy_at");
-  for (auto&& __val : *__loc)
-    std::__destroy_at(std::addressof(__val));
-}
+  if constexpr (is_array_v<_Tp>) {
+    for (auto&& __val : *__loc)
+      std::__destroy_at(std::addressof(__val));
+  } else
 #endif
+  {
+    __loc->~_Tp();
+  }
+}
 
 #if _LIBCPP_STD_VER >= 17
-
-template <class _Tp, enable_if_t<!is_array_v<_Tp>, int> = 0>
-_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 void destroy_at(_Tp* __loc) {
+template <class _Tp>
+_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 void destroy_at(_Tp* _LIBCPP_DIAGNOSE_NULLPTR __loc) {
   std::__destroy_at(__loc);
 }
-
-#  if _LIBCPP_STD_VER >= 20
-template <class _Tp, enable_if_t<is_array_v<_Tp>, int> = 0>
-_LIBCPP_HIDE_FROM_ABI constexpr void destroy_at(_Tp* __loc) {
-  std::__destroy_at(__loc);
-}
-#  endif
-
 #endif // _LIBCPP_STD_VER >= 17
 
 _LIBCPP_END_NAMESPACE_STD
diff --git a/lib/libcxx/include/__memory/inout_ptr.h b/lib/libcxx/include/__memory/inout_ptr.h
index ef345fe469..0fa685afb2 100644
--- a/lib/libcxx/include/__memory/inout_ptr.h
+++ b/lib/libcxx/include/__memory/inout_ptr.h
@@ -96,7 +96,7 @@ private:
 };
 
 template <class _Pointer = void, class _Smart, class... _Args>
-_LIBCPP_HIDE_FROM_ABI auto inout_ptr(_Smart& __s, _Args&&... __args) {
+[[nodiscard]] _LIBCPP_HIDE_FROM_ABI auto inout_ptr(_Smart& __s, _Args&&... __args) {
   using _Ptr = conditional_t<is_void_v<_Pointer>, __pointer_of_t<_Smart>, _Pointer>;
   return std::inout_ptr_t<_Smart, _Ptr, _Args&&...>(__s, std::forward<_Args>(__args)...);
 }
diff --git a/lib/libcxx/include/__memory/is_sufficiently_aligned.h b/lib/libcxx/include/__memory/is_sufficiently_aligned.h
index 4280920cab..93d24aaf78 100644
--- a/lib/libcxx/include/__memory/is_sufficiently_aligned.h
+++ b/lib/libcxx/include/__memory/is_sufficiently_aligned.h
@@ -23,7 +23,7 @@ _LIBCPP_BEGIN_NAMESPACE_STD
 #if _LIBCPP_STD_VER >= 26
 
 template <size_t _Alignment, class _Tp>
-_LIBCPP_HIDE_FROM_ABI bool is_sufficiently_aligned(_Tp* __ptr) {
+[[nodiscard]] _LIBCPP_HIDE_FROM_ABI bool is_sufficiently_aligned(_Tp* __ptr) {
   return reinterpret_cast<uintptr_t>(__ptr) % _Alignment == 0;
 }
 
diff --git a/lib/libcxx/include/__memory/out_ptr.h b/lib/libcxx/include/__memory/out_ptr.h
index e498e3307b..23a77f6a0f 100644
--- a/lib/libcxx/include/__memory/out_ptr.h
+++ b/lib/libcxx/include/__memory/out_ptr.h
@@ -88,7 +88,7 @@ private:
 };
 
 template <class _Pointer = void, class _Smart, class... _Args>
-_LIBCPP_HIDE_FROM_ABI auto out_ptr(_Smart& __s, _Args&&... __args) {
+[[nodiscard]] _LIBCPP_HIDE_FROM_ABI auto out_ptr(_Smart& __s, _Args&&... __args) {
   using _Ptr = conditional_t<is_void_v<_Pointer>, __pointer_of_t<_Smart>, _Pointer>;
   return std::out_ptr_t<_Smart, _Ptr, _Args&&...>(__s, std::forward<_Args>(__args)...);
 }
diff --git a/lib/libcxx/include/__memory/pointer_traits.h b/lib/libcxx/include/__memory/pointer_traits.h
index 8c7f8dff1b..62fcd93263 100644
--- a/lib/libcxx/include/__memory/pointer_traits.h
+++ b/lib/libcxx/include/__memory/pointer_traits.h
@@ -255,7 +255,7 @@ concept __resettable_smart_pointer_with_args = requires(_Smart __s, _Pointer __p
 // This function ensures safe conversions between fancy pointers at compile-time, where we avoid casts from/to
 // `__void_pointer` by obtaining the underlying raw pointer from the fancy pointer using `std::to_address`,
 // then dereferencing it to retrieve the pointed-to object, and finally constructing the target fancy pointer
-// to that object using the `std::pointer_traits<>::pinter_to` function.
+// to that object using the `std::pointer_traits<>::pointer_to` function.
 template <class _PtrTo, class _PtrFrom>
 _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI _PtrTo __static_fancy_pointer_cast(const _PtrFrom& __p) {
   using __ptr_traits   = pointer_traits<_PtrTo>;
diff --git a/lib/libcxx/include/__memory/raw_storage_iterator.h b/lib/libcxx/include/__memory/raw_storage_iterator.h
index 0e8b909070..dff0fed3b7 100644
--- a/lib/libcxx/include/__memory/raw_storage_iterator.h
+++ b/lib/libcxx/include/__memory/raw_storage_iterator.h
@@ -28,15 +28,9 @@ _LIBCPP_BEGIN_NAMESPACE_STD
 
 #if _LIBCPP_STD_VER <= 17 || defined(_LIBCPP_ENABLE_CXX20_REMOVED_RAW_STORAGE_ITERATOR)
 
-_LIBCPP_SUPPRESS_DEPRECATED_PUSH
 template <class _OutputIterator, class _Tp>
 class _LIBCPP_DEPRECATED_IN_CXX17 raw_storage_iterator
-#  if _LIBCPP_STD_VER <= 14 || !defined(_LIBCPP_ABI_NO_ITERATOR_BASES)
-    : public iterator<output_iterator_tag, void, void, void, void>
-#  endif
-{
-  _LIBCPP_SUPPRESS_DEPRECATED_POP
-
+    : public __iterator_base<raw_storage_iterator<_OutputIterator, _Tp>, output_iterator_tag, void, void, void, void> {
 private:
   _OutputIterator __x_;
 
@@ -52,7 +46,7 @@ public:
   typedef void reference;
 
   _LIBCPP_HIDE_FROM_ABI explicit raw_storage_iterator(_OutputIterator __x) : __x_(__x) {}
-  _LIBCPP_HIDE_FROM_ABI raw_storage_iterator& operator*() { return *this; }
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI raw_storage_iterator& operator*() { return *this; }
   _LIBCPP_HIDE_FROM_ABI raw_storage_iterator& operator=(const _Tp& __element) {
     ::new ((void*)std::addressof(*__x_)) _Tp(__element);
     return *this;
@@ -73,7 +67,7 @@ public:
     return __t;
   }
 #  if _LIBCPP_STD_VER >= 14
-  _LIBCPP_HIDE_FROM_ABI _OutputIterator base() const { return __x_; }
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI _OutputIterator base() const { return __x_; }
 #  endif
 };
 
diff --git a/lib/libcxx/include/__memory/shared_count.h b/lib/libcxx/include/__memory/shared_count.h
index dad20bcabd..b40d8c9cf7 100644
--- a/lib/libcxx/include/__memory/shared_count.h
+++ b/lib/libcxx/include/__memory/shared_count.h
@@ -22,37 +22,10 @@ _LIBCPP_BEGIN_NAMESPACE_STD
 // NOTE: Relaxed and acq/rel atomics (for increment and decrement respectively)
 // should be sufficient for thread safety.
 // See https://llvm.org/PR22803
-#if (defined(__clang__) && __has_builtin(__atomic_add_fetch) && defined(__ATOMIC_RELAXED) &&                           \
-     defined(__ATOMIC_ACQ_REL)) ||                                                                                     \
-    defined(_LIBCPP_COMPILER_GCC)
-#  define _LIBCPP_HAS_BUILTIN_ATOMIC_SUPPORT 1
-#else
-#  define _LIBCPP_HAS_BUILTIN_ATOMIC_SUPPORT 0
-#endif
-
-template <class _ValueType>
-inline _LIBCPP_HIDE_FROM_ABI _ValueType __libcpp_relaxed_load(_ValueType const* __value) {
-#if _LIBCPP_HAS_THREADS && defined(__ATOMIC_RELAXED) &&                                                                \
-    (__has_builtin(__atomic_load_n) || defined(_LIBCPP_COMPILER_GCC))
-  return __atomic_load_n(__value, __ATOMIC_RELAXED);
-#else
-  return *__value;
-#endif
-}
-
-template <class _ValueType>
-inline _LIBCPP_HIDE_FROM_ABI _ValueType __libcpp_acquire_load(_ValueType const* __value) {
-#if _LIBCPP_HAS_THREADS && defined(__ATOMIC_ACQUIRE) &&                                                                \
-    (__has_builtin(__atomic_load_n) || defined(_LIBCPP_COMPILER_GCC))
-  return __atomic_load_n(__value, __ATOMIC_ACQUIRE);
-#else
-  return *__value;
-#endif
-}
 
 template <class _Tp>
 inline _LIBCPP_HIDE_FROM_ABI _Tp __libcpp_atomic_refcount_increment(_Tp& __t) _NOEXCEPT {
-#if _LIBCPP_HAS_BUILTIN_ATOMIC_SUPPORT && _LIBCPP_HAS_THREADS
+#if _LIBCPP_HAS_THREADS
   return __atomic_add_fetch(std::addressof(__t), 1, __ATOMIC_RELAXED);
 #else
   return __t += 1;
@@ -61,7 +34,7 @@ inline _LIBCPP_HIDE_FROM_ABI _Tp __libcpp_atomic_refcount_increment(_Tp& __t) _N
 
 template <class _Tp>
 inline _LIBCPP_HIDE_FROM_ABI _Tp __libcpp_atomic_refcount_decrement(_Tp& __t) _NOEXCEPT {
-#if _LIBCPP_HAS_BUILTIN_ATOMIC_SUPPORT && _LIBCPP_HAS_THREADS
+#if _LIBCPP_HAS_THREADS
   return __atomic_add_fetch(std::addressof(__t), -1, __ATOMIC_ACQ_REL);
 #else
   return __t -= 1;
@@ -95,7 +68,13 @@ public:
     return false;
   }
 #endif
-  _LIBCPP_HIDE_FROM_ABI long use_count() const _NOEXCEPT { return __libcpp_relaxed_load(&__shared_owners_) + 1; }
+  _LIBCPP_HIDE_FROM_ABI long use_count() const _NOEXCEPT {
+#if _LIBCPP_HAS_THREADS
+    return __atomic_load_n(&__shared_owners_, __ATOMIC_RELAXED) + 1;
+#else
+    return __shared_owners_ + 1;
+#endif
+  }
 };
 
 class _LIBCPP_EXPORTED_FROM_ABI __shared_weak_count : private __shared_count {
diff --git a/lib/libcxx/include/__memory/shared_ptr.h b/lib/libcxx/include/__memory/shared_ptr.h
index 0cbd995105..4fbd0af984 100644
--- a/lib/libcxx/include/__memory/shared_ptr.h
+++ b/lib/libcxx/include/__memory/shared_ptr.h
@@ -41,19 +41,18 @@
 #include <__type_traits/enable_if.h>
 #include <__type_traits/integral_constant.h>
 #include <__type_traits/is_array.h>
-#include <__type_traits/is_bounded_array.h>
 #include <__type_traits/is_constructible.h>
 #include <__type_traits/is_convertible.h>
 #include <__type_traits/is_function.h>
 #include <__type_traits/is_reference.h>
 #include <__type_traits/is_same.h>
-#include <__type_traits/is_unbounded_array.h>
 #include <__type_traits/nat.h>
 #include <__type_traits/negation.h>
 #include <__type_traits/remove_cv.h>
 #include <__type_traits/remove_extent.h>
 #include <__type_traits/remove_reference.h>
 #include <__utility/declval.h>
+#include <__utility/exception_guard.h>
 #include <__utility/forward.h>
 #include <__utility/move.h>
 #include <__utility/swap.h>
@@ -78,7 +77,7 @@ public:
   _LIBCPP_HIDE_FROM_ABI bad_weak_ptr(const bad_weak_ptr&) _NOEXCEPT            = default;
   _LIBCPP_HIDE_FROM_ABI bad_weak_ptr& operator=(const bad_weak_ptr&) _NOEXCEPT = default;
   ~bad_weak_ptr() _NOEXCEPT override;
-  const char* what() const _NOEXCEPT override;
+  [[__nodiscard__]] const char* what() const _NOEXCEPT override;
 };
 
 [[__noreturn__]] inline _LIBCPP_HIDE_FROM_ABI void __throw_bad_weak_ptr() {
@@ -316,10 +315,8 @@ public:
 #endif
 
   // A shared_ptr contains only two raw pointers which point to the heap and move constructing already doesn't require
-  // any bookkeeping, so it's always trivially relocatable. It is also replaceable because assignment just rebinds the
-  // shared_ptr to manage a different object.
+  // any bookkeeping, so it's always trivially relocatable.
   using __trivially_relocatable _LIBCPP_NODEBUG = shared_ptr;
-  using __replaceable _LIBCPP_NODEBUG           = shared_ptr;
 
 private:
   element_type* __ptr_;
@@ -352,23 +349,16 @@ public:
 
   template <class _Yp, class _Dp, __enable_if_t<__shared_ptr_deleter_ctor_reqs<_Dp, _Yp, _Tp>::value, int> = 0>
   _LIBCPP_HIDE_FROM_ABI shared_ptr(_Yp* __p, _Dp __d) : __ptr_(__p) {
-#if _LIBCPP_HAS_EXCEPTIONS
-    try {
-#endif // _LIBCPP_HAS_EXCEPTIONS
-      typedef typename __shared_ptr_default_allocator<_Yp>::type _AllocT;
-      typedef __shared_ptr_pointer<_Yp*, _Dp, _AllocT> _CntrlBlk;
+    auto __guard = std::__make_exception_guard([&] { __d(__p); });
+    typedef typename __shared_ptr_default_allocator<_Yp>::type _AllocT;
+    typedef __shared_ptr_pointer<_Yp*, _Dp, _AllocT> _CntrlBlk;
 #ifndef _LIBCPP_CXX03_LANG
-      __cntrl_ = new _CntrlBlk(__p, std::move(__d), _AllocT());
+    __cntrl_ = new _CntrlBlk(__p, std::move(__d), _AllocT());
 #else
     __cntrl_ = new _CntrlBlk(__p, __d, _AllocT());
 #endif // not _LIBCPP_CXX03_LANG
-      __enable_weak_this(__p, __p);
-#if _LIBCPP_HAS_EXCEPTIONS
-    } catch (...) {
-      __d(__p);
-      throw;
-    }
-#endif // _LIBCPP_HAS_EXCEPTIONS
+    __enable_weak_this(__p, __p);
+    __guard.__complete();
   }
 
   template <class _Yp,
@@ -376,28 +366,21 @@ public:
             class _Alloc,
             __enable_if_t<__shared_ptr_deleter_ctor_reqs<_Dp, _Yp, _Tp>::value, int> = 0>
   _LIBCPP_HIDE_FROM_ABI shared_ptr(_Yp* __p, _Dp __d, _Alloc __a) : __ptr_(__p) {
-#if _LIBCPP_HAS_EXCEPTIONS
-    try {
-#endif // _LIBCPP_HAS_EXCEPTIONS
-      typedef __shared_ptr_pointer<_Yp*, _Dp, _Alloc> _CntrlBlk;
-      typedef typename __allocator_traits_rebind<_Alloc, _CntrlBlk>::type _A2;
-      typedef __allocator_destructor<_A2> _D2;
-      _A2 __a2(__a);
-      unique_ptr<_CntrlBlk, _D2> __hold2(__a2.allocate(1), _D2(__a2, 1));
-      ::new ((void*)std::addressof(*__hold2.get()))
+    auto __guard = std::__make_exception_guard([&] { __d(__p); });
+    typedef __shared_ptr_pointer<_Yp*, _Dp, _Alloc> _CntrlBlk;
+    typedef typename __allocator_traits_rebind<_Alloc, _CntrlBlk>::type _A2;
+    typedef __allocator_destructor<_A2> _D2;
+    _A2 __a2(__a);
+    unique_ptr<_CntrlBlk, _D2> __hold2(__a2.allocate(1), _D2(__a2, 1));
+    ::new ((void*)std::addressof(*__hold2.get()))
 #ifndef _LIBCPP_CXX03_LANG
-          _CntrlBlk(__p, std::move(__d), __a);
+        _CntrlBlk(__p, std::move(__d), __a);
 #else
         _CntrlBlk(__p, __d, __a);
 #endif // not _LIBCPP_CXX03_LANG
-      __cntrl_ = std::addressof(*__hold2.release());
-      __enable_weak_this(__p, __p);
-#if _LIBCPP_HAS_EXCEPTIONS
-    } catch (...) {
-      __d(__p);
-      throw;
-    }
-#endif // _LIBCPP_HAS_EXCEPTIONS
+    __cntrl_ = std::addressof(*__hold2.release());
+    __enable_weak_this(__p, __p);
+    __guard.__complete();
   }
 
   template <class _Dp>
@@ -406,22 +389,15 @@ public:
       _Dp __d,
       __enable_if_t<__shared_ptr_nullptr_deleter_ctor_reqs<_Dp>::value, __nullptr_sfinae_tag> = __nullptr_sfinae_tag())
       : __ptr_(nullptr) {
-#if _LIBCPP_HAS_EXCEPTIONS
-    try {
-#endif // _LIBCPP_HAS_EXCEPTIONS
-      typedef typename __shared_ptr_default_allocator<_Tp>::type _AllocT;
-      typedef __shared_ptr_pointer<nullptr_t, _Dp, _AllocT> _CntrlBlk;
+    auto __guard = std::__make_exception_guard([&] { __d(__p); });
+    typedef typename __shared_ptr_default_allocator<_Tp>::type _AllocT;
+    typedef __shared_ptr_pointer<nullptr_t, _Dp, _AllocT> _CntrlBlk;
 #ifndef _LIBCPP_CXX03_LANG
-      __cntrl_ = new _CntrlBlk(__p, std::move(__d), _AllocT());
+    __cntrl_ = new _CntrlBlk(__p, std::move(__d), _AllocT());
 #else
     __cntrl_ = new _CntrlBlk(__p, __d, _AllocT());
 #endif // not _LIBCPP_CXX03_LANG
-#if _LIBCPP_HAS_EXCEPTIONS
-    } catch (...) {
-      __d(__p);
-      throw;
-    }
-#endif // _LIBCPP_HAS_EXCEPTIONS
+    __guard.__complete();
   }
 
   template <class _Dp, class _Alloc>
@@ -431,27 +407,20 @@ public:
       _Alloc __a,
       __enable_if_t<__shared_ptr_nullptr_deleter_ctor_reqs<_Dp>::value, __nullptr_sfinae_tag> = __nullptr_sfinae_tag())
       : __ptr_(nullptr) {
-#if _LIBCPP_HAS_EXCEPTIONS
-    try {
-#endif // _LIBCPP_HAS_EXCEPTIONS
-      typedef __shared_ptr_pointer<nullptr_t, _Dp, _Alloc> _CntrlBlk;
-      typedef typename __allocator_traits_rebind<_Alloc, _CntrlBlk>::type _A2;
-      typedef __allocator_destructor<_A2> _D2;
-      _A2 __a2(__a);
-      unique_ptr<_CntrlBlk, _D2> __hold2(__a2.allocate(1), _D2(__a2, 1));
-      ::new ((void*)std::addressof(*__hold2.get()))
+    auto __guard = std::__make_exception_guard([&] { __d(__p); });
+    typedef __shared_ptr_pointer<nullptr_t, _Dp, _Alloc> _CntrlBlk;
+    typedef typename __allocator_traits_rebind<_Alloc, _CntrlBlk>::type _A2;
+    typedef __allocator_destructor<_A2> _D2;
+    _A2 __a2(__a);
+    unique_ptr<_CntrlBlk, _D2> __hold2(__a2.allocate(1), _D2(__a2, 1));
+    ::new ((void*)std::addressof(*__hold2.get()))
 #ifndef _LIBCPP_CXX03_LANG
-          _CntrlBlk(__p, std::move(__d), __a);
+        _CntrlBlk(__p, std::move(__d), __a);
 #else
         _CntrlBlk(__p, __d, __a);
 #endif // not _LIBCPP_CXX03_LANG
-      __cntrl_ = std::addressof(*__hold2.release());
-#if _LIBCPP_HAS_EXCEPTIONS
-    } catch (...) {
-      __d(__p);
-      throw;
-    }
-#endif // _LIBCPP_HAS_EXCEPTIONS
+    __cntrl_ = std::addressof(*__hold2.release());
+    __guard.__complete();
   }
 
   template <class _Yp>
@@ -514,45 +483,16 @@ public:
 
   template <class _Yp,
             class _Dp,
-            __enable_if_t<!is_lvalue_reference<_Dp>::value && __compatible_with<_Yp, _Tp>::value &&
+            __enable_if_t<__compatible_with<_Yp, _Tp>::value &&
                               is_convertible<typename unique_ptr<_Yp, _Dp>::pointer, element_type*>::value,
                           int> = 0>
   _LIBCPP_HIDE_FROM_ABI shared_ptr(unique_ptr<_Yp, _Dp>&& __r) : __ptr_(__r.get()) {
-#if _LIBCPP_STD_VER >= 14
-    if (__ptr_ == nullptr)
-      __cntrl_ = nullptr;
-    else
-#endif
-    {
-      typedef typename __shared_ptr_default_allocator<_Yp>::type _AllocT;
-      typedef __shared_ptr_pointer<typename unique_ptr<_Yp, _Dp>::pointer, _Dp, _AllocT> _CntrlBlk;
-      __cntrl_ = new _CntrlBlk(__r.get(), std::move(__r.get_deleter()), _AllocT());
-      __enable_weak_this(__r.get(), __r.get());
-    }
-    __r.release();
-  }
+    using _AllocT   = typename __shared_ptr_default_allocator<_Yp>::type;
+    using _Deleter  = _If<is_lvalue_reference<_Dp>::value, reference_wrapper<__libcpp_remove_reference_t<_Dp> >, _Dp>;
+    using _CntrlBlk = __shared_ptr_pointer<typename unique_ptr<_Yp, _Dp>::pointer, _Deleter, _AllocT>;
 
-  template <class _Yp,
-            class _Dp,
-            class              = void,
-            __enable_if_t<is_lvalue_reference<_Dp>::value && __compatible_with<_Yp, _Tp>::value &&
-                              is_convertible<typename unique_ptr<_Yp, _Dp>::pointer, element_type*>::value,
-                          int> = 0>
-  _LIBCPP_HIDE_FROM_ABI shared_ptr(unique_ptr<_Yp, _Dp>&& __r) : __ptr_(__r.get()) {
-#if _LIBCPP_STD_VER >= 14
-    if (__ptr_ == nullptr)
-      __cntrl_ = nullptr;
-    else
-#endif
-    {
-      typedef typename __shared_ptr_default_allocator<_Yp>::type _AllocT;
-      typedef __shared_ptr_pointer<typename unique_ptr<_Yp, _Dp>::pointer,
-                                   reference_wrapper<__libcpp_remove_reference_t<_Dp> >,
-                                   _AllocT>
-          _CntrlBlk;
-      __cntrl_ = new _CntrlBlk(__r.get(), std::ref(__r.get_deleter()), _AllocT());
-      __enable_weak_this(__r.get(), __r.get());
-    }
+    __cntrl_ = __ptr_ ? new _CntrlBlk(__r.get(), std::forward<_Dp>(__r.get_deleter()), _AllocT()) : nullptr;
+    __enable_weak_this(__r.get(), __r.get());
     __r.release();
   }
 
@@ -628,37 +568,43 @@ public:
     shared_ptr(__p, __d, __a).swap(*this);
   }
 
-  _LIBCPP_HIDE_FROM_ABI element_type* get() const _NOEXCEPT { return __ptr_; }
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI element_type* get() const _NOEXCEPT { return __ptr_; }
 
-  _LIBCPP_HIDE_FROM_ABI __add_lvalue_reference_t<element_type> operator*() const _NOEXCEPT { return *__ptr_; }
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI __add_lvalue_reference_t<element_type> operator*() const _NOEXCEPT {
+    return *__ptr_;
+  }
 
   _LIBCPP_HIDE_FROM_ABI element_type* operator->() const _NOEXCEPT {
     static_assert(!is_array<_Tp>::value, "std::shared_ptr<T>::operator-> is only valid when T is not an array type.");
     return __ptr_;
   }
 
-  _LIBCPP_HIDE_FROM_ABI long use_count() const _NOEXCEPT { return __cntrl_ ? __cntrl_->use_count() : 0; }
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI long use_count() const _NOEXCEPT {
+    return __cntrl_ ? __cntrl_->use_count() : 0;
+  }
 
 #if _LIBCPP_STD_VER < 20 || defined(_LIBCPP_ENABLE_CXX20_REMOVED_SHARED_PTR_UNIQUE)
-  _LIBCPP_DEPRECATED_IN_CXX17 _LIBCPP_HIDE_FROM_ABI bool unique() const _NOEXCEPT { return use_count() == 1; }
+  [[__nodiscard__]] _LIBCPP_DEPRECATED_IN_CXX17 _LIBCPP_HIDE_FROM_ABI bool unique() const _NOEXCEPT {
+    return use_count() == 1;
+  }
 #endif
 
   _LIBCPP_HIDE_FROM_ABI explicit operator bool() const _NOEXCEPT { return get() != nullptr; }
 
   template <class _Up>
-  _LIBCPP_HIDE_FROM_ABI bool owner_before(shared_ptr<_Up> const& __p) const _NOEXCEPT {
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI bool owner_before(shared_ptr<_Up> const& __p) const _NOEXCEPT {
     return __cntrl_ < __p.__cntrl_;
   }
 
   template <class _Up>
-  _LIBCPP_HIDE_FROM_ABI bool owner_before(weak_ptr<_Up> const& __p) const _NOEXCEPT {
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI bool owner_before(weak_ptr<_Up> const& __p) const _NOEXCEPT {
     return __cntrl_ < __p.__cntrl_;
   }
 
   _LIBCPP_HIDE_FROM_ABI bool __owner_equivalent(const shared_ptr& __p) const { return __cntrl_ == __p.__cntrl_; }
 
 #if _LIBCPP_STD_VER >= 17
-  _LIBCPP_HIDE_FROM_ABI __add_lvalue_reference_t<element_type> operator[](ptrdiff_t __i) const {
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI __add_lvalue_reference_t<element_type> operator[](ptrdiff_t __i) const {
     static_assert(is_array<_Tp>::value, "std::shared_ptr<T>::operator[] is only valid when T is an array type.");
     return __ptr_[__i];
   }
@@ -729,7 +675,7 @@ shared_ptr(unique_ptr<_Tp, _Dp>) -> shared_ptr<_Tp>;
 // std::allocate_shared and std::make_shared
 //
 template <class _Tp, class _Alloc, class... _Args, __enable_if_t<!is_array<_Tp>::value, int> = 0>
-_LIBCPP_HIDE_FROM_ABI shared_ptr<_Tp> allocate_shared(const _Alloc& __a, _Args&&... __args) {
+[[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI shared_ptr<_Tp> allocate_shared(const _Alloc& __a, _Args&&... __args) {
   using _ControlBlock          = __shared_ptr_emplace<_Tp, _Alloc>;
   using _ControlBlockAllocator = typename __allocator_traits_rebind<_Alloc, _ControlBlock>::type;
   __allocation_guard<_ControlBlockAllocator> __guard(__a, 1);
@@ -740,21 +686,21 @@ _LIBCPP_HIDE_FROM_ABI shared_ptr<_Tp> allocate_shared(const _Alloc& __a, _Args&&
 }
 
 template <class _Tp, class... _Args, __enable_if_t<!is_array<_Tp>::value, int> = 0>
-_LIBCPP_HIDE_FROM_ABI shared_ptr<_Tp> make_shared(_Args&&... __args) {
+[[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI shared_ptr<_Tp> make_shared(_Args&&... __args) {
   return std::allocate_shared<_Tp>(allocator<__remove_cv_t<_Tp> >(), std::forward<_Args>(__args)...);
 }
 
 #if _LIBCPP_STD_VER >= 20
 
 template <class _Tp, class _Alloc, __enable_if_t<!is_array<_Tp>::value, int> = 0>
-_LIBCPP_HIDE_FROM_ABI shared_ptr<_Tp> allocate_shared_for_overwrite(const _Alloc& __a) {
+[[nodiscard]] _LIBCPP_HIDE_FROM_ABI shared_ptr<_Tp> allocate_shared_for_overwrite(const _Alloc& __a) {
   using _ForOverwriteAllocator = __allocator_traits_rebind_t<_Alloc, __for_overwrite_tag>;
   _ForOverwriteAllocator __alloc(__a);
   return std::allocate_shared<_Tp>(__alloc);
 }
 
 template <class _Tp, __enable_if_t<!is_array<_Tp>::value, int> = 0>
-_LIBCPP_HIDE_FROM_ABI shared_ptr<_Tp> make_shared_for_overwrite() {
+[[nodiscard]] _LIBCPP_HIDE_FROM_ABI shared_ptr<_Tp> make_shared_for_overwrite() {
   return std::allocate_shared_for_overwrite<_Tp>(allocator<__remove_cv_t<_Tp>>());
 }
 
@@ -946,67 +892,69 @@ _LIBCPP_HIDE_FROM_ABI shared_ptr<_Array> __allocate_shared_bounded_array(const _
 
 // bounded array variants
 template <class _Tp, class _Alloc, __enable_if_t<is_bounded_array<_Tp>::value, int> = 0>
-_LIBCPP_HIDE_FROM_ABI shared_ptr<_Tp> allocate_shared(const _Alloc& __a) {
+[[nodiscard]] _LIBCPP_HIDE_FROM_ABI shared_ptr<_Tp> allocate_shared(const _Alloc& __a) {
   return std::__allocate_shared_bounded_array<_Tp>(__a);
 }
 
 template <class _Tp, class _Alloc, __enable_if_t<is_bounded_array<_Tp>::value, int> = 0>
-_LIBCPP_HIDE_FROM_ABI shared_ptr<_Tp> allocate_shared(const _Alloc& __a, const remove_extent_t<_Tp>& __u) {
+[[nodiscard]] _LIBCPP_HIDE_FROM_ABI shared_ptr<_Tp>
+allocate_shared(const _Alloc& __a, const remove_extent_t<_Tp>& __u) {
   return std::__allocate_shared_bounded_array<_Tp>(__a, __u);
 }
 
 template <class _Tp, class _Alloc, __enable_if_t<is_bounded_array<_Tp>::value, int> = 0>
-_LIBCPP_HIDE_FROM_ABI shared_ptr<_Tp> allocate_shared_for_overwrite(const _Alloc& __a) {
+[[nodiscard]] _LIBCPP_HIDE_FROM_ABI shared_ptr<_Tp> allocate_shared_for_overwrite(const _Alloc& __a) {
   using _ForOverwriteAllocator = __allocator_traits_rebind_t<_Alloc, __for_overwrite_tag>;
   _ForOverwriteAllocator __alloc(__a);
   return std::__allocate_shared_bounded_array<_Tp>(__alloc);
 }
 
 template <class _Tp, __enable_if_t<is_bounded_array<_Tp>::value, int> = 0>
-_LIBCPP_HIDE_FROM_ABI shared_ptr<_Tp> make_shared() {
+[[nodiscard]] _LIBCPP_HIDE_FROM_ABI shared_ptr<_Tp> make_shared() {
   return std::__allocate_shared_bounded_array<_Tp>(allocator<_Tp>());
 }
 
 template <class _Tp, __enable_if_t<is_bounded_array<_Tp>::value, int> = 0>
-_LIBCPP_HIDE_FROM_ABI shared_ptr<_Tp> make_shared(const remove_extent_t<_Tp>& __u) {
+[[nodiscard]] _LIBCPP_HIDE_FROM_ABI shared_ptr<_Tp> make_shared(const remove_extent_t<_Tp>& __u) {
   return std::__allocate_shared_bounded_array<_Tp>(allocator<_Tp>(), __u);
 }
 
 template <class _Tp, __enable_if_t<is_bounded_array<_Tp>::value, int> = 0>
-_LIBCPP_HIDE_FROM_ABI shared_ptr<_Tp> make_shared_for_overwrite() {
+[[nodiscard]] _LIBCPP_HIDE_FROM_ABI shared_ptr<_Tp> make_shared_for_overwrite() {
   return std::__allocate_shared_bounded_array<_Tp>(allocator<__for_overwrite_tag>());
 }
 
 // unbounded array variants
 template <class _Tp, class _Alloc, __enable_if_t<is_unbounded_array<_Tp>::value, int> = 0>
-_LIBCPP_HIDE_FROM_ABI shared_ptr<_Tp> allocate_shared(const _Alloc& __a, size_t __n) {
+[[nodiscard]] _LIBCPP_HIDE_FROM_ABI shared_ptr<_Tp> allocate_shared(const _Alloc& __a, size_t __n) {
   return std::__allocate_shared_unbounded_array<_Tp>(__a, __n);
 }
 
 template <class _Tp, class _Alloc, __enable_if_t<is_unbounded_array<_Tp>::value, int> = 0>
-_LIBCPP_HIDE_FROM_ABI shared_ptr<_Tp> allocate_shared(const _Alloc& __a, size_t __n, const remove_extent_t<_Tp>& __u) {
+[[nodiscard]] _LIBCPP_HIDE_FROM_ABI shared_ptr<_Tp>
+allocate_shared(const _Alloc& __a, size_t __n, const remove_extent_t<_Tp>& __u) {
   return std::__allocate_shared_unbounded_array<_Tp>(__a, __n, __u);
 }
 
 template <class _Tp, class _Alloc, __enable_if_t<is_unbounded_array<_Tp>::value, int> = 0>
-_LIBCPP_HIDE_FROM_ABI shared_ptr<_Tp> allocate_shared_for_overwrite(const _Alloc& __a, size_t __n) {
+[[nodiscard]] _LIBCPP_HIDE_FROM_ABI shared_ptr<_Tp> allocate_shared_for_overwrite(const _Alloc& __a, size_t __n) {
   using _ForOverwriteAllocator = __allocator_traits_rebind_t<_Alloc, __for_overwrite_tag>;
   _ForOverwriteAllocator __alloc(__a);
   return std::__allocate_shared_unbounded_array<_Tp>(__alloc, __n);
 }
 
 template <class _Tp, __enable_if_t<is_unbounded_array<_Tp>::value, int> = 0>
-_LIBCPP_HIDE_FROM_ABI shared_ptr<_Tp> make_shared(size_t __n) {
+[[nodiscard]] _LIBCPP_HIDE_FROM_ABI shared_ptr<_Tp> make_shared(size_t __n) {
   return std::__allocate_shared_unbounded_array<_Tp>(allocator<_Tp>(), __n);
 }
 
 template <class _Tp, __enable_if_t<is_unbounded_array<_Tp>::value, int> = 0>
-_LIBCPP_HIDE_FROM_ABI shared_ptr<_Tp> make_shared(size_t __n, const remove_extent_t<_Tp>& __u) {
+[[nodiscard]] _LIBCPP_HIDE_FROM_ABI shared_ptr<_Tp> make_shared(size_t __n, const remove_extent_t<_Tp>& __u) {
   return std::__allocate_shared_unbounded_array<_Tp>(allocator<_Tp>(), __n, __u);
 }
 
 template <class _Tp, __enable_if_t<is_unbounded_array<_Tp>::value, int> = 0>
-_LIBCPP_HIDE_FROM_ABI shared_ptr<_Tp> make_shared_for_overwrite(size_t __n) {
+[[nodiscard]] _LIBCPP_HIDE_FROM_ABI shared_ptr<_Tp> make_shared_for_overwrite(size_t __n) {
   return std::__allocate_shared_unbounded_array<_Tp>(allocator<__for_overwrite_tag>(), __n);
 }
 
@@ -1135,7 +1083,8 @@ inline _LIBCPP_HIDE_FROM_ABI void swap(shared_ptr<_Tp>& __x, shared_ptr<_Tp>& __
 }
 
 template <class _Tp, class _Up>
-inline _LIBCPP_HIDE_FROM_ABI shared_ptr<_Tp> static_pointer_cast(const shared_ptr<_Up>& __r) _NOEXCEPT {
+[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI shared_ptr<_Tp>
+static_pointer_cast(const shared_ptr<_Up>& __r) _NOEXCEPT {
   return shared_ptr<_Tp>(__r, static_cast< typename shared_ptr<_Tp>::element_type*>(__r.get()));
 }
 
@@ -1143,13 +1092,14 @@ inline _LIBCPP_HIDE_FROM_ABI shared_ptr<_Tp> static_pointer_cast(const shared_pt
 // We don't backport because it is an evolutionary change.
 #if _LIBCPP_STD_VER >= 20
 template <class _Tp, class _Up>
-_LIBCPP_HIDE_FROM_ABI shared_ptr<_Tp> static_pointer_cast(shared_ptr<_Up>&& __r) noexcept {
+[[nodiscard]] _LIBCPP_HIDE_FROM_ABI shared_ptr<_Tp> static_pointer_cast(shared_ptr<_Up>&& __r) noexcept {
   return shared_ptr<_Tp>(std::move(__r), static_cast<typename shared_ptr<_Tp>::element_type*>(__r.get()));
 }
 #endif
 
 template <class _Tp, class _Up>
-inline _LIBCPP_HIDE_FROM_ABI shared_ptr<_Tp> dynamic_pointer_cast(const shared_ptr<_Up>& __r) _NOEXCEPT {
+[[__nodiscard__]] inline
+    _LIBCPP_HIDE_FROM_ABI shared_ptr<_Tp> dynamic_pointer_cast(const shared_ptr<_Up>& __r) _NOEXCEPT {
   typedef typename shared_ptr<_Tp>::element_type _ET;
   _ET* __p = dynamic_cast<_ET*>(__r.get());
   return __p ? shared_ptr<_Tp>(__r, __p) : shared_ptr<_Tp>();
@@ -1159,14 +1109,14 @@ inline _LIBCPP_HIDE_FROM_ABI shared_ptr<_Tp> dynamic_pointer_cast(const shared_p
 // We don't backport because it is an evolutionary change.
 #if _LIBCPP_STD_VER >= 20
 template <class _Tp, class _Up>
-_LIBCPP_HIDE_FROM_ABI shared_ptr<_Tp> dynamic_pointer_cast(shared_ptr<_Up>&& __r) noexcept {
+[[nodiscard]] _LIBCPP_HIDE_FROM_ABI shared_ptr<_Tp> dynamic_pointer_cast(shared_ptr<_Up>&& __r) noexcept {
   auto* __p = dynamic_cast<typename shared_ptr<_Tp>::element_type*>(__r.get());
   return __p ? shared_ptr<_Tp>(std::move(__r), __p) : shared_ptr<_Tp>();
 }
 #endif
 
 template <class _Tp, class _Up>
-_LIBCPP_HIDE_FROM_ABI shared_ptr<_Tp> const_pointer_cast(const shared_ptr<_Up>& __r) _NOEXCEPT {
+[[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI shared_ptr<_Tp> const_pointer_cast(const shared_ptr<_Up>& __r) _NOEXCEPT {
   typedef typename shared_ptr<_Tp>::element_type _RTp;
   return shared_ptr<_Tp>(__r, const_cast<_RTp*>(__r.get()));
 }
@@ -1175,13 +1125,13 @@ _LIBCPP_HIDE_FROM_ABI shared_ptr<_Tp> const_pointer_cast(const shared_ptr<_Up>&
 // We don't backport because it is an evolutionary change.
 #if _LIBCPP_STD_VER >= 20
 template <class _Tp, class _Up>
-_LIBCPP_HIDE_FROM_ABI shared_ptr<_Tp> const_pointer_cast(shared_ptr<_Up>&& __r) noexcept {
+[[nodiscard]] _LIBCPP_HIDE_FROM_ABI shared_ptr<_Tp> const_pointer_cast(shared_ptr<_Up>&& __r) noexcept {
   return shared_ptr<_Tp>(std::move(__r), const_cast<typename shared_ptr<_Tp>::element_type*>(__r.get()));
 }
 #endif
 
 template <class _Tp, class _Up>
-_LIBCPP_HIDE_FROM_ABI shared_ptr<_Tp> reinterpret_pointer_cast(const shared_ptr<_Up>& __r) _NOEXCEPT {
+[[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI shared_ptr<_Tp> reinterpret_pointer_cast(const shared_ptr<_Up>& __r) _NOEXCEPT {
   return shared_ptr<_Tp>(__r, reinterpret_cast< typename shared_ptr<_Tp>::element_type*>(__r.get()));
 }
 
@@ -1189,7 +1139,7 @@ _LIBCPP_HIDE_FROM_ABI shared_ptr<_Tp> reinterpret_pointer_cast(const shared_ptr<
 // We don't backport because it is an evolutionary change.
 #if _LIBCPP_STD_VER >= 20
 template <class _Tp, class _Up>
-_LIBCPP_HIDE_FROM_ABI shared_ptr<_Tp> reinterpret_pointer_cast(shared_ptr<_Up>&& __r) noexcept {
+[[nodiscard]] _LIBCPP_HIDE_FROM_ABI shared_ptr<_Tp> reinterpret_pointer_cast(shared_ptr<_Up>&& __r) noexcept {
   return shared_ptr<_Tp>(std::move(__r), reinterpret_cast<typename shared_ptr<_Tp>::element_type*>(__r.get()));
 }
 #endif
@@ -1197,7 +1147,7 @@ _LIBCPP_HIDE_FROM_ABI shared_ptr<_Tp> reinterpret_pointer_cast(shared_ptr<_Up>&&
 #if _LIBCPP_HAS_RTTI
 
 template <class _Dp, class _Tp>
-inline _LIBCPP_HIDE_FROM_ABI _Dp* get_deleter(const shared_ptr<_Tp>& __p) _NOEXCEPT {
+[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI _Dp* get_deleter(const shared_ptr<_Tp>& __p) _NOEXCEPT {
   return __p.template __get_deleter<_Dp>();
 }
 
@@ -1213,9 +1163,8 @@ public:
 #endif
 
   // A weak_ptr contains only two raw pointers which point to the heap and move constructing already doesn't require
-  // any bookkeeping, so it's always trivially relocatable. It's also replaceable for the same reason.
+  // any bookkeeping, so it's always trivially relocatable.
   using __trivially_relocatable _LIBCPP_NODEBUG = weak_ptr;
-  using __replaceable _LIBCPP_NODEBUG           = weak_ptr;
 
 private:
   element_type* __ptr_;
@@ -1253,15 +1202,19 @@ public:
   _LIBCPP_HIDE_FROM_ABI void swap(weak_ptr& __r) _NOEXCEPT;
   _LIBCPP_HIDE_FROM_ABI void reset() _NOEXCEPT;
 
-  _LIBCPP_HIDE_FROM_ABI long use_count() const _NOEXCEPT { return __cntrl_ ? __cntrl_->use_count() : 0; }
-  _LIBCPP_HIDE_FROM_ABI bool expired() const _NOEXCEPT { return __cntrl_ == nullptr || __cntrl_->use_count() == 0; }
-  _LIBCPP_HIDE_FROM_ABI shared_ptr<_Tp> lock() const _NOEXCEPT;
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI long use_count() const _NOEXCEPT {
+    return __cntrl_ ? __cntrl_->use_count() : 0;
+  }
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI bool expired() const _NOEXCEPT {
+    return __cntrl_ == nullptr || __cntrl_->use_count() == 0;
+  }
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI shared_ptr<_Tp> lock() const _NOEXCEPT;
   template <class _Up>
-  _LIBCPP_HIDE_FROM_ABI bool owner_before(const shared_ptr<_Up>& __r) const _NOEXCEPT {
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI bool owner_before(const shared_ptr<_Up>& __r) const _NOEXCEPT {
     return __cntrl_ < __r.__cntrl_;
   }
   template <class _Up>
-  _LIBCPP_HIDE_FROM_ABI bool owner_before(const weak_ptr<_Up>& __r) const _NOEXCEPT {
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI bool owner_before(const weak_ptr<_Up>& __r) const _NOEXCEPT {
     return __cntrl_ < __r.__cntrl_;
   }
 
@@ -1445,13 +1398,15 @@ protected:
   _LIBCPP_HIDE_FROM_ABI ~enable_shared_from_this() {}
 
 public:
-  _LIBCPP_HIDE_FROM_ABI shared_ptr<_Tp> shared_from_this() { return shared_ptr<_Tp>(__weak_this_); }
-  _LIBCPP_HIDE_FROM_ABI shared_ptr<_Tp const> shared_from_this() const { return shared_ptr<const _Tp>(__weak_this_); }
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI shared_ptr<_Tp> shared_from_this() { return shared_ptr<_Tp>(__weak_this_); }
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI shared_ptr<_Tp const> shared_from_this() const {
+    return shared_ptr<const _Tp>(__weak_this_);
+  }
 
 #if _LIBCPP_STD_VER >= 17
-  _LIBCPP_HIDE_FROM_ABI weak_ptr<_Tp> weak_from_this() _NOEXCEPT { return __weak_this_; }
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI weak_ptr<_Tp> weak_from_this() _NOEXCEPT { return __weak_this_; }
 
-  _LIBCPP_HIDE_FROM_ABI weak_ptr<const _Tp> weak_from_this() const _NOEXCEPT { return __weak_this_; }
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI weak_ptr<const _Tp> weak_from_this() const _NOEXCEPT { return __weak_this_; }
 #endif // _LIBCPP_STD_VER >= 17
 
   template <class _Up>
@@ -1468,7 +1423,7 @@ struct hash<shared_ptr<_Tp> > {
   _LIBCPP_DEPRECATED_IN_CXX17 typedef size_t result_type;
 #endif
 
-  _LIBCPP_HIDE_FROM_ABI size_t operator()(const shared_ptr<_Tp>& __ptr) const _NOEXCEPT {
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI size_t operator()(const shared_ptr<_Tp>& __ptr) const _NOEXCEPT {
     return hash<typename shared_ptr<_Tp>::element_type*>()(__ptr.get());
   }
 };
diff --git a/lib/libcxx/include/__memory/temp_value.h b/lib/libcxx/include/__memory/temp_value.h
index 4a133b3fbc..5285bcab9a 100644
--- a/lib/libcxx/include/__memory/temp_value.h
+++ b/lib/libcxx/include/__memory/temp_value.h
@@ -12,7 +12,6 @@
 #include <__config>
 #include <__memory/addressof.h>
 #include <__memory/allocator_traits.h>
-#include <__type_traits/aligned_storage.h>
 #include <__utility/forward.h>
 
 #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
@@ -26,7 +25,7 @@ struct __temp_value {
   typedef allocator_traits<_Alloc> _Traits;
 
 #ifdef _LIBCPP_CXX03_LANG
-  typename aligned_storage<sizeof(_Tp), _LIBCPP_ALIGNOF(_Tp)>::type __v;
+  _ALIGNAS_TYPE(_Tp) char __v[sizeof(_Tp)];
 #else
   union {
     _Tp __v;
diff --git a/lib/libcxx/include/__memory/uninitialized_algorithms.h b/lib/libcxx/include/__memory/uninitialized_algorithms.h
index e802366400..9182db4b41 100644
--- a/lib/libcxx/include/__memory/uninitialized_algorithms.h
+++ b/lib/libcxx/include/__memory/uninitialized_algorithms.h
@@ -32,7 +32,6 @@
 #include <__type_traits/is_trivially_assignable.h>
 #include <__type_traits/is_trivially_constructible.h>
 #include <__type_traits/is_trivially_relocatable.h>
-#include <__type_traits/is_unbounded_array.h>
 #include <__type_traits/remove_const.h>
 #include <__type_traits/remove_extent.h>
 #include <__utility/exception_guard.h>
@@ -61,17 +60,10 @@ template <class _ValueType, class _InputIterator, class _Sentinel1, class _Forwa
 inline _LIBCPP_HIDE_FROM_ABI pair<_InputIterator, _ForwardIterator> __uninitialized_copy(
     _InputIterator __ifirst, _Sentinel1 __ilast, _ForwardIterator __ofirst, _EndPredicate __stop_copying) {
   _ForwardIterator __idx = __ofirst;
-#if _LIBCPP_HAS_EXCEPTIONS
-  try {
-#endif
-    for (; __ifirst != __ilast && !__stop_copying(__idx); ++__ifirst, (void)++__idx)
-      ::new (static_cast<void*>(std::addressof(*__idx))) _ValueType(*__ifirst);
-#if _LIBCPP_HAS_EXCEPTIONS
-  } catch (...) {
-    std::__destroy(__ofirst, __idx);
-    throw;
-  }
-#endif
+  auto __guard           = std::__make_exception_guard([&] { std::__destroy(__ofirst, __idx); });
+  for (; __ifirst != __ilast && !__stop_copying(__idx); ++__ifirst, (void)++__idx)
+    ::new (static_cast<void*>(std::addressof(*__idx))) _ValueType(*__ifirst);
+  __guard.__complete();
 
   return pair<_InputIterator, _ForwardIterator>(std::move(__ifirst), std::move(__idx));
 }
@@ -91,17 +83,10 @@ template <class _ValueType, class _InputIterator, class _Size, class _ForwardIte
 inline _LIBCPP_HIDE_FROM_ABI pair<_InputIterator, _ForwardIterator>
 __uninitialized_copy_n(_InputIterator __ifirst, _Size __n, _ForwardIterator __ofirst, _EndPredicate __stop_copying) {
   _ForwardIterator __idx = __ofirst;
-#if _LIBCPP_HAS_EXCEPTIONS
-  try {
-#endif
-    for (; __n > 0 && !__stop_copying(__idx); ++__ifirst, (void)++__idx, (void)--__n)
-      ::new (static_cast<void*>(std::addressof(*__idx))) _ValueType(*__ifirst);
-#if _LIBCPP_HAS_EXCEPTIONS
-  } catch (...) {
-    std::__destroy(__ofirst, __idx);
-    throw;
-  }
-#endif
+  auto __guard           = std::__make_exception_guard([&] { std::__destroy(__ofirst, __idx); });
+  for (; __n > 0 && !__stop_copying(__idx); ++__ifirst, (void)++__idx, (void)--__n)
+    ::new (static_cast<void*>(std::addressof(*__idx))) _ValueType(*__ifirst);
+  __guard.__complete();
 
   return pair<_InputIterator, _ForwardIterator>(std::move(__ifirst), std::move(__idx));
 }
@@ -121,17 +106,10 @@ template <class _ValueType, class _ForwardIterator, class _Sentinel, class _Tp>
 inline _LIBCPP_HIDE_FROM_ABI _ForwardIterator
 __uninitialized_fill(_ForwardIterator __first, _Sentinel __last, const _Tp& __x) {
   _ForwardIterator __idx = __first;
-#if _LIBCPP_HAS_EXCEPTIONS
-  try {
-#endif
-    for (; __idx != __last; ++__idx)
-      ::new (static_cast<void*>(std::addressof(*__idx))) _ValueType(__x);
-#if _LIBCPP_HAS_EXCEPTIONS
-  } catch (...) {
-    std::__destroy(__first, __idx);
-    throw;
-  }
-#endif
+  auto __guard           = std::__make_exception_guard([&] { std::__destroy(__first, __idx); });
+  for (; __idx != __last; ++__idx)
+    ::new (static_cast<void*>(std::addressof(*__idx))) _ValueType(__x);
+  __guard.__complete();
 
   return __idx;
 }
@@ -149,17 +127,10 @@ template <class _ValueType, class _ForwardIterator, class _Size, class _Tp>
 inline _LIBCPP_HIDE_FROM_ABI _ForwardIterator
 __uninitialized_fill_n(_ForwardIterator __first, _Size __n, const _Tp& __x) {
   _ForwardIterator __idx = __first;
-#if _LIBCPP_HAS_EXCEPTIONS
-  try {
-#endif
-    for (; __n > 0; ++__idx, (void)--__n)
-      ::new (static_cast<void*>(std::addressof(*__idx))) _ValueType(__x);
-#if _LIBCPP_HAS_EXCEPTIONS
-  } catch (...) {
-    std::__destroy(__first, __idx);
-    throw;
-  }
-#endif
+  auto __guard           = std::__make_exception_guard([&] { std::__destroy(__first, __idx); });
+  for (; __n > 0; ++__idx, (void)--__n)
+    ::new (static_cast<void*>(std::addressof(*__idx))) _ValueType(__x);
+  __guard.__complete();
 
   return __idx;
 }
@@ -178,18 +149,11 @@ uninitialized_fill_n(_ForwardIterator __first, _Size __n, const _Tp& __x) {
 template <class _ValueType, class _ForwardIterator, class _Sentinel>
 inline _LIBCPP_HIDE_FROM_ABI _ForwardIterator
 __uninitialized_default_construct(_ForwardIterator __first, _Sentinel __last) {
-  auto __idx = __first;
-#  if _LIBCPP_HAS_EXCEPTIONS
-  try {
-#  endif
-    for (; __idx != __last; ++__idx)
-      ::new (static_cast<void*>(std::addressof(*__idx))) _ValueType;
-#  if _LIBCPP_HAS_EXCEPTIONS
-  } catch (...) {
-    std::__destroy(__first, __idx);
-    throw;
-  }
-#  endif
+  auto __idx   = __first;
+  auto __guard = std::__make_exception_guard([&] { std::__destroy(__first, __idx); });
+  for (; __idx != __last; ++__idx)
+    ::new (static_cast<void*>(std::addressof(*__idx))) _ValueType;
+  __guard.__complete();
 
   return __idx;
 }
@@ -205,17 +169,10 @@ inline _LIBCPP_HIDE_FROM_ABI void uninitialized_default_construct(_ForwardIterat
 template <class _ValueType, class _ForwardIterator, class _Size>
 inline _LIBCPP_HIDE_FROM_ABI _ForwardIterator __uninitialized_default_construct_n(_ForwardIterator __first, _Size __n) {
   auto __idx = __first;
-#  if _LIBCPP_HAS_EXCEPTIONS
-  try {
-#  endif
-    for (; __n > 0; ++__idx, (void)--__n)
-      ::new (static_cast<void*>(std::addressof(*__idx))) _ValueType;
-#  if _LIBCPP_HAS_EXCEPTIONS
-  } catch (...) {
-    std::__destroy(__first, __idx);
-    throw;
-  }
-#  endif
+  auto __guard = std::__make_exception_guard([&] { std::__destroy(__first, __idx); });
+  for (; __n > 0; ++__idx, (void)--__n)
+    ::new (static_cast<void*>(std::addressof(*__idx))) _ValueType;
+  __guard.__complete();
 
   return __idx;
 }
@@ -231,18 +188,11 @@ inline _LIBCPP_HIDE_FROM_ABI _ForwardIterator uninitialized_default_construct_n(
 template <class _ValueType, class _ForwardIterator, class _Sentinel>
 inline _LIBCPP_HIDE_FROM_ABI _ForwardIterator
 __uninitialized_value_construct(_ForwardIterator __first, _Sentinel __last) {
-  auto __idx = __first;
-#  if _LIBCPP_HAS_EXCEPTIONS
-  try {
-#  endif
-    for (; __idx != __last; ++__idx)
-      ::new (static_cast<void*>(std::addressof(*__idx))) _ValueType();
-#  if _LIBCPP_HAS_EXCEPTIONS
-  } catch (...) {
-    std::__destroy(__first, __idx);
-    throw;
-  }
-#  endif
+  auto __idx   = __first;
+  auto __guard = std::__make_exception_guard([&] { std::__destroy(__first, __idx); });
+  for (; __idx != __last; ++__idx)
+    ::new (static_cast<void*>(std::addressof(*__idx))) _ValueType();
+  __guard.__complete();
 
   return __idx;
 }
@@ -258,17 +208,10 @@ inline _LIBCPP_HIDE_FROM_ABI void uninitialized_value_construct(_ForwardIterator
 template <class _ValueType, class _ForwardIterator, class _Size>
 inline _LIBCPP_HIDE_FROM_ABI _ForwardIterator __uninitialized_value_construct_n(_ForwardIterator __first, _Size __n) {
   auto __idx = __first;
-#  if _LIBCPP_HAS_EXCEPTIONS
-  try {
-#  endif
-    for (; __n > 0; ++__idx, (void)--__n)
-      ::new (static_cast<void*>(std::addressof(*__idx))) _ValueType();
-#  if _LIBCPP_HAS_EXCEPTIONS
-  } catch (...) {
-    std::__destroy(__first, __idx);
-    throw;
-  }
-#  endif
+  auto __guard = std::__make_exception_guard([&] { std::__destroy(__first, __idx); });
+  for (; __n > 0; ++__idx, (void)--__n)
+    ::new (static_cast<void*>(std::addressof(*__idx))) _ValueType();
+  __guard.__complete();
 
   return __idx;
 }
@@ -293,19 +236,12 @@ inline _LIBCPP_HIDE_FROM_ABI pair<_InputIterator, _ForwardIterator> __uninitiali
     _ForwardIterator __ofirst,
     _EndPredicate __stop_moving,
     _IterMove __iter_move) {
-  auto __idx = __ofirst;
-#  if _LIBCPP_HAS_EXCEPTIONS
-  try {
-#  endif
-    for (; __ifirst != __ilast && !__stop_moving(__idx); ++__idx, (void)++__ifirst) {
-      ::new (static_cast<void*>(std::addressof(*__idx))) _ValueType(__iter_move(__ifirst));
-    }
-#  if _LIBCPP_HAS_EXCEPTIONS
-  } catch (...) {
-    std::__destroy(__ofirst, __idx);
-    throw;
+  auto __idx   = __ofirst;
+  auto __guard = std::__make_exception_guard([&] { std::__destroy(__ofirst, __idx); });
+  for (; __ifirst != __ilast && !__stop_moving(__idx); ++__idx, (void)++__ifirst) {
+    ::new (static_cast<void*>(std::addressof(*__idx))) _ValueType(__iter_move(__ifirst));
   }
-#  endif
+  __guard.__complete();
 
   return {std::move(__ifirst), std::move(__idx)};
 }
@@ -331,18 +267,11 @@ template <class _ValueType,
           class _IterMove>
 inline _LIBCPP_HIDE_FROM_ABI pair<_InputIterator, _ForwardIterator> __uninitialized_move_n(
     _InputIterator __ifirst, _Size __n, _ForwardIterator __ofirst, _EndPredicate __stop_moving, _IterMove __iter_move) {
-  auto __idx = __ofirst;
-#  if _LIBCPP_HAS_EXCEPTIONS
-  try {
-#  endif
-    for (; __n > 0 && !__stop_moving(__idx); ++__idx, (void)++__ifirst, --__n)
-      ::new (static_cast<void*>(std::addressof(*__idx))) _ValueType(__iter_move(__ifirst));
-#  if _LIBCPP_HAS_EXCEPTIONS
-  } catch (...) {
-    std::__destroy(__ofirst, __idx);
-    throw;
-  }
-#  endif
+  auto __idx   = __ofirst;
+  auto __guard = std::__make_exception_guard([&] { std::__destroy(__ofirst, __idx); });
+  for (; __n > 0 && !__stop_moving(__idx); ++__idx, (void)++__ifirst, --__n)
+    ::new (static_cast<void*>(std::addressof(*__idx))) _ValueType(__iter_move(__ifirst));
+  __guard.__complete();
 
   return {std::move(__ifirst), std::move(__idx)};
 }
diff --git a/lib/libcxx/include/__memory/unique_ptr.h b/lib/libcxx/include/__memory/unique_ptr.h
index eff24546cd..6a4ec0a466 100644
--- a/lib/libcxx/include/__memory/unique_ptr.h
+++ b/lib/libcxx/include/__memory/unique_ptr.h
@@ -32,18 +32,15 @@
 #include <__type_traits/integral_constant.h>
 #include <__type_traits/is_array.h>
 #include <__type_traits/is_assignable.h>
-#include <__type_traits/is_bounded_array.h>
 #include <__type_traits/is_constant_evaluated.h>
 #include <__type_traits/is_constructible.h>
 #include <__type_traits/is_convertible.h>
 #include <__type_traits/is_function.h>
 #include <__type_traits/is_pointer.h>
 #include <__type_traits/is_reference.h>
-#include <__type_traits/is_replaceable.h>
 #include <__type_traits/is_same.h>
 #include <__type_traits/is_swappable.h>
 #include <__type_traits/is_trivially_relocatable.h>
-#include <__type_traits/is_unbounded_array.h>
 #include <__type_traits/is_void.h>
 #include <__type_traits/remove_extent.h>
 #include <__type_traits/type_identity.h>
@@ -145,8 +142,6 @@ public:
       __libcpp_is_trivially_relocatable<pointer>::value && __libcpp_is_trivially_relocatable<deleter_type>::value,
       unique_ptr,
       void>;
-  using __replaceable _LIBCPP_NODEBUG =
-      __conditional_t<__is_replaceable_v<pointer> && __is_replaceable_v<deleter_type>, unique_ptr, void>;
 
 private:
   _LIBCPP_COMPRESSED_PAIR(pointer, __ptr_, deleter_type, __deleter_);
@@ -263,14 +258,17 @@ public:
     return *this;
   }
 
-  _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX23 __add_lvalue_reference_t<_Tp> operator*() const
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX23 __add_lvalue_reference_t<_Tp> operator*() const
       _NOEXCEPT_(_NOEXCEPT_(*std::declval<pointer>())) {
     return *__ptr_;
   }
   _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX23 pointer operator->() const _NOEXCEPT { return __ptr_; }
-  _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX23 pointer get() const _NOEXCEPT { return __ptr_; }
-  _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX23 deleter_type& get_deleter() _NOEXCEPT { return __deleter_; }
-  _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX23 const deleter_type& get_deleter() const _NOEXCEPT {
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX23 pointer get() const _NOEXCEPT { return __ptr_; }
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX23 deleter_type& get_deleter() _NOEXCEPT {
+    return __deleter_;
+  }
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX23 const deleter_type&
+  get_deleter() const _NOEXCEPT {
     return __deleter_;
   }
   _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX23 explicit operator bool() const _NOEXCEPT {
@@ -413,8 +411,6 @@ public:
       __libcpp_is_trivially_relocatable<pointer>::value && __libcpp_is_trivially_relocatable<deleter_type>::value,
       unique_ptr,
       void>;
-  using __replaceable _LIBCPP_NODEBUG =
-      __conditional_t<__is_replaceable_v<pointer> && __is_replaceable_v<deleter_type>, unique_ptr, void>;
 
 private:
   template <class _Up, class _OtherDeleter>
@@ -755,12 +751,13 @@ operator<=>(const unique_ptr<_T1, _D1>& __x, nullptr_t) {
 #if _LIBCPP_STD_VER >= 14
 
 template <class _Tp, class... _Args, enable_if_t<!is_array<_Tp>::value, int> = 0>
-inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX23 unique_ptr<_Tp> make_unique(_Args&&... __args) {
+[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI
+_LIBCPP_CONSTEXPR_SINCE_CXX23 unique_ptr<_Tp> make_unique(_Args&&... __args) {
   return unique_ptr<_Tp>(new _Tp(std::forward<_Args>(__args)...));
 }
 
 template <class _Tp, enable_if_t<__is_unbounded_array_v<_Tp>, int> = 0>
-inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX23 unique_ptr<_Tp> make_unique(size_t __n) {
+[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX23 unique_ptr<_Tp> make_unique(size_t __n) {
   typedef __remove_extent_t<_Tp> _Up;
   return unique_ptr<_Tp>(__private_constructor_tag(), new _Up[__n](), __n);
 }
@@ -773,12 +770,13 @@ void make_unique(_Args&&...) = delete;
 #if _LIBCPP_STD_VER >= 20
 
 template <class _Tp, enable_if_t<!is_array_v<_Tp>, int> = 0>
-_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX23 unique_ptr<_Tp> make_unique_for_overwrite() {
+[[nodiscard]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX23 unique_ptr<_Tp> make_unique_for_overwrite() {
   return unique_ptr<_Tp>(new _Tp);
 }
 
 template <class _Tp, enable_if_t<is_unbounded_array_v<_Tp>, int> = 0>
-_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX23 unique_ptr<_Tp> make_unique_for_overwrite(size_t __n) {
+[[nodiscard]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX23 unique_ptr<_Tp>
+make_unique_for_overwrite(size_t __n) {
   return unique_ptr<_Tp>(__private_constructor_tag(), new __remove_extent_t<_Tp>[__n], __n);
 }
 
@@ -802,7 +800,7 @@ struct hash<__enable_hash_helper< unique_ptr<_Tp, _Dp>, typename unique_ptr<_Tp,
   _LIBCPP_DEPRECATED_IN_CXX17 typedef size_t result_type;
 #endif
 
-  _LIBCPP_HIDE_FROM_ABI size_t operator()(const unique_ptr<_Tp, _Dp>& __ptr) const {
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI size_t operator()(const unique_ptr<_Tp, _Dp>& __ptr) const {
     typedef typename unique_ptr<_Tp, _Dp>::pointer pointer;
     return hash<pointer>()(__ptr.get());
   }
diff --git a/lib/libcxx/include/__memory/uses_allocator_construction.h b/lib/libcxx/include/__memory/uses_allocator_construction.h
index 49ddf99d9c..6733f5cf6f 100644
--- a/lib/libcxx/include/__memory/uses_allocator_construction.h
+++ b/lib/libcxx/include/__memory/uses_allocator_construction.h
@@ -17,6 +17,7 @@
 #include <__type_traits/remove_cv.h>
 #include <__utility/declval.h>
 #include <__utility/pair.h>
+#include <__utility/piecewise_construct.h>
 #include <tuple>
 
 #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
diff --git a/lib/libcxx/include/__memory_resource/memory_resource.h b/lib/libcxx/include/__memory_resource/memory_resource.h
index f93f10fe21..5b42ae5489 100644
--- a/lib/libcxx/include/__memory_resource/memory_resource.h
+++ b/lib/libcxx/include/__memory_resource/memory_resource.h
@@ -42,7 +42,9 @@ public:
     do_deallocate(__p, __bytes, __align);
   }
 
-  _LIBCPP_HIDE_FROM_ABI bool is_equal(const memory_resource& __other) const noexcept { return do_is_equal(__other); }
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI bool is_equal(const memory_resource& __other) const noexcept {
+    return do_is_equal(__other);
+  }
 
 private:
   virtual void* do_allocate(size_t, size_t)                       = 0;
@@ -68,7 +70,7 @@ operator!=(const memory_resource& __lhs, const memory_resource& __rhs) noexcept
 
 // [mem.res.global]
 
-[[__gnu__::__returns_nonnull__]] _LIBCPP_AVAILABILITY_PMR _LIBCPP_EXPORTED_FROM_ABI memory_resource*
+[[nodiscard, __gnu__::__returns_nonnull__]] _LIBCPP_AVAILABILITY_PMR _LIBCPP_EXPORTED_FROM_ABI memory_resource*
 get_default_resource() noexcept;
 
 [[__gnu__::__returns_nonnull__]] _LIBCPP_AVAILABILITY_PMR _LIBCPP_EXPORTED_FROM_ABI memory_resource*
diff --git a/lib/libcxx/include/__memory_resource/monotonic_buffer_resource.h b/lib/libcxx/include/__memory_resource/monotonic_buffer_resource.h
index 942d490ce3..9c7b07df52 100644
--- a/lib/libcxx/include/__memory_resource/monotonic_buffer_resource.h
+++ b/lib/libcxx/include/__memory_resource/monotonic_buffer_resource.h
@@ -93,7 +93,7 @@ public:
     }
   }
 
-  _LIBCPP_HIDE_FROM_ABI memory_resource* upstream_resource() const { return __res_; }
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI memory_resource* upstream_resource() const { return __res_; }
 
 protected:
   void* do_allocate(size_t __bytes, size_t __alignment) override; // key function
diff --git a/lib/libcxx/include/__memory_resource/polymorphic_allocator.h b/lib/libcxx/include/__memory_resource/polymorphic_allocator.h
index b95c6a37c5..b01541fa0e 100644
--- a/lib/libcxx/include/__memory_resource/polymorphic_allocator.h
+++ b/lib/libcxx/include/__memory_resource/polymorphic_allocator.h
@@ -18,6 +18,7 @@
 #include <__new/exceptions.h>
 #include <__new/placement_new_delete.h>
 #include <__utility/exception_guard.h>
+#include <__utility/piecewise_construct.h>
 #include <limits>
 #include <tuple>
 
@@ -50,7 +51,9 @@ public:
 
   _LIBCPP_HIDE_FROM_ABI polymorphic_allocator() noexcept : __res_(std::pmr::get_default_resource()) {}
 
-  _LIBCPP_HIDE_FROM_ABI polymorphic_allocator(memory_resource* __r) noexcept : __res_(__r) {}
+  _LIBCPP_HIDE_FROM_ABI polymorphic_allocator(memory_resource* _LIBCPP_DIAGNOSE_NULLPTR __r) noexcept : __res_(__r) {
+    _LIBCPP_ASSERT_NON_NULL(__r, "Attempted to pass a nullptr resource to polymorphic_allocator");
+  }
 
   _LIBCPP_HIDE_FROM_ABI polymorphic_allocator(const polymorphic_allocator&) = default;
 
@@ -133,10 +136,10 @@ public:
         piecewise_construct,
         __transform_tuple(typename __uses_alloc_ctor< _T1, polymorphic_allocator&, _Args1... >::type(),
                           std::move(__x),
-                          typename __make_tuple_indices<sizeof...(_Args1)>::type{}),
+                          make_index_sequence<sizeof...(_Args1)>()),
         __transform_tuple(typename __uses_alloc_ctor< _T2, polymorphic_allocator&, _Args2... >::type(),
                           std::move(__y),
-                          typename __make_tuple_indices<sizeof...(_Args2)>::type{}));
+                          make_index_sequence<sizeof...(_Args2)>()));
   }
 
   template <class _T1, class _T2>
@@ -170,11 +173,13 @@ public:
     __p->~_Tp();
   }
 
-  _LIBCPP_HIDE_FROM_ABI polymorphic_allocator select_on_container_copy_construction() const noexcept {
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI polymorphic_allocator select_on_container_copy_construction() const noexcept {
     return polymorphic_allocator();
   }
 
-  _LIBCPP_HIDE_FROM_ABI memory_resource* resource() const noexcept { return __res_; }
+  [[nodiscard, __gnu__::__returns_nonnull__]] _LIBCPP_HIDE_FROM_ABI memory_resource* resource() const noexcept {
+    return __res_;
+  }
 
   _LIBCPP_HIDE_FROM_ABI friend bool
   operator==(const polymorphic_allocator& __lhs, const polymorphic_allocator& __rhs) noexcept {
@@ -192,20 +197,20 @@ public:
 private:
   template <class... _Args, size_t... _Is>
   _LIBCPP_HIDE_FROM_ABI tuple<_Args&&...>
-  __transform_tuple(integral_constant<int, 0>, tuple<_Args...>&& __t, __tuple_indices<_Is...>) {
+  __transform_tuple(integral_constant<int, 0>, tuple<_Args...>&& __t, index_sequence<_Is...>) {
     return std::forward_as_tuple(std::get<_Is>(std::move(__t))...);
   }
 
   template <class... _Args, size_t... _Is>
   _LIBCPP_HIDE_FROM_ABI tuple<allocator_arg_t const&, polymorphic_allocator&, _Args&&...>
-  __transform_tuple(integral_constant<int, 1>, tuple<_Args...>&& __t, __tuple_indices<_Is...>) {
+  __transform_tuple(integral_constant<int, 1>, tuple<_Args...>&& __t, index_sequence<_Is...>) {
     using _Tup = tuple<allocator_arg_t const&, polymorphic_allocator&, _Args&&...>;
     return _Tup(allocator_arg, *this, std::get<_Is>(std::move(__t))...);
   }
 
   template <class... _Args, size_t... _Is>
   _LIBCPP_HIDE_FROM_ABI tuple<_Args&&..., polymorphic_allocator&>
-  __transform_tuple(integral_constant<int, 2>, tuple<_Args...>&& __t, __tuple_indices<_Is...>) {
+  __transform_tuple(integral_constant<int, 2>, tuple<_Args...>&& __t, index_sequence<_Is...>) {
     using _Tup = tuple<_Args&&..., polymorphic_allocator&>;
     return _Tup(std::get<_Is>(std::move(__t))..., *this);
   }
diff --git a/lib/libcxx/include/__memory_resource/pool_options.h b/lib/libcxx/include/__memory_resource/pool_options.h
index 324b8aaa85..fd20ced567 100644
--- a/lib/libcxx/include/__memory_resource/pool_options.h
+++ b/lib/libcxx/include/__memory_resource/pool_options.h
@@ -24,7 +24,7 @@ namespace pmr {
 
 // [mem.res.pool.options]
 
-struct _LIBCPP_EXPORTED_FROM_ABI pool_options {
+struct pool_options {
   size_t max_blocks_per_chunk        = 0;
   size_t largest_required_pool_block = 0;
 };
diff --git a/lib/libcxx/include/__memory_resource/synchronized_pool_resource.h b/lib/libcxx/include/__memory_resource/synchronized_pool_resource.h
index bcc1ac4a17..1c929675bb 100644
--- a/lib/libcxx/include/__memory_resource/synchronized_pool_resource.h
+++ b/lib/libcxx/include/__memory_resource/synchronized_pool_resource.h
@@ -56,9 +56,11 @@ public:
     __unsync_.release();
   }
 
-  _LIBCPP_HIDE_FROM_ABI memory_resource* upstream_resource() const { return __unsync_.upstream_resource(); }
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI memory_resource* upstream_resource() const {
+    return __unsync_.upstream_resource();
+  }
 
-  _LIBCPP_HIDE_FROM_ABI pool_options options() const { return __unsync_.options(); }
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI pool_options options() const { return __unsync_.options(); }
 
 protected:
   _LIBCPP_HIDE_FROM_ABI_VIRTUAL void* do_allocate(size_t __bytes, size_t __align) override {
diff --git a/lib/libcxx/include/__memory_resource/unsynchronized_pool_resource.h b/lib/libcxx/include/__memory_resource/unsynchronized_pool_resource.h
index 92da16c559..89198a1b7c 100644
--- a/lib/libcxx/include/__memory_resource/unsynchronized_pool_resource.h
+++ b/lib/libcxx/include/__memory_resource/unsynchronized_pool_resource.h
@@ -76,7 +76,7 @@ public:
 
   void release();
 
-  _LIBCPP_HIDE_FROM_ABI memory_resource* upstream_resource() const { return __res_; }
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI memory_resource* upstream_resource() const { return __res_; }
 
   [[__gnu__::__pure__]] pool_options options() const;
 
diff --git a/lib/libcxx/include/__mutex/mutex.h b/lib/libcxx/include/__mutex/mutex.h
index 68c8842b35..e9cedf8db1 100644
--- a/lib/libcxx/include/__mutex/mutex.h
+++ b/lib/libcxx/include/__mutex/mutex.h
@@ -37,11 +37,11 @@ public:
 #  endif
 
   _LIBCPP_ACQUIRE_CAPABILITY() void lock();
-  _LIBCPP_TRY_ACQUIRE_CAPABILITY(true) bool try_lock() _NOEXCEPT;
+  [[__nodiscard__]] _LIBCPP_TRY_ACQUIRE_CAPABILITY(true) bool try_lock() _NOEXCEPT;
   _LIBCPP_RELEASE_CAPABILITY void unlock() _NOEXCEPT;
 
   typedef __libcpp_mutex_t* native_handle_type;
-  _LIBCPP_HIDE_FROM_ABI native_handle_type native_handle() { return &__m_; }
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI native_handle_type native_handle() { return &__m_; }
 };
 
 static_assert(is_nothrow_default_constructible<mutex>::value, "the default constructor for std::mutex must be nothrow");
diff --git a/lib/libcxx/include/__mutex/once_flag.h b/lib/libcxx/include/__mutex/once_flag.h
index 3306449955..ad15b2eb6d 100644
--- a/lib/libcxx/include/__mutex/once_flag.h
+++ b/lib/libcxx/include/__mutex/once_flag.h
@@ -10,12 +10,11 @@
 #define _LIBCPP___MUTEX_ONCE_FLAG_H
 
 #include <__config>
-#include <__functional/invoke.h>
 #include <__memory/addressof.h>
-#include <__memory/shared_count.h> // __libcpp_acquire_load
-#include <__tuple/tuple_indices.h>
 #include <__tuple/tuple_size.h>
+#include <__type_traits/invoke.h>
 #include <__utility/forward.h>
+#include <__utility/integer_sequence.h>
 #include <__utility/move.h>
 #include <cstdint>
 #ifndef _LIBCPP_CXX03_LANG
@@ -88,14 +87,9 @@ public:
   _LIBCPP_HIDE_FROM_ABI explicit __call_once_param(_Fp& __f) : __f_(__f) {}
 
   _LIBCPP_HIDE_FROM_ABI void operator()() {
-    typedef typename __make_tuple_indices<tuple_size<_Fp>::value, 1>::type _Index;
-    __execute(_Index());
-  }
-
-private:
-  template <size_t... _Indices>
-  _LIBCPP_HIDE_FROM_ABI void __execute(__tuple_indices<_Indices...>) {
-    std::__invoke(std::get<0>(std::move(__f_)), std::get<_Indices>(std::move(__f_))...);
+    [&]<size_t... _Indices>(__index_sequence<_Indices...>) -> void {
+      std::__invoke(std::get<_Indices>(std::move(__f_))...);
+    }(__make_index_sequence<tuple_size<_Fp>::value>());
   }
 };
 
@@ -121,6 +115,15 @@ void _LIBCPP_HIDE_FROM_ABI __call_once_proxy(void* __vp) {
 
 _LIBCPP_EXPORTED_FROM_ABI void __call_once(volatile once_flag::_State_type&, void*, void (*)(void*));
 
+template <class _ValueType>
+inline _LIBCPP_HIDE_FROM_ABI _ValueType __libcpp_acquire_load(_ValueType const* __value) {
+#if _LIBCPP_HAS_THREADS
+  return __atomic_load_n(__value, __ATOMIC_ACQUIRE);
+#else
+  return *__value;
+#endif
+}
+
 #ifndef _LIBCPP_CXX03_LANG
 
 template <class _Callable, class... _Args>
diff --git a/lib/libcxx/include/__mutex/tag_types.h b/lib/libcxx/include/__mutex/tag_types.h
index 2b2dd58ee4..36b1a3d92b 100644
--- a/lib/libcxx/include/__mutex/tag_types.h
+++ b/lib/libcxx/include/__mutex/tag_types.h
@@ -17,15 +17,15 @@
 
 _LIBCPP_BEGIN_NAMESPACE_STD
 
-struct _LIBCPP_EXPORTED_FROM_ABI defer_lock_t {
+struct defer_lock_t {
   explicit defer_lock_t() = default;
 };
 
-struct _LIBCPP_EXPORTED_FROM_ABI try_to_lock_t {
+struct try_to_lock_t {
   explicit try_to_lock_t() = default;
 };
 
-struct _LIBCPP_EXPORTED_FROM_ABI adopt_lock_t {
+struct adopt_lock_t {
   explicit adopt_lock_t() = default;
 };
 
diff --git a/lib/libcxx/include/__mutex/unique_lock.h b/lib/libcxx/include/__mutex/unique_lock.h
index aea93eb9b8..6968922639 100644
--- a/lib/libcxx/include/__mutex/unique_lock.h
+++ b/lib/libcxx/include/__mutex/unique_lock.h
@@ -15,6 +15,7 @@
 #include <__memory/addressof.h>
 #include <__mutex/tag_types.h>
 #include <__system_error/throw_system_error.h>
+#include <__utility/move.h>
 #include <__utility/swap.h>
 #include <cerrno>
 
@@ -22,6 +23,9 @@
 #  pragma GCC system_header
 #endif
 
+_LIBCPP_PUSH_MACROS
+#include <__undef_macros>
+
 _LIBCPP_BEGIN_NAMESPACE_STD
 
 template <class _Mutex>
@@ -74,13 +78,8 @@ public:
   }
 
   _LIBCPP_HIDE_FROM_ABI unique_lock& operator=(unique_lock&& __u) _NOEXCEPT {
-    if (__owns_)
-      __m_->unlock();
-
-    __m_        = __u.__m_;
-    __owns_     = __u.__owns_;
-    __u.__m_    = nullptr;
-    __u.__owns_ = false;
+    if (this != std::addressof(__u))
+      unique_lock(std::move(__u)).swap(*this);
     return *this;
   }
 
@@ -170,4 +169,6 @@ inline _LIBCPP_HIDE_FROM_ABI void swap(unique_lock<_Mutex>& __x, unique_lock<_Mu
 
 _LIBCPP_END_NAMESPACE_STD
 
+_LIBCPP_POP_MACROS
+
 #endif // _LIBCPP___MUTEX_UNIQUE_LOCK_H
diff --git a/lib/libcxx/include/__new/align_val_t.h b/lib/libcxx/include/__new/align_val_t.h
index 03ab7cb143..d8ce528334 100644
--- a/lib/libcxx/include/__new/align_val_t.h
+++ b/lib/libcxx/include/__new/align_val_t.h
@@ -16,6 +16,12 @@
 #  pragma GCC system_header
 #endif
 
+// <vcruntime_new.h> defines its own std::align_val_t type,
+// which we use in order to be ABI-compatible with other STLs on Windows.
+#if _LIBCPP_HAS_LIBRARY_ALIGNED_ALLOCATION && defined(_LIBCPP_ABI_VCRUNTIME)
+#  include <vcruntime_new.h>
+#endif
+
 _LIBCPP_BEGIN_UNVERSIONED_NAMESPACE_STD
 #if _LIBCPP_HAS_LIBRARY_ALIGNED_ALLOCATION && !defined(_LIBCPP_ABI_VCRUNTIME)
 #  ifndef _LIBCPP_CXX03_LANG
diff --git a/lib/libcxx/include/__new/allocate.h b/lib/libcxx/include/__new/allocate.h
index 9bfe19aedb..b9bc2e1a50 100644
--- a/lib/libcxx/include/__new/allocate.h
+++ b/lib/libcxx/include/__new/allocate.h
@@ -13,7 +13,6 @@
 #include <__cstddef/max_align_t.h>
 #include <__cstddef/size_t.h>
 #include <__new/align_val_t.h>
-#include <__new/global_new_delete.h> // for _LIBCPP_HAS_SIZED_DEALLOCATION
 #include <__type_traits/type_identity.h>
 #include <__utility/element_count.h>
 
@@ -43,7 +42,7 @@ __libcpp_allocate(__element_count __n, [[__maybe_unused__]] size_t __align = _LI
   return static_cast<_Tp*>(__builtin_operator_new(__size));
 }
 
-#if _LIBCPP_HAS_SIZED_DEALLOCATION
+#if defined(__cpp_sized_deallocation) && __cpp_sized_deallocation >= 201309L
 #  define _LIBCPP_ONLY_IF_SIZED_DEALLOCATION(...) __VA_ARGS__
 #else
 #  define _LIBCPP_ONLY_IF_SIZED_DEALLOCATION(...) /* nothing */
diff --git a/lib/libcxx/include/__new/exceptions.h b/lib/libcxx/include/__new/exceptions.h
index 86951818b7..1aadc23120 100644
--- a/lib/libcxx/include/__new/exceptions.h
+++ b/lib/libcxx/include/__new/exceptions.h
@@ -17,6 +17,12 @@
 #  pragma GCC system_header
 #endif
 
+// <vcruntime_exception.h> defines its own std::bad_alloc type,
+// which we use in order to be ABI-compatible with other STLs on Windows.
+#if defined(_LIBCPP_ABI_VCRUNTIME)
+#  include <vcruntime_exception.h>
+#endif
+
 _LIBCPP_BEGIN_UNVERSIONED_NAMESPACE_STD
 #if !defined(_LIBCPP_ABI_VCRUNTIME)
 
@@ -26,7 +32,7 @@ public:
   _LIBCPP_HIDE_FROM_ABI bad_alloc(const bad_alloc&) _NOEXCEPT            = default;
   _LIBCPP_HIDE_FROM_ABI bad_alloc& operator=(const bad_alloc&) _NOEXCEPT = default;
   ~bad_alloc() _NOEXCEPT override;
-  const char* what() const _NOEXCEPT override;
+  [[__nodiscard__]] const char* what() const _NOEXCEPT override;
 };
 
 class _LIBCPP_EXPORTED_FROM_ABI bad_array_new_length : public bad_alloc {
@@ -35,7 +41,7 @@ public:
   _LIBCPP_HIDE_FROM_ABI bad_array_new_length(const bad_array_new_length&) _NOEXCEPT            = default;
   _LIBCPP_HIDE_FROM_ABI bad_array_new_length& operator=(const bad_array_new_length&) _NOEXCEPT = default;
   ~bad_array_new_length() _NOEXCEPT override;
-  const char* what() const _NOEXCEPT override;
+  [[__nodiscard__]] const char* what() const _NOEXCEPT override;
 };
 
 #elif defined(_HAS_EXCEPTIONS) && _HAS_EXCEPTIONS == 0 // !_LIBCPP_ABI_VCRUNTIME
diff --git a/lib/libcxx/include/__new/global_new_delete.h b/lib/libcxx/include/__new/global_new_delete.h
index 96510ab56b..f31bac3730 100644
--- a/lib/libcxx/include/__new/global_new_delete.h
+++ b/lib/libcxx/include/__new/global_new_delete.h
@@ -12,7 +12,6 @@
 #include <__config>
 #include <__cstddef/size_t.h>
 #include <__new/align_val_t.h>
-#include <__new/exceptions.h>
 #include <__new/nothrow_t.h>
 
 #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
@@ -25,12 +24,6 @@
 #  define _THROW_BAD_ALLOC
 #endif
 
-#if defined(__cpp_sized_deallocation) && __cpp_sized_deallocation >= 201309L
-#  define _LIBCPP_HAS_SIZED_DEALLOCATION 1
-#else
-#  define _LIBCPP_HAS_SIZED_DEALLOCATION 0
-#endif
-
 #if defined(_LIBCPP_ABI_VCRUNTIME)
 #  include <new.h>
 #else
@@ -39,7 +32,7 @@
     _LIBCPP_NOALIAS;
 _LIBCPP_OVERRIDABLE_FUNC_VIS void operator delete(void* __p) _NOEXCEPT;
 _LIBCPP_OVERRIDABLE_FUNC_VIS void operator delete(void* __p, const std::nothrow_t&) _NOEXCEPT;
-#  if _LIBCPP_HAS_SIZED_DEALLOCATION
+#  if defined(__cpp_sized_deallocation) && __cpp_sized_deallocation >= 201309L
 _LIBCPP_OVERRIDABLE_FUNC_VIS void operator delete(void* __p, std::size_t __sz) _NOEXCEPT;
 #  endif
 
@@ -48,7 +41,7 @@ _LIBCPP_OVERRIDABLE_FUNC_VIS void operator delete(void* __p, std::size_t __sz) _
     _LIBCPP_NOALIAS;
 _LIBCPP_OVERRIDABLE_FUNC_VIS void operator delete[](void* __p) _NOEXCEPT;
 _LIBCPP_OVERRIDABLE_FUNC_VIS void operator delete[](void* __p, const std::nothrow_t&) _NOEXCEPT;
-#  if _LIBCPP_HAS_SIZED_DEALLOCATION
+#  if defined(__cpp_sized_deallocation) && __cpp_sized_deallocation >= 201309L
 _LIBCPP_OVERRIDABLE_FUNC_VIS void operator delete[](void* __p, std::size_t __sz) _NOEXCEPT;
 #  endif
 
@@ -58,7 +51,7 @@ _LIBCPP_OVERRIDABLE_FUNC_VIS void operator delete[](void* __p, std::size_t __sz)
 operator new(std::size_t __sz, std::align_val_t, const std::nothrow_t&) _NOEXCEPT _LIBCPP_NOALIAS;
 _LIBCPP_OVERRIDABLE_FUNC_VIS void operator delete(void* __p, std::align_val_t) _NOEXCEPT;
 _LIBCPP_OVERRIDABLE_FUNC_VIS void operator delete(void* __p, std::align_val_t, const std::nothrow_t&) _NOEXCEPT;
-#    if _LIBCPP_HAS_SIZED_DEALLOCATION
+#    if defined(__cpp_sized_deallocation) && __cpp_sized_deallocation >= 201309L
 _LIBCPP_OVERRIDABLE_FUNC_VIS void operator delete(void* __p, std::size_t __sz, std::align_val_t) _NOEXCEPT;
 #    endif
 
@@ -68,7 +61,7 @@ operator new[](std::size_t __sz, std::align_val_t) _THROW_BAD_ALLOC;
 operator new[](std::size_t __sz, std::align_val_t, const std::nothrow_t&) _NOEXCEPT _LIBCPP_NOALIAS;
 _LIBCPP_OVERRIDABLE_FUNC_VIS void operator delete[](void* __p, std::align_val_t) _NOEXCEPT;
 _LIBCPP_OVERRIDABLE_FUNC_VIS void operator delete[](void* __p, std::align_val_t, const std::nothrow_t&) _NOEXCEPT;
-#    if _LIBCPP_HAS_SIZED_DEALLOCATION
+#    if defined(__cpp_sized_deallocation) && __cpp_sized_deallocation >= 201309L
 _LIBCPP_OVERRIDABLE_FUNC_VIS void operator delete[](void* __p, std::size_t __sz, std::align_val_t) _NOEXCEPT;
 #    endif
 #  endif
diff --git a/lib/libcxx/include/__new/interference_size.h b/lib/libcxx/include/__new/interference_size.h
index d326c43a33..591d0ab405 100644
--- a/lib/libcxx/include/__new/interference_size.h
+++ b/lib/libcxx/include/__new/interference_size.h
@@ -20,13 +20,9 @@ _LIBCPP_BEGIN_NAMESPACE_STD
 
 #if _LIBCPP_STD_VER >= 17
 
-#  if defined(__GCC_DESTRUCTIVE_SIZE) && defined(__GCC_CONSTRUCTIVE_SIZE)
-
 inline constexpr size_t hardware_destructive_interference_size  = __GCC_DESTRUCTIVE_SIZE;
 inline constexpr size_t hardware_constructive_interference_size = __GCC_CONSTRUCTIVE_SIZE;
 
-#  endif // defined(__GCC_DESTRUCTIVE_SIZE) && defined(__GCC_CONSTRUCTIVE_SIZE)
-
 #endif // _LIBCPP_STD_VER >= 17
 
 _LIBCPP_END_NAMESPACE_STD
diff --git a/lib/libcxx/include/__new/launder.h b/lib/libcxx/include/__new/launder.h
index 83d8001591..886f614eed 100644
--- a/lib/libcxx/include/__new/launder.h
+++ b/lib/libcxx/include/__new/launder.h
@@ -10,8 +10,6 @@
 #define _LIBCPP___NEW_LAUNDER_H
 
 #include <__config>
-#include <__type_traits/is_function.h>
-#include <__type_traits/is_void.h>
 
 #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
 #  pragma GCC system_header
@@ -20,15 +18,15 @@
 _LIBCPP_BEGIN_NAMESPACE_STD
 template <class _Tp>
 [[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR _Tp* __launder(_Tp* __p) _NOEXCEPT {
-  static_assert(!(is_function<_Tp>::value), "can't launder functions");
-  static_assert(!is_void<_Tp>::value, "can't launder cv-void");
+  // The compiler diagnoses misuses of __builtin_launder, so we don't need to add any static_asserts
+  // to implement the Mandates.
   return __builtin_launder(__p);
 }
 
 #if _LIBCPP_STD_VER >= 17
 template <class _Tp>
 [[nodiscard]] inline _LIBCPP_HIDE_FROM_ABI constexpr _Tp* launder(_Tp* __p) noexcept {
-  return std::__launder(__p);
+  return __builtin_launder(__p);
 }
 #endif
 _LIBCPP_END_NAMESPACE_STD
diff --git a/lib/libcxx/include/__new/nothrow_t.h b/lib/libcxx/include/__new/nothrow_t.h
index a286bf7af6..a099772123 100644
--- a/lib/libcxx/include/__new/nothrow_t.h
+++ b/lib/libcxx/include/__new/nothrow_t.h
@@ -19,7 +19,7 @@
 #  include <new.h>
 #else
 _LIBCPP_BEGIN_UNVERSIONED_NAMESPACE_STD
-struct _LIBCPP_EXPORTED_FROM_ABI nothrow_t {
+struct nothrow_t {
   explicit nothrow_t() = default;
 };
 extern _LIBCPP_EXPORTED_FROM_ABI const nothrow_t nothrow;
diff --git a/lib/libcxx/include/__numeric/gcd_lcm.h b/lib/libcxx/include/__numeric/gcd_lcm.h
index 95df54dc06..5ab870fa73 100644
--- a/lib/libcxx/include/__numeric/gcd_lcm.h
+++ b/lib/libcxx/include/__numeric/gcd_lcm.h
@@ -33,28 +33,26 @@ _LIBCPP_BEGIN_NAMESPACE_STD
 
 #if _LIBCPP_STD_VER >= 17
 
-template <typename _Result, typename _Source, bool _IsSigned = is_signed<_Source>::value>
-struct __ct_abs;
-
-template <typename _Result, typename _Source>
-struct __ct_abs<_Result, _Source, true> {
-  constexpr _LIBCPP_HIDE_FROM_ABI _Result operator()(_Source __t) const noexcept {
+template <class _Result, class _Source>
+constexpr _LIBCPP_HIDE_FROM_ABI _Result __abs_in_type(_Source __t) noexcept {
+  if constexpr (is_signed_v<_Source>) {
     if (__t >= 0)
       return __t;
     if (__t == numeric_limits<_Source>::min())
       return -static_cast<_Result>(__t);
     return -__t;
+  } else {
+    return __t;
   }
-};
+}
 
-template <typename _Result, typename _Source>
-struct __ct_abs<_Result, _Source, false> {
-  constexpr _LIBCPP_HIDE_FROM_ABI _Result operator()(_Source __t) const noexcept { return __t; }
-};
-
-template <class _Tp>
-constexpr _LIBCPP_HIDDEN _Tp __gcd(_Tp __a, _Tp __b) {
-  static_assert(!is_signed<_Tp>::value, "");
+template <class _Tp, class _Up>
+constexpr _LIBCPP_HIDE_FROM_ABI common_type_t<_Tp, _Up> gcd(_Tp __m, _Up __n) {
+  static_assert(is_integral<_Tp>::value && is_integral<_Up>::value, "Arguments to gcd must be integer types");
+  static_assert(!is_same<__remove_cv_t<_Tp>, bool>::value, "First argument to gcd cannot be bool");
+  static_assert(!is_same<__remove_cv_t<_Up>, bool>::value, "Second argument to gcd cannot be bool");
+  using _Rp = common_type_t<_Tp, _Up>;
+  using _Wp = make_unsigned_t<_Rp>;
 
   // Using Binary GCD algorithm https://en.wikipedia.org/wiki/Binary_GCD_algorithm, based on an implementation
   // from https://lemire.me/blog/2024/04/13/greatest-common-divisor-the-extended-euclidean-algorithm-and-speed/
@@ -67,22 +65,25 @@ constexpr _LIBCPP_HIDDEN _Tp __gcd(_Tp __a, _Tp __b) {
   //
   // And standard gcd algorithm where instead of modulo, minus is used.
 
+  auto __a = static_cast<_Wp>(std::__abs_in_type<_Rp>(__m));
+  auto __b = static_cast<_Wp>(std::__abs_in_type<_Rp>(__n));
+
   if (__a < __b) {
-    _Tp __tmp = __b;
+    _Wp __tmp = __b;
     __b       = __a;
     __a       = __tmp;
   }
   if (__b == 0)
-    return __a;
+    return static_cast<_Rp>(__a);
   __a %= __b; // Make both argument of the same size, and early result in the easy case.
   if (__a == 0)
-    return __b;
+    return static_cast<_Rp>(__b);
 
-  _Tp __c     = __a | __b;
+  _Wp __c     = __a | __b;
   int __shift = std::__countr_zero(__c);
   __a >>= std::__countr_zero(__a);
   do {
-    _Tp __t = __b >> std::__countr_zero(__b);
+    _Wp __t = __b >> std::__countr_zero(__b);
     if (__a > __t) {
       __b = __a - __t;
       __a = __t;
@@ -90,18 +91,7 @@ constexpr _LIBCPP_HIDDEN _Tp __gcd(_Tp __a, _Tp __b) {
       __b = __t - __a;
     }
   } while (__b != 0);
-  return __a << __shift;
-}
-
-template <class _Tp, class _Up>
-constexpr _LIBCPP_HIDE_FROM_ABI common_type_t<_Tp, _Up> gcd(_Tp __m, _Up __n) {
-  static_assert(is_integral<_Tp>::value && is_integral<_Up>::value, "Arguments to gcd must be integer types");
-  static_assert(!is_same<__remove_cv_t<_Tp>, bool>::value, "First argument to gcd cannot be bool");
-  static_assert(!is_same<__remove_cv_t<_Up>, bool>::value, "Second argument to gcd cannot be bool");
-  using _Rp = common_type_t<_Tp, _Up>;
-  using _Wp = make_unsigned_t<_Rp>;
-  return static_cast<_Rp>(
-      std::__gcd(static_cast<_Wp>(__ct_abs<_Rp, _Tp>()(__m)), static_cast<_Wp>(__ct_abs<_Rp, _Up>()(__n))));
+  return static_cast<_Rp>(__a << __shift);
 }
 
 template <class _Tp, class _Up>
@@ -113,8 +103,8 @@ constexpr _LIBCPP_HIDE_FROM_ABI common_type_t<_Tp, _Up> lcm(_Tp __m, _Up __n) {
     return 0;
 
   using _Rp  = common_type_t<_Tp, _Up>;
-  _Rp __val1 = __ct_abs<_Rp, _Tp>()(__m) / std::gcd(__m, __n);
-  _Rp __val2 = __ct_abs<_Rp, _Up>()(__n);
+  _Rp __val1 = std::__abs_in_type<_Rp>(__m) / std::gcd(__m, __n);
+  _Rp __val2 = std::__abs_in_type<_Rp>(__n);
   _Rp __res;
   [[maybe_unused]] bool __overflow = __builtin_mul_overflow(__val1, __val2, std::addressof(__res));
   _LIBCPP_ASSERT_ARGUMENT_WITHIN_DOMAIN(!__overflow, "Overflow in lcm");
diff --git a/lib/libcxx/include/__numeric/midpoint.h b/lib/libcxx/include/__numeric/midpoint.h
index 2ba80e5cca..d8e73ab8ca 100644
--- a/lib/libcxx/include/__numeric/midpoint.h
+++ b/lib/libcxx/include/__numeric/midpoint.h
@@ -12,16 +12,13 @@
 
 #include <__config>
 #include <__cstddef/ptrdiff_t.h>
-#include <__type_traits/enable_if.h>
 #include <__type_traits/is_floating_point.h>
 #include <__type_traits/is_integral.h>
-#include <__type_traits/is_null_pointer.h>
 #include <__type_traits/is_object.h>
-#include <__type_traits/is_pointer.h>
 #include <__type_traits/is_same.h>
 #include <__type_traits/is_void.h>
 #include <__type_traits/make_unsigned.h>
-#include <__type_traits/remove_pointer.h>
+#include <__type_traits/remove_cv.h>
 #include <limits>
 
 #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
@@ -35,8 +32,9 @@ _LIBCPP_BEGIN_NAMESPACE_STD
 
 #if _LIBCPP_STD_VER >= 20
 template <class _Tp>
-_LIBCPP_HIDE_FROM_ABI constexpr enable_if_t<is_integral_v<_Tp> && !is_same_v<bool, _Tp> && !is_null_pointer_v<_Tp>, _Tp>
-midpoint(_Tp __a, _Tp __b) noexcept _LIBCPP_DISABLE_UBSAN_UNSIGNED_INTEGER_CHECK {
+  requires(is_integral_v<_Tp> && !is_same_v<remove_cv_t<_Tp>, bool>)
+[[nodiscard]]
+_LIBCPP_HIDE_FROM_ABI constexpr _Tp midpoint(_Tp __a, _Tp __b) noexcept _LIBCPP_DISABLE_UBSAN_UNSIGNED_INTEGER_CHECK {
   using _Up                = make_unsigned_t<_Tp>;
   constexpr _Up __bitshift = numeric_limits<_Up>::digits - 1;
 
@@ -48,23 +46,20 @@ midpoint(_Tp __a, _Tp __b) noexcept _LIBCPP_DISABLE_UBSAN_UNSIGNED_INTEGER_CHECK
   return __a + __half_diff;
 }
 
-template <class _Tp, enable_if_t<is_object_v<_Tp> && !is_void_v<_Tp> && (sizeof(_Tp) > 0), int> = 0>
-_LIBCPP_HIDE_FROM_ABI constexpr _Tp* midpoint(_Tp* __a, _Tp* __b) noexcept {
+template <class _Tp>
+  requires(is_object_v<_Tp> && (sizeof(_Tp) > 0))
+[[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr _Tp* midpoint(_Tp* __a, _Tp* __b) noexcept {
   return __a + std::midpoint(ptrdiff_t(0), __b - __a);
 }
 
-template <typename _Tp>
-_LIBCPP_HIDE_FROM_ABI constexpr int __sign(_Tp __val) {
-  return (_Tp(0) < __val) - (__val < _Tp(0));
-}
-
 template <typename _Fp>
 _LIBCPP_HIDE_FROM_ABI constexpr _Fp __fp_abs(_Fp __f) {
   return __f >= 0 ? __f : -__f;
 }
 
 template <class _Fp>
-_LIBCPP_HIDE_FROM_ABI constexpr enable_if_t<is_floating_point_v<_Fp>, _Fp> midpoint(_Fp __a, _Fp __b) noexcept {
+  requires(is_floating_point_v<_Fp>)
+[[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr _Fp midpoint(_Fp __a, _Fp __b) noexcept {
   constexpr _Fp __lo = numeric_limits<_Fp>::min() * 2;
   constexpr _Fp __hi = numeric_limits<_Fp>::max() / 2;
 
diff --git a/lib/libcxx/include/__numeric/pstl.h b/lib/libcxx/include/__numeric/pstl.h
index 22d971ac3b..fe7b2cc7a8 100644
--- a/lib/libcxx/include/__numeric/pstl.h
+++ b/lib/libcxx/include/__numeric/pstl.h
@@ -70,7 +70,7 @@ template <class _ExecutionPolicy,
           class _ForwardIterator,
           class _RawPolicy                                    = __remove_cvref_t<_ExecutionPolicy>,
           enable_if_t<is_execution_policy_v<_RawPolicy>, int> = 0>
-_LIBCPP_HIDE_FROM_ABI __iter_value_type<_ForwardIterator>
+_LIBCPP_HIDE_FROM_ABI __iterator_value_type<_ForwardIterator>
 reduce(_ExecutionPolicy&& __policy, _ForwardIterator __first, _ForwardIterator __last) {
   _LIBCPP_REQUIRE_CPP17_FORWARD_ITERATOR(_ForwardIterator, "reduce requires ForwardIterators");
   using _Implementation = __pstl::__dispatch<__pstl::__reduce, __pstl::__current_configuration, _RawPolicy>;
@@ -78,7 +78,7 @@ reduce(_ExecutionPolicy&& __policy, _ForwardIterator __first, _ForwardIterator _
       std::forward<_ExecutionPolicy>(__policy),
       std::move(__first),
       std::move(__last),
-      __iter_value_type<_ForwardIterator>(),
+      __iterator_value_type<_ForwardIterator>(),
       plus{});
 }
 
diff --git a/lib/libcxx/include/__numeric/saturation_arithmetic.h b/lib/libcxx/include/__numeric/saturation_arithmetic.h
index 9bd3af12c9..4491bab2b1 100644
--- a/lib/libcxx/include/__numeric/saturation_arithmetic.h
+++ b/lib/libcxx/include/__numeric/saturation_arithmetic.h
@@ -30,6 +30,9 @@ _LIBCPP_BEGIN_NAMESPACE_STD
 
 template <__signed_or_unsigned_integer _Tp>
 _LIBCPP_HIDE_FROM_ABI constexpr _Tp __add_sat(_Tp __x, _Tp __y) noexcept {
+#  if defined(_LIBCPP_CLANG_VER) && _LIBCPP_CLANG_VER >= 2101
+  return __builtin_elementwise_add_sat(__x, __y);
+#  else
   if (_Tp __sum; !__builtin_add_overflow(__x, __y, std::addressof(__sum)))
     return __sum;
   // Handle overflow
@@ -44,10 +47,14 @@ _LIBCPP_HIDE_FROM_ABI constexpr _Tp __add_sat(_Tp __x, _Tp __y) noexcept {
       // Overflows if  (x < 0 && y < 0)
       return std::numeric_limits<_Tp>::min();
   }
+#  endif
 }
 
 template <__signed_or_unsigned_integer _Tp>
 _LIBCPP_HIDE_FROM_ABI constexpr _Tp __sub_sat(_Tp __x, _Tp __y) noexcept {
+#  if defined(_LIBCPP_CLANG_VER) && _LIBCPP_CLANG_VER >= 2101
+  return __builtin_elementwise_sub_sat(__x, __y);
+#  else
   if (_Tp __sub; !__builtin_sub_overflow(__x, __y, std::addressof(__sub)))
     return __sub;
   // Handle overflow
@@ -63,6 +70,7 @@ _LIBCPP_HIDE_FROM_ABI constexpr _Tp __sub_sat(_Tp __x, _Tp __y) noexcept {
       // Overflows if (x < 0 && y > 0)
       return std::numeric_limits<_Tp>::min();
   }
+#  endif
 }
 
 template <__signed_or_unsigned_integer _Tp>
@@ -113,27 +121,27 @@ _LIBCPP_HIDE_FROM_ABI constexpr _Rp __saturate_cast(_Tp __x) noexcept {
 #if _LIBCPP_STD_VER >= 26
 
 template <__signed_or_unsigned_integer _Tp>
-_LIBCPP_HIDE_FROM_ABI constexpr _Tp add_sat(_Tp __x, _Tp __y) noexcept {
+[[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr _Tp add_sat(_Tp __x, _Tp __y) noexcept {
   return std::__add_sat(__x, __y);
 }
 
 template <__signed_or_unsigned_integer _Tp>
-_LIBCPP_HIDE_FROM_ABI constexpr _Tp sub_sat(_Tp __x, _Tp __y) noexcept {
+[[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr _Tp sub_sat(_Tp __x, _Tp __y) noexcept {
   return std::__sub_sat(__x, __y);
 }
 
 template <__signed_or_unsigned_integer _Tp>
-_LIBCPP_HIDE_FROM_ABI constexpr _Tp mul_sat(_Tp __x, _Tp __y) noexcept {
+[[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr _Tp mul_sat(_Tp __x, _Tp __y) noexcept {
   return std::__mul_sat(__x, __y);
 }
 
 template <__signed_or_unsigned_integer _Tp>
-_LIBCPP_HIDE_FROM_ABI constexpr _Tp div_sat(_Tp __x, _Tp __y) noexcept {
+[[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr _Tp div_sat(_Tp __x, _Tp __y) noexcept {
   return std::__div_sat(__x, __y);
 }
 
 template <__signed_or_unsigned_integer _Rp, __signed_or_unsigned_integer _Tp>
-_LIBCPP_HIDE_FROM_ABI constexpr _Rp saturate_cast(_Tp __x) noexcept {
+[[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr _Rp saturate_cast(_Tp __x) noexcept {
   return std::__saturate_cast<_Rp>(__x);
 }
 
diff --git a/lib/libcxx/include/__ostream/basic_ostream.h b/lib/libcxx/include/__ostream/basic_ostream.h
index effeef491f..62770be72f 100644
--- a/lib/libcxx/include/__ostream/basic_ostream.h
+++ b/lib/libcxx/include/__ostream/basic_ostream.h
@@ -53,7 +53,7 @@ public:
   typedef typename traits_type::off_type off_type;
 
   // 27.7.2.2 Constructor/destructor:
-  inline _LIBCPP_HIDE_FROM_ABI_AFTER_V1 explicit basic_ostream(basic_streambuf<char_type, traits_type>* __sb) {
+  inline _LIBCPP_HIDE_FROM_ABI_SINCE_LLVM8 explicit basic_ostream(basic_streambuf<char_type, traits_type>* __sb) {
     this->init(__sb);
   }
   ~basic_ostream() override;
@@ -67,7 +67,7 @@ protected:
   // 27.7.2.3 Assign/swap
   inline _LIBCPP_HIDE_FROM_ABI basic_ostream& operator=(basic_ostream&& __rhs);
 
-  inline _LIBCPP_HIDE_FROM_ABI_AFTER_V1 void swap(basic_ostream& __rhs) {
+  inline _LIBCPP_HIDE_FROM_ABI_SINCE_LLVM8 void swap(basic_ostream& __rhs) {
     basic_ios<char_type, traits_type>::swap(__rhs);
   }
 
@@ -76,17 +76,17 @@ public:
   class sentry;
 
   // 27.7.2.6 Formatted output:
-  inline _LIBCPP_HIDE_FROM_ABI_AFTER_V1 basic_ostream& operator<<(basic_ostream& (*__pf)(basic_ostream&)) {
+  inline _LIBCPP_HIDE_FROM_ABI_SINCE_LLVM8 basic_ostream& operator<<(basic_ostream& (*__pf)(basic_ostream&)) {
     return __pf(*this);
   }
 
-  inline _LIBCPP_HIDE_FROM_ABI_AFTER_V1 basic_ostream&
+  inline _LIBCPP_HIDE_FROM_ABI_SINCE_LLVM8 basic_ostream&
   operator<<(basic_ios<char_type, traits_type>& (*__pf)(basic_ios<char_type, traits_type>&)) {
     __pf(*this);
     return *this;
   }
 
-  inline _LIBCPP_HIDE_FROM_ABI_AFTER_V1 basic_ostream& operator<<(ios_base& (*__pf)(ios_base&)) {
+  inline _LIBCPP_HIDE_FROM_ABI_SINCE_LLVM8 basic_ostream& operator<<(ios_base& (*__pf)(ios_base&)) {
     __pf(*this);
     return *this;
   }
@@ -174,9 +174,9 @@ public:
   basic_ostream& flush();
 
   // 27.7.2.5 seeks:
-  inline _LIBCPP_HIDE_FROM_ABI_AFTER_V1 pos_type tellp();
-  inline _LIBCPP_HIDE_FROM_ABI_AFTER_V1 basic_ostream& seekp(pos_type __pos);
-  inline _LIBCPP_HIDE_FROM_ABI_AFTER_V1 basic_ostream& seekp(off_type __off, ios_base::seekdir __dir);
+  [[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI_SINCE_LLVM8 pos_type tellp();
+  inline _LIBCPP_HIDE_FROM_ABI_SINCE_LLVM8 basic_ostream& seekp(pos_type __pos);
+  inline _LIBCPP_HIDE_FROM_ABI_SINCE_LLVM8 basic_ostream& seekp(off_type __off, ios_base::seekdir __dir);
 
 protected:
   _LIBCPP_HIDE_FROM_ABI basic_ostream() {} // extension, intentially does not initialize
diff --git a/lib/libcxx/include/__pstl/backends/default.h b/lib/libcxx/include/__pstl/backends/default.h
index 3672bbf60a..43b1f1ce38 100644
--- a/lib/libcxx/include/__pstl/backends/default.h
+++ b/lib/libcxx/include/__pstl/backends/default.h
@@ -102,7 +102,7 @@ struct __find<__default_backend_tag, _ExecutionPolicy> {
   operator()(_Policy&& __policy, _ForwardIterator __first, _ForwardIterator __last, const _Tp& __value) const noexcept {
     using _FindIf = __dispatch<__find_if, __current_configuration, _ExecutionPolicy>;
     return _FindIf()(
-        __policy, std::move(__first), std::move(__last), [&](__iter_reference<_ForwardIterator> __element) {
+        __policy, std::move(__first), std::move(__last), [&](__iterator_reference<_ForwardIterator> __element) {
           return __element == __value;
         });
   }
@@ -137,7 +137,7 @@ struct __all_of<__default_backend_tag, _ExecutionPolicy> {
   [[nodiscard]] _LIBCPP_HIDE_FROM_ABI optional<bool>
   operator()(_Policy&& __policy, _ForwardIterator __first, _ForwardIterator __last, _Pred&& __pred) const noexcept {
     using _AnyOf = __dispatch<__any_of, __current_configuration, _ExecutionPolicy>;
-    auto __res   = _AnyOf()(__policy, __first, __last, [&](__iter_reference<_ForwardIterator> __value) {
+    auto __res   = _AnyOf()(__policy, __first, __last, [&](__iterator_reference<_ForwardIterator> __value) {
       return !__pred(__value);
     });
     if (!__res)
@@ -204,7 +204,7 @@ struct __fill<__default_backend_tag, _ExecutionPolicy> {
   [[nodiscard]] _LIBCPP_HIDE_FROM_ABI optional<__empty>
   operator()(_Policy&& __policy, _ForwardIterator __first, _ForwardIterator __last, _Tp const& __value) const noexcept {
     using _ForEach = __dispatch<__for_each, __current_configuration, _ExecutionPolicy>;
-    using _Ref     = __iter_reference<_ForwardIterator>;
+    using _Ref     = __iterator_reference<_ForwardIterator>;
     return _ForEach()(__policy, std::move(__first), std::move(__last), [&](_Ref __element) { __element = __value; });
   }
 };
@@ -233,7 +233,7 @@ struct __replace<__default_backend_tag, _ExecutionPolicy> {
   operator()(_Policy&& __policy, _ForwardIterator __first, _ForwardIterator __last, _Tp const& __old, _Tp const& __new)
       const noexcept {
     using _ReplaceIf = __dispatch<__replace_if, __current_configuration, _ExecutionPolicy>;
-    using _Ref       = __iter_reference<_ForwardIterator>;
+    using _Ref       = __iterator_reference<_ForwardIterator>;
     return _ReplaceIf()(
         __policy, std::move(__first), std::move(__last), [&](_Ref __element) { return __element == __old; }, __new);
   }
@@ -246,7 +246,7 @@ struct __replace_if<__default_backend_tag, _ExecutionPolicy> {
       _Policy&& __policy, _ForwardIterator __first, _ForwardIterator __last, _Pred&& __pred, _Tp const& __new_value)
       const noexcept {
     using _ForEach = __dispatch<__for_each, __current_configuration, _ExecutionPolicy>;
-    using _Ref     = __iter_reference<_ForwardIterator>;
+    using _Ref     = __iterator_reference<_ForwardIterator>;
     return _ForEach()(__policy, std::move(__first), std::move(__last), [&](_Ref __element) {
       if (__pred(__element))
         __element = __new_value;
@@ -260,7 +260,7 @@ struct __generate<__default_backend_tag, _ExecutionPolicy> {
   [[nodiscard]] _LIBCPP_HIDE_FROM_ABI optional<__empty>
   operator()(_Policy&& __policy, _ForwardIterator __first, _ForwardIterator __last, _Generator&& __gen) const noexcept {
     using _ForEach = __dispatch<__for_each, __current_configuration, _ExecutionPolicy>;
-    using _Ref     = __iter_reference<_ForwardIterator>;
+    using _Ref     = __iterator_reference<_ForwardIterator>;
     return _ForEach()(__policy, std::move(__first), std::move(__last), [&](_Ref __element) { __element = __gen(); });
   }
 };
@@ -271,7 +271,7 @@ struct __generate_n<__default_backend_tag, _ExecutionPolicy> {
   [[nodiscard]] _LIBCPP_HIDE_FROM_ABI optional<__empty>
   operator()(_Policy&& __policy, _ForwardIterator __first, _Size __n, _Generator&& __gen) const noexcept {
     using _ForEachN = __dispatch<__for_each_n, __current_configuration, _ExecutionPolicy>;
-    using _Ref      = __iter_reference<_ForwardIterator>;
+    using _Ref      = __iterator_reference<_ForwardIterator>;
     return _ForEachN()(__policy, std::move(__first), __n, [&](_Ref __element) { __element = __gen(); });
   }
 };
@@ -295,11 +295,11 @@ struct __sort<__default_backend_tag, _ExecutionPolicy> {
 template <class _ExecutionPolicy>
 struct __count_if<__default_backend_tag, _ExecutionPolicy> {
   template <class _Policy, class _ForwardIterator, class _Predicate>
-  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI optional<__iter_diff_t<_ForwardIterator>> operator()(
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI optional<__iterator_difference_type<_ForwardIterator>> operator()(
       _Policy&& __policy, _ForwardIterator __first, _ForwardIterator __last, _Predicate&& __pred) const noexcept {
     using _TransformReduce = __dispatch<__transform_reduce, __current_configuration, _ExecutionPolicy>;
-    using _DiffT           = __iter_diff_t<_ForwardIterator>;
-    using _Ref             = __iter_reference<_ForwardIterator>;
+    using _DiffT           = __iterator_difference_type<_ForwardIterator>;
+    using _Ref             = __iterator_reference<_ForwardIterator>;
     return _TransformReduce()(
         __policy, std::move(__first), std::move(__last), _DiffT{}, std::plus{}, [&](_Ref __element) -> _DiffT {
           return __pred(__element) ? _DiffT(1) : _DiffT(0);
@@ -310,10 +310,10 @@ struct __count_if<__default_backend_tag, _ExecutionPolicy> {
 template <class _ExecutionPolicy>
 struct __count<__default_backend_tag, _ExecutionPolicy> {
   template <class _Policy, class _ForwardIterator, class _Tp>
-  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI optional<__iter_diff_t<_ForwardIterator>>
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI optional<__iterator_difference_type<_ForwardIterator>>
   operator()(_Policy&& __policy, _ForwardIterator __first, _ForwardIterator __last, _Tp const& __value) const noexcept {
     using _CountIf = __dispatch<__count_if, __current_configuration, _ExecutionPolicy>;
-    using _Ref     = __iter_reference<_ForwardIterator>;
+    using _Ref     = __iterator_reference<_ForwardIterator>;
     return _CountIf()(__policy, std::move(__first), std::move(__last), [&](_Ref __element) -> bool {
       return __element == __value;
     });
@@ -402,7 +402,7 @@ struct __replace_copy_if<__default_backend_tag, _ExecutionPolicy> {
              _Pred&& __pred,
              _Tp const& __new_value) const noexcept {
     using _Transform = __dispatch<__transform, __current_configuration, _ExecutionPolicy>;
-    using _Ref       = __iter_reference<_ForwardIterator>;
+    using _Ref       = __iterator_reference<_ForwardIterator>;
     auto __res =
         _Transform()(__policy, std::move(__first), std::move(__last), std::move(__out_it), [&](_Ref __element) {
           return __pred(__element) ? __new_value : __element;
@@ -424,7 +424,7 @@ struct __replace_copy<__default_backend_tag, _ExecutionPolicy> {
              _Tp const& __old_value,
              _Tp const& __new_value) const noexcept {
     using _ReplaceCopyIf = __dispatch<__replace_copy_if, __current_configuration, _ExecutionPolicy>;
-    using _Ref           = __iter_reference<_ForwardIterator>;
+    using _Ref           = __iterator_reference<_ForwardIterator>;
     return _ReplaceCopyIf()(
         __policy,
         std::move(__first),
diff --git a/lib/libcxx/include/__pstl/backends/libdispatch.h b/lib/libcxx/include/__pstl/backends/libdispatch.h
index a640a40352..88d4231d29 100644
--- a/lib/libcxx/include/__pstl/backends/libdispatch.h
+++ b/lib/libcxx/include/__pstl/backends/libdispatch.h
@@ -269,7 +269,7 @@ struct __cpu_traits<__libdispatch_backend_tag> {
       return __empty{};
     }
 
-    using _Value = __iter_value_type<_RandomAccessIterator>;
+    using _Value = __iterator_value_type<_RandomAccessIterator>;
 
     auto __destroy = [__size](_Value* __ptr) {
       std::destroy_n(__ptr, __size);
@@ -282,7 +282,7 @@ struct __cpu_traits<__libdispatch_backend_tag> {
     // Initialize all elements to a moved-from state
     // TODO: Don't do this - this can be done in the first merge - see https://llvm.org/PR63928
     std::__construct_at(__values.get(), std::move(*__first));
-    for (__iter_diff_t<_RandomAccessIterator> __i = 1; __i != __size; ++__i) {
+    for (__iterator_difference_type<_RandomAccessIterator> __i = 1; __i != __size; ++__i) {
       std::__construct_at(__values.get() + __i, std::move(__values.get()[__i - 1]));
     }
     *__first = std::move(__values.get()[__size - 1]);
diff --git a/lib/libcxx/include/__pstl/cpu_algos/find_if.h b/lib/libcxx/include/__pstl/cpu_algos/find_if.h
index ebb4ecb4a0..aae64b66eb 100644
--- a/lib/libcxx/include/__pstl/cpu_algos/find_if.h
+++ b/lib/libcxx/include/__pstl/cpu_algos/find_if.h
@@ -119,7 +119,7 @@ struct __cpu_parallel_find_if {
           true);
     } else if constexpr (__is_unsequenced_execution_policy_v<_RawExecutionPolicy> &&
                          __has_random_access_iterator_category_or_concept<_ForwardIterator>::value) {
-      using __diff_t = __iter_diff_t<_ForwardIterator>;
+      using __diff_t = __iterator_difference_type<_ForwardIterator>;
       return __pstl::__simd_first<_Backend>(
           __first, __diff_t(0), __last - __first, [&__pred](_ForwardIterator __iter, __diff_t __i) {
             return __pred(__iter[__i]);
diff --git a/lib/libcxx/include/__pstl/cpu_algos/transform.h b/lib/libcxx/include/__pstl/cpu_algos/transform.h
index 979121be8c..30d117d754 100644
--- a/lib/libcxx/include/__pstl/cpu_algos/transform.h
+++ b/lib/libcxx/include/__pstl/cpu_algos/transform.h
@@ -84,9 +84,8 @@ struct __cpu_parallel_transform {
           __first,
           __last - __first,
           __result,
-          [&](__iter_reference<_ForwardIterator> __in_value, __iter_reference<_ForwardOutIterator> __out_value) {
-            __out_value = __op(__in_value);
-          });
+          [&](__iterator_reference<_ForwardIterator> __in_value,
+              __iterator_reference<_ForwardOutIterator> __out_value) { __out_value = __op(__in_value); });
     } else {
       return std::transform(__first, __last, __result, __op);
     }
@@ -138,9 +137,9 @@ struct __cpu_parallel_transform_binary {
           __last1 - __first1,
           __first2,
           __result,
-          [&](__iter_reference<_ForwardIterator1> __in1,
-              __iter_reference<_ForwardIterator2> __in2,
-              __iter_reference<_ForwardOutIterator> __out_value) { __out_value = __op(__in1, __in2); });
+          [&](__iterator_reference<_ForwardIterator1> __in1,
+              __iterator_reference<_ForwardIterator2> __in2,
+              __iterator_reference<_ForwardOutIterator> __out_value) { __out_value = __op(__in1, __in2); });
     } else {
       return std::transform(__first1, __last1, __first2, __result, __op);
     }
diff --git a/lib/libcxx/include/__pstl/cpu_algos/transform_reduce.h b/lib/libcxx/include/__pstl/cpu_algos/transform_reduce.h
index abd9d42a6f..edfb28b446 100644
--- a/lib/libcxx/include/__pstl/cpu_algos/transform_reduce.h
+++ b/lib/libcxx/include/__pstl/cpu_algos/transform_reduce.h
@@ -148,9 +148,10 @@ struct __cpu_parallel_transform_reduce_binary {
                          __has_random_access_iterator_category_or_concept<_ForwardIterator1>::value &&
                          __has_random_access_iterator_category_or_concept<_ForwardIterator2>::value) {
       return __pstl::__simd_transform_reduce<_Backend>(
-          __last1 - __first1, std::move(__init), std::move(__reduce), [&](__iter_diff_t<_ForwardIterator1> __i) {
-            return __transform(__first1[__i], __first2[__i]);
-          });
+          __last1 - __first1,
+          std::move(__init),
+          std::move(__reduce),
+          [&](__iterator_difference_type<_ForwardIterator1> __i) { return __transform(__first1[__i], __first2[__i]); });
     } else {
       return std::transform_reduce(
           std::move(__first1),
@@ -200,7 +201,7 @@ struct __cpu_parallel_transform_reduce {
           __last - __first,
           std::move(__init),
           std::move(__reduce),
-          [=, &__transform](__iter_diff_t<_ForwardIterator> __i) { return __transform(__first[__i]); });
+          [=, &__transform](__iterator_difference_type<_ForwardIterator> __i) { return __transform(__first[__i]); });
     } else {
       return std::transform_reduce(
           std::move(__first), std::move(__last), std::move(__init), std::move(__reduce), std::move(__transform));
diff --git a/lib/libcxx/include/__random/binomial_distribution.h b/lib/libcxx/include/__random/binomial_distribution.h
index b4b4340827..0712e4ef4a 100644
--- a/lib/libcxx/include/__random/binomial_distribution.h
+++ b/lib/libcxx/include/__random/binomial_distribution.h
@@ -97,13 +97,19 @@ public:
   }
 };
 
-// The LLVM C library provides this with conflicting `noexcept` attributes.
-#if !defined(_LIBCPP_MSVCRT_LIKE) && !defined(__LLVM_LIBC__)
-extern "C" double lgamma_r(double, int*);
+// Some libc declares the math functions to be `noexcept`.
+#if _LIBCPP_GLIBC_PREREQ(2, 8) || defined(__LLVM_LIBC__)
+#  define _LIBCPP_LGAMMA_R_NOEXCEPT _NOEXCEPT
+#else
+#  define _LIBCPP_LGAMMA_R_NOEXCEPT
+#endif
+
+#if !defined(_LIBCPP_MSVCRT_LIKE)
+extern "C" double lgamma_r(double, int*) _LIBCPP_LGAMMA_R_NOEXCEPT;
 #endif
 
 inline _LIBCPP_HIDE_FROM_ABI double __libcpp_lgamma(double __d) {
-#if defined(_LIBCPP_MSVCRT_LIKE) || defined(__LLVM_LIBC__)
+#if defined(_LIBCPP_MSVCRT_LIKE)
   return lgamma(__d);
 #else
   int __sign;
diff --git a/lib/libcxx/include/__random/mersenne_twister_engine.h b/lib/libcxx/include/__random/mersenne_twister_engine.h
index c60fe1529b..332e830e73 100644
--- a/lib/libcxx/include/__random/mersenne_twister_engine.h
+++ b/lib/libcxx/include/__random/mersenne_twister_engine.h
@@ -62,24 +62,6 @@ _LIBCPP_HIDE_FROM_ABI bool
 operator==(const mersenne_twister_engine<_UInt, _Wp, _Np, _Mp, _Rp, _Ap, _Up, _Dp, _Sp, _Bp, _Tp, _Cp, _Lp, _Fp>& __x,
            const mersenne_twister_engine<_UInt, _Wp, _Np, _Mp, _Rp, _Ap, _Up, _Dp, _Sp, _Bp, _Tp, _Cp, _Lp, _Fp>& __y);
 
-template <class _UInt,
-          size_t _Wp,
-          size_t _Np,
-          size_t _Mp,
-          size_t _Rp,
-          _UInt _Ap,
-          size_t _Up,
-          _UInt _Dp,
-          size_t _Sp,
-          _UInt _Bp,
-          size_t _Tp,
-          _UInt _Cp,
-          size_t _Lp,
-          _UInt _Fp>
-_LIBCPP_HIDE_FROM_ABI bool
-operator!=(const mersenne_twister_engine<_UInt, _Wp, _Np, _Mp, _Rp, _Ap, _Up, _Dp, _Sp, _Bp, _Tp, _Cp, _Lp, _Fp>& __x,
-           const mersenne_twister_engine<_UInt, _Wp, _Np, _Mp, _Rp, _Ap, _Up, _Dp, _Sp, _Bp, _Tp, _Cp, _Lp, _Fp>& __y);
-
 template <class _CharT,
           class _Traits,
           class _UInt,
@@ -194,14 +176,31 @@ public:
   _LIBCPP_HIDE_FROM_ABI explicit mersenne_twister_engine(_Sseq& __q) {
     seed(__q);
   }
-  _LIBCPP_HIDE_FROM_ABI void seed(result_type __sd = default_seed);
+  _LIBCPP_HIDE_FROM_ABI void seed(result_type __sd = default_seed) _LIBCPP_DISABLE_UBSAN_UNSIGNED_INTEGER_CHECK {
+    __x_[0] = __sd & _Max; // first state word is the seed masked by _Max
+    for (size_t __i = 1; __i < __n; ++__i) // every later word is derived from the word before it
+      __x_[__i] = (__f * (__x_[__i - 1] ^ __rshift<__w - 2>(__x_[__i - 1])) + __i) & _Max; // __w >= 2
+    __i_ = 0; // operator() indexes __x_ with __i_, so start again at word 0
+  }
   template <class _Sseq, __enable_if_t<__is_seed_sequence<_Sseq, mersenne_twister_engine>::value, int> = 0>
   _LIBCPP_HIDE_FROM_ABI void seed(_Sseq& __q) {
     __seed(__q, integral_constant<unsigned, 1 + (__w - 1) / 32>());
   }
 
   // generating functions
-  _LIBCPP_HIDE_FROM_ABI result_type operator()();
+  _LIBCPP_HIDE_FROM_ABI result_type operator()() {
+    const size_t __j         = (__i_ + 1) % __n; // index of the next state word, wrapping at __n
+    const result_type __mask = __r == _Dt ? result_type(~0) : (result_type(1) << __r) - result_type(1);
+    const result_type __yp   = (__x_[__i_] & ~__mask) | (__x_[__j] & __mask); // word __i_ outside mask, word __j inside
+    const size_t __k         = (__i_ + __m) % __n; // index __m positions ahead, wrapping at __n
+    __x_[__i_]               = __x_[__k] ^ __rshift<1>(__yp) ^ (__a * (__yp & 1)); // __a contributes only if __yp is odd
+    result_type __z          = __x_[__i_] ^ (__rshift<__u>(__x_[__i_]) & __d);
+    __i_                     = __j; // the next call works on the following state word
+    __z ^= __lshift<__s>(__z) & __b;
+    __z ^= __lshift<__t>(__z) & __c;
+    return __z ^ __rshift<__l>(__z);
+  }
+
   _LIBCPP_HIDE_FROM_ABI void discard(unsigned long long __z) {
     for (; __z; --__z)
       operator()();
@@ -225,24 +224,6 @@ public:
       const mersenne_twister_engine<_UInt, _Wp, _Np, _Mp, _Rp, _Ap, _Up, _Dp, _Sp, _Bp, _Tp, _Cp, _Lp, _Fp>& __x,
       const mersenne_twister_engine<_UInt, _Wp, _Np, _Mp, _Rp, _Ap, _Up, _Dp, _Sp, _Bp, _Tp, _Cp, _Lp, _Fp>& __y);
 
-  template <class _UInt,
-            size_t _Wp,
-            size_t _Np,
-            size_t _Mp,
-            size_t _Rp,
-            _UInt _Ap,
-            size_t _Up,
-            _UInt _Dp,
-            size_t _Sp,
-            _UInt _Bp,
-            size_t _Tp,
-            _UInt _Cp,
-            size_t _Lp,
-            _UInt _Fp>
-  friend bool operator!=(
-      const mersenne_twister_engine<_UInt, _Wp, _Np, _Mp, _Rp, _Ap, _Up, _Dp, _Sp, _Bp, _Tp, _Cp, _Lp, _Fp>& __x,
-      const mersenne_twister_engine<_UInt, _Wp, _Np, _Mp, _Rp, _Ap, _Up, _Dp, _Sp, _Bp, _Tp, _Cp, _Lp, _Fp>& __y);
-
   template <class _CharT,
             class _Traits,
             class _UInt,
@@ -285,9 +266,38 @@ public:
 
 private:
   template <class _Sseq>
-  _LIBCPP_HIDE_FROM_ABI void __seed(_Sseq& __q, integral_constant<unsigned, 1>);
+  _LIBCPP_HIDE_FROM_ABI void __seed(_Sseq& __q, integral_constant<unsigned, 1>) {
+    const unsigned __k = 1; // one 32-bit value is requested per state word
+    uint32_t __ar[__n * __k];
+    __q.generate(__ar, __ar + __n * __k); // let __q fill the whole scratch array
+    for (size_t __i = 0; __i < __n; ++__i)
+      __x_[__i] = static_cast<result_type>(__ar[__i] & _Max);
+    const result_type __mask = __r == _Dt ? result_type(~0) : (result_type(1) << __r) - result_type(1);
+    __i_                     = 0;
+    if ((__x_[0] & ~__mask) == 0) { // no bit of __x_[0] outside __mask is set
+      for (size_t __i = 1; __i < __n; ++__i)
+        if (__x_[__i] != 0)
+          return; // a later word is nonzero: keep the state as generated
+      __x_[0] = result_type(1) << (__w - 1); // otherwise replace word 0 with only bit __w - 1 set
+    }
+  }
+
   template <class _Sseq>
-  _LIBCPP_HIDE_FROM_ABI void __seed(_Sseq& __q, integral_constant<unsigned, 2>);
+  _LIBCPP_HIDE_FROM_ABI void __seed(_Sseq& __q, integral_constant<unsigned, 2>) {
+    const unsigned __k = 2; // two 32-bit values are requested per state word
+    uint32_t __ar[__n * __k];
+    __q.generate(__ar, __ar + __n * __k); // let __q fill the whole scratch array
+    for (size_t __i = 0; __i < __n; ++__i) // even element supplies bits 0-31, odd element bits 32-63, then mask by _Max
+      __x_[__i] = static_cast<result_type>((__ar[2 * __i] + ((uint64_t)__ar[2 * __i + 1] << 32)) & _Max);
+    const result_type __mask = __r == _Dt ? result_type(~0) : (result_type(1) << __r) - result_type(1);
+    __i_                     = 0;
+    if ((__x_[0] & ~__mask) == 0) { // no bit of __x_[0] outside __mask is set
+      for (size_t __i = 1; __i < __n; ++__i)
+        if (__x_[__i] != 0)
+          return; // a later word is nonzero: keep the state as generated
+      __x_[0] = result_type(1) << (__w - 1); // otherwise replace word 0 with only bit __w - 1 set
+    }
+  }
 
   template <size_t __count,
             __enable_if_t<__count< __w, int> = 0> _LIBCPP_HIDE_FROM_ABI static result_type __lshift(result_type __x) {
@@ -310,120 +320,6 @@ private:
   }
 };
 
-template <class _UIntType,
-          size_t __w,
-          size_t __n,
-          size_t __m,
-          size_t __r,
-          _UIntType __a,
-          size_t __u,
-          _UIntType __d,
-          size_t __s,
-          _UIntType __b,
-          size_t __t,
-          _UIntType __c,
-          size_t __l,
-          _UIntType __f>
-void mersenne_twister_engine<_UIntType, __w, __n, __m, __r, __a, __u, __d, __s, __b, __t, __c, __l, __f>::seed(
-    result_type __sd) _LIBCPP_DISABLE_UBSAN_UNSIGNED_INTEGER_CHECK { // __w >= 2
-  __x_[0] = __sd & _Max;
-  for (size_t __i = 1; __i < __n; ++__i)
-    __x_[__i] = (__f * (__x_[__i - 1] ^ __rshift<__w - 2>(__x_[__i - 1])) + __i) & _Max;
-  __i_ = 0;
-}
-
-template <class _UIntType,
-          size_t __w,
-          size_t __n,
-          size_t __m,
-          size_t __r,
-          _UIntType __a,
-          size_t __u,
-          _UIntType __d,
-          size_t __s,
-          _UIntType __b,
-          size_t __t,
-          _UIntType __c,
-          size_t __l,
-          _UIntType __f>
-template <class _Sseq>
-void mersenne_twister_engine<_UIntType, __w, __n, __m, __r, __a, __u, __d, __s, __b, __t, __c, __l, __f>::__seed(
-    _Sseq& __q, integral_constant<unsigned, 1>) {
-  const unsigned __k = 1;
-  uint32_t __ar[__n * __k];
-  __q.generate(__ar, __ar + __n * __k);
-  for (size_t __i = 0; __i < __n; ++__i)
-    __x_[__i] = static_cast<result_type>(__ar[__i] & _Max);
-  const result_type __mask = __r == _Dt ? result_type(~0) : (result_type(1) << __r) - result_type(1);
-  __i_                     = 0;
-  if ((__x_[0] & ~__mask) == 0) {
-    for (size_t __i = 1; __i < __n; ++__i)
-      if (__x_[__i] != 0)
-        return;
-    __x_[0] = result_type(1) << (__w - 1);
-  }
-}
-
-template <class _UIntType,
-          size_t __w,
-          size_t __n,
-          size_t __m,
-          size_t __r,
-          _UIntType __a,
-          size_t __u,
-          _UIntType __d,
-          size_t __s,
-          _UIntType __b,
-          size_t __t,
-          _UIntType __c,
-          size_t __l,
-          _UIntType __f>
-template <class _Sseq>
-void mersenne_twister_engine<_UIntType, __w, __n, __m, __r, __a, __u, __d, __s, __b, __t, __c, __l, __f>::__seed(
-    _Sseq& __q, integral_constant<unsigned, 2>) {
-  const unsigned __k = 2;
-  uint32_t __ar[__n * __k];
-  __q.generate(__ar, __ar + __n * __k);
-  for (size_t __i = 0; __i < __n; ++__i)
-    __x_[__i] = static_cast<result_type>((__ar[2 * __i] + ((uint64_t)__ar[2 * __i + 1] << 32)) & _Max);
-  const result_type __mask = __r == _Dt ? result_type(~0) : (result_type(1) << __r) - result_type(1);
-  __i_                     = 0;
-  if ((__x_[0] & ~__mask) == 0) {
-    for (size_t __i = 1; __i < __n; ++__i)
-      if (__x_[__i] != 0)
-        return;
-    __x_[0] = result_type(1) << (__w - 1);
-  }
-}
-
-template <class _UIntType,
-          size_t __w,
-          size_t __n,
-          size_t __m,
-          size_t __r,
-          _UIntType __a,
-          size_t __u,
-          _UIntType __d,
-          size_t __s,
-          _UIntType __b,
-          size_t __t,
-          _UIntType __c,
-          size_t __l,
-          _UIntType __f>
-_UIntType
-mersenne_twister_engine<_UIntType, __w, __n, __m, __r, __a, __u, __d, __s, __b, __t, __c, __l, __f>::operator()() {
-  const size_t __j         = (__i_ + 1) % __n;
-  const result_type __mask = __r == _Dt ? result_type(~0) : (result_type(1) << __r) - result_type(1);
-  const result_type __yp   = (__x_[__i_] & ~__mask) | (__x_[__j] & __mask);
-  const size_t __k         = (__i_ + __m) % __n;
-  __x_[__i_]               = __x_[__k] ^ __rshift<1>(__yp) ^ (__a * (__yp & 1));
-  result_type __z          = __x_[__i_] ^ (__rshift<__u>(__x_[__i_]) & __d);
-  __i_                     = __j;
-  __z ^= __lshift<__s>(__z) & __b;
-  __z ^= __lshift<__t>(__z) & __c;
-  return __z ^ __rshift<__l>(__z);
-}
-
 template <class _UInt,
           size_t _Wp,
           size_t _Np,
diff --git a/lib/libcxx/include/__random/piecewise_constant_distribution.h b/lib/libcxx/include/__random/piecewise_constant_distribution.h
index c5bfa8dc3a..3faf339325 100644
--- a/lib/libcxx/include/__random/piecewise_constant_distribution.h
+++ b/lib/libcxx/include/__random/piecewise_constant_distribution.h
@@ -9,9 +9,11 @@
 #ifndef _LIBCPP___RANDOM_PIECEWISE_CONSTANT_DISTRIBUTION_H
 #define _LIBCPP___RANDOM_PIECEWISE_CONSTANT_DISTRIBUTION_H
 
+#include <__algorithm/copy_n.h>
 #include <__algorithm/upper_bound.h>
 #include <__config>
 #include <__cstddef/ptrdiff_t.h>
+#include <__iterator/back_insert_iterator.h>
 #include <__random/is_valid.h>
 #include <__random/uniform_real_distribution.h>
 #include <__vector/vector.h>
@@ -190,8 +192,7 @@ piecewise_constant_distribution<_RealType>::param_type::param_type(
     __areas_.assign(1, 0.0);
   } else {
     __densities_.reserve(__b_.size() - 1);
-    for (size_t __i = 0; __i < __b_.size() - 1; ++__i, ++__f_w)
-      __densities_.push_back(*__f_w);
+    std::copy_n(__f_w, __b_.size() - 1, std::back_inserter(__densities_));
     __init();
   }
 }
diff --git a/lib/libcxx/include/__random/piecewise_linear_distribution.h b/lib/libcxx/include/__random/piecewise_linear_distribution.h
index a9906430c0..8aa3f19ca9 100644
--- a/lib/libcxx/include/__random/piecewise_linear_distribution.h
+++ b/lib/libcxx/include/__random/piecewise_linear_distribution.h
@@ -9,9 +9,11 @@
 #ifndef _LIBCPP___RANDOM_PIECEWISE_LINEAR_DISTRIBUTION_H
 #define _LIBCPP___RANDOM_PIECEWISE_LINEAR_DISTRIBUTION_H
 
+#include <__algorithm/copy_n.h>
 #include <__algorithm/upper_bound.h>
 #include <__config>
 #include <__cstddef/ptrdiff_t.h>
+#include <__iterator/back_insert_iterator.h>
 #include <__random/is_valid.h>
 #include <__random/uniform_real_distribution.h>
 #include <__vector/comparison.h>
@@ -194,8 +196,7 @@ piecewise_linear_distribution<_RealType>::param_type::param_type(
     __areas_.assign(1, 0.0);
   } else {
     __densities_.reserve(__b_.size());
-    for (size_t __i = 0; __i < __b_.size(); ++__i, ++__f_w)
-      __densities_.push_back(*__f_w);
+    std::copy_n(__f_w, __b_.size(), std::back_inserter(__densities_));
     __init();
   }
 }
diff --git a/lib/libcxx/include/__ranges/adjacent_transform_view.h b/lib/libcxx/include/__ranges/adjacent_transform_view.h
new file mode 100644
index 0000000000..11b1176824
--- /dev/null
+++ b/lib/libcxx/include/__ranges/adjacent_transform_view.h
@@ -0,0 +1,406 @@
+// -*- C++ -*-
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef _LIBCPP___RANGES_ADJACENT_TRANSFORM_VIEW_H
+#define _LIBCPP___RANGES_ADJACENT_TRANSFORM_VIEW_H
+
+#include <__config>
+
+#include <__algorithm/min.h>
+#include <__compare/three_way_comparable.h>
+#include <__concepts/constructible.h>
+#include <__concepts/convertible_to.h>
+#include <__concepts/derived_from.h>
+#include <__concepts/equality_comparable.h>
+#include <__concepts/invocable.h>
+#include <__cstddef/size_t.h>
+#include <__functional/bind_back.h>
+#include <__functional/invoke.h>
+#include <__functional/operations.h>
+#include <__iterator/concepts.h>
+#include <__iterator/incrementable_traits.h>
+#include <__iterator/iter_move.h>
+#include <__iterator/iter_swap.h>
+#include <__iterator/iterator_traits.h>
+#include <__iterator/next.h>
+#include <__iterator/prev.h>
+#include <__memory/addressof.h>
+#include <__ranges/access.h>
+#include <__ranges/adjacent_view.h>
+#include <__ranges/all.h>
+#include <__ranges/concepts.h>
+#include <__ranges/empty_view.h>
+#include <__ranges/movable_box.h>
+#include <__ranges/range_adaptor.h>
+#include <__ranges/size.h>
+#include <__ranges/view_interface.h>
+#include <__ranges/zip_transform_view.h>
+#include <__type_traits/common_type.h>
+#include <__type_traits/decay.h>
+#include <__type_traits/is_nothrow_constructible.h>
+#include <__type_traits/is_object.h>
+#include <__type_traits/is_referenceable.h>
+#include <__type_traits/make_unsigned.h>
+#include <__type_traits/maybe_const.h>
+#include <__utility/declval.h>
+#include <__utility/forward.h>
+#include <__utility/in_place.h>
+#include <__utility/integer_sequence.h>
+#include <__utility/move.h>
+
+#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
+#  pragma GCC system_header
+#endif
+
+_LIBCPP_PUSH_MACROS
+#include <__undef_macros>
+
+_LIBCPP_BEGIN_NAMESPACE_STD
+
+#if _LIBCPP_STD_VER >= 23
+
+namespace ranges {
+
+template <class _Fn, size_t _Np> // Declaration-only helper: names the result of invoking _Fn with _Np same-typed arguments.
+struct __apply_n {
+  template <class _Tp, size_t... _Is> // _Tp is repeated once per index in _Is
+  static auto __apply(index_sequence<_Is...>) -> invoke_result_t<_Fn, decltype((void)_Is, std::declval<_Tp>())...>;
+
+  template <class _Tp> // return type is that of invoking _Fn with _Np arguments of type _Tp
+  static auto operator()(_Tp&&) -> decltype(__apply<_Tp>(make_index_sequence<_Np>{}));
+};
+
+template <forward_range _View, move_constructible _Fn, size_t _Np> // View pairing adjacent_view<_View, _Np> with a callable.
+  requires view<_View> && (_Np > 0) && is_object_v<_Fn> &&
+           regular_invocable<__apply_n<_Fn&, _Np>, range_reference_t<_View>> &&
+           __referenceable<invoke_result_t<__apply_n<_Fn&, _Np>, range_reference_t<_View>>>
+class adjacent_transform_view : public view_interface<adjacent_transform_view<_View, _Fn, _Np>> {
+private:
+  _LIBCPP_NO_UNIQUE_ADDRESS adjacent_view<_View, _Np> __inner_; // adjacent_view over the base view
+  _LIBCPP_NO_UNIQUE_ADDRESS __movable_box<_Fn> __fun_; // stored callable, read by the iterators through the parent pointer
+
+  using _InnerView _LIBCPP_NODEBUG = adjacent_view<_View, _Np>;
+
+  template <bool _Const> // iterator type of the (possibly const) inner view
+  using __inner_iterator _LIBCPP_NODEBUG = iterator_t<__maybe_const<_Const, _InnerView>>;
+
+  template <bool _Const> // sentinel type of the (possibly const) inner view
+  using __inner_sentinel _LIBCPP_NODEBUG = sentinel_t<__maybe_const<_Const, _InnerView>>;
+
+  template <bool>
+  class __iterator;
+
+  template <bool>
+  class __sentinel;
+
+public:
+  _LIBCPP_HIDE_FROM_ABI adjacent_transform_view() = default;
+
+  _LIBCPP_HIDE_FROM_ABI constexpr explicit adjacent_transform_view(_View __base, _Fn __fun) // moves both arguments into place
+      : __inner_(std::move(__base)), __fun_(std::in_place, std::move(__fun)) {}
+
+  _LIBCPP_HIDE_FROM_ABI constexpr _View base() const& // returns the base view as reported by the inner adjacent_view
+    requires copy_constructible<_View>
+  {
+    return __inner_.base();
+  }
+  _LIBCPP_HIDE_FROM_ABI constexpr _View base() && { return std::move(__inner_).base(); } // rvalue overload: moves from the inner view
+
+  _LIBCPP_HIDE_FROM_ABI constexpr auto begin() { return __iterator<false>(*this, __inner_.begin()); } // iterator refers back to *this
+
+  _LIBCPP_HIDE_FROM_ABI constexpr auto begin() const // needs a const-iterable inner view and a callable usable as const _Fn&
+    requires range<const _InnerView> && regular_invocable<__apply_n<const _Fn&, _Np>, range_reference_t<const _View>>
+  {
+    return __iterator<true>(*this, __inner_.begin());
+  }
+
+  _LIBCPP_HIDE_FROM_ABI constexpr auto end() {
+    if constexpr (common_range<_InnerView>) { // common inner view: end is an iterator of this view
+      return __iterator<false>(*this, __inner_.end());
+    } else { // otherwise: a sentinel wrapping the inner sentinel
+      return __sentinel<false>(__inner_.end());
+    }
+  }
+
+  _LIBCPP_HIDE_FROM_ABI constexpr auto end() const // same constraints as begin() const
+    requires range<const _InnerView> && regular_invocable<__apply_n<const _Fn&, _Np>, range_reference_t<const _View>>
+  {
+    if constexpr (common_range<const _InnerView>) {
+      return __iterator<true>(*this, __inner_.end());
+    } else {
+      return __sentinel<true>(__inner_.end());
+    }
+  }
+
+  _LIBCPP_HIDE_FROM_ABI constexpr auto size() // forwards the inner adjacent_view's size
+    requires sized_range<_InnerView>
+  {
+    return __inner_.size();
+  }
+
+  _LIBCPP_HIDE_FROM_ABI constexpr auto size() const // const counterpart of size()
+    requires sized_range<const _InnerView>
+  {
+    return __inner_.size();
+  }
+};
+
+template <forward_range _View, move_constructible _Fn, size_t _Np> // Iterator: inner adjacent_view iterator plus parent pointer.
+  requires view<_View> && (_Np > 0) && is_object_v<_Fn> &&
+           regular_invocable<__apply_n<_Fn&, _Np>, range_reference_t<_View>> &&
+           __referenceable<invoke_result_t<__apply_n<_Fn&, _Np>, range_reference_t<_View>>>
+template <bool _Const>
+class adjacent_transform_view<_View, _Fn, _Np>::__iterator {
+  friend adjacent_transform_view;
+
+  using _Parent _LIBCPP_NODEBUG = __maybe_const<_Const, adjacent_transform_view>;
+  using _Base _LIBCPP_NODEBUG   = __maybe_const<_Const, _View>;
+
+  _Parent* __parent_ = nullptr; // owning view; dereferenced to reach its __fun_
+  __inner_iterator<_Const> __inner_; // position within the inner adjacent_view
+
+  _LIBCPP_HIDE_FROM_ABI constexpr __iterator(_Parent& __parent, __inner_iterator<_Const> __inner) // private: used by the view
+      : __parent_(std::addressof(__parent)), __inner_(std::move(__inner)) {}
+
+  static consteval auto __get_iterator_category() { // returns a tag object; only its type is used (see iterator_category)
+    using _Cat = iterator_traits<iterator_t<_Base>>::iterator_category;
+    if constexpr (!is_reference_v< // call result is not a reference: category is input
+                      invoke_result_t<__apply_n<__maybe_const<_Const, _Fn>&, _Np>, range_reference_t<_Base>>>)
+      return input_iterator_tag{};
+    else if constexpr (derived_from<_Cat, random_access_iterator_tag>) // otherwise the strongest tag _Cat derives from
+      return random_access_iterator_tag{};
+    else if constexpr (derived_from<_Cat, bidirectional_iterator_tag>)
+      return bidirectional_iterator_tag{};
+    else if constexpr (derived_from<_Cat, forward_iterator_tag>)
+      return forward_iterator_tag{};
+    else
+      return input_iterator_tag{};
+  }
+
+  template <size_t... _Is> // one dereferenced base iterator per index in _Is
+  static consteval bool __noexcept_dereference(index_sequence<_Is...>) { // is invoking the callable on them noexcept?
+    return noexcept(std::invoke(
+        std::declval<__maybe_const<_Const, _Fn>&>(), ((void)_Is, *std::declval<iterator_t<_Base> const&>())...));
+  }
+
+public:
+  using iterator_category = decltype(__get_iterator_category());
+  using iterator_concept  = typename __inner_iterator<_Const>::iterator_concept; // taken from the inner iterator
+  using value_type = // call result with cv-qualifiers and references removed
+      remove_cvref_t<invoke_result_t<__apply_n<__maybe_const<_Const, _Fn>&, _Np>, range_reference_t<_Base>>>;
+  using difference_type = range_difference_t<_Base>;
+
+  _LIBCPP_HIDE_FROM_ABI __iterator() = default;
+
+  _LIBCPP_HIDE_FROM_ABI constexpr __iterator(__iterator<!_Const> __i) // non-const to const iterator conversion
+    requires _Const && convertible_to<__inner_iterator<false>, __inner_iterator<true>>
+      : __parent_(__i.__parent_), __inner_(std::move(__i.__inner_)) {}
+
+  _LIBCPP_HIDE_FROM_ABI constexpr decltype(auto) operator*() const // invokes the parent's callable on the dereferenced iterators
+      noexcept(__noexcept_dereference(make_index_sequence<_Np>{})) {
+    return std::apply(
+        [&](const auto&... __iters) -> decltype(auto) { return std::invoke(*__parent_->__fun_, *__iters...); },
+        __adjacent_view_iter_access::__get_current(__inner_)); // the inner iterator's array of base iterators
+  }
+
+  _LIBCPP_HIDE_FROM_ABI constexpr __iterator& operator++() { // movement operators below all forward to __inner_
+    ++__inner_;
+    return *this;
+  }
+
+  _LIBCPP_HIDE_FROM_ABI constexpr __iterator operator++(int) {
+    auto __tmp = *this;
+    ++*this;
+    return __tmp;
+  }
+
+  _LIBCPP_HIDE_FROM_ABI constexpr __iterator& operator--()
+    requires bidirectional_range<_Base>
+  {
+    --__inner_;
+    return *this;
+  }
+
+  _LIBCPP_HIDE_FROM_ABI constexpr __iterator operator--(int)
+    requires bidirectional_range<_Base>
+  {
+    auto __tmp = *this;
+    --*this;
+    return __tmp;
+  }
+
+  _LIBCPP_HIDE_FROM_ABI constexpr __iterator& operator+=(difference_type __x)
+    requires random_access_range<_Base>
+  {
+    __inner_ += __x;
+    return *this;
+  }
+
+  _LIBCPP_HIDE_FROM_ABI constexpr __iterator& operator-=(difference_type __x)
+    requires random_access_range<_Base>
+  {
+    __inner_ -= __x;
+    return *this;
+  }
+
+  _LIBCPP_HIDE_FROM_ABI constexpr decltype(auto) operator[](difference_type __n) const // like operator*, at offset __n
+    requires random_access_range<_Base>
+  {
+    return std::apply( // each stored base iterator is indexed with __n instead of being dereferenced
+        [&](const auto&... __iters) -> decltype(auto) { return std::invoke(*__parent_->__fun_, __iters[__n]...); },
+        __adjacent_view_iter_access::__get_current(__inner_));
+  }
+
+  _LIBCPP_HIDE_FROM_ABI friend constexpr bool operator==(const __iterator& __x, const __iterator& __y) { // compares __inner_ only
+    return __x.__inner_ == __y.__inner_;
+  }
+
+  _LIBCPP_HIDE_FROM_ABI friend constexpr bool operator<(const __iterator& __x, const __iterator& __y) // orderings forward to __inner_
+    requires random_access_range<_Base>
+  {
+    return __x.__inner_ < __y.__inner_;
+  }
+
+  _LIBCPP_HIDE_FROM_ABI friend constexpr bool operator>(const __iterator& __x, const __iterator& __y)
+    requires random_access_range<_Base>
+  {
+    return __x.__inner_ > __y.__inner_;
+  }
+
+  _LIBCPP_HIDE_FROM_ABI friend constexpr bool operator<=(const __iterator& __x, const __iterator& __y)
+    requires random_access_range<_Base>
+  {
+    return __x.__inner_ <= __y.__inner_;
+  }
+
+  _LIBCPP_HIDE_FROM_ABI friend constexpr bool operator>=(const __iterator& __x, const __iterator& __y)
+    requires random_access_range<_Base>
+  {
+    return __x.__inner_ >= __y.__inner_;
+  }
+
+  _LIBCPP_HIDE_FROM_ABI friend constexpr auto operator<=>(const __iterator& __x, const __iterator& __y)
+    requires random_access_range<_Base> && three_way_comparable<__inner_iterator<_Const>>
+  {
+    return __x.__inner_ <=> __y.__inner_;
+  }
+
+  _LIBCPP_HIDE_FROM_ABI friend constexpr __iterator operator+(const __iterator& __i, difference_type __n) // same parent as __i
+    requires random_access_range<_Base>
+  {
+    return __iterator(*__i.__parent_, __i.__inner_ + __n);
+  }
+
+  _LIBCPP_HIDE_FROM_ABI friend constexpr __iterator operator+(difference_type __n, const __iterator& __i)
+    requires random_access_range<_Base>
+  {
+    return __iterator(*__i.__parent_, __i.__inner_ + __n);
+  }
+
+  _LIBCPP_HIDE_FROM_ABI friend constexpr __iterator operator-(const __iterator& __i, difference_type __n)
+    requires random_access_range<_Base>
+  {
+    return __iterator(*__i.__parent_, __i.__inner_ - __n);
+  }
+
+  _LIBCPP_HIDE_FROM_ABI friend constexpr difference_type operator-(const __iterator& __x, const __iterator& __y) // inner distance
+    requires sized_sentinel_for<__inner_iterator<_Const>, __inner_iterator<_Const>>
+  {
+    return __x.__inner_ - __y.__inner_;
+  }
+};
+
+template <forward_range _View, move_constructible _Fn, size_t _Np> // Sentinel: wrapper around the inner adjacent_view sentinel.
+  requires view<_View> && (_Np > 0) && is_object_v<_Fn> &&
+           regular_invocable<__apply_n<_Fn&, _Np>, range_reference_t<_View>> &&
+           __referenceable<invoke_result_t<__apply_n<_Fn&, _Np>, range_reference_t<_View>>>
+template <bool _Const>
+class adjacent_transform_view<_View, _Fn, _Np>::__sentinel {
+  friend adjacent_transform_view;
+
+  __inner_sentinel<_Const> __inner_; // sentinel of the inner adjacent_view
+
+  _LIBCPP_HIDE_FROM_ABI constexpr explicit __sentinel(__inner_sentinel<_Const> __inner) // private: used by the view's end()
+      : __inner_(std::move(__inner)) {}
+
+public:
+  _LIBCPP_HIDE_FROM_ABI __sentinel() = default;
+
+  _LIBCPP_HIDE_FROM_ABI constexpr __sentinel(__sentinel<!_Const> __i) // non-const to const sentinel conversion
+    requires _Const && convertible_to<__inner_sentinel<false>, __inner_sentinel<_Const>>
+      : __inner_(std::move(__i.__inner_)) {}
+
+  template <bool _OtherConst> // works with const and non-const iterators of the view
+    requires sentinel_for<__inner_sentinel<_Const>, __inner_iterator<_OtherConst>>
+  _LIBCPP_HIDE_FROM_ABI friend constexpr bool operator==(const __iterator<_OtherConst>& __x, const __sentinel& __y) {
+    return __x.__inner_ == __y.__inner_; // forwards to the inner iterator/sentinel comparison
+  }
+
+  template <bool _OtherConst> // iterator - sentinel
+    requires sized_sentinel_for<__inner_sentinel<_Const>, __inner_iterator<_OtherConst>>
+  _LIBCPP_HIDE_FROM_ABI friend constexpr range_difference_t<__maybe_const<_OtherConst, _InnerView>>
+  operator-(const __iterator<_OtherConst>& __x, const __sentinel& __y) {
+    return __x.__inner_ - __y.__inner_;
+  }
+
+  template <bool _OtherConst> // sentinel - iterator
+    requires sized_sentinel_for<__inner_sentinel<_Const>, __inner_iterator<_OtherConst>>
+  _LIBCPP_HIDE_FROM_ABI friend constexpr range_difference_t<__maybe_const<_OtherConst, _InnerView>>
+  operator-(const __sentinel& __x, const __iterator<_OtherConst>& __y) {
+    return __x.__inner_ - __y.__inner_;
+  }
+};
+
+namespace views {
+namespace __adjacent_transform {
+
+template <size_t _Np> // Function object type behind views::adjacent_transform<_Np>.
+struct __fn : __range_adaptor_closure<__fn<_Np>> {
+  template <class _Range, class _Fn> // N == 0: the range argument is ignored
+    requires(_Np == 0 && forward_range<_Range &&>)
+  _LIBCPP_HIDE_FROM_ABI static constexpr auto
+  operator()(_Range&&, _Fn&& __fn) noexcept(noexcept(views::zip_transform(std::forward<_Fn>(__fn))))
+      -> decltype(views::zip_transform(std::forward<_Fn>(__fn))) {
+    return views::zip_transform(std::forward<_Fn>(__fn)); // result comes from zip_transform called with the callable alone
+  }
+
+  template <class _Range, class _Fn> // general case: viable only when the adjacent_transform_view expression is well-formed
+  _LIBCPP_HIDE_FROM_ABI static constexpr auto operator()(_Range&& __range, _Fn&& __fn) noexcept(
+      noexcept(adjacent_transform_view<views::all_t<_Range&&>, decay_t<_Fn>, _Np>(
+          std::forward<_Range>(__range), std::forward<_Fn>(__fn))))
+      -> decltype(adjacent_transform_view<views::all_t<_Range&&>, decay_t<_Fn>, _Np>(
+          std::forward<_Range>(__range), std::forward<_Fn>(__fn))) {
+    return adjacent_transform_view<views::all_t<_Range&&>, decay_t<_Fn>, _Np>( // callable is stored as decay_t<_Fn>
+        std::forward<_Range>(__range), std::forward<_Fn>(__fn));
+  }
+
+  template <class _Fn> // callable only: binds __f as the trailing argument for a later call with a range
+    requires constructible_from<decay_t<_Fn>, _Fn>
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr auto operator()(_Fn&& __f) const
+      noexcept(is_nothrow_constructible_v<decay_t<_Fn>, _Fn>) {
+    return __pipeable(std::__bind_back(*this, std::forward<_Fn>(__f))); // presumably makes the bound object usable with operator|
+  }
+};
+
+} // namespace __adjacent_transform
+inline namespace __cpo {
+template <size_t _Np>
+inline constexpr auto adjacent_transform = __adjacent_transform::__fn<_Np>{};
+inline constexpr auto pairwise_transform = adjacent_transform<2>; // the N == 2 case under its own name
+} // namespace __cpo
+} // namespace views
+} // namespace ranges
+
+#endif // _LIBCPP_STD_VER >= 23
+
+_LIBCPP_END_NAMESPACE_STD
+
+_LIBCPP_POP_MACROS
+
+#endif // _LIBCPP___RANGES_ADJACENT_TRANSFORM_VIEW_H
diff --git a/lib/libcxx/include/__ranges/adjacent_view.h b/lib/libcxx/include/__ranges/adjacent_view.h
new file mode 100644
index 0000000000..40474b85c7
--- /dev/null
+++ b/lib/libcxx/include/__ranges/adjacent_view.h
@@ -0,0 +1,419 @@
+// -*- C++ -*-
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef _LIBCPP___RANGES_ADJACENT_VIEW_H
+#define _LIBCPP___RANGES_ADJACENT_VIEW_H
+
+#include <__config>
+
+#include <__algorithm/min.h>
+#include <__compare/three_way_comparable.h>
+#include <__concepts/constructible.h>
+#include <__concepts/convertible_to.h>
+#include <__concepts/equality_comparable.h>
+#include <__cstddef/size_t.h>
+#include <__functional/invoke.h>
+#include <__functional/operations.h>
+#include <__iterator/concepts.h>
+#include <__iterator/incrementable_traits.h>
+#include <__iterator/iter_move.h>
+#include <__iterator/iter_swap.h>
+#include <__iterator/iterator_traits.h>
+#include <__iterator/next.h>
+#include <__iterator/prev.h>
+#include <__ranges/access.h>
+#include <__ranges/all.h>
+#include <__ranges/concepts.h>
+#include <__ranges/empty_view.h>
+#include <__ranges/enable_borrowed_range.h>
+#include <__ranges/range_adaptor.h>
+#include <__ranges/size.h>
+#include <__ranges/view_interface.h>
+#include <__tuple/tuple_transform.h>
+#include <__type_traits/common_type.h>
+#include <__type_traits/is_nothrow_constructible.h>
+#include <__type_traits/make_unsigned.h>
+#include <__type_traits/maybe_const.h>
+#include <__utility/declval.h>
+#include <__utility/forward.h>
+#include <__utility/integer_sequence.h>
+#include <__utility/move.h>
+#include <array>
+#include <tuple>
+
+#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
+#  pragma GCC system_header
+#endif
+
+_LIBCPP_PUSH_MACROS
+#include <__undef_macros>
+
+_LIBCPP_BEGIN_NAMESPACE_STD
+
+#if _LIBCPP_STD_VER >= 23
+
+namespace ranges {
+
+template <forward_range _View, size_t _Np> // View whose iterators each hold _Np iterators into the underlying view.
+  requires view<_View> && (_Np > 0)
+class adjacent_view : public view_interface<adjacent_view<_View, _Np>> {
+private:
+  _LIBCPP_NO_UNIQUE_ADDRESS _View __base_ = _View(); // the underlying view
+
+  template <bool>
+  class __iterator;
+
+  template <bool>
+  class __sentinel;
+
+  struct __as_sentinel {}; // tag type selecting __iterator's end-position constructor
+
+public:
+  _LIBCPP_HIDE_FROM_ABI adjacent_view()
+    requires default_initializable<_View>
+  = default;
+
+  _LIBCPP_HIDE_FROM_ABI constexpr explicit adjacent_view(_View __base) : __base_(std::move(__base)) {}
+
+  _LIBCPP_HIDE_FROM_ABI constexpr _View base() const& // returns a copy of the underlying view
+    requires copy_constructible<_View>
+  {
+    return __base_;
+  }
+  _LIBCPP_HIDE_FROM_ABI constexpr _View base() && { return std::move(__base_); } // rvalue overload: moves the view out
+
+  _LIBCPP_HIDE_FROM_ABI constexpr auto begin() // non-const overload exists only when _View is not a __simple_view
+    requires(!__simple_view<_View>)
+  {
+    return __iterator<false>(ranges::begin(__base_), ranges::end(__base_)); // end bounds the iterator's initial advance
+  }
+
+  _LIBCPP_HIDE_FROM_ABI constexpr auto begin() const
+    requires range<const _View> // LWG4482 This is under-constrained.
+  {
+    return __iterator<true>(ranges::begin(__base_), ranges::end(__base_));
+  }
+
+  _LIBCPP_HIDE_FROM_ABI constexpr auto end()
+    requires(!__simple_view<_View>)
+  {
+    if constexpr (common_range<_View>) { // common base: end is an iterator built through the __as_sentinel constructor
+      return __iterator<false>(__as_sentinel{}, ranges::begin(__base_), ranges::end(__base_));
+    } else { // otherwise: a sentinel wrapping the base sentinel
+      return __sentinel<false>(ranges::end(__base_));
+    }
+  }
+
+  _LIBCPP_HIDE_FROM_ABI constexpr auto end() const
+    requires range<const _View> // LWG4482 This is under-constrained.
+  {
+    if constexpr (common_range<const _View>) {
+      return __iterator<true>(__as_sentinel{}, ranges::begin(__base_), ranges::end(__base_));
+    } else {
+      return __sentinel<true>(ranges::end(__base_));
+    }
+  }
+
+  _LIBCPP_HIDE_FROM_ABI constexpr auto size() // base size minus (_Np - 1), clamped at zero
+    requires sized_range<_View>
+  {
+    using _ST = decltype(ranges::size(__base_));
+    using _CT = common_type_t<_ST, size_t>; // arithmetic is done in a type that also holds size_t values
+    auto __sz = static_cast<_CT>(ranges::size(__base_));
+    __sz -= std::min<_CT>(__sz, _Np - 1); // never subtracts more than __sz
+    return static_cast<_ST>(__sz); // converted back to the base's size type
+  }
+
+  _LIBCPP_HIDE_FROM_ABI constexpr auto size() const // const counterpart of size(), same computation
+    requires sized_range<const _View>
+  {
+    using _ST = decltype(ranges::size(__base_));
+    using _CT = common_type_t<_ST, size_t>;
+    auto __sz = static_cast<_CT>(ranges::size(__base_));
+    __sz -= std::min<_CT>(__sz, _Np - 1);
+    return static_cast<_ST>(__sz);
+  }
+};
+
+struct __adjacent_view_iter_access { // Befriended by adjacent_view's iterator so its private __current_ can be read from outside.
+  template <class _Iter>
+  _LIBCPP_HIDE_FROM_ABI constexpr static auto& __get_current(_Iter& __it) noexcept { // returns a reference, not a copy
+    return __it.__current_;
+  }
+};
+
+template <forward_range _View, size_t _Np> // Iterator of adjacent_view: an array of _Np iterators into the base range.
+  requires view<_View> && (_Np > 0)
+template <bool _Const>
+class adjacent_view<_View, _Np>::__iterator {
+  friend __adjacent_view_iter_access;
+  friend adjacent_view;
+  using _Base _LIBCPP_NODEBUG              = __maybe_const<_Const, _View>;
+  array<iterator_t<_Base>, _Np> __current_ = array<iterator_t<_Base>, _Np>(); // the _Np stored base iterators
+
+  _LIBCPP_HIDE_FROM_ABI constexpr __iterator(iterator_t<_Base> __first, sentinel_t<_Base> __last) { // begin position
+    __current_[0] = __first;
+    for (size_t __i = 1; __i < _Np; ++__i) { // each slot is the previous one advanced by at most 1, bounded by __last
+      __current_[__i] = ranges::next(__current_[__i - 1], 1, __last);
+    }
+  }
+
+  _LIBCPP_HIDE_FROM_ABI constexpr __iterator(__as_sentinel, iterator_t<_Base> __first, iterator_t<_Base> __last) { // end position
+    if constexpr (!bidirectional_range<_Base>) { // cannot step backwards: every slot is the end iterator
+      __current_.fill(__last);
+    } else { // last slot is the end; earlier slots step back by at most 1 each, bounded by __first
+      __current_[_Np - 1] = __last;
+      for (int __i = static_cast<int>(_Np) - 2; __i >= 0; --__i) { // signed index so the loop can stop below zero
+        __current_[__i] = ranges::prev(__current_[__i + 1], 1, __first);
+      }
+    }
+  }
+
+  template <class _Iter, size_t... _Is> // helper for the converting constructor below
+  _LIBCPP_HIDE_FROM_ABI explicit constexpr __iterator(_Iter&& __i, index_sequence<_Is...>)
+      : __current_{std::move(__i.__current_[_Is])...} {} // moves each of __i's stored iterators into this array
+
+  static consteval auto __get_iterator_concept() { // tag object matching the strongest traversal _Base supports
+    if constexpr (random_access_range<_Base>)
+      return random_access_iterator_tag{};
+    else if constexpr (bidirectional_range<_Base>)
+      return bidirectional_iterator_tag{};
+    else
+      return forward_iterator_tag{};
+  }
+
+  template <class _Tp, size_t _Index> // ignores _Index; lets a pack expansion repeat _Tp
+  using __always _LIBCPP_NODEBUG = _Tp;
+
+  template <class _Tp, size_t... _Is> // declared only: names a tuple with one _Tp per index
+  static auto __repeat_tuple_helper(index_sequence<_Is...>) -> tuple<__always<_Tp, _Is>...>;
+
+public:
+  using iterator_category = input_iterator_tag; // fixed; the traversal strength is carried by iterator_concept
+  using iterator_concept  = decltype(__get_iterator_concept());
+  using value_type        = decltype(__repeat_tuple_helper<range_value_t<_Base>>(make_index_sequence<_Np>{}));
+  using difference_type   = range_difference_t<_Base>;
+
+  _LIBCPP_HIDE_FROM_ABI __iterator() = default;
+  _LIBCPP_HIDE_FROM_ABI constexpr __iterator(__iterator<!_Const> __i) // non-const to const iterator conversion
+    requires _Const && convertible_to<iterator_t<_View>, iterator_t<const _View>>
+      : __iterator(std::move(__i), make_index_sequence<_Np>{}) {}
+
+  _LIBCPP_HIDE_FROM_ABI constexpr auto operator*() const { // hands a dereferencing lambda and __current_ to __tuple_transform
+    return std::__tuple_transform([](auto& __i) -> decltype(auto) { return *__i; }, __current_);
+  }
+
+  _LIBCPP_HIDE_FROM_ABI constexpr __iterator& operator++() { // movement operators apply the same step to every stored iterator
+    for (auto& __i : __current_) {
+      ++__i;
+    }
+    return *this;
+  }
+
+  _LIBCPP_HIDE_FROM_ABI constexpr __iterator operator++(int) {
+    auto __tmp = *this;
+    ++*this;
+    return __tmp;
+  }
+
+  _LIBCPP_HIDE_FROM_ABI constexpr __iterator& operator--()
+    requires bidirectional_range<_Base>
+  {
+    for (auto& __i : __current_) {
+      --__i;
+    }
+    return *this;
+  }
+
+  _LIBCPP_HIDE_FROM_ABI constexpr __iterator operator--(int)
+    requires bidirectional_range<_Base>
+  {
+    auto __tmp = *this;
+    --*this;
+    return __tmp;
+  }
+
+  _LIBCPP_HIDE_FROM_ABI constexpr __iterator& operator+=(difference_type __x)
+    requires random_access_range<_Base>
+  {
+    for (auto& __i : __current_) {
+      __i += __x;
+    }
+    return *this;
+  }
+
+  _LIBCPP_HIDE_FROM_ABI constexpr __iterator& operator-=(difference_type __x)
+    requires random_access_range<_Base>
+  {
+    for (auto& __i : __current_) {
+      __i -= __x;
+    }
+    return *this;
+  }
+
+  _LIBCPP_HIDE_FROM_ABI constexpr auto operator[](difference_type __n) const // like operator*, indexing each iterator with __n
+    requires random_access_range<_Base>
+  {
+    return std::__tuple_transform([&](auto& __i) -> decltype(auto) { return __i[__n]; }, __current_);
+  }
+
+  _LIBCPP_HIDE_FROM_ABI friend constexpr bool operator==(const __iterator& __x, const __iterator& __y) { // last slots only
+    return __x.__current_.back() == __y.__current_.back();
+  }
+
+  _LIBCPP_HIDE_FROM_ABI friend constexpr bool operator<(const __iterator& __x, const __iterator& __y) // last slots only
+    requires random_access_range<_Base>
+  {
+    return __x.__current_.back() < __y.__current_.back();
+  }
+
+  _LIBCPP_HIDE_FROM_ABI friend constexpr bool operator>(const __iterator& __x, const __iterator& __y) // written via operator<
+    requires random_access_range<_Base>
+  {
+    return __y < __x;
+  }
+
+  _LIBCPP_HIDE_FROM_ABI friend constexpr bool operator<=(const __iterator& __x, const __iterator& __y) // written via operator<
+    requires random_access_range<_Base>
+  {
+    return !(__y < __x);
+  }
+
+  _LIBCPP_HIDE_FROM_ABI friend constexpr bool operator>=(const __iterator& __x, const __iterator& __y) // written via operator<
+    requires random_access_range<_Base>
+  {
+    return !(__x < __y);
+  }
+
+  _LIBCPP_HIDE_FROM_ABI friend constexpr auto operator<=>(const __iterator& __x, const __iterator& __y) // last slots only
+    requires random_access_range<_Base> && three_way_comparable<iterator_t<_Base>>
+  {
+    return __x.__current_.back() <=> __y.__current_.back();
+  }
+
+  _LIBCPP_HIDE_FROM_ABI friend constexpr __iterator operator+(const __iterator& __i, difference_type __n) // copy, then +=
+    requires random_access_range<_Base>
+  {
+    auto __r = __i;
+    __r += __n;
+    return __r;
+  }
+
+  _LIBCPP_HIDE_FROM_ABI friend constexpr __iterator operator+(difference_type __n, const __iterator& __i)
+    requires random_access_range<_Base>
+  {
+    return __i + __n;
+  }
+
+  _LIBCPP_HIDE_FROM_ABI friend constexpr __iterator operator-(const __iterator& __i, difference_type __n) // copy, then -=
+    requires random_access_range<_Base>
+  {
+    auto __r = __i;
+    __r -= __n;
+    return __r;
+  }
+
+  _LIBCPP_HIDE_FROM_ABI friend constexpr difference_type operator-(const __iterator& __x, const __iterator& __y) // last slots
+    requires sized_sentinel_for<iterator_t<_Base>, iterator_t<_Base>>
+  {
+    return __x.__current_.back() - __y.__current_.back();
+  }
+
+  _LIBCPP_HIDE_FROM_ABI friend constexpr auto iter_move(const __iterator& __i) noexcept( // customization of ranges::iter_move
+      noexcept(ranges::iter_move(std::declval<const iterator_t<_Base>&>())) &&
+      is_nothrow_move_constructible_v<range_rvalue_reference_t<_Base>>) {
+    return std::__tuple_transform(ranges::iter_move, __i.__current_); // ranges::iter_move handed over with the stored iterators
+  }
+
+  _LIBCPP_HIDE_FROM_ABI friend constexpr void iter_swap(const __iterator& __l, const __iterator& __r) noexcept(
+      noexcept(ranges::iter_swap(std::declval<iterator_t<_Base>>(), std::declval<iterator_t<_Base>>())))
+    requires indirectly_swappable<iterator_t<_Base>>
+  {
+    for (size_t __i = 0; __i < _Np; ++__i) { // slot by slot: swaps what the i-th iterators of __l and __r refer to
+      ranges::iter_swap(__l.__current_[__i], __r.__current_[__i]);
+    }
+  }
+};
+
+template <forward_range _View, size_t _Np> // Sentinel of adjacent_view: wraps the sentinel of the underlying view.
+  requires view<_View> && (_Np > 0)
+template <bool _Const>
+class adjacent_view<_View, _Np>::__sentinel {
+  friend adjacent_view;
+  using _Base _LIBCPP_NODEBUG = __maybe_const<_Const, _View>;
+  sentinel_t<_Base> __end_    = sentinel_t<_Base>(); // end marker of the (possibly const) underlying view
+
+  _LIBCPP_HIDE_FROM_ABI constexpr explicit __sentinel(sentinel_t<_Base> __end) : __end_(std::move(__end)) {}
+
+public:
+  _LIBCPP_HIDE_FROM_ABI __sentinel() = default;
+
+  _LIBCPP_HIDE_FROM_ABI constexpr __sentinel(__sentinel<!_Const> __i) // non-const to const sentinel conversion
+    requires _Const && convertible_to<sentinel_t<_View>, sentinel_t<_Base>>
+      : __end_(std::move(__i.__end_)) {}
+
+  template <bool _OtherConst> // works with const and non-const iterators of the view
+    requires sentinel_for<sentinel_t<_Base>, iterator_t<__maybe_const<_OtherConst, _View>>>
+  _LIBCPP_HIDE_FROM_ABI friend constexpr bool operator==(const __iterator<_OtherConst>& __x, const __sentinel& __y) {
+    return __x.__current_.back() == __y.__end_; // only the iterator's last stored position is tested against the end
+  }
+
+  template <bool _OtherConst> // iterator - sentinel, measured from the iterator's last stored position
+    requires sized_sentinel_for<sentinel_t<_Base>, iterator_t<__maybe_const<_OtherConst, _View>>>
+  _LIBCPP_HIDE_FROM_ABI friend constexpr range_difference_t<__maybe_const<_OtherConst, _View>>
+  operator-(const __iterator<_OtherConst>& __x, const __sentinel& __y) {
+    return __x.__current_.back() - __y.__end_;
+  }
+
+  template <bool _OtherConst> // sentinel - iterator, measured to the iterator's last stored position
+    requires sized_sentinel_for<sentinel_t<_Base>, iterator_t<__maybe_const<_OtherConst, _View>>>
+  _LIBCPP_HIDE_FROM_ABI friend constexpr range_difference_t<__maybe_const<_OtherConst, _View>>
+  operator-(const __sentinel& __y, const __iterator<_OtherConst>& __x) {
+    return __y.__end_ - __x.__current_.back();
+  }
+};
+
+template <class _View, size_t _Np> // adjacent_view is a borrowed range exactly when its underlying view is
+constexpr bool enable_borrowed_range<adjacent_view<_View, _Np>> = enable_borrowed_range<_View>;
+
+namespace views {
+namespace __adjacent {
+
+template <size_t _Np> // Function object type behind views::adjacent<_Np>.
+struct __fn : __range_adaptor_closure<__fn<_Np>> {
+  template <class _Range> // N == 0: the argument is ignored
+    requires(_Np == 0 && forward_range<_Range &&>)
+  _LIBCPP_HIDE_FROM_ABI static constexpr auto operator()(_Range&&) noexcept {
+    return empty_view<tuple<>>{}; // an empty view whose element type is the empty tuple
+  }
+
+  template <class _Ranges> // general case: viable only when the adjacent_view expression below is well-formed
+  _LIBCPP_HIDE_FROM_ABI static constexpr auto operator()(_Ranges&& __range) noexcept(
+      noexcept(adjacent_view<views::all_t<_Ranges&&>, _Np>(std::forward<_Ranges>(__range))))
+      -> decltype(adjacent_view<views::all_t<_Ranges&&>, _Np>(std::forward<_Ranges>(__range))) {
+    return adjacent_view<views::all_t<_Ranges&&>, _Np>(std::forward<_Ranges>(__range)); // wraps views::all_t of the argument
+  }
+};
+
+} // namespace __adjacent
+inline namespace __cpo {
+template <size_t _Np>
+inline constexpr auto adjacent = __adjacent::__fn<_Np>{};
+inline constexpr auto pairwise = adjacent<2>; // the N == 2 case under its own name
+} // namespace __cpo
+} // namespace views
+} // namespace ranges
+
+#endif // _LIBCPP_STD_VER >= 23
+
+_LIBCPP_END_NAMESPACE_STD
+
+_LIBCPP_POP_MACROS
+
+#endif // _LIBCPP___RANGES_ADJACENT_VIEW_H
diff --git a/lib/libcxx/include/__ranges/as_rvalue_view.h b/lib/libcxx/include/__ranges/as_rvalue_view.h
index 5849a6c368..a553f39998 100644
--- a/lib/libcxx/include/__ranges/as_rvalue_view.h
+++ b/lib/libcxx/include/__ranges/as_rvalue_view.h
@@ -48,27 +48,27 @@ public:
 
   _LIBCPP_HIDE_FROM_ABI constexpr explicit as_rvalue_view(_View __base) : __base_(std::move(__base)) {}
 
-  _LIBCPP_HIDE_FROM_ABI constexpr _View base() const&
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr _View base() const&
     requires copy_constructible<_View>
   {
     return __base_;
   }
 
-  _LIBCPP_HIDE_FROM_ABI constexpr _View base() && { return std::move(__base_); }
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr _View base() && { return std::move(__base_); }
 
-  _LIBCPP_HIDE_FROM_ABI constexpr auto begin()
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr auto begin()
     requires(!__simple_view<_View>)
   {
     return move_iterator(ranges::begin(__base_));
   }
 
-  _LIBCPP_HIDE_FROM_ABI constexpr auto begin() const
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr auto begin() const
     requires range<const _View>
   {
     return move_iterator(ranges::begin(__base_));
   }
 
-  _LIBCPP_HIDE_FROM_ABI constexpr auto end()
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr auto end()
     requires(!__simple_view<_View>)
   {
     if constexpr (common_range<_View>) {
@@ -78,7 +78,7 @@ public:
     }
   }
 
-  _LIBCPP_HIDE_FROM_ABI constexpr auto end() const
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr auto end() const
     requires range<const _View>
   {
     if constexpr (common_range<const _View>) {
@@ -88,13 +88,13 @@ public:
     }
   }
 
-  _LIBCPP_HIDE_FROM_ABI constexpr auto size()
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr auto size()
     requires sized_range<_View>
   {
     return ranges::size(__base_);
   }
 
-  _LIBCPP_HIDE_FROM_ABI constexpr auto size() const
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr auto size() const
     requires sized_range<const _View>
   {
     return ranges::size(__base_);
@@ -117,7 +117,7 @@ struct __fn : __range_adaptor_closure<__fn> {
     return /*---------------------------------*/ as_rvalue_view(std::forward<_Range>(__range));
   }
 
-  template <class _Range>
+  template <input_range _Range>
     requires same_as<range_rvalue_reference_t<_Range>, range_reference_t<_Range>>
   [[nodiscard]] _LIBCPP_HIDE_FROM_ABI static constexpr auto
   operator()(_Range&& __range) noexcept(noexcept(views::all(std::forward<_Range>(__range))))
diff --git a/lib/libcxx/include/__ranges/chunk_by_view.h b/lib/libcxx/include/__ranges/chunk_by_view.h
index 71fee3a4f2..8007f76f0c 100644
--- a/lib/libcxx/include/__ranges/chunk_by_view.h
+++ b/lib/libcxx/include/__ranges/chunk_by_view.h
@@ -100,17 +100,17 @@ public:
   _LIBCPP_HIDE_FROM_ABI constexpr explicit chunk_by_view(_View __base, _Pred __pred)
       : __base_(std::move(__base)), __pred_(in_place, std::move(__pred)) {}
 
-  _LIBCPP_HIDE_FROM_ABI constexpr _View base() const&
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr _View base() const&
     requires copy_constructible<_View>
   {
     return __base_;
   }
 
-  _LIBCPP_HIDE_FROM_ABI constexpr _View base() && { return std::move(__base_); }
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr _View base() && { return std::move(__base_); }
 
-  _LIBCPP_HIDE_FROM_ABI constexpr const _Pred& pred() const { return *__pred_; }
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr const _Pred& pred() const { return *__pred_; }
 
-  _LIBCPP_HIDE_FROM_ABI constexpr __iterator begin() {
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr __iterator begin() {
     // Note: this duplicates a check in `optional` but provides a better error message.
     _LIBCPP_ASSERT_VALID_ELEMENT_ACCESS(
         __pred_.__has_value(), "Trying to call begin() on a chunk_by_view that does not have a valid predicate.");
@@ -122,7 +122,7 @@ public:
     return {*this, std::move(__first), *__cached_begin_};
   }
 
-  _LIBCPP_HIDE_FROM_ABI constexpr auto end() {
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr auto end() {
     if constexpr (common_range<_View>) {
       return __iterator{*this, ranges::end(__base_), ranges::end(__base_)};
     } else {
@@ -155,7 +155,7 @@ public:
 
   _LIBCPP_HIDE_FROM_ABI __iterator() = default;
 
-  _LIBCPP_HIDE_FROM_ABI constexpr value_type operator*() const {
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr value_type operator*() const {
     // If the iterator is at end, this would return an empty range which can be checked by the calling code and doesn't
     // necessarily lead to a bad access.
     _LIBCPP_ASSERT_PEDANTIC(__current_ != __next_, "Trying to dereference past-the-end chunk_by_view iterator.");
diff --git a/lib/libcxx/include/__ranges/common_view.h b/lib/libcxx/include/__ranges/common_view.h
index 133236dd1d..eec1045c8a 100644
--- a/lib/libcxx/include/__ranges/common_view.h
+++ b/lib/libcxx/include/__ranges/common_view.h
@@ -56,16 +56,16 @@ public:
     return __base_;
   }
 
-  _LIBCPP_HIDE_FROM_ABI constexpr _View base() && { return std::move(__base_); }
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr _View base() && { return std::move(__base_); }
 
-  _LIBCPP_HIDE_FROM_ABI constexpr auto begin() {
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr auto begin() {
     if constexpr (random_access_range<_View> && sized_range<_View>)
       return ranges::begin(__base_);
     else
       return common_iterator<iterator_t<_View>, sentinel_t<_View>>(ranges::begin(__base_));
   }
 
-  _LIBCPP_HIDE_FROM_ABI constexpr auto begin() const
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr auto begin() const
     requires range<const _View>
   {
     if constexpr (random_access_range<const _View> && sized_range<const _View>)
@@ -74,14 +74,14 @@ public:
       return common_iterator<iterator_t<const _View>, sentinel_t<const _View>>(ranges::begin(__base_));
   }
 
-  _LIBCPP_HIDE_FROM_ABI constexpr auto end() {
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr auto end() {
     if constexpr (random_access_range<_View> && sized_range<_View>)
       return ranges::begin(__base_) + ranges::size(__base_);
     else
       return common_iterator<iterator_t<_View>, sentinel_t<_View>>(ranges::end(__base_));
   }
 
-  _LIBCPP_HIDE_FROM_ABI constexpr auto end() const
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr auto end() const
     requires range<const _View>
   {
     if constexpr (random_access_range<const _View> && sized_range<const _View>)
@@ -90,13 +90,13 @@ public:
       return common_iterator<iterator_t<const _View>, sentinel_t<const _View>>(ranges::end(__base_));
   }
 
-  _LIBCPP_HIDE_FROM_ABI constexpr auto size()
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr auto size()
     requires sized_range<_View>
   {
     return ranges::size(__base_);
   }
 
-  _LIBCPP_HIDE_FROM_ABI constexpr auto size() const
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr auto size() const
     requires sized_range<const _View>
   {
     return ranges::size(__base_);
diff --git a/lib/libcxx/include/__ranges/drop_view.h b/lib/libcxx/include/__ranges/drop_view.h
index 42ada9299a..feb3705d2d 100644
--- a/lib/libcxx/include/__ranges/drop_view.h
+++ b/lib/libcxx/include/__ranges/drop_view.h
@@ -80,14 +80,14 @@ public:
     _LIBCPP_ASSERT_UNCATEGORIZED(__count_ >= 0, "count must be greater than or equal to zero.");
   }
 
-  _LIBCPP_HIDE_FROM_ABI constexpr _View base() const&
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr _View base() const&
     requires copy_constructible<_View>
   {
     return __base_;
   }
-  _LIBCPP_HIDE_FROM_ABI constexpr _View base() && { return std::move(__base_); }
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr _View base() && { return std::move(__base_); }
 
-  _LIBCPP_HIDE_FROM_ABI constexpr auto begin()
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr auto begin()
     requires(!(__simple_view<_View> && random_access_range<const _View> && sized_range<const _View>))
   {
     if constexpr (random_access_range<_View> && sized_range<_View>) {
@@ -104,20 +104,20 @@ public:
     return __tmp;
   }
 
-  _LIBCPP_HIDE_FROM_ABI constexpr auto begin() const
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr auto begin() const
     requires random_access_range<const _View> && sized_range<const _View>
   {
     const auto __dist = std::min(ranges::distance(__base_), __count_);
     return ranges::begin(__base_) + __dist;
   }
 
-  _LIBCPP_HIDE_FROM_ABI constexpr auto end()
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr auto end()
     requires(!__simple_view<_View>)
   {
     return ranges::end(__base_);
   }
 
-  _LIBCPP_HIDE_FROM_ABI constexpr auto end() const
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr auto end() const
     requires range<const _View>
   {
     return ranges::end(__base_);
@@ -129,13 +129,13 @@ public:
     return __s < __c ? 0 : __s - __c;
   }
 
-  _LIBCPP_HIDE_FROM_ABI constexpr auto size()
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr auto size()
     requires sized_range<_View>
   {
     return __size(*this);
   }
 
-  _LIBCPP_HIDE_FROM_ABI constexpr auto size() const
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr auto size() const
     requires sized_range<const _View>
   {
     return __size(*this);
diff --git a/lib/libcxx/include/__ranges/drop_while_view.h b/lib/libcxx/include/__ranges/drop_while_view.h
index bc7f019393..1fe4e17f80 100644
--- a/lib/libcxx/include/__ranges/drop_while_view.h
+++ b/lib/libcxx/include/__ranges/drop_while_view.h
@@ -57,17 +57,17 @@ public:
   _LIBCPP_HIDE_FROM_ABI constexpr _LIBCPP_EXPLICIT_SINCE_CXX23 drop_while_view(_View __base, _Pred __pred)
       : __base_(std::move(__base)), __pred_(std::in_place, std::move(__pred)) {}
 
-  _LIBCPP_HIDE_FROM_ABI constexpr _View base() const&
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr _View base() const&
     requires copy_constructible<_View>
   {
     return __base_;
   }
 
-  _LIBCPP_HIDE_FROM_ABI constexpr _View base() && { return std::move(__base_); }
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr _View base() && { return std::move(__base_); }
 
-  _LIBCPP_HIDE_FROM_ABI constexpr const _Pred& pred() const { return *__pred_; }
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr const _Pred& pred() const { return *__pred_; }
 
-  _LIBCPP_HIDE_FROM_ABI constexpr auto begin() {
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr auto begin() {
     // Note: this duplicates a check in `optional` but provides a better error message.
     _LIBCPP_ASSERT_VALID_ELEMENT_ACCESS(
         __pred_.__has_value(),
@@ -83,7 +83,7 @@ public:
     }
   }
 
-  _LIBCPP_HIDE_FROM_ABI constexpr auto end() { return ranges::end(__base_); }
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr auto end() { return ranges::end(__base_); }
 
 private:
   _LIBCPP_NO_UNIQUE_ADDRESS _View __base_ = _View();
diff --git a/lib/libcxx/include/__ranges/elements_of.h b/lib/libcxx/include/__ranges/elements_of.h
new file mode 100644
index 0000000000..3f89f49d18
--- /dev/null
+++ b/lib/libcxx/include/__ranges/elements_of.h
@@ -0,0 +1,49 @@
+// -*- C++ -*-
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef _LIBCPP___RANGES_ELEMENTS_OF_H
+#define _LIBCPP___RANGES_ELEMENTS_OF_H
+
+#include <__config>
+#include <__cstddef/byte.h>
+#include <__memory/allocator.h>
+#include <__ranges/concepts.h>
+#include <__utility/forward.h>
+
+#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
+#  pragma GCC system_header
+#endif
+
+_LIBCPP_PUSH_MACROS
+#include <__undef_macros>
+
+_LIBCPP_BEGIN_NAMESPACE_STD
+
+#if _LIBCPP_STD_VER >= 23
+
+namespace ranges {
+
+template <range _Range, class _Allocator = allocator<byte>>
+struct elements_of {
+  _LIBCPP_NO_UNIQUE_ADDRESS _Range range;
+  _LIBCPP_NO_UNIQUE_ADDRESS _Allocator allocator = _Allocator();
+};
+
+template <class _Range, class _Allocator = allocator<byte>>
+elements_of(_Range&&, _Allocator = _Allocator()) -> elements_of<_Range&&, _Allocator>;
+
+} // namespace ranges
+
+#endif // _LIBCPP_STD_VER >= 23
+
+_LIBCPP_END_NAMESPACE_STD
+
+_LIBCPP_POP_MACROS
+
+#endif // _LIBCPP___RANGES_ELEMENTS_OF_H
diff --git a/lib/libcxx/include/__ranges/empty_view.h b/lib/libcxx/include/__ranges/empty_view.h
index fc08492110..54d62b3c77 100644
--- a/lib/libcxx/include/__ranges/empty_view.h
+++ b/lib/libcxx/include/__ranges/empty_view.h
@@ -29,11 +29,11 @@ template <class _Tp>
   requires is_object_v<_Tp>
 class empty_view : public view_interface<empty_view<_Tp>> {
 public:
-  _LIBCPP_HIDE_FROM_ABI static constexpr _Tp* begin() noexcept { return nullptr; }
-  _LIBCPP_HIDE_FROM_ABI static constexpr _Tp* end() noexcept { return nullptr; }
-  _LIBCPP_HIDE_FROM_ABI static constexpr _Tp* data() noexcept { return nullptr; }
-  _LIBCPP_HIDE_FROM_ABI static constexpr size_t size() noexcept { return 0; }
-  _LIBCPP_HIDE_FROM_ABI static constexpr bool empty() noexcept { return true; }
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI static constexpr _Tp* begin() noexcept { return nullptr; }
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI static constexpr _Tp* end() noexcept { return nullptr; }
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI static constexpr _Tp* data() noexcept { return nullptr; }
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI static constexpr size_t size() noexcept { return 0; }
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI static constexpr bool empty() noexcept { return true; }
 };
 
 template <class _Tp>
diff --git a/lib/libcxx/include/__ranges/filter_view.h b/lib/libcxx/include/__ranges/filter_view.h
index 07980e7353..3ad69ea100 100644
--- a/lib/libcxx/include/__ranges/filter_view.h
+++ b/lib/libcxx/include/__ranges/filter_view.h
@@ -76,16 +76,16 @@ public:
       : __base_(std::move(__base)), __pred_(in_place, std::move(__pred)) {}
 
   template <class _Vp = _View>
-  _LIBCPP_HIDE_FROM_ABI constexpr _View base() const&
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr _View base() const&
     requires copy_constructible<_Vp>
   {
     return __base_;
   }
-  _LIBCPP_HIDE_FROM_ABI constexpr _View base() && { return std::move(__base_); }
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr _View base() && { return std::move(__base_); }
 
-  _LIBCPP_HIDE_FROM_ABI constexpr _Pred const& pred() const { return *__pred_; }
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr _Pred const& pred() const { return *__pred_; }
 
-  _LIBCPP_HIDE_FROM_ABI constexpr __iterator begin() {
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr __iterator begin() {
     // Note: this duplicates a check in `optional` but provides a better error message.
     _LIBCPP_ASSERT_VALID_ELEMENT_ACCESS(
         __pred_.__has_value(), "Trying to call begin() on a filter_view that does not have a valid predicate.");
@@ -99,7 +99,7 @@ public:
     }
   }
 
-  _LIBCPP_HIDE_FROM_ABI constexpr auto end() {
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr auto end() {
     if constexpr (common_range<_View>)
       return __iterator{*this, ranges::end(__base_)};
     else
@@ -148,10 +148,10 @@ public:
   _LIBCPP_HIDE_FROM_ABI constexpr __iterator(filter_view& __parent, iterator_t<_View> __current)
       : __current_(std::move(__current)), __parent_(std::addressof(__parent)) {}
 
-  _LIBCPP_HIDE_FROM_ABI constexpr iterator_t<_View> const& base() const& noexcept { return __current_; }
-  _LIBCPP_HIDE_FROM_ABI constexpr iterator_t<_View> base() && { return std::move(__current_); }
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr iterator_t<_View> const& base() const& noexcept { return __current_; }
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr iterator_t<_View> base() && { return std::move(__current_); }
 
-  _LIBCPP_HIDE_FROM_ABI constexpr range_reference_t<_View> operator*() const { return *__current_; }
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr range_reference_t<_View> operator*() const { return *__current_; }
   _LIBCPP_HIDE_FROM_ABI constexpr iterator_t<_View> operator->() const
     requires __has_arrow<iterator_t<_View>> && copyable<iterator_t<_View>>
   {
@@ -194,7 +194,7 @@ public:
     return __x.__current_ == __y.__current_;
   }
 
-  _LIBCPP_HIDE_FROM_ABI friend constexpr range_rvalue_reference_t<_View>
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI friend constexpr range_rvalue_reference_t<_View>
   iter_move(__iterator const& __it) noexcept(noexcept(ranges::iter_move(__it.__current_))) {
     return ranges::iter_move(__it.__current_);
   }
@@ -218,7 +218,7 @@ public:
 
   _LIBCPP_HIDE_FROM_ABI constexpr explicit __sentinel(filter_view& __parent) : __end_(ranges::end(__parent.__base_)) {}
 
-  _LIBCPP_HIDE_FROM_ABI constexpr sentinel_t<_View> base() const { return __end_; }
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr sentinel_t<_View> base() const { return __end_; }
 
   _LIBCPP_HIDE_FROM_ABI friend constexpr bool operator==(__iterator const& __x, __sentinel const& __y) {
     return __x.__current_ == __y.__end_;
diff --git a/lib/libcxx/include/__ranges/iota_view.h b/lib/libcxx/include/__ranges/iota_view.h
index 4b84585258..6b2576ec6b 100644
--- a/lib/libcxx/include/__ranges/iota_view.h
+++ b/lib/libcxx/include/__ranges/iota_view.h
@@ -30,6 +30,7 @@
 #include <__ranges/movable_box.h>
 #include <__ranges/view_interface.h>
 #include <__type_traits/conditional.h>
+#include <__type_traits/decay.h>
 #include <__type_traits/is_nothrow_constructible.h>
 #include <__type_traits/make_unsigned.h>
 #include <__type_traits/type_identity.h>
@@ -57,11 +58,17 @@ struct __get_wider_signed {
       return type_identity<int>{};
     else if constexpr (sizeof(_Int) < sizeof(long))
       return type_identity<long>{};
-    else
+    else if constexpr (sizeof(_Int) < sizeof(long long))
       return type_identity<long long>{};
-
-    static_assert(
-        sizeof(_Int) <= sizeof(long long), "Found integer-like type that is bigger than largest integer like type.");
+#  if _LIBCPP_HAS_INT128
+    else if constexpr (sizeof(_Int) <= sizeof(__int128))
+      return type_identity<__int128>{};
+#  else
+    else if constexpr (sizeof(_Int) <= sizeof(long long))
+      return type_identity<long long>{};
+#  endif
+    else
+      static_assert(false, "Found integer-like type that is bigger than the largest integer like type.");
   }
 
   using type = typename decltype(__call())::type;
@@ -125,7 +132,8 @@ class iota_view : public view_interface<iota_view<_Start, _BoundSentinel>> {
 
     _LIBCPP_HIDE_FROM_ABI constexpr explicit __iterator(_Start __value) : __value_(std::move(__value)) {}
 
-    _LIBCPP_HIDE_FROM_ABI constexpr _Start operator*() const noexcept(is_nothrow_copy_constructible_v<_Start>) {
+    [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr _Start operator*() const
+        noexcept(is_nothrow_copy_constructible_v<_Start>) {
       return __value_;
     }
 
@@ -189,7 +197,7 @@ class iota_view : public view_interface<iota_view<_Start, _BoundSentinel>> {
       return *this;
     }
 
-    _LIBCPP_HIDE_FROM_ABI constexpr _Start operator[](difference_type __n) const
+    [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr _Start operator[](difference_type __n) const
       requires __advanceable<_Start>
     {
       return _Start(__value_ + __n);
@@ -231,27 +239,28 @@ class iota_view : public view_interface<iota_view<_Start, _BoundSentinel>> {
       return __x.__value_ <=> __y.__value_;
     }
 
-    _LIBCPP_HIDE_FROM_ABI friend constexpr __iterator operator+(__iterator __i, difference_type __n)
+    [[nodiscard]] _LIBCPP_HIDE_FROM_ABI friend constexpr __iterator operator+(__iterator __i, difference_type __n)
       requires __advanceable<_Start>
     {
       __i += __n;
       return __i;
     }
 
-    _LIBCPP_HIDE_FROM_ABI friend constexpr __iterator operator+(difference_type __n, __iterator __i)
+    [[nodiscard]] _LIBCPP_HIDE_FROM_ABI friend constexpr __iterator operator+(difference_type __n, __iterator __i)
       requires __advanceable<_Start>
     {
       return __i + __n;
     }
 
-    _LIBCPP_HIDE_FROM_ABI friend constexpr __iterator operator-(__iterator __i, difference_type __n)
+    [[nodiscard]] _LIBCPP_HIDE_FROM_ABI friend constexpr __iterator operator-(__iterator __i, difference_type __n)
       requires __advanceable<_Start>
     {
       __i -= __n;
       return __i;
     }
 
-    _LIBCPP_HIDE_FROM_ABI friend constexpr difference_type operator-(const __iterator& __x, const __iterator& __y)
+    [[nodiscard]] _LIBCPP_HIDE_FROM_ABI friend constexpr difference_type
+    operator-(const __iterator& __x, const __iterator& __y)
       requires __advanceable<_Start>
     {
       if constexpr (__integer_like<_Start>) {
@@ -282,14 +291,14 @@ class iota_view : public view_interface<iota_view<_Start, _BoundSentinel>> {
       return __x.__value_ == __y.__bound_sentinel_;
     }
 
-    _LIBCPP_HIDE_FROM_ABI friend constexpr iter_difference_t<_Start>
+    [[nodiscard]] _LIBCPP_HIDE_FROM_ABI friend constexpr iter_difference_t<_Start>
     operator-(const __iterator& __x, const __sentinel& __y)
       requires sized_sentinel_for<_BoundSentinel, _Start>
     {
       return __x.__value_ - __y.__bound_sentinel_;
     }
 
-    _LIBCPP_HIDE_FROM_ABI friend constexpr iter_difference_t<_Start>
+    [[nodiscard]] _LIBCPP_HIDE_FROM_ABI friend constexpr iter_difference_t<_Start>
     operator-(const __sentinel& __x, const __iterator& __y)
       requires sized_sentinel_for<_BoundSentinel, _Start>
     {
@@ -329,24 +338,24 @@ public:
     requires(!same_as<_Start, _BoundSentinel> && !same_as<_BoundSentinel, unreachable_sentinel_t>)
       : iota_view(std::move(__first.__value_), std::move(__last.__bound_sentinel_)) {}
 
-  _LIBCPP_HIDE_FROM_ABI constexpr __iterator begin() const { return __iterator{__value_}; }
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr __iterator begin() const { return __iterator{__value_}; }
 
-  _LIBCPP_HIDE_FROM_ABI constexpr auto end() const {
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr auto end() const {
     if constexpr (same_as<_BoundSentinel, unreachable_sentinel_t>)
       return unreachable_sentinel;
     else
       return __sentinel{__bound_sentinel_};
   }
 
-  _LIBCPP_HIDE_FROM_ABI constexpr __iterator end() const
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr __iterator end() const
     requires same_as<_Start, _BoundSentinel>
   {
     return __iterator{__bound_sentinel_};
   }
 
-  _LIBCPP_HIDE_FROM_ABI constexpr bool empty() const { return __value_ == __bound_sentinel_; }
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr bool empty() const { return __value_ == __bound_sentinel_; }
 
-  _LIBCPP_HIDE_FROM_ABI constexpr auto size() const
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr auto size() const
     requires(same_as<_Start, _BoundSentinel> && __advanceable<_Start>) ||
             (integral<_Start> && integral<_BoundSentinel>) || sized_sentinel_for<_BoundSentinel, _Start>
   {
@@ -374,14 +383,15 @@ namespace views {
 namespace __iota {
 struct __fn {
   template <class _Start>
-  _LIBCPP_HIDE_FROM_ABI constexpr auto operator()(_Start&& __start) const
-      noexcept(noexcept(ranges::iota_view(std::forward<_Start>(__start))))
-          -> decltype(ranges::iota_view(std::forward<_Start>(__start))) {
-    return ranges::iota_view(std::forward<_Start>(__start));
+    requires(requires(_Start __s) { ranges::iota_view<decay_t<_Start>>(std::forward<_Start>(__s)); })
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr auto operator()(_Start&& __start) const
+      noexcept(noexcept(ranges::iota_view<decay_t<_Start>>(std::forward<_Start>(__start)))) {
+    return ranges::iota_view<decay_t<_Start>>(std::forward<_Start>(__start));
   }
 
   template <class _Start, class _BoundSentinel>
-  _LIBCPP_HIDE_FROM_ABI constexpr auto operator()(_Start&& __start, _BoundSentinel&& __bound_sentinel) const noexcept(
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr auto
+  operator()(_Start&& __start, _BoundSentinel&& __bound_sentinel) const noexcept(
       noexcept(ranges::iota_view(std::forward<_Start>(__start), std::forward<_BoundSentinel>(__bound_sentinel))))
       -> decltype(ranges::iota_view(std::forward<_Start>(__start), std::forward<_BoundSentinel>(__bound_sentinel))) {
     return ranges::iota_view(std::forward<_Start>(__start), std::forward<_BoundSentinel>(__bound_sentinel));
@@ -392,6 +402,15 @@ struct __fn {
 inline namespace __cpo {
 inline constexpr auto iota = __iota::__fn{};
 } // namespace __cpo
+
+#  if _LIBCPP_STD_VER >= 26
+
+inline constexpr auto indices = [] [[nodiscard]] (__integer_like auto __size) static {
+  return ranges::views::iota(decltype(__size){}, __size);
+};
+
+#  endif
+
 } // namespace views
 } // namespace ranges
 
diff --git a/lib/libcxx/include/__ranges/owning_view.h b/lib/libcxx/include/__ranges/owning_view.h
index 254bdb4329..1ab81afee7 100644
--- a/lib/libcxx/include/__ranges/owning_view.h
+++ b/lib/libcxx/include/__ranges/owning_view.h
@@ -49,52 +49,52 @@ public:
   _LIBCPP_HIDE_FROM_ABI owning_view(owning_view&&)            = default;
   _LIBCPP_HIDE_FROM_ABI owning_view& operator=(owning_view&&) = default;
 
-  _LIBCPP_HIDE_FROM_ABI constexpr _Rp& base() & noexcept { return __r_; }
-  _LIBCPP_HIDE_FROM_ABI constexpr const _Rp& base() const& noexcept { return __r_; }
-  _LIBCPP_HIDE_FROM_ABI constexpr _Rp&& base() && noexcept { return std::move(__r_); }
-  _LIBCPP_HIDE_FROM_ABI constexpr const _Rp&& base() const&& noexcept { return std::move(__r_); }
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr _Rp& base() & noexcept { return __r_; }
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr const _Rp& base() const& noexcept { return __r_; }
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr _Rp&& base() && noexcept { return std::move(__r_); }
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr const _Rp&& base() const&& noexcept { return std::move(__r_); }
 
-  _LIBCPP_HIDE_FROM_ABI constexpr iterator_t<_Rp> begin() { return ranges::begin(__r_); }
-  _LIBCPP_HIDE_FROM_ABI constexpr sentinel_t<_Rp> end() { return ranges::end(__r_); }
-  _LIBCPP_HIDE_FROM_ABI constexpr auto begin() const
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr iterator_t<_Rp> begin() { return ranges::begin(__r_); }
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr sentinel_t<_Rp> end() { return ranges::end(__r_); }
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr auto begin() const
     requires range<const _Rp>
   {
     return ranges::begin(__r_);
   }
-  _LIBCPP_HIDE_FROM_ABI constexpr auto end() const
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr auto end() const
     requires range<const _Rp>
   {
     return ranges::end(__r_);
   }
 
-  _LIBCPP_HIDE_FROM_ABI constexpr bool empty()
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr bool empty()
     requires requires { ranges::empty(__r_); }
   {
     return ranges::empty(__r_);
   }
-  _LIBCPP_HIDE_FROM_ABI constexpr bool empty() const
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr bool empty() const
     requires requires { ranges::empty(__r_); }
   {
     return ranges::empty(__r_);
   }
 
-  _LIBCPP_HIDE_FROM_ABI constexpr auto size()
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr auto size()
     requires sized_range<_Rp>
   {
     return ranges::size(__r_);
   }
-  _LIBCPP_HIDE_FROM_ABI constexpr auto size() const
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr auto size() const
     requires sized_range<const _Rp>
   {
     return ranges::size(__r_);
   }
 
-  _LIBCPP_HIDE_FROM_ABI constexpr auto data()
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr auto data()
     requires contiguous_range<_Rp>
   {
     return ranges::data(__r_);
   }
-  _LIBCPP_HIDE_FROM_ABI constexpr auto data() const
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr auto data() const
     requires contiguous_range<const _Rp>
   {
     return ranges::data(__r_);
diff --git a/lib/libcxx/include/__ranges/ref_view.h b/lib/libcxx/include/__ranges/ref_view.h
index 5329d778dd..109a10cec2 100644
--- a/lib/libcxx/include/__ranges/ref_view.h
+++ b/lib/libcxx/include/__ranges/ref_view.h
@@ -51,24 +51,24 @@ public:
   _LIBCPP_HIDE_FROM_ABI constexpr ref_view(_Tp&& __t)
       : __range_(std::addressof(static_cast<_Range&>(std::forward<_Tp>(__t)))) {}
 
-  _LIBCPP_HIDE_FROM_ABI constexpr _Range& base() const { return *__range_; }
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr _Range& base() const { return *__range_; }
 
-  _LIBCPP_HIDE_FROM_ABI constexpr iterator_t<_Range> begin() const { return ranges::begin(*__range_); }
-  _LIBCPP_HIDE_FROM_ABI constexpr sentinel_t<_Range> end() const { return ranges::end(*__range_); }
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr iterator_t<_Range> begin() const { return ranges::begin(*__range_); }
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr sentinel_t<_Range> end() const { return ranges::end(*__range_); }
 
-  _LIBCPP_HIDE_FROM_ABI constexpr bool empty() const
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr bool empty() const
     requires requires { ranges::empty(*__range_); }
   {
     return ranges::empty(*__range_);
   }
 
-  _LIBCPP_HIDE_FROM_ABI constexpr auto size() const
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr auto size() const
     requires sized_range<_Range>
   {
     return ranges::size(*__range_);
   }
 
-  _LIBCPP_HIDE_FROM_ABI constexpr auto data() const
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr auto data() const
     requires contiguous_range<_Range>
   {
     return ranges::data(*__range_);
diff --git a/lib/libcxx/include/__ranges/repeat_view.h b/lib/libcxx/include/__ranges/repeat_view.h
index 56b09701c8..9192183f48 100644
--- a/lib/libcxx/include/__ranges/repeat_view.h
+++ b/lib/libcxx/include/__ranges/repeat_view.h
@@ -108,17 +108,21 @@ public:
           __bound_ >= 0, "The behavior is undefined if Bound is not unreachable_sentinel_t and bound is negative");
   }
 
-  _LIBCPP_HIDE_FROM_ABI constexpr __iterator begin() const { return __iterator(std::addressof(*__value_)); }
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr __iterator begin() const {
+    return __iterator(std::addressof(*__value_));
+  }
 
-  _LIBCPP_HIDE_FROM_ABI constexpr __iterator end() const
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr __iterator end() const
     requires(!same_as<_Bound, unreachable_sentinel_t>)
   {
     return __iterator(std::addressof(*__value_), __bound_);
   }
 
-  _LIBCPP_HIDE_FROM_ABI constexpr unreachable_sentinel_t end() const noexcept { return unreachable_sentinel; }
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr unreachable_sentinel_t end() const noexcept {
+    return unreachable_sentinel;
+  }
 
-  _LIBCPP_HIDE_FROM_ABI constexpr auto size() const
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr auto size() const
     requires(!same_as<_Bound, unreachable_sentinel_t>)
   {
     return std::__to_unsigned_like(__bound_);
@@ -152,7 +156,7 @@ public:
 
   _LIBCPP_HIDE_FROM_ABI __iterator() = default;
 
-  _LIBCPP_HIDE_FROM_ABI constexpr const _Tp& operator*() const noexcept { return *__value_; }
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr const _Tp& operator*() const noexcept { return *__value_; }
 
   _LIBCPP_HIDE_FROM_ABI constexpr __iterator& operator++() {
     ++__current_;
@@ -192,7 +196,9 @@ public:
     return *this;
   }
 
-  _LIBCPP_HIDE_FROM_ABI constexpr const _Tp& operator[](difference_type __n) const noexcept { return *(*this + __n); }
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr const _Tp& operator[](difference_type __n) const noexcept {
+    return *(*this + __n);
+  }
 
   _LIBCPP_HIDE_FROM_ABI friend constexpr bool operator==(const __iterator& __x, const __iterator& __y) {
     return __x.__current_ == __y.__current_;
@@ -202,22 +208,23 @@ public:
     return __x.__current_ <=> __y.__current_;
   }
 
-  _LIBCPP_HIDE_FROM_ABI friend constexpr __iterator operator+(__iterator __i, difference_type __n) {
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI friend constexpr __iterator operator+(__iterator __i, difference_type __n) {
     __i += __n;
     return __i;
   }
 
-  _LIBCPP_HIDE_FROM_ABI friend constexpr __iterator operator+(difference_type __n, __iterator __i) {
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI friend constexpr __iterator operator+(difference_type __n, __iterator __i) {
     __i += __n;
     return __i;
   }
 
-  _LIBCPP_HIDE_FROM_ABI friend constexpr __iterator operator-(__iterator __i, difference_type __n) {
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI friend constexpr __iterator operator-(__iterator __i, difference_type __n) {
     __i -= __n;
     return __i;
   }
 
-  _LIBCPP_HIDE_FROM_ABI friend constexpr difference_type operator-(const __iterator& __x, const __iterator& __y) {
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI friend constexpr difference_type
+  operator-(const __iterator& __x, const __iterator& __y) {
     return static_cast<difference_type>(__x.__current_) - static_cast<difference_type>(__y.__current_);
   }
 
diff --git a/lib/libcxx/include/__ranges/single_view.h b/lib/libcxx/include/__ranges/single_view.h
index 955578b99c..213c507138 100644
--- a/lib/libcxx/include/__ranges/single_view.h
+++ b/lib/libcxx/include/__ranges/single_view.h
@@ -63,21 +63,21 @@ public:
   _LIBCPP_HIDE_FROM_ABI constexpr explicit single_view(in_place_t, _Args&&... __args)
       : __value_{in_place, std::forward<_Args>(__args)...} {}
 
-  _LIBCPP_HIDE_FROM_ABI constexpr _Tp* begin() noexcept { return data(); }
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr _Tp* begin() noexcept { return data(); }
 
-  _LIBCPP_HIDE_FROM_ABI constexpr const _Tp* begin() const noexcept { return data(); }
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr const _Tp* begin() const noexcept { return data(); }
 
-  _LIBCPP_HIDE_FROM_ABI constexpr _Tp* end() noexcept { return data() + 1; }
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr _Tp* end() noexcept { return data() + 1; }
 
-  _LIBCPP_HIDE_FROM_ABI constexpr const _Tp* end() const noexcept { return data() + 1; }
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr const _Tp* end() const noexcept { return data() + 1; }
 
-  _LIBCPP_HIDE_FROM_ABI static constexpr bool empty() noexcept { return false; }
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI static constexpr bool empty() noexcept { return false; }
 
-  _LIBCPP_HIDE_FROM_ABI static constexpr size_t size() noexcept { return 1; }
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI static constexpr size_t size() noexcept { return 1; }
 
-  _LIBCPP_HIDE_FROM_ABI constexpr _Tp* data() noexcept { return __value_.operator->(); }
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr _Tp* data() noexcept { return __value_.operator->(); }
 
-  _LIBCPP_HIDE_FROM_ABI constexpr const _Tp* data() const noexcept { return __value_.operator->(); }
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr const _Tp* data() const noexcept { return __value_.operator->(); }
 };
 
 template <class _Tp>
diff --git a/lib/libcxx/include/__ranges/take_view.h b/lib/libcxx/include/__ranges/take_view.h
index 85723dc5e3..13cb4a285d 100644
--- a/lib/libcxx/include/__ranges/take_view.h
+++ b/lib/libcxx/include/__ranges/take_view.h
@@ -75,15 +75,15 @@ public:
     _LIBCPP_ASSERT_UNCATEGORIZED(__count >= 0, "count has to be greater than or equal to zero");
   }
 
-  _LIBCPP_HIDE_FROM_ABI constexpr _View base() const&
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr _View base() const&
     requires copy_constructible<_View>
   {
     return __base_;
   }
 
-  _LIBCPP_HIDE_FROM_ABI constexpr _View base() && { return std::move(__base_); }
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr _View base() && { return std::move(__base_); }
 
-  _LIBCPP_HIDE_FROM_ABI constexpr auto begin()
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr auto begin()
     requires(!__simple_view<_View>)
   {
     if constexpr (sized_range<_View>) {
@@ -99,7 +99,7 @@ public:
     }
   }
 
-  _LIBCPP_HIDE_FROM_ABI constexpr auto begin() const
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr auto begin() const
     requires range<const _View>
   {
     if constexpr (sized_range<const _View>) {
@@ -115,7 +115,7 @@ public:
     }
   }
 
-  _LIBCPP_HIDE_FROM_ABI constexpr auto end()
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr auto end()
     requires(!__simple_view<_View>)
   {
     if constexpr (sized_range<_View>) {
@@ -129,7 +129,7 @@ public:
     }
   }
 
-  _LIBCPP_HIDE_FROM_ABI constexpr auto end() const
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr auto end() const
     requires range<const _View>
   {
     if constexpr (sized_range<const _View>) {
@@ -143,14 +143,14 @@ public:
     }
   }
 
-  _LIBCPP_HIDE_FROM_ABI constexpr auto size()
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr auto size()
     requires sized_range<_View>
   {
     auto __n = ranges::size(__base_);
     return ranges::min(__n, static_cast<decltype(__n)>(__count_));
   }
 
-  _LIBCPP_HIDE_FROM_ABI constexpr auto size() const
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr auto size() const
     requires sized_range<const _View>
   {
     auto __n = ranges::size(__base_);
@@ -178,7 +178,7 @@ public:
     requires _Const && convertible_to<sentinel_t<_View>, sentinel_t<_Base>>
       : __end_(std::move(__s.__end_)) {}
 
-  _LIBCPP_HIDE_FROM_ABI constexpr sentinel_t<_Base> base() const { return __end_; }
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr sentinel_t<_Base> base() const { return __end_; }
 
   _LIBCPP_HIDE_FROM_ABI friend constexpr bool operator==(const _Iter<_Const>& __lhs, const __sentinel& __rhs) {
     return __lhs.count() == 0 || __lhs.base() == __rhs.__end_;
diff --git a/lib/libcxx/include/__ranges/transform_view.h b/lib/libcxx/include/__ranges/transform_view.h
index ae85dfa452..ab1adf9cdb 100644
--- a/lib/libcxx/include/__ranges/transform_view.h
+++ b/lib/libcxx/include/__ranges/transform_view.h
@@ -13,7 +13,6 @@
 #include <__compare/three_way_comparable.h>
 #include <__concepts/constructible.h>
 #include <__concepts/convertible_to.h>
-#include <__concepts/copyable.h>
 #include <__concepts/derived_from.h>
 #include <__concepts/equality_comparable.h>
 #include <__concepts/invocable.h>
@@ -64,7 +63,7 @@ concept __regular_invocable_with_range_ref = regular_invocable<_Fn, range_refere
 template <class _View, class _Fn>
 concept __transform_view_constraints =
     view<_View> && is_object_v<_Fn> && regular_invocable<_Fn&, range_reference_t<_View>> &&
-    __is_referenceable_v<invoke_result_t<_Fn&, range_reference_t<_View>>>;
+    __referenceable<invoke_result_t<_Fn&, range_reference_t<_View>>>;
 
 #  if _LIBCPP_STD_VER >= 23
 template <input_range _View, move_constructible _Fn>
diff --git a/lib/libcxx/include/__ranges/view_interface.h b/lib/libcxx/include/__ranges/view_interface.h
index 3bcfbaf3a2..37b2c9e2c1 100644
--- a/lib/libcxx/include/__ranges/view_interface.h
+++ b/lib/libcxx/include/__ranges/view_interface.h
@@ -87,35 +87,35 @@ public:
   }
 
   template <class _D2 = _Derived>
-  _LIBCPP_HIDE_FROM_ABI constexpr auto data()
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr auto data()
     requires contiguous_iterator<iterator_t<_D2>>
   {
     return std::to_address(ranges::begin(__derived()));
   }
 
   template <class _D2 = _Derived>
-  _LIBCPP_HIDE_FROM_ABI constexpr auto data() const
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr auto data() const
     requires range<const _D2> && contiguous_iterator<iterator_t<const _D2>>
   {
     return std::to_address(ranges::begin(__derived()));
   }
 
   template <class _D2 = _Derived>
-  _LIBCPP_HIDE_FROM_ABI constexpr auto size()
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr auto size()
     requires forward_range<_D2> && sized_sentinel_for<sentinel_t<_D2>, iterator_t<_D2>>
   {
     return std::__to_unsigned_like(ranges::end(__derived()) - ranges::begin(__derived()));
   }
 
   template <class _D2 = _Derived>
-  _LIBCPP_HIDE_FROM_ABI constexpr auto size() const
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr auto size() const
     requires forward_range<const _D2> && sized_sentinel_for<sentinel_t<const _D2>, iterator_t<const _D2>>
   {
     return std::__to_unsigned_like(ranges::end(__derived()) - ranges::begin(__derived()));
   }
 
   template <class _D2 = _Derived>
-  _LIBCPP_HIDE_FROM_ABI constexpr decltype(auto) front()
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr decltype(auto) front()
     requires forward_range<_D2>
   {
     _LIBCPP_ASSERT_VALID_ELEMENT_ACCESS(
@@ -124,7 +124,7 @@ public:
   }
 
   template <class _D2 = _Derived>
-  _LIBCPP_HIDE_FROM_ABI constexpr decltype(auto) front() const
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr decltype(auto) front() const
     requires forward_range<const _D2>
   {
     _LIBCPP_ASSERT_VALID_ELEMENT_ACCESS(
@@ -133,7 +133,7 @@ public:
   }
 
   template <class _D2 = _Derived>
-  _LIBCPP_HIDE_FROM_ABI constexpr decltype(auto) back()
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr decltype(auto) back()
     requires bidirectional_range<_D2> && common_range<_D2>
   {
     _LIBCPP_ASSERT_VALID_ELEMENT_ACCESS(
@@ -142,7 +142,7 @@ public:
   }
 
   template <class _D2 = _Derived>
-  _LIBCPP_HIDE_FROM_ABI constexpr decltype(auto) back() const
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr decltype(auto) back() const
     requires bidirectional_range<const _D2> && common_range<const _D2>
   {
     _LIBCPP_ASSERT_VALID_ELEMENT_ACCESS(
@@ -151,12 +151,12 @@ public:
   }
 
   template <random_access_range _RARange = _Derived>
-  _LIBCPP_HIDE_FROM_ABI constexpr decltype(auto) operator[](range_difference_t<_RARange> __index) {
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr decltype(auto) operator[](range_difference_t<_RARange> __index) {
     return ranges::begin(__derived())[__index];
   }
 
   template <random_access_range _RARange = const _Derived>
-  _LIBCPP_HIDE_FROM_ABI constexpr decltype(auto) operator[](range_difference_t<_RARange> __index) const {
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr decltype(auto) operator[](range_difference_t<_RARange> __index) const {
     return ranges::begin(__derived())[__index];
   }
 };
diff --git a/lib/libcxx/include/__ranges/zip_transform_view.h b/lib/libcxx/include/__ranges/zip_transform_view.h
new file mode 100644
index 0000000000..07aa182f28
--- /dev/null
+++ b/lib/libcxx/include/__ranges/zip_transform_view.h
@@ -0,0 +1,357 @@
+// -*- C++ -*-
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef _LIBCPP___RANGES_ZIP_TRANSFORM_VIEW_H
+#define _LIBCPP___RANGES_ZIP_TRANSFORM_VIEW_H
+
+#include <__config>
+
+#include <__concepts/constructible.h>
+#include <__concepts/convertible_to.h>
+#include <__concepts/derived_from.h>
+#include <__concepts/equality_comparable.h>
+#include <__concepts/invocable.h>
+#include <__functional/invoke.h>
+#include <__iterator/concepts.h>
+#include <__iterator/incrementable_traits.h>
+#include <__iterator/iterator_traits.h>
+#include <__memory/addressof.h>
+#include <__ranges/access.h>
+#include <__ranges/all.h>
+#include <__ranges/concepts.h>
+#include <__ranges/empty_view.h>
+#include <__ranges/movable_box.h>
+#include <__ranges/view_interface.h>
+#include <__ranges/zip_view.h>
+#include <__type_traits/decay.h>
+#include <__type_traits/invoke.h>
+#include <__type_traits/is_object.h>
+#include <__type_traits/is_reference.h>
+#include <__type_traits/is_referenceable.h>
+#include <__type_traits/maybe_const.h>
+#include <__type_traits/remove_cvref.h>
+#include <__utility/forward.h>
+#include <__utility/in_place.h>
+#include <__utility/move.h>
+#include <tuple> // for std::apply
+
+#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
+#  pragma GCC system_header
+#endif
+
+_LIBCPP_PUSH_MACROS
+#include <__undef_macros>
+_LIBCPP_BEGIN_NAMESPACE_STD
+
+#if _LIBCPP_STD_VER >= 23
+
+namespace ranges {
+
+template <move_constructible _Fn, input_range... _Views> // view applying _Fn to the zipped elements of _Views
+  requires(view<_Views> && ...) &&
+          (sizeof...(_Views) > 0) && is_object_v<_Fn> && regular_invocable<_Fn&, range_reference_t<_Views>...> &&
+          __referenceable<invoke_result_t<_Fn&, range_reference_t<_Views>...>>
+class zip_transform_view : public view_interface<zip_transform_view<_Fn, _Views...>> {
+  _LIBCPP_NO_UNIQUE_ADDRESS zip_view<_Views...> __zip_; // inner zip_view holding all input views
+  _LIBCPP_NO_UNIQUE_ADDRESS __movable_box<_Fn> __fun_;  // the callable, stored in a __movable_box
+
+  using _InnerView _LIBCPP_NODEBUG = zip_view<_Views...>;
+  template <bool _Const>
+  using __ziperator _LIBCPP_NODEBUG = iterator_t<__maybe_const<_Const, _InnerView>>; // iterator of the inner view
+  template <bool _Const>
+  using __zentinel _LIBCPP_NODEBUG = sentinel_t<__maybe_const<_Const, _InnerView>>; // sentinel of the inner view
+
+  template <bool>
+  class __iterator;
+
+  template <bool>
+  class __sentinel;
+
+public:
+  _LIBCPP_HIDE_FROM_ABI zip_transform_view() = default;
+
+  _LIBCPP_HIDE_FROM_ABI constexpr explicit zip_transform_view(_Fn __fun, _Views... __views)
+      : __zip_(std::move(__views)...), __fun_(in_place, std::move(__fun)) {}
+
+  _LIBCPP_HIDE_FROM_ABI constexpr auto begin() { return __iterator<false>(*this, __zip_.begin()); }
+
+  _LIBCPP_HIDE_FROM_ABI constexpr auto begin() const
+    requires range<const _InnerView> && regular_invocable<const _Fn&, range_reference_t<const _Views>...>
+  {
+    return __iterator<true>(*this, __zip_.begin());
+  }
+
+  _LIBCPP_HIDE_FROM_ABI constexpr auto end() {
+    if constexpr (common_range<_InnerView>) { // common range: end() is also an __iterator
+      return __iterator<false>(*this, __zip_.end());
+    } else { // otherwise wrap the inner view's sentinel
+      return __sentinel<false>(__zip_.end());
+    }
+  }
+
+  _LIBCPP_HIDE_FROM_ABI constexpr auto end() const
+    requires range<const _InnerView> && regular_invocable<const _Fn&, range_reference_t<const _Views>...>
+  {
+    if constexpr (common_range<const _InnerView>) { // same choice as the non-const overload, on const _InnerView
+      return __iterator<true>(*this, __zip_.end());
+    } else {
+      return __sentinel<true>(__zip_.end());
+    }
+  }
+
+  _LIBCPP_HIDE_FROM_ABI constexpr auto size()
+    requires sized_range<_InnerView>
+  {
+    return __zip_.size(); // size is whatever the inner zip_view reports
+  }
+
+  _LIBCPP_HIDE_FROM_ABI constexpr auto size() const
+    requires sized_range<const _InnerView>
+  {
+    return __zip_.size(); // size is whatever the inner zip_view reports
+  }
+};
+
+template <class _Fn, class... _Ranges> // deduction guide: each range argument is mapped through views::all_t
+zip_transform_view(_Fn, _Ranges&&...) -> zip_transform_view<_Fn, views::all_t<_Ranges>...>;
+
+template <bool _Const, class _Fn, class... _Views>
+struct __zip_transform_iterator_category_base {}; // primary template: provides no iterator_category member
+
+template <bool _Const, class _Fn, class... _Views>
+  requires forward_range<__maybe_const<_Const, zip_view<_Views...>>> // only forward ranges get iterator_category
+struct __zip_transform_iterator_category_base<_Const, _Fn, _Views...> {
+private:
+  template <class _View>
+  using __tag _LIBCPP_NODEBUG = typename iterator_traits<iterator_t<__maybe_const<_Const, _View>>>::iterator_category;
+
+  static consteval auto __get_iterator_category() { // selects the tag type; only used inside decltype below
+    if constexpr (!is_reference_v<invoke_result_t<__maybe_const<_Const, _Fn>&,
+                                                  range_reference_t<__maybe_const<_Const, _Views>>...>>) {
+      return input_iterator_tag(); // the callable's result is not a reference
+    } else if constexpr ((derived_from<__tag<_Views>, random_access_iterator_tag> && ...)) {
+      return random_access_iterator_tag();
+    } else if constexpr ((derived_from<__tag<_Views>, bidirectional_iterator_tag> && ...)) {
+      return bidirectional_iterator_tag();
+    } else if constexpr ((derived_from<__tag<_Views>, forward_iterator_tag> && ...)) {
+      return forward_iterator_tag();
+    } else {
+      return input_iterator_tag(); // at least one view's category is weaker than forward
+    }
+  }
+
+public:
+  using iterator_category = decltype(__get_iterator_category());
+};
+
+template <move_constructible _Fn, input_range... _Views>
+  requires(view<_Views> && ...) &&
+          (sizeof...(_Views) > 0) && is_object_v<_Fn> && regular_invocable<_Fn&, range_reference_t<_Views>...> &&
+          __referenceable<invoke_result_t<_Fn&, range_reference_t<_Views>...>>
+template <bool _Const>
+class zip_transform_view<_Fn, _Views...>::__iterator // wraps a zip_view iterator and applies __fun_ on access
+    : public __zip_transform_iterator_category_base<_Const, _Fn, _Views...> {
+  using _Parent _LIBCPP_NODEBUG = __maybe_const<_Const, zip_transform_view>;
+  using _Base _LIBCPP_NODEBUG   = __maybe_const<_Const, _InnerView>;
+
+  friend zip_transform_view<_Fn, _Views...>;
+
+  _Parent* __parent_ = nullptr; // view this iterator was created from; used to reach __fun_
+  __ziperator<_Const> __inner_; // wrapped zip_view iterator
+
+  _LIBCPP_HIDE_FROM_ABI constexpr __iterator(_Parent& __parent, __ziperator<_Const> __inner)
+      : __parent_(std::addressof(__parent)), __inner_(std::move(__inner)) {}
+
+  _LIBCPP_HIDE_FROM_ABI constexpr auto __get_deref_and_invoke() const noexcept { // lambda: fun(*iters...)
+    return [&__fun = *__parent_->__fun_](const auto&... __iters) noexcept(noexcept(std::invoke(
+               *__parent_->__fun_, *__iters...))) -> decltype(auto) { return std::invoke(__fun, *__iters...); };
+  }
+
+public:
+  using iterator_concept = typename __ziperator<_Const>::iterator_concept;
+  using value_type =
+      remove_cvref_t<invoke_result_t<__maybe_const<_Const, _Fn>&, range_reference_t<__maybe_const<_Const, _Views>>...>>;
+  using difference_type = range_difference_t<_Base>;
+
+  _LIBCPP_HIDE_FROM_ABI __iterator() = default;
+  _LIBCPP_HIDE_FROM_ABI constexpr __iterator(__iterator<!_Const> __i) // non-const -> const conversion
+    requires _Const && convertible_to<__ziperator<false>, __ziperator<_Const>>
+      : __parent_(__i.__parent_), __inner_(std::move(__i.__inner_)) {}
+
+  _LIBCPP_HIDE_FROM_ABI constexpr decltype(auto) operator*() const
+      noexcept(noexcept(std::apply(__get_deref_and_invoke(), __zip_view_iterator_access::__get_underlying(__inner_)))) {
+    return std::apply(__get_deref_and_invoke(), __zip_view_iterator_access::__get_underlying(__inner_));
+  }
+
+  _LIBCPP_HIDE_FROM_ABI constexpr __iterator& operator++() {
+    ++__inner_; // traversal is forwarded to the wrapped zip iterator
+    return *this;
+  }
+
+  _LIBCPP_HIDE_FROM_ABI constexpr void operator++(int) { ++*this; } // returns void; overload below returns a copy
+
+  _LIBCPP_HIDE_FROM_ABI constexpr __iterator operator++(int)
+    requires forward_range<_Base>
+  {
+    auto __tmp = *this;
+    ++*this;
+    return __tmp;
+  }
+
+  _LIBCPP_HIDE_FROM_ABI constexpr __iterator& operator--()
+    requires bidirectional_range<_Base>
+  {
+    --__inner_;
+    return *this;
+  }
+
+  _LIBCPP_HIDE_FROM_ABI constexpr __iterator operator--(int)
+    requires bidirectional_range<_Base>
+  {
+    auto __tmp = *this;
+    --*this;
+    return __tmp;
+  }
+
+  _LIBCPP_HIDE_FROM_ABI constexpr __iterator& operator+=(difference_type __x)
+    requires random_access_range<_Base>
+  {
+    __inner_ += __x;
+    return *this;
+  }
+
+  _LIBCPP_HIDE_FROM_ABI constexpr __iterator& operator-=(difference_type __x)
+    requires random_access_range<_Base>
+  {
+    __inner_ -= __x;
+    return *this;
+  }
+
+  _LIBCPP_HIDE_FROM_ABI constexpr decltype(auto) operator[](difference_type __n) const
+    requires random_access_range<_Base>
+  {
+    return std::apply( // index every underlying iterator at __n, then invoke __fun_ on the results
+        [&]<class... _Is>(const _Is&... __iters) -> decltype(auto) {
+          return std::invoke(*__parent_->__fun_, __iters[iter_difference_t<_Is>(__n)]...);
+        },
+        __zip_view_iterator_access::__get_underlying(__inner_));
+  }
+
+  _LIBCPP_HIDE_FROM_ABI friend constexpr bool operator==(const __iterator& __x, const __iterator& __y)
+    requires equality_comparable<__ziperator<_Const>>
+  {
+    return __x.__inner_ == __y.__inner_; // comparisons look only at __inner_, never at __parent_
+  }
+
+  _LIBCPP_HIDE_FROM_ABI friend constexpr auto operator<=>(const __iterator& __x, const __iterator& __y)
+    requires random_access_range<_Base>
+  {
+    return __x.__inner_ <=> __y.__inner_;
+  }
+
+  _LIBCPP_HIDE_FROM_ABI friend constexpr __iterator operator+(const __iterator& __i, difference_type __n)
+    requires random_access_range<_Base>
+  {
+    return __iterator(*__i.__parent_, __i.__inner_ + __n); // result keeps the same parent view
+  }
+
+  _LIBCPP_HIDE_FROM_ABI friend constexpr __iterator operator+(difference_type __n, const __iterator& __i)
+    requires random_access_range<_Base>
+  {
+    return __iterator(*__i.__parent_, __i.__inner_ + __n);
+  }
+
+  _LIBCPP_HIDE_FROM_ABI friend constexpr __iterator operator-(const __iterator& __i, difference_type __n)
+    requires random_access_range<_Base>
+  {
+    return __iterator(*__i.__parent_, __i.__inner_ - __n);
+  }
+
+  _LIBCPP_HIDE_FROM_ABI friend constexpr difference_type operator-(const __iterator& __x, const __iterator& __y)
+    requires sized_sentinel_for<__ziperator<_Const>, __ziperator<_Const>>
+  {
+    return __x.__inner_ - __y.__inner_;
+  }
+};
+
+template <move_constructible _Fn, input_range... _Views>
+  requires(view<_Views> && ...) &&
+          (sizeof...(_Views) > 0) && is_object_v<_Fn> && regular_invocable<_Fn&, range_reference_t<_Views>...> &&
+          __referenceable<invoke_result_t<_Fn&, range_reference_t<_Views>...>>
+template <bool _Const>
+class zip_transform_view<_Fn, _Views...>::__sentinel { // thin wrapper around the inner zip_view's sentinel
+  __zentinel<_Const> __inner_; // wrapped zip_view sentinel
+
+  friend zip_transform_view<_Fn, _Views...>;
+
+  _LIBCPP_HIDE_FROM_ABI constexpr explicit __sentinel(__zentinel<_Const> __inner) : __inner_(__inner) {}
+
+public:
+  _LIBCPP_HIDE_FROM_ABI __sentinel() = default;
+
+  _LIBCPP_HIDE_FROM_ABI constexpr __sentinel(__sentinel<!_Const> __i) // non-const -> const conversion
+    requires _Const && convertible_to<__zentinel<false>, __zentinel<_Const>>
+      : __inner_(__i.__inner_) {}
+
+  template <bool _OtherConst>
+    requires sentinel_for<__zentinel<_Const>, __ziperator<_OtherConst>>
+  _LIBCPP_HIDE_FROM_ABI friend constexpr bool operator==(const __iterator<_OtherConst>& __x, const __sentinel& __y) {
+    return __x.__inner_ == __y.__inner_; // compares the wrapped zip iterator against the wrapped zip sentinel
+  }
+
+  template <bool _OtherConst>
+    requires sized_sentinel_for<__zentinel<_Const>, __ziperator<_OtherConst>>
+  _LIBCPP_HIDE_FROM_ABI friend constexpr range_difference_t<__maybe_const<_OtherConst, _InnerView>>
+  operator-(const __iterator<_OtherConst>& __x, const __sentinel& __y) {
+    return __x.__inner_ - __y.__inner_; // iterator - sentinel, computed on the wrapped objects
+  }
+
+  template <bool _OtherConst>
+    requires sized_sentinel_for<__zentinel<_Const>, __ziperator<_OtherConst>>
+  _LIBCPP_HIDE_FROM_ABI friend constexpr range_difference_t<__maybe_const<_OtherConst, _InnerView>>
+  operator-(const __sentinel& __x, const __iterator<_OtherConst>& __y) {
+    return __x.__inner_ - __y.__inner_; // sentinel - iterator, computed on the wrapped objects
+  }
+};
+
+namespace views {
+namespace __zip_transform {
+
+struct __fn { // function object type of views::zip_transform (instantiated in __cpo below)
+  template <class _Fn>
+    requires(move_constructible<decay_t<_Fn>> && regular_invocable<decay_t<_Fn>&> &&
+             is_object_v<invoke_result_t<decay_t<_Fn>&>>)
+  _LIBCPP_HIDE_FROM_ABI constexpr auto operator()(_Fn&&) const // no ranges: callable is unused, result is empty
+      noexcept(noexcept(auto(views::empty<decay_t<invoke_result_t<decay_t<_Fn>&>>>))) {
+    return views::empty<decay_t<invoke_result_t<decay_t<_Fn>&>>>;
+  }
+
+  template <class _Fn, class... _Ranges>
+    requires(sizeof...(_Ranges) > 0) // at least one range: build a zip_transform_view via CTAD
+  _LIBCPP_HIDE_FROM_ABI constexpr auto operator()(_Fn&& __fun, _Ranges&&... __rs) const
+      noexcept(noexcept(zip_transform_view(std::forward<_Fn>(__fun), std::forward<_Ranges>(__rs)...)))
+          -> decltype(zip_transform_view(std::forward<_Fn>(__fun), std::forward<_Ranges>(__rs)...)) {
+    return zip_transform_view(std::forward<_Fn>(__fun), std::forward<_Ranges>(__rs)...);
+  }
+};
+
+} // namespace __zip_transform
+inline namespace __cpo {
+inline constexpr auto zip_transform = __zip_transform::__fn{};
+} // namespace __cpo
+} // namespace views
+} // namespace ranges
+
+#endif // _LIBCPP_STD_VER >= 23
+
+_LIBCPP_END_NAMESPACE_STD
+
+_LIBCPP_POP_MACROS
+
+#endif // _LIBCPP___RANGES_ZIP_TRANSFORM_VIEW_H
diff --git a/lib/libcxx/include/__ranges/zip_view.h b/lib/libcxx/include/__ranges/zip_view.h
index e2a194efcf..bea64c4997 100644
--- a/lib/libcxx/include/__ranges/zip_view.h
+++ b/lib/libcxx/include/__ranges/zip_view.h
@@ -31,6 +31,7 @@
 #include <__ranges/enable_borrowed_range.h>
 #include <__ranges/size.h>
 #include <__ranges/view_interface.h>
+#include <__tuple/tuple_transform.h>
 #include <__type_traits/is_nothrow_constructible.h>
 #include <__type_traits/make_unsigned.h>
 #include <__utility/declval.h>
@@ -58,15 +59,6 @@ concept __zip_is_common =
     (!(bidirectional_range<_Ranges> && ...) && (common_range<_Ranges> && ...)) ||
     ((random_access_range<_Ranges> && ...) && (sized_range<_Ranges> && ...));
 
-template <class _Fun, class _Tuple>
-_LIBCPP_HIDE_FROM_ABI constexpr auto __tuple_transform(_Fun&& __f, _Tuple&& __tuple) {
-  return std::apply(
-      [&]<class... _Types>(_Types&&... __elements) {
-        return tuple<invoke_result_t<_Fun&, _Types>...>(std::invoke(__f, std::forward<_Types>(__elements))...);
-      },
-      std::forward<_Tuple>(__tuple));
-}
-
 template <class _Fun, class _Tuple>
 _LIBCPP_HIDE_FROM_ABI constexpr void __tuple_for_each(_Fun&& __f, _Tuple&& __tuple) {
   std::apply(
@@ -145,24 +137,24 @@ public:
   _LIBCPP_HIDE_FROM_ABI constexpr auto begin()
     requires(!(__simple_view<_Views> && ...))
   {
-    return __iterator<false>(ranges::__tuple_transform(ranges::begin, __views_));
+    return __iterator<false>(std::__tuple_transform(ranges::begin, __views_));
   }
 
   _LIBCPP_HIDE_FROM_ABI constexpr auto begin() const
     requires(range<const _Views> && ...)
   {
-    return __iterator<true>(ranges::__tuple_transform(ranges::begin, __views_));
+    return __iterator<true>(std::__tuple_transform(ranges::begin, __views_));
   }
 
   _LIBCPP_HIDE_FROM_ABI constexpr auto end()
     requires(!(__simple_view<_Views> && ...))
   {
     if constexpr (!__zip_is_common<_Views...>) {
-      return __sentinel<false>(ranges::__tuple_transform(ranges::end, __views_));
+      return __sentinel<false>(std::__tuple_transform(ranges::end, __views_));
     } else if constexpr ((random_access_range<_Views> && ...)) {
       return begin() + iter_difference_t<__iterator<false>>(size());
     } else {
-      return __iterator<false>(ranges::__tuple_transform(ranges::end, __views_));
+      return __iterator<false>(std::__tuple_transform(ranges::end, __views_));
     }
   }
 
@@ -170,11 +162,11 @@ public:
     requires(range<const _Views> && ...)
   {
     if constexpr (!__zip_is_common<const _Views...>) {
-      return __sentinel<true>(ranges::__tuple_transform(ranges::end, __views_));
+      return __sentinel<true>(std::__tuple_transform(ranges::end, __views_));
     } else if constexpr ((random_access_range<const _Views> && ...)) {
       return begin() + iter_difference_t<__iterator<true>>(size());
     } else {
-      return __iterator<true>(ranges::__tuple_transform(ranges::end, __views_));
+      return __iterator<true>(std::__tuple_transform(ranges::end, __views_));
     }
   }
 
@@ -186,7 +178,7 @@ public:
           using _CT = make_unsigned_t<common_type_t<decltype(__sizes)...>>;
           return ranges::min({_CT(__sizes)...});
         },
-        ranges::__tuple_transform(ranges::size, __views_));
+        std::__tuple_transform(ranges::size, __views_));
   }
 
   _LIBCPP_HIDE_FROM_ABI constexpr auto size() const
@@ -197,7 +189,7 @@ public:
           using _CT = make_unsigned_t<common_type_t<decltype(__sizes)...>>;
           return ranges::min({_CT(__sizes)...});
         },
-        ranges::__tuple_transform(ranges::size, __views_));
+        std::__tuple_transform(ranges::size, __views_));
   }
 };
 
@@ -235,6 +227,13 @@ struct __zip_view_iterator_category_base<_Const, _Views...> {
   using iterator_category = input_iterator_tag;
 };
 
+struct __zip_view_iterator_access { // befriended by zip_view::__iterator so other code can reach __current_
+  template <class _Iter>
+  _LIBCPP_HIDE_FROM_ABI static constexpr decltype(auto) __get_underlying(_Iter& __iter) noexcept {
+    return (__iter.__current_); // parenthesized so decltype(auto) deduces a reference, not a copy
+  }
+};
+
+
 template <input_range... _Views>
   requires(view<_Views> && ...) && (sizeof...(_Views) > 0)
 template <bool _Const>
@@ -255,6 +254,7 @@ class zip_view<_Views...>::__iterator : public __zip_view_iterator_category_base
   static constexpr bool __is_zip_view_iterator = true;
 
   friend struct __product_iterator_traits<__iterator>;
+  friend __zip_view_iterator_access;
 
 public:
   using iterator_concept = decltype(ranges::__get_zip_view_iterator_tag<_Const, _Views...>());
@@ -268,7 +268,7 @@ public:
       : __current_(std::move(__i.__current_)) {}
 
   _LIBCPP_HIDE_FROM_ABI constexpr auto operator*() const {
-    return ranges::__tuple_transform([](auto& __i) -> decltype(auto) { return *__i; }, __current_);
+    return std::__tuple_transform([](auto& __i) -> decltype(auto) { return *__i; }, __current_);
   }
 
   _LIBCPP_HIDE_FROM_ABI constexpr __iterator& operator++() {
@@ -318,7 +318,7 @@ public:
   _LIBCPP_HIDE_FROM_ABI constexpr auto operator[](difference_type __n) const
     requires __zip_all_random_access<_Const, _Views...>
   {
-    return ranges::__tuple_transform(
+    return std::__tuple_transform(
         [&]<class _Iter>(_Iter& __i) -> decltype(auto) { return __i[iter_difference_t<_Iter>(__n)]; }, __current_);
   }
 
@@ -377,7 +377,7 @@ public:
   _LIBCPP_HIDE_FROM_ABI friend constexpr auto iter_move(const __iterator& __i) noexcept(
       (noexcept(ranges::iter_move(std::declval<const iterator_t<__maybe_const<_Const, _Views>>&>())) && ...) &&
       (is_nothrow_move_constructible_v<range_rvalue_reference_t<__maybe_const<_Const, _Views>>> && ...)) {
-    return ranges::__tuple_transform(ranges::iter_move, __i.__current_);
+    return std::__tuple_transform(ranges::iter_move, __i.__current_);
   }
 
   _LIBCPP_HIDE_FROM_ABI friend constexpr void iter_swap(const __iterator& __l, const __iterator& __r) noexcept(
diff --git a/lib/libcxx/include/__split_buffer b/lib/libcxx/include/__split_buffer
index 21e58f4abc..d6176f8ca2 100644
--- a/lib/libcxx/include/__split_buffer
+++ b/lib/libcxx/include/__split_buffer
@@ -13,10 +13,12 @@
 #include <__algorithm/max.h>
 #include <__algorithm/move.h>
 #include <__algorithm/move_backward.h>
+#include <__assert>
 #include <__config>
 #include <__iterator/distance.h>
 #include <__iterator/iterator_traits.h>
 #include <__iterator/move_iterator.h>
+#include <__memory/addressof.h>
 #include <__memory/allocate_at_least.h>
 #include <__memory/allocator.h>
 #include <__memory/allocator_traits.h>
@@ -28,11 +30,9 @@
 #include <__type_traits/integral_constant.h>
 #include <__type_traits/is_nothrow_assignable.h>
 #include <__type_traits/is_nothrow_constructible.h>
-#include <__type_traits/is_replaceable.h>
 #include <__type_traits/is_swappable.h>
 #include <__type_traits/is_trivially_destructible.h>
 #include <__type_traits/is_trivially_relocatable.h>
-#include <__type_traits/remove_reference.h>
 #include <__utility/forward.h>
 #include <__utility/move.h>
 
@@ -45,25 +45,430 @@ _LIBCPP_PUSH_MACROS
 
 _LIBCPP_BEGIN_NAMESPACE_STD
 
-// __split_buffer allocates a contiguous chunk of memory and stores objects in the range [__begin_, __end_).
-// It has uninitialized memory in the ranges  [__first_, __begin_) and [__end_, __cap_). That allows
-// it to grow both in the front and back without having to move the data.
+template <class _Tp, class _Allocator, template <class, class, class> class _Layout>
+class __split_buffer;
+
+template <class _SplitBuffer, class _Tp, class _Allocator>
+class __split_buffer_pointer_layout {
+protected:
+  using value_type                      = _Tp;
+  using allocator_type                  = _Allocator;
+  using __alloc_traits _LIBCPP_NODEBUG  = allocator_traits<allocator_type>;
+  using reference                       = value_type&;
+  using const_reference                 = const value_type&;
+  using size_type                       = typename __alloc_traits::size_type;
+  using difference_type                 = typename __alloc_traits::difference_type;
+  using pointer                         = typename __alloc_traits::pointer;
+  using const_pointer                   = typename __alloc_traits::const_pointer;
+  using iterator                        = pointer;
+  using const_iterator                  = const_pointer;
+  using __sentinel_type _LIBCPP_NODEBUG = pointer;
 
-template <class _Tp, class _Allocator = allocator<_Tp> >
-struct __split_buffer {
 public:
-  using value_type                     = _Tp;
-  using allocator_type                 = _Allocator;
-  using __alloc_rr _LIBCPP_NODEBUG     = __libcpp_remove_reference_t<allocator_type>;
-  using __alloc_traits _LIBCPP_NODEBUG = allocator_traits<__alloc_rr>;
-  using reference                      = value_type&;
-  using const_reference                = const value_type&;
-  using size_type                      = typename __alloc_traits::size_type;
-  using difference_type                = typename __alloc_traits::difference_type;
-  using pointer                        = typename __alloc_traits::pointer;
-  using const_pointer                  = typename __alloc_traits::const_pointer;
-  using iterator                       = pointer;
-  using const_iterator                 = const_pointer;
+  // Can't be defaulted due to _LIBCPP_COMPRESSED_PAIR not being an aggregate in C++03 and C++11.
+  _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI __split_buffer_pointer_layout() : __back_cap_(nullptr) {}
+
+  _LIBCPP_CONSTEXPR_SINCE_CXX20
+  _LIBCPP_HIDE_FROM_ABI explicit __split_buffer_pointer_layout(const allocator_type& __alloc)
+      : __back_cap_(nullptr), __alloc_(__alloc) {}
+
+  _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI pointer __front_cap() _NOEXCEPT { return __front_cap_; }
+
+  _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI const_pointer __front_cap() const _NOEXCEPT {
+    return __front_cap_;
+  }
+
+  _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI pointer begin() _NOEXCEPT { return __begin_; }
+
+  _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI const_pointer begin() const _NOEXCEPT { return __begin_; }
+
+  _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI pointer end() _NOEXCEPT { return __end_; }
+
+  _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI pointer end() const _NOEXCEPT { return __end_; }
+
+  _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI size_type size() const _NOEXCEPT {
+    return static_cast<size_type>(__end_ - __begin_);
+  }
+
+  _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI bool empty() const _NOEXCEPT { return __begin_ == __end_; }
+
+  _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI size_type capacity() const _NOEXCEPT {
+    return static_cast<size_type>(__back_cap_ - __front_cap_);
+  }
+
+  _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI allocator_type& __get_allocator() _NOEXCEPT { return __alloc_; }
+
+  _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI allocator_type const& __get_allocator() const _NOEXCEPT {
+    return __alloc_;
+  }
+
+  // Returns the sentinel object directly. Should be used in conjunction with automatic type deduction,
+  // not explicit types.
+  _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI __sentinel_type __raw_sentinel() const _NOEXCEPT {
+    return __end_;
+  }
+  _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI __sentinel_type __raw_capacity() const _NOEXCEPT {
+    return __back_cap_;
+  }
+
+  _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI void __set_data(pointer __new_first) _NOEXCEPT {
+    __front_cap_ = __new_first;
+  }
+
+  _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI void
+  __set_valid_range(pointer __new_begin, pointer __new_end) _NOEXCEPT {
+    __begin_ = __new_begin;
+    __end_   = __new_end;
+  }
+
+  _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI void
+  __set_valid_range(pointer __new_begin, size_type __new_size) _NOEXCEPT {
+    __begin_ = __new_begin;
+    __end_   = __begin_ + __new_size;
+  }
+
+  _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI void __set_sentinel(pointer __new_end) _NOEXCEPT {
+    _LIBCPP_ASSERT_INTERNAL(__front_cap_ <= __new_end, "__new_end cannot precede __front_cap_");
+    __end_ = __new_end;
+  }
+
+  _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI void __set_sentinel(size_type __new_size) _NOEXCEPT {
+    __end_ = __begin_ + __new_size;
+  }
+
+  _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI void __set_capacity(size_type __new_capacity) _NOEXCEPT {
+    __back_cap_ = __front_cap_ + __new_capacity;
+  }
+
+  _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI void __set_capacity(pointer __new_capacity) _NOEXCEPT {
+    __back_cap_ = __new_capacity;
+  }
+
+  _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI size_type __front_spare() const _NOEXCEPT {
+    return static_cast<size_type>(__begin_ - __front_cap_);
+  }
+
+  _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI size_type __back_spare() const _NOEXCEPT {
+    return static_cast<size_type>(__back_cap_ - __end_);
+  }
+
+  _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI reference back() _NOEXCEPT { return *(__end_ - 1); }
+
+  _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI const_reference back() const _NOEXCEPT { return *(__end_ - 1); }
+
+  _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI void __swap_without_allocator(
+      __split_buffer_pointer_layout<__split_buffer<value_type, allocator_type, __split_buffer_pointer_layout>,
+                                    value_type,
+                                    allocator_type>& __other) _NOEXCEPT {
+    std::swap(__front_cap_, __other.__front_cap_);
+    std::swap(__begin_, __other.__begin_);
+    std::swap(__back_cap_, __other.__back_cap_);
+    std::swap(__end_, __other.__end_);
+  }
+
+  _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI void swap(__split_buffer_pointer_layout& __other) _NOEXCEPT {
+    std::swap(__front_cap_, __other.__front_cap_);
+    std::swap(__begin_, __other.__begin_);
+    std::swap(__back_cap_, __other.__back_cap_);
+    std::swap(__end_, __other.__end_);
+    std::__swap_allocator(__alloc_, __other.__alloc_);
+  }
+
+  _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI void __reset() _NOEXCEPT {
+    __front_cap_ = nullptr;
+    __begin_     = nullptr;
+    __end_       = nullptr;
+    __back_cap_  = nullptr;
+  }
+
+  _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI void
+  __copy_without_alloc(__split_buffer_pointer_layout const& __other)
+      _NOEXCEPT_(is_nothrow_copy_assignable<pointer>::value) {
+    __front_cap_ = __other.__front_cap_;
+    __begin_     = __other.__begin_;
+    __end_       = __other.__end_;
+    __back_cap_  = __other.__back_cap_;
+  }
+
+private:
+  pointer __front_cap_ = nullptr;
+  pointer __begin_     = nullptr;
+  pointer __end_       = nullptr;
+  _LIBCPP_COMPRESSED_PAIR(pointer, __back_cap_, allocator_type, __alloc_);
+
+  template <class, class, class>
+  friend class __split_buffer_pointer_layout;
+};
+
+template <class _SplitBuffer, class _Tp, class _Allocator>
+class __split_buffer_size_layout {
+protected:
+  using value_type                      = _Tp;
+  using allocator_type                  = _Allocator;
+  using __alloc_traits _LIBCPP_NODEBUG  = allocator_traits<allocator_type>;
+  using reference                       = value_type&;
+  using const_reference                 = const value_type&;
+  using size_type                       = typename __alloc_traits::size_type;
+  using difference_type                 = typename __alloc_traits::difference_type;
+  using pointer                         = typename __alloc_traits::pointer;
+  using const_pointer                   = typename __alloc_traits::const_pointer;
+  using iterator                        = pointer;
+  using const_iterator                  = const_pointer;
+  using __sentinel_type _LIBCPP_NODEBUG = size_type;
+
+public:
+  _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI __split_buffer_size_layout() = default;
+
+  _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI explicit __split_buffer_size_layout(const allocator_type& __alloc)
+      : __alloc_(__alloc) {}
+
+  _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI pointer __front_cap() _NOEXCEPT { return __front_cap_; }
+
+  _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI const_pointer __front_cap() const _NOEXCEPT {
+    return __front_cap_;
+  }
+
+  _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI pointer begin() _NOEXCEPT { return __begin_; }
+
+  _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI const_pointer begin() const _NOEXCEPT { return __begin_; }
+
+  _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI pointer end() _NOEXCEPT { return __begin_ + __size_; }
+
+  _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI pointer end() const _NOEXCEPT { return __begin_ + __size_; }
+
+  _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI size_type size() const _NOEXCEPT { return __size_; }
+
+  _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI bool empty() const _NOEXCEPT { return __size_ == 0; }
+
+  _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI size_type capacity() const _NOEXCEPT { return __cap_; }
+
+  _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI allocator_type& __get_allocator() _NOEXCEPT { return __alloc_; }
+
+  _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI allocator_type const& __get_allocator() const _NOEXCEPT {
+    return __alloc_;
+  }
+
+  // Returns the sentinel object directly. Should be used in conjunction with automatic type deduction,
+  // not explicit types.
+  _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI __sentinel_type __raw_sentinel() const _NOEXCEPT {
+    return __size_;
+  }
+  _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI __sentinel_type __raw_capacity() const _NOEXCEPT {
+    return __cap_;
+  }
+
+  _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI void __set_data(pointer __new_first) _NOEXCEPT {
+    __front_cap_ = __new_first;
+  }
+
+  _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI void
+  __set_valid_range(pointer __new_begin, pointer __new_end) _NOEXCEPT {
+    // Size-based __split_buffers track their size directly, so it is recomputed from the new range. Only the
+    // two new pointers are subtracted: the old `__begin_` may be null or belong to another allocation.
+    _LIBCPP_ASSERT_INTERNAL(__front_cap_ <= __new_end, "__new_end cannot precede __front_cap_");
+    __begin_ = __new_begin;
+    __size_  = static_cast<size_type>(__new_end - __new_begin);
+  }
+
+  _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI void
+  __set_valid_range(pointer __new_begin, size_type __new_size) _NOEXCEPT {
+    // Size-based __split_buffers track their size directly, so the new size is stored exactly as given.
+    // The old `__begin_` is deliberately not consulted: it may be null or point into another allocation,
+    // and `__set_sentinel(size_type)` overwrites `__size_` unconditionally.
+    __begin_ = __new_begin;
+    __set_sentinel(__new_size);
+  }
+
+  _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI void __set_sentinel(pointer __new_end) _NOEXCEPT {
+    _LIBCPP_ASSERT_INTERNAL(__front_cap_ <= __new_end, "__new_end cannot precede __front_cap_");
+    __size_ += __new_end - end();
+  }
+
+  _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI void __set_sentinel(size_type __new_size) _NOEXCEPT {
+    __size_ = __new_size;
+  }
+
+  _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI void __set_capacity(size_type __new_capacity) _NOEXCEPT {
+    __cap_ = __new_capacity;
+  }
+
+  _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI void __set_capacity(pointer __new_capacity) _NOEXCEPT {
+    __cap_ = static_cast<size_type>(__new_capacity - __front_cap_); // capacity spans [__front_cap_, __new_capacity)
+  }
+
+  _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI size_type __front_spare() const _NOEXCEPT {
+    return static_cast<size_type>(__begin_ - __front_cap_);
+  }
+
+  _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI size_type __back_spare() const _NOEXCEPT {
+    // `__cap_ - __size_` tells us the total number of spares when in size-mode. We need to remove
+    // the __front_spare from the count.
+    return __cap_ - __size_ - __front_spare();
+  }
+
+  _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI reference back() _NOEXCEPT { return __begin_[__size_ - 1]; }
+
+  _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI const_reference back() const _NOEXCEPT {
+    return __begin_[__size_ - 1];
+  }
+
+  // Swaps all layout state except the allocator with another size-based layout of the same buffer type.
+  _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI void __swap_without_allocator(
+      __split_buffer_size_layout<__split_buffer<value_type, allocator_type, __split_buffer_size_layout>,
+                                 value_type, allocator_type>& __other) _NOEXCEPT {
+    std::swap(__front_cap_, __other.__front_cap_);
+    std::swap(__begin_, __other.__begin_);
+    std::swap(__cap_, __other.__cap_);
+    std::swap(__size_, __other.__size_);
+  }
+
+  _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI void swap(__split_buffer_size_layout& __other) _NOEXCEPT {
+    std::swap(__front_cap_, __other.__front_cap_);
+    std::swap(__begin_, __other.__begin_);
+    std::swap(__cap_, __other.__cap_);
+    std::swap(__size_, __other.__size_);
+    std::__swap_allocator(__alloc_, __other.__alloc_);
+  }
+
+  _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI void __reset() _NOEXCEPT {
+    __front_cap_ = nullptr;
+    __begin_     = nullptr;
+    __size_      = 0;
+    __cap_       = 0;
+  }
+
+  _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI void
+  __copy_without_alloc(__split_buffer_size_layout const& __other)
+      _NOEXCEPT_(is_nothrow_copy_assignable<pointer>::value) {
+    __front_cap_ = __other.__front_cap_;
+    __begin_     = __other.__begin_;
+    __cap_       = __other.__cap_;
+    __size_      = __other.__size_;
+  }
+
+private:
+  pointer __front_cap_ = nullptr;
+  pointer __begin_     = nullptr;
+  size_type __size_    = 0;
+  size_type __cap_     = 0;
+  _LIBCPP_NO_UNIQUE_ADDRESS allocator_type __alloc_;
+
+  template <class, class, class>
+  friend class __split_buffer_size_layout;
+};
+
+// `__split_buffer` is a contiguous array data structure. It may hold spare capacity at both ends of
+// the sequence. This allows for a `__split_buffer` to grow from both the front and the back without
+// relocating its contents until it runs out of room. This characteristic sets it apart from
+// `std::vector`, which only holds spare capacity at its end. As such, `__split_buffer` is useful
+// for implementing both `std::vector` and `std::deque`.
+//
+// The sequence is stored as a contiguous chunk of memory delimited by the following "pointers" (`o` denotes
+// uninitialized memory and `x` denotes a valid object):
+//
+//     |oooooooooooooooooooxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxoooooooooooooooooooooooo|
+//      ^                  ^                                    ^                       ^
+//  __front_cap_        __begin_                              __end_               __back_cap_
+//
+// The range [__front_cap_, __begin_) contains uninitialized memory. It is referred to as the "front spare capacity".
+// The range [__begin_, __end_) contains valid objects. It is referred to as the "valid range".
+// The range [__end_, __back_cap_) contains uninitialized memory. It is referred to as the "back spare capacity".
+//
+// The layout of `__split_buffer` is determined by the `_Layout` template template parameter. This
+// `_Layout` allows the above pointers to be stored as different representations, such as integer
+// offsets. A layout class template must provide the following interface:
+//
+//    template<class _SplitBuffer, class _Tp, class _Allocator>
+//    class __layout {
+//    protected:
+//      using value_type                     = _Tp;
+//      using allocator_type                 = _Allocator;
+//      using __alloc_traits                 = allocator_traits<allocator_type>;
+//      using reference                      = value_type&;
+//      using const_reference                = const value_type&;
+//      using size_type                      = typename __alloc_traits::size_type;
+//      using difference_type                = typename __alloc_traits::difference_type;
+//      using pointer                        = typename __alloc_traits::pointer;
+//      using const_pointer                  = typename __alloc_traits::const_pointer;
+//      using iterator                       = pointer;
+//      using const_iterator                 = const_pointer;
+//      using __sentinel_type                = /* type that represents the layout's sentinel */;
+//
+//    public:
+//      __layout() = default;
+//      explicit __layout(const allocator_type&);
+//
+//      pointer __front_cap();
+//      const_pointer __front_cap() const;
+//
+//      pointer begin();
+//      const_pointer begin() const;
+//
+//      pointer end();
+//      pointer end() const;
+//
+//      size_type size() const;
+//      bool empty() const;
+//      size_type capacity() const;
+//
+//      allocator_type& __get_allocator();
+//      allocator_type const& __get_allocator() const;
+//
+//      __sentinel_type __raw_sentinel() const;
+//      __sentinel_type __raw_capacity() const;
+//
+//      void __set_data(pointer);
+//      void __set_valid_range(pointer __begin, pointer __end);
+//      void __set_valid_range(pointer __begin, size_type __size);
+//      void __set_sentinel(pointer __end);
+//      void __set_sentinel(size_type __size);
+//
+//      void __set_capacity(size_type __capacity);
+//      void __set_capacity(pointer __capacity);
+//
+//      size_type __front_spare() const;
+//      size_type __back_spare() const;
+//
+//      reference back();
+//      const_reference back() const;
+//
+//      template<class _OtherLayout>
+//      void __swap_without_allocator(_OtherLayout&);
+//      void swap(__layout&);
+//
+//      void __reset();
+//      void __copy_without_alloc(__layout const&);
+//    };
+//
+template <class _Tp, class _Allocator, template <class, class, class> class _Layout>
+class __split_buffer : _Layout<__split_buffer<_Tp, _Allocator, _Layout>, _Tp, _Allocator> {
+  using __base_type _LIBCPP_NODEBUG = _Layout<__split_buffer<_Tp, _Allocator, _Layout>, _Tp, _Allocator>;
+
+public:
+  using __base_type::__back_spare;
+  using __base_type::__copy_without_alloc;
+  using __base_type::__front_cap;
+  using __base_type::__front_spare;
+  using __base_type::__get_allocator;
+  using __base_type::__raw_capacity;
+  using __base_type::__raw_sentinel;
+  using __base_type::__reset;
+  using __base_type::__set_capacity;
+  using __base_type::__set_data;
+  using __base_type::__set_sentinel;
+  using __base_type::__set_valid_range;
+
+  using typename __base_type::__alloc_traits;
+  using typename __base_type::allocator_type;
+  using typename __base_type::const_iterator;
+  using typename __base_type::const_pointer;
+  using typename __base_type::const_reference;
+  using typename __base_type::difference_type;
+  using typename __base_type::iterator;
+  using typename __base_type::pointer;
+  using typename __base_type::reference;
+  using typename __base_type::size_type;
+  using typename __base_type::value_type;
 
   // A __split_buffer contains the following members which may be trivially relocatable:
   // - pointer: may be trivially relocatable, so it's checked
@@ -73,36 +478,24 @@ public:
       __libcpp_is_trivially_relocatable<pointer>::value && __libcpp_is_trivially_relocatable<allocator_type>::value,
       __split_buffer,
       void>;
-  using __replaceable _LIBCPP_NODEBUG =
-      __conditional_t<__is_replaceable_v<pointer> && __container_allocator_is_replaceable<__alloc_traits>::value,
-                      __split_buffer,
-                      void>;
-
-  pointer __first_;
-  pointer __begin_;
-  pointer __end_;
-  _LIBCPP_COMPRESSED_PAIR(pointer, __cap_, allocator_type, __alloc_);
 
   __split_buffer(const __split_buffer&)            = delete;
   __split_buffer& operator=(const __split_buffer&) = delete;
 
-  _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI __split_buffer()
-      _NOEXCEPT_(is_nothrow_default_constructible<allocator_type>::value)
-      : __first_(nullptr), __begin_(nullptr), __end_(nullptr), __cap_(nullptr) {}
+  _LIBCPP_HIDE_FROM_ABI __split_buffer() = default;
 
-  _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI explicit __split_buffer(__alloc_rr& __a)
-      : __first_(nullptr), __begin_(nullptr), __end_(nullptr), __cap_(nullptr), __alloc_(__a) {}
+  _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI explicit __split_buffer(allocator_type& __a) : __base_type(__a) {}
 
-  _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI explicit __split_buffer(const __alloc_rr& __a)
-      : __first_(nullptr), __begin_(nullptr), __end_(nullptr), __cap_(nullptr), __alloc_(__a) {}
+  _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI explicit __split_buffer(const allocator_type& __a)
+      : __base_type(__a) {}
 
   _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI
-  __split_buffer(size_type __cap, size_type __start, __alloc_rr& __a);
+  __split_buffer(size_type __cap, size_type __start, allocator_type& __a);
 
   _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI __split_buffer(__split_buffer&& __c)
       _NOEXCEPT_(is_nothrow_move_constructible<allocator_type>::value);
 
-  _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI __split_buffer(__split_buffer&& __c, const __alloc_rr& __a);
+  _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI __split_buffer(__split_buffer&& __c, const allocator_type& __a);
 
   _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI __split_buffer& operator=(__split_buffer&& __c)
       _NOEXCEPT_((__alloc_traits::propagate_on_container_move_assignment::value &&
@@ -111,36 +504,16 @@ public:
 
   _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI ~__split_buffer();
 
-  _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI iterator begin() _NOEXCEPT { return __begin_; }
-  _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI const_iterator begin() const _NOEXCEPT { return __begin_; }
+  using __base_type::back;
+  using __base_type::begin;
+  using __base_type::capacity;
+  using __base_type::empty;
+  using __base_type::end;
+  using __base_type::size;
 
-  _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI iterator end() _NOEXCEPT { return __end_; }
-  _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI const_iterator end() const _NOEXCEPT { return __end_; }
-
-  _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI void clear() _NOEXCEPT { __destruct_at_end(__begin_); }
-
-  _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI size_type size() const {
-    return static_cast<size_type>(__end_ - __begin_);
-  }
-
-  _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI bool empty() const { return __end_ == __begin_; }
-
-  _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI size_type capacity() const {
-    return static_cast<size_type>(__cap_ - __first_);
-  }
-
-  _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI size_type __front_spare() const {
-    return static_cast<size_type>(__begin_ - __first_);
-  }
-
-  _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI size_type __back_spare() const {
-    return static_cast<size_type>(__cap_ - __end_);
-  }
-
-  _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI reference front() { return *__begin_; }
-  _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI const_reference front() const { return *__begin_; }
-  _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI reference back() { return *(__end_ - 1); }
-  _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI const_reference back() const { return *(__end_ - 1); }
+  _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI void clear() _NOEXCEPT { __destruct_at_end(begin()); }
+  _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI reference front() { return *begin(); }
+  _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI const_reference front() const { return *begin(); }
 
   _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI void shrink_to_fit() _NOEXCEPT;
 
@@ -149,8 +522,8 @@ public:
   template <class... _Args>
   _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI void emplace_back(_Args&&... __args);
 
-  _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI void pop_front() { __destruct_at_begin(__begin_ + 1); }
-  _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI void pop_back() { __destruct_at_end(__end_ - 1); }
+  _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI void pop_front() { __destruct_at_begin(begin() + 1); }
+  _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI void pop_back() { __destruct_at_end(end() - 1); }
 
   _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI void __construct_at_end(size_type __n);
   _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI void __construct_at_end(size_type __n, const_reference __x);
@@ -182,244 +555,242 @@ public:
   _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI void __destruct_at_end(pointer __new_last, true_type) _NOEXCEPT;
 
   _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI void swap(__split_buffer& __x)
-      _NOEXCEPT_(!__alloc_traits::propagate_on_container_swap::value || __is_nothrow_swappable_v<__alloc_rr>);
+      _NOEXCEPT_(!__alloc_traits::propagate_on_container_swap::value || __is_nothrow_swappable_v<allocator_type>);
 
-  _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI bool __invariants() const;
+  _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI bool __invariants() const {
+    if (__front_cap() == nullptr) {
+      // No storage: the valid range must be null and empty, and there must be no capacity.
+      if (begin() != nullptr)
+        return false;
+
+      if (!empty())
+        return false;
+
+      if (capacity() != 0)
+        return false;
+
+      return true;
+    } else {
+      if (begin() < __front_cap())
+        return false;
+
+      if (end() < begin())
+        return false;
+
+      // The front spare plus the valid range must fit in the storage, i.e. `end()` may not pass its end.
+      // Checked last so that neither term of the sum can have wrapped around.
+      if (capacity() < __front_spare() + size())
+        return false;
+      return true;
+    }
+  }
+
+  _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI void
+  __swap_without_allocator(__split_buffer<value_type, allocator_type, _Layout>& __other) _NOEXCEPT {
+    __base_type::__swap_without_allocator(__other);
+  }
 
 private:
   _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI void __move_assign_alloc(__split_buffer& __c, true_type)
       _NOEXCEPT_(is_nothrow_move_assignable<allocator_type>::value) {
-    __alloc_ = std::move(__c.__alloc_);
+    __get_allocator() = std::move(__c.__get_allocator());
   }
 
   _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI void __move_assign_alloc(__split_buffer&, false_type) _NOEXCEPT {}
 
   struct _ConstructTransaction {
     _LIBCPP_CONSTEXPR_SINCE_CXX20
-    _LIBCPP_HIDE_FROM_ABI explicit _ConstructTransaction(pointer* __p, size_type __n) _NOEXCEPT
-        : __pos_(*__p),
-          __end_(*__p + __n),
-          __dest_(__p) {}
+    _LIBCPP_HIDE_FROM_ABI explicit _ConstructTransaction(__split_buffer* __parent, pointer __p, size_type __n) _NOEXCEPT
+        : __pos_(__p),
+          __end_(__p + __n),
+          __parent_(__parent) {}
 
-    _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI ~_ConstructTransaction() { *__dest_ = __pos_; }
+    _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI ~_ConstructTransaction() { __parent_->__set_sentinel(__pos_); }
 
     pointer __pos_;
     const pointer __end_;
 
   private:
-    pointer* __dest_;
+    __split_buffer* __parent_;
   };
+
+  template <class _T2, class _A2, template <class, class, class> class _L2>
+  friend class __split_buffer;
 };
 
-template <class _Tp, class _Allocator>
-_LIBCPP_CONSTEXPR_SINCE_CXX20 bool __split_buffer<_Tp, _Allocator>::__invariants() const {
-  if (__first_ == nullptr) {
-    if (__begin_ != nullptr)
-      return false;
-    if (__end_ != nullptr)
-      return false;
-    if (__cap_ != nullptr)
-      return false;
-  } else {
-    if (__begin_ < __first_)
-      return false;
-    if (__end_ < __begin_)
-      return false;
-    if (__cap_ < __end_)
-      return false;
-  }
-  return true;
-}
-
-//  Default constructs __n objects starting at __end_
+//  Default constructs __n objects starting at `end()`
 //  throws if construction throws
 //  Precondition:  __n > 0
 //  Precondition:  size() + __n <= capacity()
 //  Postcondition:  size() == size() + __n
-template <class _Tp, class _Allocator>
-_LIBCPP_CONSTEXPR_SINCE_CXX20 void __split_buffer<_Tp, _Allocator>::__construct_at_end(size_type __n) {
-  _ConstructTransaction __tx(std::addressof(this->__end_), __n);
+template <class _Tp, class _Allocator, template <class, class, class> class _Layout>
+_LIBCPP_CONSTEXPR_SINCE_CXX20 void __split_buffer<_Tp, _Allocator, _Layout>::__construct_at_end(size_type __n) {
+  _ConstructTransaction __tx(this, end(), __n);
   for (; __tx.__pos_ != __tx.__end_; ++__tx.__pos_) {
-    __alloc_traits::construct(__alloc_, std::__to_address(__tx.__pos_));
+    __alloc_traits::construct(__get_allocator(), std::__to_address(__tx.__pos_));
   }
 }
 
-//  Copy constructs __n objects starting at __end_ from __x
+//  Copy constructs __n objects starting at `end()` from __x
 //  throws if construction throws
 //  Precondition:  __n > 0
 //  Precondition:  size() + __n <= capacity()
 //  Postcondition:  size() == old size() + __n
 //  Postcondition:  [i] == __x for all i in [size() - __n, __n)
-template <class _Tp, class _Allocator>
+template <class _Tp, class _Allocator, template <class, class, class> class _Layout>
 _LIBCPP_CONSTEXPR_SINCE_CXX20 void
-__split_buffer<_Tp, _Allocator>::__construct_at_end(size_type __n, const_reference __x) {
-  _ConstructTransaction __tx(std::addressof(this->__end_), __n);
+__split_buffer<_Tp, _Allocator, _Layout>::__construct_at_end(size_type __n, const_reference __x) {
+  _ConstructTransaction __tx(this, end(), __n);
   for (; __tx.__pos_ != __tx.__end_; ++__tx.__pos_) {
-    __alloc_traits::construct(__alloc_, std::__to_address(__tx.__pos_), __x);
+    __alloc_traits::construct(__get_allocator(), std::__to_address(__tx.__pos_), __x);
   }
 }
 
-template <class _Tp, class _Allocator>
+template <class _Tp, class _Allocator, template <class, class, class> class _Layout>
 template <class _Iterator, class _Sentinel>
 _LIBCPP_CONSTEXPR_SINCE_CXX20 void
-__split_buffer<_Tp, _Allocator>::__construct_at_end_with_sentinel(_Iterator __first, _Sentinel __last) {
-  __alloc_rr& __a = __alloc_;
+__split_buffer<_Tp, _Allocator, _Layout>::__construct_at_end_with_sentinel(_Iterator __first, _Sentinel __last) {
+  allocator_type& __a = __get_allocator();
   for (; __first != __last; ++__first) {
-    if (__end_ == __cap_) {
-      size_type __old_cap = __cap_ - __first_;
+    if (__back_spare() == 0) {
+      size_type __old_cap = capacity();
       size_type __new_cap = std::max<size_type>(2 * __old_cap, 8);
       __split_buffer __buf(__new_cap, 0, __a);
-      for (pointer __p = __begin_; __p != __end_; ++__p, (void)++__buf.__end_)
-        __alloc_traits::construct(__buf.__alloc_, std::__to_address(__buf.__end_), std::move(*__p));
+      pointer __buf_end = __buf.end();
+      pointer __end     = end();
+      for (pointer __p = begin(); __p != __end; ++__p) {
+        __alloc_traits::construct(__buf.__get_allocator(), std::__to_address(__buf_end), std::move(*__p));
+        __buf.__set_sentinel(++__buf_end);
+      }
       swap(__buf);
     }
-    __alloc_traits::construct(__a, std::__to_address(this->__end_), *__first);
-    ++this->__end_;
+
+    __alloc_traits::construct(__a, std::__to_address(end()), *__first);
+    __set_sentinel(size() + 1);
   }
 }
-template <class _Tp, class _Allocator>
+
+template <class _Tp, class _Allocator, template <class, class, class> class _Layout>
 template <class _ForwardIterator, __enable_if_t<__has_forward_iterator_category<_ForwardIterator>::value, int> >
 _LIBCPP_CONSTEXPR_SINCE_CXX20 void
-__split_buffer<_Tp, _Allocator>::__construct_at_end(_ForwardIterator __first, _ForwardIterator __last) {
+__split_buffer<_Tp, _Allocator, _Layout>::__construct_at_end(_ForwardIterator __first, _ForwardIterator __last) {
   __construct_at_end_with_size(__first, std::distance(__first, __last));
 }
 
-template <class _Tp, class _Allocator>
+template <class _Tp, class _Allocator, template <class, class, class> class _Layout>
 template <class _ForwardIterator>
 _LIBCPP_CONSTEXPR_SINCE_CXX20 void
-__split_buffer<_Tp, _Allocator>::__construct_at_end_with_size(_ForwardIterator __first, size_type __n) {
-  _ConstructTransaction __tx(std::addressof(this->__end_), __n);
+__split_buffer<_Tp, _Allocator, _Layout>::__construct_at_end_with_size(_ForwardIterator __first, size_type __n) {
+  _ConstructTransaction __tx(this, end(), __n);
   for (; __tx.__pos_ != __tx.__end_; ++__tx.__pos_, (void)++__first) {
-    __alloc_traits::construct(__alloc_, std::__to_address(__tx.__pos_), *__first);
+    __alloc_traits::construct(__get_allocator(), std::__to_address(__tx.__pos_), *__first);
   }
 }
 
-template <class _Tp, class _Allocator>
+template <class _Tp, class _Allocator, template <class, class, class> class _Layout>
 _LIBCPP_CONSTEXPR_SINCE_CXX20 inline void
-__split_buffer<_Tp, _Allocator>::__destruct_at_begin(pointer __new_begin, false_type) {
-  while (__begin_ != __new_begin)
-    __alloc_traits::destroy(__alloc_, std::__to_address(__begin_++));
+__split_buffer<_Tp, _Allocator, _Layout>::__destruct_at_begin(pointer __new_begin, false_type) {
+  pointer __begin = begin();
+  // Updating begin at every iteration is unnecessary because destruction can't throw.
+  while (__begin != __new_begin)
+    __alloc_traits::destroy(__get_allocator(), std::__to_address(__begin++));
+  __set_valid_range(__begin, end());
 }
 
-template <class _Tp, class _Allocator>
+template <class _Tp, class _Allocator, template <class, class, class> class _Layout>
 _LIBCPP_CONSTEXPR_SINCE_CXX20 inline void
-__split_buffer<_Tp, _Allocator>::__destruct_at_begin(pointer __new_begin, true_type) {
-  __begin_ = __new_begin;
+__split_buffer<_Tp, _Allocator, _Layout>::__destruct_at_begin(pointer __new_begin, true_type) {
+  __set_valid_range(__new_begin, end());
 }
 
-template <class _Tp, class _Allocator>
+template <class _Tp, class _Allocator, template <class, class, class> class _Layout>
 _LIBCPP_CONSTEXPR_SINCE_CXX20 inline _LIBCPP_HIDE_FROM_ABI void
-__split_buffer<_Tp, _Allocator>::__destruct_at_end(pointer __new_last, false_type) _NOEXCEPT {
-  while (__new_last != __end_)
-    __alloc_traits::destroy(__alloc_, std::__to_address(--__end_));
+__split_buffer<_Tp, _Allocator, _Layout>::__destruct_at_end(pointer __new_last, false_type) _NOEXCEPT {
+  pointer __end = end();
+  // Updating end at every iteration is unnecessary because destruction can't throw.
+  while (__new_last != __end)
+    __alloc_traits::destroy(__get_allocator(), std::__to_address(--__end));
+  __set_sentinel(__end);
 }
 
-template <class _Tp, class _Allocator>
-_LIBCPP_CONSTEXPR_SINCE_CXX20 inline _LIBCPP_HIDE_FROM_ABI void
-__split_buffer<_Tp, _Allocator>::__destruct_at_end(pointer __new_last, true_type) _NOEXCEPT {
-  __end_ = __new_last;
-}
-
-template <class _Tp, class _Allocator>
+template <class _Tp, class _Allocator, template <class, class, class> class _Layout>
 _LIBCPP_CONSTEXPR_SINCE_CXX20
-__split_buffer<_Tp, _Allocator>::__split_buffer(size_type __cap, size_type __start, __alloc_rr& __a)
-    : __cap_(nullptr), __alloc_(__a) {
-  if (__cap == 0) {
-    __first_ = nullptr;
-  } else {
-    auto __allocation = std::__allocate_at_least(__alloc_, __cap);
-    __first_          = __allocation.ptr;
-    __cap             = __allocation.count;
+__split_buffer<_Tp, _Allocator, _Layout>::__split_buffer(size_type __cap, size_type __start, allocator_type& __a)
+    : __base_type(__a) {
+  _LIBCPP_ASSERT_INTERNAL(__cap >= __start, "can't have a start point outside the capacity");
+  if (__cap > 0) {
+    auto __allocation = std::__allocate_at_least(__get_allocator(), __cap);
+    __set_data(__allocation.ptr);
+    __cap = __allocation.count;
   }
-  __begin_ = __end_ = __first_ + __start;
-  __cap_            = __first_ + __cap;
+
+  pointer __begin = __front_cap() + __start;
+  __set_valid_range(__begin, __begin);
+  __set_capacity(__cap);
 }
 
-template <class _Tp, class _Allocator>
-_LIBCPP_CONSTEXPR_SINCE_CXX20 __split_buffer<_Tp, _Allocator>::~__split_buffer() {
+template <class _Tp, class _Allocator, template <class, class, class> class _Layout>
+_LIBCPP_CONSTEXPR_SINCE_CXX20 __split_buffer<_Tp, _Allocator, _Layout>::~__split_buffer() {
   clear();
-  if (__first_)
-    __alloc_traits::deallocate(__alloc_, __first_, capacity());
+  if (__front_cap())
+    __alloc_traits::deallocate(__get_allocator(), __front_cap(), capacity());
 }
 
-template <class _Tp, class _Allocator>
-_LIBCPP_CONSTEXPR_SINCE_CXX20 __split_buffer<_Tp, _Allocator>::__split_buffer(__split_buffer&& __c)
+template <class _Tp, class _Allocator, template <class, class, class> class _Layout>
+_LIBCPP_CONSTEXPR_SINCE_CXX20 __split_buffer<_Tp, _Allocator, _Layout>::__split_buffer(__split_buffer&& __c)
     _NOEXCEPT_(is_nothrow_move_constructible<allocator_type>::value)
-    : __first_(std::move(__c.__first_)),
-      __begin_(std::move(__c.__begin_)),
-      __end_(std::move(__c.__end_)),
-      __cap_(std::move(__c.__cap_)),
-      __alloc_(std::move(__c.__alloc_)) {
-  __c.__first_ = nullptr;
-  __c.__begin_ = nullptr;
-  __c.__end_   = nullptr;
-  __c.__cap_   = nullptr;
+    : __base_type(std::move(__c)) {
+  __c.__reset();
 }
 
-template <class _Tp, class _Allocator>
+template <class _Tp, class _Allocator, template <class, class, class> class _Layout>
 _LIBCPP_CONSTEXPR_SINCE_CXX20
-__split_buffer<_Tp, _Allocator>::__split_buffer(__split_buffer&& __c, const __alloc_rr& __a)
-    : __cap_(nullptr), __alloc_(__a) {
-  if (__a == __c.__alloc_) {
-    __first_     = __c.__first_;
-    __begin_     = __c.__begin_;
-    __end_       = __c.__end_;
-    __cap_       = __c.__cap_;
-    __c.__first_ = nullptr;
-    __c.__begin_ = nullptr;
-    __c.__end_   = nullptr;
-    __c.__cap_   = nullptr;
+__split_buffer<_Tp, _Allocator, _Layout>::__split_buffer(__split_buffer&& __c, const allocator_type& __a)
+    : __base_type(__a) {
+  if (__a == __c.__get_allocator()) {
+    __set_data(__c.__front_cap());
+    __set_valid_range(__c.begin(), __c.end());
+    __set_capacity(__c.capacity());
+    __c.__reset();
   } else {
-    auto __allocation = std::__allocate_at_least(__alloc_, __c.size());
-    __first_          = __allocation.ptr;
-    __begin_ = __end_ = __first_;
-    __cap_            = __first_ + __allocation.count;
+    auto __allocation = std::__allocate_at_least(__get_allocator(), __c.size());
+    __set_data(__allocation.ptr);
+    __set_valid_range(__front_cap(), __front_cap());
+    __set_capacity(__allocation.count);
     typedef move_iterator<iterator> _Ip;
     __construct_at_end(_Ip(__c.begin()), _Ip(__c.end()));
   }
 }
 
-template <class _Tp, class _Allocator>
-_LIBCPP_CONSTEXPR_SINCE_CXX20 __split_buffer<_Tp, _Allocator>&
-__split_buffer<_Tp, _Allocator>::operator=(__split_buffer&& __c)
+template <class _Tp, class _Allocator, template <class, class, class> class _Layout>
+_LIBCPP_CONSTEXPR_SINCE_CXX20 __split_buffer<_Tp, _Allocator, _Layout>&
+__split_buffer<_Tp, _Allocator, _Layout>::operator=(__split_buffer&& __c)
     _NOEXCEPT_((__alloc_traits::propagate_on_container_move_assignment::value &&
                 is_nothrow_move_assignable<allocator_type>::value) ||
                !__alloc_traits::propagate_on_container_move_assignment::value) {
   clear();
   shrink_to_fit();
-  __first_ = __c.__first_;
-  __begin_ = __c.__begin_;
-  __end_   = __c.__end_;
-  __cap_   = __c.__cap_;
+  __copy_without_alloc(__c);
   __move_assign_alloc(__c, integral_constant<bool, __alloc_traits::propagate_on_container_move_assignment::value>());
-  __c.__first_ = __c.__begin_ = __c.__end_ = __c.__cap_ = nullptr;
+  __c.__reset();
   return *this;
 }
 
-template <class _Tp, class _Allocator>
-_LIBCPP_CONSTEXPR_SINCE_CXX20 void __split_buffer<_Tp, _Allocator>::swap(__split_buffer& __x)
-    _NOEXCEPT_(!__alloc_traits::propagate_on_container_swap::value || __is_nothrow_swappable_v<__alloc_rr>) {
-  std::swap(__first_, __x.__first_);
-  std::swap(__begin_, __x.__begin_);
-  std::swap(__end_, __x.__end_);
-  std::swap(__cap_, __x.__cap_);
-  std::__swap_allocator(__alloc_, __x.__alloc_);
+template <class _Tp, class _Allocator, template <class, class, class> class _Layout>
+_LIBCPP_CONSTEXPR_SINCE_CXX20 void __split_buffer<_Tp, _Allocator, _Layout>::swap(__split_buffer& __x)
+    _NOEXCEPT_(!__alloc_traits::propagate_on_container_swap::value || __is_nothrow_swappable_v<allocator_type>) {
+  __base_type::swap(__x);
 }
 
-template <class _Tp, class _Allocator>
-_LIBCPP_CONSTEXPR_SINCE_CXX20 void __split_buffer<_Tp, _Allocator>::shrink_to_fit() _NOEXCEPT {
+template <class _Tp, class _Allocator, template <class, class, class> class _Layout>
+_LIBCPP_CONSTEXPR_SINCE_CXX20 void __split_buffer<_Tp, _Allocator, _Layout>::shrink_to_fit() _NOEXCEPT {
   if (capacity() > size()) {
 #if _LIBCPP_HAS_EXCEPTIONS
     try {
 #endif // _LIBCPP_HAS_EXCEPTIONS
-      __split_buffer<value_type, __alloc_rr&> __t(size(), 0, __alloc_);
+      __split_buffer<value_type, allocator_type, _Layout> __t(size(), 0, __get_allocator());
       if (__t.capacity() < capacity()) {
-        __t.__construct_at_end(move_iterator<pointer>(__begin_), move_iterator<pointer>(__end_));
-        __t.__end_ = __t.__begin_ + (__end_ - __begin_);
-        std::swap(__first_, __t.__first_);
-        std::swap(__begin_, __t.__begin_);
-        std::swap(__end_, __t.__end_);
-        std::swap(__cap_, __t.__cap_);
+        __t.__construct_at_end(move_iterator<pointer>(begin()), move_iterator<pointer>(end()));
+        __t.__set_sentinel(size());
+        __swap_without_allocator(__t);
       }
 #if _LIBCPP_HAS_EXCEPTIONS
     } catch (...) {
@@ -428,55 +799,56 @@ _LIBCPP_CONSTEXPR_SINCE_CXX20 void __split_buffer<_Tp, _Allocator>::shrink_to_fi
   }
 }
 
-template <class _Tp, class _Allocator>
+template <class _Tp, class _Allocator, template <class, class, class> class _Layout>
 template <class... _Args>
-_LIBCPP_CONSTEXPR_SINCE_CXX20 void __split_buffer<_Tp, _Allocator>::emplace_front(_Args&&... __args) {
-  if (__begin_ == __first_) {
-    if (__end_ < __cap_) {
-      difference_type __d = __cap_ - __end_;
+_LIBCPP_CONSTEXPR_SINCE_CXX20 void __split_buffer<_Tp, _Allocator, _Layout>::emplace_front(_Args&&... __args) {
+  if (__front_spare() == 0) {
+    pointer __end = end();
+    if (__back_spare() > 0) {
+      // The elements are pressed up against the front of the buffer: we need to move them back a
+      // little bit to make `emplace_front` have amortised O(1) complexity.
+      difference_type __d = __back_spare();
       __d                 = (__d + 1) / 2;
-      __begin_            = std::move_backward(__begin_, __end_, __end_ + __d);
-      __end_ += __d;
+      auto __new_end      = __end + __d;
+      __set_valid_range(std::move_backward(begin(), __end, __new_end), __new_end);
     } else {
-      size_type __c = std::max<size_type>(2 * static_cast<size_type>(__cap_ - __first_), 1);
-      __split_buffer<value_type, __alloc_rr&> __t(__c, (__c + 3) / 4, __alloc_);
-      __t.__construct_at_end(move_iterator<pointer>(__begin_), move_iterator<pointer>(__end_));
-      std::swap(__first_, __t.__first_);
-      std::swap(__begin_, __t.__begin_);
-      std::swap(__end_, __t.__end_);
-      std::swap(__cap_, __t.__cap_);
+      size_type __c = std::max<size_type>(2 * capacity(), 1);
+      __split_buffer<value_type, allocator_type, _Layout> __t(__c, (__c + 3) / 4, __get_allocator());
+      __t.__construct_at_end(move_iterator<pointer>(begin()), move_iterator<pointer>(__end));
+      __base_type::__swap_without_allocator(__t);
     }
   }
-  __alloc_traits::construct(__alloc_, std::__to_address(__begin_ - 1), std::forward<_Args>(__args)...);
-  --__begin_;
+
+  __alloc_traits::construct(__get_allocator(), std::__to_address(begin() - 1), std::forward<_Args>(__args)...);
+  __set_valid_range(begin() - 1, size() + 1);
 }
 
-template <class _Tp, class _Allocator>
+template <class _Tp, class _Allocator, template <class, class, class> class _Layout>
 template <class... _Args>
-_LIBCPP_CONSTEXPR_SINCE_CXX20 void __split_buffer<_Tp, _Allocator>::emplace_back(_Args&&... __args) {
-  if (__end_ == __cap_) {
-    if (__begin_ > __first_) {
-      difference_type __d = __begin_ - __first_;
+_LIBCPP_CONSTEXPR_SINCE_CXX20 void __split_buffer<_Tp, _Allocator, _Layout>::emplace_back(_Args&&... __args) {
+  pointer __end = end();
+  if (__back_spare() == 0) {
+    if (__front_spare() > 0) {
+      difference_type __d = __front_spare();
       __d                 = (__d + 1) / 2;
-      __end_              = std::move(__begin_, __end_, __begin_ - __d);
-      __begin_ -= __d;
+      __end               = std::move(begin(), __end, begin() - __d);
+      __set_valid_range(begin() - __d, __end);
     } else {
-      size_type __c = std::max<size_type>(2 * static_cast<size_type>(__cap_ - __first_), 1);
-      __split_buffer<value_type, __alloc_rr&> __t(__c, __c / 4, __alloc_);
-      __t.__construct_at_end(move_iterator<pointer>(__begin_), move_iterator<pointer>(__end_));
-      std::swap(__first_, __t.__first_);
-      std::swap(__begin_, __t.__begin_);
-      std::swap(__end_, __t.__end_);
-      std::swap(__cap_, __t.__cap_);
+      size_type __c = std::max<size_type>(2 * capacity(), 1);
+      __split_buffer<value_type, allocator_type, _Layout> __t(__c, __c / 4, __get_allocator());
+      __t.__construct_at_end(move_iterator<pointer>(begin()), move_iterator<pointer>(__end));
+      __base_type::__swap_without_allocator(__t);
     }
   }
-  __alloc_traits::construct(__alloc_, std::__to_address(__end_), std::forward<_Args>(__args)...);
-  ++__end_;
+  // Re-read end() here: the reallocating branch swaps in a new buffer, which leaves the cached __end dangling.
+  __alloc_traits::construct(__get_allocator(), std::__to_address(end()), std::forward<_Args>(__args)...);
+  __set_sentinel(end() + 1);
 }
 
-template <class _Tp, class _Allocator>
+template <class _Tp, class _Allocator, template <class, class, class> class _Layout>
 _LIBCPP_CONSTEXPR_SINCE_CXX20 inline _LIBCPP_HIDE_FROM_ABI void
-swap(__split_buffer<_Tp, _Allocator>& __x, __split_buffer<_Tp, _Allocator>& __y) _NOEXCEPT_(_NOEXCEPT_(__x.swap(__y))) {
+swap(__split_buffer<_Tp, _Allocator, _Layout>& __x, __split_buffer<_Tp, _Allocator, _Layout>& __y)
+    _NOEXCEPT_(_NOEXCEPT_(__x.swap(__y))) {
   __x.swap(__y);
 }
 
diff --git a/lib/libcxx/include/__stop_token/atomic_unique_lock.h b/lib/libcxx/include/__stop_token/atomic_unique_lock.h
index 05e8f22316..4b0ae05ca8 100644
--- a/lib/libcxx/include/__stop_token/atomic_unique_lock.h
+++ b/lib/libcxx/include/__stop_token/atomic_unique_lock.h
@@ -27,7 +27,7 @@ _LIBCPP_BEGIN_NAMESPACE_STD
 // where State contains a lock bit and might contain other data,
 // and LockedBit is the value of State when the lock bit is set, e.g  1 << 2
 template <class _State, _State _LockedBit>
-class _LIBCPP_AVAILABILITY_SYNC __atomic_unique_lock {
+class __atomic_unique_lock {
   static_assert(std::__popcount(static_cast<unsigned long long>(_LockedBit)) == 1,
                 "LockedBit must be an integer where only one bit is set");
 
diff --git a/lib/libcxx/include/__stop_token/stop_callback.h b/lib/libcxx/include/__stop_token/stop_callback.h
index a4d7a29953..76d438e096 100644
--- a/lib/libcxx/include/__stop_token/stop_callback.h
+++ b/lib/libcxx/include/__stop_token/stop_callback.h
@@ -34,7 +34,7 @@ _LIBCPP_BEGIN_NAMESPACE_STD
 #if _LIBCPP_STD_VER >= 20 && _LIBCPP_HAS_THREADS
 
 template <class _Callback>
-class _LIBCPP_AVAILABILITY_SYNC stop_callback : private __stop_callback_base {
+class stop_callback : private __stop_callback_base {
   static_assert(invocable<_Callback>,
                 "Mandates: stop_callback is instantiated with an argument for the template parameter Callback that "
                 "satisfies invocable.");
@@ -91,7 +91,7 @@ private:
 };
 
 template <class _Callback>
-_LIBCPP_AVAILABILITY_SYNC stop_callback(stop_token, _Callback) -> stop_callback<_Callback>;
+stop_callback(stop_token, _Callback) -> stop_callback<_Callback>;
 
 #endif // _LIBCPP_STD_VER >= 20 && _LIBCPP_HAS_THREADS
 
diff --git a/lib/libcxx/include/__stop_token/stop_source.h b/lib/libcxx/include/__stop_token/stop_source.h
index 85d67efe06..aea9429388 100644
--- a/lib/libcxx/include/__stop_token/stop_source.h
+++ b/lib/libcxx/include/__stop_token/stop_source.h
@@ -30,7 +30,7 @@ struct nostopstate_t {
 
 inline constexpr nostopstate_t nostopstate{};
 
-class _LIBCPP_AVAILABILITY_SYNC stop_source {
+class stop_source {
 public:
   _LIBCPP_HIDE_FROM_ABI stop_source() : __state_(new __stop_state()) { __state_->__increment_stop_source_counter(); }
 
diff --git a/lib/libcxx/include/__stop_token/stop_state.h b/lib/libcxx/include/__stop_token/stop_state.h
index cc1f1d830e..74fafbdc63 100644
--- a/lib/libcxx/include/__stop_token/stop_state.h
+++ b/lib/libcxx/include/__stop_token/stop_state.h
@@ -100,7 +100,7 @@ public:
     return ((__curent_state & __stop_requested_bit) != 0) || ((__curent_state >> __stop_source_counter_shift) != 0);
   }
 
-  _LIBCPP_AVAILABILITY_SYNC _LIBCPP_HIDE_FROM_ABI bool __request_stop() noexcept {
+  _LIBCPP_HIDE_FROM_ABI bool __request_stop() noexcept {
     auto __cb_list_lock = __try_lock_for_request_stop();
     if (!__cb_list_lock.__owns_lock()) {
       return false;
@@ -137,7 +137,7 @@ public:
     return true;
   }
 
-  _LIBCPP_AVAILABILITY_SYNC _LIBCPP_HIDE_FROM_ABI bool __add_callback(__stop_callback_base* __cb) noexcept {
+  _LIBCPP_HIDE_FROM_ABI bool __add_callback(__stop_callback_base* __cb) noexcept {
     // If it is already stop_requested. Do not try to request it again.
     const auto __give_up_trying_to_lock_condition = [__cb](__state_t __state) {
       if ((__state & __stop_requested_bit) != 0) {
@@ -164,7 +164,7 @@ public:
   }
 
   // called by the destructor of stop_callback
-  _LIBCPP_AVAILABILITY_SYNC _LIBCPP_HIDE_FROM_ABI void __remove_callback(__stop_callback_base* __cb) noexcept {
+  _LIBCPP_HIDE_FROM_ABI void __remove_callback(__stop_callback_base* __cb) noexcept {
     __callback_list_lock __cb_list_lock(__state_);
 
     // under below condition, the request_stop call just popped __cb from the list and could execute it now
@@ -192,7 +192,7 @@ public:
   }
 
 private:
-  _LIBCPP_AVAILABILITY_SYNC _LIBCPP_HIDE_FROM_ABI __callback_list_lock __try_lock_for_request_stop() noexcept {
+  _LIBCPP_HIDE_FROM_ABI __callback_list_lock __try_lock_for_request_stop() noexcept {
     // If it is already stop_requested, do not try to request stop or lock the list again.
     const auto __lock_fail_condition = [](__state_t __state) { return (__state & __stop_requested_bit) != 0; };
 
diff --git a/lib/libcxx/include/__stop_token/stop_token.h b/lib/libcxx/include/__stop_token/stop_token.h
index 178b1728c3..4a6ca27ac4 100644
--- a/lib/libcxx/include/__stop_token/stop_token.h
+++ b/lib/libcxx/include/__stop_token/stop_token.h
@@ -22,7 +22,7 @@ _LIBCPP_BEGIN_NAMESPACE_STD
 
 #if _LIBCPP_STD_VER >= 20 && _LIBCPP_HAS_THREADS
 
-class _LIBCPP_AVAILABILITY_SYNC stop_token {
+class stop_token {
 public:
   _LIBCPP_HIDE_FROM_ABI stop_token() noexcept = default;
 
diff --git a/lib/libcxx/include/__string/char_traits.h b/lib/libcxx/include/__string/char_traits.h
index 86c92477cb..d98595030e 100644
--- a/lib/libcxx/include/__string/char_traits.h
+++ b/lib/libcxx/include/__string/char_traits.h
@@ -94,16 +94,17 @@ struct char_traits<char> {
   }
 
   // TODO: Make this _LIBCPP_HIDE_FROM_ABI
-  static inline _LIBCPP_HIDDEN _LIBCPP_CONSTEXPR bool eq(char_type __c1, char_type __c2) _NOEXCEPT {
+  [[__nodiscard__]] static inline _LIBCPP_HIDDEN _LIBCPP_CONSTEXPR bool eq(char_type __c1, char_type __c2) _NOEXCEPT {
     return __c1 == __c2;
   }
-  static inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR bool lt(char_type __c1, char_type __c2) _NOEXCEPT {
+  [[__nodiscard__]] static inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR bool
+  lt(char_type __c1, char_type __c2) _NOEXCEPT {
     return (unsigned char)__c1 < (unsigned char)__c2;
   }
 
   // __constexpr_memcmp requires a trivially lexicographically comparable type, but char is not when char is a signed
   // type
-  static _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX17 int
+  [[__nodiscard__]] static _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX17 int
   compare(const char_type* __lhs, const char_type* __rhs, size_t __count) _NOEXCEPT {
     if (__libcpp_is_constant_evaluated()) {
 #ifdef _LIBCPP_COMPILER_CLANG_BASED
@@ -126,11 +127,12 @@ struct char_traits<char> {
     }
   }
 
-  static inline _LIBCPP_HIDE_FROM_ABI size_t _LIBCPP_CONSTEXPR_SINCE_CXX17 length(const char_type* __s) _NOEXCEPT {
+  [[__nodiscard__]] static inline _LIBCPP_HIDE_FROM_ABI size_t _LIBCPP_CONSTEXPR_SINCE_CXX17
+  length(const char_type* __s) _NOEXCEPT {
     return std::__constexpr_strlen(__s);
   }
 
-  static _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX17 const char_type*
+  [[__nodiscard__]] static _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX17 const char_type*
   find(const char_type* __s, size_t __n, const char_type& __a) _NOEXCEPT {
     return std::__constexpr_memchr(__s, __a, __n);
   }
@@ -154,19 +156,24 @@ struct char_traits<char> {
     return __s;
   }
 
-  static inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR int_type not_eof(int_type __c) _NOEXCEPT {
+  [[__nodiscard__]] static inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR int_type not_eof(int_type __c) _NOEXCEPT {
     return eq_int_type(__c, eof()) ? ~eof() : __c;
   }
-  static inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR char_type to_char_type(int_type __c) _NOEXCEPT {
+  [[__nodiscard__]] static inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR char_type
+  to_char_type(int_type __c) _NOEXCEPT {
     return char_type(__c);
   }
-  static inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR int_type to_int_type(char_type __c) _NOEXCEPT {
+  [[__nodiscard__]] static inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR int_type
+  to_int_type(char_type __c) _NOEXCEPT {
     return int_type((unsigned char)__c);
   }
-  static inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR bool eq_int_type(int_type __c1, int_type __c2) _NOEXCEPT {
+  [[__nodiscard__]] static inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR bool
+  eq_int_type(int_type __c1, int_type __c2) _NOEXCEPT {
     return __c1 == __c2;
   }
-  static inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR int_type eof() _NOEXCEPT { return int_type(EOF); }
+  [[__nodiscard__]] static inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR int_type eof() _NOEXCEPT {
+    return int_type(EOF);
+  }
 };
 
 template <class _CharT, class _IntT, _IntT _EOFVal>
@@ -187,11 +194,11 @@ struct __char_traits_base {
     __lhs = __rhs;
   }
 
-  _LIBCPP_HIDE_FROM_ABI static _LIBCPP_CONSTEXPR bool eq(char_type __lhs, char_type __rhs) _NOEXCEPT {
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI static _LIBCPP_CONSTEXPR bool eq(char_type __lhs, char_type __rhs) _NOEXCEPT {
     return __lhs == __rhs;
   }
 
-  _LIBCPP_HIDE_FROM_ABI static _LIBCPP_CONSTEXPR bool lt(char_type __lhs, char_type __rhs) _NOEXCEPT {
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI static _LIBCPP_CONSTEXPR bool lt(char_type __lhs, char_type __rhs) _NOEXCEPT {
     return __lhs < __rhs;
   }
 
@@ -213,19 +220,22 @@ struct __char_traits_base {
     return __str;
   }
 
-  _LIBCPP_HIDE_FROM_ABI static _LIBCPP_CONSTEXPR char_type to_char_type(int_type __c) _NOEXCEPT {
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI static _LIBCPP_CONSTEXPR char_type to_char_type(int_type __c) _NOEXCEPT {
     return char_type(__c);
   }
 
-  _LIBCPP_HIDE_FROM_ABI static _LIBCPP_CONSTEXPR int_type to_int_type(char_type __c) _NOEXCEPT { return int_type(__c); }
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI static _LIBCPP_CONSTEXPR int_type to_int_type(char_type __c) _NOEXCEPT {
+    return int_type(__c);
+  }
 
-  _LIBCPP_HIDE_FROM_ABI static _LIBCPP_CONSTEXPR bool eq_int_type(int_type __lhs, int_type __rhs) _NOEXCEPT {
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI static _LIBCPP_CONSTEXPR bool
+  eq_int_type(int_type __lhs, int_type __rhs) _NOEXCEPT {
     return __lhs == __rhs;
   }
 
-  _LIBCPP_HIDE_FROM_ABI static _LIBCPP_CONSTEXPR int_type eof() _NOEXCEPT { return _EOFVal; }
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI static _LIBCPP_CONSTEXPR int_type eof() _NOEXCEPT { return _EOFVal; }
 
-  _LIBCPP_HIDE_FROM_ABI static _LIBCPP_CONSTEXPR int_type not_eof(int_type __c) _NOEXCEPT {
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI static _LIBCPP_CONSTEXPR int_type not_eof(int_type __c) _NOEXCEPT {
     return eq_int_type(__c, eof()) ? static_cast<int_type>(~eof()) : __c;
   }
 };
@@ -235,18 +245,19 @@ struct __char_traits_base {
 #if _LIBCPP_HAS_WIDE_CHARACTERS
 template <>
 struct char_traits<wchar_t> : __char_traits_base<wchar_t, wint_t, static_cast<wint_t>(WEOF)> {
-  static _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX17 int
+  [[__nodiscard__]] static _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX17 int
   compare(const char_type* __s1, const char_type* __s2, size_t __n) _NOEXCEPT {
     if (__n == 0)
       return 0;
     return std::__constexpr_wmemcmp(__s1, __s2, __n);
   }
 
-  static _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX17 size_t length(const char_type* __s) _NOEXCEPT {
+  [[__nodiscard__]] static _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX17 size_t
+  length(const char_type* __s) _NOEXCEPT {
     return std::__constexpr_wcslen(__s);
   }
 
-  static _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX17 const char_type*
+  [[__nodiscard__]] static _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX17 const char_type*
   find(const char_type* __s, size_t __n, const char_type& __a) _NOEXCEPT {
     return std::__constexpr_wmemchr(__s, __a, __n);
   }
@@ -257,16 +268,16 @@ struct char_traits<wchar_t> : __char_traits_base<wchar_t, wint_t, static_cast<wi
 
 template <>
 struct char_traits<char8_t> : __char_traits_base<char8_t, unsigned int, static_cast<unsigned int>(EOF)> {
-  static _LIBCPP_HIDE_FROM_ABI constexpr int
+  [[nodiscard]] static _LIBCPP_HIDE_FROM_ABI constexpr int
   compare(const char_type* __s1, const char_type* __s2, size_t __n) noexcept {
     return std::__constexpr_memcmp(__s1, __s2, __element_count(__n));
   }
 
-  static _LIBCPP_HIDE_FROM_ABI constexpr size_t length(const char_type* __str) noexcept {
+  [[nodiscard]] static _LIBCPP_HIDE_FROM_ABI constexpr size_t length(const char_type* __str) noexcept {
     return std::__constexpr_strlen(__str);
   }
 
-  _LIBCPP_HIDE_FROM_ABI static constexpr const char_type*
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI static constexpr const char_type*
   find(const char_type* __s, size_t __n, const char_type& __a) noexcept {
     return std::__constexpr_memchr(__s, __a, __n);
   }
@@ -276,11 +287,11 @@ struct char_traits<char8_t> : __char_traits_base<char8_t, unsigned int, static_c
 
 template <>
 struct char_traits<char16_t> : __char_traits_base<char16_t, uint_least16_t, static_cast<uint_least16_t>(0xFFFF)> {
-  _LIBCPP_HIDE_FROM_ABI static _LIBCPP_CONSTEXPR_SINCE_CXX17 int
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI static _LIBCPP_CONSTEXPR_SINCE_CXX17 int
   compare(const char_type* __s1, const char_type* __s2, size_t __n) _NOEXCEPT;
   _LIBCPP_HIDE_FROM_ABI static _LIBCPP_CONSTEXPR_SINCE_CXX17 size_t length(const char_type* __s) _NOEXCEPT;
 
-  _LIBCPP_HIDE_FROM_ABI static _LIBCPP_CONSTEXPR_SINCE_CXX17 const char_type*
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI static _LIBCPP_CONSTEXPR_SINCE_CXX17 const char_type*
   find(const char_type* __s, size_t __n, const char_type& __a) _NOEXCEPT {
     __identity __proj;
     const char_type* __match = std::__find(__s, __s + __n, __a, __proj);
@@ -290,7 +301,7 @@ struct char_traits<char16_t> : __char_traits_base<char16_t, uint_least16_t, stat
   }
 };
 
-inline _LIBCPP_CONSTEXPR_SINCE_CXX17 int
+[[__nodiscard__]] inline _LIBCPP_CONSTEXPR_SINCE_CXX17 int
 char_traits<char16_t>::compare(const char_type* __s1, const char_type* __s2, size_t __n) _NOEXCEPT {
   for (; __n; --__n, ++__s1, ++__s2) {
     if (lt(*__s1, *__s2))
@@ -301,7 +312,8 @@ char_traits<char16_t>::compare(const char_type* __s1, const char_type* __s2, siz
   return 0;
 }
 
-inline _LIBCPP_CONSTEXPR_SINCE_CXX17 size_t char_traits<char16_t>::length(const char_type* __s) _NOEXCEPT {
+[[__nodiscard__]] inline _LIBCPP_CONSTEXPR_SINCE_CXX17 size_t
+char_traits<char16_t>::length(const char_type* __s) _NOEXCEPT {
   size_t __len = 0;
   for (; !eq(*__s, char_type(0)); ++__s)
     ++__len;
@@ -310,11 +322,11 @@ inline _LIBCPP_CONSTEXPR_SINCE_CXX17 size_t char_traits<char16_t>::length(const
 
 template <>
 struct char_traits<char32_t> : __char_traits_base<char32_t, uint_least32_t, static_cast<uint_least32_t>(0xFFFFFFFF)> {
-  _LIBCPP_HIDE_FROM_ABI static _LIBCPP_CONSTEXPR_SINCE_CXX17 int
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI static _LIBCPP_CONSTEXPR_SINCE_CXX17 int
   compare(const char_type* __s1, const char_type* __s2, size_t __n) _NOEXCEPT;
   _LIBCPP_HIDE_FROM_ABI static _LIBCPP_CONSTEXPR_SINCE_CXX17 size_t length(const char_type* __s) _NOEXCEPT;
 
-  _LIBCPP_HIDE_FROM_ABI static _LIBCPP_CONSTEXPR_SINCE_CXX17 const char_type*
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI static _LIBCPP_CONSTEXPR_SINCE_CXX17 const char_type*
   find(const char_type* __s, size_t __n, const char_type& __a) _NOEXCEPT {
     __identity __proj;
     const char_type* __match = std::__find(__s, __s + __n, __a, __proj);
@@ -324,7 +336,7 @@ struct char_traits<char32_t> : __char_traits_base<char32_t, uint_least32_t, stat
   }
 };
 
-inline _LIBCPP_CONSTEXPR_SINCE_CXX17 int
+[[__nodiscard__]] inline _LIBCPP_CONSTEXPR_SINCE_CXX17 int
 char_traits<char32_t>::compare(const char_type* __s1, const char_type* __s2, size_t __n) _NOEXCEPT {
   for (; __n; --__n, ++__s1, ++__s2) {
     if (lt(*__s1, *__s2))
@@ -335,7 +347,8 @@ char_traits<char32_t>::compare(const char_type* __s1, const char_type* __s2, siz
   return 0;
 }
 
-inline _LIBCPP_CONSTEXPR_SINCE_CXX17 size_t char_traits<char32_t>::length(const char_type* __s) _NOEXCEPT {
+[[__nodiscard__]] inline _LIBCPP_CONSTEXPR_SINCE_CXX17 size_t
+char_traits<char32_t>::length(const char_type* __s) _NOEXCEPT {
   size_t __len = 0;
   for (; !eq(*__s, char_type(0)); ++__s)
     ++__len;
@@ -369,6 +382,13 @@ _LIBCPP_HIDE_FROM_ABI inline _LIBCPP_CONSTEXPR_SINCE_CXX14 const _CharT* __searc
   if (__len1 < __len2)
     return __last1;
 
+  if (__builtin_constant_p(__len2 == 1) && __len2 == 1) {
+    auto __res = _Traits::find(__first1, __len1, *__first2);
+    if (__res == nullptr)
+      return __last1;
+    return __res;
+  }
+
   // First element of __first2 is loop invariant.
   _CharT __f2 = *__first2;
   while (true) {
diff --git a/lib/libcxx/include/__string/constexpr_c_functions.h b/lib/libcxx/include/__string/constexpr_c_functions.h
index 119669e16b..4b05e862b8 100644
--- a/lib/libcxx/include/__string/constexpr_c_functions.h
+++ b/lib/libcxx/include/__string/constexpr_c_functions.h
@@ -22,7 +22,6 @@
 #include <__type_traits/is_equality_comparable.h>
 #include <__type_traits/is_integral.h>
 #include <__type_traits/is_same.h>
-#include <__type_traits/is_trivially_copyable.h>
 #include <__type_traits/is_trivially_lexicographically_comparable.h>
 #include <__type_traits/remove_cv.h>
 #include <__utility/element_count.h>
@@ -96,14 +95,13 @@ __constexpr_memcmp(const _Tp* __lhs, const _Up* __rhs, __element_count __n) {
   }
 }
 
-// Because of __libcpp_is_trivially_equality_comparable we know that comparing the object representations is equivalent
+// Because of __is_trivially_equality_comparable_v we know that comparing the object representations is equivalent
 // to a std::memcmp(...) == 0. Since we have multiple objects contiguously in memory, we can call memcmp once instead
 // of invoking it on every object individually.
 template <class _Tp, class _Up>
 _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 bool
 __constexpr_memcmp_equal(const _Tp* __lhs, const _Up* __rhs, __element_count __n) {
-  static_assert(__libcpp_is_trivially_equality_comparable<_Tp, _Up>::value,
-                "_Tp and _Up have to be trivially equality comparable");
+  static_assert(__is_trivially_equality_comparable_v<_Tp, _Up>, "_Tp and _Up have to be trivially equality comparable");
 
   auto __count = static_cast<size_t>(__n);
 
@@ -128,7 +126,7 @@ __constexpr_memcmp_equal(const _Tp* __lhs, const _Up* __rhs, __element_count __n
 
 template <class _Tp, class _Up>
 _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 _Tp* __constexpr_memchr(_Tp* __str, _Up __value, size_t __count) {
-  static_assert(sizeof(_Tp) == 1 && __libcpp_is_trivially_equality_comparable<_Tp, _Up>::value,
+  static_assert(sizeof(_Tp) == 1 && __is_trivially_equality_comparable_v<_Tp, _Up>,
                 "Calling memchr on non-trivially equality comparable types is unsafe.");
 
   if (__libcpp_is_constant_evaluated()) {
@@ -225,6 +223,8 @@ __constexpr_memmove(_Tp* __dest, _Up* __src, __element_count __n) {
           std::__assign_trivially_copyable(__dest[__i], __src[__i]);
       }
     }
+  } else if _LIBCPP_CONSTEXPR (sizeof(_Tp) == __datasizeof_v<_Tp>) {
+    ::__builtin_memmove(__dest, __src, __count * sizeof(_Tp));
   } else if (__count > 0) {
     ::__builtin_memmove(__dest, __src, (__count - 1) * sizeof(_Tp) + __datasizeof_v<_Tp>);
   }
diff --git a/lib/libcxx/include/__support/xlocale/__strtonum_fallback.h b/lib/libcxx/include/__support/xlocale/__strtonum_fallback.h
index 5275aead35..90bd59d36c 100644
--- a/lib/libcxx/include/__support/xlocale/__strtonum_fallback.h
+++ b/lib/libcxx/include/__support/xlocale/__strtonum_fallback.h
@@ -34,12 +34,4 @@ inline _LIBCPP_HIDE_FROM_ABI long double strtold_l(const char* __nptr, char** __
   return ::strtold(__nptr, __endptr);
 }
 
-inline _LIBCPP_HIDE_FROM_ABI long long strtoll_l(const char* __nptr, char** __endptr, int __base, locale_t) {
-  return ::strtoll(__nptr, __endptr, __base);
-}
-
-inline _LIBCPP_HIDE_FROM_ABI unsigned long long strtoull_l(const char* __nptr, char** __endptr, int __base, locale_t) {
-  return ::strtoull(__nptr, __endptr, __base);
-}
-
 #endif // _LIBCPP___SUPPORT_XLOCALE_STRTONUM_FALLBACK_H
diff --git a/lib/libcxx/include/__system_error/error_category.h b/lib/libcxx/include/__system_error/error_category.h
index 7233e22110..7f7c7355c7 100644
--- a/lib/libcxx/include/__system_error/error_category.h
+++ b/lib/libcxx/include/__system_error/error_category.h
@@ -20,7 +20,7 @@
 
 _LIBCPP_BEGIN_NAMESPACE_STD
 
-class _LIBCPP_EXPORTED_FROM_ABI error_condition;
+class error_condition;
 class _LIBCPP_EXPORTED_FROM_ABI error_code;
 
 class _LIBCPP_HIDDEN __do_message;
@@ -37,11 +37,11 @@ public:
   error_category(const error_category&)            = delete;
   error_category& operator=(const error_category&) = delete;
 
-  virtual const char* name() const _NOEXCEPT = 0;
-  virtual error_condition default_error_condition(int __ev) const _NOEXCEPT;
-  virtual bool equivalent(int __code, const error_condition& __condition) const _NOEXCEPT;
-  virtual bool equivalent(const error_code& __code, int __condition) const _NOEXCEPT;
-  virtual string message(int __ev) const = 0;
+  [[__nodiscard__]] virtual const char* name() const _NOEXCEPT = 0;
+  [[__nodiscard__]] virtual error_condition default_error_condition(int __ev) const _NOEXCEPT;
+  [[__nodiscard__]] virtual bool equivalent(int __code, const error_condition& __condition) const _NOEXCEPT;
+  [[__nodiscard__]] virtual bool equivalent(const error_code& __code, int __condition) const _NOEXCEPT;
+  [[__nodiscard__]] virtual string message(int __ev) const = 0;
 
   _LIBCPP_HIDE_FROM_ABI bool operator==(const error_category& __rhs) const _NOEXCEPT { return this == &__rhs; }
 
@@ -67,8 +67,8 @@ public:
   string message(int __ev) const override;
 };
 
-[[__gnu__::__const__]] _LIBCPP_EXPORTED_FROM_ABI const error_category& generic_category() _NOEXCEPT;
-[[__gnu__::__const__]] _LIBCPP_EXPORTED_FROM_ABI const error_category& system_category() _NOEXCEPT;
+[[__gnu__::__const__]] [[__nodiscard__]] _LIBCPP_EXPORTED_FROM_ABI const error_category& generic_category() _NOEXCEPT;
+[[__gnu__::__const__]] [[__nodiscard__]] _LIBCPP_EXPORTED_FROM_ABI const error_category& system_category() _NOEXCEPT;
 
 _LIBCPP_END_NAMESPACE_STD
 
diff --git a/lib/libcxx/include/__system_error/error_code.h b/lib/libcxx/include/__system_error/error_code.h
index f6ea40d6ef..e904376939 100644
--- a/lib/libcxx/include/__system_error/error_code.h
+++ b/lib/libcxx/include/__system_error/error_code.h
@@ -71,20 +71,20 @@ public:
     __cat_ = &system_category();
   }
 
-  _LIBCPP_HIDE_FROM_ABI int value() const _NOEXCEPT { return __val_; }
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI int value() const _NOEXCEPT { return __val_; }
 
-  _LIBCPP_HIDE_FROM_ABI const error_category& category() const _NOEXCEPT { return *__cat_; }
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI const error_category& category() const _NOEXCEPT { return *__cat_; }
 
-  _LIBCPP_HIDE_FROM_ABI error_condition default_error_condition() const _NOEXCEPT {
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI error_condition default_error_condition() const _NOEXCEPT {
     return __cat_->default_error_condition(__val_);
   }
 
-  string message() const;
+  [[__nodiscard__]] string message() const;
 
   _LIBCPP_HIDE_FROM_ABI explicit operator bool() const _NOEXCEPT { return __val_ != 0; }
 };
 
-inline _LIBCPP_HIDE_FROM_ABI error_code make_error_code(errc __e) _NOEXCEPT {
+[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI error_code make_error_code(errc __e) _NOEXCEPT {
   return error_code(static_cast<int>(__e), generic_category());
 }
 
diff --git a/lib/libcxx/include/__system_error/error_condition.h b/lib/libcxx/include/__system_error/error_condition.h
index 34819f4b6d..be7deaba04 100644
--- a/lib/libcxx/include/__system_error/error_condition.h
+++ b/lib/libcxx/include/__system_error/error_condition.h
@@ -80,15 +80,15 @@ public:
     __cat_ = &generic_category();
   }
 
-  _LIBCPP_HIDE_FROM_ABI int value() const _NOEXCEPT { return __val_; }
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI int value() const _NOEXCEPT { return __val_; }
 
-  _LIBCPP_HIDE_FROM_ABI const error_category& category() const _NOEXCEPT { return *__cat_; }
-  string message() const;
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI const error_category& category() const _NOEXCEPT { return *__cat_; }
+  [[__nodiscard__]] string message() const;
 
   _LIBCPP_HIDE_FROM_ABI explicit operator bool() const _NOEXCEPT { return __val_ != 0; }
 };
 
-inline _LIBCPP_HIDE_FROM_ABI error_condition make_error_condition(errc __e) _NOEXCEPT {
+[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI error_condition make_error_condition(errc __e) _NOEXCEPT {
   return error_condition(static_cast<int>(__e), generic_category());
 }
 
diff --git a/lib/libcxx/include/__system_error/system_error.h b/lib/libcxx/include/__system_error/system_error.h
index 36ccf94cc0..74427d8f0b 100644
--- a/lib/libcxx/include/__system_error/system_error.h
+++ b/lib/libcxx/include/__system_error/system_error.h
@@ -36,7 +36,7 @@ public:
   _LIBCPP_HIDE_FROM_ABI system_error(const system_error&) _NOEXCEPT = default;
   ~system_error() _NOEXCEPT override;
 
-  _LIBCPP_HIDE_FROM_ABI const error_code& code() const _NOEXCEPT { return __ec_; }
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI const error_code& code() const _NOEXCEPT { return __ec_; }
 };
 
 // __ev is expected to be an error in the generic_category domain (e.g. from
diff --git a/lib/libcxx/include/__thread/id.h b/lib/libcxx/include/__thread/id.h
index c9c86c80c8..14a51fc9ee 100644
--- a/lib/libcxx/include/__thread/id.h
+++ b/lib/libcxx/include/__thread/id.h
@@ -23,7 +23,7 @@
 _LIBCPP_BEGIN_NAMESPACE_STD
 
 #if _LIBCPP_HAS_THREADS
-class _LIBCPP_EXPORTED_FROM_ABI __thread_id;
+class __thread_id;
 
 namespace this_thread {
 
diff --git a/lib/libcxx/include/__thread/jthread.h b/lib/libcxx/include/__thread/jthread.h
index 7289b835d3..481ffe296c 100644
--- a/lib/libcxx/include/__thread/jthread.h
+++ b/lib/libcxx/include/__thread/jthread.h
@@ -36,7 +36,7 @@ _LIBCPP_PUSH_MACROS
 
 _LIBCPP_BEGIN_NAMESPACE_STD
 
-class _LIBCPP_AVAILABILITY_SYNC jthread {
+class jthread {
 public:
   // types
   using id                 = thread::id;
diff --git a/lib/libcxx/include/__thread/poll_with_backoff.h b/lib/libcxx/include/__thread/poll_with_backoff.h
index 4f961fe3f7..e007e7746c 100644
--- a/lib/libcxx/include/__thread/poll_with_backoff.h
+++ b/lib/libcxx/include/__thread/poll_with_backoff.h
@@ -22,33 +22,50 @@ _LIBCPP_BEGIN_NAMESPACE_STD
 
 static _LIBCPP_CONSTEXPR const int __libcpp_polling_count = 64;
 
+enum class __backoff_results : unsigned char {
+  __continue_poll   = 1,
+  __poll_success    = 2,
+  __timeout         = 3,
+  __backoff_failure = 4,
+};
+
+enum class __poll_with_backoff_results : unsigned char {
+  __poll_success    = static_cast<unsigned char>(__backoff_results::__poll_success),
+  __timeout         = static_cast<unsigned char>(__backoff_results::__timeout),
+  __backoff_failure = static_cast<unsigned char>(__backoff_results::__backoff_failure),
+};
+
 // Polls a thread for a condition given by a predicate, and backs off based on a backoff policy
 // before polling again.
 //
 // - __poll is the "test function" that should return true if polling succeeded, and false if it failed.
 //
 // - __backoff is the "backoff policy", which is called with the duration since we started polling. It should
-//   return false in order to resume polling, and true if polling should stop entirely for some reason.
+//   return __backoff_results::__continue_poll in order to resume polling, and another appropriate __backoff_results
+//   value if polling should stop entirely for some reason.
 //   In general, backoff policies sleep for some time before returning control to the polling loop.
 //
 // - __max_elapsed is the maximum duration to try polling for. If the maximum duration is exceeded,
-//   the polling loop will return false to report a timeout.
+//   the polling loop will return __poll_with_backoff_results::__timeout to report a timeout.
+
 template <class _Poll, class _Backoff>
-_LIBCPP_AVAILABILITY_SYNC _LIBCPP_HIDE_FROM_ABI bool __libcpp_thread_poll_with_backoff(
+_LIBCPP_HIDE_FROM_ABI __poll_with_backoff_results __libcpp_thread_poll_with_backoff(
     _Poll&& __poll, _Backoff&& __backoff, chrono::nanoseconds __max_elapsed = chrono::nanoseconds::zero()) {
   auto const __start = chrono::high_resolution_clock::now();
   for (int __count = 0;;) {
     if (__poll())
-      return true; // __poll completion means success
+      return __poll_with_backoff_results::__poll_success;
     if (__count < __libcpp_polling_count) {
       __count += 1;
       continue;
     }
     chrono::nanoseconds const __elapsed = chrono::high_resolution_clock::now() - __start;
     if (__max_elapsed != chrono::nanoseconds::zero() && __max_elapsed < __elapsed)
-      return false; // timeout failure
-    if (__backoff(__elapsed))
-      return false; // __backoff completion means failure
+      return __poll_with_backoff_results::__timeout;
+    if (auto __backoff_res = __backoff(__elapsed); __backoff_res == __backoff_results::__continue_poll)
+      continue;
+    else
+      return static_cast<__poll_with_backoff_results>(__backoff_res);
   }
 }
 
@@ -59,7 +76,9 @@ _LIBCPP_AVAILABILITY_SYNC _LIBCPP_HIDE_FROM_ABI bool __libcpp_thread_poll_with_b
 // so this should most likely only be used on single-threaded systems where there
 // are no other threads to compete with.
 struct __spinning_backoff_policy {
-  _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR bool operator()(chrono::nanoseconds const&) const { return false; }
+  _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR __backoff_results operator()(chrono::nanoseconds const&) const {
+    return __backoff_results::__continue_poll;
+  }
 };
 
 _LIBCPP_END_NAMESPACE_STD
diff --git a/lib/libcxx/include/__thread/support/c11.h b/lib/libcxx/include/__thread/support/c11.h
index fe00a2d97f..463c8496ba 100644
--- a/lib/libcxx/include/__thread/support/c11.h
+++ b/lib/libcxx/include/__thread/support/c11.h
@@ -39,17 +39,17 @@ inline _LIBCPP_HIDE_FROM_ABI int __libcpp_recursive_mutex_init(__libcpp_recursiv
   return mtx_init(__m, mtx_plain | mtx_recursive) == thrd_success ? 0 : EINVAL;
 }
 
-inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_NO_THREAD_SAFETY_ANALYSIS int
+_LIBCPP_NO_THREAD_SAFETY_ANALYSIS inline _LIBCPP_HIDE_FROM_ABI int
 __libcpp_recursive_mutex_lock(__libcpp_recursive_mutex_t* __m) {
   return mtx_lock(__m) == thrd_success ? 0 : EINVAL;
 }
 
-inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_NO_THREAD_SAFETY_ANALYSIS bool
+_LIBCPP_NO_THREAD_SAFETY_ANALYSIS inline _LIBCPP_HIDE_FROM_ABI bool
 __libcpp_recursive_mutex_trylock(__libcpp_recursive_mutex_t* __m) {
   return mtx_trylock(__m) == thrd_success;
 }
 
-inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_NO_THREAD_SAFETY_ANALYSIS int
+_LIBCPP_NO_THREAD_SAFETY_ANALYSIS inline _LIBCPP_HIDE_FROM_ABI int
 __libcpp_recursive_mutex_unlock(__libcpp_recursive_mutex_t* __m) {
   return mtx_unlock(__m) == thrd_success ? 0 : EINVAL;
 }
@@ -59,15 +59,15 @@ inline _LIBCPP_HIDE_FROM_ABI int __libcpp_recursive_mutex_destroy(__libcpp_recur
   return 0;
 }
 
-inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_NO_THREAD_SAFETY_ANALYSIS int __libcpp_mutex_lock(__libcpp_mutex_t* __m) {
+_LIBCPP_NO_THREAD_SAFETY_ANALYSIS inline _LIBCPP_HIDE_FROM_ABI int __libcpp_mutex_lock(__libcpp_mutex_t* __m) {
   return mtx_lock(__m) == thrd_success ? 0 : EINVAL;
 }
 
-inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_NO_THREAD_SAFETY_ANALYSIS bool __libcpp_mutex_trylock(__libcpp_mutex_t* __m) {
+_LIBCPP_NO_THREAD_SAFETY_ANALYSIS inline _LIBCPP_HIDE_FROM_ABI bool __libcpp_mutex_trylock(__libcpp_mutex_t* __m) {
   return mtx_trylock(__m) == thrd_success;
 }
 
-inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_NO_THREAD_SAFETY_ANALYSIS int __libcpp_mutex_unlock(__libcpp_mutex_t* __m) {
+_LIBCPP_NO_THREAD_SAFETY_ANALYSIS inline _LIBCPP_HIDE_FROM_ABI int __libcpp_mutex_unlock(__libcpp_mutex_t* __m) {
   return mtx_unlock(__m) == thrd_success ? 0 : EINVAL;
 }
 
@@ -92,12 +92,12 @@ inline _LIBCPP_HIDE_FROM_ABI int __libcpp_condvar_broadcast(__libcpp_condvar_t*
   return cnd_broadcast(__cv) == thrd_success ? 0 : EINVAL;
 }
 
-inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_NO_THREAD_SAFETY_ANALYSIS int
+_LIBCPP_NO_THREAD_SAFETY_ANALYSIS inline _LIBCPP_HIDE_FROM_ABI int
 __libcpp_condvar_wait(__libcpp_condvar_t* __cv, __libcpp_mutex_t* __m) {
   return cnd_wait(__cv, __m) == thrd_success ? 0 : EINVAL;
 }
 
-inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_NO_THREAD_SAFETY_ANALYSIS int
+_LIBCPP_NO_THREAD_SAFETY_ANALYSIS inline _LIBCPP_HIDE_FROM_ABI int
 __libcpp_condvar_timedwait(__libcpp_condvar_t* __cv, __libcpp_mutex_t* __m, timespec* __ts) {
   int __ec = cnd_timedwait(__cv, __m, __ts);
   return __ec == thrd_timedout ? ETIMEDOUT : __ec;
diff --git a/lib/libcxx/include/__thread/support/pthread.h b/lib/libcxx/include/__thread/support/pthread.h
index 14e92079da..4cf5c03424 100644
--- a/lib/libcxx/include/__thread/support/pthread.h
+++ b/lib/libcxx/include/__thread/support/pthread.h
@@ -72,17 +72,17 @@ inline _LIBCPP_HIDE_FROM_ABI int __libcpp_recursive_mutex_init(__libcpp_recursiv
   return 0;
 }
 
-inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_NO_THREAD_SAFETY_ANALYSIS int
+_LIBCPP_NO_THREAD_SAFETY_ANALYSIS inline _LIBCPP_HIDE_FROM_ABI int
 __libcpp_recursive_mutex_lock(__libcpp_recursive_mutex_t* __m) {
   return pthread_mutex_lock(__m);
 }
 
-inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_NO_THREAD_SAFETY_ANALYSIS bool
+_LIBCPP_NO_THREAD_SAFETY_ANALYSIS inline _LIBCPP_HIDE_FROM_ABI bool
 __libcpp_recursive_mutex_trylock(__libcpp_recursive_mutex_t* __m) {
   return pthread_mutex_trylock(__m) == 0;
 }
 
-inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_NO_THREAD_SAFETY_ANALYSIS int
+_LIBCPP_NO_THREAD_SAFETY_ANALYSIS inline _LIBCPP_HIDE_FROM_ABI int
 __libcpp_recursive_mutex_unlock(__libcpp_recursive_mutex_t* __m) {
   return pthread_mutex_unlock(__m);
 }
@@ -91,15 +91,15 @@ inline _LIBCPP_HIDE_FROM_ABI int __libcpp_recursive_mutex_destroy(__libcpp_recur
   return pthread_mutex_destroy(__m);
 }
 
-inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_NO_THREAD_SAFETY_ANALYSIS int __libcpp_mutex_lock(__libcpp_mutex_t* __m) {
+_LIBCPP_NO_THREAD_SAFETY_ANALYSIS inline _LIBCPP_HIDE_FROM_ABI int __libcpp_mutex_lock(__libcpp_mutex_t* __m) {
   return pthread_mutex_lock(__m);
 }
 
-inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_NO_THREAD_SAFETY_ANALYSIS bool __libcpp_mutex_trylock(__libcpp_mutex_t* __m) {
+_LIBCPP_NO_THREAD_SAFETY_ANALYSIS inline _LIBCPP_HIDE_FROM_ABI bool __libcpp_mutex_trylock(__libcpp_mutex_t* __m) {
   return pthread_mutex_trylock(__m) == 0;
 }
 
-inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_NO_THREAD_SAFETY_ANALYSIS int __libcpp_mutex_unlock(__libcpp_mutex_t* __m) {
+_LIBCPP_NO_THREAD_SAFETY_ANALYSIS inline _LIBCPP_HIDE_FROM_ABI int __libcpp_mutex_unlock(__libcpp_mutex_t* __m) {
   return pthread_mutex_unlock(__m);
 }
 
@@ -117,12 +117,12 @@ inline _LIBCPP_HIDE_FROM_ABI int __libcpp_condvar_broadcast(__libcpp_condvar_t*
   return pthread_cond_broadcast(__cv);
 }
 
-inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_NO_THREAD_SAFETY_ANALYSIS int
+_LIBCPP_NO_THREAD_SAFETY_ANALYSIS inline _LIBCPP_HIDE_FROM_ABI int
 __libcpp_condvar_wait(__libcpp_condvar_t* __cv, __libcpp_mutex_t* __m) {
   return pthread_cond_wait(__cv, __m);
 }
 
-inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_NO_THREAD_SAFETY_ANALYSIS int
+_LIBCPP_NO_THREAD_SAFETY_ANALYSIS inline _LIBCPP_HIDE_FROM_ABI int
 __libcpp_condvar_timedwait(__libcpp_condvar_t* __cv, __libcpp_mutex_t* __m, __libcpp_timespec_t* __ts) {
   return pthread_cond_timedwait(__cv, __m, __ts);
 }
diff --git a/lib/libcxx/include/__thread/support/windows.h b/lib/libcxx/include/__thread/support/windows.h
index 2921ed900e..558b5c81dc 100644
--- a/lib/libcxx/include/__thread/support/windows.h
+++ b/lib/libcxx/include/__thread/support/windows.h
@@ -36,22 +36,22 @@ typedef void* __libcpp_recursive_mutex_t[6];
 
 _LIBCPP_EXPORTED_FROM_ABI int __libcpp_recursive_mutex_init(__libcpp_recursive_mutex_t* __m);
 
-_LIBCPP_EXPORTED_FROM_ABI _LIBCPP_NO_THREAD_SAFETY_ANALYSIS int
+_LIBCPP_NO_THREAD_SAFETY_ANALYSIS _LIBCPP_EXPORTED_FROM_ABI int
 __libcpp_recursive_mutex_lock(__libcpp_recursive_mutex_t* __m);
 
-_LIBCPP_EXPORTED_FROM_ABI _LIBCPP_NO_THREAD_SAFETY_ANALYSIS bool
+_LIBCPP_NO_THREAD_SAFETY_ANALYSIS _LIBCPP_EXPORTED_FROM_ABI bool
 __libcpp_recursive_mutex_trylock(__libcpp_recursive_mutex_t* __m);
 
-_LIBCPP_EXPORTED_FROM_ABI _LIBCPP_NO_THREAD_SAFETY_ANALYSIS int
+_LIBCPP_NO_THREAD_SAFETY_ANALYSIS _LIBCPP_EXPORTED_FROM_ABI int
 __libcpp_recursive_mutex_unlock(__libcpp_recursive_mutex_t* __m);
 
 _LIBCPP_EXPORTED_FROM_ABI int __libcpp_recursive_mutex_destroy(__libcpp_recursive_mutex_t* __m);
 
-_LIBCPP_EXPORTED_FROM_ABI _LIBCPP_NO_THREAD_SAFETY_ANALYSIS int __libcpp_mutex_lock(__libcpp_mutex_t* __m);
+_LIBCPP_NO_THREAD_SAFETY_ANALYSIS _LIBCPP_EXPORTED_FROM_ABI int __libcpp_mutex_lock(__libcpp_mutex_t* __m);
 
-_LIBCPP_EXPORTED_FROM_ABI _LIBCPP_NO_THREAD_SAFETY_ANALYSIS bool __libcpp_mutex_trylock(__libcpp_mutex_t* __m);
+_LIBCPP_NO_THREAD_SAFETY_ANALYSIS _LIBCPP_EXPORTED_FROM_ABI bool __libcpp_mutex_trylock(__libcpp_mutex_t* __m);
 
-_LIBCPP_EXPORTED_FROM_ABI _LIBCPP_NO_THREAD_SAFETY_ANALYSIS int __libcpp_mutex_unlock(__libcpp_mutex_t* __m);
+_LIBCPP_NO_THREAD_SAFETY_ANALYSIS _LIBCPP_EXPORTED_FROM_ABI int __libcpp_mutex_unlock(__libcpp_mutex_t* __m);
 
 _LIBCPP_EXPORTED_FROM_ABI int __libcpp_mutex_destroy(__libcpp_mutex_t* __m);
 
@@ -65,10 +65,10 @@ _LIBCPP_EXPORTED_FROM_ABI int __libcpp_condvar_signal(__libcpp_condvar_t* __cv);
 
 _LIBCPP_EXPORTED_FROM_ABI int __libcpp_condvar_broadcast(__libcpp_condvar_t* __cv);
 
-_LIBCPP_EXPORTED_FROM_ABI _LIBCPP_NO_THREAD_SAFETY_ANALYSIS int
+_LIBCPP_NO_THREAD_SAFETY_ANALYSIS _LIBCPP_EXPORTED_FROM_ABI int
 __libcpp_condvar_wait(__libcpp_condvar_t* __cv, __libcpp_mutex_t* __m);
 
-_LIBCPP_EXPORTED_FROM_ABI _LIBCPP_NO_THREAD_SAFETY_ANALYSIS int
+_LIBCPP_NO_THREAD_SAFETY_ANALYSIS _LIBCPP_EXPORTED_FROM_ABI int
 __libcpp_condvar_timedwait(__libcpp_condvar_t* __cv, __libcpp_mutex_t* __m, __libcpp_timespec_t* __ts);
 
 _LIBCPP_EXPORTED_FROM_ABI int __libcpp_condvar_destroy(__libcpp_condvar_t* __cv);
diff --git a/lib/libcxx/include/__thread/thread.h b/lib/libcxx/include/__thread/thread.h
index 1b51571ce3..b2f51aa816 100644
--- a/lib/libcxx/include/__thread/thread.h
+++ b/lib/libcxx/include/__thread/thread.h
@@ -25,6 +25,8 @@
 #include <__thread/support.h>
 #include <__type_traits/decay.h>
 #include <__type_traits/enable_if.h>
+#include <__type_traits/invoke.h>
+#include <__type_traits/is_constructible.h>
 #include <__type_traits/is_same.h>
 #include <__type_traits/remove_cvref.h>
 #include <__utility/forward.h>
@@ -155,8 +157,8 @@ operator<<(basic_ostream<_CharT, _Traits>& __os, __thread_id __id) {
 #  ifndef _LIBCPP_CXX03_LANG
 
 template <class _TSp, class _Fp, class... _Args, size_t... _Indices>
-inline _LIBCPP_HIDE_FROM_ABI void __thread_execute(tuple<_TSp, _Fp, _Args...>& __t, __tuple_indices<_Indices...>) {
-  std::__invoke(std::move(std::get<1>(__t)), std::move(std::get<_Indices>(__t))...);
+inline _LIBCPP_HIDE_FROM_ABI void __thread_execute(tuple<_TSp, _Fp, _Args...>& __t, __index_sequence<_Indices...>) {
+  std::__invoke(std::move(std::get<_Indices + 1>(__t))...);
 }
 
 template <class _Fp>
@@ -164,8 +166,7 @@ _LIBCPP_HIDE_FROM_ABI void* __thread_proxy(void* __vp) {
   // _Fp = tuple< unique_ptr<__thread_struct>, Functor, Args...>
   unique_ptr<_Fp> __p(static_cast<_Fp*>(__vp));
   __thread_local_data().set_pointer(std::get<0>(*__p.get()).release());
-  typedef typename __make_tuple_indices<tuple_size<_Fp>::value, 2>::type _Index;
-  std::__thread_execute(*__p.get(), _Index());
+  std::__thread_execute(*__p.get(), __make_index_sequence<tuple_size<_Fp>::value - 1>());
   return nullptr;
 }
 
@@ -206,6 +207,10 @@ public:
 #  ifndef _LIBCPP_CXX03_LANG
   template <class _Fp, class... _Args, __enable_if_t<!is_same<__remove_cvref_t<_Fp>, thread>::value, int> = 0>
   _LIBCPP_HIDE_FROM_ABI explicit thread(_Fp&& __f, _Args&&... __args) {
+    static_assert(is_constructible<__decay_t<_Fp>, _Fp>::value, "");
+    static_assert(_And<is_constructible<__decay_t<_Args>, _Args>...>::value, "");
+    static_assert(__is_invocable_v<__decay_t<_Fp>, __decay_t<_Args>...>, "");
+
     typedef unique_ptr<__thread_struct> _TSPtr;
     _TSPtr __tsp(new __thread_struct);
     typedef tuple<_TSPtr, __decay_t<_Fp>, __decay_t<_Args>...> _Gp;
@@ -243,13 +248,13 @@ public:
 
   _LIBCPP_HIDE_FROM_ABI void swap(thread& __t) _NOEXCEPT { std::swap(__t_, __t.__t_); }
 
-  _LIBCPP_HIDE_FROM_ABI bool joinable() const _NOEXCEPT { return !__libcpp_thread_isnull(&__t_); }
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI bool joinable() const _NOEXCEPT { return !__libcpp_thread_isnull(&__t_); }
   void join();
   void detach();
-  _LIBCPP_HIDE_FROM_ABI id get_id() const _NOEXCEPT { return __libcpp_thread_get_id(&__t_); }
-  _LIBCPP_HIDE_FROM_ABI native_handle_type native_handle() _NOEXCEPT { return __t_; }
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI id get_id() const _NOEXCEPT { return __libcpp_thread_get_id(&__t_); }
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI native_handle_type native_handle() _NOEXCEPT { return __t_; }
 
-  static unsigned hardware_concurrency() _NOEXCEPT;
+  [[__nodiscard__]] static unsigned hardware_concurrency() _NOEXCEPT;
 };
 
 inline _LIBCPP_HIDE_FROM_ABI void swap(thread& __x, thread& __y) _NOEXCEPT { __x.swap(__y); }
diff --git a/lib/libcxx/include/__thread/timed_backoff_policy.h b/lib/libcxx/include/__thread/timed_backoff_policy.h
index 35a72eb61f..01fe2dd045 100644
--- a/lib/libcxx/include/__thread/timed_backoff_policy.h
+++ b/lib/libcxx/include/__thread/timed_backoff_policy.h
@@ -11,6 +11,7 @@
 #define _LIBCPP___THREAD_TIMED_BACKOFF_POLICY_H
 
 #include <__config>
+#include <__thread/poll_with_backoff.h>
 
 #if _LIBCPP_HAS_THREADS
 
@@ -24,7 +25,7 @@
 _LIBCPP_BEGIN_NAMESPACE_STD
 
 struct __libcpp_timed_backoff_policy {
-  _LIBCPP_HIDE_FROM_ABI bool operator()(chrono::nanoseconds __elapsed) const {
+  _LIBCPP_HIDE_FROM_ABI __backoff_results operator()(chrono::nanoseconds __elapsed) const {
     if (__elapsed > chrono::milliseconds(128))
       __libcpp_thread_sleep_for(chrono::milliseconds(8));
     else if (__elapsed > chrono::microseconds(64))
@@ -33,7 +34,7 @@ struct __libcpp_timed_backoff_policy {
       __libcpp_thread_yield();
     else {
     } // poll
-    return false;
+    return __backoff_results::__continue_poll;
   }
 };
 
diff --git a/lib/libcxx/include/__tree b/lib/libcxx/include/__tree
index b3c0ece8e5..eb17f7d369 100644
--- a/lib/libcxx/include/__tree
+++ b/lib/libcxx/include/__tree
@@ -11,37 +11,39 @@
 #define _LIBCPP___TREE
 
 #include <__algorithm/min.h>
+#include <__algorithm/specialized_algorithms.h>
 #include <__assert>
 #include <__config>
-#include <__fwd/map.h>
 #include <__fwd/pair.h>
-#include <__fwd/set.h>
 #include <__iterator/distance.h>
 #include <__iterator/iterator_traits.h>
 #include <__iterator/next.h>
 #include <__memory/addressof.h>
 #include <__memory/allocator_traits.h>
 #include <__memory/compressed_pair.h>
+#include <__memory/construct_at.h>
 #include <__memory/pointer_traits.h>
 #include <__memory/swap_allocator.h>
 #include <__memory/unique_ptr.h>
-#include <__type_traits/can_extract_key.h>
+#include <__new/launder.h>
 #include <__type_traits/copy_cvref.h>
 #include <__type_traits/enable_if.h>
 #include <__type_traits/invoke.h>
-#include <__type_traits/is_const.h>
 #include <__type_traits/is_constructible.h>
 #include <__type_traits/is_nothrow_assignable.h>
 #include <__type_traits/is_nothrow_constructible.h>
 #include <__type_traits/is_same.h>
+#include <__type_traits/is_specialization.h>
 #include <__type_traits/is_swappable.h>
+#include <__type_traits/make_transparent.h>
 #include <__type_traits/remove_const.h>
-#include <__type_traits/remove_const_ref.h>
 #include <__type_traits/remove_cvref.h>
 #include <__utility/forward.h>
+#include <__utility/lazy_synth_three_way_comparator.h>
 #include <__utility/move.h>
 #include <__utility/pair.h>
 #include <__utility/swap.h>
+#include <__utility/try_key_extraction.h>
 #include <limits>
 
 #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
@@ -51,14 +53,31 @@
 _LIBCPP_PUSH_MACROS
 #include <__undef_macros>
 
-_LIBCPP_BEGIN_NAMESPACE_STD
+_LIBCPP_DIAGNOSTIC_PUSH
+// GCC complains about the backslashes at the end, see https://gcc.gnu.org/bugzilla/show_bug.cgi?id=121528
+_LIBCPP_GCC_DIAGNOSTIC_IGNORED("-Wcomment")
+// __tree is a red-black-tree implementation used for the associative containers (i.e. (multi)map/set). It stores
+// - (1) a pointer to the node with the smallest (i.e. leftmost) element, namely __begin_node_
+// - (2) the number of nodes in the tree, namely __size_
+// - (3) an end node whose __left_ pointer points to the root of the tree, namely __end_node_
+//
+// Storing (1) and (2) lets the first element and the size be found in constant time. A tree looks like this in memory:
+//
+//      __end_node_
+//           |
+//          root
+//         /    \
+//       l1       r1
+//      /  \     /  \
+//    ...  ... ...  ...
+//
+// All nodes except __end_node_ have a __left_ and __right_ pointer as well as a __parent_ pointer.
+// __end_node_ only contains a __left_ pointer, which points to the root of the tree.
+// This layout allows for iteration through the tree without a need for special handling of the end node. See
+// __tree_next_iter and __tree_prev_iter for more details.
+_LIBCPP_DIAGNOSTIC_POP
 
-template <class _Tp, class _Compare, class _Allocator>
-class __tree;
-template <class _Tp, class _NodePtr, class _DiffType>
-class __tree_iterator;
-template <class _Tp, class _ConstNodePtr, class _DiffType>
-class __tree_const_iterator;
+_LIBCPP_BEGIN_NAMESPACE_STD
 
 template <class _Pointer>
 class __tree_end_node;
@@ -70,13 +89,6 @@ class __tree_node;
 template <class _Key, class _Value>
 struct __value_type;
 
-template <class _Allocator>
-class __map_node_destructor;
-template <class _TreeIterator>
-class __map_iterator;
-template <class _TreeIterator>
-class __map_const_iterator;
-
 /*
 
 _NodePtr algorithms
@@ -185,6 +197,11 @@ _LIBCPP_HIDE_FROM_ABI _NodePtr __tree_next(_NodePtr __x) _NOEXCEPT {
   return __x->__parent_unsafe();
 }
 
+// __tree_next_iter and __tree_prev_iter implement iteration through the tree. The order is as follows:
+// left sub-tree -> node -> right sub-tree. When the right-most node of a sub-tree is reached, we walk up the tree until
+// we find a node where we were in the left sub-tree. We are _always_ in a left sub-tree, since the __end_node_ points
+// to the actual root of the tree through a __left_ pointer. Incrementing the end() pointer is UB, so we can assume that
+// never happens.
 template <class _EndNodePtr, class _NodePtr>
 inline _LIBCPP_HIDE_FROM_ABI _EndNodePtr __tree_next_iter(_NodePtr __x) _NOEXCEPT {
   _LIBCPP_ASSERT_INTERNAL(__x != nullptr, "node shouldn't be null");
@@ -494,16 +511,7 @@ _LIBCPP_HIDE_FROM_ABI void __tree_remove(_NodePtr __root, _NodePtr __z) _NOEXCEP
 // node traits
 
 template <class _Tp>
-struct __is_tree_value_type_imp : false_type {};
-
-template <class _Key, class _Value>
-struct __is_tree_value_type_imp<__value_type<_Key, _Value> > : true_type {};
-
-template <class... _Args>
-struct __is_tree_value_type : false_type {};
-
-template <class _One>
-struct __is_tree_value_type<_One> : __is_tree_value_type_imp<__remove_cvref_t<_One> > {};
+inline const bool __is_tree_value_type_v = __is_specialization_v<_Tp, __value_type>;
 
 template <class _Tp>
 struct __get_tree_key_type {
@@ -549,15 +557,14 @@ private:
 template <class _Pointer>
 class __tree_end_node {
 public:
-  typedef _Pointer pointer;
+  using pointer = _Pointer;
   pointer __left_;
 
   _LIBCPP_HIDE_FROM_ABI __tree_end_node() _NOEXCEPT : __left_() {}
 };
 
 template <class _VoidPtr>
-class _LIBCPP_STANDALONE_DEBUG
-__tree_node_base : public __tree_end_node<__rebind_pointer_t<_VoidPtr, __tree_node_base<_VoidPtr> > > {
+class __tree_node_base : public __tree_end_node<__rebind_pointer_t<_VoidPtr, __tree_node_base<_VoidPtr> > > {
 public:
   using pointer                            = __rebind_pointer_t<_VoidPtr, __tree_node_base>;
   using __end_node_pointer _LIBCPP_NODEBUG = __rebind_pointer_t<_VoidPtr, __tree_end_node<pointer> >;
@@ -570,20 +577,41 @@ public:
 
   _LIBCPP_HIDE_FROM_ABI void __set_parent(pointer __p) { __parent_ = static_cast<__end_node_pointer>(__p); }
 
-  ~__tree_node_base()                                  = delete;
+  _LIBCPP_HIDE_FROM_ABI __tree_node_base()             = default;
   __tree_node_base(__tree_node_base const&)            = delete;
   __tree_node_base& operator=(__tree_node_base const&) = delete;
 };
 
 template <class _Tp, class _VoidPtr>
-class _LIBCPP_STANDALONE_DEBUG __tree_node : public __tree_node_base<_VoidPtr> {
+class __tree_node : public __tree_node_base<_VoidPtr> {
 public:
   using __node_value_type _LIBCPP_NODEBUG = __get_node_value_type_t<_Tp>;
 
-  __node_value_type __value_;
+// We use a union so that the value is not initialized during member initialization, which allows us
+// to use the allocator from the container to construct the `__node_value_type` in the
+// memory provided by the union member.
+#ifndef _LIBCPP_CXX03_LANG
 
+private:
+  union {
+    __node_value_type __value_;
+  };
+
+public:
   _LIBCPP_HIDE_FROM_ABI __node_value_type& __get_value() { return __value_; }
+#else
 
+private:
+  _ALIGNAS_TYPE(__node_value_type) unsigned char __buffer_[sizeof(__node_value_type)];
+
+public:
+  _LIBCPP_HIDE_FROM_ABI __node_value_type& __get_value() { return *reinterpret_cast<__node_value_type*>(__buffer_); }
+#endif
+
+  template <class _Alloc, class... _Args>
+  _LIBCPP_HIDE_FROM_ABI explicit __tree_node(_Alloc& __na, _Args&&... __args) {
+    allocator_traits<_Alloc>::construct(__na, std::addressof(__get_value()), std::forward<_Args>(__args)...);
+  }
   ~__tree_node()                             = delete;
   __tree_node(__tree_node const&)            = delete;
   __tree_node& operator=(__tree_node const&) = delete;
@@ -591,11 +619,11 @@ public:
 
 template <class _Allocator>
 class __tree_node_destructor {
-  typedef _Allocator allocator_type;
-  typedef allocator_traits<allocator_type> __alloc_traits;
+  using allocator_type                 = _Allocator;
+  using __alloc_traits _LIBCPP_NODEBUG = allocator_traits<allocator_type>;
 
 public:
-  typedef typename __alloc_traits::pointer pointer;
+  using pointer = typename __alloc_traits::pointer;
 
 private:
   allocator_type& __na_;
@@ -612,7 +640,7 @@ public:
 
   _LIBCPP_HIDE_FROM_ABI void operator()(pointer __p) _NOEXCEPT {
     if (__value_constructed)
-      __alloc_traits::destroy(__na_, std::addressof(__p->__value_));
+      __alloc_traits::destroy(__na_, std::addressof(__p->__get_value()));
     if (__p)
       __alloc_traits::deallocate(__na_, __p, 1);
   }
@@ -630,12 +658,60 @@ struct __generic_container_node_destructor<__tree_node<_Tp, _VoidPtr>, _Alloc> :
 };
 #endif
 
+// Do an in-order traversal of the tree until `__break` returns true. Takes the root node of the tree.
+template <class _Reference, class _Break, class _NodePtr, class _Func, class _Proj>
+#ifndef _LIBCPP_COMPILER_GCC // This function is recursive, so GCC complains about always_inline.
+_LIBCPP_HIDE_FROM_ABI
+#endif
+bool __tree_iterate_from_root(_Break __break, _NodePtr __root, _Func& __func, _Proj& __proj) {
+  if (__root->__left_) {
+    if (std::__tree_iterate_from_root<_Reference>(__break, static_cast<_NodePtr>(__root->__left_), __func, __proj))
+      return true;
+  }
+  if (__break(__root))
+    return true;
+  std::__invoke(__func, std::__invoke(__proj, static_cast<_Reference>(__root->__get_value())));
+  if (__root->__right_)
+    return std::__tree_iterate_from_root<_Reference>(__break, static_cast<_NodePtr>(__root->__right_), __func, __proj);
+  return false;
+}
+
+// Do an in-order traversal of the tree from __first to __last.
+template <class _NodeIter, class _Func, class _Proj>
+_LIBCPP_HIDE_FROM_ABI void
+__tree_iterate_subrange(_NodeIter __first_it, _NodeIter __last_it, _Func& __func, _Proj& __proj) {
+  using _NodePtr   = typename _NodeIter::__node_pointer;
+  using _Reference = typename _NodeIter::reference;
+
+  auto __first = __first_it.__ptr_;
+  auto __last  = __last_it.__ptr_;
+
+  while (true) {
+    if (__first == __last)
+      return;
+    const auto __nfirst = static_cast<_NodePtr>(__first);
+    std::__invoke(__func, std::__invoke(__proj, static_cast<_Reference>(__nfirst->__get_value())));
+    if (__nfirst->__right_) {
+      if (std::__tree_iterate_from_root<_Reference>(
+              [&](_NodePtr __node) -> bool { return __node == __last; },
+              static_cast<_NodePtr>(__nfirst->__right_),
+              __func,
+              __proj))
+        return;
+    }
+    while (!std::__tree_is_left_child(static_cast<_NodePtr>(__first)))
+      __first = static_cast<_NodePtr>(__first)->__parent_;
+    __first = static_cast<_NodePtr>(__first)->__parent_;
+  }
+}
+
 template <class _Tp, class _NodePtr, class _DiffType>
 class __tree_iterator {
-  typedef __tree_node_types<_NodePtr> _NodeTypes;
-  typedef _NodePtr __node_pointer;
-  typedef typename _NodeTypes::__node_base_pointer __node_base_pointer;
-  typedef typename _NodeTypes::__end_node_pointer __end_node_pointer;
+  using _NodeTypes _LIBCPP_NODEBUG = __tree_node_types<_NodePtr>;
+  // NOLINTNEXTLINE(libcpp-nodebug-on-aliases) lldb relies on this alias for pretty printing
+  using __node_pointer                      = _NodePtr;
+  using __node_base_pointer _LIBCPP_NODEBUG = typename _NodeTypes::__node_base_pointer;
+  using __end_node_pointer _LIBCPP_NODEBUG  = typename _NodeTypes::__end_node_pointer;
 
   __end_node_pointer __ptr_;
 
@@ -646,15 +722,12 @@ public:
   using reference         = value_type&;
   using pointer           = __rebind_pointer_t<_NodePtr, value_type>;
 
-  _LIBCPP_HIDE_FROM_ABI __tree_iterator() _NOEXCEPT
-#if _LIBCPP_STD_VER >= 14
-      : __ptr_(nullptr)
-#endif
-  {
-  }
+  _LIBCPP_HIDE_FROM_ABI __tree_iterator() _NOEXCEPT : __ptr_(nullptr) {}
 
-  _LIBCPP_HIDE_FROM_ABI reference operator*() const { return __get_np()->__value_; }
-  _LIBCPP_HIDE_FROM_ABI pointer operator->() const { return pointer_traits<pointer>::pointer_to(__get_np()->__value_); }
+  _LIBCPP_HIDE_FROM_ABI reference operator*() const { return __get_np()->__get_value(); }
+  _LIBCPP_HIDE_FROM_ABI pointer operator->() const {
+    return pointer_traits<pointer>::pointer_to(__get_np()->__get_value());
+  }
 
   _LIBCPP_HIDE_FROM_ABI __tree_iterator& operator++() {
     __ptr_ = std::__tree_next_iter<__end_node_pointer>(static_cast<__node_base_pointer>(__ptr_));
@@ -691,50 +764,54 @@ private:
   friend class __tree;
   template <class, class, class>
   friend class __tree_const_iterator;
-  template <class>
-  friend class __map_iterator;
-  template <class, class, class, class>
-  friend class map;
-  template <class, class, class, class>
-  friend class multimap;
-  template <class, class, class>
-  friend class set;
-  template <class, class, class>
-  friend class multiset;
+
+  template <class _NodeIter, class _Func, class _Proj>
+  friend void __tree_iterate_subrange(_NodeIter, _NodeIter, _Func&, _Proj&);
 };
 
+#ifndef _LIBCPP_CXX03_LANG
+// This also handles {multi,}set::iterator, since they're just aliases to __tree::iterator
+template <class _Tp, class _NodePtr, class _DiffType>
+struct __specialized_algorithm<
+    _Algorithm::__for_each,
+    __iterator_pair<__tree_iterator<_Tp, _NodePtr, _DiffType>, __tree_iterator<_Tp, _NodePtr, _DiffType>>> {
+  static const bool __has_algorithm = true;
+
+  using __iterator _LIBCPP_NODEBUG = __tree_iterator<_Tp, _NodePtr, _DiffType>;
+
+  template <class _Func, class _Proj>
+  _LIBCPP_HIDE_FROM_ABI static void operator()(__iterator __first, __iterator __last, _Func& __func, _Proj& __proj) {
+    std::__tree_iterate_subrange(__first, __last, __func, __proj);
+  }
+};
+#endif
+
 template <class _Tp, class _NodePtr, class _DiffType>
 class __tree_const_iterator {
-  typedef __tree_node_types<_NodePtr> _NodeTypes;
+  using _NodeTypes _LIBCPP_NODEBUG = __tree_node_types<_NodePtr>;
   // NOLINTNEXTLINE(libcpp-nodebug-on-aliases) lldb relies on this alias for pretty printing
-  using __node_pointer = _NodePtr;
-  typedef typename _NodeTypes::__node_base_pointer __node_base_pointer;
-  typedef typename _NodeTypes::__end_node_pointer __end_node_pointer;
+  using __node_pointer                      = _NodePtr;
+  using __node_base_pointer _LIBCPP_NODEBUG = typename _NodeTypes::__node_base_pointer;
+  using __end_node_pointer _LIBCPP_NODEBUG  = typename _NodeTypes::__end_node_pointer;
 
   __end_node_pointer __ptr_;
 
 public:
-  using iterator_category = bidirectional_iterator_tag;
-  using value_type        = __get_node_value_type_t<_Tp>;
-  using difference_type   = _DiffType;
-  using reference         = const value_type&;
-  using pointer           = __rebind_pointer_t<_NodePtr, const value_type>;
+  using iterator_category                    = bidirectional_iterator_tag;
+  using value_type                           = __get_node_value_type_t<_Tp>;
+  using difference_type                      = _DiffType;
+  using reference                            = const value_type&;
+  using pointer                              = __rebind_pointer_t<_NodePtr, const value_type>;
+  using __non_const_iterator _LIBCPP_NODEBUG = __tree_iterator<_Tp, __node_pointer, difference_type>;
 
-  _LIBCPP_HIDE_FROM_ABI __tree_const_iterator() _NOEXCEPT
-#if _LIBCPP_STD_VER >= 14
-      : __ptr_(nullptr)
-#endif
-  {
-  }
+  _LIBCPP_HIDE_FROM_ABI __tree_const_iterator() _NOEXCEPT : __ptr_(nullptr) {}
 
-private:
-  typedef __tree_iterator<_Tp, __node_pointer, difference_type> __non_const_iterator;
-
-public:
   _LIBCPP_HIDE_FROM_ABI __tree_const_iterator(__non_const_iterator __p) _NOEXCEPT : __ptr_(__p.__ptr_) {}
 
-  _LIBCPP_HIDE_FROM_ABI reference operator*() const { return __get_np()->__value_; }
-  _LIBCPP_HIDE_FROM_ABI pointer operator->() const { return pointer_traits<pointer>::pointer_to(__get_np()->__value_); }
+  _LIBCPP_HIDE_FROM_ABI reference operator*() const { return __get_np()->__get_value(); }
+  _LIBCPP_HIDE_FROM_ABI pointer operator->() const {
+    return pointer_traits<pointer>::pointer_to(__get_np()->__get_value());
+  }
 
   _LIBCPP_HIDE_FROM_ABI __tree_const_iterator& operator++() {
     __ptr_ = std::__tree_next_iter<__end_node_pointer>(static_cast<__node_base_pointer>(__ptr_));
@@ -772,18 +849,28 @@ private:
 
   template <class, class, class>
   friend class __tree;
-  template <class, class, class, class>
-  friend class map;
-  template <class, class, class, class>
-  friend class multimap;
-  template <class, class, class>
-  friend class set;
-  template <class, class, class>
-  friend class multiset;
-  template <class>
-  friend class __map_const_iterator;
+
+  template <class _NodeIter, class _Func, class _Proj>
+  friend void __tree_iterate_subrange(_NodeIter, _NodeIter, _Func&, _Proj&);
 };
 
+#ifndef _LIBCPP_CXX03_LANG
+// This also handles {multi,}set::const_iterator, since they're just aliases to __tree::const_iterator
+template <class _Tp, class _NodePtr, class _DiffType>
+struct __specialized_algorithm<
+    _Algorithm::__for_each,
+    __iterator_pair<__tree_const_iterator<_Tp, _NodePtr, _DiffType>, __tree_const_iterator<_Tp, _NodePtr, _DiffType>>> {
+  static const bool __has_algorithm = true;
+
+  using __iterator _LIBCPP_NODEBUG = __tree_const_iterator<_Tp, _NodePtr, _DiffType>;
+
+  template <class _Func, class _Proj>
+  _LIBCPP_HIDE_FROM_ABI static void operator()(__iterator __first, __iterator __last, _Func& __func, _Proj& __proj) {
+    std::__tree_iterate_subrange(__first, __last, __func, __proj);
+  }
+};
+#endif
+
 template <class _Tp, class _Compare>
 #ifndef _LIBCPP_CXX03_LANG
 _LIBCPP_DIAGNOSE_WARNING(!__is_invocable_v<_Compare const&, _Tp const&, _Tp const&>,
@@ -794,21 +881,20 @@ int __diagnose_non_const_comparator();
 template <class _Tp, class _Compare, class _Allocator>
 class __tree {
 public:
-  using value_type = __get_node_value_type_t<_Tp>;
-  typedef _Compare value_compare;
-  typedef _Allocator allocator_type;
+  using value_type     = __get_node_value_type_t<_Tp>;
+  using value_compare  = _Compare;
+  using allocator_type = _Allocator;
 
 private:
-  typedef allocator_traits<allocator_type> __alloc_traits;
-  using key_type = __get_tree_key_type_t<_Tp>;
+  using __alloc_traits _LIBCPP_NODEBUG = allocator_traits<allocator_type>;
+  using key_type                       = __get_tree_key_type_t<_Tp>;
 
 public:
-  typedef typename __alloc_traits::pointer pointer;
-  typedef typename __alloc_traits::const_pointer const_pointer;
-  typedef typename __alloc_traits::size_type size_type;
-  typedef typename __alloc_traits::difference_type difference_type;
+  using pointer         = typename __alloc_traits::pointer;
+  using const_pointer   = typename __alloc_traits::const_pointer;
+  using size_type       = typename __alloc_traits::size_type;
+  using difference_type = typename __alloc_traits::difference_type;
 
-public:
   using __void_pointer _LIBCPP_NODEBUG = typename __alloc_traits::void_pointer;
 
   using __node _LIBCPP_NODEBUG = __tree_node<_Tp, __void_pointer>;
@@ -821,22 +907,8 @@ public:
   using __end_node_t _LIBCPP_NODEBUG       = __tree_end_node<__node_base_pointer>;
   using __end_node_pointer _LIBCPP_NODEBUG = __rebind_pointer_t<__void_pointer, __end_node_t>;
 
-  using __parent_pointer _LIBCPP_NODEBUG = __end_node_pointer; // TODO: Remove this once the uses in <map> are removed
-
-  typedef __rebind_alloc<__alloc_traits, __node> __node_allocator;
-  typedef allocator_traits<__node_allocator> __node_traits;
-
-// TODO(LLVM 22): Remove this check
-#ifndef _LIBCPP_ABI_TREE_REMOVE_NODE_POINTER_UB
-  static_assert(sizeof(__node_base_pointer) == sizeof(__end_node_pointer) && _LIBCPP_ALIGNOF(__node_base_pointer) ==
-                    _LIBCPP_ALIGNOF(__end_node_pointer),
-                "It looks like you are using std::__tree (an implementation detail for (multi)map/set) with a fancy "
-                "pointer type that thas a different representation depending on whether it points to a __tree base "
-                "pointer or a __tree node pointer (both of which are implementation details of the standard library). "
-                "This means that your ABI is being broken between LLVM 19 and LLVM 20. If you don't care about your "
-                "ABI being broken, define the _LIBCPP_ABI_TREE_REMOVE_NODE_POINTER_UB macro to silence this "
-                "diagnostic.");
-#endif
+  using __node_allocator _LIBCPP_NODEBUG = __rebind_alloc<__alloc_traits, __node>;
+  using __node_traits _LIBCPP_NODEBUG    = allocator_traits<__node_allocator>;
 
 private:
   // check for sane allocator pointer rebinding semantics. Rebinding the
@@ -844,8 +916,8 @@ private:
   // the pointer using 'pointer_traits'.
   static_assert(is_same<__node_pointer, typename __node_traits::pointer>::value,
                 "Allocator does not rebind pointers in a sane manner.");
-  typedef __rebind_alloc<__node_traits, __node_base> __node_base_allocator;
-  typedef allocator_traits<__node_base_allocator> __node_base_traits;
+  using __node_base_allocator _LIBCPP_NODEBUG = __rebind_alloc<__node_traits, __node_base>;
+  using __node_base_traits _LIBCPP_NODEBUG    = allocator_traits<__node_base_allocator>;
   static_assert(is_same<__node_base_pointer, typename __node_base_traits::pointer>::value,
                 "Allocator does not rebind pointers in a sane manner.");
 
@@ -865,17 +937,11 @@ public:
 
 private:
   _LIBCPP_HIDE_FROM_ABI const __node_allocator& __node_alloc() const _NOEXCEPT { return __node_alloc_; }
-  _LIBCPP_HIDE_FROM_ABI __end_node_pointer& __begin_node() _NOEXCEPT { return __begin_node_; }
-  _LIBCPP_HIDE_FROM_ABI const __end_node_pointer& __begin_node() const _NOEXCEPT { return __begin_node_; }
 
 public:
   _LIBCPP_HIDE_FROM_ABI allocator_type __alloc() const _NOEXCEPT { return allocator_type(__node_alloc()); }
 
-private:
-  _LIBCPP_HIDE_FROM_ABI size_type& size() _NOEXCEPT { return __size_; }
-
-public:
-  _LIBCPP_HIDE_FROM_ABI const size_type& size() const _NOEXCEPT { return __size_; }
+  _LIBCPP_HIDE_FROM_ABI size_type size() const _NOEXCEPT { return __size_; }
   _LIBCPP_HIDE_FROM_ABI value_compare& value_comp() _NOEXCEPT { return __value_comp_; }
   _LIBCPP_HIDE_FROM_ABI const value_compare& value_comp() const _NOEXCEPT { return __value_comp_; }
 
@@ -888,32 +954,61 @@ public:
     return std::addressof(__end_node()->__left_);
   }
 
-  typedef __tree_iterator<_Tp, __node_pointer, difference_type> iterator;
-  typedef __tree_const_iterator<_Tp, __node_pointer, difference_type> const_iterator;
+  using iterator       = __tree_iterator<_Tp, __node_pointer, difference_type>;
+  using const_iterator = __tree_const_iterator<_Tp, __node_pointer, difference_type>;
 
   _LIBCPP_HIDE_FROM_ABI explicit __tree(const value_compare& __comp) _NOEXCEPT_(
-      is_nothrow_default_constructible<__node_allocator>::value&& is_nothrow_copy_constructible<value_compare>::value);
-  _LIBCPP_HIDE_FROM_ABI explicit __tree(const allocator_type& __a);
-  _LIBCPP_HIDE_FROM_ABI __tree(const value_compare& __comp, const allocator_type& __a);
+      is_nothrow_default_constructible<__node_allocator>::value&& is_nothrow_copy_constructible<value_compare>::value)
+      : __size_(0), __value_comp_(__comp) {
+    __begin_node_ = __end_node();
+  }
+
+  _LIBCPP_HIDE_FROM_ABI explicit __tree(const allocator_type& __a)
+      : __begin_node_(), __node_alloc_(__node_allocator(__a)), __size_(0) {
+    __begin_node_ = __end_node();
+  }
+
+  _LIBCPP_HIDE_FROM_ABI __tree(const value_compare& __comp, const allocator_type& __a)
+      : __begin_node_(), __node_alloc_(__node_allocator(__a)), __size_(0), __value_comp_(__comp) {
+    __begin_node_ = __end_node();
+  }
+
   _LIBCPP_HIDE_FROM_ABI __tree(const __tree& __t);
+
+  _LIBCPP_HIDE_FROM_ABI __tree(const __tree& __other, const allocator_type& __alloc)
+      : __begin_node_(__end_node()), __node_alloc_(__alloc), __size_(0), __value_comp_(__other.value_comp()) {
+    if (__other.size() == 0)
+      return;
+
+    *__root_ptr()       = static_cast<__node_base_pointer>(__copy_construct_tree(__other.__root()));
+    __root()->__parent_ = __end_node();
+    __begin_node_       = static_cast<__end_node_pointer>(std::__tree_min(__end_node()->__left_));
+    __size_             = __other.size();
+  }
+
   _LIBCPP_HIDE_FROM_ABI __tree& operator=(const __tree& __t);
   template <class _ForwardIterator>
   _LIBCPP_HIDE_FROM_ABI void __assign_unique(_ForwardIterator __first, _ForwardIterator __last);
-  template <class _InputIterator>
-  _LIBCPP_HIDE_FROM_ABI void __assign_multi(_InputIterator __first, _InputIterator __last);
   _LIBCPP_HIDE_FROM_ABI __tree(__tree&& __t) _NOEXCEPT_(
       is_nothrow_move_constructible<__node_allocator>::value&& is_nothrow_move_constructible<value_compare>::value);
   _LIBCPP_HIDE_FROM_ABI __tree(__tree&& __t, const allocator_type& __a);
+
   _LIBCPP_HIDE_FROM_ABI __tree& operator=(__tree&& __t)
       _NOEXCEPT_(is_nothrow_move_assignable<value_compare>::value &&
                  ((__node_traits::propagate_on_container_move_assignment::value &&
                    is_nothrow_move_assignable<__node_allocator>::value) ||
-                  allocator_traits<__node_allocator>::is_always_equal::value));
+                  allocator_traits<__node_allocator>::is_always_equal::value)) {
+    __move_assign(__t, integral_constant<bool, __node_traits::propagate_on_container_move_assignment::value>());
+    return *this;
+  }
 
-  _LIBCPP_HIDE_FROM_ABI ~__tree();
+  _LIBCPP_HIDE_FROM_ABI ~__tree() {
+    static_assert(is_copy_constructible<value_compare>::value, "Comparator must be copy-constructible.");
+    destroy(__root());
+  }
 
-  _LIBCPP_HIDE_FROM_ABI iterator begin() _NOEXCEPT { return iterator(__begin_node()); }
-  _LIBCPP_HIDE_FROM_ABI const_iterator begin() const _NOEXCEPT { return const_iterator(__begin_node()); }
+  _LIBCPP_HIDE_FROM_ABI iterator begin() _NOEXCEPT { return iterator(__begin_node_); }
+  _LIBCPP_HIDE_FROM_ABI const_iterator begin() const _NOEXCEPT { return const_iterator(__begin_node_); }
   _LIBCPP_HIDE_FROM_ABI iterator end() _NOEXCEPT { return iterator(__end_node()); }
   _LIBCPP_HIDE_FROM_ABI const_iterator end() const _NOEXCEPT { return const_iterator(__end_node()); }
 
@@ -931,116 +1026,151 @@ public:
       _NOEXCEPT_(__is_nothrow_swappable_v<value_compare>);
 #endif
 
-  template <class _Key, class... _Args>
-  _LIBCPP_HIDE_FROM_ABI pair<iterator, bool> __emplace_unique_key_args(_Key const&, _Args&&... __args);
-  template <class _Key, class... _Args>
-  _LIBCPP_HIDE_FROM_ABI pair<iterator, bool> __emplace_hint_unique_key_args(const_iterator, _Key const&, _Args&&...);
-
-  template <class... _Args>
-  _LIBCPP_HIDE_FROM_ABI pair<iterator, bool> __emplace_unique_impl(_Args&&... __args);
-
-  template <class... _Args>
-  _LIBCPP_HIDE_FROM_ABI iterator __emplace_hint_unique_impl(const_iterator __p, _Args&&... __args);
-
   template <class... _Args>
   _LIBCPP_HIDE_FROM_ABI iterator __emplace_multi(_Args&&... __args);
 
   template <class... _Args>
   _LIBCPP_HIDE_FROM_ABI iterator __emplace_hint_multi(const_iterator __p, _Args&&... __args);
 
-  template <class _Pp>
-  _LIBCPP_HIDE_FROM_ABI pair<iterator, bool> __emplace_unique(_Pp&& __x) {
-    return __emplace_unique_extract_key(std::forward<_Pp>(__x), __can_extract_key<_Pp, key_type>());
-  }
-
-  template <class _First,
-            class _Second,
-            __enable_if_t<__can_extract_map_key<_First, key_type, value_type>::value, int> = 0>
-  _LIBCPP_HIDE_FROM_ABI pair<iterator, bool> __emplace_unique(_First&& __f, _Second&& __s) {
-    return __emplace_unique_key_args(__f, std::forward<_First>(__f), std::forward<_Second>(__s));
-  }
-
   template <class... _Args>
   _LIBCPP_HIDE_FROM_ABI pair<iterator, bool> __emplace_unique(_Args&&... __args) {
-    return __emplace_unique_impl(std::forward<_Args>(__args)...);
-  }
-
-  template <class _Pp>
-  _LIBCPP_HIDE_FROM_ABI pair<iterator, bool> __emplace_unique_extract_key(_Pp&& __x, __extract_key_fail_tag) {
-    return __emplace_unique_impl(std::forward<_Pp>(__x));
-  }
-
-  template <class _Pp>
-  _LIBCPP_HIDE_FROM_ABI pair<iterator, bool> __emplace_unique_extract_key(_Pp&& __x, __extract_key_self_tag) {
-    return __emplace_unique_key_args(__x, std::forward<_Pp>(__x));
-  }
-
-  template <class _Pp>
-  _LIBCPP_HIDE_FROM_ABI pair<iterator, bool> __emplace_unique_extract_key(_Pp&& __x, __extract_key_first_tag) {
-    return __emplace_unique_key_args(__x.first, std::forward<_Pp>(__x));
-  }
-
-  template <class _Pp>
-  _LIBCPP_HIDE_FROM_ABI iterator __emplace_hint_unique(const_iterator __p, _Pp&& __x) {
-    return __emplace_hint_unique_extract_key(__p, std::forward<_Pp>(__x), __can_extract_key<_Pp, key_type>());
-  }
-
-  template <class _First,
-            class _Second,
-            __enable_if_t<__can_extract_map_key<_First, key_type, value_type>::value, int> = 0>
-  _LIBCPP_HIDE_FROM_ABI iterator __emplace_hint_unique(const_iterator __p, _First&& __f, _Second&& __s) {
-    return __emplace_hint_unique_key_args(__p, __f, std::forward<_First>(__f), std::forward<_Second>(__s)).first;
+    return std::__try_key_extraction<key_type>(
+        [this](const key_type& __key, _Args&&... __args2) {
+          auto [__parent, __child] = __find_equal(__key);
+          __node_pointer __r       = static_cast<__node_pointer>(__child);
+          bool __inserted          = false;
+          if (__child == nullptr) {
+            __node_holder __h = __construct_node(std::forward<_Args>(__args2)...);
+            __insert_node_at(__parent, __child, static_cast<__node_base_pointer>(__h.get()));
+            __r        = __h.release();
+            __inserted = true;
+          }
+          return pair<iterator, bool>(iterator(__r), __inserted);
+        },
+        [this](_Args&&... __args2) {
+          __node_holder __h        = __construct_node(std::forward<_Args>(__args2)...);
+          auto [__parent, __child] = __find_equal(__h->__get_value());
+          __node_pointer __r       = static_cast<__node_pointer>(__child);
+          bool __inserted          = false;
+          if (__child == nullptr) {
+            __insert_node_at(__parent, __child, static_cast<__node_base_pointer>(__h.get()));
+            __r        = __h.release();
+            __inserted = true;
+          }
+          return pair<iterator, bool>(iterator(__r), __inserted);
+        },
+        std::forward<_Args>(__args)...);
   }
 
   template <class... _Args>
-  _LIBCPP_HIDE_FROM_ABI iterator __emplace_hint_unique(const_iterator __p, _Args&&... __args) {
-    return __emplace_hint_unique_impl(__p, std::forward<_Args>(__args)...);
+  _LIBCPP_HIDE_FROM_ABI pair<iterator, bool> __emplace_hint_unique(const_iterator __p, _Args&&... __args) {
+    return std::__try_key_extraction<key_type>(
+        [this, __p](const key_type& __key, _Args&&... __args2) {
+          __node_base_pointer __dummy;
+          auto [__parent, __child] = __find_equal(__p, __dummy, __key);
+          __node_pointer __r       = static_cast<__node_pointer>(__child);
+          bool __inserted          = false;
+          if (__child == nullptr) {
+            __node_holder __h = __construct_node(std::forward<_Args>(__args2)...);
+            __insert_node_at(__parent, __child, static_cast<__node_base_pointer>(__h.get()));
+            __r        = __h.release();
+            __inserted = true;
+          }
+          return pair<iterator, bool>(iterator(__r), __inserted);
+        },
+        [this, __p](_Args&&... __args2) {
+          __node_holder __h = __construct_node(std::forward<_Args>(__args2)...);
+          __node_base_pointer __dummy;
+          auto [__parent, __child] = __find_equal(__p, __dummy, __h->__get_value());
+          __node_pointer __r       = static_cast<__node_pointer>(__child);
+          if (__child == nullptr) {
+            __insert_node_at(__parent, __child, static_cast<__node_base_pointer>(__h.get()));
+            __r = __h.release();
+          }
+          return pair<iterator, bool>(iterator(__r), __child == nullptr);
+        },
+        std::forward<_Args>(__args)...);
   }
 
-  template <class _Pp>
-  _LIBCPP_HIDE_FROM_ABI iterator
-  __emplace_hint_unique_extract_key(const_iterator __p, _Pp&& __x, __extract_key_fail_tag) {
-    return __emplace_hint_unique_impl(__p, std::forward<_Pp>(__x));
+  template <class _InIter, class _Sent>
+  _LIBCPP_HIDE_FROM_ABI void __insert_range_multi(_InIter __first, _Sent __last) {
+    if (__first == __last)
+      return;
+
+    if (__root() == nullptr) { // Make sure we always have a root node
+      __insert_node_at(
+          __end_node(), __end_node()->__left_, static_cast<__node_base_pointer>(__construct_node(*__first).release()));
+      ++__first;
+    }
+
+    auto __max_node = static_cast<__node_pointer>(std::__tree_max(static_cast<__node_base_pointer>(__root())));
+
+    for (; __first != __last; ++__first) {
+      __node_holder __nd = __construct_node(*__first);
+      // Always check the max node first. This optimizes for sorted ranges inserted at the end.
+      if (!value_comp()(__nd->__get_value(), __max_node->__get_value())) { // __nd >= __max_node
+        __insert_node_at(static_cast<__end_node_pointer>(__max_node),
+                         __max_node->__right_,
+                         static_cast<__node_base_pointer>(__nd.get()));
+        __max_node = __nd.release();
+      } else {
+        __end_node_pointer __parent;
+        __node_base_pointer& __child = __find_leaf_high(__parent, __nd->__get_value());
+        __insert_node_at(__parent, __child, static_cast<__node_base_pointer>(__nd.release()));
+      }
+    }
   }
 
-  template <class _Pp>
-  _LIBCPP_HIDE_FROM_ABI iterator
-  __emplace_hint_unique_extract_key(const_iterator __p, _Pp&& __x, __extract_key_self_tag) {
-    return __emplace_hint_unique_key_args(__p, __x, std::forward<_Pp>(__x)).first;
+  template <class _InIter, class _Sent>
+  _LIBCPP_HIDE_FROM_ABI void __insert_range_unique(_InIter __first, _Sent __last) {
+    if (__first == __last)
+      return;
+
+    if (__root() == nullptr) {
+      __insert_node_at(
+          __end_node(), __end_node()->__left_, static_cast<__node_base_pointer>(__construct_node(*__first).release()));
+      ++__first;
+    }
+
+    auto __max_node = static_cast<__node_pointer>(std::__tree_max(static_cast<__node_base_pointer>(__root())));
+
+    using __reference = decltype(*__first);
+
+    for (; __first != __last; ++__first) {
+      std::__try_key_extraction<key_type>(
+          [this, &__max_node](const key_type& __key, __reference&& __val) {
+            if (value_comp()(__max_node->__get_value(), __key)) { // __key > __max_node
+              __node_holder __nd = __construct_node(std::forward<__reference>(__val));
+              __insert_node_at(static_cast<__end_node_pointer>(__max_node),
+                               __max_node->__right_,
+                               static_cast<__node_base_pointer>(__nd.get()));
+              __max_node = __nd.release();
+            } else {
+              auto [__parent, __child] = __find_equal(__key);
+              if (__child == nullptr) {
+                __node_holder __nd = __construct_node(std::forward<__reference>(__val));
+                __insert_node_at(__parent, __child, static_cast<__node_base_pointer>(__nd.release()));
+              }
+            }
+          },
+          [this, &__max_node](__reference&& __val) {
+            __node_holder __nd = __construct_node(std::forward<__reference>(__val));
+            if (value_comp()(__max_node->__get_value(), __nd->__get_value())) { // __nd > __max_node
+              __insert_node_at(static_cast<__end_node_pointer>(__max_node),
+                               __max_node->__right_,
+                               static_cast<__node_base_pointer>(__nd.get()));
+              __max_node = __nd.release();
+            } else {
+              auto [__parent, __child] = __find_equal(__nd->__get_value());
+              if (__child == nullptr) {
+                __insert_node_at(__parent, __child, static_cast<__node_base_pointer>(__nd.release()));
+              }
+            }
+          },
+          *__first);
+    }
   }
 
-  template <class _Pp>
-  _LIBCPP_HIDE_FROM_ABI iterator
-  __emplace_hint_unique_extract_key(const_iterator __p, _Pp&& __x, __extract_key_first_tag) {
-    return __emplace_hint_unique_key_args(__p, __x.first, std::forward<_Pp>(__x)).first;
-  }
-
-  template <class _ValueT = _Tp, __enable_if_t<__is_tree_value_type<_ValueT>::value, int> = 0>
-  _LIBCPP_HIDE_FROM_ABI void
-  __insert_unique_from_orphaned_node(const_iterator __p, __get_node_value_type_t<_Tp>&& __value) {
-    __emplace_hint_unique(__p, const_cast<key_type&&>(__value.first), std::move(__value.second));
-  }
-
-  template <class _ValueT = _Tp, __enable_if_t<!__is_tree_value_type<_ValueT>::value, int> = 0>
-  _LIBCPP_HIDE_FROM_ABI void __insert_unique_from_orphaned_node(const_iterator __p, _Tp&& __value) {
-    __emplace_hint_unique(__p, std::move(__value));
-  }
-
-  template <class _ValueT = _Tp, __enable_if_t<__is_tree_value_type<_ValueT>::value, int> = 0>
-  _LIBCPP_HIDE_FROM_ABI void __insert_multi_from_orphaned_node(const_iterator __p, value_type&& __value) {
-    __emplace_hint_multi(__p, const_cast<key_type&&>(__value.first), std::move(__value.second));
-  }
-
-  template <class _ValueT = _Tp, __enable_if_t<!__is_tree_value_type<_ValueT>::value, int> = 0>
-  _LIBCPP_HIDE_FROM_ABI void __insert_multi_from_orphaned_node(const_iterator __p, _Tp&& __value) {
-    __emplace_hint_multi(__p, std::move(__value));
-  }
-
-  _LIBCPP_HIDE_FROM_ABI pair<iterator, bool> __node_assign_unique(const value_type& __v, __node_pointer __dest);
-
-  _LIBCPP_HIDE_FROM_ABI iterator __node_insert_multi(__node_pointer __nd);
-  _LIBCPP_HIDE_FROM_ABI iterator __node_insert_multi(const_iterator __p, __node_pointer __nd);
-
   _LIBCPP_HIDE_FROM_ABI iterator __remove_node_pointer(__node_pointer) _NOEXCEPT;
 
 #if _LIBCPP_STD_VER >= 17
@@ -1048,15 +1178,15 @@ public:
   _LIBCPP_HIDE_FROM_ABI _InsertReturnType __node_handle_insert_unique(_NodeHandle&&);
   template <class _NodeHandle>
   _LIBCPP_HIDE_FROM_ABI iterator __node_handle_insert_unique(const_iterator, _NodeHandle&&);
-  template <class _Tree>
-  _LIBCPP_HIDE_FROM_ABI void __node_handle_merge_unique(_Tree& __source);
+  template <class _Comp2>
+  _LIBCPP_HIDE_FROM_ABI void __node_handle_merge_unique(__tree<_Tp, _Comp2, _Allocator>& __source);
 
   template <class _NodeHandle>
   _LIBCPP_HIDE_FROM_ABI iterator __node_handle_insert_multi(_NodeHandle&&);
   template <class _NodeHandle>
   _LIBCPP_HIDE_FROM_ABI iterator __node_handle_insert_multi(const_iterator, _NodeHandle&&);
-  template <class _Tree>
-  _LIBCPP_HIDE_FROM_ABI void __node_handle_merge_multi(_Tree& __source);
+  template <class _Comp2>
+  _LIBCPP_HIDE_FROM_ABI void __node_handle_merge_multi(__tree<_Tp, _Comp2, _Allocator>& __source);
 
   template <class _NodeHandle>
   _LIBCPP_HIDE_FROM_ABI _NodeHandle __node_handle_extract(key_type const&);
@@ -1075,41 +1205,157 @@ public:
   __insert_node_at(__end_node_pointer __parent, __node_base_pointer& __child, __node_base_pointer __new_node) _NOEXCEPT;
 
   template <class _Key>
-  _LIBCPP_HIDE_FROM_ABI iterator find(const _Key& __v);
+  _LIBCPP_HIDE_FROM_ABI iterator find(const _Key& __key) {
+    auto [__, __match] = __find_equal(__key);
+    if (__match == nullptr)
+      return end();
+    return iterator(static_cast<__node_pointer>(__match));
+  }
+
   template <class _Key>
-  _LIBCPP_HIDE_FROM_ABI const_iterator find(const _Key& __v) const;
+  _LIBCPP_HIDE_FROM_ABI const_iterator find(const _Key& __key) const {
+    auto [__, __match] = __find_equal(__key);
+    if (__match == nullptr)
+      return end();
+    return const_iterator(static_cast<__node_pointer>(__match));
+  }
 
   template <class _Key>
   _LIBCPP_HIDE_FROM_ABI size_type __count_unique(const _Key& __k) const;
   template <class _Key>
   _LIBCPP_HIDE_FROM_ABI size_type __count_multi(const _Key& __k) const;
 
-  template <class _Key>
-  _LIBCPP_HIDE_FROM_ABI iterator lower_bound(const _Key& __v) {
-    return __lower_bound(__v, __root(), __end_node());
+  template <bool _LowerBound, class _Key> // _LowerBound picks the result for an equivalent key: true -> that node, false -> the node after it
+  _LIBCPP_HIDE_FROM_ABI __end_node_pointer __lower_upper_bound_unique_impl(const _Key& __v) const {
+    auto __rt     = __root();
+    auto __result = __end_node(); // stays the end node unless some visited node orders after __v
+    auto __comp   = __lazy_synth_three_way_comparator<_Compare, _Key, value_type>(value_comp());
+    while (__rt != nullptr) {
+      auto __comp_res = __comp(__v, __rt->__get_value()); // one comparison result, queried for less/greater below
+
+      if (__comp_res.__less()) { // __v orders before this node: remember it as the best candidate, then go left
+        __result = static_cast<__end_node_pointer>(__rt);
+        __rt     = static_cast<__node_pointer>(__rt->__left_);
+      } else if (__comp_res.__greater()) { // __v orders after this node: continue in the right subtree
+        __rt = static_cast<__node_pointer>(__rt->__right_);
+      } else if _LIBCPP_CONSTEXPR (_LowerBound) { // neither less nor greater: this node is returned as the lower bound
+        return static_cast<__end_node_pointer>(__rt);
+      } else { // upper bound of a match: minimum of its right subtree, else the last node we turned left at
+        return __rt->__right_ ? static_cast<__end_node_pointer>(std::__tree_min(__rt->__right_)) : __result;
+      }
+    }
+    return __result;
   }
+
+  // Compatibility escape hatch for comparators that are not strict weak orderings. This
+  // can be removed for the LLVM 23 release.
+#if defined(_LIBCPP_ENABLE_LEGACY_TREE_LOWER_UPPER_BOUND)
   template <class _Key>
-  _LIBCPP_HIDE_FROM_ABI iterator __lower_bound(const _Key& __v, __node_pointer __root, __end_node_pointer __result);
-  template <class _Key>
-  _LIBCPP_HIDE_FROM_ABI const_iterator lower_bound(const _Key& __v) const {
-    return __lower_bound(__v, __root(), __end_node());
+  _LIBCPP_HIDE_FROM_ABI __end_node_pointer __lower_bound_unique_compat_impl(const _Key& __v) const {
+    auto __rt     = __root();
+    auto __result = __end_node();
+    while (__rt != nullptr) {
+      if (!value_comp()(__rt->__get_value(), __v)) {
+        __result = std::__static_fancy_pointer_cast<__end_node_pointer>(__rt);
+        __rt     = std::__static_fancy_pointer_cast<__node_pointer>(__rt->__left_);
+      } else {
+        __rt = std::__static_fancy_pointer_cast<__node_pointer>(__rt->__right_);
+      }
+    }
+    return __result;
   }
+
+  template <class _Key> // Legacy upper-bound search; only compiled when _LIBCPP_ENABLE_LEGACY_TREE_LOWER_UPPER_BOUND is defined.
+  _LIBCPP_HIDE_FROM_ABI __end_node_pointer __upper_bound_unique_compat_impl(const _Key& __v) const {
+    auto __rt     = __root();
+    auto __result = __end_node(); // stays the end node unless some visited node orders after __v
+    while (__rt != nullptr) {
+      if (value_comp()(__v, __rt->__get_value())) { // node orders after __v: record it, then look for an earlier one on the left
+        __result = std::__static_fancy_pointer_cast<__end_node_pointer>(__rt);
+        __rt     = std::__static_fancy_pointer_cast<__node_pointer>(__rt->__left_);
+      } else { // node does not order after __v: continue in the right subtree
+        __rt = std::__static_fancy_pointer_cast<__node_pointer>(__rt->__right_);
+      }
+    }
+    return __result;
+  }
+#endif // _LIBCPP_ENABLE_LEGACY_TREE_LOWER_UPPER_BOUND
+
+  template <class _Key>
+  _LIBCPP_HIDE_FROM_ABI iterator __lower_bound_unique(const _Key& __v) {
+#if defined(_LIBCPP_ENABLE_LEGACY_TREE_LOWER_UPPER_BOUND)
+    return iterator(__lower_bound_unique_compat_impl(__v));
+#else
+    return iterator(__lower_upper_bound_unique_impl<true>(__v));
+#endif
+  }
+
+  template <class _Key>
+  _LIBCPP_HIDE_FROM_ABI const_iterator __lower_bound_unique(const _Key& __v) const {
+#if defined(_LIBCPP_ENABLE_LEGACY_TREE_LOWER_UPPER_BOUND)
+    return const_iterator(__lower_bound_unique_compat_impl(__v));
+#else
+    return const_iterator(__lower_upper_bound_unique_impl<true>(__v));
+#endif
+  }
+
+  template <class _Key>
+  _LIBCPP_HIDE_FROM_ABI iterator __upper_bound_unique(const _Key& __v) {
+#if defined(_LIBCPP_ENABLE_LEGACY_TREE_LOWER_UPPER_BOUND)
+    return iterator(__upper_bound_unique_compat_impl(__v));
+#else
+    return iterator(__lower_upper_bound_unique_impl<false>(__v));
+#endif
+  }
+
+  template <class _Key> // const overload: builds the const_iterator directly, matching the const __lower_bound_unique overload
+  _LIBCPP_HIDE_FROM_ABI const_iterator __upper_bound_unique(const _Key& __v) const {
+#if defined(_LIBCPP_ENABLE_LEGACY_TREE_LOWER_UPPER_BOUND) // opt-in legacy search
+    return const_iterator(__upper_bound_unique_compat_impl(__v));
+#else // default search built on the synthesized three-way comparator
+    return const_iterator(__lower_upper_bound_unique_impl<false>(__v));
+#endif
+  }
+
+private:
+  template <class _Key>
+  _LIBCPP_HIDE_FROM_ABI iterator
+  __lower_bound_multi(const _Key& __v, __node_pointer __root, __end_node_pointer __result);
+
   template <class _Key>
   _LIBCPP_HIDE_FROM_ABI const_iterator
-  __lower_bound(const _Key& __v, __node_pointer __root, __end_node_pointer __result) const;
+  __lower_bound_multi(const _Key& __v, __node_pointer __root, __end_node_pointer __result) const;
+
+public:
   template <class _Key>
-  _LIBCPP_HIDE_FROM_ABI iterator upper_bound(const _Key& __v) {
-    return __upper_bound(__v, __root(), __end_node());
+  _LIBCPP_HIDE_FROM_ABI iterator __lower_bound_multi(const _Key& __v) {
+    return __lower_bound_multi(__v, __root(), __end_node());
   }
   template <class _Key>
-  _LIBCPP_HIDE_FROM_ABI iterator __upper_bound(const _Key& __v, __node_pointer __root, __end_node_pointer __result);
-  template <class _Key>
-  _LIBCPP_HIDE_FROM_ABI const_iterator upper_bound(const _Key& __v) const {
-    return __upper_bound(__v, __root(), __end_node());
+  _LIBCPP_HIDE_FROM_ABI const_iterator __lower_bound_multi(const _Key& __v) const {
+    return __lower_bound_multi(__v, __root(), __end_node());
   }
+
+  template <class _Key>
+  _LIBCPP_HIDE_FROM_ABI iterator __upper_bound_multi(const _Key& __v) {
+    return __upper_bound_multi(__v, __root(), __end_node());
+  }
+
+  template <class _Key>
+  _LIBCPP_HIDE_FROM_ABI const_iterator __upper_bound_multi(const _Key& __v) const {
+    return __upper_bound_multi(__v, __root(), __end_node());
+  }
+
+private:
+  template <class _Key>
+  _LIBCPP_HIDE_FROM_ABI iterator
+  __upper_bound_multi(const _Key& __v, __node_pointer __root, __end_node_pointer __result);
+
   template <class _Key>
   _LIBCPP_HIDE_FROM_ABI const_iterator
-  __upper_bound(const _Key& __v, __node_pointer __root, __end_node_pointer __result) const;
+  __upper_bound_multi(const _Key& __v, __node_pointer __root, __end_node_pointer __result) const;
+
+public:
   template <class _Key>
   _LIBCPP_HIDE_FROM_ABI pair<iterator, iterator> __equal_range_unique(const _Key& __k);
   template <class _Key>
@@ -1120,22 +1366,24 @@ public:
   template <class _Key>
   _LIBCPP_HIDE_FROM_ABI pair<const_iterator, const_iterator> __equal_range_multi(const _Key& __k) const;
 
-  typedef __tree_node_destructor<__node_allocator> _Dp;
-  typedef unique_ptr<__node, _Dp> __node_holder;
+  using _Dp _LIBCPP_NODEBUG           = __tree_node_destructor<__node_allocator>;
+  using __node_holder _LIBCPP_NODEBUG = unique_ptr<__node, _Dp>;
 
   _LIBCPP_HIDE_FROM_ABI __node_holder remove(const_iterator __p) _NOEXCEPT;
 
   // FIXME: Make this function const qualified. Unfortunately doing so
   // breaks existing code which uses non-const callable comparators.
   template <class _Key>
-  _LIBCPP_HIDE_FROM_ABI __node_base_pointer& __find_equal(__end_node_pointer& __parent, const _Key& __v);
+  _LIBCPP_HIDE_FROM_ABI pair<__end_node_pointer, __node_base_pointer&> __find_equal(const _Key& __v);
+
   template <class _Key>
-  _LIBCPP_HIDE_FROM_ABI __node_base_pointer& __find_equal(__end_node_pointer& __parent, const _Key& __v) const {
-    return const_cast<__tree*>(this)->__find_equal(__parent, __v);
+  _LIBCPP_HIDE_FROM_ABI pair<__end_node_pointer, __node_base_pointer&> __find_equal(const _Key& __v) const {
+    return const_cast<__tree*>(this)->__find_equal(__v);
   }
+
   template <class _Key>
-  _LIBCPP_HIDE_FROM_ABI __node_base_pointer&
-  __find_equal(const_iterator __hint, __end_node_pointer& __parent, __node_base_pointer& __dummy, const _Key& __v);
+  _LIBCPP_HIDE_FROM_ABI pair<__end_node_pointer, __node_base_pointer&>
+  __find_equal(const_iterator __hint, __node_base_pointer& __dummy, const _Key& __v);
 
   _LIBCPP_HIDE_FROM_ABI void __copy_assign_alloc(const __tree& __t) {
     __copy_assign_alloc(__t, integral_constant<bool, __node_traits::propagate_on_container_copy_assignment::value>());
@@ -1160,7 +1408,7 @@ private:
   _LIBCPP_HIDE_FROM_ABI __node_holder __construct_node(_Args&&... __args);
 
   // TODO: Make this _LIBCPP_HIDE_FROM_ABI
-  _LIBCPP_HIDDEN void destroy(__node_pointer __nd) _NOEXCEPT;
+  _LIBCPP_HIDDEN void destroy(__node_pointer __nd) _NOEXCEPT { (__tree_deleter(__node_alloc_))(__nd); }
 
   _LIBCPP_HIDE_FROM_ABI void __move_assign(__tree& __t, false_type);
   _LIBCPP_HIDE_FROM_ABI void __move_assign(__tree& __t, true_type) _NOEXCEPT_(
@@ -1178,7 +1426,7 @@ private:
   }
   _LIBCPP_HIDE_FROM_ABI void __move_assign_alloc(__tree&, false_type) _NOEXCEPT {}
 
-  template <class _From, class _ValueT = _Tp, __enable_if_t<__is_tree_value_type<_ValueT>::value, int> = 0>
+  template <class _From, class _ValueT = _Tp, __enable_if_t<__is_tree_value_type_v<_ValueT>, int> = 0>
   _LIBCPP_HIDE_FROM_ABI static void __assign_value(__get_node_value_type_t<value_type>& __lhs, _From&& __rhs) {
     using __key_type = __remove_const_t<typename value_type::first_type>;
 
@@ -1188,166 +1436,203 @@ private:
     __lhs.second                         = std::forward<_From>(__rhs).second;
   }
 
-  template <class _To, class _From, class _ValueT = _Tp, __enable_if_t<!__is_tree_value_type<_ValueT>::value, int> = 0>
+  template <class _To, class _From, class _ValueT = _Tp, __enable_if_t<!__is_tree_value_type_v<_ValueT>, int> = 0>
   _LIBCPP_HIDE_FROM_ABI static void __assign_value(_To& __lhs, _From&& __rhs) {
     __lhs = std::forward<_From>(__rhs);
   }
 
-  struct _DetachedTreeCache {
-    _LIBCPP_HIDE_FROM_ABI explicit _DetachedTreeCache(__tree* __t) _NOEXCEPT
-        : __t_(__t),
-          __cache_root_(__detach_from_tree(__t)) {
-      __advance();
+  class __tree_deleter {
+    __node_allocator& __alloc_;
+
+  public:
+    using pointer = __node_pointer;
+
+    _LIBCPP_HIDE_FROM_ABI __tree_deleter(__node_allocator& __alloc) : __alloc_(__alloc) {}
+
+#ifdef _LIBCPP_COMPILER_CLANG_BASED // FIXME: GCC complains about not being able to always_inline a recursive function
+    _LIBCPP_HIDE_FROM_ABI
+#endif
+    void
+    operator()(__node_pointer __ptr) {
+      if (!__ptr)
+        return;
+
+      (*this)(static_cast<__node_pointer>(__ptr->__left_));
+
+      auto __right = __ptr->__right_;
+
+      __node_traits::destroy(__alloc_, std::addressof(__ptr->__get_value()));
+      __node_traits::deallocate(__alloc_, __ptr, 1);
+
+      (*this)(static_cast<__node_pointer>(__right));
     }
-
-    _LIBCPP_HIDE_FROM_ABI __node_pointer __get() const _NOEXCEPT { return __cache_elem_; }
-
-    _LIBCPP_HIDE_FROM_ABI void __advance() _NOEXCEPT {
-      __cache_elem_ = __cache_root_;
-      if (__cache_root_) {
-        __cache_root_ = __detach_next(__cache_root_);
-      }
-    }
-
-    _LIBCPP_HIDE_FROM_ABI ~_DetachedTreeCache() {
-      __t_->destroy(__cache_elem_);
-      if (__cache_root_) {
-        while (__cache_root_->__parent_ != nullptr)
-          __cache_root_ = static_cast<__node_pointer>(__cache_root_->__parent_);
-        __t_->destroy(__cache_root_);
-      }
-    }
-
-    _DetachedTreeCache(_DetachedTreeCache const&)            = delete;
-    _DetachedTreeCache& operator=(_DetachedTreeCache const&) = delete;
-
-  private:
-    _LIBCPP_HIDE_FROM_ABI static __node_pointer __detach_from_tree(__tree* __t) _NOEXCEPT;
-    _LIBCPP_HIDE_FROM_ABI static __node_pointer __detach_next(__node_pointer) _NOEXCEPT;
-
-    __tree* __t_;
-    __node_pointer __cache_root_;
-    __node_pointer __cache_elem_;
   };
+
+  // This copy construction will always produce a correct red-black-tree assuming the incoming tree is correct, since we
+  // copy the exact structure 1:1. Since this is for copy construction _only_ we know that we get a correct tree. If we
+  // didn't get a correct tree, the invariants of __tree are broken and we have a much bigger problem than an improperly
+  // balanced tree.
+  template <class _NodeConstructor>
+#ifdef _LIBCPP_COMPILER_CLANG_BASED // FIXME: GCC complains about not being able to always_inline a recursive function
+  _LIBCPP_HIDE_FROM_ABI
+#endif
+  __node_pointer __construct_from_tree(__node_pointer __src, _NodeConstructor __construct) { // clones the subtree rooted at __src
+    if (!__src)
+      return nullptr; // empty subtree: nothing to build
+
+    __node_holder __new_node = __construct(__src->__get_value()); // __construct builds one node from the source value
+
+    unique_ptr<__node, __tree_deleter> __left( // owning wrapper: __tree_deleter frees the whole left subtree unless release() is reached
+        __construct_from_tree(static_cast<__node_pointer>(__src->__left_), __construct), __node_alloc_);
+    __node_pointer __right = __construct_from_tree(static_cast<__node_pointer>(__src->__right_), __construct);
+
+    __node_pointer __new_node_ptr = __new_node.release();
+
+    __new_node_ptr->__is_black_ = __src->__is_black_; // the copy keeps the colour of the source node
+    __new_node_ptr->__left_     = static_cast<__node_base_pointer>(__left.release());
+    __new_node_ptr->__right_    = static_cast<__node_base_pointer>(__right);
+    if (__new_node_ptr->__left_) // point the freshly built children back at the new node
+      __new_node_ptr->__left_->__parent_ = static_cast<__end_node_pointer>(__new_node_ptr);
+    if (__new_node_ptr->__right_)
+      __new_node_ptr->__right_->__parent_ = static_cast<__end_node_pointer>(__new_node_ptr);
+    return __new_node_ptr; // __parent_ of the returned node is not set here; the caller links it
+  }
+
+  _LIBCPP_HIDE_FROM_ABI __node_pointer __copy_construct_tree(__node_pointer __src) {
+    return __construct_from_tree(__src, [this](const value_type& __val) { return __construct_node(__val); });
+  }
+
+  template <class _ValueT = _Tp, __enable_if_t<__is_tree_value_type_v<_ValueT>, int> = 0>
+  _LIBCPP_HIDE_FROM_ABI __node_pointer __move_construct_tree(__node_pointer __src) {
+    return __construct_from_tree(__src, [this](value_type& __val) {
+      return __construct_node(const_cast<key_type&&>(__val.first), std::move(__val.second));
+    });
+  }
+
+  template <class _ValueT = _Tp, __enable_if_t<!__is_tree_value_type_v<_ValueT>, int> = 0>
+  _LIBCPP_HIDE_FROM_ABI __node_pointer __move_construct_tree(__node_pointer __src) {
+    return __construct_from_tree(__src, [this](value_type& __val) { return __construct_node(std::move(__val)); });
+  }
+
+  template <class _Assignment, class _ConstructionAlg>
+  // This assignment (shared by copy and move assignment) will always produce a correct red-black-tree assuming the
+  // incoming tree is correct, since our own tree is a red-black-tree and the incoming tree is a red-black-tree. The
+  // invariants of a red-black-tree are temporarily not met until all of the incoming red-black tree is processed.
+#ifdef _LIBCPP_COMPILER_CLANG_BASED // FIXME: GCC complains about not being able to always_inline a recursive function
+  _LIBCPP_HIDE_FROM_ABI
+#endif
+  __node_pointer __assign_from_tree(
+      __node_pointer __dest, __node_pointer __src, _Assignment __assign, _ConstructionAlg __construct_subtree) {
+    if (!__src) { // source subtree is empty: the destination subtree is surplus
+      destroy(__dest);
+      return nullptr; // the caller stores this as the new (empty) child/root link
+    }
+
+    __assign(__dest->__get_value(), __src->__get_value()); // reuse the existing node: overwrite its value in place
+    __dest->__is_black_ = __src->__is_black_;
+
+    // If we already have a left node in the destination tree, reuse it and assign recursively
+    if (__dest->__left_) {
+      __dest->__left_ = static_cast<__node_base_pointer>(__assign_from_tree(
+          static_cast<__node_pointer>(__dest->__left_),
+          static_cast<__node_pointer>(__src->__left_),
+          __assign,
+          __construct_subtree));
+
+      // Otherwise, we must create new nodes; construct the remaining subtree via __construct_subtree from here on
+    } else if (__src->__left_) {
+      auto __new_left       = __construct_subtree(static_cast<__node_pointer>(__src->__left_));
+      __dest->__left_       = static_cast<__node_base_pointer>(__new_left);
+      __new_left->__parent_ = static_cast<__end_node_pointer>(__dest); // new subtree root gets its parent link here
+    }
+
+    // Identical to the left case above, just for the right nodes
+    if (__dest->__right_) {
+      __dest->__right_ = static_cast<__node_base_pointer>(__assign_from_tree(
+          static_cast<__node_pointer>(__dest->__right_),
+          static_cast<__node_pointer>(__src->__right_),
+          __assign,
+          __construct_subtree));
+    } else if (__src->__right_) {
+      auto __new_right       = __construct_subtree(static_cast<__node_pointer>(__src->__right_));
+      __dest->__right_       = static_cast<__node_base_pointer>(__new_right);
+      __new_right->__parent_ = static_cast<__end_node_pointer>(__dest);
+    }
+
+    return __dest; // same node that was passed in; its own __parent_ is not modified by this function
+  }
+
+  _LIBCPP_HIDE_FROM_ABI __node_pointer __copy_assign_tree(__node_pointer __dest, __node_pointer __src) {
+    return __assign_from_tree(
+        __dest,
+        __src,
+        [](value_type& __lhs, const value_type& __rhs) { __assign_value(__lhs, __rhs); },
+        [this](__node_pointer __nd) { return __copy_construct_tree(__nd); });
+  }
+
+  _LIBCPP_HIDE_FROM_ABI __node_pointer __move_assign_tree(__node_pointer __dest, __node_pointer __src) {
+    return __assign_from_tree(
+        __dest,
+        __src,
+        [](value_type& __lhs, value_type& __rhs) { __assign_value(__lhs, std::move(__rhs)); },
+        [this](__node_pointer __nd) { return __move_construct_tree(__nd); });
+  }
+
+  friend struct __specialized_algorithm<_Algorithm::__for_each, __single_range<__tree> >;
 };
 
+#if _LIBCPP_STD_VER >= 14
 template <class _Tp, class _Compare, class _Allocator>
-__tree<_Tp, _Compare, _Allocator>::__tree(const value_compare& __comp) _NOEXCEPT_(
-    is_nothrow_default_constructible<__node_allocator>::value&& is_nothrow_copy_constructible<value_compare>::value)
-    : __size_(0), __value_comp_(__comp) {
-  __begin_node() = __end_node();
-}
+struct __specialized_algorithm<_Algorithm::__for_each, __single_range<__tree<_Tp, _Compare, _Allocator> > > {
+  static const bool __has_algorithm = true;
 
-template <class _Tp, class _Compare, class _Allocator>
-__tree<_Tp, _Compare, _Allocator>::__tree(const allocator_type& __a)
-    : __begin_node_(), __node_alloc_(__node_allocator(__a)), __size_(0) {
-  __begin_node() = __end_node();
-}
+  using __node_pointer _LIBCPP_NODEBUG = typename __tree<_Tp, _Compare, _Allocator>::__node_pointer;
 
-template <class _Tp, class _Compare, class _Allocator>
-__tree<_Tp, _Compare, _Allocator>::__tree(const value_compare& __comp, const allocator_type& __a)
-    : __begin_node_(), __node_alloc_(__node_allocator(__a)), __size_(0), __value_comp_(__comp) {
-  __begin_node() = __end_node();
-}
-
-// Precondition:  size() != 0
-template <class _Tp, class _Compare, class _Allocator>
-typename __tree<_Tp, _Compare, _Allocator>::__node_pointer
-__tree<_Tp, _Compare, _Allocator>::_DetachedTreeCache::__detach_from_tree(__tree* __t) _NOEXCEPT {
-  __node_pointer __cache                = static_cast<__node_pointer>(__t->__begin_node());
-  __t->__begin_node()                   = __t->__end_node();
-  __t->__end_node()->__left_->__parent_ = nullptr;
-  __t->__end_node()->__left_            = nullptr;
-  __t->size()                           = 0;
-  // __cache->__left_ == nullptr
-  if (__cache->__right_ != nullptr)
-    __cache = static_cast<__node_pointer>(__cache->__right_);
-  // __cache->__left_ == nullptr
-  // __cache->__right_ == nullptr
-  return __cache;
-}
-
-// Precondition:  __cache != nullptr
-//    __cache->left_ == nullptr
-//    __cache->right_ == nullptr
-//    This is no longer a red-black tree
-template <class _Tp, class _Compare, class _Allocator>
-typename __tree<_Tp, _Compare, _Allocator>::__node_pointer
-__tree<_Tp, _Compare, _Allocator>::_DetachedTreeCache::__detach_next(__node_pointer __cache) _NOEXCEPT {
-  if (__cache->__parent_ == nullptr)
-    return nullptr;
-  if (std::__tree_is_left_child(static_cast<__node_base_pointer>(__cache))) {
-    __cache->__parent_->__left_ = nullptr;
-    __cache                     = static_cast<__node_pointer>(__cache->__parent_);
-    if (__cache->__right_ == nullptr)
-      return __cache;
-    return static_cast<__node_pointer>(std::__tree_leaf(__cache->__right_));
+  template <class _Tree, class _Func, class _Proj>
+  _LIBCPP_HIDE_FROM_ABI static auto operator()(_Tree&& __range, _Func __func, _Proj __proj) {
+    if (__range.size() != 0)
+      std::__tree_iterate_from_root<__copy_cvref_t<_Tree, typename __remove_cvref_t<_Tree>::value_type>>(
+          [](__node_pointer) { return false; }, __range.__root(), __func, __proj);
+    return std::make_pair(__range.end(), std::move(__func));
   }
-  // __cache is right child
-  __cache->__parent_unsafe()->__right_ = nullptr;
-  __cache                              = static_cast<__node_pointer>(__cache->__parent_);
-  if (__cache->__left_ == nullptr)
-    return __cache;
-  return static_cast<__node_pointer>(std::__tree_leaf(__cache->__left_));
-}
+};
+#endif
 
 template <class _Tp, class _Compare, class _Allocator>
 __tree<_Tp, _Compare, _Allocator>& __tree<_Tp, _Compare, _Allocator>::operator=(const __tree& __t) {
-  if (this != std::addressof(__t)) {
-    value_comp() = __t.value_comp();
-    __copy_assign_alloc(__t);
-    __assign_multi(__t.begin(), __t.end());
+  if (this == std::addressof(__t))
+    return *this;
+
+  value_comp() = __t.value_comp();
+  __copy_assign_alloc(__t);
+
+  if (__size_ != 0) {
+    *__root_ptr() = static_cast<__node_base_pointer>(__copy_assign_tree(__root(), __t.__root()));
+  } else {
+    *__root_ptr() = static_cast<__node_base_pointer>(__copy_construct_tree(__t.__root()));
+    if (__root())
+      __root()->__parent_ = __end_node();
   }
+  __begin_node_ =
+      __end_node()->__left_ ? static_cast<__end_node_pointer>(std::__tree_min(__end_node()->__left_)) : __end_node();
+  __size_ = __t.size();
+
   return *this;
 }
 
-template <class _Tp, class _Compare, class _Allocator>
-template <class _ForwardIterator>
-void __tree<_Tp, _Compare, _Allocator>::__assign_unique(_ForwardIterator __first, _ForwardIterator __last) {
-  typedef iterator_traits<_ForwardIterator> _ITraits;
-  typedef typename _ITraits::value_type _ItValueType;
-  static_assert(
-      is_same<_ItValueType, value_type>::value, "__assign_unique may only be called with the containers value type");
-  static_assert(
-      __has_forward_iterator_category<_ForwardIterator>::value, "__assign_unique requires a forward iterator");
-  if (size() != 0) {
-    _DetachedTreeCache __cache(this);
-    for (; __cache.__get() != nullptr && __first != __last; ++__first) {
-      if (__node_assign_unique(*__first, __cache.__get()).second)
-        __cache.__advance();
-    }
-  }
-  for (; __first != __last; ++__first)
-    __emplace_unique(*__first);
-}
-
-template <class _Tp, class _Compare, class _Allocator>
-template <class _InputIterator>
-void __tree<_Tp, _Compare, _Allocator>::__assign_multi(_InputIterator __first, _InputIterator __last) {
-  typedef iterator_traits<_InputIterator> _ITraits;
-  typedef typename _ITraits::value_type _ItValueType;
-  static_assert(
-      is_same<_ItValueType, value_type>::value, "__assign_multi may only be called with the containers value_type");
-  if (size() != 0) {
-    _DetachedTreeCache __cache(this);
-    for (; __cache.__get() && __first != __last; ++__first) {
-      __assign_value(__cache.__get()->__value_, *__first);
-      __node_insert_multi(__cache.__get());
-      __cache.__advance();
-    }
-  }
-  const_iterator __e = end();
-  for (; __first != __last; ++__first)
-    __emplace_hint_multi(__e, *__first);
-}
-
 template <class _Tp, class _Compare, class _Allocator>
 __tree<_Tp, _Compare, _Allocator>::__tree(const __tree& __t)
-    : __begin_node_(),
+    : __begin_node_(__end_node()),
       __node_alloc_(__node_traits::select_on_container_copy_construction(__t.__node_alloc())),
       __size_(0),
       __value_comp_(__t.value_comp()) {
-  __begin_node() = __end_node();
+  if (__t.size() == 0)
+    return;
+
+  *__root_ptr()       = static_cast<__node_base_pointer>(__copy_construct_tree(__t.__root()));
+  __root()->__parent_ = __end_node();
+  __begin_node_       = static_cast<__end_node_pointer>(std::__tree_min(__end_node()->__left_));
+  __size_             = __t.size();
 }
 
 template <class _Tp, class _Compare, class _Allocator>
@@ -1358,33 +1643,38 @@ __tree<_Tp, _Compare, _Allocator>::__tree(__tree&& __t) _NOEXCEPT_(
       __node_alloc_(std::move(__t.__node_alloc_)),
       __size_(__t.__size_),
       __value_comp_(std::move(__t.__value_comp_)) {
-  if (size() == 0)
-    __begin_node() = __end_node();
+  if (__size_ == 0)
+    __begin_node_ = __end_node();
   else {
     __end_node()->__left_->__parent_ = static_cast<__end_node_pointer>(__end_node());
-    __t.__begin_node()               = __t.__end_node();
+    __t.__begin_node_                = __t.__end_node();
     __t.__end_node()->__left_        = nullptr;
-    __t.size()                       = 0;
+    __t.__size_                      = 0;
   }
 }
 
 template <class _Tp, class _Compare, class _Allocator>
 __tree<_Tp, _Compare, _Allocator>::__tree(__tree&& __t, const allocator_type& __a)
-    : __node_alloc_(__node_allocator(__a)), __size_(0), __value_comp_(std::move(__t.value_comp())) {
+    : __begin_node_(__end_node()),
+      __node_alloc_(__node_allocator(__a)),
+      __size_(0),
+      __value_comp_(std::move(__t.value_comp())) {
+  if (__t.size() == 0)
+    return;
   if (__a == __t.__alloc()) {
-    if (__t.size() == 0)
-      __begin_node() = __end_node();
-    else {
-      __begin_node()                   = __t.__begin_node();
-      __end_node()->__left_            = __t.__end_node()->__left_;
-      __end_node()->__left_->__parent_ = static_cast<__end_node_pointer>(__end_node());
-      size()                           = __t.size();
-      __t.__begin_node()               = __t.__end_node();
-      __t.__end_node()->__left_        = nullptr;
-      __t.size()                       = 0;
-    }
+    __begin_node_                    = __t.__begin_node_;
+    __end_node()->__left_            = __t.__end_node()->__left_;
+    __end_node()->__left_->__parent_ = static_cast<__end_node_pointer>(__end_node());
+    __size_                          = __t.__size_;
+    __t.__begin_node_                = __t.__end_node();
+    __t.__end_node()->__left_        = nullptr;
+    __t.__size_                      = 0;
   } else {
-    __begin_node() = __end_node();
+    *__root_ptr()       = static_cast<__node_base_pointer>(__move_construct_tree(__t.__root()));
+    __root()->__parent_ = __end_node();
+    __begin_node_       = static_cast<__end_node_pointer>(std::__tree_min(__end_node()->__left_));
+    __size_             = __t.size();
+    __t.clear(); // Ensure that __t is in a valid state after moving out the keys
   }
 }
 
@@ -1397,61 +1687,33 @@ void __tree<_Tp, _Compare, _Allocator>::__move_assign(__tree& __t, true_type)
   __move_assign_alloc(__t);
   __size_       = __t.__size_;
   __value_comp_ = std::move(__t.__value_comp_);
-  if (size() == 0)
-    __begin_node() = __end_node();
+  if (__size_ == 0)
+    __begin_node_ = __end_node();
   else {
     __end_node()->__left_->__parent_ = static_cast<__end_node_pointer>(__end_node());
-    __t.__begin_node()               = __t.__end_node();
+    __t.__begin_node_                = __t.__end_node();
     __t.__end_node()->__left_        = nullptr;
-    __t.size()                       = 0;
+    __t.__size_                      = 0;
   }
 }
 
 template <class _Tp, class _Compare, class _Allocator>
 void __tree<_Tp, _Compare, _Allocator>::__move_assign(__tree& __t, false_type) {
-  if (__node_alloc() == __t.__node_alloc())
+  if (__node_alloc() == __t.__node_alloc()) {
     __move_assign(__t, true_type());
-  else {
-    value_comp()       = std::move(__t.value_comp());
-    const_iterator __e = end();
-    if (size() != 0) {
-      _DetachedTreeCache __cache(this);
-      while (__cache.__get() != nullptr && __t.size() != 0) {
-        __assign_value(__cache.__get()->__value_, std::move(__t.remove(__t.begin())->__value_));
-        __node_insert_multi(__cache.__get());
-        __cache.__advance();
-      }
+  } else {
+    value_comp() = std::move(__t.value_comp());
+    if (__size_ != 0) {
+      *__root_ptr() = static_cast<__node_base_pointer>(__move_assign_tree(__root(), __t.__root()));
+    } else {
+      *__root_ptr() = static_cast<__node_base_pointer>(__move_construct_tree(__t.__root()));
+      if (__root())
+        __root()->__parent_ = __end_node();
     }
-    while (__t.size() != 0) {
-      __insert_multi_from_orphaned_node(__e, std::move(__t.remove(__t.begin())->__value_));
-    }
-  }
-}
-
-template <class _Tp, class _Compare, class _Allocator>
-__tree<_Tp, _Compare, _Allocator>& __tree<_Tp, _Compare, _Allocator>::operator=(__tree&& __t)
-    _NOEXCEPT_(is_nothrow_move_assignable<value_compare>::value &&
-               ((__node_traits::propagate_on_container_move_assignment::value &&
-                 is_nothrow_move_assignable<__node_allocator>::value) ||
-                allocator_traits<__node_allocator>::is_always_equal::value)) {
-  __move_assign(__t, integral_constant<bool, __node_traits::propagate_on_container_move_assignment::value>());
-  return *this;
-}
-
-template <class _Tp, class _Compare, class _Allocator>
-__tree<_Tp, _Compare, _Allocator>::~__tree() {
-  static_assert(is_copy_constructible<value_compare>::value, "Comparator must be copy-constructible.");
-  destroy(__root());
-}
-
-template <class _Tp, class _Compare, class _Allocator>
-void __tree<_Tp, _Compare, _Allocator>::destroy(__node_pointer __nd) _NOEXCEPT {
-  if (__nd != nullptr) {
-    destroy(static_cast<__node_pointer>(__nd->__left_));
-    destroy(static_cast<__node_pointer>(__nd->__right_));
-    __node_allocator& __na = __node_alloc();
-    __node_traits::destroy(__na, std::addressof(__nd->__value_));
-    __node_traits::deallocate(__na, __nd, 1);
+    __begin_node_ =
+        __end_node()->__left_ ? static_cast<__end_node_pointer>(std::__tree_min(__end_node()->__left_)) : __end_node();
+    __size_ = __t.size();
+    __t.clear(); // Ensure that __t is in a valid state after moving out the keys
   }
 }
 
@@ -1470,12 +1732,12 @@ void __tree<_Tp, _Compare, _Allocator>::swap(__tree& __t)
   std::__swap_allocator(__node_alloc(), __t.__node_alloc());
   swap(__size_, __t.__size_);
   swap(__value_comp_, __t.__value_comp_);
-  if (size() == 0)
-    __begin_node() = __end_node();
+  if (__size_ == 0)
+    __begin_node_ = __end_node();
   else
     __end_node()->__left_->__parent_ = __end_node();
-  if (__t.size() == 0)
-    __t.__begin_node() = __t.__end_node();
+  if (__t.__size_ == 0)
+    __t.__begin_node_ = __t.__end_node();
   else
     __t.__end_node()->__left_->__parent_ = __t.__end_node();
 }
@@ -1483,8 +1745,8 @@ void __tree<_Tp, _Compare, _Allocator>::swap(__tree& __t)
 template <class _Tp, class _Compare, class _Allocator>
 void __tree<_Tp, _Compare, _Allocator>::clear() _NOEXCEPT {
   destroy(__root());
-  size()                = 0;
-  __begin_node()        = __end_node();
+  __size_               = 0;
+  __begin_node_         = __end_node();
   __end_node()->__left_ = nullptr;
 }
 
@@ -1497,7 +1759,7 @@ __tree<_Tp, _Compare, _Allocator>::__find_leaf_low(__end_node_pointer& __parent,
   __node_pointer __nd = __root();
   if (__nd != nullptr) {
     while (true) {
-      if (value_comp()(__nd->__value_, __v)) {
+      if (value_comp()(__nd->__get_value(), __v)) {
         if (__nd->__right_ != nullptr)
           __nd = static_cast<__node_pointer>(__nd->__right_);
         else {
@@ -1527,7 +1789,7 @@ __tree<_Tp, _Compare, _Allocator>::__find_leaf_high(__end_node_pointer& __parent
   __node_pointer __nd = __root();
   if (__nd != nullptr) {
     while (true) {
-      if (value_comp()(__v, __nd->__value_)) {
+      if (value_comp()(__v, __nd->__get_value())) {
         if (__nd->__left_ != nullptr)
           __nd = static_cast<__node_pointer>(__nd->__left_);
         else {
@@ -1578,92 +1840,91 @@ typename __tree<_Tp, _Compare, _Allocator>::__node_base_pointer& __tree<_Tp, _Co
   return __find_leaf_low(__parent, __v);
 }
 
-// Find place to insert if __v doesn't exist
-// Set __parent to parent of null leaf
-// Return reference to null leaf
-// If __v exists, set parent to node of __v and return reference to node of __v
+// Find __v
+// If __v exists, return the node of __v itself and a reference to the pointer to the node of __v.
+// If __v doesn't exist, return the parent of the null leaf and a reference to the pointer to the null leaf.
 template <class _Tp, class _Compare, class _Allocator>
 template <class _Key>
-typename __tree<_Tp, _Compare, _Allocator>::__node_base_pointer&
-__tree<_Tp, _Compare, _Allocator>::__find_equal(__end_node_pointer& __parent, const _Key& __v) {
-  __node_pointer __nd           = __root();
-  __node_base_pointer* __nd_ptr = __root_ptr();
-  if (__nd != nullptr) {
-    while (true) {
-      if (value_comp()(__v, __nd->__value_)) {
-        if (__nd->__left_ != nullptr) {
-          __nd_ptr = std::addressof(__nd->__left_);
-          __nd     = static_cast<__node_pointer>(__nd->__left_);
-        } else {
-          __parent = static_cast<__end_node_pointer>(__nd);
-          return __parent->__left_;
-        }
-      } else if (value_comp()(__nd->__value_, __v)) {
-        if (__nd->__right_ != nullptr) {
-          __nd_ptr = std::addressof(__nd->__right_);
-          __nd     = static_cast<__node_pointer>(__nd->__right_);
-        } else {
-          __parent = static_cast<__end_node_pointer>(__nd);
-          return __nd->__right_;
-        }
-      } else {
-        __parent = static_cast<__end_node_pointer>(__nd);
-        return *__nd_ptr;
-      }
+_LIBCPP_HIDE_FROM_ABI pair<typename __tree<_Tp, _Compare, _Allocator>::__end_node_pointer,
+                           typename __tree<_Tp, _Compare, _Allocator>::__node_base_pointer&>
+__tree<_Tp, _Compare, _Allocator>::__find_equal(const _Key& __v) {
+  using _Pair = pair<__end_node_pointer, __node_base_pointer&>;
+
+  __node_pointer __nd = __root();
+
+  if (__nd == nullptr) {
+    auto __end = __end_node();
+    return _Pair(__end, __end->__left_);
+  }
+
+  __node_base_pointer* __node_ptr = __root_ptr();
+  auto&& __transparent            = std::__as_transparent<_Key>(value_comp());
+  auto __comp =
+      __lazy_synth_three_way_comparator<__make_transparent_t<_Key, _Compare>, _Key, value_type>(__transparent);
+
+  while (true) {
+    auto __comp_res = __comp(__v, __nd->__get_value());
+
+    if (__comp_res.__less()) {
+      if (__nd->__left_ == nullptr)
+        return _Pair(static_cast<__end_node_pointer>(__nd), __nd->__left_);
+
+      __node_ptr = std::addressof(__nd->__left_);
+      __nd       = static_cast<__node_pointer>(__nd->__left_);
+    } else if (__comp_res.__greater()) {
+      if (__nd->__right_ == nullptr)
+        return _Pair(static_cast<__end_node_pointer>(__nd), __nd->__right_);
+
+      __node_ptr = std::addressof(__nd->__right_);
+      __nd       = static_cast<__node_pointer>(__nd->__right_);
+    } else {
+      return _Pair(static_cast<__end_node_pointer>(__nd), *__node_ptr);
     }
   }
-  __parent = __end_node();
-  return __parent->__left_;
 }
 
-// Find place to insert if __v doesn't exist
+// Find __v
 // First check prior to __hint.
 // Next check after __hint.
 // Next do O(log N) search.
-// Set __parent to parent of null leaf
-// Return reference to null leaf
-// If __v exists, set parent to node of __v and return reference to node of __v
+// If __v exists, return the node of __v itself and a reference to a pointer to that node (possibly __dummy).
+// If __v doesn't exist, return the parent of the null leaf and a reference to the pointer to the null leaf.
 template <class _Tp, class _Compare, class _Allocator>
 template <class _Key>
-typename __tree<_Tp, _Compare, _Allocator>::__node_base_pointer& __tree<_Tp, _Compare, _Allocator>::__find_equal(
-    const_iterator __hint, __end_node_pointer& __parent, __node_base_pointer& __dummy, const _Key& __v) {
-  if (__hint == end() || value_comp()(__v, *__hint)) // check before
-  {
+_LIBCPP_HIDE_FROM_ABI pair<typename __tree<_Tp, _Compare, _Allocator>::__end_node_pointer,
+                           typename __tree<_Tp, _Compare, _Allocator>::__node_base_pointer&>
+__tree<_Tp, _Compare, _Allocator>::__find_equal(const_iterator __hint, __node_base_pointer& __dummy, const _Key& __v) {
+  using _Pair = pair<__end_node_pointer, __node_base_pointer&>;
+
+  if (__hint == end() || value_comp()(__v, *__hint)) { // check before
     // __v < *__hint
     const_iterator __prior = __hint;
     if (__prior == begin() || value_comp()(*--__prior, __v)) {
       // *prev(__hint) < __v < *__hint
-      if (__hint.__ptr_->__left_ == nullptr) {
-        __parent = __hint.__ptr_;
-        return __parent->__left_;
-      } else {
-        __parent = __prior.__ptr_;
-        return static_cast<__node_base_pointer>(__prior.__ptr_)->__right_;
-      }
+      if (__hint.__ptr_->__left_ == nullptr)
+        return _Pair(__hint.__ptr_, __hint.__ptr_->__left_);
+      return _Pair(__prior.__ptr_, static_cast<__node_pointer>(__prior.__ptr_)->__right_);
     }
     // __v <= *prev(__hint)
-    return __find_equal(__parent, __v);
-  } else if (value_comp()(*__hint, __v)) // check after
-  {
+    return __find_equal(__v);
+  }
+
+  if (value_comp()(*__hint, __v)) { // check after
     // *__hint < __v
     const_iterator __next = std::next(__hint);
     if (__next == end() || value_comp()(__v, *__next)) {
       // *__hint < __v < *std::next(__hint)
-      if (__hint.__get_np()->__right_ == nullptr) {
-        __parent = __hint.__ptr_;
-        return static_cast<__node_base_pointer>(__hint.__ptr_)->__right_;
-      } else {
-        __parent = __next.__ptr_;
-        return __parent->__left_;
-      }
+      if (__hint.__get_np()->__right_ == nullptr)
+        return _Pair(__hint.__ptr_, static_cast<__node_pointer>(__hint.__ptr_)->__right_);
+      return _Pair(__next.__ptr_, __next.__ptr_->__left_);
     }
     // *next(__hint) <= __v
-    return __find_equal(__parent, __v);
+    return __find_equal(__v);
   }
+
   // else __v == *__hint
-  __parent = __hint.__ptr_;
-  __dummy  = static_cast<__node_base_pointer>(__hint.__ptr_);
-  return __dummy;
+  __dummy = static_cast<__node_base_pointer>(__hint.__ptr_);
+  return _Pair(__hint.__ptr_, __dummy);
 }
 
 template <class _Tp, class _Compare, class _Allocator>
@@ -1674,46 +1935,10 @@ void __tree<_Tp, _Compare, _Allocator>::__insert_node_at(
   __new_node->__parent_ = __parent;
   // __new_node->__is_black_ is initialized in __tree_balance_after_insert
   __child = __new_node;
-  if (__begin_node()->__left_ != nullptr)
-    __begin_node() = static_cast<__end_node_pointer>(__begin_node()->__left_);
+  if (__begin_node_->__left_ != nullptr)
+    __begin_node_ = static_cast<__end_node_pointer>(__begin_node_->__left_);
   std::__tree_balance_after_insert(__end_node()->__left_, __child);
-  ++size();
-}
-
-template <class _Tp, class _Compare, class _Allocator>
-template <class _Key, class... _Args>
-pair<typename __tree<_Tp, _Compare, _Allocator>::iterator, bool>
-__tree<_Tp, _Compare, _Allocator>::__emplace_unique_key_args(_Key const& __k, _Args&&... __args) {
-  __end_node_pointer __parent;
-  __node_base_pointer& __child = __find_equal(__parent, __k);
-  __node_pointer __r           = static_cast<__node_pointer>(__child);
-  bool __inserted              = false;
-  if (__child == nullptr) {
-    __node_holder __h = __construct_node(std::forward<_Args>(__args)...);
-    __insert_node_at(__parent, __child, static_cast<__node_base_pointer>(__h.get()));
-    __r        = __h.release();
-    __inserted = true;
-  }
-  return pair<iterator, bool>(iterator(__r), __inserted);
-}
-
-template <class _Tp, class _Compare, class _Allocator>
-template <class _Key, class... _Args>
-pair<typename __tree<_Tp, _Compare, _Allocator>::iterator, bool>
-__tree<_Tp, _Compare, _Allocator>::__emplace_hint_unique_key_args(
-    const_iterator __p, _Key const& __k, _Args&&... __args) {
-  __end_node_pointer __parent;
-  __node_base_pointer __dummy;
-  __node_base_pointer& __child = __find_equal(__p, __parent, __dummy, __k);
-  __node_pointer __r           = static_cast<__node_pointer>(__child);
-  bool __inserted              = false;
-  if (__child == nullptr) {
-    __node_holder __h = __construct_node(std::forward<_Args>(__args)...);
-    __insert_node_at(__parent, __child, static_cast<__node_base_pointer>(__h.get()));
-    __r        = __h.release();
-    __inserted = true;
-  }
-  return pair<iterator, bool>(iterator(__r), __inserted);
+  ++__size_;
 }
 
 template <class _Tp, class _Compare, class _Allocator>
@@ -1722,51 +1947,18 @@ typename __tree<_Tp, _Compare, _Allocator>::__node_holder
 __tree<_Tp, _Compare, _Allocator>::__construct_node(_Args&&... __args) {
   __node_allocator& __na = __node_alloc();
   __node_holder __h(__node_traits::allocate(__na, 1), _Dp(__na));
-  __node_traits::construct(__na, std::addressof(__h->__value_), std::forward<_Args>(__args)...);
+  std::__construct_at(std::addressof(*__h), __na, std::forward<_Args>(__args)...);
   __h.get_deleter().__value_constructed = true;
   return __h;
 }
 
-template <class _Tp, class _Compare, class _Allocator>
-template <class... _Args>
-pair<typename __tree<_Tp, _Compare, _Allocator>::iterator, bool>
-__tree<_Tp, _Compare, _Allocator>::__emplace_unique_impl(_Args&&... __args) {
-  __node_holder __h = __construct_node(std::forward<_Args>(__args)...);
-  __end_node_pointer __parent;
-  __node_base_pointer& __child = __find_equal(__parent, __h->__value_);
-  __node_pointer __r           = static_cast<__node_pointer>(__child);
-  bool __inserted              = false;
-  if (__child == nullptr) {
-    __insert_node_at(__parent, __child, static_cast<__node_base_pointer>(__h.get()));
-    __r        = __h.release();
-    __inserted = true;
-  }
-  return pair<iterator, bool>(iterator(__r), __inserted);
-}
-
-template <class _Tp, class _Compare, class _Allocator>
-template <class... _Args>
-typename __tree<_Tp, _Compare, _Allocator>::iterator
-__tree<_Tp, _Compare, _Allocator>::__emplace_hint_unique_impl(const_iterator __p, _Args&&... __args) {
-  __node_holder __h = __construct_node(std::forward<_Args>(__args)...);
-  __end_node_pointer __parent;
-  __node_base_pointer __dummy;
-  __node_base_pointer& __child = __find_equal(__p, __parent, __dummy, __h->__value_);
-  __node_pointer __r           = static_cast<__node_pointer>(__child);
-  if (__child == nullptr) {
-    __insert_node_at(__parent, __child, static_cast<__node_base_pointer>(__h.get()));
-    __r = __h.release();
-  }
-  return iterator(__r);
-}
-
 template <class _Tp, class _Compare, class _Allocator>
 template <class... _Args>
 typename __tree<_Tp, _Compare, _Allocator>::iterator
 __tree<_Tp, _Compare, _Allocator>::__emplace_multi(_Args&&... __args) {
   __node_holder __h = __construct_node(std::forward<_Args>(__args)...);
   __end_node_pointer __parent;
-  __node_base_pointer& __child = __find_leaf_high(__parent, __h->__value_);
+  __node_base_pointer& __child = __find_leaf_high(__parent, __h->__get_value());
   __insert_node_at(__parent, __child, static_cast<__node_base_pointer>(__h.get()));
   return iterator(static_cast<__node_pointer>(__h.release()));
 }
@@ -1777,53 +1969,19 @@ typename __tree<_Tp, _Compare, _Allocator>::iterator
 __tree<_Tp, _Compare, _Allocator>::__emplace_hint_multi(const_iterator __p, _Args&&... __args) {
   __node_holder __h = __construct_node(std::forward<_Args>(__args)...);
   __end_node_pointer __parent;
-  __node_base_pointer& __child = __find_leaf(__p, __parent, __h->__value_);
+  __node_base_pointer& __child = __find_leaf(__p, __parent, __h->__get_value());
   __insert_node_at(__parent, __child, static_cast<__node_base_pointer>(__h.get()));
   return iterator(static_cast<__node_pointer>(__h.release()));
 }
 
-template <class _Tp, class _Compare, class _Allocator>
-pair<typename __tree<_Tp, _Compare, _Allocator>::iterator, bool>
-__tree<_Tp, _Compare, _Allocator>::__node_assign_unique(const value_type& __v, __node_pointer __nd) {
-  __end_node_pointer __parent;
-  __node_base_pointer& __child = __find_equal(__parent, __v);
-  __node_pointer __r           = static_cast<__node_pointer>(__child);
-  bool __inserted              = false;
-  if (__child == nullptr) {
-    __assign_value(__nd->__value_, __v);
-    __insert_node_at(__parent, __child, static_cast<__node_base_pointer>(__nd));
-    __r        = __nd;
-    __inserted = true;
-  }
-  return pair<iterator, bool>(iterator(__r), __inserted);
-}
-
-template <class _Tp, class _Compare, class _Allocator>
-typename __tree<_Tp, _Compare, _Allocator>::iterator
-__tree<_Tp, _Compare, _Allocator>::__node_insert_multi(__node_pointer __nd) {
-  __end_node_pointer __parent;
-  __node_base_pointer& __child = __find_leaf_high(__parent, __nd->__value_);
-  __insert_node_at(__parent, __child, static_cast<__node_base_pointer>(__nd));
-  return iterator(__nd);
-}
-
-template <class _Tp, class _Compare, class _Allocator>
-typename __tree<_Tp, _Compare, _Allocator>::iterator
-__tree<_Tp, _Compare, _Allocator>::__node_insert_multi(const_iterator __p, __node_pointer __nd) {
-  __end_node_pointer __parent;
-  __node_base_pointer& __child = __find_leaf(__p, __parent, __nd->__value_);
-  __insert_node_at(__parent, __child, static_cast<__node_base_pointer>(__nd));
-  return iterator(__nd);
-}
-
 template <class _Tp, class _Compare, class _Allocator>
 typename __tree<_Tp, _Compare, _Allocator>::iterator
 __tree<_Tp, _Compare, _Allocator>::__remove_node_pointer(__node_pointer __ptr) _NOEXCEPT {
   iterator __r(__ptr);
   ++__r;
-  if (__begin_node() == __ptr)
-    __begin_node() = __r.__ptr_;
-  --size();
+  if (__begin_node_ == __ptr)
+    __begin_node_ = __r.__ptr_;
+  --__size_;
   std::__tree_remove(__end_node()->__left_, static_cast<__node_base_pointer>(__ptr));
   return __r;
 }
@@ -1837,8 +1995,7 @@ __tree<_Tp, _Compare, _Allocator>::__node_handle_insert_unique(_NodeHandle&& __n
     return _InsertReturnType{end(), false, _NodeHandle()};
 
   __node_pointer __ptr = __nh.__ptr_;
-  __end_node_pointer __parent;
-  __node_base_pointer& __child = __find_equal(__parent, __ptr->__value_);
+  auto [__parent, __child] = __find_equal(__ptr->__get_value());
   if (__child != nullptr)
     return _InsertReturnType{iterator(static_cast<__node_pointer>(__child)), false, std::move(__nh)};
 
@@ -1855,10 +2012,9 @@ __tree<_Tp, _Compare, _Allocator>::__node_handle_insert_unique(const_iterator __
     return end();
 
   __node_pointer __ptr = __nh.__ptr_;
-  __end_node_pointer __parent;
   __node_base_pointer __dummy;
-  __node_base_pointer& __child = __find_equal(__hint, __parent, __dummy, __ptr->__value_);
-  __node_pointer __r           = static_cast<__node_pointer>(__child);
+  auto [__parent, __child] = __find_equal(__hint, __dummy, __ptr->__get_value());
+  __node_pointer __r       = static_cast<__node_pointer>(__child);
   if (__child == nullptr) {
     __insert_node_at(__parent, __child, static_cast<__node_base_pointer>(__ptr));
     __r = __ptr;
@@ -1885,14 +2041,12 @@ _LIBCPP_HIDE_FROM_ABI _NodeHandle __tree<_Tp, _Compare, _Allocator>::__node_hand
 }
 
 template <class _Tp, class _Compare, class _Allocator>
-template <class _Tree>
-_LIBCPP_HIDE_FROM_ABI void __tree<_Tp, _Compare, _Allocator>::__node_handle_merge_unique(_Tree& __source) {
-  static_assert(is_same<typename _Tree::__node_pointer, __node_pointer>::value, "");
-
-  for (typename _Tree::iterator __i = __source.begin(); __i != __source.end();) {
+template <class _Comp2>
+_LIBCPP_HIDE_FROM_ABI void
+__tree<_Tp, _Compare, _Allocator>::__node_handle_merge_unique(__tree<_Tp, _Comp2, _Allocator>& __source) {
+  for (iterator __i = __source.begin(); __i != __source.end();) {
     __node_pointer __src_ptr = __i.__get_np();
-    __end_node_pointer __parent;
-    __node_base_pointer& __child = __find_equal(__parent, __src_ptr->__value_);
+    auto [__parent, __child] = __find_equal(__src_ptr->__get_value());
     ++__i;
     if (__child != nullptr)
       continue;
@@ -1909,7 +2063,7 @@ __tree<_Tp, _Compare, _Allocator>::__node_handle_insert_multi(_NodeHandle&& __nh
     return end();
   __node_pointer __ptr = __nh.__ptr_;
   __end_node_pointer __parent;
-  __node_base_pointer& __child = __find_leaf_high(__parent, __ptr->__value_);
+  __node_base_pointer& __child = __find_leaf_high(__parent, __ptr->__get_value());
   __insert_node_at(__parent, __child, static_cast<__node_base_pointer>(__ptr));
   __nh.__release_ptr();
   return iterator(__ptr);
@@ -1924,21 +2078,20 @@ __tree<_Tp, _Compare, _Allocator>::__node_handle_insert_multi(const_iterator __h
 
   __node_pointer __ptr = __nh.__ptr_;
   __end_node_pointer __parent;
-  __node_base_pointer& __child = __find_leaf(__hint, __parent, __ptr->__value_);
+  __node_base_pointer& __child = __find_leaf(__hint, __parent, __ptr->__get_value());
   __insert_node_at(__parent, __child, static_cast<__node_base_pointer>(__ptr));
   __nh.__release_ptr();
   return iterator(__ptr);
 }
 
 template <class _Tp, class _Compare, class _Allocator>
-template <class _Tree>
-_LIBCPP_HIDE_FROM_ABI void __tree<_Tp, _Compare, _Allocator>::__node_handle_merge_multi(_Tree& __source) {
-  static_assert(is_same<typename _Tree::__node_pointer, __node_pointer>::value, "");
-
-  for (typename _Tree::iterator __i = __source.begin(); __i != __source.end();) {
+template <class _Comp2>
+_LIBCPP_HIDE_FROM_ABI void
+__tree<_Tp, _Compare, _Allocator>::__node_handle_merge_multi(__tree<_Tp, _Comp2, _Allocator>& __source) {
+  for (iterator __i = __source.begin(); __i != __source.end();) {
     __node_pointer __src_ptr = __i.__get_np();
     __end_node_pointer __parent;
-    __node_base_pointer& __child = __find_leaf_high(__parent, __src_ptr->__value_);
+    __node_base_pointer& __child = __find_leaf_high(__parent, __src_ptr->__get_value());
     ++__i;
     __source.__remove_node_pointer(__src_ptr);
     __insert_node_at(__parent, __child, static_cast<__node_base_pointer>(__src_ptr));
@@ -1987,34 +2140,17 @@ __tree<_Tp, _Compare, _Allocator>::__erase_multi(const _Key& __k) {
   return __r;
 }
 
-template <class _Tp, class _Compare, class _Allocator>
-template <class _Key>
-typename __tree<_Tp, _Compare, _Allocator>::iterator __tree<_Tp, _Compare, _Allocator>::find(const _Key& __v) {
-  iterator __p = __lower_bound(__v, __root(), __end_node());
-  if (__p != end() && !value_comp()(__v, *__p))
-    return __p;
-  return end();
-}
-
-template <class _Tp, class _Compare, class _Allocator>
-template <class _Key>
-typename __tree<_Tp, _Compare, _Allocator>::const_iterator
-__tree<_Tp, _Compare, _Allocator>::find(const _Key& __v) const {
-  const_iterator __p = __lower_bound(__v, __root(), __end_node());
-  if (__p != end() && !value_comp()(__v, *__p))
-    return __p;
-  return end();
-}
-
 template <class _Tp, class _Compare, class _Allocator>
 template <class _Key>
 typename __tree<_Tp, _Compare, _Allocator>::size_type
 __tree<_Tp, _Compare, _Allocator>::__count_unique(const _Key& __k) const {
   __node_pointer __rt = __root();
+  auto __comp         = __lazy_synth_three_way_comparator<value_compare, _Key, value_type>(value_comp());
   while (__rt != nullptr) {
-    if (value_comp()(__k, __rt->__value_)) {
+    auto __comp_res = __comp(__k, __rt->__get_value());
+    if (__comp_res.__less()) {
       __rt = static_cast<__node_pointer>(__rt->__left_);
-    } else if (value_comp()(__rt->__value_, __k))
+    } else if (__comp_res.__greater())
       __rt = static_cast<__node_pointer>(__rt->__right_);
     else
       return 1;
@@ -2028,26 +2164,28 @@ typename __tree<_Tp, _Compare, _Allocator>::size_type
 __tree<_Tp, _Compare, _Allocator>::__count_multi(const _Key& __k) const {
   __end_node_pointer __result = __end_node();
   __node_pointer __rt         = __root();
+  auto __comp                 = __lazy_synth_three_way_comparator<value_compare, _Key, value_type>(value_comp());
   while (__rt != nullptr) {
-    if (value_comp()(__k, __rt->__value_)) {
+    auto __comp_res = __comp(__k, __rt->__get_value());
+    if (__comp_res.__less()) {
       __result = static_cast<__end_node_pointer>(__rt);
       __rt     = static_cast<__node_pointer>(__rt->__left_);
-    } else if (value_comp()(__rt->__value_, __k))
+    } else if (__comp_res.__greater())
       __rt = static_cast<__node_pointer>(__rt->__right_);
     else
       return std::distance(
-          __lower_bound(__k, static_cast<__node_pointer>(__rt->__left_), static_cast<__end_node_pointer>(__rt)),
-          __upper_bound(__k, static_cast<__node_pointer>(__rt->__right_), __result));
+          __lower_bound_multi(__k, static_cast<__node_pointer>(__rt->__left_), static_cast<__end_node_pointer>(__rt)),
+          __upper_bound_multi(__k, static_cast<__node_pointer>(__rt->__right_), __result));
   }
   return 0;
 }
 
 template <class _Tp, class _Compare, class _Allocator>
 template <class _Key>
-typename __tree<_Tp, _Compare, _Allocator>::iterator
-__tree<_Tp, _Compare, _Allocator>::__lower_bound(const _Key& __v, __node_pointer __root, __end_node_pointer __result) {
+typename __tree<_Tp, _Compare, _Allocator>::iterator __tree<_Tp, _Compare, _Allocator>::__lower_bound_multi(
+    const _Key& __v, __node_pointer __root, __end_node_pointer __result) {
   while (__root != nullptr) {
-    if (!value_comp()(__root->__value_, __v)) {
+    if (!value_comp()(__root->__get_value(), __v)) {
       __result = static_cast<__end_node_pointer>(__root);
       __root   = static_cast<__node_pointer>(__root->__left_);
     } else
@@ -2058,10 +2196,10 @@ __tree<_Tp, _Compare, _Allocator>::__lower_bound(const _Key& __v, __node_pointer
 
 template <class _Tp, class _Compare, class _Allocator>
 template <class _Key>
-typename __tree<_Tp, _Compare, _Allocator>::const_iterator __tree<_Tp, _Compare, _Allocator>::__lower_bound(
+typename __tree<_Tp, _Compare, _Allocator>::const_iterator __tree<_Tp, _Compare, _Allocator>::__lower_bound_multi(
     const _Key& __v, __node_pointer __root, __end_node_pointer __result) const {
   while (__root != nullptr) {
-    if (!value_comp()(__root->__value_, __v)) {
+    if (!value_comp()(__root->__get_value(), __v)) {
       __result = static_cast<__end_node_pointer>(__root);
       __root   = static_cast<__node_pointer>(__root->__left_);
     } else
@@ -2072,10 +2210,10 @@ typename __tree<_Tp, _Compare, _Allocator>::const_iterator __tree<_Tp, _Compare,
 
 template <class _Tp, class _Compare, class _Allocator>
 template <class _Key>
-typename __tree<_Tp, _Compare, _Allocator>::iterator
-__tree<_Tp, _Compare, _Allocator>::__upper_bound(const _Key& __v, __node_pointer __root, __end_node_pointer __result) {
+typename __tree<_Tp, _Compare, _Allocator>::iterator __tree<_Tp, _Compare, _Allocator>::__upper_bound_multi(
+    const _Key& __v, __node_pointer __root, __end_node_pointer __result) {
   while (__root != nullptr) {
-    if (value_comp()(__v, __root->__value_)) {
+    if (value_comp()(__v, __root->__get_value())) {
       __result = static_cast<__end_node_pointer>(__root);
       __root   = static_cast<__node_pointer>(__root->__left_);
     } else
@@ -2086,10 +2224,10 @@ __tree<_Tp, _Compare, _Allocator>::__upper_bound(const _Key& __v, __node_pointer
 
 template <class _Tp, class _Compare, class _Allocator>
 template <class _Key>
-typename __tree<_Tp, _Compare, _Allocator>::const_iterator __tree<_Tp, _Compare, _Allocator>::__upper_bound(
+typename __tree<_Tp, _Compare, _Allocator>::const_iterator __tree<_Tp, _Compare, _Allocator>::__upper_bound_multi(
     const _Key& __v, __node_pointer __root, __end_node_pointer __result) const {
   while (__root != nullptr) {
-    if (value_comp()(__v, __root->__value_)) {
+    if (value_comp()(__v, __root->__get_value())) {
       __result = static_cast<__end_node_pointer>(__root);
       __root   = static_cast<__node_pointer>(__root->__left_);
     } else
@@ -2102,14 +2240,16 @@ template <class _Tp, class _Compare, class _Allocator>
 template <class _Key>
 pair<typename __tree<_Tp, _Compare, _Allocator>::iterator, typename __tree<_Tp, _Compare, _Allocator>::iterator>
 __tree<_Tp, _Compare, _Allocator>::__equal_range_unique(const _Key& __k) {
-  typedef pair<iterator, iterator> _Pp;
+  using _Pp                   = pair<iterator, iterator>;
   __end_node_pointer __result = __end_node();
   __node_pointer __rt         = __root();
+  auto __comp                 = __lazy_synth_three_way_comparator<value_compare, _Key, value_type>(value_comp());
   while (__rt != nullptr) {
-    if (value_comp()(__k, __rt->__value_)) {
+    auto __comp_res = __comp(__k, __rt->__get_value());
+    if (__comp_res.__less()) {
       __result = static_cast<__end_node_pointer>(__rt);
       __rt     = static_cast<__node_pointer>(__rt->__left_);
-    } else if (value_comp()(__rt->__value_, __k))
+    } else if (__comp_res.__greater())
       __rt = static_cast<__node_pointer>(__rt->__right_);
     else
       return _Pp(iterator(__rt),
@@ -2124,14 +2264,16 @@ template <class _Key>
 pair<typename __tree<_Tp, _Compare, _Allocator>::const_iterator,
      typename __tree<_Tp, _Compare, _Allocator>::const_iterator>
 __tree<_Tp, _Compare, _Allocator>::__equal_range_unique(const _Key& __k) const {
-  typedef pair<const_iterator, const_iterator> _Pp;
+  using _Pp                   = pair<const_iterator, const_iterator>;
   __end_node_pointer __result = __end_node();
   __node_pointer __rt         = __root();
+  auto __comp                 = __lazy_synth_three_way_comparator<value_compare, _Key, value_type>(value_comp());
   while (__rt != nullptr) {
-    if (value_comp()(__k, __rt->__value_)) {
+    auto __comp_res = __comp(__k, __rt->__get_value());
+    if (__comp_res.__less()) {
       __result = static_cast<__end_node_pointer>(__rt);
       __rt     = static_cast<__node_pointer>(__rt->__left_);
-    } else if (value_comp()(__rt->__value_, __k))
+    } else if (__comp_res.__greater())
       __rt = static_cast<__node_pointer>(__rt->__right_);
     else
       return _Pp(
@@ -2146,18 +2288,21 @@ template <class _Tp, class _Compare, class _Allocator>
 template <class _Key>
 pair<typename __tree<_Tp, _Compare, _Allocator>::iterator, typename __tree<_Tp, _Compare, _Allocator>::iterator>
 __tree<_Tp, _Compare, _Allocator>::__equal_range_multi(const _Key& __k) {
-  typedef pair<iterator, iterator> _Pp;
+  using _Pp                   = pair<iterator, iterator>;
   __end_node_pointer __result = __end_node();
-  __node_pointer __rt     = __root();
+  __node_pointer __rt         = __root();
+  auto __comp                 = __lazy_synth_three_way_comparator<value_compare, _Key, value_type>(value_comp());
   while (__rt != nullptr) {
-    if (value_comp()(__k, __rt->__value_)) {
+    auto __comp_res = __comp(__k, __rt->__get_value());
+    if (__comp_res.__less()) {
       __result = static_cast<__end_node_pointer>(__rt);
       __rt     = static_cast<__node_pointer>(__rt->__left_);
-    } else if (value_comp()(__rt->__value_, __k))
+    } else if (__comp_res.__greater())
       __rt = static_cast<__node_pointer>(__rt->__right_);
     else
-      return _Pp(__lower_bound(__k, static_cast<__node_pointer>(__rt->__left_), static_cast<__end_node_pointer>(__rt)),
-                 __upper_bound(__k, static_cast<__node_pointer>(__rt->__right_), __result));
+      return _Pp(
+          __lower_bound_multi(__k, static_cast<__node_pointer>(__rt->__left_), static_cast<__end_node_pointer>(__rt)),
+          __upper_bound_multi(__k, static_cast<__node_pointer>(__rt->__right_), __result));
   }
   return _Pp(iterator(__result), iterator(__result));
 }
@@ -2167,18 +2312,21 @@ template <class _Key>
 pair<typename __tree<_Tp, _Compare, _Allocator>::const_iterator,
      typename __tree<_Tp, _Compare, _Allocator>::const_iterator>
 __tree<_Tp, _Compare, _Allocator>::__equal_range_multi(const _Key& __k) const {
-  typedef pair<const_iterator, const_iterator> _Pp;
+  using _Pp                   = pair<const_iterator, const_iterator>;
   __end_node_pointer __result = __end_node();
-  __node_pointer __rt     = __root();
+  __node_pointer __rt         = __root();
+  auto __comp                 = __lazy_synth_three_way_comparator<value_compare, _Key, value_type>(value_comp());
   while (__rt != nullptr) {
-    if (value_comp()(__k, __rt->__value_)) {
+    auto __comp_res = __comp(__k, __rt->__get_value());
+    if (__comp_res.__less()) {
       __result = static_cast<__end_node_pointer>(__rt);
       __rt     = static_cast<__node_pointer>(__rt->__left_);
-    } else if (value_comp()(__rt->__value_, __k))
+    } else if (__comp_res.__greater())
       __rt = static_cast<__node_pointer>(__rt->__right_);
     else
-      return _Pp(__lower_bound(__k, static_cast<__node_pointer>(__rt->__left_), static_cast<__end_node_pointer>(__rt)),
-                 __upper_bound(__k, static_cast<__node_pointer>(__rt->__right_), __result));
+      return _Pp(
+          __lower_bound_multi(__k, static_cast<__node_pointer>(__rt->__left_), static_cast<__end_node_pointer>(__rt)),
+          __upper_bound_multi(__k, static_cast<__node_pointer>(__rt->__right_), __result));
   }
   return _Pp(const_iterator(__result), const_iterator(__result));
 }
@@ -2187,13 +2335,13 @@ template <class _Tp, class _Compare, class _Allocator>
 typename __tree<_Tp, _Compare, _Allocator>::__node_holder
 __tree<_Tp, _Compare, _Allocator>::remove(const_iterator __p) _NOEXCEPT {
   __node_pointer __np = __p.__get_np();
-  if (__begin_node() == __p.__ptr_) {
+  if (__begin_node_ == __p.__ptr_) {
     if (__np->__right_ != nullptr)
-      __begin_node() = static_cast<__end_node_pointer>(__np->__right_);
+      __begin_node_ = static_cast<__end_node_pointer>(__np->__right_);
     else
-      __begin_node() = static_cast<__end_node_pointer>(__np->__parent_);
+      __begin_node_ = static_cast<__end_node_pointer>(__np->__parent_);
   }
-  --size();
+  --__size_;
   std::__tree_remove(__end_node()->__left_, static_cast<__node_base_pointer>(__np));
   return __node_holder(__np, _Dp(__node_alloc(), true));
 }
diff --git a/lib/libcxx/include/__tuple/sfinae_helpers.h b/lib/libcxx/include/__tuple/sfinae_helpers.h
index 9fe5e84e2f..f81048f406 100644
--- a/lib/libcxx/include/__tuple/sfinae_helpers.h
+++ b/lib/libcxx/include/__tuple/sfinae_helpers.h
@@ -10,20 +10,6 @@
 #define _LIBCPP___TUPLE_SFINAE_HELPERS_H
 
 #include <__config>
-#include <__cstddef/size_t.h>
-#include <__fwd/tuple.h>
-#include <__tuple/make_tuple_types.h>
-#include <__tuple/tuple_element.h>
-#include <__tuple/tuple_like_ext.h>
-#include <__tuple/tuple_size.h>
-#include <__tuple/tuple_types.h>
-#include <__type_traits/conjunction.h>
-#include <__type_traits/enable_if.h>
-#include <__type_traits/integral_constant.h>
-#include <__type_traits/is_constructible.h>
-#include <__type_traits/is_same.h>
-#include <__type_traits/remove_cvref.h>
-#include <__type_traits/remove_reference.h>
 
 #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
 #  pragma GCC system_header
@@ -33,36 +19,7 @@ _LIBCPP_BEGIN_NAMESPACE_STD
 
 #ifndef _LIBCPP_CXX03_LANG
 
-struct __tuple_sfinae_base {
-  template <template <class, class...> class _Trait, class... _LArgs, class... _RArgs>
-  static auto __do_test(__tuple_types<_LArgs...>,
-                        __tuple_types<_RArgs...>) -> __all<__enable_if_t<_Trait<_LArgs, _RArgs>::value, bool>{true}...>;
-  template <template <class...> class>
-  static auto __do_test(...) -> false_type;
-
-  template <class _FromArgs, class _ToArgs>
-  using __constructible _LIBCPP_NODEBUG = decltype(__do_test<is_constructible>(_ToArgs{}, _FromArgs{}));
-};
-
-// __tuple_constructible
-
-template <class _Tp,
-          class _Up,
-          bool = __tuple_like_ext<__libcpp_remove_reference_t<_Tp> >::value,
-          bool = __tuple_like_ext<_Up>::value>
-struct __tuple_constructible : public false_type {};
-
-template <class _Tp, class _Up>
-struct __tuple_constructible<_Tp, _Up, true, true>
-    : public __tuple_sfinae_base::__constructible< typename __make_tuple_types<_Tp>::type,
-                                                   typename __make_tuple_types<_Up>::type > {};
-
-template <size_t _Ip, class... _Tp>
-struct tuple_element<_Ip, tuple<_Tp...> > {
-  using type _LIBCPP_NODEBUG = typename tuple_element<_Ip, __tuple_types<_Tp...> >::type;
-};
-
-struct _LIBCPP_EXPORTED_FROM_ABI __check_tuple_constructor_fail {
+struct __check_tuple_constructor_fail {
   static _LIBCPP_HIDE_FROM_ABI constexpr bool __enable_explicit_default() { return false; }
   static _LIBCPP_HIDE_FROM_ABI constexpr bool __enable_implicit_default() { return false; }
   template <class...>
diff --git a/lib/libcxx/include/__tuple/tuple_element.h b/lib/libcxx/include/__tuple/tuple_element.h
index f67c867464..50a98079cc 100644
--- a/lib/libcxx/include/__tuple/tuple_element.h
+++ b/lib/libcxx/include/__tuple/tuple_element.h
@@ -11,8 +11,6 @@
 
 #include <__config>
 #include <__cstddef/size_t.h>
-#include <__tuple/tuple_indices.h>
-#include <__tuple/tuple_types.h>
 
 #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
 #  pragma GCC system_header
@@ -38,21 +36,11 @@ struct tuple_element<_Ip, const volatile _Tp> {
   using type _LIBCPP_NODEBUG = const volatile typename tuple_element<_Ip, _Tp>::type;
 };
 
-#ifndef _LIBCPP_CXX03_LANG
-
-template <size_t _Ip, class... _Types>
-struct tuple_element<_Ip, __tuple_types<_Types...> > {
-  static_assert(_Ip < sizeof...(_Types), "tuple_element index out of range");
-  using type _LIBCPP_NODEBUG = __type_pack_element<_Ip, _Types...>;
-};
-
 #  if _LIBCPP_STD_VER >= 14
 template <size_t _Ip, class... _Tp>
 using tuple_element_t _LIBCPP_NODEBUG = typename tuple_element<_Ip, _Tp...>::type;
 #  endif
 
-#endif // _LIBCPP_CXX03_LANG
-
 _LIBCPP_END_NAMESPACE_STD
 
 #endif // _LIBCPP___TUPLE_TUPLE_ELEMENT_H
diff --git a/lib/libcxx/include/__tuple/tuple_size.h b/lib/libcxx/include/__tuple/tuple_size.h
index 3308c000dc..719edc0e34 100644
--- a/lib/libcxx/include/__tuple/tuple_size.h
+++ b/lib/libcxx/include/__tuple/tuple_size.h
@@ -12,7 +12,6 @@
 #include <__config>
 #include <__cstddef/size_t.h>
 #include <__fwd/tuple.h>
-#include <__tuple/tuple_types.h>
 #include <__type_traits/enable_if.h>
 #include <__type_traits/integral_constant.h>
 #include <__type_traits/is_const.h>
@@ -59,9 +58,6 @@ struct tuple_size<const volatile _Tp> : public tuple_size<_Tp> {};
 template <class... _Tp>
 struct tuple_size<tuple<_Tp...> > : public integral_constant<size_t, sizeof...(_Tp)> {};
 
-template <class... _Tp>
-struct tuple_size<__tuple_types<_Tp...> > : public integral_constant<size_t, sizeof...(_Tp)> {};
-
 #  if _LIBCPP_STD_VER >= 17
 template <class _Tp>
 inline constexpr size_t tuple_size_v = tuple_size<_Tp>::value;
diff --git a/lib/libcxx/include/__tuple/tuple_transform.h b/lib/libcxx/include/__tuple/tuple_transform.h
new file mode 100644
index 0000000000..92a18e0f54
--- /dev/null
+++ b/lib/libcxx/include/__tuple/tuple_transform.h
@@ -0,0 +1,45 @@
+// -*- C++ -*-
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef _LIBCPP___TUPLE_TUPLE_TRANSFORM_H
+#define _LIBCPP___TUPLE_TUPLE_TRANSFORM_H
+
+#include <__config>
+
+#include <__functional/invoke.h>
+#include <__utility/forward.h>
+#include <tuple>
+
+#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
+#  pragma GCC system_header
+#endif
+
+_LIBCPP_PUSH_MACROS
+#include <__undef_macros>
+
+_LIBCPP_BEGIN_NAMESPACE_STD
+
+#if _LIBCPP_STD_VER >= 23
+
+template <class _Fun, class _Tuple>
+_LIBCPP_HIDE_FROM_ABI constexpr auto __tuple_transform(_Fun&& __f, _Tuple&& __tuple) {
+  return std::apply(
+      [&]<class... _Types>(_Types&&... __elements) {
+        return tuple<invoke_result_t<_Fun&, _Types>...>(std::invoke(__f, std::forward<_Types>(__elements))...);
+      },
+      std::forward<_Tuple>(__tuple));
+}
+
+#endif // _LIBCPP_STD_VER >= 23
+
+_LIBCPP_END_NAMESPACE_STD
+
+_LIBCPP_POP_MACROS
+
+#endif // _LIBCPP___TUPLE_TUPLE_TRANSFORM_H
diff --git a/lib/libcxx/include/__type_traits/aligned_storage.h b/lib/libcxx/include/__type_traits/aligned_storage.h
index 5c2208ae0c..33c0368d0c 100644
--- a/lib/libcxx/include/__type_traits/aligned_storage.h
+++ b/lib/libcxx/include/__type_traits/aligned_storage.h
@@ -11,8 +11,6 @@
 
 #include <__config>
 #include <__cstddef/size_t.h>
-#include <__type_traits/integral_constant.h>
-#include <__type_traits/type_list.h>
 
 #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
 #  pragma GCC system_header
@@ -21,10 +19,10 @@
 _LIBCPP_BEGIN_NAMESPACE_STD
 
 template <class _Tp>
-struct __align_type {
-  static const size_t value = _LIBCPP_PREFERRED_ALIGNOF(_Tp);
-  typedef _Tp type;
-};
+struct _ALIGNAS(_LIBCPP_PREFERRED_ALIGNOF(_Tp)) _AlignedAsT {};
+
+template <class... _Args>
+struct __max_align_impl : _AlignedAsT<_Args>... {};
 
 struct __struct_double {
   long double __lx;
@@ -33,41 +31,16 @@ struct __struct_double4 {
   double __lx[4];
 };
 
-using __all_types _LIBCPP_NODEBUG =
-    __type_list<__align_type<unsigned char>,
-                __align_type<unsigned short>,
-                __align_type<unsigned int>,
-                __align_type<unsigned long>,
-                __align_type<unsigned long long>,
-                __align_type<double>,
-                __align_type<long double>,
-                __align_type<__struct_double>,
-                __align_type<__struct_double4>,
-                __align_type<int*> >;
+inline const size_t __aligned_storage_max_align =
+    _LIBCPP_ALIGNOF(__max_align_impl<unsigned long long, double, long double, __struct_double, __struct_double4, int*>);
 
-template <class _TL, size_t _Len>
-struct __find_max_align;
+template <size_t _Len>
+inline const size_t __aligned_storage_alignment =
+    _Len > __aligned_storage_max_align
+        ? __aligned_storage_max_align
+        : size_t(1) << ((sizeof(size_t) * __CHAR_BIT__) - __builtin_clzg(_Len) - 1);
 
-template <class _Head, size_t _Len>
-struct __find_max_align<__type_list<_Head>, _Len> : public integral_constant<size_t, _Head::value> {};
-
-template <size_t _Len, size_t _A1, size_t _A2>
-struct __select_align {
-private:
-  static const size_t __min = _A2 < _A1 ? _A2 : _A1;
-  static const size_t __max = _A1 < _A2 ? _A2 : _A1;
-
-public:
-  static const size_t value = _Len < __max ? __min : __max;
-};
-
-template <class _Head, class... _Tail, size_t _Len>
-struct __find_max_align<__type_list<_Head, _Tail...>, _Len>
-    : public integral_constant<
-          size_t,
-          __select_align<_Len, _Head::value, __find_max_align<__type_list<_Tail...>, _Len>::value>::value> {};
-
-template <size_t _Len, size_t _Align = __find_max_align<__all_types, _Len>::value>
+template <size_t _Len, size_t _Align = __aligned_storage_alignment<_Len> >
 struct _LIBCPP_DEPRECATED_IN_CXX23 _LIBCPP_NO_SPECIALIZATIONS aligned_storage {
   union _ALIGNAS(_Align) type {
     unsigned char __data[(_Len + _Align - 1) / _Align * _Align];
@@ -77,7 +50,7 @@ struct _LIBCPP_DEPRECATED_IN_CXX23 _LIBCPP_NO_SPECIALIZATIONS aligned_storage {
 #if _LIBCPP_STD_VER >= 14
 
 _LIBCPP_SUPPRESS_DEPRECATED_PUSH
-template <size_t _Len, size_t _Align = __find_max_align<__all_types, _Len>::value>
+template <size_t _Len, size_t _Align = __aligned_storage_alignment<_Len> >
 using aligned_storage_t _LIBCPP_DEPRECATED_IN_CXX23 = typename aligned_storage<_Len, _Align>::type;
 _LIBCPP_SUPPRESS_DEPRECATED_POP
 
diff --git a/lib/libcxx/include/__type_traits/desugars_to.h b/lib/libcxx/include/__type_traits/desugars_to.h
index b67baae31b..029b3c6336 100644
--- a/lib/libcxx/include/__type_traits/desugars_to.h
+++ b/lib/libcxx/include/__type_traits/desugars_to.h
@@ -10,6 +10,7 @@
 #define _LIBCPP___TYPE_TRAITS_DESUGARS_TO_H
 
 #include <__config>
+#include <__type_traits/integral_constant.h>
 
 #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
 #  pragma GCC system_header
@@ -64,6 +65,9 @@ template <class _CanonicalTag, class _Operation, class... _Args>
 inline const bool __desugars_to_v<_CanonicalTag, _Operation&&, _Args...> =
     __desugars_to_v<_CanonicalTag, _Operation, _Args...>;
 
+template <class _CanonicalTag, class _Operation, class... _Args>
+struct __desugars_to : integral_constant<bool, __desugars_to_v<_CanonicalTag, _Operation, _Args...> > {};
+
 _LIBCPP_END_NAMESPACE_STD
 
 #endif // _LIBCPP___TYPE_TRAITS_DESUGARS_TO_H
diff --git a/lib/libcxx/include/__type_traits/invoke.h b/lib/libcxx/include/__type_traits/invoke.h
index 3f5626c014..ba82025765 100644
--- a/lib/libcxx/include/__type_traits/invoke.h
+++ b/lib/libcxx/include/__type_traits/invoke.h
@@ -62,6 +62,9 @@
 //
 // template <class Func, class... Args>
 // using __invoke_result_t = invoke_result_t<Func, Args...>;
+//
+// template <class Ret, class Func, class... Args>
+// struct __is_invocable_r : is_invocable_r<Ret, Func, Args...> {};
 
 _LIBCPP_BEGIN_NAMESPACE_STD
 
@@ -112,8 +115,10 @@ inline const bool __is_invocable_r_v = __is_invocable_r_impl<_Ret, __is_invocabl
 template <bool __is_invocable, class... _Args>
 inline const bool __is_nothrow_invocable_impl = false;
 
+#  ifndef _LIBCPP_CXX03_LANG
 template <class... _Args>
 inline const bool __is_nothrow_invocable_impl<true, _Args...> = noexcept(__builtin_invoke(std::declval<_Args>()...));
+#  endif
 
 template <class... _Args>
 inline const bool __is_nothrow_invocable_v = __is_nothrow_invocable_impl<__is_invocable_v<_Args...>, _Args...>;
@@ -327,6 +332,9 @@ using __invoke_result_t _LIBCPP_NODEBUG = typename __invoke_result<_Func, _Args.
 
 #endif // __has_builtin(__builtin_invoke_r)
 
+template <class _Ret, class _Func, class... _Args>
+struct __is_invocable_r : integral_constant<bool, __is_invocable_r_v<_Ret, _Func, _Args...> > {};
+
 template <class _Ret, bool = is_void<_Ret>::value>
 struct __invoke_void_return_wrapper {
   template <class... _Args>
diff --git a/lib/libcxx/include/__type_traits/is_allocator.h b/lib/libcxx/include/__type_traits/is_allocator.h
index 191eeb9a1f..f37c029a2a 100644
--- a/lib/libcxx/include/__type_traits/is_allocator.h
+++ b/lib/libcxx/include/__type_traits/is_allocator.h
@@ -11,7 +11,6 @@
 
 #include <__config>
 #include <__cstddef/size_t.h>
-#include <__type_traits/integral_constant.h>
 #include <__type_traits/void_t.h>
 #include <__utility/declval.h>
 
@@ -21,13 +20,13 @@
 
 _LIBCPP_BEGIN_NAMESPACE_STD
 
-template <typename _Alloc, typename = void, typename = void>
-struct __is_allocator : false_type {};
+template <class _Alloc, class = void, class = void>
+inline const bool __is_allocator_v = false;
 
-template <typename _Alloc>
-struct __is_allocator<_Alloc,
-                      __void_t<typename _Alloc::value_type>,
-                      __void_t<decltype(std::declval<_Alloc&>().allocate(size_t(0)))> > : true_type {};
+template <class _Alloc>
+inline const bool __is_allocator_v<_Alloc,
+                                   __void_t<typename _Alloc::value_type>,
+                                   __void_t<decltype(std::declval<_Alloc&>().allocate(size_t()))> > = true;
 
 _LIBCPP_END_NAMESPACE_STD
 
diff --git a/lib/libcxx/include/__type_traits/is_array.h b/lib/libcxx/include/__type_traits/is_array.h
index e734d1a304..62dd378cec 100644
--- a/lib/libcxx/include/__type_traits/is_array.h
+++ b/lib/libcxx/include/__type_traits/is_array.h
@@ -26,6 +26,32 @@ template <class _Tp>
 _LIBCPP_NO_SPECIALIZATIONS inline constexpr bool is_array_v = __is_array(_Tp);
 #endif
 
+template <class _Tp>
+inline const bool __is_bounded_array_v = __is_bounded_array(_Tp);
+
+#if _LIBCPP_STD_VER >= 20
+
+template <class _Tp>
+struct _LIBCPP_NO_SPECIALIZATIONS is_bounded_array : bool_constant<__is_bounded_array(_Tp)> {};
+
+template <class _Tp>
+_LIBCPP_NO_SPECIALIZATIONS inline constexpr bool is_bounded_array_v = __is_bounded_array(_Tp);
+
+#endif
+
+template <class _Tp>
+inline const bool __is_unbounded_array_v = __is_unbounded_array(_Tp);
+
+#if _LIBCPP_STD_VER >= 20
+
+template <class _Tp>
+struct _LIBCPP_NO_SPECIALIZATIONS is_unbounded_array : bool_constant<__is_unbounded_array(_Tp)> {};
+
+template <class _Tp>
+_LIBCPP_NO_SPECIALIZATIONS inline constexpr bool is_unbounded_array_v = __is_unbounded_array(_Tp);
+
+#endif
+
 _LIBCPP_END_NAMESPACE_STD
 
 #endif // _LIBCPP___TYPE_TRAITS_IS_ARRAY_H
diff --git a/lib/libcxx/include/__type_traits/is_equality_comparable.h b/lib/libcxx/include/__type_traits/is_equality_comparable.h
index 3ee1839996..03dbbd07cd 100644
--- a/lib/libcxx/include/__type_traits/is_equality_comparable.h
+++ b/lib/libcxx/include/__type_traits/is_equality_comparable.h
@@ -27,11 +27,11 @@
 _LIBCPP_BEGIN_NAMESPACE_STD
 
 template <class _Tp, class _Up, class = void>
-struct __is_equality_comparable : false_type {};
+inline const bool __is_equality_comparable_v = false;
 
 template <class _Tp, class _Up>
-struct __is_equality_comparable<_Tp, _Up, __void_t<decltype(std::declval<_Tp>() == std::declval<_Up>())> > : true_type {
-};
+inline const bool
+    __is_equality_comparable_v<_Tp, _Up, __void_t<decltype(std::declval<_Tp>() == std::declval<_Up>())> > = true;
 
 // A type is_trivially_equality_comparable if the expression `a == b` is equivalent to `std::memcmp(&a, &b, sizeof(T))`
 // (with `a` and `b` being of type `T`). For the case where we compare two object of the same type, we can use
@@ -48,40 +48,35 @@ struct __is_equality_comparable<_Tp, _Up, __void_t<decltype(std::declval<_Tp>()
 //   representation may not be equivalent.
 
 template <class _Tp, class _Up, class = void>
-struct __libcpp_is_trivially_equality_comparable_impl : false_type {};
+inline const bool __is_trivially_equality_comparable_impl = false;
 
 template <class _Tp>
-struct __libcpp_is_trivially_equality_comparable_impl<_Tp, _Tp>
+inline const bool __is_trivially_equality_comparable_impl<_Tp, _Tp>
 #if __has_builtin(__is_trivially_equality_comparable)
-    : integral_constant<bool, __is_trivially_equality_comparable(_Tp) && __is_equality_comparable<_Tp, _Tp>::value> {
-};
+    = __is_trivially_equality_comparable(_Tp) && __is_equality_comparable_v<_Tp, _Tp>;
 #else
-    : is_integral<_Tp> {
-};
+    = is_integral<_Tp>::value;
 #endif // __has_builtin(__is_trivially_equality_comparable)
 
 template <class _Tp, class _Up>
-struct __libcpp_is_trivially_equality_comparable_impl<
+inline const bool __is_trivially_equality_comparable_impl<
     _Tp,
     _Up,
-    __enable_if_t<is_integral<_Tp>::value && is_integral<_Up>::value && !is_same<_Tp, _Up>::value &&
-                  is_signed<_Tp>::value == is_signed<_Up>::value && sizeof(_Tp) == sizeof(_Up)> > : true_type {};
+    __enable_if_t<is_integral<_Tp>::value && is_integral<_Up>::value && !is_same<_Tp, _Up>::value> > =
+    is_signed<_Tp>::value == is_signed<_Up>::value && sizeof(_Tp) == sizeof(_Up);
 
 template <class _Tp>
-struct __libcpp_is_trivially_equality_comparable_impl<_Tp*, _Tp*> : true_type {};
+inline const bool __is_trivially_equality_comparable_impl<_Tp*, _Tp*> = true;
 
 // TODO: Use is_pointer_inverconvertible_base_of
 template <class _Tp, class _Up>
-struct __libcpp_is_trivially_equality_comparable_impl<_Tp*, _Up*>
-    : integral_constant<
-          bool,
-          __is_equality_comparable<_Tp*, _Up*>::value &&
-              (is_same<__remove_cv_t<_Tp>, __remove_cv_t<_Up> >::value || is_void<_Tp>::value || is_void<_Up>::value)> {
-};
+inline const bool __is_trivially_equality_comparable_impl<_Tp*, _Up*> =
+    __is_equality_comparable_v<_Tp*, _Up*> &&
+    (is_same<__remove_cv_t<_Tp>, __remove_cv_t<_Up> >::value || is_void<_Tp>::value || is_void<_Up>::value);
 
 template <class _Tp, class _Up>
-using __libcpp_is_trivially_equality_comparable _LIBCPP_NODEBUG =
-    __libcpp_is_trivially_equality_comparable_impl<__remove_cv_t<_Tp>, __remove_cv_t<_Up> >;
+inline const bool __is_trivially_equality_comparable_v =
+    __is_trivially_equality_comparable_impl<__remove_cv_t<_Tp>, __remove_cv_t<_Up> >;
 
 _LIBCPP_END_NAMESPACE_STD
 
diff --git a/lib/libcxx/include/__type_traits/is_final.h b/lib/libcxx/include/__type_traits/is_final.h
index e9ef1425c9..ab1cace52c 100644
--- a/lib/libcxx/include/__type_traits/is_final.h
+++ b/lib/libcxx/include/__type_traits/is_final.h
@@ -19,7 +19,7 @@
 _LIBCPP_BEGIN_NAMESPACE_STD
 
 template <class _Tp>
-struct __libcpp_is_final : integral_constant<bool, __is_final(_Tp)> {};
+inline const bool __is_final_v = __is_final(_Tp);
 
 #if _LIBCPP_STD_VER >= 14
 template <class _Tp>
diff --git a/lib/libcxx/include/__type_traits/is_floating_point.h b/lib/libcxx/include/__type_traits/is_floating_point.h
index b87363fe5b..586fce6af6 100644
--- a/lib/libcxx/include/__type_traits/is_floating_point.h
+++ b/lib/libcxx/include/__type_traits/is_floating_point.h
@@ -20,18 +20,19 @@
 _LIBCPP_BEGIN_NAMESPACE_STD
 
 // clang-format off
-template <class _Tp> struct __libcpp_is_floating_point              : false_type {};
-template <>          struct __libcpp_is_floating_point<float>       : true_type {};
-template <>          struct __libcpp_is_floating_point<double>      : true_type {};
-template <>          struct __libcpp_is_floating_point<long double> : true_type {};
+template <class _Tp> inline const bool __is_floating_point_impl              = false;
+template <>          inline const bool __is_floating_point_impl<float>       = true;
+template <>          inline const bool __is_floating_point_impl<double>      = true;
+template <>          inline const bool __is_floating_point_impl<long double> = true;
 // clang-format on
 
 template <class _Tp>
-struct _LIBCPP_NO_SPECIALIZATIONS is_floating_point : __libcpp_is_floating_point<__remove_cv_t<_Tp> > {};
+struct _LIBCPP_NO_SPECIALIZATIONS is_floating_point
+    : integral_constant<bool, __is_floating_point_impl<__remove_cv_t<_Tp> > > {};
 
 #if _LIBCPP_STD_VER >= 17
 template <class _Tp>
-_LIBCPP_NO_SPECIALIZATIONS inline constexpr bool is_floating_point_v = is_floating_point<_Tp>::value;
+_LIBCPP_NO_SPECIALIZATIONS inline constexpr bool is_floating_point_v = __is_floating_point_impl<__remove_cv_t<_Tp>>;
 #endif
 
 _LIBCPP_END_NAMESPACE_STD
diff --git a/lib/libcxx/include/__type_traits/is_generic_transparent_comparator.h b/lib/libcxx/include/__type_traits/is_generic_transparent_comparator.h
new file mode 100644
index 0000000000..7c1f0e984e
--- /dev/null
+++ b/lib/libcxx/include/__type_traits/is_generic_transparent_comparator.h
@@ -0,0 +1,30 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef _LIBCPP___TYPE_TRAITS_IS_GENERIC_TRANSPARENT_COMPARATOR_H
+#define _LIBCPP___TYPE_TRAITS_IS_GENERIC_TRANSPARENT_COMPARATOR_H
+
+#include <__config>
+
+#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
+#  pragma GCC system_header
+#endif
+
+_LIBCPP_BEGIN_NAMESPACE_STD
+
+// This trait returns true if the given _Comparator is known to accept any two types for comparison. This is separate
+// from `__is_transparent_v`, since that only enables overloads of specific functions, but doesn't give any semantic
+// guarantees. This trait guarantees that the comparator simply calls the appropriate comparison functions for any two
+// types.
+
+template <class _Comparator>
+inline const bool __is_generic_transparent_comparator_v = false;
+
+_LIBCPP_END_NAMESPACE_STD
+
+#endif // _LIBCPP___TYPE_TRAITS_IS_GENERIC_TRANSPARENT_COMPARATOR_H
diff --git a/lib/libcxx/include/__type_traits/is_specialization.h b/lib/libcxx/include/__type_traits/is_specialization.h
index 9b75636b1a..f14ab93c3c 100644
--- a/lib/libcxx/include/__type_traits/is_specialization.h
+++ b/lib/libcxx/include/__type_traits/is_specialization.h
@@ -30,15 +30,11 @@
 
 _LIBCPP_BEGIN_NAMESPACE_STD
 
-#if _LIBCPP_STD_VER >= 17
-
 template <class _Tp, template <class...> class _Template>
-inline constexpr bool __is_specialization_v = false; // true if and only if _Tp is a specialization of _Template
+inline const bool __is_specialization_v = false; // true if and only if _Tp is a specialization of _Template
 
 template <template <class...> class _Template, class... _Args>
-inline constexpr bool __is_specialization_v<_Template<_Args...>, _Template> = true;
-
-#endif // _LIBCPP_STD_VER >= 17
+inline const bool __is_specialization_v<_Template<_Args...>, _Template> = true;
 
 _LIBCPP_END_NAMESPACE_STD
 
diff --git a/lib/libcxx/include/__type_traits/is_within_lifetime.h b/lib/libcxx/include/__type_traits/is_within_lifetime.h
new file mode 100644
index 0000000000..242f2adaf3
--- /dev/null
+++ b/lib/libcxx/include/__type_traits/is_within_lifetime.h
@@ -0,0 +1,29 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef _LIBCPP___TYPE_TRAITS_IS_WITHIN_LIFETIME_H
+#define _LIBCPP___TYPE_TRAITS_IS_WITHIN_LIFETIME_H
+
+#include <__config>
+
+#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
+#  pragma GCC system_header
+#endif
+
+_LIBCPP_BEGIN_NAMESPACE_STD
+
+#if _LIBCPP_STD_VER >= 26 && __has_builtin(__builtin_is_within_lifetime)
+template <class _Tp>
+_LIBCPP_HIDE_FROM_ABI consteval bool is_within_lifetime(const _Tp* __p) noexcept {
+  return __builtin_is_within_lifetime(__p);
+}
+#endif
+
+_LIBCPP_END_NAMESPACE_STD
+
+#endif // _LIBCPP___TYPE_TRAITS_IS_WITHIN_LIFETIME_H
diff --git a/lib/libcxx/include/__type_traits/make_transparent.h b/lib/libcxx/include/__type_traits/make_transparent.h
new file mode 100644
index 0000000000..c2edf126d4
--- /dev/null
+++ b/lib/libcxx/include/__type_traits/make_transparent.h
@@ -0,0 +1,52 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef _LIBCPP___TYPE_TRAITS_MAKE_TRANSPARENT_H
+#define _LIBCPP___TYPE_TRAITS_MAKE_TRANSPARENT_H
+
+#include <__config>
+#include <__type_traits/enable_if.h>
+#include <__type_traits/is_empty.h>
+#include <__type_traits/is_same.h>
+
+#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
+#  pragma GCC system_header
+#endif
+
+_LIBCPP_BEGIN_NAMESPACE_STD
+
+// __make_transparent tries to create a transparent comparator from its non-transparent counterpart, e.g. obtain
+// `less<>` from `less<T>`. This is useful in cases where conversions can be avoided (e.g. a string literal to a
+// std::string).
+
+template <class _Tp, class _Comparator>
+struct __make_transparent {
+  using type _LIBCPP_NODEBUG = _Comparator;
+};
+
+template <class _Tp, class _Comparator>
+using __make_transparent_t _LIBCPP_NODEBUG = typename __make_transparent<_Tp, _Comparator>::type;
+
+template <class _Tp,
+          class _Comparator,
+          __enable_if_t<is_same<_Comparator, __make_transparent_t<_Tp, _Comparator> >::value, int> = 0>
+_LIBCPP_HIDE_FROM_ABI _Comparator& __as_transparent(_Comparator& __comp) {
+  return __comp;
+}
+
+template <class _Tp,
+          class _Comparator,
+          __enable_if_t<!is_same<_Comparator, __make_transparent_t<_Tp, _Comparator> >::value, int> = 0>
+_LIBCPP_HIDE_FROM_ABI __make_transparent_t<_Tp, _Comparator> __as_transparent(_Comparator&) {
+  static_assert(is_empty<_Comparator>::value);
+  return __make_transparent_t<_Tp, _Comparator>();
+}
+
+_LIBCPP_END_NAMESPACE_STD
+
+#endif // _LIBCPP___TYPE_TRAITS_MAKE_TRANSPARENT_H
diff --git a/lib/libcxx/include/__type_traits/reference_constructs_from_temporary.h b/lib/libcxx/include/__type_traits/reference_constructs_from_temporary.h
index 2ff549b4e1..a832562041 100644
--- a/lib/libcxx/include/__type_traits/reference_constructs_from_temporary.h
+++ b/lib/libcxx/include/__type_traits/reference_constructs_from_temporary.h
@@ -18,7 +18,7 @@
 
 _LIBCPP_BEGIN_NAMESPACE_STD
 
-#if _LIBCPP_STD_VER >= 23 && __has_builtin(__reference_constructs_from_temporary)
+#if _LIBCPP_STD_VER >= 23
 
 template <class _Tp, class _Up>
 struct _LIBCPP_NO_SPECIALIZATIONS reference_constructs_from_temporary
@@ -30,14 +30,8 @@ _LIBCPP_NO_SPECIALIZATIONS inline constexpr bool reference_constructs_from_tempo
 
 #endif
 
-#if __has_builtin(__reference_constructs_from_temporary)
 template <class _Tp, class _Up>
 inline const bool __reference_constructs_from_temporary_v = __reference_constructs_from_temporary(_Tp, _Up);
-#else
-// TODO(LLVM 22): Remove this as all supported compilers should have __reference_constructs_from_temporary implemented.
-template <class _Tp, class _Up>
-inline const bool __reference_constructs_from_temporary_v = __reference_binds_to_temporary(_Tp, _Up);
-#endif
 
 _LIBCPP_END_NAMESPACE_STD
 
diff --git a/lib/libcxx/include/__type_traits/reference_converts_from_temporary.h b/lib/libcxx/include/__type_traits/reference_converts_from_temporary.h
index c68f1765af..9c51225e53 100644
--- a/lib/libcxx/include/__type_traits/reference_converts_from_temporary.h
+++ b/lib/libcxx/include/__type_traits/reference_converts_from_temporary.h
@@ -18,7 +18,7 @@
 
 _LIBCPP_BEGIN_NAMESPACE_STD
 
-#if _LIBCPP_STD_VER >= 23 && __has_builtin(__reference_converts_from_temporary)
+#if _LIBCPP_STD_VER >= 23
 
 template <class _Tp, class _Up>
 struct _LIBCPP_NO_SPECIALIZATIONS reference_converts_from_temporary
diff --git a/lib/libcxx/include/__utility/cmp.h b/lib/libcxx/include/__utility/cmp.h
index 14dc0c154c..7cfe640ceb 100644
--- a/lib/libcxx/include/__utility/cmp.h
+++ b/lib/libcxx/include/__utility/cmp.h
@@ -26,10 +26,18 @@ _LIBCPP_BEGIN_NAMESPACE_STD
 
 #if _LIBCPP_STD_VER >= 20
 
+template <typename _Tp, typename _Ip>
+concept __comparison_can_promote_to =
+    sizeof(_Tp) < sizeof(_Ip) || (sizeof(_Tp) == sizeof(_Ip) && __signed_integer<_Tp>);
+
 template <__signed_or_unsigned_integer _Tp, __signed_or_unsigned_integer _Up>
-_LIBCPP_HIDE_FROM_ABI constexpr bool cmp_equal(_Tp __t, _Up __u) noexcept {
+[[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr bool cmp_equal(_Tp __t, _Up __u) noexcept {
   if constexpr (is_signed_v<_Tp> == is_signed_v<_Up>)
     return __t == __u;
+  else if constexpr (__comparison_can_promote_to<_Tp, int> && __comparison_can_promote_to<_Up, int>)
+    return static_cast<int>(__t) == static_cast<int>(__u);
+  else if constexpr (__comparison_can_promote_to<_Tp, long long> && __comparison_can_promote_to<_Up, long long>)
+    return static_cast<long long>(__t) == static_cast<long long>(__u);
   else if constexpr (is_signed_v<_Tp>)
     return __t < 0 ? false : make_unsigned_t<_Tp>(__t) == __u;
   else
@@ -37,14 +45,18 @@ _LIBCPP_HIDE_FROM_ABI constexpr bool cmp_equal(_Tp __t, _Up __u) noexcept {
 }
 
 template <__signed_or_unsigned_integer _Tp, __signed_or_unsigned_integer _Up>
-_LIBCPP_HIDE_FROM_ABI constexpr bool cmp_not_equal(_Tp __t, _Up __u) noexcept {
+[[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr bool cmp_not_equal(_Tp __t, _Up __u) noexcept {
   return !std::cmp_equal(__t, __u);
 }
 
 template <__signed_or_unsigned_integer _Tp, __signed_or_unsigned_integer _Up>
-_LIBCPP_HIDE_FROM_ABI constexpr bool cmp_less(_Tp __t, _Up __u) noexcept {
+[[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr bool cmp_less(_Tp __t, _Up __u) noexcept {
   if constexpr (is_signed_v<_Tp> == is_signed_v<_Up>)
     return __t < __u;
+  else if constexpr (__comparison_can_promote_to<_Tp, int> && __comparison_can_promote_to<_Up, int>)
+    return static_cast<int>(__t) < static_cast<int>(__u);
+  else if constexpr (__comparison_can_promote_to<_Tp, long long> && __comparison_can_promote_to<_Up, long long>)
+    return static_cast<long long>(__t) < static_cast<long long>(__u);
   else if constexpr (is_signed_v<_Tp>)
     return __t < 0 ? true : make_unsigned_t<_Tp>(__t) < __u;
   else
@@ -52,22 +64,22 @@ _LIBCPP_HIDE_FROM_ABI constexpr bool cmp_less(_Tp __t, _Up __u) noexcept {
 }
 
 template <__signed_or_unsigned_integer _Tp, __signed_or_unsigned_integer _Up>
-_LIBCPP_HIDE_FROM_ABI constexpr bool cmp_greater(_Tp __t, _Up __u) noexcept {
+[[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr bool cmp_greater(_Tp __t, _Up __u) noexcept {
   return std::cmp_less(__u, __t);
 }
 
 template <__signed_or_unsigned_integer _Tp, __signed_or_unsigned_integer _Up>
-_LIBCPP_HIDE_FROM_ABI constexpr bool cmp_less_equal(_Tp __t, _Up __u) noexcept {
+[[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr bool cmp_less_equal(_Tp __t, _Up __u) noexcept {
   return !std::cmp_greater(__t, __u);
 }
 
 template <__signed_or_unsigned_integer _Tp, __signed_or_unsigned_integer _Up>
-_LIBCPP_HIDE_FROM_ABI constexpr bool cmp_greater_equal(_Tp __t, _Up __u) noexcept {
+[[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr bool cmp_greater_equal(_Tp __t, _Up __u) noexcept {
   return !std::cmp_less(__t, __u);
 }
 
 template <__signed_or_unsigned_integer _Tp, __signed_or_unsigned_integer _Up>
-_LIBCPP_HIDE_FROM_ABI constexpr bool in_range(_Up __u) noexcept {
+[[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr bool in_range(_Up __u) noexcept {
   return std::cmp_less_equal(__u, numeric_limits<_Tp>::max()) &&
          std::cmp_greater_equal(__u, numeric_limits<_Tp>::min());
 }
diff --git a/lib/libcxx/include/__utility/default_three_way_comparator.h b/lib/libcxx/include/__utility/default_three_way_comparator.h
new file mode 100644
index 0000000000..92cdce6aae
--- /dev/null
+++ b/lib/libcxx/include/__utility/default_three_way_comparator.h
@@ -0,0 +1,70 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef _LIBCPP___UTILITY_DEFAULT_THREE_WAY_COMPARATOR_H
+#define _LIBCPP___UTILITY_DEFAULT_THREE_WAY_COMPARATOR_H
+
+#include <__config>
+#include <__type_traits/enable_if.h>
+#include <__type_traits/is_arithmetic.h>
+
+#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
+#  pragma GCC system_header
+#endif
+
+_LIBCPP_BEGIN_NAMESPACE_STD
+
+// This struct can be specialized to provide a three way comparator between _LHS and _RHS.
+// The return value should be
+// - less than zero if (lhs_val < rhs_val)
+// - greater than zero if (rhs_val < lhs_val)
+// - zero otherwise
+template <class _LHS, class _RHS, class = void>
+struct __default_three_way_comparator;
+
+template <class _LHS, class _RHS> // Arithmetic operands: compare with the built-in < and > operators.
+struct __default_three_way_comparator<_LHS,
+                                      _RHS,
+                                      __enable_if_t<is_arithmetic<_LHS>::value && is_arithmetic<_RHS>::value> > {
+  _LIBCPP_HIDE_FROM_ABI static int operator()(_LHS __lhs, _RHS __rhs) {
+    if (__lhs < __rhs)
+      return -1;
+    if (__lhs > __rhs)
+      return 1;
+    return 0; // neither comparison held
+  }
+};
+
+#if _LIBCPP_STD_VER >= 20 && __has_builtin(__builtin_lt_synthesizes_from_spaceship)
+template <class _LHS, class _RHS> // Remaining type pairs accepted by the builtin: reduce <=> to -1, 0 or 1.
+struct __default_three_way_comparator<
+    _LHS,
+    _RHS,
+    __enable_if_t<!(is_arithmetic<_LHS>::value && is_arithmetic<_RHS>::value) &&
+                  __builtin_lt_synthesizes_from_spaceship(const _LHS&, const _RHS&)>> {
+  _LIBCPP_HIDE_FROM_ABI static int operator()(const _LHS& __lhs, const _RHS& __rhs) {
+    auto __res = __lhs <=> __rhs; // only the sign of the result is inspected below
+    if (__res < 0)
+      return -1;
+    if (__res > 0)
+      return 1;
+    return 0;
+  }
+};
+#endif
+
+template <class _LHS, class _RHS, bool = true> // Fallback: the partial specialization below was not viable.
+struct __has_default_three_way_comparator : false_type {};
+
+template <class _LHS, class _RHS> // Viable only if sizeof is well-formed, i.e. the comparator is a complete type.
+struct __has_default_three_way_comparator<_LHS, _RHS, sizeof(__default_three_way_comparator<_LHS, _RHS>) >= 0>
+    : true_type {};
+
+_LIBCPP_END_NAMESPACE_STD
+
+#endif // _LIBCPP___UTILITY_DEFAULT_THREE_WAY_COMPARATOR_H
diff --git a/lib/libcxx/include/__utility/in_place.h b/lib/libcxx/include/__utility/in_place.h
index ade4b6685a..c5bfa94705 100644
--- a/lib/libcxx/include/__utility/in_place.h
+++ b/lib/libcxx/include/__utility/in_place.h
@@ -22,7 +22,7 @@ _LIBCPP_BEGIN_NAMESPACE_STD
 
 #if _LIBCPP_STD_VER >= 17
 
-struct _LIBCPP_EXPORTED_FROM_ABI in_place_t {
+struct in_place_t {
   explicit in_place_t() = default;
 };
 inline constexpr in_place_t in_place{};
diff --git a/lib/libcxx/include/__utility/integer_sequence.h b/lib/libcxx/include/__utility/integer_sequence.h
index d1c6e53c72..a84f572c33 100644
--- a/lib/libcxx/include/__utility/integer_sequence.h
+++ b/lib/libcxx/include/__utility/integer_sequence.h
@@ -11,63 +11,52 @@
 
 #include <__config>
 #include <__cstddef/size_t.h>
+#include <__tuple/tuple_element.h>
+#include <__tuple/tuple_size.h>
 #include <__type_traits/is_integral.h>
 
 #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
 #  pragma GCC system_header
 #endif
 
+#ifndef _LIBCPP_CXX03_LANG
+
 _LIBCPP_BEGIN_NAMESPACE_STD
 
-template <size_t...>
-struct __tuple_indices;
+#  if __has_builtin(__make_integer_seq)
+template <template <class _Tp, _Tp...> class _BaseType, class _Tp, _Tp _SequenceSize>
+using __make_integer_sequence_impl _LIBCPP_NODEBUG = __make_integer_seq<_BaseType, _Tp, _SequenceSize>;
+#  else
+template <template <class _Tp, _Tp...> class _BaseType, class _Tp, _Tp _SequenceSize>
+using __make_integer_sequence_impl _LIBCPP_NODEBUG = _BaseType<_Tp, __integer_pack(_SequenceSize)...>;
+#  endif
 
-template <class _IdxType, _IdxType... _Values>
+template <class _Tp, _Tp... _Indices>
 struct __integer_sequence {
-  template <template <class _OIdxType, _OIdxType...> class _ToIndexSeq, class _ToIndexType>
-  using __convert _LIBCPP_NODEBUG = _ToIndexSeq<_ToIndexType, _Values...>;
-
-  template <size_t _Sp>
-  using __to_tuple_indices _LIBCPP_NODEBUG = __tuple_indices<(_Values + _Sp)...>;
-};
-
-#if __has_builtin(__make_integer_seq)
-template <size_t _Ep, size_t _Sp>
-using __make_indices_imp _LIBCPP_NODEBUG =
-    typename __make_integer_seq<__integer_sequence, size_t, _Ep - _Sp>::template __to_tuple_indices<_Sp>;
-#elif __has_builtin(__integer_pack)
-template <size_t _Ep, size_t _Sp>
-using __make_indices_imp _LIBCPP_NODEBUG =
-    typename __integer_sequence<size_t, __integer_pack(_Ep - _Sp)...>::template __to_tuple_indices<_Sp>;
-#else
-#  error "No known way to get an integer pack from the compiler"
-#endif
-
-#if _LIBCPP_STD_VER >= 14
-
-template <class _Tp, _Tp... _Ip>
-struct integer_sequence {
-  typedef _Tp value_type;
+  using value_type = _Tp;
   static_assert(is_integral<_Tp>::value, "std::integer_sequence can only be instantiated with an integral type");
-  static _LIBCPP_HIDE_FROM_ABI constexpr size_t size() noexcept { return sizeof...(_Ip); }
+  [[__nodiscard__]] static _LIBCPP_HIDE_FROM_ABI constexpr size_t size() noexcept { return sizeof...(_Indices); }
 };
 
+template <size_t... _Indices>
+using __index_sequence _LIBCPP_NODEBUG = __integer_sequence<size_t, _Indices...>;
+
+template <size_t _SequenceSize>
+using __make_index_sequence _LIBCPP_NODEBUG = __make_integer_sequence_impl<__integer_sequence, size_t, _SequenceSize>;
+
+template <class... _Args>
+using __index_sequence_for _LIBCPP_NODEBUG = __make_index_sequence<sizeof...(_Args)>;
+
+#  if _LIBCPP_STD_VER >= 14
+
+template <class _Tp, _Tp... _Indices>
+struct integer_sequence : __integer_sequence<_Tp, _Indices...> {};
+
 template <size_t... _Ip>
 using index_sequence = integer_sequence<size_t, _Ip...>;
 
-#  if __has_builtin(__make_integer_seq)
-
 template <class _Tp, _Tp _Ep>
-using make_integer_sequence _LIBCPP_NODEBUG = __make_integer_seq<integer_sequence, _Tp, _Ep>;
-
-#  elif __has_builtin(__integer_pack)
-
-template <class _Tp, _Tp _SequenceSize>
-using make_integer_sequence _LIBCPP_NODEBUG = integer_sequence<_Tp, __integer_pack(_SequenceSize)...>;
-
-#  else
-#    error "No known way to get an integer pack from the compiler"
-#  endif
+using make_integer_sequence _LIBCPP_NODEBUG = __make_integer_sequence_impl<integer_sequence, _Tp, _Ep>;
 
 template <size_t _Np>
 using make_index_sequence = make_integer_sequence<size_t, _Np>;
@@ -75,16 +64,42 @@ using make_index_sequence = make_integer_sequence<size_t, _Np>;
 template <class... _Tp>
 using index_sequence_for = make_index_sequence<sizeof...(_Tp)>;
 
-#  if _LIBCPP_STD_VER >= 20
+#    if _LIBCPP_STD_VER >= 20
 // Executes __func for every element in an index_sequence.
 template <size_t... _Index, class _Function>
 _LIBCPP_HIDE_FROM_ABI constexpr void __for_each_index_sequence(index_sequence<_Index...>, _Function __func) {
   (__func.template operator()<_Index>(), ...);
 }
-#  endif // _LIBCPP_STD_VER >= 20
+#    endif // _LIBCPP_STD_VER >= 20
 
-#endif // _LIBCPP_STD_VER >= 14
+#    if _LIBCPP_STD_VER >= 26
+// [intseq.binding], structured binding support: tuple_size, tuple_element and get for std::integer_sequence
+template <class _Tp, _Tp... _Indices>
+struct tuple_size<integer_sequence<_Tp, _Indices...>> : integral_constant<size_t, sizeof...(_Indices)> {};
+
+template <size_t _Ip, class _Tp, _Tp... _Indices>
+struct tuple_element<_Ip, integer_sequence<_Tp, _Indices...>> {
+  static_assert(_Ip < sizeof...(_Indices), "Index out of bounds in std::tuple_element<> (std::integer_sequence)");
+  using type _LIBCPP_NODEBUG = _Tp; // every element has the sequence's value type
+};
+
+template <size_t _Ip, class _Tp, _Tp... _Indices>
+struct tuple_element<_Ip, const integer_sequence<_Tp, _Indices...>> {
+  static_assert(_Ip < sizeof...(_Indices), "Index out of bounds in std::tuple_element<> (const std::integer_sequence)");
+  using type _LIBCPP_NODEBUG = _Tp; // same element type for the const-qualified sequence
+};
+
+template <size_t _Ip, class _Tp, _Tp... _Indices> // The unnamed argument only supplies the type; it is never read.
+[[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr _Tp get(integer_sequence<_Tp, _Indices...>) noexcept {
+  static_assert(_Ip < sizeof...(_Indices), "Index out of bounds in std::get<> (std::integer_sequence)");
+  return _Indices...[_Ip]; // pack indexing: the _Ip-th value of the sequence
+}
+#    endif // _LIBCPP_STD_VER >= 26
+
+#  endif // _LIBCPP_STD_VER >= 14
 
 _LIBCPP_END_NAMESPACE_STD
 
+#endif // _LIBCPP_CXX03_LANG
+
 #endif // _LIBCPP___UTILITY_INTEGER_SEQUENCE_H
diff --git a/lib/libcxx/include/__utility/lazy_synth_three_way_comparator.h b/lib/libcxx/include/__utility/lazy_synth_three_way_comparator.h
new file mode 100644
index 0000000000..906166bd2a
--- /dev/null
+++ b/lib/libcxx/include/__utility/lazy_synth_three_way_comparator.h
@@ -0,0 +1,120 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef _LIBCPP___UTILITY_LAZY_SYNTH_THREE_WAY_COMPARATOR_H
+#define _LIBCPP___UTILITY_LAZY_SYNTH_THREE_WAY_COMPARATOR_H
+
+#include <__assert>
+#include <__config>
+#include <__type_traits/conjunction.h>
+#include <__type_traits/desugars_to.h>
+#include <__type_traits/enable_if.h>
+#include <__utility/default_three_way_comparator.h>
+
+#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
+#  pragma GCC system_header
+#endif
+
+// This file implements a __lazy_synth_three_way_comparator, which tries to build an efficient three way comparison from
+// a binary comparator. That is done in multiple steps:
+// 1) Check whether the comparator desugars to a less-than or greater-than operator
+//    If that is the case, check whether there exists a specialization of `__default_three_way_comparator`, which
+//    can be specialized to implement a three way comparator for the specific types.
+// 2) Fall back to doing a lazy less than/greater than comparison
+
+_LIBCPP_BEGIN_NAMESPACE_STD
+
+template <class _Comparator, class _LHS, class _RHS> // Stores references only; comparisons run on each query.
+struct __lazy_compare_result {
+  const _Comparator& __comp_;
+  const _LHS& __lhs_;
+  const _RHS& __rhs_;
+
+  _LIBCPP_HIDE_FROM_ABI
+  __lazy_compare_result(_LIBCPP_CTOR_LIFETIMEBOUND const _Comparator& __comp,
+                        _LIBCPP_CTOR_LIFETIMEBOUND const _LHS& __lhs,
+                        _LIBCPP_CTOR_LIFETIMEBOUND const _RHS& __rhs)
+      : __comp_(__comp), __lhs_(__lhs), __rhs_(__rhs) {}
+
+  _LIBCPP_HIDE_FROM_ABI bool __less() const { // comp(lhs, rhs); asserts that comp(rhs, lhs) is not true as well
+    bool __result = __comp_(__lhs_, __rhs_);
+    _LIBCPP_ASSERT_SEMANTIC_REQUIREMENT(__result ? !static_cast<bool>(__comp_(__rhs_, __lhs_)) : true,
+                                        "Comparator does not induce a strict weak ordering");
+    return __result;
+  }
+
+  _LIBCPP_HIDE_FROM_ABI bool __greater() const { // comp(rhs, lhs); asserts that comp(lhs, rhs) is not true as well
+    bool __result = __comp_(__rhs_, __lhs_);
+    _LIBCPP_ASSERT_SEMANTIC_REQUIREMENT(__result ? !static_cast<bool>(__comp_(__lhs_, __rhs_)) : true,
+                                        "Comparator does not induce a strict weak ordering");
+    return __result;
+  }
+};
+
+// This class provides three way comparison between _LHS and _RHS as efficiently as possible. The primary template
+// defers all work to a __lazy_compare_result. It can be specialized if a comparator only compares part of the object;
+// such a specialization should use the __lazy_synth_three_way_comparator of the subobjects to compare them.
+template <class _Comparator, class _LHS, class _RHS, class = void>
+struct __lazy_synth_three_way_comparator {
+  const _Comparator& __comp_; // the comparator is referenced, not copied
+
+  _LIBCPP_HIDE_FROM_ABI __lazy_synth_three_way_comparator(_LIBCPP_CTOR_LIFETIMEBOUND const _Comparator& __comp)
+      : __comp_(__comp) {}
+
+  _LIBCPP_HIDE_FROM_ABI __lazy_compare_result<_Comparator, _LHS, _RHS>
+  operator()(_LIBCPP_LIFETIMEBOUND const _LHS& __lhs, _LIBCPP_LIFETIMEBOUND const _RHS& __rhs) const {
+    return __lazy_compare_result<_Comparator, _LHS, _RHS>(__comp_, __lhs, __rhs); // no comparison happens yet
+  }
+};
+
+struct __eager_compare_result { // Wraps a three way result that has already been computed.
+  int __res_; // only the sign matters: < 0 answers __less(), > 0 answers __greater()
+
+  _LIBCPP_HIDE_FROM_ABI explicit __eager_compare_result(int __res) : __res_(__res) {}
+
+  _LIBCPP_HIDE_FROM_ABI bool __less() const { return __res_ < 0; }
+  _LIBCPP_HIDE_FROM_ABI bool __greater() const { return __res_ > 0; }
+};
+
+template <class _Comparator, class _LHS, class _RHS> // Less-than comparators: compare once, eagerly.
+struct __lazy_synth_three_way_comparator<_Comparator,
+                                         _LHS,
+                                         _RHS,
+                                         __enable_if_t<_And<__desugars_to<__less_tag, _Comparator, _LHS, _RHS>,
+                                                            __has_default_three_way_comparator<_LHS, _RHS> >::value> > {
+  // This lifetimebound annotation is technically incorrect since the comparator is not stored here, but other
+  // specializations actually capture the lifetime of the comparator.
+  _LIBCPP_HIDE_FROM_ABI __lazy_synth_three_way_comparator(_LIBCPP_CTOR_LIFETIMEBOUND const _Comparator&) {}
+
+  // Same caveat as above: the returned __eager_compare_result only stores an int.
+  _LIBCPP_HIDE_FROM_ABI static __eager_compare_result
+  operator()(_LIBCPP_LIFETIMEBOUND const _LHS& __lhs, _LIBCPP_LIFETIMEBOUND const _RHS& __rhs) {
+    return __eager_compare_result(__default_three_way_comparator<_LHS, _RHS>()(__lhs, __rhs));
+  }
+};
+
+template <class _Comparator, class _LHS, class _RHS> // Greater-than comparators: compare once, then flip the sign.
+struct __lazy_synth_three_way_comparator<_Comparator,
+                                         _LHS,
+                                         _RHS,
+                                         __enable_if_t<_And<__desugars_to<__greater_tag, _Comparator, _LHS, _RHS>,
+                                                            __has_default_three_way_comparator<_LHS, _RHS> >::value> > {
+  // This lifetimebound annotation is technically incorrect since the comparator is not stored here, but other
+  // specializations actually capture the lifetime of the comparator.
+  _LIBCPP_HIDE_FROM_ABI __lazy_synth_three_way_comparator(_LIBCPP_CTOR_LIFETIMEBOUND const _Comparator&) {}
+
+  // Same caveat as above. The default result is negated because the comparator orders by greater-than.
+  _LIBCPP_HIDE_FROM_ABI static __eager_compare_result
+  operator()(_LIBCPP_LIFETIMEBOUND const _LHS& __lhs, _LIBCPP_LIFETIMEBOUND const _RHS& __rhs) {
+    return __eager_compare_result(-__default_three_way_comparator<_LHS, _RHS>()(__lhs, __rhs));
+  }
+};
+
+_LIBCPP_END_NAMESPACE_STD
+
+#endif // _LIBCPP___UTILITY_LAZY_SYNTH_THREE_WAY_COMPARATOR_H
diff --git a/lib/libcxx/include/__utility/pair.h b/lib/libcxx/include/__utility/pair.h
index dbacbce044..a8232bc9da 100644
--- a/lib/libcxx/include/__utility/pair.h
+++ b/lib/libcxx/include/__utility/pair.h
@@ -18,7 +18,6 @@
 #include <__fwd/array.h>
 #include <__fwd/pair.h>
 #include <__fwd/tuple.h>
-#include <__tuple/tuple_indices.h>
 #include <__tuple/tuple_like_no_subrange.h>
 #include <__tuple/tuple_size.h>
 #include <__type_traits/common_reference.h>
@@ -32,14 +31,13 @@
 #include <__type_traits/is_implicitly_default_constructible.h>
 #include <__type_traits/is_nothrow_assignable.h>
 #include <__type_traits/is_nothrow_constructible.h>
-#include <__type_traits/is_replaceable.h>
-#include <__type_traits/is_same.h>
 #include <__type_traits/is_swappable.h>
 #include <__type_traits/is_trivially_relocatable.h>
 #include <__type_traits/nat.h>
 #include <__type_traits/unwrap_ref.h>
 #include <__utility/declval.h>
 #include <__utility/forward.h>
+#include <__utility/integer_sequence.h>
 #include <__utility/move.h>
 #include <__utility/piecewise_construct.h>
 
@@ -102,7 +100,6 @@ struct pair
       __conditional_t<__libcpp_is_trivially_relocatable<_T1>::value && __libcpp_is_trivially_relocatable<_T2>::value,
                       pair,
                       void>;
-  using __replaceable _LIBCPP_NODEBUG = __conditional_t<__is_replaceable_v<_T1> && __is_replaceable_v<_T2>, pair, void>;
 
   _LIBCPP_HIDE_FROM_ABI pair(pair const&) = default;
   _LIBCPP_HIDE_FROM_ABI pair(pair&&)      = default;
@@ -222,11 +219,7 @@ struct pair
   _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20
   pair(piecewise_construct_t __pc, tuple<_Args1...> __first_args, tuple<_Args2...> __second_args) noexcept(
       is_nothrow_constructible<first_type, _Args1...>::value && is_nothrow_constructible<second_type, _Args2...>::value)
-      : pair(__pc,
-             __first_args,
-             __second_args,
-             typename __make_tuple_indices<sizeof...(_Args1)>::type(),
-             typename __make_tuple_indices<sizeof...(_Args2) >::type()) {}
+      : pair(__pc, __first_args, __second_args, __index_sequence_for<_Args1...>(), __index_sequence_for<_Args2...>()) {}
 
   _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 pair&
   operator=(__conditional_t<is_copy_assignable<first_type>::value && is_copy_assignable<second_type>::value,
@@ -440,8 +433,8 @@ private:
   pair(piecewise_construct_t,
        tuple<_Args1...>& __first_args,
        tuple<_Args2...>& __second_args,
-       __tuple_indices<_I1...>,
-       __tuple_indices<_I2...>)
+       __index_sequence<_I1...>,
+       __index_sequence<_I2...>)
       : first(std::forward<_Args1>(std::get<_I1>(__first_args))...),
         second(std::forward<_Args2>(std::get<_I2>(__second_args))...) {}
 #endif
@@ -546,8 +539,8 @@ swap(const pair<_T1, _T2>& __x, const pair<_T1, _T2>& __y) noexcept(noexcept(__x
 #endif
 
 template <class _T1, class _T2>
-inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 pair<__unwrap_ref_decay_t<_T1>, __unwrap_ref_decay_t<_T2> >
-make_pair(_T1&& __t1, _T2&& __t2) {
+[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14
+pair<__unwrap_ref_decay_t<_T1>, __unwrap_ref_decay_t<_T2> > make_pair(_T1&& __t1, _T2&& __t2) {
   return pair<__unwrap_ref_decay_t<_T1>, __unwrap_ref_decay_t<_T2> >(std::forward<_T1>(__t1), std::forward<_T2>(__t2));
 }
 
@@ -619,67 +612,71 @@ struct __get_pair<1> {
 };
 
 template <size_t _Ip, class _T1, class _T2>
-inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 typename tuple_element<_Ip, pair<_T1, _T2> >::type&
+[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14
+typename tuple_element<_Ip, pair<_T1, _T2> >::type&
 get(pair<_T1, _T2>& __p) _NOEXCEPT {
   return __get_pair<_Ip>::get(__p);
 }
 
 template <size_t _Ip, class _T1, class _T2>
-inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 const typename tuple_element<_Ip, pair<_T1, _T2> >::type&
+[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI
+_LIBCPP_CONSTEXPR_SINCE_CXX14 const typename tuple_element<_Ip, pair<_T1, _T2> >::type&
 get(const pair<_T1, _T2>& __p) _NOEXCEPT {
   return __get_pair<_Ip>::get(__p);
 }
 
 template <size_t _Ip, class _T1, class _T2>
-inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 typename tuple_element<_Ip, pair<_T1, _T2> >::type&&
+[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14
+typename tuple_element<_Ip, pair<_T1, _T2> >::type&&
 get(pair<_T1, _T2>&& __p) _NOEXCEPT {
   return __get_pair<_Ip>::get(std::move(__p));
 }
 
 template <size_t _Ip, class _T1, class _T2>
-inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 const typename tuple_element<_Ip, pair<_T1, _T2> >::type&&
+[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI
+_LIBCPP_CONSTEXPR_SINCE_CXX14 const typename tuple_element<_Ip, pair<_T1, _T2> >::type&&
 get(const pair<_T1, _T2>&& __p) _NOEXCEPT {
   return __get_pair<_Ip>::get(std::move(__p));
 }
 
 #if _LIBCPP_STD_VER >= 14
 template <class _T1, class _T2>
-inline _LIBCPP_HIDE_FROM_ABI constexpr _T1& get(pair<_T1, _T2>& __p) _NOEXCEPT {
+[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI constexpr _T1& get(pair<_T1, _T2>& __p) _NOEXCEPT {
   return __p.first;
 }
 
 template <class _T1, class _T2>
-inline _LIBCPP_HIDE_FROM_ABI constexpr _T1 const& get(pair<_T1, _T2> const& __p) _NOEXCEPT {
+[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI constexpr _T1 const& get(pair<_T1, _T2> const& __p) _NOEXCEPT {
   return __p.first;
 }
 
 template <class _T1, class _T2>
-inline _LIBCPP_HIDE_FROM_ABI constexpr _T1&& get(pair<_T1, _T2>&& __p) _NOEXCEPT {
+[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI constexpr _T1&& get(pair<_T1, _T2>&& __p) _NOEXCEPT {
   return std::forward<_T1&&>(__p.first);
 }
 
 template <class _T1, class _T2>
-inline _LIBCPP_HIDE_FROM_ABI constexpr _T1 const&& get(pair<_T1, _T2> const&& __p) _NOEXCEPT {
+[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI constexpr _T1 const&& get(pair<_T1, _T2> const&& __p) _NOEXCEPT {
   return std::forward<_T1 const&&>(__p.first);
 }
 
 template <class _T2, class _T1>
-inline _LIBCPP_HIDE_FROM_ABI constexpr _T2& get(pair<_T1, _T2>& __p) _NOEXCEPT {
+[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI constexpr _T2& get(pair<_T1, _T2>& __p) _NOEXCEPT {
   return __p.second;
 }
 
 template <class _T2, class _T1>
-inline _LIBCPP_HIDE_FROM_ABI constexpr _T2 const& get(pair<_T1, _T2> const& __p) _NOEXCEPT {
+[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI constexpr _T2 const& get(pair<_T1, _T2> const& __p) _NOEXCEPT {
   return __p.second;
 }
 
 template <class _T2, class _T1>
-inline _LIBCPP_HIDE_FROM_ABI constexpr _T2&& get(pair<_T1, _T2>&& __p) _NOEXCEPT {
+[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI constexpr _T2&& get(pair<_T1, _T2>&& __p) _NOEXCEPT {
   return std::forward<_T2&&>(__p.second);
 }
 
 template <class _T2, class _T1>
-inline _LIBCPP_HIDE_FROM_ABI constexpr _T2 const&& get(pair<_T1, _T2> const&& __p) _NOEXCEPT {
+[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI constexpr _T2 const&& get(pair<_T1, _T2> const&& __p) _NOEXCEPT {
   return std::forward<_T2 const&&>(__p.second);
 }
 
diff --git a/lib/libcxx/include/__utility/scope_guard.h b/lib/libcxx/include/__utility/scope_guard.h
index 3972102eee..db4f0e4c73 100644
--- a/lib/libcxx/include/__utility/scope_guard.h
+++ b/lib/libcxx/include/__utility/scope_guard.h
@@ -43,6 +43,8 @@ public:
 #endif
 };
 
+_LIBCPP_CTAD_SUPPORTED_FOR_TYPE(__scope_guard);
+
 template <class _Func>
 _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 __scope_guard<_Func> __make_scope_guard(_Func __func) {
   return __scope_guard<_Func>(std::move(__func));
diff --git a/lib/libcxx/include/__utility/try_key_extraction.h b/lib/libcxx/include/__utility/try_key_extraction.h
new file mode 100644
index 0000000000..755c082140
--- /dev/null
+++ b/lib/libcxx/include/__utility/try_key_extraction.h
@@ -0,0 +1,114 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef _LIBCPP___UTILITY_TRY_EXTRACT_KEY_H
+#define _LIBCPP___UTILITY_TRY_EXTRACT_KEY_H
+
+#include <__config>
+#include <__fwd/pair.h>
+#include <__fwd/tuple.h>
+#include <__type_traits/enable_if.h>
+#include <__type_traits/is_same.h>
+#include <__type_traits/remove_const.h>
+#include <__type_traits/remove_const_ref.h>
+#include <__utility/declval.h>
+#include <__utility/forward.h>
+#include <__utility/piecewise_construct.h>
+#include <__utility/priority_tag.h>
+
+#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
+#  pragma GCC system_header
+#endif
+
+_LIBCPP_BEGIN_NAMESPACE_STD
+
+template <class _KeyT, class _Ret, class _WithKey, class _WithoutKey, class... _Args> // Fallback (tag 0): no key.
+_LIBCPP_HIDE_FROM_ABI _Ret
+__try_key_extraction_impl(__priority_tag<0>, _WithKey, _WithoutKey __without_key, _Args&&... __args) {
+  return __without_key(std::forward<_Args>(__args)...); // __with_key is ignored on this path
+}
+
+template <class _KeyT,
+          class _Ret,
+          class _WithKey,
+          class _WithoutKey,
+          class _Arg, // the single argument is a _KeyT once const and reference are stripped
+          __enable_if_t<is_same<_KeyT, __remove_const_ref_t<_Arg> >::value, int> = 0>
+_LIBCPP_HIDE_FROM_ABI _Ret
+__try_key_extraction_impl(__priority_tag<1>, _WithKey __with_key, _WithoutKey, _Arg&& __arg) {
+  return __with_key(__arg, std::forward<_Arg>(__arg)); // key as an lvalue, then the forwarded argument
+}
+
+template <class _KeyT,
+          class _Ret,
+          class _WithKey,
+          class _WithoutKey,
+          class _Arg, // the single argument is a pair whose first_type is _KeyT (ignoring const)
+          __enable_if_t<__is_pair_v<__remove_const_ref_t<_Arg> > &&
+                            is_same<__remove_const_t<typename __remove_const_ref_t<_Arg>::first_type>, _KeyT>::value,
+                        int> = 0>
+_LIBCPP_HIDE_FROM_ABI _Ret
+__try_key_extraction_impl(__priority_tag<1>, _WithKey __with_key, _WithoutKey, _Arg&& __arg) {
+  return __with_key(__arg.first, std::forward<_Arg>(__arg)); // key is the pair's first member
+}
+
+template <class _KeyT,
+          class _Ret,
+          class _WithKey,
+          class _WithoutKey,
+          class _Arg1, // the first of exactly two arguments is the key
+          class _Arg2,
+          __enable_if_t<is_same<_KeyT, __remove_const_ref_t<_Arg1> >::value, int> = 0>
+_LIBCPP_HIDE_FROM_ABI _Ret
+__try_key_extraction_impl(__priority_tag<1>, _WithKey __with_key, _WithoutKey, _Arg1&& __arg1, _Arg2&& __arg2) {
+  return __with_key(__arg1, std::forward<_Arg1>(__arg1), std::forward<_Arg2>(__arg2)); // key, then both arguments
+}
+
+#ifndef _LIBCPP_CXX03_LANG
+template <class _KeyT,
+          class _Ret,
+          class _WithKey,
+          class _WithoutKey,
+          class _PiecewiseConstruct, // must be piecewise_construct_t once const and reference are stripped
+          class _Tuple1,
+          class _Tuple2,
+          __enable_if_t<is_same<__remove_const_ref_t<_PiecewiseConstruct>, piecewise_construct_t>::value &&
+                            __is_tuple_v<_Tuple1> && tuple_size<_Tuple1>::value == 1 &&
+                            is_same<__remove_const_ref_t<typename tuple_element<0, _Tuple1>::type>, _KeyT>::value,
+                        int> = 0>
+_LIBCPP_HIDE_FROM_ABI _Ret __try_key_extraction_impl(
+    __priority_tag<1>,
+    _WithKey __with_key,
+    _WithoutKey,
+    _PiecewiseConstruct&& __pc,
+    _Tuple1&& __tuple1,
+    _Tuple2&& __tuple2) {
+  return __with_key(
+      std::get<0>(__tuple1), // the only element of the first tuple is the key
+      std::forward<_PiecewiseConstruct>(__pc),
+      std::forward<_Tuple1>(__tuple1),
+      std::forward<_Tuple2>(__tuple2));
+}
+#endif // _LIBCPP_CXX03_LANG
+
+// This function tries extracting the given _KeyT from _Args...
+// If it succeeds in extracting the key, it calls the `__with_key` function with the extracted key and all of the
+// arguments. Otherwise it calls the `__without_key` function with all of the arguments.
+//
+// Both `__with_key` and `__without_key` must take all arguments by reference.
+template <class _KeyT, class _WithKey, class _WithoutKey, class... _Args>
+_LIBCPP_HIDE_FROM_ABI decltype(std::declval<_WithoutKey>()(std::declval<_Args>()...))
+__try_key_extraction(_WithKey __with_key, _WithoutKey __without_key, _Args&&... __args) {
+  using _Ret = decltype(__without_key(std::forward<_Args>(__args)...)); // also the return type of the __with_key path
+  return std::__try_key_extraction_impl<_KeyT, _Ret>(
+      __priority_tag<1>(), __with_key, __without_key, std::forward<_Args>(__args)...);
+}
+
+_LIBCPP_END_NAMESPACE_STD
+
+#endif // _LIBCPP___UTILITY_TRY_EXTRACT_KEY_H
diff --git a/lib/libcxx/include/__vector/vector.h b/lib/libcxx/include/__vector/vector.h
index 4e0d76fbbe..4e48b1c201 100644
--- a/lib/libcxx/include/__vector/vector.h
+++ b/lib/libcxx/include/__vector/vector.h
@@ -12,18 +12,17 @@
 #include <__algorithm/copy.h>
 #include <__algorithm/copy_n.h>
 #include <__algorithm/fill_n.h>
+#include <__algorithm/iterator_operations.h>
 #include <__algorithm/max.h>
 #include <__algorithm/min.h>
 #include <__algorithm/move.h>
 #include <__algorithm/move_backward.h>
-#include <__algorithm/ranges_copy_n.h>
 #include <__algorithm/rotate.h>
 #include <__assert>
 #include <__config>
 #include <__debug_utils/sanitizers.h>
 #include <__format/enable_insertable.h>
 #include <__fwd/vector.h>
-#include <__iterator/advance.h>
 #include <__iterator/bounded_iter.h>
 #include <__iterator/concepts.h>
 #include <__iterator/distance.h>
@@ -43,6 +42,7 @@
 #include <__memory/temp_value.h>
 #include <__memory/uninitialized_algorithms.h>
 #include <__ranges/access.h>
+#include <__ranges/as_rvalue_view.h>
 #include <__ranges/concepts.h>
 #include <__ranges/container_compatible_range.h>
 #include <__ranges/from_range.h>
@@ -55,7 +55,6 @@
 #include <__type_traits/is_nothrow_assignable.h>
 #include <__type_traits/is_nothrow_constructible.h>
 #include <__type_traits/is_pointer.h>
-#include <__type_traits/is_replaceable.h>
 #include <__type_traits/is_same.h>
 #include <__type_traits/is_trivially_relocatable.h>
 #include <__type_traits/type_identity.h>
@@ -86,6 +85,9 @@ _LIBCPP_BEGIN_NAMESPACE_STD
 
 template <class _Tp, class _Allocator /* = allocator<_Tp> */>
 class vector {
+  template <class _Up, class _Alloc>
+  using __split_buffer _LIBCPP_NODEBUG = std::__split_buffer<_Up, _Alloc, __split_buffer_pointer_layout>;
+
 public:
   //
   // Types
@@ -113,7 +115,7 @@ public:
   using reverse_iterator       = std::reverse_iterator<iterator>;
   using const_reverse_iterator = std::reverse_iterator<const_iterator>;
 
-  // A vector containers the following members which may be trivially relocatable:
+  // A vector contains the following members which may be trivially relocatable:
   // - pointer: may be trivially relocatable, so it's checked
   // - allocator_type: may be trivially relocatable, so it's checked
   // vector doesn't contain any self-references, so it's trivially relocatable if its members are.
@@ -121,10 +123,6 @@ public:
       __libcpp_is_trivially_relocatable<pointer>::value && __libcpp_is_trivially_relocatable<allocator_type>::value,
       vector,
       void>;
-  using __replaceable _LIBCPP_NODEBUG =
-      __conditional_t<__is_replaceable_v<pointer> && __container_allocator_is_replaceable<__alloc_traits>::value,
-                      vector,
-                      void>;
 
   static_assert(__check_valid_allocator<allocator_type>::value, "");
   static_assert(is_same<typename allocator_type::value_type, value_type>::value,
@@ -174,7 +172,7 @@ public:
     __guard.__complete();
   }
 
-  template <__enable_if_t<__is_allocator<_Allocator>::value, int> = 0>
+  template <__enable_if_t<__is_allocator_v<_Allocator>, int> = 0>
   _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI
   vector(size_type __n, const value_type& __x, const allocator_type& __a)
       : __alloc_(__a) {
@@ -317,7 +315,7 @@ public:
                         is_constructible<value_type, typename iterator_traits<_ForwardIterator>::reference>::value,
                     int> = 0>
   _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI void assign(_ForwardIterator __first, _ForwardIterator __last) {
-    __assign_with_size(__first, __last, std::distance(__first, __last));
+    __assign_with_size<_ClassicAlgPolicy>(__first, __last, std::distance(__first, __last));
   }
 
 #if _LIBCPP_STD_VER >= 23
@@ -325,7 +323,7 @@ public:
   _LIBCPP_HIDE_FROM_ABI constexpr void assign_range(_Range&& __range) {
     if constexpr (ranges::forward_range<_Range> || ranges::sized_range<_Range>) {
       auto __n = static_cast<size_type>(ranges::distance(__range));
-      __assign_with_size(ranges::begin(__range), ranges::end(__range), __n);
+      __assign_with_size<_RangeAlgPolicy>(ranges::begin(__range), ranges::end(__range), __n);
 
     } else {
       __assign_with_sentinel(ranges::begin(__range), ranges::end(__range));
@@ -341,59 +339,67 @@ public:
   }
 #endif
 
-  _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI allocator_type get_allocator() const _NOEXCEPT {
+  [[__nodiscard__]] _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI allocator_type get_allocator() const _NOEXCEPT {
     return this->__alloc_;
   }
 
   //
   // Iterators
   //
-  _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI iterator begin() _NOEXCEPT {
+  [[__nodiscard__]] _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI iterator begin() _NOEXCEPT {
     return __make_iter(__add_alignment_assumption(this->__begin_));
   }
-  _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI const_iterator begin() const _NOEXCEPT {
+  [[__nodiscard__]] _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI const_iterator begin() const _NOEXCEPT {
     return __make_iter(__add_alignment_assumption(this->__begin_));
   }
-  _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI iterator end() _NOEXCEPT {
+  [[__nodiscard__]] _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI iterator end() _NOEXCEPT {
     return __make_iter(__add_alignment_assumption(this->__end_));
   }
-  _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI const_iterator end() const _NOEXCEPT {
+  [[__nodiscard__]] _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI const_iterator end() const _NOEXCEPT {
     return __make_iter(__add_alignment_assumption(this->__end_));
   }
 
-  _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI reverse_iterator rbegin() _NOEXCEPT {
+  [[__nodiscard__]] _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI reverse_iterator rbegin() _NOEXCEPT {
     return reverse_iterator(end());
   }
-  _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI const_reverse_iterator rbegin() const _NOEXCEPT {
+  [[__nodiscard__]] _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI const_reverse_iterator
+  rbegin() const _NOEXCEPT {
     return const_reverse_iterator(end());
   }
-  _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI reverse_iterator rend() _NOEXCEPT {
+  [[__nodiscard__]] _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI reverse_iterator rend() _NOEXCEPT {
     return reverse_iterator(begin());
   }
-  _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI const_reverse_iterator rend() const _NOEXCEPT {
+  [[__nodiscard__]] _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI const_reverse_iterator rend() const _NOEXCEPT {
     return const_reverse_iterator(begin());
   }
 
-  _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI const_iterator cbegin() const _NOEXCEPT { return begin(); }
-  _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI const_iterator cend() const _NOEXCEPT { return end(); }
-  _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI const_reverse_iterator crbegin() const _NOEXCEPT {
+  [[__nodiscard__]] _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI const_iterator cbegin() const _NOEXCEPT {
+    return begin();
+  }
+  [[__nodiscard__]] _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI const_iterator cend() const _NOEXCEPT {
+    return end();
+  }
+  [[__nodiscard__]] _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI const_reverse_iterator
+  crbegin() const _NOEXCEPT {
     return rbegin();
   }
-  _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI const_reverse_iterator crend() const _NOEXCEPT { return rend(); }
+  [[__nodiscard__]] _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI const_reverse_iterator crend() const _NOEXCEPT {
+    return rend();
+  }
 
   //
   // [vector.capacity], capacity
   //
-  _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI size_type size() const _NOEXCEPT {
+  [[__nodiscard__]] _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI size_type size() const _NOEXCEPT {
     return static_cast<size_type>(this->__end_ - this->__begin_);
   }
-  _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI size_type capacity() const _NOEXCEPT {
+  [[__nodiscard__]] _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI size_type capacity() const _NOEXCEPT {
     return static_cast<size_type>(this->__cap_ - this->__begin_);
   }
   [[__nodiscard__]] _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI bool empty() const _NOEXCEPT {
     return this->__begin_ == this->__end_;
   }
-  _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI size_type max_size() const _NOEXCEPT {
+  [[__nodiscard__]] _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI size_type max_size() const _NOEXCEPT {
     return std::min<size_type>(__alloc_traits::max_size(this->__alloc_), numeric_limits<difference_type>::max());
   }
   _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI void reserve(size_type __n);
@@ -402,38 +408,39 @@ public:
   //
   // element access
   //
-  _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI reference operator[](size_type __n) _NOEXCEPT {
+  [[__nodiscard__]] _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI reference operator[](size_type __n) _NOEXCEPT {
     _LIBCPP_ASSERT_VALID_ELEMENT_ACCESS(__n < size(), "vector[] index out of bounds");
     return this->__begin_[__n];
   }
-  _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI const_reference operator[](size_type __n) const _NOEXCEPT {
+  [[__nodiscard__]] _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI const_reference
+  operator[](size_type __n) const _NOEXCEPT {
     _LIBCPP_ASSERT_VALID_ELEMENT_ACCESS(__n < size(), "vector[] index out of bounds");
     return this->__begin_[__n];
   }
-  _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI reference at(size_type __n) {
+  [[__nodiscard__]] _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI reference at(size_type __n) {
     if (__n >= size())
       this->__throw_out_of_range();
     return this->__begin_[__n];
   }
-  _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI const_reference at(size_type __n) const {
+  [[__nodiscard__]] _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI const_reference at(size_type __n) const {
     if (__n >= size())
       this->__throw_out_of_range();
     return this->__begin_[__n];
   }
 
-  _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI reference front() _NOEXCEPT {
+  [[__nodiscard__]] _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI reference front() _NOEXCEPT {
     _LIBCPP_ASSERT_VALID_ELEMENT_ACCESS(!empty(), "front() called on an empty vector");
     return *this->__begin_;
   }
-  _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI const_reference front() const _NOEXCEPT {
+  [[__nodiscard__]] _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI const_reference front() const _NOEXCEPT {
     _LIBCPP_ASSERT_VALID_ELEMENT_ACCESS(!empty(), "front() called on an empty vector");
     return *this->__begin_;
   }
-  _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI reference back() _NOEXCEPT {
+  [[__nodiscard__]] _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI reference back() _NOEXCEPT {
     _LIBCPP_ASSERT_VALID_ELEMENT_ACCESS(!empty(), "back() called on an empty vector");
     return *(this->__end_ - 1);
   }
-  _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI const_reference back() const _NOEXCEPT {
+  [[__nodiscard__]] _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI const_reference back() const _NOEXCEPT {
     _LIBCPP_ASSERT_VALID_ELEMENT_ACCESS(!empty(), "back() called on an empty vector");
     return *(this->__end_ - 1);
   }
@@ -441,11 +448,11 @@ public:
   //
   // [vector.data], data access
   //
-  _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI value_type* data() _NOEXCEPT {
+  [[__nodiscard__]] _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI value_type* data() _NOEXCEPT {
     return std::__to_address(this->__begin_);
   }
 
-  _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI const value_type* data() const _NOEXCEPT {
+  [[__nodiscard__]] _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI const value_type* data() const _NOEXCEPT {
     return std::__to_address(this->__begin_);
   }
 
@@ -478,7 +485,21 @@ public:
 #if _LIBCPP_STD_VER >= 23
   template <_ContainerCompatibleRange<_Tp> _Range>
   _LIBCPP_HIDE_FROM_ABI constexpr void append_range(_Range&& __range) {
-    insert_range(end(), std::forward<_Range>(__range));
+    if constexpr (ranges::forward_range<_Range> || ranges::sized_range<_Range>) {
+      auto __len = ranges::distance(__range);
+      if (__len <= __cap_ - __end_) {
+        __construct_at_end(ranges::begin(__range), ranges::end(__range), __len);
+      } else {
+        __split_buffer<value_type, allocator_type> __buffer(__recommend(size() + __len), size(), __alloc_);
+        __buffer.__construct_at_end_with_size(ranges::begin(__range), __len);
+        __swap_out_circular_buffer(__buffer);
+      }
+    } else {
+      vector __buffer(__alloc_);
+      for (auto&& __val : __range)
+        __buffer.emplace_back(std::forward<decltype(__val)>(__val));
+      append_range(ranges::as_rvalue_view(__buffer));
+    }
   }
 #endif
 
@@ -512,7 +533,7 @@ public:
                     int> = 0>
   _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI iterator
   insert(const_iterator __position, _ForwardIterator __first, _ForwardIterator __last) {
-    return __insert_with_size(__position, __first, __last, std::distance(__first, __last));
+    return __insert_with_size<_ClassicAlgPolicy>(__position, __first, __last, std::distance(__first, __last));
   }
 
 #if _LIBCPP_STD_VER >= 23
@@ -520,7 +541,7 @@ public:
   _LIBCPP_HIDE_FROM_ABI constexpr iterator insert_range(const_iterator __position, _Range&& __range) {
     if constexpr (ranges::forward_range<_Range> || ranges::sized_range<_Range>) {
       auto __n = static_cast<size_type>(ranges::distance(__range));
-      return __insert_with_size(__position, ranges::begin(__range), ranges::end(__range), __n);
+      return __insert_with_size<_RangeAlgPolicy>(__position, ranges::begin(__range), ranges::end(__range), __n);
 
     } else {
       return __insert_with_sentinel(__position, ranges::begin(__range), ranges::end(__range));
@@ -613,12 +634,13 @@ private:
   // The `_Iterator` in `*_with_size` functions can be input-only only if called from `*_range` (since C++23).
   // Otherwise, `_Iterator` is a forward iterator.
 
-  template <class _Iterator, class _Sentinel>
+  template <class _AlgPolicy, class _Iterator, class _Sentinel>
   _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI void
   __assign_with_size(_Iterator __first, _Sentinel __last, difference_type __n);
 
-  template <class _Iterator,
-            __enable_if_t<!is_same<decltype(*std::declval<_Iterator&>())&&, value_type&&>::value, int> = 0>
+  template <class _AlgPolicy,
+            class _Iterator,
+            __enable_if_t<!is_same<__policy_value_type<_AlgPolicy, _Iterator>, value_type>::value, int> = 0>
   _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI void
   __insert_assign_n_unchecked(_Iterator __first, difference_type __n, pointer __position) {
     for (pointer __end_position = __position + __n; __position != __end_position; ++__position, (void)++__first) {
@@ -627,25 +649,19 @@ private:
     }
   }
 
-  template <class _Iterator,
-            __enable_if_t<is_same<decltype(*std::declval<_Iterator&>())&&, value_type&&>::value, int> = 0>
+  template <class _AlgPolicy,
+            class _Iterator,
+            __enable_if_t<is_same<__policy_value_type<_AlgPolicy, _Iterator>, value_type>::value, int> = 0>
   _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI void
   __insert_assign_n_unchecked(_Iterator __first, difference_type __n, pointer __position) {
-#if _LIBCPP_STD_VER >= 23
-    if constexpr (!forward_iterator<_Iterator>) { // Handles input-only sized ranges for insert_range
-      ranges::copy_n(std::move(__first), __n, __position);
-    } else
-#endif
-    {
-      std::copy_n(__first, __n, __position);
-    }
+    std::__copy_n<_AlgPolicy>(std::move(__first), __n, __position);
   }
 
   template <class _InputIterator, class _Sentinel>
   _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI iterator
   __insert_with_sentinel(const_iterator __position, _InputIterator __first, _Sentinel __last);
 
-  template <class _Iterator, class _Sentinel>
+  template <class _AlgPolicy, class _Iterator, class _Sentinel>
   _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI iterator
   __insert_with_size(const_iterator __position, _Iterator __first, _Sentinel __last, difference_type __n);
 
@@ -653,9 +669,6 @@ private:
   _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI void
   __construct_at_end(_InputIterator __first, _Sentinel __last, size_type __n);
 
-  _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI void __append(size_type __n);
-  _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI void __append(size_type __n, const_reference __x);
-
   _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI iterator __make_iter(pointer __p) _NOEXCEPT {
 #ifdef _LIBCPP_ABI_BOUNDED_ITERATORS_IN_VECTOR
     // Bound the iterator according to the capacity, rather than the size.
@@ -689,9 +702,9 @@ private:
   }
 
   _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI void
-  __swap_out_circular_buffer(__split_buffer<value_type, allocator_type&>& __v);
+  __swap_out_circular_buffer(__split_buffer<value_type, allocator_type>& __v);
   _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI pointer
-  __swap_out_circular_buffer(__split_buffer<value_type, allocator_type&>& __v, pointer __p);
+  __swap_out_circular_buffer(__split_buffer<value_type, allocator_type>& __v, pointer __p);
   _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI void
   __move_range(pointer __from_s, pointer __from_e, pointer __to);
   _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI void __move_assign(vector& __c, true_type)
@@ -811,26 +824,44 @@ private:
   __add_alignment_assumption(_Ptr __p) _NOEXCEPT {
     return __p;
   }
+
+  _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI void __swap_layouts(__split_buffer<_Tp, allocator_type>& __sb) {
+    auto __vector_begin    = __begin_;
+    auto __vector_sentinel = __end_;
+    auto __vector_cap      = __cap_;
+
+    auto __sb_begin    = __sb.begin();
+    auto __sb_sentinel = __sb.__raw_sentinel();
+    auto __sb_cap      = __sb.__raw_capacity();
+
+    // TODO: replace with __set_valid_range and __set_capacity when vector supports it.
+    __begin_ = __sb_begin;
+    __end_   = __sb_sentinel;
+    __cap_   = __sb_cap;
+
+    __sb.__set_valid_range(__vector_begin, __vector_sentinel);
+    __sb.__set_capacity(__vector_cap);
+  }
 };
 
 #if _LIBCPP_STD_VER >= 17
 template <class _InputIterator,
-          class _Alloc = allocator<__iter_value_type<_InputIterator>>,
+          class _Alloc = allocator<__iterator_value_type<_InputIterator>>,
           class        = enable_if_t<__has_input_iterator_category<_InputIterator>::value>,
-          class        = enable_if_t<__is_allocator<_Alloc>::value> >
-vector(_InputIterator, _InputIterator) -> vector<__iter_value_type<_InputIterator>, _Alloc>;
+          class        = enable_if_t<__is_allocator_v<_Alloc>>>
+vector(_InputIterator, _InputIterator) -> vector<__iterator_value_type<_InputIterator>, _Alloc>;
 
 template <class _InputIterator,
           class _Alloc,
           class = enable_if_t<__has_input_iterator_category<_InputIterator>::value>,
-          class = enable_if_t<__is_allocator<_Alloc>::value> >
-vector(_InputIterator, _InputIterator, _Alloc) -> vector<__iter_value_type<_InputIterator>, _Alloc>;
+          class = enable_if_t<__is_allocator_v<_Alloc>>>
+vector(_InputIterator, _InputIterator, _Alloc) -> vector<__iterator_value_type<_InputIterator>, _Alloc>;
 #endif
 
 #if _LIBCPP_STD_VER >= 23
 template <ranges::input_range _Range,
           class _Alloc = allocator<ranges::range_value_t<_Range>>,
-          class        = enable_if_t<__is_allocator<_Alloc>::value> >
+          class        = enable_if_t<__is_allocator_v<_Alloc>>>
 vector(from_range_t, _Range&&, _Alloc = _Alloc()) -> vector<ranges::range_value_t<_Range>, _Alloc>;
 #endif
 
@@ -839,17 +870,16 @@ vector(from_range_t, _Range&&, _Alloc = _Alloc()) -> vector<ranges::range_value_
 // function has a strong exception guarantee.
 template <class _Tp, class _Allocator>
 _LIBCPP_CONSTEXPR_SINCE_CXX20 void
-vector<_Tp, _Allocator>::__swap_out_circular_buffer(__split_buffer<value_type, allocator_type&>& __v) {
+vector<_Tp, _Allocator>::__swap_out_circular_buffer(__split_buffer<value_type, allocator_type>& __v) {
   __annotate_delete();
-  auto __new_begin = __v.__begin_ - (__end_ - __begin_);
+  auto __new_begin = __v.begin() - size();
   std::__uninitialized_allocator_relocate(
       this->__alloc_, std::__to_address(__begin_), std::__to_address(__end_), std::__to_address(__new_begin));
-  __v.__begin_ = __new_begin;
+  __v.__set_valid_range(__new_begin, __v.end());
   __end_       = __begin_; // All the objects have been destroyed by relocating them.
-  std::swap(this->__begin_, __v.__begin_);
-  std::swap(this->__end_, __v.__end_);
-  std::swap(this->__cap_, __v.__cap_);
-  __v.__first_ = __v.__begin_;
+
+  __swap_layouts(__v);
+  __v.__set_data(__v.begin());
   __annotate_new(size());
 }
 
@@ -859,27 +889,25 @@ vector<_Tp, _Allocator>::__swap_out_circular_buffer(__split_buffer<value_type, a
 // function has a strong exception guarantee if __begin_ == __p || __end_ == __p.
 template <class _Tp, class _Allocator>
 _LIBCPP_CONSTEXPR_SINCE_CXX20 typename vector<_Tp, _Allocator>::pointer
-vector<_Tp, _Allocator>::__swap_out_circular_buffer(__split_buffer<value_type, allocator_type&>& __v, pointer __p) {
+vector<_Tp, _Allocator>::__swap_out_circular_buffer(__split_buffer<value_type, allocator_type>& __v, pointer __p) {
   __annotate_delete();
-  pointer __ret = __v.__begin_;
+  pointer __ret = __v.begin();
 
   // Relocate [__p, __end_) first to avoid having a hole in [__begin_, __end_)
   // in case something in [__begin_, __p) throws.
   std::__uninitialized_allocator_relocate(
-      this->__alloc_, std::__to_address(__p), std::__to_address(__end_), std::__to_address(__v.__end_));
-  __v.__end_ += (__end_ - __p);
+      this->__alloc_, std::__to_address(__p), std::__to_address(__end_), std::__to_address(__v.end()));
+  auto __relocated_so_far = __end_ - __p;
+  __v.__set_sentinel(__v.end() + __relocated_so_far);
   __end_           = __p; // The objects in [__p, __end_) have been destroyed by relocating them.
-  auto __new_begin = __v.__begin_ - (__p - __begin_);
+  auto __new_begin = __v.begin() - (__p - __begin_);
 
   std::__uninitialized_allocator_relocate(
       this->__alloc_, std::__to_address(__begin_), std::__to_address(__p), std::__to_address(__new_begin));
-  __v.__begin_ = __new_begin;
-  __end_       = __begin_; // All the objects have been destroyed by relocating them.
-
-  std::swap(this->__begin_, __v.__begin_);
-  std::swap(this->__end_, __v.__end_);
-  std::swap(this->__cap_, __v.__cap_);
-  __v.__first_ = __v.__begin_;
+  __v.__set_valid_range(__new_begin, __v.end());
+  __end_ = __begin_; // All the objects have been destroyed by relocating them.
+  __swap_layouts(__v);
+  __v.__set_data(__v.begin());
   __annotate_new(size());
   return __ret;
 }
@@ -945,36 +973,6 @@ vector<_Tp, _Allocator>::__construct_at_end(_InputIterator __first, _Sentinel __
   __tx.__pos_ = std::__uninitialized_allocator_copy(this->__alloc_, std::move(__first), std::move(__last), __tx.__pos_);
 }
 
-//  Default constructs __n objects starting at __end_
-//  throws if construction throws
-//  Postcondition:  size() == size() + __n
-//  Exception safety: strong.
-template <class _Tp, class _Allocator>
-_LIBCPP_CONSTEXPR_SINCE_CXX20 void vector<_Tp, _Allocator>::__append(size_type __n) {
-  if (static_cast<size_type>(this->__cap_ - this->__end_) >= __n)
-    this->__construct_at_end(__n);
-  else {
-    __split_buffer<value_type, allocator_type&> __v(__recommend(size() + __n), size(), this->__alloc_);
-    __v.__construct_at_end(__n);
-    __swap_out_circular_buffer(__v);
-  }
-}
-
-//  Default constructs __n objects starting at __end_
-//  throws if construction throws
-//  Postcondition:  size() == size() + __n
-//  Exception safety: strong.
-template <class _Tp, class _Allocator>
-_LIBCPP_CONSTEXPR_SINCE_CXX20 void vector<_Tp, _Allocator>::__append(size_type __n, const_reference __x) {
-  if (static_cast<size_type>(this->__cap_ - this->__end_) >= __n)
-    this->__construct_at_end(__n, __x);
-  else {
-    __split_buffer<value_type, allocator_type&> __v(__recommend(size() + __n), size(), this->__alloc_);
-    __v.__construct_at_end(__n, __x);
-    __swap_out_circular_buffer(__v);
-  }
-}
-
 template <class _Tp, class _Allocator>
 _LIBCPP_CONSTEXPR_SINCE_CXX20 inline _LIBCPP_HIDE_FROM_ABI vector<_Tp, _Allocator>::vector(vector&& __x)
 #if _LIBCPP_STD_VER >= 17
@@ -1051,20 +1049,14 @@ vector<_Tp, _Allocator>::__assign_with_sentinel(_Iterator __first, _Sentinel __l
 }
 
 template <class _Tp, class _Allocator>
-template <class _Iterator, class _Sentinel>
+template <class _AlgPolicy, class _Iterator, class _Sentinel>
 _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI void
 vector<_Tp, _Allocator>::__assign_with_size(_Iterator __first, _Sentinel __last, difference_type __n) {
   size_type __new_size = static_cast<size_type>(__n);
   if (__new_size <= capacity()) {
     if (__new_size > size()) {
-#if _LIBCPP_STD_VER >= 23
-      auto __mid = ranges::copy_n(std::move(__first), size(), this->__begin_).in;
+      auto __mid = std::__copy_n<_AlgPolicy>(std::move(__first), size(), this->__begin_).first;
       __construct_at_end(std::move(__mid), std::move(__last), __new_size - size());
-#else
-      _Iterator __mid = std::next(__first, size());
-      std::copy(__first, __mid, this->__begin_);
-      __construct_at_end(__mid, __last, __new_size - size());
-#endif
     } else {
       pointer __m = std::__copy(std::move(__first), __last, this->__begin_).second;
       this->__destruct_at_end(__m);
@@ -1097,7 +1089,7 @@ _LIBCPP_CONSTEXPR_SINCE_CXX20 void vector<_Tp, _Allocator>::reserve(size_type __
   if (__n > capacity()) {
     if (__n > max_size())
       this->__throw_length_error();
-    __split_buffer<value_type, allocator_type&> __v(__n, size(), this->__alloc_);
+    __split_buffer<value_type, allocator_type> __v(__n, size(), this->__alloc_);
     __swap_out_circular_buffer(__v);
   }
 }
@@ -1108,7 +1100,7 @@ _LIBCPP_CONSTEXPR_SINCE_CXX20 void vector<_Tp, _Allocator>::shrink_to_fit() _NOE
 #if _LIBCPP_HAS_EXCEPTIONS
     try {
 #endif // _LIBCPP_HAS_EXCEPTIONS
-      __split_buffer<value_type, allocator_type&> __v(size(), size(), this->__alloc_);
+      __split_buffer<value_type, allocator_type> __v(size(), size(), this->__alloc_);
       // The Standard mandates shrink_to_fit() does not increase the capacity.
       // With equal capacity keep the existing buffer. This avoids extra work
       // due to swapping the elements.
@@ -1125,14 +1117,33 @@ template <class _Tp, class _Allocator>
 template <class... _Args>
 _LIBCPP_CONSTEXPR_SINCE_CXX20 typename vector<_Tp, _Allocator>::pointer
 vector<_Tp, _Allocator>::__emplace_back_slow_path(_Args&&... __args) {
-  __split_buffer<value_type, allocator_type&> __v(__recommend(size() + 1), size(), this->__alloc_);
+  __split_buffer<value_type, allocator_type> __v(__recommend(size() + 1), size(), this->__alloc_);
   //    __v.emplace_back(std::forward<_Args>(__args)...);
-  __alloc_traits::construct(this->__alloc_, std::__to_address(__v.__end_), std::forward<_Args>(__args)...);
-  __v.__end_++;
+  pointer __end = __v.end();
+  __alloc_traits::construct(this->__alloc_, std::__to_address(__end), std::forward<_Args>(__args)...);
+  __v.__set_sentinel(++__end);
   __swap_out_circular_buffer(__v);
   return this->__end_;
 }
 
+// This makes the compiler inline `__else()` if `__cond` is known to be false. Currently LLVM doesn't do that without
+// the `__builtin_constant_p`, since it considers `__else` unlikely even though it's known to be run.
+// See https://llvm.org/PR154292
+template <class _If, class _Else>
+_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 void __if_likely_else(bool __cond, _If __if, _Else __else) {
+  if (__builtin_constant_p(__cond)) {
+    if (__cond)
+      __if();
+    else
+      __else();
+  } else {
+    if (__cond) [[__likely__]]
+      __if();
+    else
+      __else();
+  }
+}
+
 template <class _Tp, class _Allocator>
 template <class... _Args>
 _LIBCPP_CONSTEXPR_SINCE_CXX20 inline
@@ -1143,12 +1154,14 @@ _LIBCPP_CONSTEXPR_SINCE_CXX20 inline
 #endif
     vector<_Tp, _Allocator>::emplace_back(_Args&&... __args) {
   pointer __end = this->__end_;
-  if (__end < this->__cap_) {
-    __emplace_back_assume_capacity(std::forward<_Args>(__args)...);
-    ++__end;
-  } else {
-    __end = __emplace_back_slow_path(std::forward<_Args>(__args)...);
-  }
+  std::__if_likely_else(
+      __end < this->__cap_,
+      [&] {
+        __emplace_back_assume_capacity(std::forward<_Args>(__args)...);
+        ++__end;
+      },
+      [&] { __end = __emplace_back_slow_path(std::forward<_Args>(__args)...); });
+
   this->__end_ = __end;
 #if _LIBCPP_STD_VER >= 17
   return *(__end - 1);
@@ -1207,7 +1220,7 @@ vector<_Tp, _Allocator>::insert(const_iterator __position, const_reference __x)
       *__p = *__xr;
     }
   } else {
-    __split_buffer<value_type, allocator_type&> __v(__recommend(size() + 1), __p - this->__begin_, this->__alloc_);
+    __split_buffer<value_type, allocator_type> __v(__recommend(size() + 1), __p - this->__begin_, this->__alloc_);
     __v.emplace_back(__x);
     __p = __swap_out_circular_buffer(__v, __p);
   }
@@ -1226,7 +1239,7 @@ vector<_Tp, _Allocator>::insert(const_iterator __position, value_type&& __x) {
       *__p = std::move(__x);
     }
   } else {
-    __split_buffer<value_type, allocator_type&> __v(__recommend(size() + 1), __p - this->__begin_, this->__alloc_);
+    __split_buffer<value_type, allocator_type> __v(__recommend(size() + 1), __p - this->__begin_, this->__alloc_);
     __v.emplace_back(std::move(__x));
     __p = __swap_out_circular_buffer(__v, __p);
   }
@@ -1247,7 +1260,7 @@ vector<_Tp, _Allocator>::emplace(const_iterator __position, _Args&&... __args) {
       *__p = std::move(__tmp.get());
     }
   } else {
-    __split_buffer<value_type, allocator_type&> __v(__recommend(size() + 1), __p - this->__begin_, this->__alloc_);
+    __split_buffer<value_type, allocator_type> __v(__recommend(size() + 1), __p - this->__begin_, this->__alloc_);
     __v.emplace_back(std::forward<_Args>(__args)...);
     __p = __swap_out_circular_buffer(__v, __p);
   }
@@ -1275,7 +1288,7 @@ vector<_Tp, _Allocator>::insert(const_iterator __position, size_type __n, const_
         std::fill_n(__p, __n, *__xr);
       }
     } else {
-      __split_buffer<value_type, allocator_type&> __v(__recommend(size() + __n), __p - this->__begin_, this->__alloc_);
+      __split_buffer<value_type, allocator_type> __v(__recommend(size() + __n), __p - this->__begin_, this->__alloc_);
       __v.__construct_at_end(__n, __x);
       __p = __swap_out_circular_buffer(__v, __p);
     }
@@ -1296,28 +1309,28 @@ vector<_Tp, _Allocator>::__insert_with_sentinel(const_iterator __position, _Inpu
   if (__first == __last)
     (void)std::rotate(__p, __old_last, this->__end_);
   else {
-    __split_buffer<value_type, allocator_type&> __v(__alloc_);
+    __split_buffer<value_type, allocator_type> __v(__alloc_);
     auto __guard = std::__make_exception_guard(
         _AllocatorDestroyRangeReverse<allocator_type, pointer>(__alloc_, __old_last, this->__end_));
     __v.__construct_at_end_with_sentinel(std::move(__first), std::move(__last));
-    __split_buffer<value_type, allocator_type&> __merged(
+    __split_buffer<value_type, allocator_type> __merged(
         __recommend(size() + __v.size()), __off, __alloc_); // has `__off` positions available at the front
     std::__uninitialized_allocator_relocate(
-        __alloc_, std::__to_address(__old_last), std::__to_address(this->__end_), std::__to_address(__merged.__end_));
+        __alloc_, std::__to_address(__old_last), std::__to_address(this->__end_), std::__to_address(__merged.end()));
     __guard.__complete(); // Release the guard once objects in [__old_last_, __end_) have been successfully relocated.
-    __merged.__end_ += this->__end_ - __old_last;
+    __merged.__set_sentinel(__merged.end() + (this->__end_ - __old_last));
     this->__end_ = __old_last;
     std::__uninitialized_allocator_relocate(
-        __alloc_, std::__to_address(__v.__begin_), std::__to_address(__v.__end_), std::__to_address(__merged.__end_));
-    __merged.__end_ += __v.size();
-    __v.__end_ = __v.__begin_;
+        __alloc_, std::__to_address(__v.begin()), std::__to_address(__v.end()), std::__to_address(__merged.end()));
+    __merged.__set_sentinel(__merged.size() + __v.size());
+    __v.__set_sentinel(__v.begin());
     __p        = __swap_out_circular_buffer(__merged, __p);
   }
   return __make_iter(__p);
 }
 
 template <class _Tp, class _Allocator>
-template <class _Iterator, class _Sentinel>
+template <class _AlgPolicy, class _Iterator, class _Sentinel>
 _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI typename vector<_Tp, _Allocator>::iterator
 vector<_Tp, _Allocator>::__insert_with_size(
     const_iterator __position, _Iterator __first, _Sentinel __last, difference_type __n) {
@@ -1338,15 +1351,15 @@ vector<_Tp, _Allocator>::__insert_with_size(
           __construct_at_end(__m, __last, __n - __dx);
           if (__dx > 0) {
             __move_range(__p, __old_last, __p + __n);
-            __insert_assign_n_unchecked(__first, __dx, __p);
+            __insert_assign_n_unchecked<_AlgPolicy>(__first, __dx, __p);
           }
         }
       } else {
         __move_range(__p, __old_last, __p + __n);
-        __insert_assign_n_unchecked(std::move(__first), __n, __p);
+        __insert_assign_n_unchecked<_AlgPolicy>(std::move(__first), __n, __p);
       }
     } else {
-      __split_buffer<value_type, allocator_type&> __v(__recommend(size() + __n), __p - this->__begin_, this->__alloc_);
+      __split_buffer<value_type, allocator_type> __v(__recommend(size() + __n), __p - this->__begin_, this->__alloc_);
       __v.__construct_at_end_with_size(std::move(__first), __n);
       __p = __swap_out_circular_buffer(__v, __p);
     }
@@ -1355,21 +1368,35 @@ vector<_Tp, _Allocator>::__insert_with_size(
 }
 
 template <class _Tp, class _Allocator>
-_LIBCPP_CONSTEXPR_SINCE_CXX20 void vector<_Tp, _Allocator>::resize(size_type __sz) {
-  size_type __cs = size();
-  if (__cs < __sz)
-    this->__append(__sz - __cs);
-  else if (__cs > __sz)
-    this->__destruct_at_end(this->__begin_ + __sz);
+_LIBCPP_CONSTEXPR_SINCE_CXX20 void vector<_Tp, _Allocator>::resize(size_type __new_size) {
+  size_type __current_size = size();
+  if (__current_size < __new_size) {
+    if (__new_size <= capacity()) {
+      __construct_at_end(__new_size - __current_size);
+    } else {
+      __split_buffer<value_type, allocator_type> __v(__recommend(__new_size), __current_size, __alloc_);
+      __v.__construct_at_end(__new_size - __current_size);
+      __swap_out_circular_buffer(__v);
+    }
+  } else if (__current_size > __new_size) {
+    this->__destruct_at_end(this->__begin_ + __new_size);
+  }
 }
 
 template <class _Tp, class _Allocator>
-_LIBCPP_CONSTEXPR_SINCE_CXX20 void vector<_Tp, _Allocator>::resize(size_type __sz, const_reference __x) {
-  size_type __cs = size();
-  if (__cs < __sz)
-    this->__append(__sz - __cs, __x);
-  else if (__cs > __sz)
-    this->__destruct_at_end(this->__begin_ + __sz);
+_LIBCPP_CONSTEXPR_SINCE_CXX20 void vector<_Tp, _Allocator>::resize(size_type __new_size, const_reference __x) {
+  size_type __current_size = size();
+  if (__current_size < __new_size) {
+    if (__new_size <= capacity())
+      __construct_at_end(__new_size - __current_size, __x);
+    else {
+      __split_buffer<value_type, allocator_type> __v(__recommend(__new_size), __current_size, __alloc_);
+      __v.__construct_at_end(__new_size - __current_size, __x);
+      __swap_out_circular_buffer(__v);
+    }
+  } else if (__current_size > __new_size) {
+    this->__destruct_at_end(this->__begin_ + __new_size);
+  }
 }
 
 template <class _Tp, class _Allocator>
diff --git a/lib/libcxx/include/__vector/vector_bool.h b/lib/libcxx/include/__vector/vector_bool.h
index e921e651e9..f81fcd92a7 100644
--- a/lib/libcxx/include/__vector/vector_bool.h
+++ b/lib/libcxx/include/__vector/vector_bool.h
@@ -11,6 +11,7 @@
 
 #include <__algorithm/copy.h>
 #include <__algorithm/copy_backward.h>
+#include <__algorithm/copy_n.h>
 #include <__algorithm/fill_n.h>
 #include <__algorithm/iterator_operations.h>
 #include <__algorithm/max.h>
@@ -19,7 +20,7 @@
 #include <__bit_reference>
 #include <__config>
 #include <__functional/unary_function.h>
-#include <__fwd/bit_reference.h> // TODO: This is a workaround for https://github.com/llvm/llvm-project/issues/131814
+#include <__fwd/bit_reference.h> // TODO: This is a workaround for https://llvm.org/PR131814
 #include <__fwd/functional.h>
 #include <__fwd/vector.h>
 #include <__iterator/distance.h>
@@ -234,74 +235,89 @@ public:
   }
 #endif
 
-  _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 allocator_type get_allocator() const _NOEXCEPT {
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 allocator_type get_allocator() const _NOEXCEPT {
     return allocator_type(this->__alloc_);
   }
 
-  _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 size_type max_size() const _NOEXCEPT;
-  _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 size_type capacity() const _NOEXCEPT {
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 size_type max_size() const _NOEXCEPT;
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 size_type capacity() const _NOEXCEPT {
     return __internal_cap_to_external(__cap_);
   }
-  _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 size_type size() const _NOEXCEPT { return __size_; }
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 size_type size() const _NOEXCEPT {
+    return __size_;
+  }
   [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 bool empty() const _NOEXCEPT {
     return __size_ == 0;
   }
   _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 void reserve(size_type __n);
   _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 void shrink_to_fit() _NOEXCEPT;
 
-  _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 iterator begin() _NOEXCEPT { return __make_iter(0); }
-  _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 const_iterator begin() const _NOEXCEPT { return __make_iter(0); }
-  _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 iterator end() _NOEXCEPT { return __make_iter(__size_); }
-  _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 const_iterator end() const _NOEXCEPT {
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 iterator begin() _NOEXCEPT {
+    return __make_iter(0);
+  }
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 const_iterator begin() const _NOEXCEPT {
+    return __make_iter(0);
+  }
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 iterator end() _NOEXCEPT {
+    return __make_iter(__size_);
+  }
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 const_iterator end() const _NOEXCEPT {
     return __make_iter(__size_);
   }
 
-  _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 reverse_iterator rbegin() _NOEXCEPT {
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 reverse_iterator rbegin() _NOEXCEPT {
     return reverse_iterator(end());
   }
-  _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 const_reverse_iterator rbegin() const _NOEXCEPT {
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 const_reverse_iterator
+  rbegin() const _NOEXCEPT {
     return const_reverse_iterator(end());
   }
-  _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 reverse_iterator rend() _NOEXCEPT {
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 reverse_iterator rend() _NOEXCEPT {
     return reverse_iterator(begin());
   }
-  _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 const_reverse_iterator rend() const _NOEXCEPT {
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 const_reverse_iterator rend() const _NOEXCEPT {
     return const_reverse_iterator(begin());
   }
 
-  _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 const_iterator cbegin() const _NOEXCEPT { return __make_iter(0); }
-  _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 const_iterator cend() const _NOEXCEPT {
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 const_iterator cbegin() const _NOEXCEPT {
+    return __make_iter(0);
+  }
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 const_iterator cend() const _NOEXCEPT {
     return __make_iter(__size_);
   }
-  _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 const_reverse_iterator crbegin() const _NOEXCEPT {
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 const_reverse_iterator
+  crbegin() const _NOEXCEPT {
     return rbegin();
   }
-  _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 const_reverse_iterator crend() const _NOEXCEPT { return rend(); }
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 const_reverse_iterator crend() const _NOEXCEPT {
+    return rend();
+  }
 
-  _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 reference operator[](size_type __n) {
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 reference operator[](size_type __n) {
     _LIBCPP_ASSERT_VALID_ELEMENT_ACCESS(__n < size(), "vector<bool>::operator[] index out of bounds");
     return __make_ref(__n);
   }
-  _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 const_reference operator[](size_type __n) const {
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 const_reference
+  operator[](size_type __n) const {
     _LIBCPP_ASSERT_VALID_ELEMENT_ACCESS(__n < size(), "vector<bool>::operator[] index out of bounds");
     return __make_ref(__n);
   }
-  _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 reference at(size_type __n);
-  _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 const_reference at(size_type __n) const;
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 reference at(size_type __n);
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 const_reference at(size_type __n) const;
 
-  _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 reference front() {
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 reference front() {
     _LIBCPP_ASSERT_VALID_ELEMENT_ACCESS(!empty(), "vector<bool>::front() called on an empty vector");
     return __make_ref(0);
   }
-  _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 const_reference front() const {
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 const_reference front() const {
     _LIBCPP_ASSERT_VALID_ELEMENT_ACCESS(!empty(), "vector<bool>::front() called on an empty vector");
     return __make_ref(0);
   }
-  _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 reference back() {
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 reference back() {
     _LIBCPP_ASSERT_VALID_ELEMENT_ACCESS(!empty(), "vector<bool>::back() called on an empty vector");
     return __make_ref(__size_ - 1);
   }
-  _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 const_reference back() const {
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 const_reference back() const {
     _LIBCPP_ASSERT_VALID_ELEMENT_ACCESS(!empty(), "vector<bool>::back() called on an empty vector");
     return __make_ref(__size_ - 1);
   }
@@ -463,7 +479,6 @@ private:
     return (__new_size + (__bits_per_word - 1)) & ~((size_type)__bits_per_word - 1);
   }
   _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 size_type __recommend(size_type __new_size) const;
-  _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 void __construct_at_end(size_type __n, bool __x);
   template <class _InputIterator, class _Sentinel>
   _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 void
   __construct_at_end(_InputIterator __first, _Sentinel __last, size_type __n);
@@ -552,20 +567,6 @@ vector<bool, _Allocator>::__recommend(size_type __new_size) const {
   return std::max<size_type>(2 * __cap, __align_it(__new_size));
 }
 
-//  Default constructs __n objects starting at __end_
-//  Precondition:  size() + __n <= capacity()
-//  Postcondition:  size() == size() + __n
-template <class _Allocator>
-inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 void
-vector<bool, _Allocator>::__construct_at_end(size_type __n, bool __x) {
-  _LIBCPP_ASSERT_INTERNAL(
-      capacity() >= size() + __n, "vector<bool>::__construct_at_end called with insufficient capacity");
-  std::fill_n(end(), __n, __x);
-  this->__size_ += __n;
-  if (end().__ctz_ != 0) // Ensure uninitialized leading bits in the last word are set to zero
-    std::fill_n(end(), __bits_per_word - end().__ctz_, 0);
-}
-
 template <class _Allocator>
 template <class _InputIterator, class _Sentinel>
 _LIBCPP_CONSTEXPR_SINCE_CXX20 void
@@ -598,7 +599,8 @@ _LIBCPP_CONSTEXPR_SINCE_CXX20 vector<bool, _Allocator>::vector(size_type __n)
     : __begin_(nullptr), __size_(0), __cap_(0) {
   if (__n > 0) {
     __vallocate(__n);
-    __construct_at_end(__n, false);
+    std::fill_n(__begin_, __external_cap_to_internal(__n), __storage_type(0));
+    __size_ = __n;
   }
 }
 
@@ -608,7 +610,8 @@ _LIBCPP_CONSTEXPR_SINCE_CXX20 vector<bool, _Allocator>::vector(size_type __n, co
     : __begin_(nullptr), __size_(0), __cap_(0), __alloc_(static_cast<__storage_allocator>(__a)) {
   if (__n > 0) {
     __vallocate(__n);
-    __construct_at_end(__n, false);
+    std::fill_n(__begin_, __external_cap_to_internal(__n), __storage_type(0));
+    __size_ = __n;
   }
 }
 #endif
@@ -618,7 +621,8 @@ _LIBCPP_CONSTEXPR_SINCE_CXX20 vector<bool, _Allocator>::vector(size_type __n, co
     : __begin_(nullptr), __size_(0), __cap_(0) {
   if (__n > 0) {
     __vallocate(__n);
-    __construct_at_end(__n, __x);
+    std::fill_n(__begin_, __external_cap_to_internal(__n), __storage_type(0) - __x);
+    __size_ = __n;
   }
 }
 
@@ -628,7 +632,8 @@ vector<bool, _Allocator>::vector(size_type __n, const value_type& __x, const all
     : __begin_(nullptr), __size_(0), __cap_(0), __alloc_(static_cast<__storage_allocator>(__a)) {
   if (__n > 0) {
     __vallocate(__n);
-    __construct_at_end(__n, __x);
+    std::fill_n(__begin_, __external_cap_to_internal(__n), __storage_type(0) - __x);
+    __size_ = __n;
   }
 }
 
@@ -697,7 +702,8 @@ _LIBCPP_CONSTEXPR_SINCE_CXX20 vector<bool, _Allocator>::vector(const vector& __v
       __alloc_(__storage_traits::select_on_container_copy_construction(__v.__alloc_)) {
   if (__v.size() > 0) {
     __vallocate(__v.size());
-    __construct_at_end(__v.begin(), __v.end(), __v.size());
+    std::copy_n(__v.__begin_, __external_cap_to_internal(__v.size()), __begin_);
+    __size_ = __v.size();
   }
 }
 
@@ -706,7 +712,8 @@ _LIBCPP_CONSTEXPR_SINCE_CXX20 vector<bool, _Allocator>::vector(const vector& __v
     : __begin_(nullptr), __size_(0), __cap_(0), __alloc_(__a) {
   if (__v.size() > 0) {
     __vallocate(__v.size());
-    __construct_at_end(__v.begin(), __v.end(), __v.size());
+    std::copy_n(__v.__begin_, __external_cap_to_internal(__v.size()), __begin_);
+    __size_ = __v.size();
   }
 }
 
@@ -719,7 +726,7 @@ _LIBCPP_CONSTEXPR_SINCE_CXX20 vector<bool, _Allocator>& vector<bool, _Allocator>
         __vdeallocate();
         __vallocate(__v.__size_);
       }
-      std::copy(__v.__begin_, __v.__begin_ + __external_cap_to_internal(__v.__size_), __begin_);
+      std::copy_n(__v.__begin_, __external_cap_to_internal(__v.size()), __begin_);
     }
     __size_ = __v.__size_;
   }
@@ -754,7 +761,8 @@ vector<bool, _Allocator>::vector(vector&& __v, const __type_identity_t<allocator
     __v.__cap_ = __v.__size_ = 0;
   } else if (__v.size() > 0) {
     __vallocate(__v.size());
-    __construct_at_end(__v.begin(), __v.end(), __v.size());
+    __size_ = __v.__size_;
+    std::copy_n(__v.__begin_, __external_cap_to_internal(__v.size()), __begin_);
   }
 }
 
@@ -849,7 +857,8 @@ _LIBCPP_CONSTEXPR_SINCE_CXX20 void vector<bool, _Allocator>::reserve(size_type _
       this->__throw_length_error();
     vector __v(this->get_allocator());
     __v.__vallocate(__n);
-    __v.__construct_at_end(this->begin(), this->end(), this->size());
+    __v.__size_ = __size_;
+    std::copy_n(__begin_, __external_cap_to_internal(__size_), __v.__begin_);
     swap(__v);
   }
 }
@@ -956,21 +965,14 @@ vector<bool, _Allocator>::__insert_with_sentinel(const_iterator __position, _Inp
   }
   vector __v(get_allocator());
   if (__first != __last) {
-#if _LIBCPP_HAS_EXCEPTIONS
-    try {
-#endif // _LIBCPP_HAS_EXCEPTIONS
-      __v.__assign_with_sentinel(std::move(__first), std::move(__last));
-      difference_type __old_size = static_cast<difference_type>(__old_end - begin());
-      difference_type __old_p    = __p - begin();
-      reserve(__recommend(size() + __v.size()));
-      __p       = begin() + __old_p;
-      __old_end = begin() + __old_size;
-#if _LIBCPP_HAS_EXCEPTIONS
-    } catch (...) {
-      erase(__old_end, end());
-      throw;
-    }
-#endif // _LIBCPP_HAS_EXCEPTIONS
+    auto __guard = std::__make_exception_guard([&] { erase(__old_end, end()); });
+    __v.__assign_with_sentinel(std::move(__first), std::move(__last));
+    difference_type __old_size = static_cast<difference_type>(__old_end - begin());
+    difference_type __old_p    = __p - begin();
+    reserve(__recommend(size() + __v.size()));
+    __p       = begin() + __old_p;
+    __old_end = begin() + __old_size;
+    __guard.__complete();
   }
   __p = std::rotate(__p, __old_end, end());
   insert(__p, __v.begin(), __v.end());
@@ -1048,25 +1050,16 @@ _LIBCPP_CONSTEXPR_SINCE_CXX20 void vector<bool, _Allocator>::swap(vector& __x)
 }
 
 template <class _Allocator>
-_LIBCPP_CONSTEXPR_SINCE_CXX20 void vector<bool, _Allocator>::resize(size_type __sz, value_type __x) {
-  size_type __cs = size();
-  if (__cs < __sz) {
-    iterator __r;
-    size_type __c = capacity();
-    size_type __n = __sz - __cs;
-    if (__n <= __c && __cs <= __c - __n) {
-      __r = end();
-      __size_ += __n;
-    } else {
-      vector __v(get_allocator());
-      __v.reserve(__recommend(__size_ + __n));
-      __v.__size_ = __size_ + __n;
-      __r         = std::copy(cbegin(), cend(), __v.begin());
-      swap(__v);
-    }
-    std::fill_n(__r, __n, __x);
-  } else
-    __size_ = __sz;
+_LIBCPP_CONSTEXPR_SINCE_CXX20 void vector<bool, _Allocator>::resize(size_type __new_size, value_type __x) {
+  size_type __current_size = size();
+  if (__new_size < __current_size) {
+    __size_ = __new_size;
+    return;
+  }
+
+  reserve(__new_size);
+  std::fill_n(end(), __new_size - __current_size, __x);
+  __size_ = __new_size;
 }
 
 template <class _Allocator>
diff --git a/lib/libcxx/include/any b/lib/libcxx/include/any
index 89bf3cf1f7..d9368df752 100644
--- a/lib/libcxx/include/any
+++ b/lib/libcxx/include/any
@@ -84,14 +84,12 @@ namespace std {
 #  include <__cxx03/__config>
 #else
 #  include <__config>
-#  include <__memory/allocator.h>
-#  include <__memory/allocator_destructor.h>
-#  include <__memory/allocator_traits.h>
-#  include <__memory/unique_ptr.h>
+#  include <__memory/construct_at.h>
+#  include <__new/allocate.h>
 #  include <__type_traits/add_cv_quals.h>
 #  include <__type_traits/add_pointer.h>
-#  include <__type_traits/aligned_storage.h>
 #  include <__type_traits/conditional.h>
+#  include <__type_traits/conjunction.h>
 #  include <__type_traits/decay.h>
 #  include <__type_traits/enable_if.h>
 #  include <__type_traits/is_constructible.h>
@@ -100,9 +98,11 @@ namespace std {
 #  include <__type_traits/is_reference.h>
 #  include <__type_traits/is_same.h>
 #  include <__type_traits/is_void.h>
+#  include <__type_traits/negation.h>
 #  include <__type_traits/remove_cv.h>
 #  include <__type_traits/remove_cvref.h>
 #  include <__type_traits/remove_reference.h>
+#  include <__utility/exception_guard.h>
 #  include <__utility/forward.h>
 #  include <__utility/in_place.h>
 #  include <__utility/move.h>
@@ -142,21 +142,20 @@ _LIBCPP_BEGIN_NAMESPACE_STD
 class any;
 
 template <class _ValueType>
-_LIBCPP_HIDE_FROM_ABI add_pointer_t<add_const_t<_ValueType>> any_cast(any const*) _NOEXCEPT;
+_LIBCPP_HIDE_FROM_ABI add_pointer_t<add_const_t<_ValueType>> any_cast(any const*) noexcept;
 
 template <class _ValueType>
-_LIBCPP_HIDE_FROM_ABI add_pointer_t<_ValueType> any_cast(any*) _NOEXCEPT;
+_LIBCPP_HIDE_FROM_ABI add_pointer_t<_ValueType> any_cast(any*) noexcept;
 
 namespace __any_imp {
-_LIBCPP_SUPPRESS_DEPRECATED_PUSH
-using _Buffer _LIBCPP_NODEBUG = aligned_storage_t<3 * sizeof(void*), alignof(void*)>;
-_LIBCPP_SUPPRESS_DEPRECATED_POP
+inline constexpr size_t __small_buffer_size      = 3 * sizeof(void*);
+inline constexpr size_t __small_buffer_alignment = alignof(void*);
 
 template <class _Tp>
 using _IsSmallObject _LIBCPP_NODEBUG =
     integral_constant<bool,
-                      sizeof(_Tp) <= sizeof(_Buffer) && alignof(_Buffer) % alignof(_Tp) == 0 &&
-                          is_nothrow_move_constructible<_Tp>::value >;
+                      sizeof(_Tp) <= __small_buffer_size && alignof(_Tp) <= __small_buffer_alignment &&
+                          is_nothrow_move_constructible_v<_Tp>>;
 
 enum class _Action { _Destroy, _Copy, _Move, _Get, _TypeInfo };
 
@@ -192,37 +191,44 @@ using _Handler _LIBCPP_NODEBUG = conditional_t< _IsSmallObject<_Tp>::value, _Sma
 class any {
 public:
   // construct/destruct
-  _LIBCPP_HIDE_FROM_ABI constexpr any() _NOEXCEPT : __h_(nullptr) {}
+  _LIBCPP_HIDE_FROM_ABI constexpr any() noexcept : __h_(nullptr) {}
 
   _LIBCPP_HIDE_FROM_ABI any(any const& __other) : __h_(nullptr) {
     if (__other.__h_)
       __other.__call(_Action::_Copy, this);
   }
 
-  _LIBCPP_HIDE_FROM_ABI any(any&& __other) _NOEXCEPT : __h_(nullptr) {
+  _LIBCPP_HIDE_FROM_ABI any(any&& __other) noexcept : __h_(nullptr) {
     if (__other.__h_)
       __other.__call(_Action::_Move, this);
   }
 
-  template < class _ValueType,
-             class _Tp = decay_t<_ValueType>,
-             class     = enable_if_t< !is_same<_Tp, any>::value && !__is_inplace_type<_ValueType>::value &&
-                                      is_copy_constructible<_Tp>::value> >
-  _LIBCPP_HIDE_FROM_ABI any(_ValueType&& __value);
+  template <
+      class _ValueType,
+      class _Tp        = decay_t<_ValueType>,
+      enable_if_t<_And<_Not<is_same<_Tp, any>>, _Not<__is_inplace_type<_ValueType>>, is_copy_constructible<_Tp>>::value,
+                  int> = 0>
+  _LIBCPP_HIDE_FROM_ABI any(_ValueType&& __value) : __h_(nullptr) {
+    __any_imp::_Handler<_Tp>::__create(*this, std::forward<_ValueType>(__value));
+  }
 
   template <class _ValueType,
             class... _Args,
-            class _Tp = decay_t<_ValueType>,
-            class     = enable_if_t< is_constructible<_Tp, _Args...>::value && is_copy_constructible<_Tp>::value > >
-  _LIBCPP_HIDE_FROM_ABI explicit any(in_place_type_t<_ValueType>, _Args&&... __args);
+            class _Tp                                                                           = decay_t<_ValueType>,
+            enable_if_t<is_constructible_v<_Tp, _Args...> && is_copy_constructible_v<_Tp>, int> = 0>
+  _LIBCPP_HIDE_FROM_ABI explicit any(in_place_type_t<_ValueType>, _Args&&... __args) {
+    __any_imp::_Handler<_Tp>::__create(*this, std::forward<_Args>(__args)...);
+  }
 
-  template <class _ValueType,
-            class _Up,
-            class... _Args,
-            class _Tp = decay_t<_ValueType>,
-            class     = enable_if_t< is_constructible<_Tp, initializer_list<_Up>&, _Args...>::value &&
-                                     is_copy_constructible<_Tp>::value> >
-  _LIBCPP_HIDE_FROM_ABI explicit any(in_place_type_t<_ValueType>, initializer_list<_Up>, _Args&&... __args);
+  template <
+      class _ValueType,
+      class _Up,
+      class... _Args,
+      class _Tp = decay_t<_ValueType>,
+      enable_if_t<is_constructible_v<_Tp, initializer_list<_Up>&, _Args...> && is_copy_constructible_v<_Tp>, int> = 0>
+  _LIBCPP_HIDE_FROM_ABI explicit any(in_place_type_t<_ValueType>, initializer_list<_Up> __il, _Args&&... __args) {
+    __any_imp::_Handler<_Tp>::__create(*this, __il, std::forward<_Args>(__args)...);
+  }
 
   _LIBCPP_HIDE_FROM_ABI ~any() { this->reset(); }
 
@@ -232,43 +238,65 @@ public:
     return *this;
   }
 
-  _LIBCPP_HIDE_FROM_ABI any& operator=(any&& __rhs) _NOEXCEPT {
+  _LIBCPP_HIDE_FROM_ABI any& operator=(any&& __rhs) noexcept {
     any(std::move(__rhs)).swap(*this);
     return *this;
   }
 
-  template < class _ValueType,
-             class _Tp = decay_t<_ValueType>,
-             class     = enable_if_t< !is_same<_Tp, any>::value && is_copy_constructible<_Tp>::value> >
-  _LIBCPP_HIDE_FROM_ABI any& operator=(_ValueType&& __rhs);
+  template <class _ValueType,
+            class _Tp                                                              = decay_t<_ValueType>,
+            enable_if_t<!is_same_v<_Tp, any> && is_copy_constructible_v<_Tp>, int> = 0>
+  _LIBCPP_HIDE_FROM_ABI any& operator=(_ValueType&& __rhs) {
+    any(std::forward<_ValueType>(__rhs)).swap(*this);
+    return *this;
+  }
 
   template <class _ValueType,
             class... _Args,
-            class _Tp = decay_t<_ValueType>,
-            class     = enable_if_t< is_constructible<_Tp, _Args...>::value && is_copy_constructible<_Tp>::value> >
-  _LIBCPP_HIDE_FROM_ABI _Tp& emplace(_Args&&...);
+            class _Tp                                                                           = decay_t<_ValueType>,
+            enable_if_t<is_constructible_v<_Tp, _Args...> && is_copy_constructible_v<_Tp>, int> = 0>
+  _LIBCPP_HIDE_FROM_ABI _Tp& emplace(_Args&&... __args) {
+    reset();
+    return __any_imp::_Handler<_Tp>::__create(*this, std::forward<_Args>(__args)...);
+  }
 
-  template <class _ValueType,
-            class _Up,
-            class... _Args,
-            class _Tp = decay_t<_ValueType>,
-            class     = enable_if_t< is_constructible<_Tp, initializer_list<_Up>&, _Args...>::value &&
-                                     is_copy_constructible<_Tp>::value> >
-  _LIBCPP_HIDE_FROM_ABI _Tp& emplace(initializer_list<_Up>, _Args&&...);
+  template <
+      class _ValueType,
+      class _Up,
+      class... _Args,
+      class _Tp = decay_t<_ValueType>,
+      enable_if_t<is_constructible_v<_Tp, initializer_list<_Up>&, _Args...> && is_copy_constructible_v<_Tp>, int> = 0>
+  _LIBCPP_HIDE_FROM_ABI _Tp& emplace(initializer_list<_Up> __il, _Args&&... __args) {
+    reset();
+    return __any_imp::_Handler<_Tp>::__create(*this, __il, std::forward<_Args>(__args)...);
+  }
 
   // 6.3.3 any modifiers
-  _LIBCPP_HIDE_FROM_ABI void reset() _NOEXCEPT {
+  _LIBCPP_HIDE_FROM_ABI void reset() noexcept {
     if (__h_)
       this->__call(_Action::_Destroy);
   }
 
-  _LIBCPP_HIDE_FROM_ABI void swap(any& __rhs) _NOEXCEPT;
+  _LIBCPP_HIDE_FROM_ABI void swap(any& __rhs) noexcept {
+    if (this == &__rhs)
+      return;
+    if (__h_ && __rhs.__h_) {
+      any __tmp;
+      __rhs.__call(_Action::_Move, &__tmp);
+      this->__call(_Action::_Move, &__rhs);
+      __tmp.__call(_Action::_Move, this);
+    } else if (__h_) {
+      this->__call(_Action::_Move, &__rhs);
+    } else if (__rhs.__h_) {
+      __rhs.__call(_Action::_Move, this);
+    }
+  }
 
   // 6.3.4 any observers
-  _LIBCPP_HIDE_FROM_ABI bool has_value() const _NOEXCEPT { return __h_ != nullptr; }
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI bool has_value() const noexcept { return __h_ != nullptr; }
 
 #    if _LIBCPP_HAS_RTTI
-  _LIBCPP_HIDE_FROM_ABI const type_info& type() const _NOEXCEPT {
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI const type_info& type() const noexcept {
     if (__h_) {
       return *static_cast<type_info const*>(this->__call(_Action::_TypeInfo));
     } else {
@@ -285,7 +313,7 @@ private:
   union _Storage {
     _LIBCPP_HIDE_FROM_ABI constexpr _Storage() : __ptr(nullptr) {}
     void* __ptr;
-    __any_imp::_Buffer __buf;
+    alignas(__any_imp::__small_buffer_alignment) char __buf[__any_imp::__small_buffer_size];
   };
 
   _LIBCPP_HIDE_FROM_ABI void*
@@ -305,10 +333,10 @@ private:
   friend struct __any_imp::_LargeHandler;
 
   template <class _ValueType>
-  friend add_pointer_t<add_const_t<_ValueType>> any_cast(any const*) _NOEXCEPT;
+  friend add_pointer_t<add_const_t<_ValueType>> any_cast(any const*) noexcept;
 
   template <class _ValueType>
-  friend add_pointer_t<_ValueType> any_cast(any*) _NOEXCEPT;
+  friend add_pointer_t<_ValueType> any_cast(any*) noexcept;
 
   _HandleFuncPtr __h_ = nullptr;
   _Storage __s_;
@@ -339,22 +367,14 @@ struct _SmallHandler {
 
   template <class... _Args>
   _LIBCPP_HIDE_FROM_ABI static _Tp& __create(any& __dest, _Args&&... __args) {
-    typedef allocator<_Tp> _Alloc;
-    typedef allocator_traits<_Alloc> _ATraits;
-    _Alloc __a;
-    _Tp* __ret = static_cast<_Tp*>(static_cast<void*>(&__dest.__s_.__buf));
-    _ATraits::construct(__a, __ret, std::forward<_Args>(__args)...);
+    auto __ret  = std::__construct_at(reinterpret_cast<_Tp*>(&__dest.__s_.__buf), std::forward<_Args>(__args)...);
     __dest.__h_ = &_SmallHandler::__handle;
     return *__ret;
   }
 
 private:
   _LIBCPP_HIDE_FROM_ABI static void __destroy(any& __this) {
-    typedef allocator<_Tp> _Alloc;
-    typedef allocator_traits<_Alloc> _ATraits;
-    _Alloc __a;
-    _Tp* __p = static_cast<_Tp*>(static_cast<void*>(&__this.__s_.__buf));
-    _ATraits::destroy(__a, __p);
+    std::__destroy_at(reinterpret_cast<_Tp*>(&__this.__s_.__buf));
     __this.__h_ = nullptr;
   }
 
@@ -406,26 +426,20 @@ struct _LargeHandler {
 
   template <class... _Args>
   _LIBCPP_HIDE_FROM_ABI static _Tp& __create(any& __dest, _Args&&... __args) {
-    typedef allocator<_Tp> _Alloc;
-    typedef allocator_traits<_Alloc> _ATraits;
-    typedef __allocator_destructor<_Alloc> _Dp;
-    _Alloc __a;
-    unique_ptr<_Tp, _Dp> __hold(_ATraits::allocate(__a, 1), _Dp(__a, 1));
-    _Tp* __ret = __hold.get();
-    _ATraits::construct(__a, __ret, std::forward<_Args>(__args)...);
-    __dest.__s_.__ptr = __hold.release();
+    _Tp* __ptr = static_cast<_Tp*>(std::__libcpp_allocate<_Tp>(__element_count(1)));
+    std::__exception_guard __guard([&] { std::__libcpp_deallocate<_Tp>(__ptr, __element_count(1)); });
+    std::__construct_at(__ptr, std::forward<_Args>(__args)...);
+    __guard.__complete();
+    __dest.__s_.__ptr = __ptr;
     __dest.__h_       = &_LargeHandler::__handle;
-    return *__ret;
+    return *__ptr;
   }
 
 private:
   _LIBCPP_HIDE_FROM_ABI static void __destroy(any& __this) {
-    typedef allocator<_Tp> _Alloc;
-    typedef allocator_traits<_Alloc> _ATraits;
-    _Alloc __a;
     _Tp* __p = static_cast<_Tp*>(__this.__s_.__ptr);
-    _ATraits::destroy(__a, __p);
-    _ATraits::deallocate(__a, __p, 1);
+    std::__destroy_at(__p);
+    std::__libcpp_deallocate<_Tp>(__p, __element_count(1));
     __this.__h_ = nullptr;
   }
 
@@ -456,72 +470,24 @@ private:
 
 } // namespace __any_imp
 
-template <class _ValueType, class _Tp, class>
-any::any(_ValueType&& __v) : __h_(nullptr) {
-  __any_imp::_Handler<_Tp>::__create(*this, std::forward<_ValueType>(__v));
-}
-
-template <class _ValueType, class... _Args, class _Tp, class>
-any::any(in_place_type_t<_ValueType>, _Args&&... __args) {
-  __any_imp::_Handler<_Tp>::__create(*this, std::forward<_Args>(__args)...);
-}
-
-template <class _ValueType, class _Up, class... _Args, class _Tp, class>
-any::any(in_place_type_t<_ValueType>, initializer_list<_Up> __il, _Args&&... __args) {
-  __any_imp::_Handler<_Tp>::__create(*this, __il, std::forward<_Args>(__args)...);
-}
-
-template <class _ValueType, class, class>
-inline _LIBCPP_HIDE_FROM_ABI any& any::operator=(_ValueType&& __v) {
-  any(std::forward<_ValueType>(__v)).swap(*this);
-  return *this;
-}
-
-template <class _ValueType, class... _Args, class _Tp, class>
-inline _LIBCPP_HIDE_FROM_ABI _Tp& any::emplace(_Args&&... __args) {
-  reset();
-  return __any_imp::_Handler<_Tp>::__create(*this, std::forward<_Args>(__args)...);
-}
-
-template <class _ValueType, class _Up, class... _Args, class _Tp, class>
-inline _LIBCPP_HIDE_FROM_ABI _Tp& any::emplace(initializer_list<_Up> __il, _Args&&... __args) {
-  reset();
-  return __any_imp::_Handler<_Tp>::__create(*this, __il, std::forward<_Args>(__args)...);
-}
-
-inline _LIBCPP_HIDE_FROM_ABI void any::swap(any& __rhs) _NOEXCEPT {
-  if (this == &__rhs)
-    return;
-  if (__h_ && __rhs.__h_) {
-    any __tmp;
-    __rhs.__call(_Action::_Move, &__tmp);
-    this->__call(_Action::_Move, &__rhs);
-    __tmp.__call(_Action::_Move, this);
-  } else if (__h_) {
-    this->__call(_Action::_Move, &__rhs);
-  } else if (__rhs.__h_) {
-    __rhs.__call(_Action::_Move, this);
-  }
-}
-
 // 6.4 Non-member functions
 
-inline _LIBCPP_HIDE_FROM_ABI void swap(any& __lhs, any& __rhs) _NOEXCEPT { __lhs.swap(__rhs); }
+inline _LIBCPP_HIDE_FROM_ABI void swap(any& __lhs, any& __rhs) noexcept { __lhs.swap(__rhs); }
 
 template <class _Tp, class... _Args>
-inline _LIBCPP_HIDE_FROM_ABI any make_any(_Args&&... __args) {
+[[nodiscard]] inline _LIBCPP_HIDE_FROM_ABI any make_any(_Args&&... __args) {
   return any(in_place_type<_Tp>, std::forward<_Args>(__args)...);
 }
 
 template <class _Tp, class _Up, class... _Args>
-inline _LIBCPP_HIDE_FROM_ABI any make_any(initializer_list<_Up> __il, _Args&&... __args) {
+[[nodiscard]] inline _LIBCPP_HIDE_FROM_ABI any make_any(initializer_list<_Up> __il, _Args&&... __args) {
   return any(in_place_type<_Tp>, __il, std::forward<_Args>(__args)...);
 }
 
 template <class _ValueType>
-inline _LIBCPP_HIDE_FROM_ABI _ValueType any_cast(any const& __v) {
+[[nodiscard]] inline _LIBCPP_HIDE_FROM_ABI _ValueType any_cast(any const& __v) {
   using _RawValueType = __remove_cvref_t<_ValueType>;
-  static_assert(is_constructible<_ValueType, _RawValueType const&>::value,
+  static_assert(is_constructible_v<_ValueType, _RawValueType const&>,
                 "ValueType is required to be a const lvalue reference "
                 "or a CopyConstructible type");
   auto __tmp = std::any_cast<add_const_t<_RawValueType>>(&__v);
@@ -531,9 +497,9 @@ inline _LIBCPP_HIDE_FROM_ABI _ValueType any_cast(any const& __v) {
 }
 
 template <class _ValueType>
-inline _LIBCPP_HIDE_FROM_ABI _ValueType any_cast(any& __v) {
+[[nodiscard]] inline _LIBCPP_HIDE_FROM_ABI _ValueType any_cast(any& __v) {
   using _RawValueType = __remove_cvref_t<_ValueType>;
-  static_assert(is_constructible<_ValueType, _RawValueType&>::value,
+  static_assert(is_constructible_v<_ValueType, _RawValueType&>,
                 "ValueType is required to be an lvalue reference "
                 "or a CopyConstructible type");
   auto __tmp = std::any_cast<_RawValueType>(&__v);
@@ -543,9 +509,9 @@ inline _LIBCPP_HIDE_FROM_ABI _ValueType any_cast(any& __v) {
 }
 
 template <class _ValueType>
-inline _LIBCPP_HIDE_FROM_ABI _ValueType any_cast(any&& __v) {
+[[nodiscard]] inline _LIBCPP_HIDE_FROM_ABI _ValueType any_cast(any&& __v) {
   using _RawValueType = __remove_cvref_t<_ValueType>;
-  static_assert(is_constructible<_ValueType, _RawValueType>::value,
+  static_assert(is_constructible_v<_ValueType, _RawValueType>,
                 "ValueType is required to be an rvalue reference "
                 "or a CopyConstructible type");
   auto __tmp = std::any_cast<_RawValueType>(&__v);
@@ -555,39 +521,31 @@ inline _LIBCPP_HIDE_FROM_ABI _ValueType any_cast(any&& __v) {
 }
 
 template <class _ValueType>
-inline _LIBCPP_HIDE_FROM_ABI add_pointer_t<add_const_t<_ValueType>> any_cast(any const* __any) _NOEXCEPT {
+[[nodiscard]] inline _LIBCPP_HIDE_FROM_ABI add_pointer_t<add_const_t<_ValueType>> any_cast(any const* __any) noexcept {
   static_assert(!is_void_v<_ValueType>, "_ValueType may not be void.");
-  static_assert(!is_reference<_ValueType>::value, "_ValueType may not be a reference.");
+  static_assert(!is_reference_v<_ValueType>, "_ValueType may not be a reference.");
   return std::any_cast<_ValueType>(const_cast<any*>(__any));
 }
 
-template <class _RetType>
-inline _LIBCPP_HIDE_FROM_ABI _RetType __pointer_or_func_cast(void* __p, /*IsFunction*/ false_type) noexcept {
-  return static_cast<_RetType>(__p);
-}
-
-template <class _RetType>
-inline _LIBCPP_HIDE_FROM_ABI _RetType __pointer_or_func_cast(void*, /*IsFunction*/ true_type) noexcept {
-  return nullptr;
-}
-
 template <class _ValueType>
-_LIBCPP_HIDE_FROM_ABI add_pointer_t<_ValueType> any_cast(any* __any) _NOEXCEPT {
+[[nodiscard]] _LIBCPP_HIDE_FROM_ABI add_pointer_t<_ValueType> any_cast(any* __any) noexcept {
   using __any_imp::_Action;
   static_assert(!is_void_v<_ValueType>, "_ValueType may not be void.");
-  static_assert(!is_reference<_ValueType>::value, "_ValueType may not be a reference.");
-  typedef add_pointer_t<_ValueType> _ReturnType;
-  if (__any && __any->__h_) {
-    void* __p = __any->__call(
-        _Action::_Get,
-        nullptr,
+  static_assert(!is_reference_v<_ValueType>, "_ValueType may not be a reference.");
+  if constexpr (!is_function_v<_ValueType>) {
+    using _ReturnType = add_pointer_t<_ValueType>;
+    if (__any && __any->__h_) {
+      void* __p = __any->__call(
+          _Action::_Get,
+          nullptr,
 #    if _LIBCPP_HAS_RTTI
-        &typeid(_ValueType),
+          &typeid(_ValueType),
 #    else
-        nullptr,
+          nullptr,
 #    endif
-        __any_imp::__get_fallback_typeid<_ValueType>());
-    return std::__pointer_or_func_cast<_ReturnType>(__p, is_function<_ValueType>{});
+          __any_imp::__get_fallback_typeid<_ValueType>());
+      return static_cast<_ReturnType>(__p);
+    }
   }
   return nullptr;
 }
@@ -613,6 +571,11 @@ _LIBCPP_POP_MACROS
 #    include <type_traits>
 #    include <variant>
 #  endif
+
+#  if !defined(_LIBCPP_REMOVE_TRANSITIVE_INCLUDES) && _LIBCPP_STD_VER <= 23
+#    include <cstring>
+#    include <limits>
+#  endif
 #endif // __cplusplus < 201103L && defined(_LIBCPP_USE_FROZEN_CXX03_HEADERS)
 
 #endif // _LIBCPP_ANY
diff --git a/lib/libcxx/include/array b/lib/libcxx/include/array
index 9643fc1dd9..0b0c854589 100644
--- a/lib/libcxx/include/array
+++ b/lib/libcxx/include/array
@@ -134,7 +134,6 @@ template <size_t I, class T, size_t N> const T&& get(const array<T, N>&&) noexce
 #  include <__type_traits/is_const.h>
 #  include <__type_traits/is_constructible.h>
 #  include <__type_traits/is_nothrow_constructible.h>
-#  include <__type_traits/is_replaceable.h>
 #  include <__type_traits/is_same.h>
 #  include <__type_traits/is_swappable.h>
 #  include <__type_traits/is_trivially_relocatable.h>
@@ -176,7 +175,6 @@ template <class _Tp, size_t _Size>
 struct array {
   using __trivially_relocatable _LIBCPP_NODEBUG =
       __conditional_t<__libcpp_is_trivially_relocatable<_Tp>::value, array, void>;
-  using __replaceable _LIBCPP_NODEBUG = __conditional_t<__is_replaceable_v<_Tp>, array, void>;
 
   // types:
   using __self _LIBCPP_NODEBUG = array;
@@ -212,28 +210,28 @@ struct array {
   }
 
   // iterators:
-  _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX17 iterator begin() _NOEXCEPT {
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX17 iterator begin() _NOEXCEPT {
 #  if defined(_LIBCPP_ABI_BOUNDED_ITERATORS_IN_STD_ARRAY)
     return std::__make_static_bounded_iter<_Size>(data(), data());
 #  else
     return iterator(data());
 #  endif
   }
-  _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX17 const_iterator begin() const _NOEXCEPT {
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX17 const_iterator begin() const _NOEXCEPT {
 #  if defined(_LIBCPP_ABI_BOUNDED_ITERATORS_IN_STD_ARRAY)
     return std::__make_static_bounded_iter<_Size>(data(), data());
 #  else
     return const_iterator(data());
 #  endif
   }
-  _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX17 iterator end() _NOEXCEPT {
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX17 iterator end() _NOEXCEPT {
 #  if defined(_LIBCPP_ABI_BOUNDED_ITERATORS_IN_STD_ARRAY)
     return std::__make_static_bounded_iter<_Size>(data() + _Size, data());
 #  else
     return iterator(data() + _Size);
 #  endif
   }
-  _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX17 const_iterator end() const _NOEXCEPT {
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX17 const_iterator end() const _NOEXCEPT {
 #  if defined(_LIBCPP_ABI_BOUNDED_ITERATORS_IN_STD_ARRAY)
     return std::__make_static_bounded_iter<_Size>(data() + _Size, data());
 #  else
@@ -241,62 +239,81 @@ struct array {
 #  endif
   }
 
-  _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX17 reverse_iterator rbegin() _NOEXCEPT {
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX17 reverse_iterator rbegin() _NOEXCEPT {
     return reverse_iterator(end());
   }
-  _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX17 const_reverse_iterator rbegin() const _NOEXCEPT {
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX17 const_reverse_iterator
+  rbegin() const _NOEXCEPT {
     return const_reverse_iterator(end());
   }
-  _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX17 reverse_iterator rend() _NOEXCEPT {
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX17 reverse_iterator rend() _NOEXCEPT {
     return reverse_iterator(begin());
   }
-  _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX17 const_reverse_iterator rend() const _NOEXCEPT {
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX17 const_reverse_iterator rend() const _NOEXCEPT {
     return const_reverse_iterator(begin());
   }
 
-  _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX17 const_iterator cbegin() const _NOEXCEPT { return begin(); }
-  _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX17 const_iterator cend() const _NOEXCEPT { return end(); }
-  _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX17 const_reverse_iterator crbegin() const _NOEXCEPT {
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX17 const_iterator cbegin() const _NOEXCEPT {
+    return begin();
+  }
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX17 const_iterator cend() const _NOEXCEPT {
+    return end();
+  }
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX17 const_reverse_iterator
+  crbegin() const _NOEXCEPT {
     return rbegin();
   }
-  _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX17 const_reverse_iterator crend() const _NOEXCEPT { return rend(); }
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX17 const_reverse_iterator crend() const _NOEXCEPT {
+    return rend();
+  }
 
   // capacity:
-  _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR size_type size() const _NOEXCEPT { return _Size; }
-  _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR size_type max_size() const _NOEXCEPT { return _Size; }
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR size_type size() const _NOEXCEPT { return _Size; }
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR size_type max_size() const _NOEXCEPT { return _Size; }
   [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR bool empty() const _NOEXCEPT { return _Size == 0; }
 
   // element access:
-  _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX17 reference operator[](size_type __n) _NOEXCEPT {
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX17 reference operator[](size_type __n) _NOEXCEPT {
     _LIBCPP_ASSERT_VALID_ELEMENT_ACCESS(__n < _Size, "out-of-bounds access in std::array<T, N>");
     return __elems_[__n];
   }
-  _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 const_reference operator[](size_type __n) const _NOEXCEPT {
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 const_reference
+  operator[](size_type __n) const _NOEXCEPT {
     _LIBCPP_ASSERT_VALID_ELEMENT_ACCESS(__n < _Size, "out-of-bounds access in std::array<T, N>");
     return __elems_[__n];
   }
 
-  _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX17 reference at(size_type __n) {
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX17 reference at(size_type __n) {
     if (__n >= _Size)
       std::__throw_out_of_range("array::at");
     return __elems_[__n];
   }
 
-  _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 const_reference at(size_type __n) const {
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 const_reference at(size_type __n) const {
     if (__n >= _Size)
       std::__throw_out_of_range("array::at");
     return __elems_[__n];
   }
 
-  _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX17 reference front() _NOEXCEPT { return (*this)[0]; }
-  _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 const_reference front() const _NOEXCEPT { return (*this)[0]; }
-  _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX17 reference back() _NOEXCEPT { return (*this)[_Size - 1]; }
-  _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 const_reference back() const _NOEXCEPT {
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX17 reference front() _NOEXCEPT {
+    return (*this)[0];
+  }
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 const_reference front() const _NOEXCEPT {
+    return (*this)[0];
+  }
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX17 reference back() _NOEXCEPT {
+    return (*this)[_Size - 1];
+  }
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 const_reference back() const _NOEXCEPT {
     return (*this)[_Size - 1];
   }
 
-  _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX17 value_type* data() _NOEXCEPT { return __elems_; }
-  _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX17 const value_type* data() const _NOEXCEPT { return __elems_; }
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX17 value_type* data() _NOEXCEPT {
+    return __elems_;
+  }
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX17 const value_type* data() const _NOEXCEPT {
+    return __elems_;
+  }
 };
 
 template <class _Tp>
@@ -330,8 +347,10 @@ struct array<_Tp, 0> {
   };
   _ALIGNAS_TYPE(_ArrayInStructT) _EmptyType __elems_[sizeof(_ArrayInStructT)];
 
-  _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX17 value_type* data() _NOEXCEPT { return nullptr; }
-  _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX17 const value_type* data() const _NOEXCEPT { return nullptr; }
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX17 value_type* data() _NOEXCEPT { return nullptr; }
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX17 const value_type* data() const _NOEXCEPT {
+    return nullptr;
+  }
 
   // No explicit construct/copy/destroy for aggregate type
   _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 void fill(const value_type&) {
@@ -343,28 +362,28 @@ struct array<_Tp, 0> {
   }
 
   // iterators:
-  _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX17 iterator begin() _NOEXCEPT {
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX17 iterator begin() _NOEXCEPT {
 #  if defined(_LIBCPP_ABI_BOUNDED_ITERATORS_IN_STD_ARRAY)
     return std::__make_static_bounded_iter<0>(data(), data());
 #  else
     return iterator(data());
 #  endif
   }
-  _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX17 const_iterator begin() const _NOEXCEPT {
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX17 const_iterator begin() const _NOEXCEPT {
 #  if defined(_LIBCPP_ABI_BOUNDED_ITERATORS_IN_STD_ARRAY)
     return std::__make_static_bounded_iter<0>(data(), data());
 #  else
     return const_iterator(data());
 #  endif
   }
-  _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX17 iterator end() _NOEXCEPT {
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX17 iterator end() _NOEXCEPT {
 #  if defined(_LIBCPP_ABI_BOUNDED_ITERATORS_IN_STD_ARRAY)
     return std::__make_static_bounded_iter<0>(data(), data());
 #  else
     return iterator(data());
 #  endif
   }
-  _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX17 const_iterator end() const _NOEXCEPT {
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX17 const_iterator end() const _NOEXCEPT {
 #  if defined(_LIBCPP_ABI_BOUNDED_ITERATORS_IN_STD_ARRAY)
     return std::__make_static_bounded_iter<0>(data(), data());
 #  else
@@ -372,68 +391,77 @@ struct array<_Tp, 0> {
 #  endif
   }
 
-  _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX17 reverse_iterator rbegin() _NOEXCEPT {
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX17 reverse_iterator rbegin() _NOEXCEPT {
     return reverse_iterator(end());
   }
-  _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX17 const_reverse_iterator rbegin() const _NOEXCEPT {
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX17 const_reverse_iterator
+  rbegin() const _NOEXCEPT {
     return const_reverse_iterator(end());
   }
-  _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX17 reverse_iterator rend() _NOEXCEPT {
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX17 reverse_iterator rend() _NOEXCEPT {
     return reverse_iterator(begin());
   }
-  _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX17 const_reverse_iterator rend() const _NOEXCEPT {
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX17 const_reverse_iterator rend() const _NOEXCEPT {
     return const_reverse_iterator(begin());
   }
 
-  _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX17 const_iterator cbegin() const _NOEXCEPT { return begin(); }
-  _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX17 const_iterator cend() const _NOEXCEPT { return end(); }
-  _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX17 const_reverse_iterator crbegin() const _NOEXCEPT {
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX17 const_iterator cbegin() const _NOEXCEPT {
+    return begin();
+  }
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX17 const_iterator cend() const _NOEXCEPT {
+    return end();
+  }
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX17 const_reverse_iterator
+  crbegin() const _NOEXCEPT {
     return rbegin();
   }
-  _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX17 const_reverse_iterator crend() const _NOEXCEPT { return rend(); }
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX17 const_reverse_iterator crend() const _NOEXCEPT {
+    return rend();
+  }
 
   // capacity:
-  _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR size_type size() const _NOEXCEPT { return 0; }
-  _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR size_type max_size() const _NOEXCEPT { return 0; }
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR size_type size() const _NOEXCEPT { return 0; }
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR size_type max_size() const _NOEXCEPT { return 0; }
   [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR bool empty() const _NOEXCEPT { return true; }
 
   // element access:
-  _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX17 reference operator[](size_type) _NOEXCEPT {
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX17 reference operator[](size_type) _NOEXCEPT {
     _LIBCPP_ASSERT_VALID_ELEMENT_ACCESS(false, "cannot call array<T, 0>::operator[] on a zero-sized array");
     __libcpp_unreachable();
   }
 
-  _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 const_reference operator[](size_type) const _NOEXCEPT {
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 const_reference
+  operator[](size_type) const _NOEXCEPT {
     _LIBCPP_ASSERT_VALID_ELEMENT_ACCESS(false, "cannot call array<T, 0>::operator[] on a zero-sized array");
     __libcpp_unreachable();
   }
 
-  _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX17 reference at(size_type) {
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX17 reference at(size_type) {
     std::__throw_out_of_range("array<T, 0>::at");
     __libcpp_unreachable();
   }
 
-  _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 const_reference at(size_type) const {
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 const_reference at(size_type) const {
     std::__throw_out_of_range("array<T, 0>::at");
     __libcpp_unreachable();
   }
 
-  _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX17 reference front() _NOEXCEPT {
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX17 reference front() _NOEXCEPT {
     _LIBCPP_ASSERT_VALID_ELEMENT_ACCESS(false, "cannot call array<T, 0>::front() on a zero-sized array");
     __libcpp_unreachable();
   }
 
-  _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 const_reference front() const _NOEXCEPT {
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 const_reference front() const _NOEXCEPT {
     _LIBCPP_ASSERT_VALID_ELEMENT_ACCESS(false, "cannot call array<T, 0>::front() on a zero-sized array");
     __libcpp_unreachable();
   }
 
-  _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX17 reference back() _NOEXCEPT {
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX17 reference back() _NOEXCEPT {
     _LIBCPP_ASSERT_VALID_ELEMENT_ACCESS(false, "cannot call array<T, 0>::back() on a zero-sized array");
     __libcpp_unreachable();
   }
 
-  _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 const_reference back() const _NOEXCEPT {
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 const_reference back() const _NOEXCEPT {
     _LIBCPP_ASSERT_VALID_ELEMENT_ACCESS(false, "cannot call array<T, 0>::back() on a zero-sized array");
     __libcpp_unreachable();
   }
@@ -503,25 +531,29 @@ struct tuple_element<_Ip, array<_Tp, _Size> > {
 };
 
 template <size_t _Ip, class _Tp, size_t _Size>
-inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 _Tp& get(array<_Tp, _Size>& __a) _NOEXCEPT {
+[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 _Tp&
+get(array<_Tp, _Size>& __a) _NOEXCEPT {
   static_assert(_Ip < _Size, "Index out of bounds in std::get<> (std::array)");
   return __a.__elems_[_Ip];
 }
 
 template <size_t _Ip, class _Tp, size_t _Size>
-inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 const _Tp& get(const array<_Tp, _Size>& __a) _NOEXCEPT {
+[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 const _Tp&
+get(const array<_Tp, _Size>& __a) _NOEXCEPT {
   static_assert(_Ip < _Size, "Index out of bounds in std::get<> (const std::array)");
   return __a.__elems_[_Ip];
 }
 
 template <size_t _Ip, class _Tp, size_t _Size>
-inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 _Tp&& get(array<_Tp, _Size>&& __a) _NOEXCEPT {
+[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 _Tp&&
+get(array<_Tp, _Size>&& __a) _NOEXCEPT {
   static_assert(_Ip < _Size, "Index out of bounds in std::get<> (std::array &&)");
   return std::move(__a.__elems_[_Ip]);
 }
 
 template <size_t _Ip, class _Tp, size_t _Size>
-inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 const _Tp&& get(const array<_Tp, _Size>&& __a) _NOEXCEPT {
+[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 const _Tp&&
+get(const array<_Tp, _Size>&& __a) _NOEXCEPT {
   static_assert(_Ip < _Size, "Index out of bounds in std::get<> (const std::array &&)");
   return std::move(__a.__elems_[_Ip]);
 }
@@ -541,7 +573,7 @@ __to_array_rvalue_impl(_Tp (&&__arr)[_Size], index_sequence<_Index...>) {
 }
 
 template <typename _Tp, size_t _Size>
-_LIBCPP_HIDE_FROM_ABI constexpr array<remove_cv_t<_Tp>, _Size>
+[[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr array<remove_cv_t<_Tp>, _Size>
 to_array(_Tp (&__arr)[_Size]) noexcept(is_nothrow_constructible_v<_Tp, _Tp&>) {
   static_assert(!is_array_v<_Tp>, "[array.creation]/1: to_array does not accept multidimensional arrays.");
   static_assert(is_constructible_v<_Tp, _Tp&>, "[array.creation]/1: to_array requires copy constructible elements.");
@@ -549,7 +581,7 @@ to_array(_Tp (&__arr)[_Size]) noexcept(is_nothrow_constructible_v<_Tp, _Tp&>) {
 }
 
 template <typename _Tp, size_t _Size>
-_LIBCPP_HIDE_FROM_ABI constexpr array<remove_cv_t<_Tp>, _Size>
+[[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr array<remove_cv_t<_Tp>, _Size>
 to_array(_Tp (&&__arr)[_Size]) noexcept(is_nothrow_move_constructible_v<_Tp>) {
   static_assert(!is_array_v<_Tp>, "[array.creation]/4: to_array does not accept multidimensional arrays.");
   static_assert(is_move_constructible_v<_Tp>, "[array.creation]/4: to_array requires move constructible elements.");
diff --git a/lib/libcxx/include/atomic b/lib/libcxx/include/atomic
index 75af5de33c..23a3db5d35 100644
--- a/lib/libcxx/include/atomic
+++ b/lib/libcxx/include/atomic
@@ -608,6 +608,8 @@ template <class T>
 #  include <__atomic/atomic_init.h>
 #  include <__atomic/atomic_lock_free.h>
 #  include <__atomic/atomic_sync.h>
+#  include <__atomic/atomic_sync_timed.h>
+#  include <__atomic/atomic_waitable_traits.h>
 #  include <__atomic/check_memory_order.h>
 #  include <__atomic/contention_t.h>
 #  include <__atomic/fence.h>
diff --git a/lib/libcxx/include/barrier b/lib/libcxx/include/barrier
index 00e196963f..428a39a44e 100644
--- a/lib/libcxx/include/barrier
+++ b/lib/libcxx/include/barrier
@@ -57,8 +57,6 @@ namespace std
 #    include <__atomic/memory_order.h>
 #    include <__cstddef/ptrdiff_t.h>
 #    include <__memory/unique_ptr.h>
-#    include <__thread/poll_with_backoff.h>
-#    include <__thread/timed_backoff_policy.h>
 #    include <__utility/move.h>
 #    include <cstdint>
 #    include <limits>
@@ -97,19 +95,20 @@ using __barrier_phase_t _LIBCPP_NODEBUG = uint8_t;
 
 class __barrier_algorithm_base;
 
-_LIBCPP_AVAILABILITY_SYNC _LIBCPP_EXPORTED_FROM_ABI __barrier_algorithm_base*
-__construct_barrier_algorithm_base(ptrdiff_t& __expected);
+[[__gnu__::__returns_nonnull__, __gnu__::__malloc__]]
+_LIBCPP_EXPORTED_FROM_ABI __barrier_algorithm_base* __construct_barrier_algorithm_base(ptrdiff_t& __expected);
 
-_LIBCPP_AVAILABILITY_SYNC _LIBCPP_EXPORTED_FROM_ABI bool
-__arrive_barrier_algorithm_base(__barrier_algorithm_base* __barrier, __barrier_phase_t __old_phase) noexcept;
+_LIBCPP_EXPORTED_FROM_ABI bool
+__arrive_barrier_algorithm_base([[__gnu__::__nonnull__]] _LIBCPP_NOESCAPE __barrier_algorithm_base* __barrier,
+                                __barrier_phase_t __old_phase) noexcept;
 
-_LIBCPP_AVAILABILITY_SYNC _LIBCPP_EXPORTED_FROM_ABI void
-__destroy_barrier_algorithm_base(__barrier_algorithm_base* __barrier) noexcept;
+_LIBCPP_EXPORTED_FROM_ABI void __destroy_barrier_algorithm_base(
+    [[__gnu__::__nonnull__]] _LIBCPP_NOESCAPE __barrier_algorithm_base* __barrier) noexcept;
 
 template <class _CompletionF>
 class __barrier_base {
   ptrdiff_t __expected_;
-  unique_ptr<__barrier_algorithm_base, void (*)(__barrier_algorithm_base*)> __base_;
+  unique_ptr<__barrier_algorithm_base, void (*)(_LIBCPP_NOESCAPE __barrier_algorithm_base*)> __base_;
   atomic<ptrdiff_t> __expected_adjustment_;
   _CompletionF __completion_;
   atomic<__barrier_phase_t> __phase_;
@@ -119,14 +118,13 @@ public:
 
   static _LIBCPP_HIDE_FROM_ABI constexpr ptrdiff_t max() noexcept { return numeric_limits<ptrdiff_t>::max(); }
 
-  _LIBCPP_AVAILABILITY_SYNC _LIBCPP_HIDE_FROM_ABI
-  __barrier_base(ptrdiff_t __expected, _CompletionF __completion = _CompletionF())
+  _LIBCPP_HIDE_FROM_ABI __barrier_base(ptrdiff_t __expected, _CompletionF __completion = _CompletionF())
       : __expected_(__expected),
         __base_(std::__construct_barrier_algorithm_base(this->__expected_), &__destroy_barrier_algorithm_base),
         __expected_adjustment_(0),
         __completion_(std::move(__completion)),
         __phase_(0) {}
-  [[nodiscard]] _LIBCPP_AVAILABILITY_SYNC _LIBCPP_HIDE_FROM_ABI arrival_token arrive(ptrdiff_t __update) {
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI arrival_token arrive(ptrdiff_t __update) {
     _LIBCPP_ASSERT_ARGUMENT_WITHIN_DOMAIN(
         __update <= __expected_, "update is greater than the expected count for the current barrier phase");
 
@@ -141,11 +139,10 @@ public:
       }
     return __old_phase;
   }
-  _LIBCPP_AVAILABILITY_SYNC _LIBCPP_HIDE_FROM_ABI void wait(arrival_token&& __old_phase) const {
-    auto const __test_fn = [this, __old_phase]() -> bool { return __phase_.load(memory_order_acquire) != __old_phase; };
-    std::__libcpp_thread_poll_with_backoff(__test_fn, __libcpp_timed_backoff_policy());
+  _LIBCPP_HIDE_FROM_ABI void wait(arrival_token&& __old_phase) const {
+    __phase_.wait(__old_phase, std::memory_order_acquire);
   }
-  _LIBCPP_AVAILABILITY_SYNC _LIBCPP_HIDE_FROM_ABI void arrive_and_drop() {
+  _LIBCPP_HIDE_FROM_ABI void arrive_and_drop() {
     __expected_adjustment_.fetch_sub(1, memory_order_relaxed);
     (void)arrive(1);
   }
@@ -158,9 +155,10 @@ class barrier {
 public:
   using arrival_token = typename __barrier_base<_CompletionF>::arrival_token;
 
-  static _LIBCPP_HIDE_FROM_ABI constexpr ptrdiff_t max() noexcept { return __barrier_base<_CompletionF>::max(); }
+  [[nodiscard]] static _LIBCPP_HIDE_FROM_ABI constexpr ptrdiff_t max() noexcept {
+    return __barrier_base<_CompletionF>::max();
+  }
 
-  _LIBCPP_AVAILABILITY_SYNC
   _LIBCPP_HIDE_FROM_ABI explicit barrier(ptrdiff_t __count, _CompletionF __completion = _CompletionF())
       : __b_(__count, std::move(__completion)) {
     _LIBCPP_ASSERT_ARGUMENT_WITHIN_DOMAIN(
@@ -175,15 +173,13 @@ public:
   barrier(barrier const&)            = delete;
   barrier& operator=(barrier const&) = delete;
 
-  [[nodiscard]] _LIBCPP_AVAILABILITY_SYNC _LIBCPP_HIDE_FROM_ABI arrival_token arrive(ptrdiff_t __update = 1) {
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI arrival_token arrive(ptrdiff_t __update = 1) {
     _LIBCPP_ASSERT_ARGUMENT_WITHIN_DOMAIN(__update > 0, "barrier:arrive must be called with a value greater than 0");
     return __b_.arrive(__update);
   }
-  _LIBCPP_AVAILABILITY_SYNC _LIBCPP_HIDE_FROM_ABI void wait(arrival_token&& __phase) const {
-    __b_.wait(std::move(__phase));
-  }
-  _LIBCPP_AVAILABILITY_SYNC _LIBCPP_HIDE_FROM_ABI void arrive_and_wait() { wait(arrive()); }
-  _LIBCPP_AVAILABILITY_SYNC _LIBCPP_HIDE_FROM_ABI void arrive_and_drop() { __b_.arrive_and_drop(); }
+  _LIBCPP_HIDE_FROM_ABI void wait(arrival_token&& __phase) const { __b_.wait(std::move(__phase)); }
+  _LIBCPP_HIDE_FROM_ABI void arrive_and_wait() { wait(arrive()); }
+  _LIBCPP_HIDE_FROM_ABI void arrive_and_drop() { __b_.arrive_and_drop(); }
 };
 
 _LIBCPP_END_NAMESPACE_STD
diff --git a/lib/libcxx/include/bitset b/lib/libcxx/include/bitset
index d109f27af5..37253f5722 100644
--- a/lib/libcxx/include/bitset
+++ b/lib/libcxx/include/bitset
@@ -147,7 +147,6 @@ template <size_t N> struct hash<std::bitset<N>>;
 #  include <__functional/hash.h>
 #  include <__functional/identity.h>
 #  include <__functional/unary_function.h>
-#  include <__tuple/tuple_indices.h>
 #  include <__type_traits/enable_if.h>
 #  include <__type_traits/integral_constant.h>
 #  include <__type_traits/is_char_like_type.h>
@@ -314,7 +313,7 @@ private:
   _LIBCPP_HIDE_FROM_ABI void __init(unsigned long long __v, true_type) _NOEXCEPT;
 #  else
   template <size_t... _Indices>
-  _LIBCPP_HIDE_FROM_ABI constexpr __bitset(unsigned long long __v, std::__tuple_indices<_Indices...>) _NOEXCEPT
+  _LIBCPP_HIDE_FROM_ABI constexpr __bitset(unsigned long long __v, __index_sequence<_Indices...>) _NOEXCEPT
       : __first_{static_cast<__storage_type>(__v >> (_Indices * __bits_per_word))...} {}
 #  endif // _LIBCPP_CXX03_LANG
 };
@@ -352,10 +351,9 @@ template <size_t _N_words, size_t _Size>
 inline _LIBCPP_CONSTEXPR __bitset<_N_words, _Size>::__bitset(unsigned long long __v) _NOEXCEPT
 #  ifndef _LIBCPP_CXX03_LANG
     : __bitset(__v,
-               std::__make_indices_imp< (_N_words < (sizeof(unsigned long long) - 1) / sizeof(__storage_type) + 1)
-                                            ? _N_words
-                                            : (sizeof(unsigned long long) - 1) / sizeof(__storage_type) + 1,
-                                        0>{})
+               __make_index_sequence<(_N_words < (sizeof(unsigned long long) - 1) / sizeof(__storage_type) + 1)
+                                         ? _N_words
+                                         : (sizeof(unsigned long long) - 1) / sizeof(__storage_type) + 1>())
 #  endif
 {
 #  ifdef _LIBCPP_CXX03_LANG
@@ -677,53 +675,63 @@ public:
   _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX23 bitset& set(size_t __pos, bool __val = true);
   _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX23 bitset& reset() _NOEXCEPT;
   _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX23 bitset& reset(size_t __pos);
-  _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX23 bitset operator~() const _NOEXCEPT;
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX23 bitset operator~() const _NOEXCEPT;
   _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX23 bitset& flip() _NOEXCEPT;
   _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX23 bitset& flip(size_t __pos);
 
   // element access:
-#  ifdef _LIBCPP_ABI_BITSET_VECTOR_BOOL_CONST_SUBSCRIPT_RETURN_BOOL
-  _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR bool operator[](size_t __p) const {
+  // TODO(LLVM 24): Remove the opt-out
+#  ifndef _LIBCPP_DEPRECATED_ABI_BITSET_CONST_SUBSCRIPT_RETURN_REF
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR bool operator[](size_t __p) const {
     _LIBCPP_ASSERT_VALID_ELEMENT_ACCESS(__p < _Size, "bitset::operator[] index out of bounds");
     return __base::__make_ref(__p);
   }
 #  else
-  _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR __const_reference operator[](size_t __p) const {
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR __const_reference operator[](size_t __p) const {
     _LIBCPP_ASSERT_VALID_ELEMENT_ACCESS(__p < _Size, "bitset::operator[] index out of bounds");
     return __base::__make_ref(__p);
   }
 #  endif
-  _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX23 reference operator[](size_t __p) {
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX23 reference operator[](size_t __p) {
     _LIBCPP_ASSERT_VALID_ELEMENT_ACCESS(__p < _Size, "bitset::operator[] index out of bounds");
     return __base::__make_ref(__p);
   }
-  _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX23 unsigned long to_ulong() const { return __base::to_ulong(); }
-  _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX23 unsigned long long to_ullong() const {
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX23 unsigned long to_ulong() const {
+    return __base::to_ulong();
+  }
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX23 unsigned long long to_ullong() const {
     return __base::to_ullong();
   }
   template <class _CharT, class _Traits, class _Allocator>
-  _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX23 basic_string<_CharT, _Traits, _Allocator>
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX23 basic_string<_CharT, _Traits, _Allocator>
   to_string(_CharT __zero = _CharT('0'), _CharT __one = _CharT('1')) const;
   template <class _CharT, class _Traits>
-  _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX23 basic_string<_CharT, _Traits, allocator<_CharT> >
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI
+  _LIBCPP_CONSTEXPR_SINCE_CXX23 basic_string<_CharT, _Traits, allocator<_CharT> >
   to_string(_CharT __zero = _CharT('0'), _CharT __one = _CharT('1')) const;
   template <class _CharT>
-  _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX23 basic_string<_CharT, char_traits<_CharT>, allocator<_CharT> >
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI
+  _LIBCPP_CONSTEXPR_SINCE_CXX23 basic_string<_CharT, char_traits<_CharT>, allocator<_CharT> >
   to_string(_CharT __zero = _CharT('0'), _CharT __one = _CharT('1')) const;
-  _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX23 basic_string<char, char_traits<char>, allocator<char> >
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI
+  _LIBCPP_CONSTEXPR_SINCE_CXX23 basic_string<char, char_traits<char>, allocator<char> >
   to_string(char __zero = '0', char __one = '1') const;
-  _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX23 size_t count() const _NOEXCEPT;
-  _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR size_t size() const _NOEXCEPT { return _Size; }
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX23 size_t count() const _NOEXCEPT;
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR size_t size() const _NOEXCEPT { return _Size; }
   _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX23 bool operator==(const bitset& __rhs) const _NOEXCEPT;
 #  if _LIBCPP_STD_VER <= 17
   _LIBCPP_HIDE_FROM_ABI bool operator!=(const bitset& __rhs) const _NOEXCEPT;
 #  endif
-  _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX23 bool test(size_t __pos) const;
-  _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX23 bool all() const _NOEXCEPT { return __base::all(); }
-  _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX23 bool any() const _NOEXCEPT { return __base::any(); }
-  _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX23 bool none() const _NOEXCEPT { return !any(); }
-  _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX23 bitset operator<<(size_t __pos) const _NOEXCEPT;
-  _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX23 bitset operator>>(size_t __pos) const _NOEXCEPT;
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX23 bool test(size_t __pos) const;
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX23 bool all() const _NOEXCEPT {
+    return __base::all();
+  }
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX23 bool any() const _NOEXCEPT {
+    return __base::any();
+  }
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX23 bool none() const _NOEXCEPT { return !any(); }
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX23 bitset operator<<(size_t __pos) const _NOEXCEPT;
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX23 bitset operator>>(size_t __pos) const _NOEXCEPT;
 
 private:
   template <class _CharT, class _Traits>
@@ -869,7 +877,16 @@ bitset<_Size>::to_string(char __zero, char __one) const {
 
 template <size_t _Size>
 inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX23 size_t bitset<_Size>::count() const _NOEXCEPT {
-  return static_cast<size_t>(std::count(__base::__make_iter(0), __base::__make_iter(_Size), true));
+#  if defined(_LIBCPP_COMPILER_CLANG_BASED) && !defined(_LIBCPP_CXX03_LANG)
+  if constexpr (_Size == 0) {
+    return 0;
+  } else if constexpr (_Size <= __base::__bits_per_word) {
+    return __builtin_popcountg(static_cast<unsigned _BitInt(_Size)>(__base::__first_));
+  } else
+#  endif
+  {
+    return static_cast<size_t>(std::count(__base::__make_iter(0), __base::__make_iter(_Size), true));
+  }
 }
 
 template <size_t _Size>
@@ -912,7 +929,7 @@ bitset<_Size>::operator>>(size_t __pos) const _NOEXCEPT {
 }
 
 template <size_t _Size>
-inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX23 bitset<_Size>
+[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX23 bitset<_Size>
 operator&(const bitset<_Size>& __x, const bitset<_Size>& __y) _NOEXCEPT {
   bitset<_Size> __r = __x;
   __r &= __y;
@@ -920,7 +937,7 @@ operator&(const bitset<_Size>& __x, const bitset<_Size>& __y) _NOEXCEPT {
 }
 
 template <size_t _Size>
-inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX23 bitset<_Size>
+[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX23 bitset<_Size>
 operator|(const bitset<_Size>& __x, const bitset<_Size>& __y) _NOEXCEPT {
   bitset<_Size> __r = __x;
   __r |= __y;
@@ -928,7 +945,7 @@ operator|(const bitset<_Size>& __x, const bitset<_Size>& __y) _NOEXCEPT {
 }
 
 template <size_t _Size>
-inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX23 bitset<_Size>
+[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX23 bitset<_Size>
 operator^(const bitset<_Size>& __x, const bitset<_Size>& __y) _NOEXCEPT {
   bitset<_Size> __r = __x;
   __r ^= __y;
@@ -937,7 +954,9 @@ operator^(const bitset<_Size>& __x, const bitset<_Size>& __y) _NOEXCEPT {
 
 template <size_t _Size>
 struct hash<bitset<_Size> > : public __unary_function<bitset<_Size>, size_t> {
-  _LIBCPP_HIDE_FROM_ABI size_t operator()(const bitset<_Size>& __bs) const _NOEXCEPT { return __bs.__hash_code(); }
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI size_t operator()(const bitset<_Size>& __bs) const _NOEXCEPT {
+    return __bs.__hash_code();
+  }
 };
 
 template <class _CharT, class _Traits, size_t _Size>
diff --git a/lib/libcxx/include/ccomplex b/lib/libcxx/include/ccomplex
index ee7e088aac..c1cb039f83 100644
--- a/lib/libcxx/include/ccomplex
+++ b/lib/libcxx/include/ccomplex
@@ -26,18 +26,10 @@
 #    pragma GCC system_header
 #  endif
 
-#  if _LIBCPP_STD_VER >= 20
-
-using __standard_header_ccomplex
-    _LIBCPP_DEPRECATED_("removed in C++20. Include <complex> instead.") _LIBCPP_NODEBUG = void;
-using __use_standard_header_ccomplex _LIBCPP_NODEBUG                                    = __standard_header_ccomplex;
-
-#  elif _LIBCPP_STD_VER >= 17
-
-using __standard_header_ccomplex _LIBCPP_DEPRECATED_("Include <complex> instead.") _LIBCPP_NODEBUG = void;
-using __use_standard_header_ccomplex _LIBCPP_NODEBUG = __standard_header_ccomplex;
-
+#  if _LIBCPP_STD_VER >= 17 && !__building_module(std) && _LIBCPP_DIAGNOSE_DEPRECATED_HEADERS
+#    warning <ccomplex> is deprecated in C++17 and removed in C++20. Include <complex> instead.
 #  endif
+
 #endif // __cplusplus < 201103L && defined(_LIBCPP_USE_FROZEN_CXX03_HEADERS)
 
 #endif // _LIBCPP_CCOMPLEX
diff --git a/lib/libcxx/include/chrono b/lib/libcxx/include/chrono
index 82e99a31bc..abb678e111 100644
--- a/lib/libcxx/include/chrono
+++ b/lib/libcxx/include/chrono
@@ -218,6 +218,9 @@ template <class ToDuration, class Rep, class Period>
 template <class ToDuration, class Rep, class Period>
     constexpr ToDuration round(const duration<Rep, Period>& d);    // C++17
 
+template <class T> struct is_clock;                                // C++20
+template <class T> inline constexpr bool is_clock_v = is_clock<T>::value;                   // C++20
+
 // duration I/O
 template<class charT, class traits, class Rep, class Period>       // C++20
   basic_ostream<charT, traits>&
@@ -1035,6 +1038,49 @@ constexpr chrono::year                                  operator ""y(unsigned lo
 }  // chrono_literals
 }  // literals
 
+namespace std {
+  template<class T>
+    struct hash;                                            // C++26
+  template<class Rep, class Period>
+    struct hash<chrono::duration<Rep, Period>>;             // C++26
+  template<class Clock, class Duration>
+    struct hash<chrono::time_point<Clock, Duration>>;       // C++26
+  template<>
+    struct hash<chrono::day>;                               // C++26
+  template<>
+    struct hash<chrono::month>;                             // C++26
+  template<>
+    struct hash<chrono::year>;                              // C++26
+  template<>
+    struct hash<chrono::weekday>;                           // C++26
+  template<>
+    struct hash<chrono::weekday_indexed>;                   // C++26
+  template<>
+    struct hash<chrono::weekday_last>;                      // C++26
+  template<>
+    struct hash<chrono::month_day>;                         // C++26
+  template<>
+    struct hash<chrono::month_day_last>;                    // C++26
+  template<>
+    struct hash<chrono::month_weekday>;                     // C++26
+  template<>
+    struct hash<chrono::month_weekday_last>;                // C++26
+  template<>
+    struct hash<chrono::year_month>;                        // C++26
+  template<>
+    struct hash<chrono::year_month_day>;                    // C++26
+  template<>
+    struct hash<chrono::year_month_day_last>;               // C++26
+  template<>
+    struct hash<chrono::year_month_weekday>;                // C++26
+  template<>
+    struct hash<chrono::year_month_weekday_last>;           // C++26
+  template<class Duration, class TimeZonePtr>
+    struct hash<chrono::zoned_time<Duration, TimeZonePtr>>; // C++26
+  template<>
+    struct hash<chrono::leap_second>;                       // C++26
+} // namespace std
+
 }  // std
 */
 
@@ -1057,6 +1103,7 @@ constexpr chrono::year                                  operator ""y(unsigned lo
 #    include <__chrono/day.h>
 #    include <__chrono/exception.h>
 #    include <__chrono/hh_mm_ss.h>
+#    include <__chrono/is_clock.h>
 #    include <__chrono/literals.h>
 #    include <__chrono/local_info.h>
 #    include <__chrono/month.h>
diff --git a/lib/libcxx/include/ciso646 b/lib/libcxx/include/ciso646
index 34164362dc..d9eae41291 100644
--- a/lib/libcxx/include/ciso646
+++ b/lib/libcxx/include/ciso646
@@ -24,13 +24,10 @@
 #    pragma GCC system_header
 #  endif
 
-#  if _LIBCPP_STD_VER >= 20
-
-using __standard_header_ciso646
-    _LIBCPP_DEPRECATED_("removed in C++20. Include <version> instead.") _LIBCPP_NODEBUG = void;
-using __use_standard_header_ciso646 _LIBCPP_NODEBUG                                     = __standard_header_ciso646;
-
+#  if _LIBCPP_STD_VER >= 20 && !__building_module(std) && _LIBCPP_DIAGNOSE_DEPRECATED_HEADERS
+#    warning <ciso646> is removed in C++20. Include <version> instead.
 #  endif
+
 #endif // __cplusplus < 201103L && defined(_LIBCPP_USE_FROZEN_CXX03_HEADERS)
 
 #endif // _LIBCPP_CISO646
diff --git a/lib/libcxx/include/complex b/lib/libcxx/include/complex
index d8ec3d95c1..49ab388113 100644
--- a/lib/libcxx/include/complex
+++ b/lib/libcxx/include/complex
@@ -319,8 +319,8 @@ public:
   _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 complex(const complex<_Xp>& __c)
       : __re_(__c.real()), __im_(__c.imag()) {}
 
-  _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 value_type real() const { return __re_; }
-  _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 value_type imag() const { return __im_; }
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 value_type real() const { return __re_; }
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 value_type imag() const { return __im_; }
 
   _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 void real(value_type __re) { __re_ = __re; }
   _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 void imag(value_type __im) { __im_ = __im; }
@@ -432,8 +432,8 @@ public:
   _LIBCPP_HIDE_FROM_ABI explicit _LIBCPP_CONSTEXPR complex(const complex<double>& __c);
   _LIBCPP_HIDE_FROM_ABI explicit _LIBCPP_CONSTEXPR complex(const complex<long double>& __c);
 
-  _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR float real() const { return __re_; }
-  _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR float imag() const { return __im_; }
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR float real() const { return __re_; }
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR float imag() const { return __im_; }
 
   _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 void real(value_type __re) { __re_ = __re; }
   _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 void imag(value_type __im) { __im_ = __im; }
@@ -529,8 +529,8 @@ public:
   _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR complex(const complex<float>& __c);
   _LIBCPP_HIDE_FROM_ABI explicit _LIBCPP_CONSTEXPR complex(const complex<long double>& __c);
 
-  _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR double real() const { return __re_; }
-  _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR double imag() const { return __im_; }
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR double real() const { return __re_; }
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR double imag() const { return __im_; }
 
   _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 void real(value_type __re) { __re_ = __re; }
   _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 void imag(value_type __im) { __im_ = __im; }
@@ -630,8 +630,8 @@ public:
   _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR complex(const complex<float>& __c);
   _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR complex(const complex<double>& __c);
 
-  _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR long double real() const { return __re_; }
-  _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR long double imag() const { return __im_; }
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR long double real() const { return __re_; }
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR long double imag() const { return __im_; }
 
   _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 void real(value_type __re) { __re_ = __re; }
   _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 void imag(value_type __im) { __im_ = __im; }
@@ -732,7 +732,7 @@ inline _LIBCPP_CONSTEXPR complex<long double>::complex(const complex<double>& __
 // 26.3.6 operators:
 
 template <class _Tp>
-inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 complex<_Tp>
+[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 complex<_Tp>
 operator+(const complex<_Tp>& __x, const complex<_Tp>& __y) {
   complex<_Tp> __t(__x);
   __t += __y;
@@ -740,7 +740,7 @@ operator+(const complex<_Tp>& __x, const complex<_Tp>& __y) {
 }
 
 template <class _Tp>
-inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 complex<_Tp>
+[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 complex<_Tp>
 operator+(const complex<_Tp>& __x, const _Tp& __y) {
   complex<_Tp> __t(__x);
   __t += __y;
@@ -748,7 +748,7 @@ operator+(const complex<_Tp>& __x, const _Tp& __y) {
 }
 
 template <class _Tp>
-inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 complex<_Tp>
+[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 complex<_Tp>
 operator+(const _Tp& __x, const complex<_Tp>& __y) {
   complex<_Tp> __t(__y);
   __t += __x;
@@ -756,7 +756,7 @@ operator+(const _Tp& __x, const complex<_Tp>& __y) {
 }
 
 template <class _Tp>
-inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 complex<_Tp>
+[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 complex<_Tp>
 operator-(const complex<_Tp>& __x, const complex<_Tp>& __y) {
   complex<_Tp> __t(__x);
   __t -= __y;
@@ -764,7 +764,7 @@ operator-(const complex<_Tp>& __x, const complex<_Tp>& __y) {
 }
 
 template <class _Tp>
-inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 complex<_Tp>
+[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 complex<_Tp>
 operator-(const complex<_Tp>& __x, const _Tp& __y) {
   complex<_Tp> __t(__x);
   __t -= __y;
@@ -772,7 +772,7 @@ operator-(const complex<_Tp>& __x, const _Tp& __y) {
 }
 
 template <class _Tp>
-inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 complex<_Tp>
+[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 complex<_Tp>
 operator-(const _Tp& __x, const complex<_Tp>& __y) {
   complex<_Tp> __t(-__y);
   __t += __x;
@@ -780,13 +780,13 @@ operator-(const _Tp& __x, const complex<_Tp>& __y) {
 }
 
 template <class _Tp, __enable_if_t<is_floating_point<_Tp>::value, int> >
-_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 complex<_Tp>
+[[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 complex<_Tp>
 operator*(const complex<_Tp>& __lhs, const complex<_Tp>& __rhs) {
   return complex<_Tp>(__from_builtin_tag(), __lhs.__builtin() * __rhs.__builtin());
 }
 
 template <class _Tp, __enable_if_t<!is_floating_point<_Tp>::value, int> >
-_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 complex<_Tp>
+[[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 complex<_Tp>
 operator*(const complex<_Tp>& __z, const complex<_Tp>& __w) {
   _Tp __a = __z.real();
   _Tp __b = __z.imag();
@@ -797,7 +797,7 @@ operator*(const complex<_Tp>& __z, const complex<_Tp>& __w) {
 }
 
 template <class _Tp>
-inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 complex<_Tp>
+[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 complex<_Tp>
 operator*(const complex<_Tp>& __x, const _Tp& __y) {
   complex<_Tp> __t(__x);
   __t *= __y;
@@ -805,7 +805,7 @@ operator*(const complex<_Tp>& __x, const _Tp& __y) {
 }
 
 template <class _Tp>
-inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 complex<_Tp>
+[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 complex<_Tp>
 operator*(const _Tp& __x, const complex<_Tp>& __y) {
   complex<_Tp> __t(__y);
   __t *= __x;
@@ -813,13 +813,13 @@ operator*(const _Tp& __x, const complex<_Tp>& __y) {
 }
 
 template <class _Tp, __enable_if_t<is_floating_point<_Tp>::value, int> >
-_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 complex<_Tp>
+[[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 complex<_Tp>
 operator/(const complex<_Tp>& __lhs, const complex<_Tp>& __rhs) {
   return complex<_Tp>(__from_builtin_tag(), __lhs.__builtin() / __rhs.__builtin());
 }
 
 template <class _Tp, __enable_if_t<!is_floating_point<_Tp>::value, int> >
-_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 complex<_Tp>
+[[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 complex<_Tp>
 operator/(const complex<_Tp>& __z, const complex<_Tp>& __w) {
   _Tp __a = __z.real();
   _Tp __b = __z.imag();
@@ -831,13 +831,13 @@ operator/(const complex<_Tp>& __z, const complex<_Tp>& __w) {
 }
 
 template <class _Tp>
-inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 complex<_Tp>
+[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 complex<_Tp>
 operator/(const complex<_Tp>& __x, const _Tp& __y) {
   return complex<_Tp>(__x.real() / __y, __x.imag() / __y);
 }
 
 template <class _Tp>
-inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 complex<_Tp>
+[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 complex<_Tp>
 operator/(const _Tp& __x, const complex<_Tp>& __y) {
   complex<_Tp> __t(__x);
   __t /= __y;
@@ -845,12 +845,14 @@ operator/(const _Tp& __x, const complex<_Tp>& __y) {
 }
 
 template <class _Tp>
-inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 complex<_Tp> operator+(const complex<_Tp>& __x) {
+[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 complex<_Tp>
+operator+(const complex<_Tp>& __x) {
   return __x;
 }
 
 template <class _Tp>
-inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 complex<_Tp> operator-(const complex<_Tp>& __x) {
+[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 complex<_Tp>
+operator-(const complex<_Tp>& __x) {
   return complex<_Tp>(-__x.real(), -__x.imag());
 }
 
@@ -912,12 +914,13 @@ struct __libcpp_complex_overload_traits<_Tp, false, true> {
 // real
 
 template <class _Tp>
-inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 _Tp real(const complex<_Tp>& __c) {
+[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 _Tp real(const complex<_Tp>& __c) {
   return __c.real();
 }
 
 template <class _Tp>
-inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 typename __libcpp_complex_overload_traits<_Tp>::_ValueType
+[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14
+typename __libcpp_complex_overload_traits<_Tp>::_ValueType
 real(_Tp __re) {
   return __re;
 }
@@ -925,12 +928,13 @@ real(_Tp __re) {
 // imag
 
 template <class _Tp>
-inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 _Tp imag(const complex<_Tp>& __c) {
+[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 _Tp imag(const complex<_Tp>& __c) {
   return __c.imag();
 }
 
 template <class _Tp>
-inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 typename __libcpp_complex_overload_traits<_Tp>::_ValueType
+[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14
+typename __libcpp_complex_overload_traits<_Tp>::_ValueType
 imag(_Tp) {
   return 0;
 }
@@ -938,36 +942,36 @@ imag(_Tp) {
 // abs
 
 template <class _Tp>
-inline _LIBCPP_HIDE_FROM_ABI _Tp abs(const complex<_Tp>& __c) {
+[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI _Tp abs(const complex<_Tp>& __c) {
   return std::hypot(__c.real(), __c.imag());
 }
 
 // arg
 
 template <class _Tp>
-inline _LIBCPP_HIDE_FROM_ABI _Tp arg(const complex<_Tp>& __c) {
+[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI _Tp arg(const complex<_Tp>& __c) {
   return std::atan2(__c.imag(), __c.real());
 }
 
 template <class _Tp, __enable_if_t<is_same<_Tp, long double>::value, int> = 0>
-inline _LIBCPP_HIDE_FROM_ABI long double arg(_Tp __re) {
+[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI long double arg(_Tp __re) {
   return std::atan2l(0.L, __re);
 }
 
 template <class _Tp, __enable_if_t<is_integral<_Tp>::value || is_same<_Tp, double>::value, int> = 0>
-inline _LIBCPP_HIDE_FROM_ABI double arg(_Tp __re) {
+[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI double arg(_Tp __re) {
   return std::atan2(0., __re);
 }
 
 template <class _Tp, __enable_if_t<is_same<_Tp, float>::value, int> = 0>
-inline _LIBCPP_HIDE_FROM_ABI float arg(_Tp __re) {
+[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI float arg(_Tp __re) {
   return std::atan2f(0.F, __re);
 }
 
 // norm
 
 template <class _Tp>
-inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 _Tp norm(const complex<_Tp>& __c) {
+[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 _Tp norm(const complex<_Tp>& __c) {
   if (std::__constexpr_isinf(__c.real()))
     return std::abs(__c.real());
   if (std::__constexpr_isinf(__c.imag()))
@@ -976,7 +980,8 @@ inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 _Tp norm(const comple
 }
 
 template <class _Tp>
-inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 typename __libcpp_complex_overload_traits<_Tp>::_ValueType
+[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20
+typename __libcpp_complex_overload_traits<_Tp>::_ValueType
 norm(_Tp __re) {
   typedef typename __libcpp_complex_overload_traits<_Tp>::_ValueType _ValueType;
   return static_cast<_ValueType>(__re) * __re;
@@ -985,12 +990,14 @@ norm(_Tp __re) {
 // conj
 
 template <class _Tp>
-inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 complex<_Tp> conj(const complex<_Tp>& __c) {
+[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 complex<_Tp>
+conj(const complex<_Tp>& __c) {
   return complex<_Tp>(__c.real(), -__c.imag());
 }
 
 template <class _Tp>
-inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 typename __libcpp_complex_overload_traits<_Tp>::_ComplexType
+[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20
+typename __libcpp_complex_overload_traits<_Tp>::_ComplexType
 conj(_Tp __re) {
   typedef typename __libcpp_complex_overload_traits<_Tp>::_ComplexType _ComplexType;
   return _ComplexType(__re);
@@ -999,7 +1006,7 @@ conj(_Tp __re) {
 // proj
 
 template <class _Tp>
-inline _LIBCPP_HIDE_FROM_ABI complex<_Tp> proj(const complex<_Tp>& __c) {
+[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI complex<_Tp> proj(const complex<_Tp>& __c) {
   complex<_Tp> __r = __c;
   if (std::isinf(__c.real()) || std::isinf(__c.imag()))
     __r = complex<_Tp>(INFINITY, std::copysign(_Tp(0), __c.imag()));
@@ -1007,14 +1014,16 @@ inline _LIBCPP_HIDE_FROM_ABI complex<_Tp> proj(const complex<_Tp>& __c) {
 }
 
 template <class _Tp, __enable_if_t<is_floating_point<_Tp>::value, int> = 0>
-inline _LIBCPP_HIDE_FROM_ABI typename __libcpp_complex_overload_traits<_Tp>::_ComplexType proj(_Tp __re) {
+[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI typename __libcpp_complex_overload_traits<_Tp>::_ComplexType
+proj(_Tp __re) {
   if (std::isinf(__re))
     __re = std::abs(__re);
   return complex<_Tp>(__re);
 }
 
 template <class _Tp, __enable_if_t<is_integral<_Tp>::value, int> = 0>
-inline _LIBCPP_HIDE_FROM_ABI typename __libcpp_complex_overload_traits<_Tp>::_ComplexType proj(_Tp __re) {
+[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI typename __libcpp_complex_overload_traits<_Tp>::_ComplexType
+proj(_Tp __re) {
   typedef typename __libcpp_complex_overload_traits<_Tp>::_ComplexType _ComplexType;
   return _ComplexType(__re);
 }
@@ -1022,7 +1031,7 @@ inline _LIBCPP_HIDE_FROM_ABI typename __libcpp_complex_overload_traits<_Tp>::_Co
 // polar
 
 template <class _Tp>
-_LIBCPP_HIDE_FROM_ABI complex<_Tp> polar(const _Tp& __rho, const _Tp& __theta = _Tp()) {
+[[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI complex<_Tp> polar(const _Tp& __rho, const _Tp& __theta = _Tp()) {
   if (std::isnan(__rho) || std::signbit(__rho))
     return complex<_Tp>(_Tp(NAN), _Tp(NAN));
   if (std::isnan(__theta)) {
@@ -1047,21 +1056,21 @@ _LIBCPP_HIDE_FROM_ABI complex<_Tp> polar(const _Tp& __rho, const _Tp& __theta =
 // log
 
 template <class _Tp>
-inline _LIBCPP_HIDE_FROM_ABI complex<_Tp> log(const complex<_Tp>& __x) {
+[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI complex<_Tp> log(const complex<_Tp>& __x) {
   return complex<_Tp>(std::log(std::abs(__x)), std::arg(__x));
 }
 
 // log10
 
 template <class _Tp>
-inline _LIBCPP_HIDE_FROM_ABI complex<_Tp> log10(const complex<_Tp>& __x) {
+[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI complex<_Tp> log10(const complex<_Tp>& __x) {
   return std::log(__x) / std::log(_Tp(10));
 }
 
 // sqrt
 
 template <class _Tp>
-_LIBCPP_HIDE_FROM_ABI complex<_Tp> sqrt(const complex<_Tp>& __x) {
+[[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI complex<_Tp> sqrt(const complex<_Tp>& __x) {
   if (std::isinf(__x.imag()))
     return complex<_Tp>(_Tp(INFINITY), __x.imag());
   if (std::isinf(__x.real())) {
@@ -1075,7 +1084,7 @@ _LIBCPP_HIDE_FROM_ABI complex<_Tp> sqrt(const complex<_Tp>& __x) {
 // exp
 
 template <class _Tp>
-_LIBCPP_HIDE_FROM_ABI complex<_Tp> exp(const complex<_Tp>& __x) {
+[[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI complex<_Tp> exp(const complex<_Tp>& __x) {
   _Tp __i = __x.imag();
   if (__i == 0) {
     return complex<_Tp>(std::exp(__x.real()), std::copysign(_Tp(0), __x.imag()));
@@ -1097,24 +1106,27 @@ _LIBCPP_HIDE_FROM_ABI complex<_Tp> exp(const complex<_Tp>& __x) {
 // pow
 
 template <class _Tp>
-inline _LIBCPP_HIDE_FROM_ABI complex<_Tp> pow(const complex<_Tp>& __x, const complex<_Tp>& __y) {
+[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI complex<_Tp> pow(const complex<_Tp>& __x, const complex<_Tp>& __y) {
   return std::exp(__y * std::log(__x));
 }
 
 template <class _Tp, class _Up, __enable_if_t<is_floating_point<_Tp>::value && is_floating_point<_Up>::value, int> = 0>
-inline _LIBCPP_HIDE_FROM_ABI complex<__promote_t<_Tp, _Up> > pow(const complex<_Tp>& __x, const complex<_Up>& __y) {
+[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI complex<__promote_t<_Tp, _Up> >
+pow(const complex<_Tp>& __x, const complex<_Up>& __y) {
   typedef complex<__promote_t<_Tp, _Up> > result_type;
   return std::pow(result_type(__x), result_type(__y));
 }
 
 template <class _Tp, class _Up, __enable_if_t<is_floating_point<_Tp>::value && is_arithmetic<_Up>::value, int> = 0>
-inline _LIBCPP_HIDE_FROM_ABI complex<__promote_t<_Tp, _Up> > pow(const complex<_Tp>& __x, const _Up& __y) {
+[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI complex<__promote_t<_Tp, _Up> >
+pow(const complex<_Tp>& __x, const _Up& __y) {
   typedef complex<__promote_t<_Tp, _Up> > result_type;
   return std::pow(result_type(__x), result_type(__y));
 }
 
 template <class _Tp, class _Up, __enable_if_t<is_arithmetic<_Tp>::value && is_floating_point<_Up>::value, int> = 0>
-inline _LIBCPP_HIDE_FROM_ABI complex<__promote_t<_Tp, _Up> > pow(const _Tp& __x, const complex<_Up>& __y) {
+[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI complex<__promote_t<_Tp, _Up> >
+pow(const _Tp& __x, const complex<_Up>& __y) {
   typedef complex<__promote_t<_Tp, _Up> > result_type;
   return std::pow(result_type(__x), result_type(__y));
 }
@@ -1129,7 +1141,7 @@ inline _LIBCPP_HIDE_FROM_ABI complex<_Tp> __sqr(const complex<_Tp>& __x) {
 // asinh
 
 template <class _Tp>
-_LIBCPP_HIDE_FROM_ABI complex<_Tp> asinh(const complex<_Tp>& __x) {
+[[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI complex<_Tp> asinh(const complex<_Tp>& __x) {
   const _Tp __pi(atan2(+0., -0.));
   if (std::isinf(__x.real())) {
     if (std::isnan(__x.imag()))
@@ -1154,7 +1166,7 @@ _LIBCPP_HIDE_FROM_ABI complex<_Tp> asinh(const complex<_Tp>& __x) {
 // acosh
 
 template <class _Tp>
-_LIBCPP_HIDE_FROM_ABI complex<_Tp> acosh(const complex<_Tp>& __x) {
+[[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI complex<_Tp> acosh(const complex<_Tp>& __x) {
   const _Tp __pi(atan2(+0., -0.));
   if (std::isinf(__x.real())) {
     if (std::isnan(__x.imag()))
@@ -1183,7 +1195,7 @@ _LIBCPP_HIDE_FROM_ABI complex<_Tp> acosh(const complex<_Tp>& __x) {
 // atanh
 
 template <class _Tp>
-_LIBCPP_HIDE_FROM_ABI complex<_Tp> atanh(const complex<_Tp>& __x) {
+[[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI complex<_Tp> atanh(const complex<_Tp>& __x) {
   const _Tp __pi(atan2(+0., -0.));
   if (std::isinf(__x.imag())) {
     return complex<_Tp>(std::copysign(_Tp(0), __x.real()), std::copysign(__pi / _Tp(2), __x.imag()));
@@ -1209,7 +1221,7 @@ _LIBCPP_HIDE_FROM_ABI complex<_Tp> atanh(const complex<_Tp>& __x) {
 // sinh
 
 template <class _Tp>
-_LIBCPP_HIDE_FROM_ABI complex<_Tp> sinh(const complex<_Tp>& __x) {
+[[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI complex<_Tp> sinh(const complex<_Tp>& __x) {
   if (std::isinf(__x.real()) && !std::isfinite(__x.imag()))
     return complex<_Tp>(__x.real(), _Tp(NAN));
   if (__x.real() == 0 && !std::isfinite(__x.imag()))
@@ -1222,7 +1234,7 @@ _LIBCPP_HIDE_FROM_ABI complex<_Tp> sinh(const complex<_Tp>& __x) {
 // cosh
 
 template <class _Tp>
-_LIBCPP_HIDE_FROM_ABI complex<_Tp> cosh(const complex<_Tp>& __x) {
+[[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI complex<_Tp> cosh(const complex<_Tp>& __x) {
   if (std::isinf(__x.real()) && !std::isfinite(__x.imag()))
     return complex<_Tp>(std::abs(__x.real()), _Tp(NAN));
   if (__x.real() == 0 && !std::isfinite(__x.imag()))
@@ -1237,7 +1249,7 @@ _LIBCPP_HIDE_FROM_ABI complex<_Tp> cosh(const complex<_Tp>& __x) {
 // tanh
 
 template <class _Tp>
-_LIBCPP_HIDE_FROM_ABI complex<_Tp> tanh(const complex<_Tp>& __x) {
+[[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI complex<_Tp> tanh(const complex<_Tp>& __x) {
   if (std::isinf(__x.real())) {
     if (!std::isfinite(__x.imag()))
       return complex<_Tp>(std::copysign(_Tp(1), __x.real()), _Tp(0));
@@ -1257,7 +1269,7 @@ _LIBCPP_HIDE_FROM_ABI complex<_Tp> tanh(const complex<_Tp>& __x) {
 // asin
 
 template <class _Tp>
-_LIBCPP_HIDE_FROM_ABI complex<_Tp> asin(const complex<_Tp>& __x) {
+[[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI complex<_Tp> asin(const complex<_Tp>& __x) {
   complex<_Tp> __z = std::asinh(complex<_Tp>(-__x.imag(), __x.real()));
   return complex<_Tp>(__z.imag(), -__z.real());
 }
@@ -1265,7 +1277,7 @@ _LIBCPP_HIDE_FROM_ABI complex<_Tp> asin(const complex<_Tp>& __x) {
 // acos
 
 template <class _Tp>
-_LIBCPP_HIDE_FROM_ABI complex<_Tp> acos(const complex<_Tp>& __x) {
+[[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI complex<_Tp> acos(const complex<_Tp>& __x) {
   const _Tp __pi(atan2(+0., -0.));
   if (std::isinf(__x.real())) {
     if (std::isnan(__x.imag()))
@@ -1297,7 +1309,7 @@ _LIBCPP_HIDE_FROM_ABI complex<_Tp> acos(const complex<_Tp>& __x) {
 // atan
 
 template <class _Tp>
-_LIBCPP_HIDE_FROM_ABI complex<_Tp> atan(const complex<_Tp>& __x) {
+[[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI complex<_Tp> atan(const complex<_Tp>& __x) {
   complex<_Tp> __z = std::atanh(complex<_Tp>(-__x.imag(), __x.real()));
   return complex<_Tp>(__z.imag(), -__z.real());
 }
@@ -1305,7 +1317,7 @@ _LIBCPP_HIDE_FROM_ABI complex<_Tp> atan(const complex<_Tp>& __x) {
 // sin
 
 template <class _Tp>
-_LIBCPP_HIDE_FROM_ABI complex<_Tp> sin(const complex<_Tp>& __x) {
+[[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI complex<_Tp> sin(const complex<_Tp>& __x) {
   complex<_Tp> __z = std::sinh(complex<_Tp>(-__x.imag(), __x.real()));
   return complex<_Tp>(__z.imag(), -__z.real());
 }
@@ -1313,14 +1325,14 @@ _LIBCPP_HIDE_FROM_ABI complex<_Tp> sin(const complex<_Tp>& __x) {
 // cos
 
 template <class _Tp>
-inline _LIBCPP_HIDE_FROM_ABI complex<_Tp> cos(const complex<_Tp>& __x) {
+[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI complex<_Tp> cos(const complex<_Tp>& __x) {
   return std::cosh(complex<_Tp>(-__x.imag(), __x.real()));
 }
 
 // tan
 
 template <class _Tp>
-_LIBCPP_HIDE_FROM_ABI complex<_Tp> tan(const complex<_Tp>& __x) {
+[[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI complex<_Tp> tan(const complex<_Tp>& __x) {
   complex<_Tp> __z = std::tanh(complex<_Tp>(-__x.imag(), __x.real()));
   return complex<_Tp>(__z.imag(), -__z.real());
 }
@@ -1398,7 +1410,7 @@ struct tuple_element<_Ip, complex<_Tp>> {
 };
 
 template <size_t _Ip, class _Xp>
-_LIBCPP_HIDE_FROM_ABI constexpr _Xp& get(complex<_Xp>& __z) noexcept {
+[[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI constexpr _Xp& get(complex<_Xp>& __z) noexcept {
   static_assert(_Ip < 2, "Index value is out of range.");
   if constexpr (_Ip == 0) {
     return __z.__re_;
@@ -1408,7 +1420,7 @@ _LIBCPP_HIDE_FROM_ABI constexpr _Xp& get(complex<_Xp>& __z) noexcept {
 }
 
 template <size_t _Ip, class _Xp>
-_LIBCPP_HIDE_FROM_ABI constexpr _Xp&& get(complex<_Xp>&& __z) noexcept {
+[[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI constexpr _Xp&& get(complex<_Xp>&& __z) noexcept {
   static_assert(_Ip < 2, "Index value is out of range.");
   if constexpr (_Ip == 0) {
     return std::move(__z.__re_);
@@ -1418,7 +1430,7 @@ _LIBCPP_HIDE_FROM_ABI constexpr _Xp&& get(complex<_Xp>&& __z) noexcept {
 }
 
 template <size_t _Ip, class _Xp>
-_LIBCPP_HIDE_FROM_ABI constexpr const _Xp& get(const complex<_Xp>& __z) noexcept {
+[[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI constexpr const _Xp& get(const complex<_Xp>& __z) noexcept {
   static_assert(_Ip < 2, "Index value is out of range.");
   if constexpr (_Ip == 0) {
     return __z.__re_;
@@ -1428,7 +1440,7 @@ _LIBCPP_HIDE_FROM_ABI constexpr const _Xp& get(const complex<_Xp>& __z) noexcept
 }
 
 template <size_t _Ip, class _Xp>
-_LIBCPP_HIDE_FROM_ABI constexpr const _Xp&& get(const complex<_Xp>&& __z) noexcept {
+[[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI constexpr const _Xp&& get(const complex<_Xp>&& __z) noexcept {
   static_assert(_Ip < 2, "Index value is out of range.");
   if constexpr (_Ip == 0) {
     return std::move(__z.__re_);
diff --git a/lib/libcxx/include/complex.h b/lib/libcxx/include/complex.h
index 8a1f926896..09d8080f0f 100644
--- a/lib/libcxx/include/complex.h
+++ b/lib/libcxx/include/complex.h
@@ -18,19 +18,19 @@
 */
 
 #if defined(__cplusplus) && __cplusplus < 201103L && defined(_LIBCPP_USE_FROZEN_CXX03_HEADERS)
-#  include <__cxx03/complex.h>
+#  include <__cxx03/__config>
 #else
 #  include <__config>
+#endif
 
-#  if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
-#    pragma GCC system_header
-#  endif
+#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
+#  pragma GCC system_header
+#endif
 
-#  ifdef __cplusplus
-#    include <complex>
-#  elif __has_include_next(<complex.h>)
-#    include_next <complex.h>
-#  endif
-#endif // defined(__cplusplus) && __cplusplus < 201103L && defined(_LIBCPP_USE_FROZEN_CXX03_HEADERS)
+#ifdef __cplusplus
+#  include <complex>
+#elif __has_include_next(<complex.h>)
+#  include_next <complex.h>
+#endif
 
 #endif // _LIBCPP_COMPLEX_H
diff --git a/lib/libcxx/include/condition_variable b/lib/libcxx/include/condition_variable
index 99c74b0280..d42a4802b1 100644
--- a/lib/libcxx/include/condition_variable
+++ b/lib/libcxx/include/condition_variable
@@ -206,14 +206,14 @@ public:
 #    if _LIBCPP_STD_VER >= 20
 
   template <class _Lock, class _Predicate>
-  _LIBCPP_AVAILABILITY_SYNC _LIBCPP_HIDE_FROM_ABI bool wait(_Lock& __lock, stop_token __stoken, _Predicate __pred);
+  _LIBCPP_HIDE_FROM_ABI bool wait(_Lock& __lock, stop_token __stoken, _Predicate __pred);
 
   template <class _Lock, class _Clock, class _Duration, class _Predicate>
-  _LIBCPP_AVAILABILITY_SYNC _LIBCPP_HIDE_FROM_ABI bool wait_until(
+  _LIBCPP_HIDE_FROM_ABI bool wait_until(
       _Lock& __lock, stop_token __stoken, const chrono::time_point<_Clock, _Duration>& __abs_time, _Predicate __pred);
 
   template <class _Lock, class _Rep, class _Period, class _Predicate>
-  _LIBCPP_AVAILABILITY_SYNC _LIBCPP_HIDE_FROM_ABI bool
+  _LIBCPP_HIDE_FROM_ABI bool
   wait_for(_Lock& __lock, stop_token __stoken, const chrono::duration<_Rep, _Period>& __rel_time, _Predicate __pred);
 
 #    endif // _LIBCPP_STD_VER >= 20
diff --git a/lib/libcxx/include/cstdalign b/lib/libcxx/include/cstdalign
index 7f8dd1e1fb..7aa8cc81ad 100644
--- a/lib/libcxx/include/cstdalign
+++ b/lib/libcxx/include/cstdalign
@@ -43,17 +43,10 @@ Macros:
 #  undef __alignof_is_defined
 #  define __alignof_is_defined 1
 
-#  if _LIBCPP_STD_VER >= 20
-
-using __standard_header_cstdalign _LIBCPP_DEPRECATED_("removed in C++20.") _LIBCPP_NODEBUG = void;
-using __use_standard_header_cstdalign _LIBCPP_NODEBUG = __standard_header_cstdalign;
-
-#  elif _LIBCPP_STD_VER >= 17
-
-using __standard_header_cstdalign _LIBCPP_DEPRECATED _LIBCPP_NODEBUG = void;
-using __use_standard_header_cstdalign _LIBCPP_NODEBUG                = __standard_header_cstdalign;
-
+#  if _LIBCPP_STD_VER >= 17 && !__building_module(std) && _LIBCPP_DIAGNOSE_DEPRECATED_HEADERS
+#    warning <cstdalign> is deprecated in C++17 and removed in C++20.
 #  endif
+
 #endif // __cplusplus < 201103L && defined(_LIBCPP_USE_FROZEN_CXX03_HEADERS)
 
 #endif // _LIBCPP_CSTDALIGN
diff --git a/lib/libcxx/include/cstdbool b/lib/libcxx/include/cstdbool
index a432d5f08b..805a287bd7 100644
--- a/lib/libcxx/include/cstdbool
+++ b/lib/libcxx/include/cstdbool
@@ -31,17 +31,10 @@ Macros:
 #  undef __bool_true_false_are_defined
 #  define __bool_true_false_are_defined 1
 
-#  if _LIBCPP_STD_VER >= 20
-
-using __standard_header_cstdbool _LIBCPP_DEPRECATED_("removed in C++20.") _LIBCPP_NODEBUG = void;
-using __use_standard_header_cstdbool _LIBCPP_NODEBUG                                      = __standard_header_cstdbool;
-
-#  elif _LIBCPP_STD_VER >= 17
-
-using __standard_header_cstdbool _LIBCPP_DEPRECATED _LIBCPP_NODEBUG = void;
-using __use_standard_header_cstdbool _LIBCPP_NODEBUG                = __standard_header_cstdbool;
-
+#  if _LIBCPP_STD_VER >= 17 && !__building_module(std) && _LIBCPP_DIAGNOSE_DEPRECATED_HEADERS
+#    warning <cstdbool> is deprecated in C++17 and removed in C++20.
 #  endif
+
 #endif // __cplusplus < 201103L && defined(_LIBCPP_USE_FROZEN_CXX03_HEADERS)
 
 #endif // _LIBCPP_CSTDBOOL
diff --git a/lib/libcxx/include/ctgmath b/lib/libcxx/include/ctgmath
index db0786f1e2..13b7a96e4d 100644
--- a/lib/libcxx/include/ctgmath
+++ b/lib/libcxx/include/ctgmath
@@ -28,17 +28,8 @@
 #    pragma GCC system_header
 #  endif
 
-#  if _LIBCPP_STD_VER >= 20
-
-using __standard_header_ctgmath
-    _LIBCPP_DEPRECATED_("removed in C++20. Include <cmath> and <complex> instead.") _LIBCPP_NODEBUG = void;
-using __use_standard_header_ctgmath _LIBCPP_NODEBUG = __standard_header_ctgmath;
-
-#  elif _LIBCPP_STD_VER >= 17
-
-using __standard_header_ctgmath _LIBCPP_DEPRECATED_("Include <cmath> and <complex> instead.") _LIBCPP_NODEBUG = void;
-using __use_standard_header_ctgmath _LIBCPP_NODEBUG = __standard_header_ctgmath;
-
+#  if _LIBCPP_STD_VER >= 17 && !__building_module(std) && _LIBCPP_DIAGNOSE_DEPRECATED_HEADERS
+#    warning <ctgmath> is deprecated in C++17 and removed in C++20. Include <cmath> and <complex> instead.
 #  endif
 
 #endif // __cplusplus < 201103L && defined(_LIBCPP_USE_FROZEN_CXX03_HEADERS)
diff --git a/lib/libcxx/include/ctype.h b/lib/libcxx/include/ctype.h
index 066f45018b..b2f83ca1c1 100644
--- a/lib/libcxx/include/ctype.h
+++ b/lib/libcxx/include/ctype.h
@@ -30,36 +30,36 @@ int toupper(int c);
 */
 
 #if defined(__cplusplus) && __cplusplus < 201103L && defined(_LIBCPP_USE_FROZEN_CXX03_HEADERS)
-#  include <__cxx03/ctype.h>
+#  include <__cxx03/__config>
 #else
 #  include <__config>
+#endif
 
-#  if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
-#    pragma GCC system_header
-#  endif
+#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
+#  pragma GCC system_header
+#endif
 
-#  if __has_include_next(<ctype.h>)
-#    include_next <ctype.h>
-#  endif
+#if __has_include_next(<ctype.h>)
+#  include_next <ctype.h>
+#endif
 
-#  ifdef __cplusplus
+#ifdef __cplusplus
 
-#    undef isalnum
-#    undef isalpha
-#    undef isblank
-#    undef iscntrl
-#    undef isdigit
-#    undef isgraph
-#    undef islower
-#    undef isprint
-#    undef ispunct
-#    undef isspace
-#    undef isupper
-#    undef isxdigit
-#    undef tolower
-#    undef toupper
+#  undef isalnum
+#  undef isalpha
+#  undef isblank
+#  undef iscntrl
+#  undef isdigit
+#  undef isgraph
+#  undef islower
+#  undef isprint
+#  undef ispunct
+#  undef isspace
+#  undef isupper
+#  undef isxdigit
+#  undef tolower
+#  undef toupper
 
-#  endif
-#endif // defined(__cplusplus) && __cplusplus < 201103L && defined(_LIBCPP_USE_FROZEN_CXX03_HEADERS)
+#endif
 
 #endif // _LIBCPP_CTYPE_H
diff --git a/lib/libcxx/include/cwchar b/lib/libcxx/include/cwchar
index 8b940b887d..d41af176f7 100644
--- a/lib/libcxx/include/cwchar
+++ b/lib/libcxx/include/cwchar
@@ -231,8 +231,8 @@ __constexpr_wmemcmp(const wchar_t* __lhs, const wchar_t* __rhs, size_t __count)
 
 template <class _Tp, class _Up>
 _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 _Tp* __constexpr_wmemchr(_Tp* __str, _Up __value, size_t __count) {
-  static_assert(sizeof(_Tp) == sizeof(wchar_t)&& _LIBCPP_ALIGNOF(_Tp) >= _LIBCPP_ALIGNOF(wchar_t) &&
-                    __libcpp_is_trivially_equality_comparable<_Tp, _Tp>::value,
+  static_assert(sizeof(_Tp) == sizeof(wchar_t) && _LIBCPP_ALIGNOF(_Tp) >= _LIBCPP_ALIGNOF(wchar_t) &&
+                    __is_trivially_equality_comparable_v<_Tp, _Tp>,
                 "Calling wmemchr on non-trivially equality comparable types is unsafe.");
 
 #  if __has_builtin(__builtin_wmemchr)
diff --git a/lib/libcxx/include/deque b/lib/libcxx/include/deque
index e33e7d3150..890cb78828 100644
--- a/lib/libcxx/include/deque
+++ b/lib/libcxx/include/deque
@@ -191,9 +191,9 @@ template <class T, class Allocator, class Predicate>
 #  include <__algorithm/min.h>
 #  include <__algorithm/move.h>
 #  include <__algorithm/move_backward.h>
+#  include <__algorithm/ranges_copy_n.h>
 #  include <__algorithm/remove.h>
 #  include <__algorithm/remove_if.h>
-#  include <__algorithm/unwrap_iter.h>
 #  include <__assert>
 #  include <__config>
 #  include <__debug_utils/sanitizers.h>
@@ -220,21 +220,19 @@ template <class T, class Allocator, class Predicate>
 #  include <__ranges/concepts.h>
 #  include <__ranges/container_compatible_range.h>
 #  include <__ranges/from_range.h>
-#  include <__ranges/size.h>
 #  include <__split_buffer>
 #  include <__type_traits/conditional.h>
 #  include <__type_traits/container_traits.h>
-#  include <__type_traits/disjunction.h>
 #  include <__type_traits/enable_if.h>
 #  include <__type_traits/is_allocator.h>
 #  include <__type_traits/is_convertible.h>
 #  include <__type_traits/is_nothrow_assignable.h>
 #  include <__type_traits/is_nothrow_constructible.h>
-#  include <__type_traits/is_replaceable.h>
 #  include <__type_traits/is_same.h>
 #  include <__type_traits/is_swappable.h>
 #  include <__type_traits/is_trivially_relocatable.h>
 #  include <__type_traits/type_identity.h>
+#  include <__utility/exception_guard.h>
 #  include <__utility/forward.h>
 #  include <__utility/move.h>
 #  include <__utility/pair.h>
@@ -461,9 +459,8 @@ private:
       __deque_iterator<_ValueType, _Pointer, _Reference, _MapPointer, _DiffType, _BlockSize>;
 
 public:
-  using __is_segmented_iterator _LIBCPP_NODEBUG = true_type;
-  using __segment_iterator _LIBCPP_NODEBUG      = _MapPointer;
-  using __local_iterator _LIBCPP_NODEBUG        = _Pointer;
+  using __segment_iterator _LIBCPP_NODEBUG = _MapPointer;
+  using __local_iterator _LIBCPP_NODEBUG   = _Pointer;
 
   static _LIBCPP_HIDE_FROM_ABI __segment_iterator __segment(_Iterator __iter) { return __iter.__m_iter_; }
   static _LIBCPP_HIDE_FROM_ABI __local_iterator __local(_Iterator __iter) { return __iter.__ptr_; }
@@ -488,6 +485,9 @@ const _DiffType __deque_iterator<_ValueType, _Pointer, _Reference, _MapPointer,
 
 template <class _Tp, class _Allocator /*= allocator<_Tp>*/>
 class deque {
+  template <class _Up, class _Alloc>
+  using __split_buffer _LIBCPP_NODEBUG = std::__split_buffer<_Up, _Alloc, __split_buffer_pointer_layout>;
+
 public:
   // types:
 
@@ -531,10 +531,6 @@ public:
       __libcpp_is_trivially_relocatable<__map>::value && __libcpp_is_trivially_relocatable<allocator_type>::value,
       deque,
       void>;
-  using __replaceable _LIBCPP_NODEBUG =
-      __conditional_t<__is_replaceable_v<__map> && __container_allocator_is_replaceable<__alloc_traits>::value,
-                      deque,
-                      void>;
 
   static_assert(is_nothrow_default_constructible<allocator_type>::value ==
                     is_nothrow_default_constructible<__pointer_allocator>::value,
@@ -635,7 +631,7 @@ public:
 #  endif
   _LIBCPP_HIDE_FROM_ABI deque(size_type __n, const value_type& __v);
 
-  template <__enable_if_t<__is_allocator<_Allocator>::value, int> = 0>
+  template <__enable_if_t<__is_allocator_v<_Allocator>, int> = 0>
   _LIBCPP_HIDE_FROM_ABI deque(size_type __n, const value_type& __v, const allocator_type& __a)
       : __map_(__pointer_allocator(__a)), __start_(0), __size_(0), __alloc_(__a) {
     __annotate_new(0);
@@ -703,8 +699,16 @@ public:
       __assign_with_size_random_access(ranges::begin(__range), __n);
 
     } else if constexpr (ranges::forward_range<_Range> || ranges::sized_range<_Range>) {
-      auto __n = static_cast<size_type>(ranges::distance(__range));
-      __assign_with_size(ranges::begin(__range), __n);
+      auto __n     = ranges::distance(__range);
+      auto __first = ranges::begin(__range);
+
+      auto __result = std::ranges::copy_n(std::move(__first), std::min<size_t>(__n, size()), begin());
+
+      if (static_cast<size_type>(__n) > size()) {
+        __append_with_size(std::move(__result.in), __n - size());
+      } else {
+        __erase_to_end(__result.out);
+      }
 
     } else {
       __assign_with_sentinel(ranges::begin(__range), ranges::end(__range));
@@ -720,45 +724,53 @@ public:
 
   // iterators:
 
-  _LIBCPP_HIDE_FROM_ABI iterator begin() _NOEXCEPT {
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI iterator begin() _NOEXCEPT {
     __map_pointer __mp = __map_.begin() + __start_ / __block_size;
     return iterator(__mp, __map_.empty() ? 0 : *__mp + __start_ % __block_size);
   }
 
-  _LIBCPP_HIDE_FROM_ABI const_iterator begin() const _NOEXCEPT {
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI const_iterator begin() const _NOEXCEPT {
     __map_const_pointer __mp = static_cast<__map_const_pointer>(__map_.begin() + __start_ / __block_size);
     return const_iterator(__mp, __map_.empty() ? 0 : *__mp + __start_ % __block_size);
   }
 
-  _LIBCPP_HIDE_FROM_ABI iterator end() _NOEXCEPT {
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI iterator end() _NOEXCEPT {
     size_type __p      = size() + __start_;
     __map_pointer __mp = __map_.begin() + __p / __block_size;
     return iterator(__mp, __map_.empty() ? 0 : *__mp + __p % __block_size);
   }
 
-  _LIBCPP_HIDE_FROM_ABI const_iterator end() const _NOEXCEPT {
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI const_iterator end() const _NOEXCEPT {
     size_type __p            = size() + __start_;
     __map_const_pointer __mp = static_cast<__map_const_pointer>(__map_.begin() + __p / __block_size);
     return const_iterator(__mp, __map_.empty() ? 0 : *__mp + __p % __block_size);
   }
 
-  _LIBCPP_HIDE_FROM_ABI reverse_iterator rbegin() _NOEXCEPT { return reverse_iterator(end()); }
-  _LIBCPP_HIDE_FROM_ABI const_reverse_iterator rbegin() const _NOEXCEPT { return const_reverse_iterator(end()); }
-  _LIBCPP_HIDE_FROM_ABI reverse_iterator rend() _NOEXCEPT { return reverse_iterator(begin()); }
-  _LIBCPP_HIDE_FROM_ABI const_reverse_iterator rend() const _NOEXCEPT { return const_reverse_iterator(begin()); }
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI reverse_iterator rbegin() _NOEXCEPT { return reverse_iterator(end()); }
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI const_reverse_iterator rbegin() const _NOEXCEPT {
+    return const_reverse_iterator(end());
+  }
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI reverse_iterator rend() _NOEXCEPT { return reverse_iterator(begin()); }
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI const_reverse_iterator rend() const _NOEXCEPT {
+    return const_reverse_iterator(begin());
+  }
 
-  _LIBCPP_HIDE_FROM_ABI const_iterator cbegin() const _NOEXCEPT { return begin(); }
-  _LIBCPP_HIDE_FROM_ABI const_iterator cend() const _NOEXCEPT { return end(); }
-  _LIBCPP_HIDE_FROM_ABI const_reverse_iterator crbegin() const _NOEXCEPT { return const_reverse_iterator(end()); }
-  _LIBCPP_HIDE_FROM_ABI const_reverse_iterator crend() const _NOEXCEPT { return const_reverse_iterator(begin()); }
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI const_iterator cbegin() const _NOEXCEPT { return begin(); }
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI const_iterator cend() const _NOEXCEPT { return end(); }
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI const_reverse_iterator crbegin() const _NOEXCEPT {
+    return const_reverse_iterator(end());
+  }
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI const_reverse_iterator crend() const _NOEXCEPT {
+    return const_reverse_iterator(begin());
+  }
 
   // capacity:
-  _LIBCPP_HIDE_FROM_ABI size_type size() const _NOEXCEPT { return __size(); }
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI size_type size() const _NOEXCEPT { return __size(); }
 
   _LIBCPP_HIDE_FROM_ABI size_type& __size() _NOEXCEPT { return __size_; }
   _LIBCPP_HIDE_FROM_ABI const size_type& __size() const _NOEXCEPT { return __size_; }
 
-  _LIBCPP_HIDE_FROM_ABI size_type max_size() const _NOEXCEPT {
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI size_type max_size() const _NOEXCEPT {
     return std::min<size_type>(__alloc_traits::max_size(__alloc()), numeric_limits<difference_type>::max());
   }
   _LIBCPP_HIDE_FROM_ABI void resize(size_type __n);
@@ -767,18 +779,22 @@ public:
   [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI bool empty() const _NOEXCEPT { return size() == 0; }
 
   // element access:
-  _LIBCPP_HIDE_FROM_ABI reference operator[](size_type __i) _NOEXCEPT;
-  _LIBCPP_HIDE_FROM_ABI const_reference operator[](size_type __i) const _NOEXCEPT;
-  _LIBCPP_HIDE_FROM_ABI reference at(size_type __i);
-  _LIBCPP_HIDE_FROM_ABI const_reference at(size_type __i) const;
-  _LIBCPP_HIDE_FROM_ABI reference front() _NOEXCEPT;
-  _LIBCPP_HIDE_FROM_ABI const_reference front() const _NOEXCEPT;
-  _LIBCPP_HIDE_FROM_ABI reference back() _NOEXCEPT;
-  _LIBCPP_HIDE_FROM_ABI const_reference back() const _NOEXCEPT;
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI reference operator[](size_type __i) _NOEXCEPT;
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI const_reference operator[](size_type __i) const _NOEXCEPT;
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI reference at(size_type __i);
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI const_reference at(size_type __i) const;
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI reference front() _NOEXCEPT;
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI const_reference front() const _NOEXCEPT;
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI reference back() _NOEXCEPT;
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI const_reference back() const _NOEXCEPT;
 
   // 23.2.2.3 modifiers:
   _LIBCPP_HIDE_FROM_ABI void push_front(const value_type& __v);
   _LIBCPP_HIDE_FROM_ABI void push_back(const value_type& __v);
+
+  template <class... _Args>
+  _LIBCPP_HIDE_FROM_ABI iterator __emplace(const_iterator __p, _Args&&... __args);
+
 #  ifndef _LIBCPP_CXX03_LANG
 #    if _LIBCPP_STD_VER >= 17
   template <class... _Args>
@@ -791,8 +807,11 @@ public:
   template <class... _Args>
   _LIBCPP_HIDE_FROM_ABI void emplace_back(_Args&&... __args);
 #    endif
+
   template <class... _Args>
-  _LIBCPP_HIDE_FROM_ABI iterator emplace(const_iterator __p, _Args&&... __args);
+  _LIBCPP_HIDE_FROM_ABI iterator emplace(const_iterator __p, _Args&&... __args) {
+    return __emplace(__p, std::forward<_Args>(__args)...);
+  }
 
   _LIBCPP_HIDE_FROM_ABI void push_front(value_type&& __v);
   _LIBCPP_HIDE_FROM_ABI void push_back(value_type&& __v);
@@ -805,17 +824,21 @@ public:
 
   template <_ContainerCompatibleRange<_Tp> _Range>
   _LIBCPP_HIDE_FROM_ABI void append_range(_Range&& __range) {
-    insert_range(end(), std::forward<_Range>(__range));
+    if constexpr (ranges::forward_range<_Range> || ranges::sized_range<_Range>) {
+      __append_with_size(ranges::begin(__range), ranges::distance(__range));
+    } else {
+      __append_with_sentinel(ranges::begin(__range), ranges::end(__range));
+    }
   }
 #    endif
 
-  _LIBCPP_HIDE_FROM_ABI iterator insert(const_iterator __p, value_type&& __v);
+  _LIBCPP_HIDE_FROM_ABI iterator insert(const_iterator __p, value_type&& __v) { return __emplace(__p, std::move(__v)); }
 
   _LIBCPP_HIDE_FROM_ABI iterator insert(const_iterator __p, initializer_list<value_type> __il) {
     return insert(__p, __il.begin(), __il.end());
   }
 #  endif // _LIBCPP_CXX03_LANG
-  _LIBCPP_HIDE_FROM_ABI iterator insert(const_iterator __p, const value_type& __v);
+  _LIBCPP_HIDE_FROM_ABI iterator insert(const_iterator __p, const value_type& __v) { return __emplace(__p, __v); }
   _LIBCPP_HIDE_FROM_ABI iterator insert(const_iterator __p, size_type __n, const value_type& __v);
   template <class _InputIter, __enable_if_t<__has_exactly_input_iterator_category<_InputIter>::value, int> = 0>
   _LIBCPP_HIDE_FROM_ABI iterator insert(const_iterator __p, _InputIter __f, _InputIter __l);
@@ -1109,7 +1132,7 @@ public:
     // This function tests deque object annotations.
     if (empty()) {
       for (__map_const_iterator __it = __map_.begin(); __it != __map_.end(); ++__it) {
-        if (!__sanitizer_verify_double_ended_contiguous_container(
+        if (!::__sanitizer_verify_double_ended_contiguous_container(
                 std::__to_address(*__it),
                 std::__to_address(*__it),
                 std::__to_address(*__it),
@@ -1141,7 +1164,7 @@ public:
 
       // Is the block before or after deque blocks that contain elements?
       if (__it < __first_mp || __it > __last_mp) {
-        if (!__sanitizer_verify_double_ended_contiguous_container(
+        if (!::__sanitizer_verify_double_ended_contiguous_container(
                 std::__to_address(*__it),
                 std::__to_address(*__it),
                 std::__to_address(*__it),
@@ -1151,7 +1174,7 @@ public:
         const void* __containers_buffer_beg = (__it == __first_mp) ? __p_beg : (const void*)std::__to_address(*__it);
         const void* __containers_buffer_end =
             (__it == __last_mp) ? __p_end : (const void*)std::__to_address(*__it + __block_size);
-        if (!__sanitizer_verify_double_ended_contiguous_container(
+        if (!::__sanitizer_verify_double_ended_contiguous_container(
                 std::__to_address(*__it),
                 __containers_buffer_beg,
                 __containers_buffer_end,
@@ -1191,7 +1214,7 @@ private:
 
   template <class _RandomAccessIterator>
   _LIBCPP_HIDE_FROM_ABI void __assign_with_size_random_access(_RandomAccessIterator __f, difference_type __n);
-  template <class _Iterator>
+  template <class _AlgPolicy, class _Iterator>
   _LIBCPP_HIDE_FROM_ABI void __assign_with_size(_Iterator __f, difference_type __n);
 
   template <class _Iterator, class _Sentinel>
@@ -1239,8 +1262,8 @@ private:
       clear();
       shrink_to_fit();
     }
-    __alloc()       = __c.__alloc();
-    __map_.__alloc_ = __c.__map_.__alloc_;
+    __alloc()                = __c.__alloc();
+    __map_.__get_allocator() = __c.__map_.__get_allocator();
   }
 
   _LIBCPP_HIDE_FROM_ABI void __copy_assign_alloc(const deque&, false_type) {}
@@ -1256,22 +1279,22 @@ _LIBCPP_CONSTEXPR const typename allocator_traits<_Alloc>::difference_type deque
 
 #  if _LIBCPP_STD_VER >= 17
 template <class _InputIterator,
-          class _Alloc = allocator<__iter_value_type<_InputIterator>>,
+          class _Alloc = allocator<__iterator_value_type<_InputIterator>>,
           class        = enable_if_t<__has_input_iterator_category<_InputIterator>::value>,
-          class        = enable_if_t<__is_allocator<_Alloc>::value> >
-deque(_InputIterator, _InputIterator) -> deque<__iter_value_type<_InputIterator>, _Alloc>;
+          class        = enable_if_t<__is_allocator_v<_Alloc>>>
+deque(_InputIterator, _InputIterator) -> deque<__iterator_value_type<_InputIterator>, _Alloc>;
 
 template <class _InputIterator,
           class _Alloc,
           class = enable_if_t<__has_input_iterator_category<_InputIterator>::value>,
-          class = enable_if_t<__is_allocator<_Alloc>::value> >
-deque(_InputIterator, _InputIterator, _Alloc) -> deque<__iter_value_type<_InputIterator>, _Alloc>;
+          class = enable_if_t<__is_allocator_v<_Alloc>>>
+deque(_InputIterator, _InputIterator, _Alloc) -> deque<__iterator_value_type<_InputIterator>, _Alloc>;
 #  endif
 
 #  if _LIBCPP_STD_VER >= 23
 template <ranges::input_range _Range,
           class _Alloc = allocator<ranges::range_value_t<_Range>>,
-          class        = enable_if_t<__is_allocator<_Alloc>::value> >
+          class        = enable_if_t<__is_allocator_v<_Alloc>>>
 deque(from_range_t, _Range&&, _Alloc = _Alloc()) -> deque<ranges::range_value_t<_Range>, _Alloc>;
 #  endif
 
@@ -1319,7 +1342,7 @@ deque<_Tp, _Allocator>::deque(const deque& __c)
     : __map_(__pointer_allocator(__alloc_traits::select_on_container_copy_construction(__c.__alloc()))),
       __start_(0),
       __size_(0),
-      __alloc_(__map_.__alloc_) {
+      __alloc_(__map_.__get_allocator()) {
   __annotate_new(0);
   __append(__c.begin(), __c.end());
 }
@@ -1451,24 +1474,6 @@ deque<_Tp, _Allocator>::__assign_with_size_random_access(_RandomAccessIterator _
     __erase_to_end(std::copy_n(__f, __n, begin()));
 }
 
-template <class _Tp, class _Allocator>
-template <class _Iterator>
-_LIBCPP_HIDE_FROM_ABI void deque<_Tp, _Allocator>::__assign_with_size(_Iterator __f, difference_type __n) {
-  if (static_cast<size_type>(__n) > size()) {
-    auto __added_size = __n - size();
-
-    auto __i = begin();
-    for (auto __count = size(); __count != 0; --__count) {
-      *__i++ = *__f++;
-    }
-
-    __append_with_size(__f, __added_size);
-
-  } else {
-    __erase_to_end(std::copy_n(__f, __n, begin()));
-  }
-}
-
 template <class _Tp, class _Allocator>
 void deque<_Tp, _Allocator>::assign(size_type __n, const value_type& __v) {
   if (__n > size()) {
@@ -1661,56 +1666,11 @@ deque<_Tp, _Allocator>::emplace_front(_Args&&... __args) {
   return *begin();
 #    endif
 }
-
-template <class _Tp, class _Allocator>
-typename deque<_Tp, _Allocator>::iterator deque<_Tp, _Allocator>::insert(const_iterator __p, value_type&& __v) {
-  size_type __pos     = __p - begin();
-  size_type __to_end  = size() - __pos;
-  allocator_type& __a = __alloc();
-  if (__pos < __to_end) { // insert by shifting things backward
-    if (__front_spare() == 0)
-      __add_front_capacity();
-    // __front_spare() >= 1
-    __annotate_increase_front(1);
-    if (__pos == 0) {
-      __alloc_traits::construct(__a, std::addressof(*--begin()), std::move(__v));
-      --__start_;
-      ++__size();
-    } else {
-      iterator __b   = begin();
-      iterator __bm1 = std::prev(__b);
-      __alloc_traits::construct(__a, std::addressof(*__bm1), std::move(*__b));
-      --__start_;
-      ++__size();
-      if (__pos > 1)
-        __b = std::move(std::next(__b), __b + __pos, __b);
-      *__b = std::move(__v);
-    }
-  } else { // insert by shifting things forward
-    if (__back_spare() == 0)
-      __add_back_capacity();
-    // __back_capacity >= 1
-    __annotate_increase_back(1);
-    size_type __de = size() - __pos;
-    if (__de == 0) {
-      __alloc_traits::construct(__a, std::addressof(*end()), std::move(__v));
-      ++__size();
-    } else {
-      iterator __e   = end();
-      iterator __em1 = std::prev(__e);
-      __alloc_traits::construct(__a, std::addressof(*__e), std::move(*__em1));
-      ++__size();
-      if (__de > 1)
-        __e = std::move_backward(__e - __de, __em1, __e);
-      *--__e = std::move(__v);
-    }
-  }
-  return begin() + __pos;
-}
+#  endif // _LIBCPP_CXX03_LANG
 
 template <class _Tp, class _Allocator>
 template <class... _Args>
-typename deque<_Tp, _Allocator>::iterator deque<_Tp, _Allocator>::emplace(const_iterator __p, _Args&&... __args) {
+typename deque<_Tp, _Allocator>::iterator deque<_Tp, _Allocator>::__emplace(const_iterator __p, _Args&&... __args) {
   size_type __pos     = __p - begin();
   size_type __to_end  = size() - __pos;
   allocator_type& __a = __alloc();
@@ -1757,60 +1717,6 @@ typename deque<_Tp, _Allocator>::iterator deque<_Tp, _Allocator>::emplace(const_
   return begin() + __pos;
 }
 
-#  endif // _LIBCPP_CXX03_LANG
-
-template <class _Tp, class _Allocator>
-typename deque<_Tp, _Allocator>::iterator deque<_Tp, _Allocator>::insert(const_iterator __p, const value_type& __v) {
-  size_type __pos     = __p - begin();
-  size_type __to_end  = size() - __pos;
-  allocator_type& __a = __alloc();
-  if (__pos < __to_end) { // insert by shifting things backward
-    if (__front_spare() == 0)
-      __add_front_capacity();
-    // __front_spare() >= 1
-    __annotate_increase_front(1);
-    if (__pos == 0) {
-      __alloc_traits::construct(__a, std::addressof(*--begin()), __v);
-      --__start_;
-      ++__size();
-    } else {
-      const_pointer __vt = pointer_traits<const_pointer>::pointer_to(__v);
-      iterator __b       = begin();
-      iterator __bm1     = std::prev(__b);
-      if (__vt == pointer_traits<const_pointer>::pointer_to(*__b))
-        __vt = pointer_traits<const_pointer>::pointer_to(*__bm1);
-      __alloc_traits::construct(__a, std::addressof(*__bm1), std::move(*__b));
-      --__start_;
-      ++__size();
-      if (__pos > 1)
-        __b = __move_and_check(std::next(__b), __b + __pos, __b, __vt);
-      *__b = *__vt;
-    }
-  } else { // insert by shifting things forward
-    if (__back_spare() == 0)
-      __add_back_capacity();
-    // __back_capacity >= 1
-    __annotate_increase_back(1);
-    size_type __de = size() - __pos;
-    if (__de == 0) {
-      __alloc_traits::construct(__a, std::addressof(*end()), __v);
-      ++__size();
-    } else {
-      const_pointer __vt = pointer_traits<const_pointer>::pointer_to(__v);
-      iterator __e       = end();
-      iterator __em1     = std::prev(__e);
-      if (__vt == pointer_traits<const_pointer>::pointer_to(*__em1))
-        __vt = pointer_traits<const_pointer>::pointer_to(*__e);
-      __alloc_traits::construct(__a, std::addressof(*__e), std::move(*__em1));
-      ++__size();
-      if (__de > 1)
-        __e = __move_backward_and_check(__e - __de, __em1, __e, __vt);
-      *--__e = *__vt;
-    }
-  }
-  return begin() + __pos;
-}
-
 template <class _Tp, class _Allocator>
 typename deque<_Tp, _Allocator>::iterator
 deque<_Tp, _Allocator>::insert(const_iterator __p, size_type __n, const value_type& __v) {
@@ -1874,9 +1780,9 @@ template <class _Tp, class _Allocator>
 template <class _Iterator, class _Sentinel>
 _LIBCPP_HIDE_FROM_ABI typename deque<_Tp, _Allocator>::iterator
 deque<_Tp, _Allocator>::__insert_with_sentinel(const_iterator __p, _Iterator __f, _Sentinel __l) {
-  __split_buffer<value_type, allocator_type&> __buf(__alloc());
+  __split_buffer<value_type, allocator_type> __buf(__alloc());
   __buf.__construct_at_end_with_sentinel(std::move(__f), std::move(__l));
-  typedef typename __split_buffer<value_type, allocator_type&>::iterator __bi;
+  typedef typename __split_buffer<value_type, allocator_type>::iterator __bi;
   return insert(__p, move_iterator<__bi>(__buf.begin()), move_iterator<__bi>(__buf.end()));
 }
 
@@ -1891,9 +1797,9 @@ template <class _Tp, class _Allocator>
 template <class _Iterator>
 _LIBCPP_HIDE_FROM_ABI typename deque<_Tp, _Allocator>::iterator
 deque<_Tp, _Allocator>::__insert_with_size(const_iterator __p, _Iterator __f, size_type __n) {
-  __split_buffer<value_type, allocator_type&> __buf(__n, 0, __alloc());
-  __buf.__construct_at_end_with_size(__f, __n);
-  typedef typename __split_buffer<value_type, allocator_type&>::iterator __fwd;
+  __split_buffer<value_type, allocator_type> __buf(__n, 0, __alloc());
+  __buf.__construct_at_end_with_size(std::move(__f), __n);
+  typedef typename __split_buffer<value_type, allocator_type>::iterator __fwd;
   return insert(__p, move_iterator<__fwd>(__buf.begin()), move_iterator<__fwd>(__buf.end()));
 }
 
@@ -2071,8 +1977,8 @@ void deque<_Tp, _Allocator>::__add_front_capacity() {
   }
   // Else need to allocate 1 buffer, *and* we need to reallocate __map_.
   else {
-    __split_buffer<pointer, __pointer_allocator&> __buf(
-        std::max<size_type>(2 * __map_.capacity(), 1), 0, __map_.__alloc_);
+    __split_buffer<pointer, __pointer_allocator> __buf(
+        std::max<size_type>(2 * __map_.capacity(), 1), 0, __map_.__get_allocator());
 
     typedef __allocator_destructor<_Allocator> _Dp;
     unique_ptr<pointer, _Dp> __hold(__alloc_traits::allocate(__a, __block_size), _Dp(__a, __block_size));
@@ -2081,10 +1987,7 @@ void deque<_Tp, _Allocator>::__add_front_capacity() {
 
     for (__map_pointer __i = __map_.begin(); __i != __map_.end(); ++__i)
       __buf.emplace_back(*__i);
-    std::swap(__map_.__first_, __buf.__first_);
-    std::swap(__map_.__begin_, __buf.__begin_);
-    std::swap(__map_.__end_, __buf.__end_);
-    std::swap(__map_.__cap_, __buf.__cap_);
+    __map_.__swap_without_allocator(__buf);
     __start_ = __map_.size() == 1 ? __block_size / 2 : __start_ + __block_size;
   }
   __annotate_whole_block(0, __asan_poison);
@@ -2134,34 +2037,26 @@ void deque<_Tp, _Allocator>::__add_front_capacity(size_type __n) {
   // Else need to allocate __nb buffers, *and* we need to reallocate __map_.
   else {
     size_type __ds = (__nb + __back_capacity) * __block_size - __map_.empty();
-    __split_buffer<pointer, __pointer_allocator&> __buf(
-        std::max<size_type>(2 * __map_.capacity(), __nb + __map_.size()), 0, __map_.__alloc_);
-#  if _LIBCPP_HAS_EXCEPTIONS
-    try {
-#  endif // _LIBCPP_HAS_EXCEPTIONS
-      for (; __nb > 0; --__nb) {
-        __buf.emplace_back(__alloc_traits::allocate(__a, __block_size));
-        // ASan: this is empty container, we have to poison whole block
-        __annotate_poison_block(std::__to_address(__buf.back()), std::__to_address(__buf.back() + __block_size));
-      }
-#  if _LIBCPP_HAS_EXCEPTIONS
-    } catch (...) {
+    __split_buffer<pointer, __pointer_allocator> __buf(
+        std::max<size_type>(2 * __map_.capacity(), __nb + __map_.size()), 0, __map_.__get_allocator());
+    auto __guard = std::__make_exception_guard([&] {
       __annotate_delete();
       for (__map_pointer __i = __buf.begin(); __i != __buf.end(); ++__i)
         __alloc_traits::deallocate(__a, *__i, __block_size);
-      throw;
+    });
+    for (; __nb > 0; --__nb) {
+      __buf.emplace_back(__alloc_traits::allocate(__a, __block_size));
+      // ASan: this is empty container, we have to poison whole block
+      __annotate_poison_block(std::__to_address(__buf.back()), std::__to_address(__buf.back() + __block_size));
     }
-#  endif // _LIBCPP_HAS_EXCEPTIONS
+    __guard.__complete();
     for (; __back_capacity > 0; --__back_capacity) {
       __buf.emplace_back(__map_.back());
       __map_.pop_back();
     }
     for (__map_pointer __i = __map_.begin(); __i != __map_.end(); ++__i)
       __buf.emplace_back(*__i);
-    std::swap(__map_.__first_, __buf.__first_);
-    std::swap(__map_.__begin_, __buf.__begin_);
-    std::swap(__map_.__end_, __buf.__end_);
-    std::swap(__map_.__cap_, __buf.__cap_);
+    __map_.__swap_without_allocator(__buf);
     __start_ += __ds;
   }
 }
@@ -2194,8 +2089,8 @@ void deque<_Tp, _Allocator>::__add_back_capacity() {
   }
   // Else need to allocate 1 buffer, *and* we need to reallocate __map_.
   else {
-    __split_buffer<pointer, __pointer_allocator&> __buf(
-        std::max<size_type>(2 * __map_.capacity(), 1), __map_.size(), __map_.__alloc_);
+    __split_buffer<pointer, __pointer_allocator> __buf(
+        std::max<size_type>(2 * __map_.capacity(), 1), __map_.size(), __map_.__get_allocator());
 
     typedef __allocator_destructor<_Allocator> _Dp;
     unique_ptr<pointer, _Dp> __hold(__alloc_traits::allocate(__a, __block_size), _Dp(__a, __block_size));
@@ -2204,10 +2099,7 @@ void deque<_Tp, _Allocator>::__add_back_capacity() {
 
     for (__map_pointer __i = __map_.end(); __i != __map_.begin();)
       __buf.emplace_front(*--__i);
-    std::swap(__map_.__first_, __buf.__first_);
-    std::swap(__map_.__begin_, __buf.__begin_);
-    std::swap(__map_.__end_, __buf.__end_);
-    std::swap(__map_.__cap_, __buf.__cap_);
+    __map_.__swap_without_allocator(__buf);
     __annotate_whole_block(__map_.size() - 1, __asan_poison);
   }
 }
@@ -2257,36 +2149,28 @@ void deque<_Tp, _Allocator>::__add_back_capacity(size_type __n) {
   // Else need to allocate __nb buffers, *and* we need to reallocate __map_.
   else {
     size_type __ds = __front_capacity * __block_size;
-    __split_buffer<pointer, __pointer_allocator&> __buf(
+    __split_buffer<pointer, __pointer_allocator> __buf(
         std::max<size_type>(2 * __map_.capacity(), __nb + __map_.size()),
         __map_.size() - __front_capacity,
-        __map_.__alloc_);
-#  if _LIBCPP_HAS_EXCEPTIONS
-    try {
-#  endif // _LIBCPP_HAS_EXCEPTIONS
-      for (; __nb > 0; --__nb) {
-        __buf.emplace_back(__alloc_traits::allocate(__a, __block_size));
-        // ASan: this is an empty container, we have to poison the whole block
-        __annotate_poison_block(std::__to_address(__buf.back()), std::__to_address(__buf.back() + __block_size));
-      }
-#  if _LIBCPP_HAS_EXCEPTIONS
-    } catch (...) {
+        __map_.__get_allocator());
+    auto __guard = std::__make_exception_guard([&] {
       __annotate_delete();
       for (__map_pointer __i = __buf.begin(); __i != __buf.end(); ++__i)
         __alloc_traits::deallocate(__a, *__i, __block_size);
-      throw;
+    });
+    for (; __nb > 0; --__nb) {
+      __buf.emplace_back(__alloc_traits::allocate(__a, __block_size));
+      // ASan: this is an empty container, we have to poison the whole block
+      __annotate_poison_block(std::__to_address(__buf.back()), std::__to_address(__buf.back() + __block_size));
     }
-#  endif // _LIBCPP_HAS_EXCEPTIONS
+    __guard.__complete();
     for (; __front_capacity > 0; --__front_capacity) {
       __buf.emplace_back(__map_.front());
       __map_.pop_front();
     }
     for (__map_pointer __i = __map_.end(); __i != __map_.begin();)
       __buf.emplace_front(*--__i);
-    std::swap(__map_.__first_, __buf.__first_);
-    std::swap(__map_.__begin_, __buf.__begin_);
-    std::swap(__map_.__end_, __buf.__end_);
-    std::swap(__map_.__cap_, __buf.__cap_);
+    __map_.__swap_without_allocator(__buf);
     __start_ -= __ds;
   }
 }
diff --git a/lib/libcxx/include/errno.h b/lib/libcxx/include/errno.h
index 692f00f022..e87ad0d1b7 100644
--- a/lib/libcxx/include/errno.h
+++ b/lib/libcxx/include/errno.h
@@ -23,381 +23,381 @@ Macros:
 */
 
 #if defined(__cplusplus) && __cplusplus < 201103L && defined(_LIBCPP_USE_FROZEN_CXX03_HEADERS)
-#  include <__cxx03/errno.h>
+#  include <__cxx03/__config>
 #else
 #  include <__config>
+#endif
 
-#  if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
-#    pragma GCC system_header
-#  endif
+#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
+#  pragma GCC system_header
+#endif
 
-#  if __has_include_next(<errno.h>)
-#    include_next <errno.h>
-#  endif
+#if __has_include_next(<errno.h>)
+#  include_next <errno.h>
+#endif
 
-#  ifdef __cplusplus
+#ifdef __cplusplus
 
-#    if !defined(EOWNERDEAD) || !defined(ENOTRECOVERABLE)
+#  if !defined(EOWNERDEAD) || !defined(ENOTRECOVERABLE)
 
-#      ifdef ELAST
+#    ifdef ELAST
 
 static const int __elast1 = ELAST + 1;
 static const int __elast2 = ELAST + 2;
 
-#      else
+#    else
 
 static const int __elast1 = 104;
 static const int __elast2 = 105;
 
+#    endif
+
+#    ifdef ENOTRECOVERABLE
+
+#      define EOWNERDEAD __elast1
+
+#      ifdef ELAST
+#        undef ELAST
+#        define ELAST EOWNERDEAD
 #      endif
 
-#      ifdef ENOTRECOVERABLE
+#    elif defined(EOWNERDEAD)
 
-#        define EOWNERDEAD __elast1
+#      define ENOTRECOVERABLE __elast1
+#      ifdef ELAST
+#        undef ELAST
+#        define ELAST ENOTRECOVERABLE
+#      endif
 
-#        ifdef ELAST
-#          undef ELAST
-#          define ELAST EOWNERDEAD
-#        endif
+#    else // defined(EOWNERDEAD)
 
-#      elif defined(EOWNERDEAD)
+#      define EOWNERDEAD __elast1
+#      define ENOTRECOVERABLE __elast2
+#      ifdef ELAST
+#        undef ELAST
+#        define ELAST ENOTRECOVERABLE
+#      endif
 
-#        define ENOTRECOVERABLE __elast1
-#        ifdef ELAST
-#          undef ELAST
-#          define ELAST ENOTRECOVERABLE
-#        endif
+#    endif // defined(EOWNERDEAD)
 
-#      else // defined(EOWNERDEAD)
-
-#        define EOWNERDEAD __elast1
-#        define ENOTRECOVERABLE __elast2
-#        ifdef ELAST
-#          undef ELAST
-#          define ELAST ENOTRECOVERABLE
-#        endif
-
-#      endif // defined(EOWNERDEAD)
-
-#    endif // !defined(EOWNERDEAD) || !defined(ENOTRECOVERABLE)
+#  endif // !defined(EOWNERDEAD) || !defined(ENOTRECOVERABLE)
 
 //  supply errno values likely to be missing, particularly on Windows
 
-#    ifndef EAFNOSUPPORT
-#      define EAFNOSUPPORT 9901
-#    endif
+#  ifndef EAFNOSUPPORT
+#    define EAFNOSUPPORT 9901
+#  endif
 
-#    ifndef EADDRINUSE
-#      define EADDRINUSE 9902
-#    endif
+#  ifndef EADDRINUSE
+#    define EADDRINUSE 9902
+#  endif
 
-#    ifndef EADDRNOTAVAIL
-#      define EADDRNOTAVAIL 9903
-#    endif
+#  ifndef EADDRNOTAVAIL
+#    define EADDRNOTAVAIL 9903
+#  endif
 
-#    ifndef EISCONN
-#      define EISCONN 9904
-#    endif
+#  ifndef EISCONN
+#    define EISCONN 9904
+#  endif
 
-#    ifndef EBADMSG
-#      define EBADMSG 9905
-#    endif
+#  ifndef EBADMSG
+#    define EBADMSG 9905
+#  endif
 
-#    ifndef ECONNABORTED
-#      define ECONNABORTED 9906
-#    endif
+#  ifndef ECONNABORTED
+#    define ECONNABORTED 9906
+#  endif
 
-#    ifndef EALREADY
-#      define EALREADY 9907
-#    endif
+#  ifndef EALREADY
+#    define EALREADY 9907
+#  endif
 
-#    ifndef ECONNREFUSED
-#      define ECONNREFUSED 9908
-#    endif
+#  ifndef ECONNREFUSED
+#    define ECONNREFUSED 9908
+#  endif
 
-#    ifndef ECONNRESET
-#      define ECONNRESET 9909
-#    endif
+#  ifndef ECONNRESET
+#    define ECONNRESET 9909
+#  endif
 
-#    ifndef EDESTADDRREQ
-#      define EDESTADDRREQ 9910
-#    endif
+#  ifndef EDESTADDRREQ
+#    define EDESTADDRREQ 9910
+#  endif
 
-#    ifndef EHOSTUNREACH
-#      define EHOSTUNREACH 9911
-#    endif
+#  ifndef EHOSTUNREACH
+#    define EHOSTUNREACH 9911
+#  endif
 
-#    ifndef EIDRM
-#      define EIDRM 9912
-#    endif
+#  ifndef EIDRM
+#    define EIDRM 9912
+#  endif
 
-#    ifndef EMSGSIZE
-#      define EMSGSIZE 9913
-#    endif
+#  ifndef EMSGSIZE
+#    define EMSGSIZE 9913
+#  endif
 
-#    ifndef ENETDOWN
-#      define ENETDOWN 9914
-#    endif
+#  ifndef ENETDOWN
+#    define ENETDOWN 9914
+#  endif
 
-#    ifndef ENETRESET
-#      define ENETRESET 9915
-#    endif
+#  ifndef ENETRESET
+#    define ENETRESET 9915
+#  endif
 
-#    ifndef ENETUNREACH
-#      define ENETUNREACH 9916
-#    endif
+#  ifndef ENETUNREACH
+#    define ENETUNREACH 9916
+#  endif
 
-#    ifndef ENOBUFS
-#      define ENOBUFS 9917
-#    endif
+#  ifndef ENOBUFS
+#    define ENOBUFS 9917
+#  endif
 
-#    ifndef ENOLINK
-#      define ENOLINK 9918
-#    endif
+#  ifndef ENOLINK
+#    define ENOLINK 9918
+#  endif
 
-#    ifndef ENODATA
-#      define ENODATA 9919
-#    endif
+#  ifndef ENODATA
+#    define ENODATA 9919
+#  endif
 
-#    ifndef ENOMSG
-#      define ENOMSG 9920
-#    endif
+#  ifndef ENOMSG
+#    define ENOMSG 9920
+#  endif
 
-#    ifndef ENOPROTOOPT
-#      define ENOPROTOOPT 9921
-#    endif
+#  ifndef ENOPROTOOPT
+#    define ENOPROTOOPT 9921
+#  endif
 
-#    ifndef ENOSR
-#      define ENOSR 9922
-#    endif
+#  ifndef ENOSR
+#    define ENOSR 9922
+#  endif
 
-#    ifndef ENOTSOCK
-#      define ENOTSOCK 9923
-#    endif
+#  ifndef ENOTSOCK
+#    define ENOTSOCK 9923
+#  endif
 
-#    ifndef ENOSTR
-#      define ENOSTR 9924
-#    endif
+#  ifndef ENOSTR
+#    define ENOSTR 9924
+#  endif
 
-#    ifndef ENOTCONN
-#      define ENOTCONN 9925
-#    endif
+#  ifndef ENOTCONN
+#    define ENOTCONN 9925
+#  endif
 
-#    ifndef ENOTSUP
-#      define ENOTSUP 9926
-#    endif
+#  ifndef ENOTSUP
+#    define ENOTSUP 9926
+#  endif
 
-#    ifndef ECANCELED
-#      define ECANCELED 9927
-#    endif
+#  ifndef ECANCELED
+#    define ECANCELED 9927
+#  endif
 
-#    ifndef EINPROGRESS
-#      define EINPROGRESS 9928
-#    endif
+#  ifndef EINPROGRESS
+#    define EINPROGRESS 9928
+#  endif
 
-#    ifndef EOPNOTSUPP
-#      define EOPNOTSUPP 9929
-#    endif
+#  ifndef EOPNOTSUPP
+#    define EOPNOTSUPP 9929
+#  endif
 
-#    ifndef EWOULDBLOCK
-#      define EWOULDBLOCK 9930
-#    endif
+#  ifndef EWOULDBLOCK
+#    define EWOULDBLOCK 9930
+#  endif
 
-#    ifndef EOWNERDEAD
-#      define EOWNERDEAD 9931
-#    endif
+#  ifndef EOWNERDEAD
+#    define EOWNERDEAD 9931
+#  endif
 
-#    ifndef EPROTO
-#      define EPROTO 9932
-#    endif
+#  ifndef EPROTO
+#    define EPROTO 9932
+#  endif
 
-#    ifndef EPROTONOSUPPORT
-#      define EPROTONOSUPPORT 9933
-#    endif
+#  ifndef EPROTONOSUPPORT
+#    define EPROTONOSUPPORT 9933
+#  endif
 
-#    ifndef ENOTRECOVERABLE
-#      define ENOTRECOVERABLE 9934
-#    endif
+#  ifndef ENOTRECOVERABLE
+#    define ENOTRECOVERABLE 9934
+#  endif
 
-#    ifndef ETIME
-#      define ETIME 9935
-#    endif
+#  ifndef ETIME
+#    define ETIME 9935
+#  endif
 
-#    ifndef ETXTBSY
-#      define ETXTBSY 9936
-#    endif
+#  ifndef ETXTBSY
+#    define ETXTBSY 9936
+#  endif
 
-#    ifndef ETIMEDOUT
-#      define ETIMEDOUT 9938
-#    endif
+#  ifndef ETIMEDOUT
+#    define ETIMEDOUT 9938
+#  endif
 
-#    ifndef ELOOP
-#      define ELOOP 9939
-#    endif
+#  ifndef ELOOP
+#    define ELOOP 9939
+#  endif
 
-#    ifndef EOVERFLOW
-#      define EOVERFLOW 9940
-#    endif
+#  ifndef EOVERFLOW
+#    define EOVERFLOW 9940
+#  endif
 
-#    ifndef EPROTOTYPE
-#      define EPROTOTYPE 9941
-#    endif
+#  ifndef EPROTOTYPE
+#    define EPROTOTYPE 9941
+#  endif
 
-#    ifndef ENOSYS
-#      define ENOSYS 9942
-#    endif
+#  ifndef ENOSYS
+#    define ENOSYS 9942
+#  endif
 
-#    ifndef EINVAL
-#      define EINVAL 9943
-#    endif
+#  ifndef EINVAL
+#    define EINVAL 9943
+#  endif
 
-#    ifndef ERANGE
-#      define ERANGE 9944
-#    endif
+#  ifndef ERANGE
+#    define ERANGE 9944
+#  endif
 
-#    ifndef EILSEQ
-#      define EILSEQ 9945
-#    endif
+#  ifndef EILSEQ
+#    define EILSEQ 9945
+#  endif
 
 //  Windows Mobile doesn't appear to define these:
 
-#    ifndef E2BIG
-#      define E2BIG 9946
-#    endif
+#  ifndef E2BIG
+#    define E2BIG 9946
+#  endif
 
-#    ifndef EDOM
-#      define EDOM 9947
-#    endif
+#  ifndef EDOM
+#    define EDOM 9947
+#  endif
 
-#    ifndef EFAULT
-#      define EFAULT 9948
-#    endif
+#  ifndef EFAULT
+#    define EFAULT 9948
+#  endif
 
-#    ifndef EBADF
-#      define EBADF 9949
-#    endif
+#  ifndef EBADF
+#    define EBADF 9949
+#  endif
 
-#    ifndef EPIPE
-#      define EPIPE 9950
-#    endif
+#  ifndef EPIPE
+#    define EPIPE 9950
+#  endif
 
-#    ifndef EXDEV
-#      define EXDEV 9951
-#    endif
+#  ifndef EXDEV
+#    define EXDEV 9951
+#  endif
 
-#    ifndef EBUSY
-#      define EBUSY 9952
-#    endif
+#  ifndef EBUSY
+#    define EBUSY 9952
+#  endif
 
-#    ifndef ENOTEMPTY
-#      define ENOTEMPTY 9953
-#    endif
+#  ifndef ENOTEMPTY
+#    define ENOTEMPTY 9953
+#  endif
 
-#    ifndef ENOEXEC
-#      define ENOEXEC 9954
-#    endif
+#  ifndef ENOEXEC
+#    define ENOEXEC 9954
+#  endif
 
-#    ifndef EEXIST
-#      define EEXIST 9955
-#    endif
+#  ifndef EEXIST
+#    define EEXIST 9955
+#  endif
 
-#    ifndef EFBIG
-#      define EFBIG 9956
-#    endif
+#  ifndef EFBIG
+#    define EFBIG 9956
+#  endif
 
-#    ifndef ENAMETOOLONG
-#      define ENAMETOOLONG 9957
-#    endif
+#  ifndef ENAMETOOLONG
+#    define ENAMETOOLONG 9957
+#  endif
 
-#    ifndef ENOTTY
-#      define ENOTTY 9958
-#    endif
+#  ifndef ENOTTY
+#    define ENOTTY 9958
+#  endif
 
-#    ifndef EINTR
-#      define EINTR 9959
-#    endif
+#  ifndef EINTR
+#    define EINTR 9959
+#  endif
 
-#    ifndef ESPIPE
-#      define ESPIPE 9960
-#    endif
+#  ifndef ESPIPE
+#    define ESPIPE 9960
+#  endif
 
-#    ifndef EIO
-#      define EIO 9961
-#    endif
+#  ifndef EIO
+#    define EIO 9961
+#  endif
 
-#    ifndef EISDIR
-#      define EISDIR 9962
-#    endif
+#  ifndef EISDIR
+#    define EISDIR 9962
+#  endif
 
-#    ifndef ECHILD
-#      define ECHILD 9963
-#    endif
+#  ifndef ECHILD
+#    define ECHILD 9963
+#  endif
 
-#    ifndef ENOLCK
-#      define ENOLCK 9964
-#    endif
+#  ifndef ENOLCK
+#    define ENOLCK 9964
+#  endif
 
-#    ifndef ENOSPC
-#      define ENOSPC 9965
-#    endif
+#  ifndef ENOSPC
+#    define ENOSPC 9965
+#  endif
 
-#    ifndef ENXIO
-#      define ENXIO 9966
-#    endif
+#  ifndef ENXIO
+#    define ENXIO 9966
+#  endif
 
-#    ifndef ENODEV
-#      define ENODEV 9967
-#    endif
+#  ifndef ENODEV
+#    define ENODEV 9967
+#  endif
 
-#    ifndef ENOENT
-#      define ENOENT 9968
-#    endif
+#  ifndef ENOENT
+#    define ENOENT 9968
+#  endif
 
-#    ifndef ESRCH
-#      define ESRCH 9969
-#    endif
+#  ifndef ESRCH
+#    define ESRCH 9969
+#  endif
 
-#    ifndef ENOTDIR
-#      define ENOTDIR 9970
-#    endif
+#  ifndef ENOTDIR
+#    define ENOTDIR 9970
+#  endif
 
-#    ifndef ENOMEM
-#      define ENOMEM 9971
-#    endif
+#  ifndef ENOMEM
+#    define ENOMEM 9971
+#  endif
 
-#    ifndef EPERM
-#      define EPERM 9972
-#    endif
+#  ifndef EPERM
+#    define EPERM 9972
+#  endif
 
-#    ifndef EACCES
-#      define EACCES 9973
-#    endif
+#  ifndef EACCES
+#    define EACCES 9973
+#  endif
 
-#    ifndef EROFS
-#      define EROFS 9974
-#    endif
+#  ifndef EROFS
+#    define EROFS 9974
+#  endif
 
-#    ifndef EDEADLK
-#      define EDEADLK 9975
-#    endif
+#  ifndef EDEADLK
+#    define EDEADLK 9975
+#  endif
 
-#    ifndef EAGAIN
-#      define EAGAIN 9976
-#    endif
+#  ifndef EAGAIN
+#    define EAGAIN 9976
+#  endif
 
-#    ifndef ENFILE
-#      define ENFILE 9977
-#    endif
+#  ifndef ENFILE
+#    define ENFILE 9977
+#  endif
 
-#    ifndef EMFILE
-#      define EMFILE 9978
-#    endif
+#  ifndef EMFILE
+#    define EMFILE 9978
+#  endif
 
-#    ifndef EMLINK
-#      define EMLINK 9979
-#    endif
+#  ifndef EMLINK
+#    define EMLINK 9979
+#  endif
 
-#  endif // __cplusplus
-#endif   // defined(__cplusplus) && __cplusplus < 201103L && defined(_LIBCPP_USE_FROZEN_CXX03_HEADERS)
+#endif // __cplusplus
 
 #endif // _LIBCPP_ERRNO_H
diff --git a/lib/libcxx/include/exception b/lib/libcxx/include/exception
index 74229cd16c..0b2372e571 100644
--- a/lib/libcxx/include/exception
+++ b/lib/libcxx/include/exception
@@ -93,10 +93,13 @@ template <class E> void rethrow_if_nested(const E& e);
 
 #  if !defined(_LIBCPP_REMOVE_TRANSITIVE_INCLUDES) && _LIBCPP_STD_VER <= 20
 #    include <cstddef>
-#    include <cstdlib>
 #    include <new>
 #    include <type_traits>
 #  endif
+
+#  if !defined(_LIBCPP_REMOVE_TRANSITIVE_INCLUDES) && _LIBCPP_STD_VER <= 23
+#    include <cstdlib>
+#  endif
 #endif // __cplusplus < 201103L && defined(_LIBCPP_USE_FROZEN_CXX03_HEADERS)
 
 #endif // _LIBCPP_EXCEPTION
diff --git a/lib/libcxx/include/ext/hash_map b/lib/libcxx/include/ext/hash_map
index 46815eaffa..09c981131f 100644
--- a/lib/libcxx/include/ext/hash_map
+++ b/lib/libcxx/include/ext/hash_map
@@ -206,6 +206,7 @@ template <class Key, class T, class Hash, class Pred, class Alloc>
 #else
 #  include <__config>
 #  include <__hash_table>
+#  include <__memory/compressed_pair.h>
 #  include <algorithm>
 #  include <ext/__hash>
 #  include <functional>
@@ -224,21 +225,9 @@ _LIBCPP_WARNING("Use of the header <ext/hash_map> is deprecated.  Migrate to <un
 
 namespace __gnu_cxx {
 
-template <class _Tp, class _Hash, bool = std::is_empty<_Hash>::value && !std::__libcpp_is_final<_Hash>::value >
-class __hash_map_hasher : private _Hash {
-public:
-  _LIBCPP_HIDE_FROM_ABI __hash_map_hasher() : _Hash() {}
-  _LIBCPP_HIDE_FROM_ABI __hash_map_hasher(const _Hash& __h) : _Hash(__h) {}
-  _LIBCPP_HIDE_FROM_ABI const _Hash& hash_function() const { return *this; }
-  _LIBCPP_HIDE_FROM_ABI size_t operator()(const _Tp& __x) const { return static_cast<const _Hash&>(*this)(__x.first); }
-  _LIBCPP_HIDE_FROM_ABI size_t operator()(const typename _Tp::first_type& __x) const {
-    return static_cast<const _Hash&>(*this)(__x);
-  }
-};
-
 template <class _Tp, class _Hash>
-class __hash_map_hasher<_Tp, _Hash, false> {
-  _Hash __hash_;
+class __hash_map_hasher {
+  _LIBCPP_COMPRESSED_ELEMENT(_Hash, __hash_);
 
 public:
   _LIBCPP_HIDE_FROM_ABI __hash_map_hasher() : __hash_() {}
@@ -248,30 +237,9 @@ public:
   _LIBCPP_HIDE_FROM_ABI size_t operator()(const typename _Tp::first_type& __x) const { return __hash_(__x); }
 };
 
-template <class _Tp, class _Pred, bool = std::is_empty<_Pred>::value && !std::__libcpp_is_final<_Pred>::value >
-class __hash_map_equal : private _Pred {
-public:
-  _LIBCPP_HIDE_FROM_ABI __hash_map_equal() : _Pred() {}
-  _LIBCPP_HIDE_FROM_ABI __hash_map_equal(const _Pred& __p) : _Pred(__p) {}
-  _LIBCPP_HIDE_FROM_ABI const _Pred& key_eq() const { return *this; }
-  _LIBCPP_HIDE_FROM_ABI bool operator()(const _Tp& __x, const _Tp& __y) const {
-    return static_cast<const _Pred&>(*this)(__x.first, __y.first);
-  }
-  _LIBCPP_HIDE_FROM_ABI bool operator()(const typename _Tp::first_type& __x, const _Tp& __y) const {
-    return static_cast<const _Pred&>(*this)(__x, __y.first);
-  }
-  _LIBCPP_HIDE_FROM_ABI bool operator()(const _Tp& __x, const typename _Tp::first_type& __y) const {
-    return static_cast<const _Pred&>(*this)(__x.first, __y);
-  }
-  _LIBCPP_HIDE_FROM_ABI bool
-  operator()(const typename _Tp::first_type& __x, const typename _Tp::first_type& __y) const {
-    return static_cast<const _Pred&>(*this)(__x, __y);
-  }
-};
-
 template <class _Tp, class _Pred>
-class __hash_map_equal<_Tp, _Pred, false> {
-  _Pred __pred_;
+class __hash_map_equal {
+  _LIBCPP_COMPRESSED_ELEMENT(_Pred, __pred_);
 
 public:
   _LIBCPP_HIDE_FROM_ABI __hash_map_equal() : __pred_() {}
@@ -467,8 +435,6 @@ private:
 
   __table __table_;
 
-  typedef typename __table::__node_pointer __node_pointer;
-  typedef typename __table::__node_const_pointer __node_const_pointer;
   typedef typename __table::__node_traits __node_traits;
   typedef typename __table::__node_allocator __node_allocator;
   typedef typename __table::__node __node;
@@ -604,10 +570,7 @@ hash_map<_Key, _Tp, _Hash, _Pred, _Alloc>::hash_map(
 }
 
 template <class _Key, class _Tp, class _Hash, class _Pred, class _Alloc>
-hash_map<_Key, _Tp, _Hash, _Pred, _Alloc>::hash_map(const hash_map& __u) : __table_(__u.__table_) {
-  __table_.__rehash_unique(__u.bucket_count());
-  insert(__u.begin(), __u.end());
-}
+hash_map<_Key, _Tp, _Hash, _Pred, _Alloc>::hash_map(const hash_map& __u) : __table_(__u.__table_) {}
 
 template <class _Key, class _Tp, class _Hash, class _Pred, class _Alloc>
 typename hash_map<_Key, _Tp, _Hash, _Pred, _Alloc>::__node_holder
@@ -693,7 +656,6 @@ private:
 
   __table __table_;
 
-  typedef typename __table::__node_traits __node_traits;
   typedef typename __table::__node_allocator __node_allocator;
   typedef typename __table::__node __node;
   typedef __hash_map_node_destructor<__node_allocator> _Dp;
@@ -822,10 +784,7 @@ hash_multimap<_Key, _Tp, _Hash, _Pred, _Alloc>::hash_multimap(
 }
 
 template <class _Key, class _Tp, class _Hash, class _Pred, class _Alloc>
-hash_multimap<_Key, _Tp, _Hash, _Pred, _Alloc>::hash_multimap(const hash_multimap& __u) : __table_(__u.__table_) {
-  __table_.__rehash_multi(__u.bucket_count());
-  insert(__u.begin(), __u.end());
-}
+hash_multimap<_Key, _Tp, _Hash, _Pred, _Alloc>::hash_multimap(const hash_multimap& __u) : __table_(__u.__table_) {}
 
 template <class _Key, class _Tp, class _Hash, class _Pred, class _Alloc>
 template <class _InputIterator>
diff --git a/lib/libcxx/include/ext/hash_set b/lib/libcxx/include/ext/hash_set
index 62a7a0dbcf..56aa4d8a47 100644
--- a/lib/libcxx/include/ext/hash_set
+++ b/lib/libcxx/include/ext/hash_set
@@ -356,10 +356,7 @@ hash_set<_Value, _Hash, _Pred, _Alloc>::hash_set(
 }
 
 template <class _Value, class _Hash, class _Pred, class _Alloc>
-hash_set<_Value, _Hash, _Pred, _Alloc>::hash_set(const hash_set& __u) : __table_(__u.__table_) {
-  __table_.__rehash_unique(__u.bucket_count());
-  insert(__u.begin(), __u.end());
-}
+hash_set<_Value, _Hash, _Pred, _Alloc>::hash_set(const hash_set& __u) : __table_(__u.__table_) {}
 
 template <class _Value, class _Hash, class _Pred, class _Alloc>
 template <class _InputIterator>
@@ -534,10 +531,7 @@ hash_multiset<_Value, _Hash, _Pred, _Alloc>::hash_multiset(
 }
 
 template <class _Value, class _Hash, class _Pred, class _Alloc>
-hash_multiset<_Value, _Hash, _Pred, _Alloc>::hash_multiset(const hash_multiset& __u) : __table_(__u.__table_) {
-  __table_.__rehash_multi(__u.bucket_count());
-  insert(__u.begin(), __u.end());
-}
+hash_multiset<_Value, _Hash, _Pred, _Alloc>::hash_multiset(const hash_multiset& __u) : __table_(__u.__table_) {}
 
 template <class _Value, class _Hash, class _Pred, class _Alloc>
 template <class _InputIterator>
diff --git a/lib/libcxx/include/fenv.h b/lib/libcxx/include/fenv.h
index 157c24faa1..a767269b5c 100644
--- a/lib/libcxx/include/fenv.h
+++ b/lib/libcxx/include/fenv.h
@@ -50,69 +50,69 @@ int feupdateenv(const fenv_t* envp);
 */
 
 #if defined(__cplusplus) && __cplusplus < 201103L && defined(_LIBCPP_USE_FROZEN_CXX03_HEADERS)
-#  include <__cxx03/fenv.h>
+#  include <__cxx03/__config>
 #else
 #  include <__config>
+#endif
 
-#  if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
-#    pragma GCC system_header
-#  endif
+#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
+#  pragma GCC system_header
+#endif
 
-#  if __has_include_next(<fenv.h>)
-#    include_next <fenv.h>
-#  endif
+#if __has_include_next(<fenv.h>)
+#  include_next <fenv.h>
+#endif
 
-#  ifdef __cplusplus
+#ifdef __cplusplus
 
 extern "C++" {
 
-#    ifdef feclearexcept
-#      undef feclearexcept
-#    endif
+#  ifdef feclearexcept
+#    undef feclearexcept
+#  endif
 
-#    ifdef fegetexceptflag
-#      undef fegetexceptflag
-#    endif
+#  ifdef fegetexceptflag
+#    undef fegetexceptflag
+#  endif
 
-#    ifdef feraiseexcept
-#      undef feraiseexcept
-#    endif
+#  ifdef feraiseexcept
+#    undef feraiseexcept
+#  endif
 
-#    ifdef fesetexceptflag
-#      undef fesetexceptflag
-#    endif
+#  ifdef fesetexceptflag
+#    undef fesetexceptflag
+#  endif
 
-#    ifdef fetestexcept
-#      undef fetestexcept
-#    endif
+#  ifdef fetestexcept
+#    undef fetestexcept
+#  endif
 
-#    ifdef fegetround
-#      undef fegetround
-#    endif
+#  ifdef fegetround
+#    undef fegetround
+#  endif
 
-#    ifdef fesetround
-#      undef fesetround
-#    endif
+#  ifdef fesetround
+#    undef fesetround
+#  endif
 
-#    ifdef fegetenv
-#      undef fegetenv
-#    endif
+#  ifdef fegetenv
+#    undef fegetenv
+#  endif
 
-#    ifdef feholdexcept
-#      undef feholdexcept
-#    endif
+#  ifdef feholdexcept
+#    undef feholdexcept
+#  endif
 
-#    ifdef fesetenv
-#      undef fesetenv
-#    endif
+#  ifdef fesetenv
+#    undef fesetenv
+#  endif
 
-#    ifdef feupdateenv
-#      undef feupdateenv
-#    endif
+#  ifdef feupdateenv
+#    undef feupdateenv
+#  endif
 
 } // extern "C++"
 
-#  endif // defined(__cplusplus)
-#endif   // defined(__cplusplus) && __cplusplus < 201103L && defined(_LIBCPP_USE_FROZEN_CXX03_HEADERS)
+#endif // defined(__cplusplus)
 
 #endif // _LIBCPP_FENV_H
diff --git a/lib/libcxx/include/flat_map b/lib/libcxx/include/flat_map
index eea9896165..f2566c7745 100644
--- a/lib/libcxx/include/flat_map
+++ b/lib/libcxx/include/flat_map
@@ -17,18 +17,359 @@
 #include <initializer_list>     // see [initializer.list.syn]
 
 namespace std {
-  // [flat.map], class template flat_map
-  template<class Key, class T, class Compare = less<Key>,
-           class KeyContainer = vector<Key>, class MappedContainer = vector<T>>
-    class flat_map;
-
   struct sorted_unique_t { explicit sorted_unique_t() = default; };
   inline constexpr sorted_unique_t sorted_unique{};
 
+  // [flat.map], class template flat_map
+  template<class Key, class T, class Compare = less<Key>,
+           class KeyContainer = vector<Key>, class MappedContainer = vector<T>>
+  class flat_map {
+  public:
+    // types
+    using key_type               = Key;
+    using mapped_type            = T;
+    using value_type             = pair<key_type, mapped_type>;
+    using key_compare            = Compare;
+    using reference              = pair<const key_type&, mapped_type&>;
+    using const_reference        = pair<const key_type&, const mapped_type&>;
+    using size_type              = size_t;
+    using difference_type        = ptrdiff_t;
+    using iterator               = implementation-defined; // see [container.requirements]
+    using const_iterator         = implementation-defined; // see [container.requirements]
+    using reverse_iterator       = std::reverse_iterator<iterator>;
+    using const_reverse_iterator = std::reverse_iterator<const_iterator>;
+    using key_container_type     = KeyContainer;
+    using mapped_container_type  = MappedContainer;
+
+    class value_compare {
+    private:
+      key_compare comp;                                         // exposition only
+      constexpr value_compare(key_compare c) : comp(c) { }      // exposition only
+
+    public:
+      constexpr bool operator()(const_reference x, const_reference y) const {
+        return comp(x.first, y.first);
+      }
+    };
+
+    struct containers {
+      key_container_type keys;
+      mapped_container_type values;
+    };
+
+    // [flat.map.cons], constructors
+    constexpr flat_map() : flat_map(key_compare()) { }
+
+    constexpr flat_map(const flat_map&);
+    constexpr flat_map(flat_map&&);
+    constexpr flat_map& operator=(const flat_map&);
+    constexpr flat_map& operator=(flat_map&&);
+
+    constexpr explicit flat_map(const key_compare& comp)
+      : c(), compare(comp) { }
+
+    constexpr flat_map(key_container_type key_cont, mapped_container_type mapped_cont,
+                       const key_compare& comp = key_compare());
+
+    constexpr flat_map(sorted_unique_t, key_container_type key_cont,
+                       mapped_container_type mapped_cont,
+                       const key_compare& comp = key_compare());
+
+    template<class InputIterator>
+      constexpr flat_map(InputIterator first, InputIterator last,
+                         const key_compare& comp = key_compare())
+        : c(), compare(comp) { insert(first, last); }
+
+    template<class InputIterator>
+      constexpr flat_map(sorted_unique_t, InputIterator first, InputIterator last,
+                         const key_compare& comp = key_compare())
+        : c(), compare(comp) { insert(sorted_unique, first, last); }
+
+    template<container-compatible-range<value_type> R>
+      constexpr flat_map(from_range_t, R&& rg)
+        : flat_map(from_range, std::forward<R>(rg), key_compare()) { }
+    template<container-compatible-range<value_type> R>
+      constexpr flat_map(from_range_t, R&& rg, const key_compare& comp)
+        : flat_map(comp) { insert_range(std::forward<R>(rg)); }
+
+    constexpr flat_map(initializer_list<value_type> il, const key_compare& comp = key_compare())
+        : flat_map(il.begin(), il.end(), comp) { }
+
+    constexpr flat_map(sorted_unique_t, initializer_list<value_type> il,
+                       const key_compare& comp = key_compare())
+        : flat_map(sorted_unique, il.begin(), il.end(), comp) { }
+
+    // [flat.map.cons.alloc], constructors with allocators
+
+    template<class Alloc>
+      constexpr explicit flat_map(const Alloc& a);
+    template<class Alloc>
+      constexpr flat_map(const key_compare& comp, const Alloc& a);
+    template<class Alloc>
+      constexpr flat_map(const key_container_type& key_cont,
+                         const mapped_container_type& mapped_cont,
+                         const Alloc& a);
+    template<class Alloc>
+      constexpr flat_map(const key_container_type& key_cont,
+                         const mapped_container_type& mapped_cont,
+                         const key_compare& comp, const Alloc& a);
+    template<class Alloc>
+      constexpr flat_map(sorted_unique_t, const key_container_type& key_cont,
+                         const mapped_container_type& mapped_cont, const Alloc& a);
+    template<class Alloc>
+      constexpr flat_map(sorted_unique_t, const key_container_type& key_cont,
+                         const mapped_container_type& mapped_cont, const key_compare& comp,
+                         const Alloc& a);
+    template<class Alloc>
+      constexpr flat_map(const flat_map&, const Alloc& a);
+    template<class Alloc>
+      constexpr flat_map(flat_map&&, const Alloc& a);
+    template<class InputIterator, class Alloc>
+      constexpr flat_map(InputIterator first, InputIterator last, const Alloc& a);
+    template<class InputIterator, class Alloc>
+      constexpr flat_map(InputIterator first, InputIterator last,
+                         const key_compare& comp, const Alloc& a);
+    template<class InputIterator, class Alloc>
+      constexpr flat_map(sorted_unique_t, InputIterator first, InputIterator last,
+                         const Alloc& a);
+    template<class InputIterator, class Alloc>
+      constexpr flat_map(sorted_unique_t, InputIterator first, InputIterator last,
+                         const key_compare& comp, const Alloc& a);
+    template<container-compatible-range<value_type> R, class Alloc>
+      constexpr flat_map(from_range_t, R&& rg, const Alloc& a);
+    template<container-compatible-range<value_type> R, class Alloc>
+      constexpr flat_map(from_range_t, R&& rg, const key_compare& comp, const Alloc& a);
+    template<class Alloc>
+      constexpr flat_map(initializer_list<value_type> il, const Alloc& a);
+    template<class Alloc>
+      constexpr flat_map(initializer_list<value_type> il, const key_compare& comp,
+                         const Alloc& a);
+    template<class Alloc>
+      constexpr flat_map(sorted_unique_t, initializer_list<value_type> il, const Alloc& a);
+    template<class Alloc>
+      constexpr flat_map(sorted_unique_t, initializer_list<value_type> il,
+                         const key_compare& comp, const Alloc& a);
+
+    constexpr flat_map& operator=(initializer_list<value_type>);
+
+    // iterators
+    constexpr iterator               begin() noexcept;
+    constexpr const_iterator         begin() const noexcept;
+    constexpr iterator               end() noexcept;
+    constexpr const_iterator         end() const noexcept;
+
+    constexpr reverse_iterator       rbegin() noexcept;
+    constexpr const_reverse_iterator rbegin() const noexcept;
+    constexpr reverse_iterator       rend() noexcept;
+    constexpr const_reverse_iterator rend() const noexcept;
+
+    constexpr const_iterator         cbegin() const noexcept;
+    constexpr const_iterator         cend() const noexcept;
+    constexpr const_reverse_iterator crbegin() const noexcept;
+    constexpr const_reverse_iterator crend() const noexcept;
+
+    // [flat.map.capacity], capacity
+    constexpr bool empty() const noexcept;
+    constexpr size_type size() const noexcept;
+    constexpr size_type max_size() const noexcept;
+
+    // [flat.map.access], element access
+    constexpr mapped_type& operator[](const key_type& x);
+    constexpr mapped_type& operator[](key_type&& x);
+    template<class K> constexpr mapped_type& operator[](K&& x);
+    constexpr mapped_type& at(const key_type& x);
+    constexpr const mapped_type& at(const key_type& x) const;
+    template<class K> constexpr mapped_type& at(const K& x);
+    template<class K> constexpr const mapped_type& at(const K& x) const;
+
+    // [flat.map.modifiers], modifiers
+    template<class... Args> constexpr pair<iterator, bool> emplace(Args&&... args);
+    template<class... Args>
+      constexpr iterator emplace_hint(const_iterator position, Args&&... args);
+
+    constexpr pair<iterator, bool> insert(const value_type& x)
+      { return emplace(x); }
+    constexpr pair<iterator, bool> insert(value_type&& x)
+      { return emplace(std::move(x)); }
+    constexpr iterator insert(const_iterator position, const value_type& x)
+      { return emplace_hint(position, x); }
+    constexpr iterator insert(const_iterator position, value_type&& x)
+      { return emplace_hint(position, std::move(x)); }
+
+    template<class P> constexpr pair<iterator, bool> insert(P&& x);
+    template<class P>
+      constexpr iterator insert(const_iterator position, P&&);
+    template<class InputIterator>
+      constexpr void insert(InputIterator first, InputIterator last);
+    template<class InputIterator>
+      constexpr void insert(sorted_unique_t, InputIterator first, InputIterator last);
+    template<container-compatible-range<value_type> R>
+      constexpr void insert_range(R&& rg);
+    template<container-compatible-range<value_type> R>
+      constexpr void insert_range(sorted_unique_t, R&& rg);
+
+    constexpr void insert(initializer_list<value_type> il)
+      { insert(il.begin(), il.end()); }
+    constexpr void insert(sorted_unique_t, initializer_list<value_type> il)
+      { insert(sorted_unique, il.begin(), il.end()); }
+
+    constexpr containers extract() &&;
+    constexpr void replace(key_container_type&& key_cont, mapped_container_type&& mapped_cont);
+
+    template<class... Args>
+      constexpr pair<iterator, bool> try_emplace(const key_type& k, Args&&... args);
+    template<class... Args>
+      constexpr pair<iterator, bool> try_emplace(key_type&& k, Args&&... args);
+    template<class K, class... Args>
+      constexpr pair<iterator, bool> try_emplace(K&& k, Args&&... args);
+    template<class... Args>
+      constexpr iterator try_emplace(const_iterator hint, const key_type& k, Args&&... args);
+    template<class... Args>
+      constexpr iterator try_emplace(const_iterator hint, key_type&& k, Args&&... args);
+    template<class K, class... Args>
+      constexpr iterator try_emplace(const_iterator hint, K&& k, Args&&... args);
+    template<class M>
+      constexpr pair<iterator, bool> insert_or_assign(const key_type& k, M&& obj);
+    template<class M>
+      constexpr pair<iterator, bool> insert_or_assign(key_type&& k, M&& obj);
+    template<class K, class M>
+      constexpr pair<iterator, bool> insert_or_assign(K&& k, M&& obj);
+    template<class M>
+      constexpr iterator insert_or_assign(const_iterator hint, const key_type& k, M&& obj);
+    template<class M>
+      constexpr iterator insert_or_assign(const_iterator hint, key_type&& k, M&& obj);
+    template<class K, class M>
+      constexpr iterator insert_or_assign(const_iterator hint, K&& k, M&& obj);
+
+    constexpr iterator erase(iterator position);
+    constexpr iterator erase(const_iterator position);
+    constexpr size_type erase(const key_type& x);
+    template<class K> constexpr size_type erase(K&& x);
+    constexpr iterator erase(const_iterator first, const_iterator last);
+
+    constexpr void swap(flat_map& y) noexcept(see below);
+    constexpr void clear() noexcept;
+
+    // observers
+    constexpr key_compare key_comp() const;
+    constexpr value_compare value_comp() const;
+
+    constexpr const key_container_type& keys() const noexcept      { return c.keys; }
+    constexpr const mapped_container_type& values() const noexcept { return c.values; }
+
+    // map operations
+    constexpr iterator find(const key_type& x);
+    constexpr const_iterator find(const key_type& x) const;
+    template<class K> constexpr iterator find(const K& x);
+    template<class K> constexpr const_iterator find(const K& x) const;
+
+    constexpr size_type count(const key_type& x) const;
+    template<class K> constexpr size_type count(const K& x) const;
+
+    constexpr bool contains(const key_type& x) const;
+    template<class K> constexpr bool contains(const K& x) const;
+
+    constexpr iterator lower_bound(const key_type& x);
+    constexpr const_iterator lower_bound(const key_type& x) const;
+    template<class K> constexpr iterator lower_bound(const K& x);
+    template<class K> constexpr const_iterator lower_bound(const K& x) const;
+
+    constexpr iterator upper_bound(const key_type& x);
+    constexpr const_iterator upper_bound(const key_type& x) const;
+    template<class K> constexpr iterator upper_bound(const K& x);
+    template<class K> constexpr const_iterator upper_bound(const K& x) const;
+
+    constexpr pair<iterator, iterator> equal_range(const key_type& x);
+    constexpr pair<const_iterator, const_iterator> equal_range(const key_type& x) const;
+    template<class K> constexpr pair<iterator, iterator> equal_range(const K& x);
+    template<class K>
+      constexpr pair<const_iterator, const_iterator> equal_range(const K& x) const;
+
+    friend constexpr bool operator==(const flat_map& x, const flat_map& y);
+
+    friend constexpr synth-three-way-result<value_type>
+      operator<=>(const flat_map& x, const flat_map& y);
+
+    friend constexpr void swap(flat_map& x, flat_map& y) noexcept(noexcept(x.swap(y)))
+      { x.swap(y); }
+
+  private:
+    containers c;               // exposition only
+    key_compare compare;        // exposition only
+
+    struct key-equiv {  // exposition only
+      constexpr key-equiv(key_compare c) : comp(c) { }
+      constexpr bool operator()(const_reference x, const_reference y) const {
+        return !comp(x.first, y.first) && !comp(y.first, x.first);
+      }
+      key_compare comp;
+    };
+  };
+
+  template<class KeyContainer, class MappedContainer,
+           class Compare = less<typename KeyContainer::value_type>>
+    flat_map(KeyContainer, MappedContainer, Compare = Compare())
+      -> flat_map<typename KeyContainer::value_type, typename MappedContainer::value_type,
+                  Compare, KeyContainer, MappedContainer>;
+
+  template<class KeyContainer, class MappedContainer, class Allocator>
+    flat_map(KeyContainer, MappedContainer, Allocator)
+      -> flat_map<typename KeyContainer::value_type, typename MappedContainer::value_type,
+                  less<typename KeyContainer::value_type>, KeyContainer, MappedContainer>;
+  template<class KeyContainer, class MappedContainer, class Compare, class Allocator>
+    flat_map(KeyContainer, MappedContainer, Compare, Allocator)
+      -> flat_map<typename KeyContainer::value_type, typename MappedContainer::value_type,
+                  Compare, KeyContainer, MappedContainer>;
+
+  template<class KeyContainer, class MappedContainer,
+           class Compare = less<typename KeyContainer::value_type>>
+    flat_map(sorted_unique_t, KeyContainer, MappedContainer, Compare = Compare())
+      -> flat_map<typename KeyContainer::value_type, typename MappedContainer::value_type,
+                  Compare, KeyContainer, MappedContainer>;
+
+  template<class KeyContainer, class MappedContainer, class Allocator>
+    flat_map(sorted_unique_t, KeyContainer, MappedContainer, Allocator)
+      -> flat_map<typename KeyContainer::value_type, typename MappedContainer::value_type,
+                  less<typename KeyContainer::value_type>, KeyContainer, MappedContainer>;
+  template<class KeyContainer, class MappedContainer, class Compare, class Allocator>
+    flat_map(sorted_unique_t, KeyContainer, MappedContainer, Compare, Allocator)
+      -> flat_map<typename KeyContainer::value_type, typename MappedContainer::value_type,
+                  Compare, KeyContainer, MappedContainer>;
+
+  template<class InputIterator, class Compare = less<iter-key-type<InputIterator>>>
+    flat_map(InputIterator, InputIterator, Compare = Compare())
+      -> flat_map<iter-key-type<InputIterator>, iter-mapped-type<InputIterator>, Compare>;
+
+  template<class InputIterator, class Compare = less<iter-key-type<InputIterator>>>
+    flat_map(sorted_unique_t, InputIterator, InputIterator, Compare = Compare())
+      -> flat_map<iter-key-type<InputIterator>, iter-mapped-type<InputIterator>, Compare>;
+
+  template<ranges::input_range R, class Compare = less<range-key-type<R>>,
+           class Allocator = allocator<byte>>
+    flat_map(from_range_t, R&&, Compare = Compare(), Allocator = Allocator())
+      -> flat_map<range-key-type<R>, range-mapped-type<R>, Compare,
+                  vector<range-key-type<R>, alloc-rebind<Allocator, range-key-type<R>>>,
+                  vector<range-mapped-type<R>, alloc-rebind<Allocator, range-mapped-type<R>>>>;
+
+  template<ranges::input_range R, class Allocator>
+    flat_map(from_range_t, R&&, Allocator)
+      -> flat_map<range-key-type<R>, range-mapped-type<R>, less<range-key-type<R>>,
+                  vector<range-key-type<R>, alloc-rebind<Allocator, range-key-type<R>>>,
+                  vector<range-mapped-type<R>, alloc-rebind<Allocator, range-mapped-type<R>>>>;
+
+  template<class Key, class T, class Compare = less<Key>>
+    flat_map(initializer_list<pair<Key, T>>, Compare = Compare())
+      -> flat_map<Key, T, Compare>;
+
+  template<class Key, class T, class Compare = less<Key>>
+    flat_map(sorted_unique_t, initializer_list<pair<Key, T>>, Compare = Compare())
+        -> flat_map<Key, T, Compare>;
+
   template<class Key, class T, class Compare, class KeyContainer, class MappedContainer,
-           class Allocator>
-    struct uses_allocator<flat_map<Key, T, Compare, KeyContainer, MappedContainer>,
-                          Allocator>;
+            class Allocator>
+    struct uses_allocator<flat_map<Key, T, Compare, KeyContainer, MappedContainer>, Allocator>
+      : bool_constant<uses_allocator_v<KeyContainer, Allocator> &&
+                      uses_allocator_v<MappedContainer, Allocator>> { };
 
   // [flat.map.erasure], erasure for flat_map
   template<class Key, class T, class Compare, class KeyContainer, class MappedContainer,
@@ -36,18 +377,329 @@ namespace std {
     typename flat_map<Key, T, Compare, KeyContainer, MappedContainer>::size_type
       erase_if(flat_map<Key, T, Compare, KeyContainer, MappedContainer>& c, Predicate pred);
 
-  // [flat.multimap], class template flat_multimap
-  template<class Key, class T, class Compare = less<Key>,
-           class KeyContainer = vector<Key>, class MappedContainer = vector<T>>
-    class flat_multimap;
-
   struct sorted_equivalent_t { explicit sorted_equivalent_t() = default; };
   inline constexpr sorted_equivalent_t sorted_equivalent{};
 
+  // [flat.multimap], class template flat_multimap
+  template<class Key, class T, class Compare = less<Key>,
+           class KeyContainer = vector<Key>, class MappedContainer = vector<T>>
+  class flat_multimap {
+  public:
+    // types
+    using key_type               = Key;
+    using mapped_type            = T;
+    using value_type             = pair<key_type, mapped_type>;
+    using key_compare            = Compare;
+    using reference              = pair<const key_type&, mapped_type&>;
+    using const_reference        = pair<const key_type&, const mapped_type&>;
+    using size_type              = size_t;
+    using difference_type        = ptrdiff_t;
+    using iterator               = implementation-defined;     // see [container.requirements]
+    using const_iterator         = implementation-defined;     // see [container.requirements]
+    using reverse_iterator       = std::reverse_iterator<iterator>;
+    using const_reverse_iterator = std::reverse_iterator<const_iterator>;
+    using key_container_type     = KeyContainer;
+    using mapped_container_type  = MappedContainer;
+
+    class value_compare {
+    private:
+      key_compare comp;                                         // exposition only
+      constexpr value_compare(key_compare c) : comp(c) { }      // exposition only
+
+    public:
+      constexpr bool operator()(const_reference x, const_reference y) const {
+        return comp(x.first, y.first);
+      }
+    };
+
+    struct containers {
+      key_container_type keys;
+      mapped_container_type values;
+    };
+
+    // [flat.multimap.cons], constructors
+    constexpr flat_multimap() : flat_multimap(key_compare()) { }
+
+    constexpr flat_multimap(const flat_multimap&);
+    constexpr flat_multimap(flat_multimap&&);
+    constexpr flat_multimap& operator=(const flat_multimap&);
+    constexpr flat_multimap& operator=(flat_multimap&&);
+
+    constexpr explicit flat_multimap(const key_compare& comp)
+      : c(), compare(comp) { }
+
+    constexpr flat_multimap(key_container_type key_cont, mapped_container_type mapped_cont,
+                            const key_compare& comp = key_compare());
+
+    constexpr flat_multimap(sorted_equivalent_t,
+                            key_container_type key_cont, mapped_container_type mapped_cont,
+                            const key_compare& comp = key_compare());
+
+    template<class InputIterator>
+      constexpr flat_multimap(InputIterator first, InputIterator last,
+                              const key_compare& comp = key_compare())
+        : c(), compare(comp)
+        { insert(first, last); }
+
+    template<class InputIterator>
+      constexpr flat_multimap(sorted_equivalent_t, InputIterator first, InputIterator last,
+                              const key_compare& comp = key_compare())
+        : c(), compare(comp) { insert(sorted_equivalent, first, last); }
+
+    template<container-compatible-range<value_type> R>
+      constexpr flat_multimap(from_range_t, R&& rg)
+        : flat_multimap(from_range, std::forward<R>(rg), key_compare()) { }
+    template<container-compatible-range<value_type> R>
+      constexpr flat_multimap(from_range_t, R&& rg, const key_compare& comp)
+        : flat_multimap(comp) { insert_range(std::forward<R>(rg)); }
+
+    constexpr flat_multimap(initializer_list<value_type> il,
+                            const key_compare& comp = key_compare())
+        : flat_multimap(il.begin(), il.end(), comp) { }
+
+    constexpr flat_multimap(sorted_equivalent_t, initializer_list<value_type> il,
+                            const key_compare& comp = key_compare())
+        : flat_multimap(sorted_equivalent, il.begin(), il.end(), comp) { }
+
+    // [flat.multimap.cons.alloc], constructors with allocators
+
+    template<class Alloc>
+      constexpr explicit flat_multimap(const Alloc& a);
+    template<class Alloc>
+      constexpr flat_multimap(const key_compare& comp, const Alloc& a);
+    template<class Alloc>
+      constexpr flat_multimap(const key_container_type& key_cont,
+                              const mapped_container_type& mapped_cont, const Alloc& a);
+    template<class Alloc>
+      constexpr flat_multimap(const key_container_type& key_cont,
+                              const mapped_container_type& mapped_cont,
+                              const key_compare& comp, const Alloc& a);
+    template<class Alloc>
+      constexpr flat_multimap(sorted_equivalent_t, const key_container_type& key_cont,
+                              const mapped_container_type& mapped_cont, const Alloc& a);
+    template<class Alloc>
+      constexpr flat_multimap(sorted_equivalent_t, const key_container_type& key_cont,
+                              const mapped_container_type& mapped_cont,
+                              const key_compare& comp, const Alloc& a);
+    template<class Alloc>
+      constexpr flat_multimap(const flat_multimap&, const Alloc& a);
+    template<class Alloc>
+      constexpr flat_multimap(flat_multimap&&, const Alloc& a);
+    template<class InputIterator, class Alloc>
+      constexpr flat_multimap(InputIterator first, InputIterator last, const Alloc& a);
+    template<class InputIterator, class Alloc>
+      constexpr flat_multimap(InputIterator first, InputIterator last,
+                              const key_compare& comp, const Alloc& a);
+    template<class InputIterator, class Alloc>
+      constexpr flat_multimap(sorted_equivalent_t, InputIterator first, InputIterator last,
+                              const Alloc& a);
+    template<class InputIterator, class Alloc>
+      constexpr flat_multimap(sorted_equivalent_t, InputIterator first, InputIterator last,
+                              const key_compare& comp, const Alloc& a);
+    template<container-compatible-range<value_type> R, class Alloc>
+      constexpr flat_multimap(from_range_t, R&& rg, const Alloc& a);
+    template<container-compatible-range<value_type> R, class Alloc>
+      constexpr flat_multimap(from_range_t, R&& rg, const key_compare& comp, const Alloc& a);
+    template<class Alloc>
+      constexpr flat_multimap(initializer_list<value_type> il, const Alloc& a);
+    template<class Alloc>
+      constexpr flat_multimap(initializer_list<value_type> il, const key_compare& comp,
+                              const Alloc& a);
+    template<class Alloc>
+      constexpr flat_multimap(sorted_equivalent_t, initializer_list<value_type> il,
+                              const Alloc& a);
+    template<class Alloc>
+      constexpr flat_multimap(sorted_equivalent_t, initializer_list<value_type> il,
+                              const key_compare& comp, const Alloc& a);
+
+    constexpr flat_multimap& operator=(initializer_list<value_type>);
+
+    // iterators
+    constexpr iterator               begin() noexcept;
+    constexpr const_iterator         begin() const noexcept;
+    constexpr iterator               end() noexcept;
+    constexpr const_iterator         end() const noexcept;
+
+    constexpr reverse_iterator       rbegin() noexcept;
+    constexpr const_reverse_iterator rbegin() const noexcept;
+    constexpr reverse_iterator       rend() noexcept;
+    constexpr const_reverse_iterator rend() const noexcept;
+
+    constexpr const_iterator         cbegin() const noexcept;
+    constexpr const_iterator         cend() const noexcept;
+    constexpr const_reverse_iterator crbegin() const noexcept;
+    constexpr const_reverse_iterator crend() const noexcept;
+
+    // capacity
+    constexpr bool empty() const noexcept;
+    constexpr size_type size() const noexcept;
+    constexpr size_type max_size() const noexcept;
+
+    // modifiers
+    template<class... Args> constexpr iterator emplace(Args&&... args);
+    template<class... Args>
+      constexpr iterator emplace_hint(const_iterator position, Args&&... args);
+
+    constexpr iterator insert(const value_type& x)
+      { return emplace(x); }
+    constexpr iterator insert(value_type&& x)
+      { return emplace(std::move(x)); }
+    constexpr iterator insert(const_iterator position, const value_type& x)
+      { return emplace_hint(position, x); }
+    constexpr iterator insert(const_iterator position, value_type&& x)
+      { return emplace_hint(position, std::move(x)); }
+
+    template<class P> constexpr iterator insert(P&& x);
+    template<class P>
+      constexpr iterator insert(const_iterator position, P&&);
+    template<class InputIterator>
+      constexpr void insert(InputIterator first, InputIterator last);
+    template<class InputIterator>
+      constexpr void insert(sorted_equivalent_t, InputIterator first, InputIterator last);
+    template<container-compatible-range<value_type> R>
+      constexpr void insert_range(R&& rg);
+    template<container-compatible-range<value_type> R>
+      constexpr void insert_range(sorted_equivalent_t, R&& rg);
+
+    constexpr void insert(initializer_list<value_type> il)
+      { insert(il.begin(), il.end()); }
+    constexpr void insert(sorted_equivalent_t, initializer_list<value_type> il)
+      { insert(sorted_equivalent, il.begin(), il.end()); }
+
+    constexpr containers extract() &&;
+    constexpr void replace(key_container_type&& key_cont, mapped_container_type&& mapped_cont);
+
+    constexpr iterator erase(iterator position);
+    constexpr iterator erase(const_iterator position);
+    constexpr size_type erase(const key_type& x);
+    template<class K> constexpr size_type erase(K&& x);
+    constexpr iterator erase(const_iterator first, const_iterator last);
+
+    constexpr void swap(flat_multimap&)
+      noexcept(is_nothrow_swappable_v<key_container_type> &&
+               is_nothrow_swappable_v<mapped_container_type> &&
+               is_nothrow_swappable_v<key_compare>);
+    constexpr void clear() noexcept;
+
+    // observers
+    constexpr key_compare key_comp() const;
+    constexpr value_compare value_comp() const;
+
+    constexpr const key_container_type& keys() const noexcept { return c.keys; }
+    constexpr const mapped_container_type& values() const noexcept { return c.values; }
+
+    // map operations
+    constexpr iterator find(const key_type& x);
+    constexpr const_iterator find(const key_type& x) const;
+    template<class K> constexpr iterator find(const K& x);
+    template<class K> constexpr const_iterator find(const K& x) const;
+
+    constexpr size_type count(const key_type& x) const;
+    template<class K> constexpr size_type count(const K& x) const;
+
+    constexpr bool contains(const key_type& x) const;
+    template<class K> constexpr bool contains(const K& x) const;
+
+    constexpr iterator lower_bound(const key_type& x);
+    constexpr const_iterator lower_bound(const key_type& x) const;
+    template<class K> constexpr iterator lower_bound(const K& x);
+    template<class K> constexpr const_iterator lower_bound(const K& x) const;
+
+    constexpr iterator upper_bound(const key_type& x);
+    constexpr const_iterator upper_bound(const key_type& x) const;
+    template<class K> constexpr iterator upper_bound(const K& x);
+    template<class K> constexpr const_iterator upper_bound(const K& x) const;
+
+    constexpr pair<iterator, iterator> equal_range(const key_type& x);
+    constexpr pair<const_iterator, const_iterator> equal_range(const key_type& x) const;
+    template<class K>
+      constexpr pair<iterator, iterator> equal_range(const K& x);
+    template<class K>
+      constexpr pair<const_iterator, const_iterator> equal_range(const K& x) const;
+
+    friend constexpr bool operator==(const flat_multimap& x, const flat_multimap& y);
+
+    friend constexpr synth-three-way-result<value_type>
+      operator<=>(const flat_multimap& x, const flat_multimap& y);
+
+    friend constexpr void swap(flat_multimap& x, flat_multimap& y)
+      noexcept(noexcept(x.swap(y)))
+      { x.swap(y); }
+
+  private:
+    containers c;               // exposition only
+    key_compare compare;        // exposition only
+  };
+
+  template<class KeyContainer, class MappedContainer,
+           class Compare = less<typename KeyContainer::value_type>>
+    flat_multimap(KeyContainer, MappedContainer, Compare = Compare())
+      -> flat_multimap<typename KeyContainer::value_type, typename MappedContainer::value_type,
+                       Compare, KeyContainer, MappedContainer>;
+
+  template<class KeyContainer, class MappedContainer, class Allocator>
+    flat_multimap(KeyContainer, MappedContainer, Allocator)
+      -> flat_multimap<typename KeyContainer::value_type, typename MappedContainer::value_type,
+                       less<typename KeyContainer::value_type>, KeyContainer, MappedContainer>;
+  template<class KeyContainer, class MappedContainer, class Compare, class Allocator>
+    flat_multimap(KeyContainer, MappedContainer, Compare, Allocator)
+      -> flat_multimap<typename KeyContainer::value_type, typename MappedContainer::value_type,
+                       Compare, KeyContainer, MappedContainer>;
+
+  template<class KeyContainer, class MappedContainer,
+           class Compare = less<typename KeyContainer::value_type>>
+    flat_multimap(sorted_equivalent_t, KeyContainer, MappedContainer, Compare = Compare())
+      -> flat_multimap<typename KeyContainer::value_type, typename MappedContainer::value_type,
+                       Compare, KeyContainer, MappedContainer>;
+
+  template<class KeyContainer, class MappedContainer, class Allocator>
+    flat_multimap(sorted_equivalent_t, KeyContainer, MappedContainer, Allocator)
+      -> flat_multimap<typename KeyContainer::value_type, typename MappedContainer::value_type,
+                       less<typename KeyContainer::value_type>, KeyContainer, MappedContainer>;
+  template<class KeyContainer, class MappedContainer, class Compare, class Allocator>
+    flat_multimap(sorted_equivalent_t, KeyContainer, MappedContainer, Compare, Allocator)
+      -> flat_multimap<typename KeyContainer::value_type, typename MappedContainer::value_type,
+                       Compare, KeyContainer, MappedContainer>;
+
+  template<class InputIterator, class Compare = less<iter-key-type<InputIterator>>>
+    flat_multimap(InputIterator, InputIterator, Compare = Compare())
+      -> flat_multimap<iter-key-type<InputIterator>, iter-mapped-type<InputIterator>, Compare>;
+
+  template<class InputIterator, class Compare = less<iter-key-type<InputIterator>>>
+    flat_multimap(sorted_equivalent_t, InputIterator, InputIterator, Compare = Compare())
+      -> flat_multimap<iter-key-type<InputIterator>, iter-mapped-type<InputIterator>, Compare>;
+
+  template<ranges::input_range R, class Compare = less<range-key-type<R>>,
+           class Allocator = allocator<byte>>
+    flat_multimap(from_range_t, R&&, Compare = Compare(), Allocator = Allocator())
+      -> flat_multimap<range-key-type<R>, range-mapped-type<R>, Compare,
+                       vector<range-key-type<R>,
+                              alloc-rebind<Allocator, range-key-type<R>>>,
+                       vector<range-mapped-type<R>,
+                              alloc-rebind<Allocator, range-mapped-type<R>>>>;
+
+  template<ranges::input_range R, class Allocator>
+    flat_multimap(from_range_t, R&&, Allocator)
+      -> flat_multimap<range-key-type<R>, range-mapped-type<R>, less<range-key-type<R>>,
+                       vector<range-key-type<R>,
+                              alloc-rebind<Allocator, range-key-type<R>>>,
+                       vector<range-mapped-type<R>,
+                              alloc-rebind<Allocator, range-mapped-type<R>>>>;
+
+  template<class Key, class T, class Compare = less<Key>>
+    flat_multimap(initializer_list<pair<Key, T>>, Compare = Compare())
+      -> flat_multimap<Key, T, Compare>;
+
+  template<class Key, class T, class Compare = less<Key>>
+    flat_multimap(sorted_equivalent_t, initializer_list<pair<Key, T>>, Compare = Compare())
+        -> flat_multimap<Key, T, Compare>;
+
   template<class Key, class T, class Compare, class KeyContainer, class MappedContainer,
-           class Allocator>
+            class Allocator>
     struct uses_allocator<flat_multimap<Key, T, Compare, KeyContainer, MappedContainer>,
-                          Allocator>;
+                          Allocator>
+      : bool_constant<uses_allocator_v<KeyContainer, Allocator> &&
+                      uses_allocator_v<MappedContainer, Allocator>> { };
 
   // [flat.multimap.erasure], erasure for flat_multimap
   template<class Key, class T, class Compare, class KeyContainer, class MappedContainer,
diff --git a/lib/libcxx/include/flat_set b/lib/libcxx/include/flat_set
index 66041a42b7..f41e825dbd 100644
--- a/lib/libcxx/include/flat_set
+++ b/lib/libcxx/include/flat_set
@@ -17,30 +17,565 @@
 #include <initializer_list>     // see [initializer.list.syn]
 
 namespace std {
-  // [flat.set], class template flat_set
-  template<class Key, class Compare = less<Key>, class KeyContainer = vector<Key>>
-    class flat_set;
-
   struct sorted_unique_t { explicit sorted_unique_t() = default; };
   inline constexpr sorted_unique_t sorted_unique{};
 
+  // [flat.set], class template flat_set
+  template<class Key, class Compare = less<Key>, class KeyContainer = vector<Key>>
+  class flat_set {
+  public:
+    // types
+    using key_type                  = Key;
+    using value_type                = Key;
+    using key_compare               = Compare;
+    using value_compare             = Compare;
+    using reference                 = value_type&;
+    using const_reference           = const value_type&;
+    using size_type                 = KeyContainer::size_type;
+    using difference_type           = KeyContainer::difference_type;
+    using iterator                  = implementation-defined;  // see [container.requirements]
+    using const_iterator            = implementation-defined;  // see [container.requirements]
+    using reverse_iterator          = std::reverse_iterator<iterator>;
+    using const_reverse_iterator    = std::reverse_iterator<const_iterator>;
+    using container_type            = KeyContainer;
+
+    // [flat.set.cons], constructors
+    constexpr flat_set() : flat_set(key_compare()) { }
+
+    constexpr flat_set(const flat_set&);
+    constexpr flat_set(flat_set&&);
+    constexpr flat_set& operator=(const flat_set&);
+    constexpr flat_set& operator=(flat_set&&);
+
+    constexpr explicit flat_set(const key_compare& comp)
+      : c(), compare(comp) { }
+
+    constexpr explicit flat_set(container_type cont, const key_compare& comp = key_compare());
+
+    constexpr flat_set(sorted_unique_t, container_type cont,
+                       const key_compare& comp = key_compare())
+      : c(std::move(cont)), compare(comp) { }
+
+    template<class InputIterator>
+      constexpr flat_set(InputIterator first, InputIterator last,
+                         const key_compare& comp = key_compare())
+        : c(), compare(comp)
+        { insert(first, last); }
+
+    template<class InputIterator>
+      constexpr flat_set(sorted_unique_t, InputIterator first, InputIterator last,
+               const key_compare& comp = key_compare())
+        : c(first, last), compare(comp) { }
+
+    template<container-compatible-range<value_type> R>
+      constexpr flat_set(from_range_t, R&& rg)
+        : flat_set(from_range, std::forward<R>(rg), key_compare()) { }
+    template<container-compatible-range<value_type> R>
+      constexpr flat_set(from_range_t, R&& rg, const key_compare& comp)
+        : flat_set(comp)
+        { insert_range(std::forward<R>(rg)); }
+
+    constexpr flat_set(initializer_list<value_type> il, const key_compare& comp = key_compare())
+        : flat_set(il.begin(), il.end(), comp) { }
+
+    constexpr flat_set(sorted_unique_t, initializer_list<value_type> il,
+             const key_compare& comp = key_compare())
+        : flat_set(sorted_unique, il.begin(), il.end(), comp) { }
+
+    // [flat.set.cons.alloc], constructors with allocators
+
+    template<class Alloc>
+      constexpr explicit flat_set(const Alloc& a);
+    template<class Alloc>
+      constexpr flat_set(const key_compare& comp, const Alloc& a);
+    template<class Alloc>
+      constexpr flat_set(const container_type& cont, const Alloc& a);
+    template<class Alloc>
+      constexpr flat_set(const container_type& cont, const key_compare& comp, const Alloc& a);
+    template<class Alloc>
+      constexpr flat_set(sorted_unique_t, const container_type& cont, const Alloc& a);
+    template<class Alloc>
+      constexpr flat_set(sorted_unique_t, const container_type& cont,
+                         const key_compare& comp, const Alloc& a);
+    template<class Alloc>
+      constexpr flat_set(const flat_set&, const Alloc& a);
+    template<class Alloc>
+      constexpr flat_set(flat_set&&, const Alloc& a);
+    template<class InputIterator, class Alloc>
+      constexpr flat_set(InputIterator first, InputIterator last, const Alloc& a);
+    template<class InputIterator, class Alloc>
+      constexpr flat_set(InputIterator first, InputIterator last,
+                         const key_compare& comp, const Alloc& a);
+    template<class InputIterator, class Alloc>
+      constexpr flat_set(sorted_unique_t, InputIterator first, InputIterator last,
+                         const Alloc& a);
+    template<class InputIterator, class Alloc>
+      constexpr flat_set(sorted_unique_t, InputIterator first, InputIterator last,
+                         const key_compare& comp, const Alloc& a);
+    template<container-compatible-range<value_type> R, class Alloc>
+      constexpr flat_set(from_range_t, R&& rg, const Alloc& a);
+    template<container-compatible-range<value_type> R, class Alloc>
+      constexpr flat_set(from_range_t, R&& rg, const key_compare& comp, const Alloc& a);
+    template<class Alloc>
+      constexpr flat_set(initializer_list<value_type> il, const Alloc& a);
+    template<class Alloc>
+      constexpr flat_set(initializer_list<value_type> il, const key_compare& comp,
+                         const Alloc& a);
+    template<class Alloc>
+      constexpr flat_set(sorted_unique_t, initializer_list<value_type> il, const Alloc& a);
+    template<class Alloc>
+      constexpr flat_set(sorted_unique_t, initializer_list<value_type> il,
+                         const key_compare& comp, const Alloc& a);
+
+    constexpr flat_set& operator=(initializer_list<value_type>);
+
+    // iterators
+    constexpr iterator               begin() noexcept;
+    constexpr const_iterator         begin() const noexcept;
+    constexpr iterator               end() noexcept;
+    constexpr const_iterator         end() const noexcept;
+
+    constexpr reverse_iterator       rbegin() noexcept;
+    constexpr const_reverse_iterator rbegin() const noexcept;
+    constexpr reverse_iterator       rend() noexcept;
+    constexpr const_reverse_iterator rend() const noexcept;
+
+    constexpr const_iterator         cbegin() const noexcept;
+    constexpr const_iterator         cend() const noexcept;
+    constexpr const_reverse_iterator crbegin() const noexcept;
+    constexpr const_reverse_iterator crend() const noexcept;
+
+    // capacity
+    constexpr bool empty() const noexcept;
+    constexpr size_type size() const noexcept;
+    constexpr size_type max_size() const noexcept;
+
+    // [flat.set.modifiers], modifiers
+    template<class... Args> constexpr pair<iterator, bool> emplace(Args&&... args);
+    template<class... Args>
+      constexpr iterator emplace_hint(const_iterator position, Args&&... args);
+
+    constexpr pair<iterator, bool> insert(const value_type& x)
+      { return emplace(x); }
+    constexpr pair<iterator, bool> insert(value_type&& x)
+      { return emplace(std::move(x)); }
+    template<class K> constexpr pair<iterator, bool> insert(K&& x);
+    constexpr iterator insert(const_iterator position, const value_type& x)
+      { return emplace_hint(position, x); }
+    constexpr iterator insert(const_iterator position, value_type&& x)
+      { return emplace_hint(position, std::move(x)); }
+    template<class K> constexpr iterator insert(const_iterator hint, K&& x);
+
+    template<class InputIterator>
+      constexpr void insert(InputIterator first, InputIterator last);
+    template<class InputIterator>
+      constexpr void insert(sorted_unique_t, InputIterator first, InputIterator last);
+    template<container-compatible-range<value_type> R>
+      constexpr void insert_range(R&& rg);
+    template<container-compatible-range<value_type> R>
+      constexpr void insert_range(sorted_unique_t, R&& rg);
+
+    constexpr void insert(initializer_list<value_type> il)
+      { insert(il.begin(), il.end()); }
+    constexpr void insert(sorted_unique_t, initializer_list<value_type> il)
+      { insert(sorted_unique, il.begin(), il.end()); }
+
+    constexpr container_type extract() &&;
+    constexpr void replace(container_type&&);
+
+    constexpr iterator erase(iterator position) requires (!same_as<iterator, const_iterator>);
+    constexpr iterator erase(const_iterator position);
+    constexpr size_type erase(const key_type& x);
+    template<class K> constexpr size_type erase(K&& x);
+    constexpr iterator erase(const_iterator first, const_iterator last);
+
+    constexpr void swap(flat_set& y) noexcept(see below);
+    constexpr void clear() noexcept;
+
+    // observers
+    constexpr key_compare key_comp() const;
+    constexpr value_compare value_comp() const;
+
+    // set operations
+    constexpr iterator find(const key_type& x);
+    constexpr const_iterator find(const key_type& x) const;
+    template<class K> constexpr iterator find(const K& x);
+    template<class K> constexpr const_iterator find(const K& x) const;
+
+    constexpr size_type count(const key_type& x) const;
+    template<class K> constexpr size_type count(const K& x) const;
+
+    constexpr bool contains(const key_type& x) const;
+    template<class K> constexpr bool contains(const K& x) const;
+
+    constexpr iterator lower_bound(const key_type& x);
+    constexpr const_iterator lower_bound(const key_type& x) const;
+    template<class K> constexpr iterator lower_bound(const K& x);
+    template<class K> constexpr const_iterator lower_bound(const K& x) const;
+
+    constexpr iterator upper_bound(const key_type& x);
+    constexpr const_iterator upper_bound(const key_type& x) const;
+    template<class K> constexpr iterator upper_bound(const K& x);
+    template<class K> constexpr const_iterator upper_bound(const K& x) const;
+
+    constexpr pair<iterator, iterator> equal_range(const key_type& x);
+    constexpr pair<const_iterator, const_iterator> equal_range(const key_type& x) const;
+    template<class K>
+      constexpr pair<iterator, iterator> equal_range(const K& x);
+    template<class K>
+      constexpr pair<const_iterator, const_iterator> equal_range(const K& x) const;
+
+    friend constexpr bool operator==(const flat_set& x, const flat_set& y);
+
+    friend constexpr synth-three-way-result<value_type>
+      operator<=>(const flat_set& x, const flat_set& y);
+
+    friend constexpr void swap(flat_set& x, flat_set& y) noexcept(noexcept(x.swap(y)))
+      { x.swap(y); }
+
+  private:
+    container_type c;           // exposition only
+    key_compare compare;        // exposition only
+  };
+
+  template<class KeyContainer, class Compare = less<typename KeyContainer::value_type>>
+    flat_set(KeyContainer, Compare = Compare())
+      -> flat_set<typename KeyContainer::value_type, Compare, KeyContainer>;
+  template<class KeyContainer, class Allocator>
+    flat_set(KeyContainer, Allocator)
+      -> flat_set<typename KeyContainer::value_type,
+                  less<typename KeyContainer::value_type>, KeyContainer>;
+  template<class KeyContainer, class Compare, class Allocator>
+    flat_set(KeyContainer, Compare, Allocator)
+      -> flat_set<typename KeyContainer::value_type, Compare, KeyContainer>;
+
+  template<class KeyContainer, class Compare = less<typename KeyContainer::value_type>>
+    flat_set(sorted_unique_t, KeyContainer, Compare = Compare())
+      -> flat_set<typename KeyContainer::value_type, Compare, KeyContainer>;
+  template<class KeyContainer, class Allocator>
+    flat_set(sorted_unique_t, KeyContainer, Allocator)
+      -> flat_set<typename KeyContainer::value_type,
+                  less<typename KeyContainer::value_type>, KeyContainer>;
+  template<class KeyContainer, class Compare, class Allocator>
+    flat_set(sorted_unique_t, KeyContainer, Compare, Allocator)
+      -> flat_set<typename KeyContainer::value_type, Compare, KeyContainer>;
+
+  template<class InputIterator, class Compare = less<iter-value-type<InputIterator>>>
+    flat_set(InputIterator, InputIterator, Compare = Compare())
+      -> flat_set<iter-value-type<InputIterator>, Compare>;
+
+  template<class InputIterator, class Compare = less<iter-value-type<InputIterator>>>
+    flat_set(sorted_unique_t, InputIterator, InputIterator, Compare = Compare())
+      -> flat_set<iter-value-type<InputIterator>, Compare>;
+
+  template<ranges::input_range R, class Compare = less<ranges::range_value_t<R>>,
+           class Allocator = allocator<ranges::range_value_t<R>>>
+    flat_set(from_range_t, R&&, Compare = Compare(), Allocator = Allocator())
+      -> flat_set<ranges::range_value_t<R>, Compare,
+                  vector<ranges::range_value_t<R>,
+                         alloc-rebind<Allocator, ranges::range_value_t<R>>>>;
+
+  template<ranges::input_range R, class Allocator>
+    flat_set(from_range_t, R&&, Allocator)
+      -> flat_set<ranges::range_value_t<R>, less<ranges::range_value_t<R>>,
+                  vector<ranges::range_value_t<R>,
+                         alloc-rebind<Allocator, ranges::range_value_t<R>>>>;
+
+  template<class Key, class Compare = less<Key>>
+    flat_set(initializer_list<Key>, Compare = Compare())
+      -> flat_set<Key, Compare>;
+
+  template<class Key, class Compare = less<Key>>
+    flat_set(sorted_unique_t, initializer_list<Key>, Compare = Compare())
+      -> flat_set<Key, Compare>;
+
   template<class Key, class Compare, class KeyContainer, class Allocator>
-    struct uses_allocator<flat_set<Key, Compare, KeyContainer>, Allocator>;
+    struct uses_allocator<flat_set<Key, Compare, KeyContainer>, Allocator>
+      : bool_constant<uses_allocator_v<KeyContainer, Allocator>> { };
 
   // [flat.set.erasure], erasure for flat_set
   template<class Key, class Compare, class KeyContainer, class Predicate>
     typename flat_set<Key, Compare, KeyContainer>::size_type
       erase_if(flat_set<Key, Compare, KeyContainer>& c, Predicate pred);
 
-   // [flat.multiset], class template flat_multiset
-  template<class Key, class Compare = less<Key>, class KeyContainer = vector<Key>>
-    class flat_multiset;
-
   struct sorted_equivalent_t { explicit sorted_equivalent_t() = default; };
   inline constexpr sorted_equivalent_t sorted_equivalent{};
 
+   // [flat.multiset], class template flat_multiset
+  template<class Key, class Compare = less<Key>, class KeyContainer = vector<Key>>
+  class flat_multiset {
+  public:
+    // types
+    using key_type                  = Key;
+    using value_type                = Key;
+    using key_compare               = Compare;
+    using value_compare             = Compare;
+    using reference                 = value_type&;
+    using const_reference           = const value_type&;
+    using size_type                 = KeyContainer::size_type;
+    using difference_type           = KeyContainer::difference_type;
+    using iterator                  = implementation-defined;  // see [container.requirements]
+    using const_iterator            = implementation-defined;  // see [container.requirements]
+    using reverse_iterator          = std::reverse_iterator<iterator>;
+    using const_reverse_iterator    = std::reverse_iterator<const_iterator>;
+    using container_type            = KeyContainer;
+
+    // [flat.multiset.cons], constructors
+    constexpr flat_multiset() : flat_multiset(key_compare()) { }
+
+    constexpr flat_multiset(const flat_multiset&);
+    constexpr flat_multiset(flat_multiset&&);
+    constexpr flat_multiset& operator=(const flat_multiset&);
+    constexpr flat_multiset& operator=(flat_multiset&&);
+
+    constexpr explicit flat_multiset(const key_compare& comp)
+      : c(), compare(comp) { }
+
+    constexpr explicit flat_multiset(container_type cont,
+                                     const key_compare& comp = key_compare());
+
+    constexpr flat_multiset(sorted_equivalent_t, container_type cont,
+                            const key_compare& comp = key_compare())
+      : c(std::move(cont)), compare(comp) { }
+
+    template<class InputIterator>
+      constexpr flat_multiset(InputIterator first, InputIterator last,
+                              const key_compare& comp = key_compare())
+        : c(), compare(comp)
+        { insert(first, last); }
+
+    template<class InputIterator>
+      constexpr flat_multiset(sorted_equivalent_t, InputIterator first, InputIterator last,
+                              const key_compare& comp = key_compare())
+        : c(first, last), compare(comp) { }
+
+    template<container-compatible-range<value_type> R>
+      constexpr flat_multiset(from_range_t, R&& rg)
+        : flat_multiset(from_range, std::forward<R>(rg), key_compare()) { }
+    template<container-compatible-range<value_type> R>
+      constexpr flat_multiset(from_range_t, R&& rg, const key_compare& comp)
+        : flat_multiset(comp)
+        { insert_range(std::forward<R>(rg)); }
+
+    constexpr flat_multiset(initializer_list<value_type> il,
+                            const key_compare& comp = key_compare())
+      : flat_multiset(il.begin(), il.end(), comp) { }
+
+    constexpr flat_multiset(sorted_equivalent_t, initializer_list<value_type> il,
+                            const key_compare& comp = key_compare())
+        : flat_multiset(sorted_equivalent, il.begin(), il.end(), comp) { }
+
+    // [flat.multiset.cons.alloc], constructors with allocators
+
+    template<class Alloc>
+      constexpr explicit flat_multiset(const Alloc& a);
+    template<class Alloc>
+      constexpr flat_multiset(const key_compare& comp, const Alloc& a);
+    template<class Alloc>
+      constexpr flat_multiset(const container_type& cont, const Alloc& a);
+    template<class Alloc>
+      constexpr flat_multiset(const container_type& cont, const key_compare& comp,
+                              const Alloc& a);
+    template<class Alloc>
+      constexpr flat_multiset(sorted_equivalent_t, const container_type& cont, const Alloc& a);
+    template<class Alloc>
+      constexpr flat_multiset(sorted_equivalent_t, const container_type& cont,
+                              const key_compare& comp, const Alloc& a);
+    template<class Alloc>
+      constexpr flat_multiset(const flat_multiset&, const Alloc& a);
+    template<class Alloc>
+      constexpr flat_multiset(flat_multiset&&, const Alloc& a);
+    template<class InputIterator, class Alloc>
+      constexpr flat_multiset(InputIterator first, InputIterator last, const Alloc& a);
+    template<class InputIterator, class Alloc>
+      constexpr flat_multiset(InputIterator first, InputIterator last,
+                              const key_compare& comp, const Alloc& a);
+    template<class InputIterator, class Alloc>
+      constexpr flat_multiset(sorted_equivalent_t, InputIterator first, InputIterator last,
+                              const Alloc& a);
+    template<class InputIterator, class Alloc>
+      constexpr flat_multiset(sorted_equivalent_t, InputIterator first, InputIterator last,
+                              const key_compare& comp, const Alloc& a);
+    template<container-compatible-range<value_type> R, class Alloc>
+      constexpr flat_multiset(from_range_t, R&& rg, const Alloc& a);
+    template<container-compatible-range<value_type> R, class Alloc>
+      constexpr flat_multiset(from_range_t, R&& rg, const key_compare& comp, const Alloc& a);
+    template<class Alloc>
+      constexpr flat_multiset(initializer_list<value_type> il, const Alloc& a);
+    template<class Alloc>
+      constexpr flat_multiset(initializer_list<value_type> il, const key_compare& comp,
+                              const Alloc& a);
+    template<class Alloc>
+      constexpr flat_multiset(sorted_equivalent_t, initializer_list<value_type> il,
+                              const Alloc& a);
+    template<class Alloc>
+      constexpr flat_multiset(sorted_equivalent_t, initializer_list<value_type> il,
+                              const key_compare& comp, const Alloc& a);
+
+    constexpr flat_multiset& operator=(initializer_list<value_type>);
+
+    // iterators
+    constexpr iterator               begin() noexcept;
+    constexpr const_iterator         begin() const noexcept;
+    constexpr iterator               end() noexcept;
+    constexpr const_iterator         end() const noexcept;
+
+    constexpr reverse_iterator       rbegin() noexcept;
+    constexpr const_reverse_iterator rbegin() const noexcept;
+    constexpr reverse_iterator       rend() noexcept;
+    constexpr const_reverse_iterator rend() const noexcept;
+
+    constexpr const_iterator         cbegin() const noexcept;
+    constexpr const_iterator         cend() const noexcept;
+    constexpr const_reverse_iterator crbegin() const noexcept;
+    constexpr const_reverse_iterator crend() const noexcept;
+
+    // capacity
+    constexpr bool empty() const noexcept;
+    constexpr size_type size() const noexcept;
+    constexpr size_type max_size() const noexcept;
+
+    // [flat.multiset.modifiers], modifiers
+    template<class... Args> constexpr iterator emplace(Args&&... args);
+    template<class... Args>
+      constexpr iterator emplace_hint(const_iterator position, Args&&... args);
+
+    constexpr iterator insert(const value_type& x)
+      { return emplace(x); }
+    constexpr iterator insert(value_type&& x)
+      { return emplace(std::move(x)); }
+    constexpr iterator insert(const_iterator position, const value_type& x)
+      { return emplace_hint(position, x); }
+    constexpr iterator insert(const_iterator position, value_type&& x)
+      { return emplace_hint(position, std::move(x)); }
+
+    template<class InputIterator>
+      constexpr void insert(InputIterator first, InputIterator last);
+    template<class InputIterator>
+      constexpr void insert(sorted_equivalent_t, InputIterator first, InputIterator last);
+    template<container-compatible-range<value_type> R>
+      constexpr void insert_range(R&& rg);
+    template<container-compatible-range<value_type> R>
+      constexpr void insert_range(sorted_equivalent_t, R&& rg);
+
+    constexpr void insert(initializer_list<value_type> il)
+      { insert(il.begin(), il.end()); }
+    constexpr void insert(sorted_equivalent_t, initializer_list<value_type> il)
+      { insert(sorted_equivalent, il.begin(), il.end()); }
+
+    constexpr container_type extract() &&;
+    constexpr void replace(container_type&&);
+
+    constexpr iterator erase(iterator position) requires (!same_as<iterator, const_iterator>);
+    constexpr iterator erase(const_iterator position);
+    constexpr size_type erase(const key_type& x);
+    template<class K> constexpr size_type erase(K&& x);
+    constexpr iterator erase(const_iterator first, const_iterator last);
+
+    constexpr void swap(flat_multiset& y) noexcept(see below);
+    constexpr void clear() noexcept;
+
+    // observers
+    constexpr key_compare key_comp() const;
+    constexpr value_compare value_comp() const;
+
+    // set operations
+    constexpr iterator find(const key_type& x);
+    constexpr const_iterator find(const key_type& x) const;
+    template<class K> constexpr iterator find(const K& x);
+    template<class K> constexpr const_iterator find(const K& x) const;
+
+    constexpr size_type count(const key_type& x) const;
+    template<class K> constexpr size_type count(const K& x) const;
+
+    constexpr bool contains(const key_type& x) const;
+    template<class K> constexpr bool contains(const K& x) const;
+
+    constexpr iterator lower_bound(const key_type& x);
+    constexpr const_iterator lower_bound(const key_type& x) const;
+    template<class K> constexpr iterator lower_bound(const K& x);
+    template<class K> constexpr const_iterator lower_bound(const K& x) const;
+
+    constexpr iterator upper_bound(const key_type& x);
+    constexpr const_iterator upper_bound(const key_type& x) const;
+    template<class K> constexpr iterator upper_bound(const K& x);
+    template<class K> constexpr const_iterator upper_bound(const K& x) const;
+
+    constexpr pair<iterator, iterator> equal_range(const key_type& x);
+    constexpr pair<const_iterator, const_iterator> equal_range(const key_type& x) const;
+    template<class K>
+      constexpr pair<iterator, iterator> equal_range(const K& x);
+    template<class K>
+      constexpr pair<const_iterator, const_iterator> equal_range(const K& x) const;
+
+    friend constexpr bool operator==(const flat_multiset& x, const flat_multiset& y);
+
+    friend constexpr synth-three-way-result<value_type>
+      operator<=>(const flat_multiset& x, const flat_multiset& y);
+
+    friend constexpr void swap(flat_multiset& x, flat_multiset& y)
+      noexcept(noexcept(x.swap(y)))
+      { x.swap(y); }
+
+  private:
+    container_type c;           // exposition only
+    key_compare compare;        // exposition only
+  };
+
+  template<class KeyContainer, class Compare = less<typename KeyContainer::value_type>>
+    flat_multiset(KeyContainer, Compare = Compare())
+      -> flat_multiset<typename KeyContainer::value_type, Compare, KeyContainer>;
+  template<class KeyContainer, class Allocator>
+    flat_multiset(KeyContainer, Allocator)
+      -> flat_multiset<typename KeyContainer::value_type,
+                       less<typename KeyContainer::value_type>, KeyContainer>;
+  template<class KeyContainer, class Compare, class Allocator>
+    flat_multiset(KeyContainer, Compare, Allocator)
+      -> flat_multiset<typename KeyContainer::value_type, Compare, KeyContainer>;
+
+  template<class KeyContainer, class Compare = less<typename KeyContainer::value_type>>
+    flat_multiset(sorted_equivalent_t, KeyContainer, Compare = Compare())
+      -> flat_multiset<typename KeyContainer::value_type, Compare, KeyContainer>;
+  template<class KeyContainer, class Allocator>
+    flat_multiset(sorted_equivalent_t, KeyContainer, Allocator)
+      -> flat_multiset<typename KeyContainer::value_type,
+                       less<typename KeyContainer::value_type>, KeyContainer>;
+  template<class KeyContainer, class Compare, class Allocator>
+    flat_multiset(sorted_equivalent_t, KeyContainer, Compare, Allocator)
+      -> flat_multiset<typename KeyContainer::value_type, Compare, KeyContainer>;
+
+  template<class InputIterator, class Compare = less<iter-value-type<InputIterator>>>
+    flat_multiset(InputIterator, InputIterator, Compare = Compare())
+      -> flat_multiset<iter-value-type<InputIterator>, Compare>;
+
+  template<class InputIterator, class Compare = less<iter-value-type<InputIterator>>>
+    flat_multiset(sorted_equivalent_t, InputIterator, InputIterator, Compare = Compare())
+      -> flat_multiset<iter-value-type<InputIterator>, Compare>;
+
+  template<ranges::input_range R, class Compare = less<ranges::range_value_t<R>>,
+           class Allocator = allocator<ranges::range_value_t<R>>>
+    flat_multiset(from_range_t, R&&, Compare = Compare(), Allocator = Allocator())
+      -> flat_multiset<ranges::range_value_t<R>, Compare,
+                       vector<ranges::range_value_t<R>,
+                              alloc-rebind<Allocator, ranges::range_value_t<R>>>>;
+
+  template<ranges::input_range R, class Allocator>
+    flat_multiset(from_range_t, R&&, Allocator)
+      -> flat_multiset<ranges::range_value_t<R>, less<ranges::range_value_t<R>>,
+                       vector<ranges::range_value_t<R>,
+                              alloc-rebind<Allocator, ranges::range_value_t<R>>>>;
+
+  template<class Key, class Compare = less<Key>>
+    flat_multiset(initializer_list<Key>, Compare = Compare())
+      -> flat_multiset<Key, Compare>;
+
+  template<class Key, class Compare = less<Key>>
+  flat_multiset(sorted_equivalent_t, initializer_list<Key>, Compare = Compare())
+      -> flat_multiset<Key, Compare>;
+
   template<class Key, class Compare, class KeyContainer, class Allocator>
-    struct uses_allocator<flat_multiset<Key, Compare, KeyContainer>, Allocator>;
+    struct uses_allocator<flat_multiset<Key, Compare, KeyContainer>, Allocator>
+      : bool_constant<uses_allocator_v<KeyContainer, Allocator>> { };
 
   // [flat.multiset.erasure], erasure for flat_multiset
   template<class Key, class Compare, class KeyContainer, class Predicate>
diff --git a/lib/libcxx/include/float.h b/lib/libcxx/include/float.h
index a6e79db912..9de29ec739 100644
--- a/lib/libcxx/include/float.h
+++ b/lib/libcxx/include/float.h
@@ -71,29 +71,29 @@ Macros:
 */
 
 #if defined(__cplusplus) && __cplusplus < 201103L && defined(_LIBCPP_USE_FROZEN_CXX03_HEADERS)
-#  include <__cxx03/float.h>
+#  include <__cxx03/__config>
 #else
 #  include <__config>
+#endif
 
-#  if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
-#    pragma GCC system_header
+#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
+#  pragma GCC system_header
+#endif
+
+#if __has_include_next(<float.h>)
+#  include_next <float.h>
+#endif
+
+#ifdef __cplusplus
+
+#  ifndef FLT_EVAL_METHOD
+#    define FLT_EVAL_METHOD __FLT_EVAL_METHOD__
 #  endif
 
-#  if __has_include_next(<float.h>)
-#    include_next <float.h>
+#  ifndef DECIMAL_DIG
+#    define DECIMAL_DIG __DECIMAL_DIG__
 #  endif
 
-#  ifdef __cplusplus
-
-#    ifndef FLT_EVAL_METHOD
-#      define FLT_EVAL_METHOD __FLT_EVAL_METHOD__
-#    endif
-
-#    ifndef DECIMAL_DIG
-#      define DECIMAL_DIG __DECIMAL_DIG__
-#    endif
-
-#  endif // __cplusplus
-#endif   // defined(__cplusplus) && __cplusplus < 201103L && defined(_LIBCPP_USE_FROZEN_CXX03_HEADERS)
+#endif // __cplusplus
 
 #endif // _LIBCPP_FLOAT_H
diff --git a/lib/libcxx/include/forward_list b/lib/libcxx/include/forward_list
index 6daa7fbbc0..56c45d0d46 100644
--- a/lib/libcxx/include/forward_list
+++ b/lib/libcxx/include/forward_list
@@ -223,18 +223,17 @@ template <class T, class Allocator, class Predicate>
 #  include <__ranges/concepts.h>
 #  include <__ranges/container_compatible_range.h>
 #  include <__ranges/from_range.h>
-#  include <__type_traits/conditional.h>
 #  include <__type_traits/container_traits.h>
 #  include <__type_traits/enable_if.h>
 #  include <__type_traits/is_allocator.h>
 #  include <__type_traits/is_const.h>
 #  include <__type_traits/is_nothrow_assignable.h>
 #  include <__type_traits/is_nothrow_constructible.h>
-#  include <__type_traits/is_pointer.h>
 #  include <__type_traits/is_same.h>
 #  include <__type_traits/is_swappable.h>
 #  include <__type_traits/remove_cv.h>
 #  include <__type_traits/type_identity.h>
+#  include <__utility/exception_guard.h>
 #  include <__utility/forward.h>
 #  include <__utility/move.h>
 #  include <__utility/swap.h>
@@ -283,17 +282,6 @@ struct __forward_node_traits {
   typedef _NodePtr __node_pointer;
   typedef __forward_begin_node<_NodePtr> __begin_node;
   typedef __rebind_pointer_t<_NodePtr, __begin_node> __begin_node_pointer;
-
-// TODO(LLVM 22): Remove this check
-#  ifndef _LIBCPP_ABI_FORWARD_LIST_REMOVE_NODE_POINTER_UB
-  static_assert(sizeof(__begin_node_pointer) == sizeof(__node_pointer) && _LIBCPP_ALIGNOF(__begin_node_pointer) ==
-                    _LIBCPP_ALIGNOF(__node_pointer),
-                "It looks like you are using std::forward_list with a fancy pointer type that thas a different "
-                "representation depending on whether it points to a forward_list base pointer or a forward_list node "
-                "pointer (both of which are implementation details of the standard library). This means that your ABI "
-                "is being broken between LLVM 19 and LLVM 20. If you don't care about your ABI being broken, define "
-                "the _LIBCPP_ABI_FORWARD_LIST_REMOVE_NODE_POINTER_UB macro to silence this diagnostic.");
-#  endif
 };
 
 template <class _NodePtr>
@@ -680,7 +668,7 @@ public:
 #  endif
   _LIBCPP_CONSTEXPR_SINCE_CXX26 _LIBCPP_HIDE_FROM_ABI forward_list(size_type __n, const value_type& __v);
 
-  template <__enable_if_t<__is_allocator<_Alloc>::value, int> = 0>
+  template <__enable_if_t<__is_allocator_v<_Alloc>, int> = 0>
   _LIBCPP_CONSTEXPR_SINCE_CXX26 _LIBCPP_HIDE_FROM_ABI
   forward_list(size_type __n, const value_type& __v, const allocator_type& __a)
       : __base(__a) {
@@ -744,50 +732,52 @@ public:
 
   _LIBCPP_CONSTEXPR_SINCE_CXX26 _LIBCPP_HIDE_FROM_ABI void assign(size_type __n, const value_type& __v);
 
-  _LIBCPP_CONSTEXPR_SINCE_CXX26 _LIBCPP_HIDE_FROM_ABI allocator_type get_allocator() const _NOEXCEPT {
+  [[__nodiscard__]] _LIBCPP_CONSTEXPR_SINCE_CXX26 _LIBCPP_HIDE_FROM_ABI allocator_type get_allocator() const _NOEXCEPT {
     return allocator_type(this->__alloc_);
   }
 
-  _LIBCPP_CONSTEXPR_SINCE_CXX26 _LIBCPP_HIDE_FROM_ABI iterator begin() _NOEXCEPT {
+  [[__nodiscard__]] _LIBCPP_CONSTEXPR_SINCE_CXX26 _LIBCPP_HIDE_FROM_ABI iterator begin() _NOEXCEPT {
     return iterator(__base::__before_begin()->__next_);
   }
-  _LIBCPP_CONSTEXPR_SINCE_CXX26 _LIBCPP_HIDE_FROM_ABI const_iterator begin() const _NOEXCEPT {
+  [[__nodiscard__]] _LIBCPP_CONSTEXPR_SINCE_CXX26 _LIBCPP_HIDE_FROM_ABI const_iterator begin() const _NOEXCEPT {
     return const_iterator(__base::__before_begin()->__next_);
   }
-  _LIBCPP_CONSTEXPR_SINCE_CXX26 _LIBCPP_HIDE_FROM_ABI iterator end() _NOEXCEPT { return iterator(nullptr); }
-  _LIBCPP_CONSTEXPR_SINCE_CXX26 _LIBCPP_HIDE_FROM_ABI const_iterator end() const _NOEXCEPT {
+  [[__nodiscard__]] _LIBCPP_CONSTEXPR_SINCE_CXX26 _LIBCPP_HIDE_FROM_ABI iterator end() _NOEXCEPT {
+    return iterator(nullptr);
+  }
+  [[__nodiscard__]] _LIBCPP_CONSTEXPR_SINCE_CXX26 _LIBCPP_HIDE_FROM_ABI const_iterator end() const _NOEXCEPT {
     return const_iterator(nullptr);
   }
 
-  _LIBCPP_CONSTEXPR_SINCE_CXX26 _LIBCPP_HIDE_FROM_ABI const_iterator cbegin() const _NOEXCEPT {
+  [[__nodiscard__]] _LIBCPP_CONSTEXPR_SINCE_CXX26 _LIBCPP_HIDE_FROM_ABI const_iterator cbegin() const _NOEXCEPT {
     return const_iterator(__base::__before_begin()->__next_);
   }
-  _LIBCPP_CONSTEXPR_SINCE_CXX26 _LIBCPP_HIDE_FROM_ABI const_iterator cend() const _NOEXCEPT {
+  [[__nodiscard__]] _LIBCPP_CONSTEXPR_SINCE_CXX26 _LIBCPP_HIDE_FROM_ABI const_iterator cend() const _NOEXCEPT {
     return const_iterator(nullptr);
   }
 
-  _LIBCPP_CONSTEXPR_SINCE_CXX26 _LIBCPP_HIDE_FROM_ABI iterator before_begin() _NOEXCEPT {
+  [[__nodiscard__]] _LIBCPP_CONSTEXPR_SINCE_CXX26 _LIBCPP_HIDE_FROM_ABI iterator before_begin() _NOEXCEPT {
     return iterator(__base::__before_begin());
   }
-  _LIBCPP_CONSTEXPR_SINCE_CXX26 _LIBCPP_HIDE_FROM_ABI const_iterator before_begin() const _NOEXCEPT {
+  [[__nodiscard__]] _LIBCPP_CONSTEXPR_SINCE_CXX26 _LIBCPP_HIDE_FROM_ABI const_iterator before_begin() const _NOEXCEPT {
     return const_iterator(__base::__before_begin());
   }
-  _LIBCPP_CONSTEXPR_SINCE_CXX26 _LIBCPP_HIDE_FROM_ABI const_iterator cbefore_begin() const _NOEXCEPT {
+  [[__nodiscard__]] _LIBCPP_CONSTEXPR_SINCE_CXX26 _LIBCPP_HIDE_FROM_ABI const_iterator cbefore_begin() const _NOEXCEPT {
     return const_iterator(__base::__before_begin());
   }
 
   [[__nodiscard__]] _LIBCPP_CONSTEXPR_SINCE_CXX26 _LIBCPP_HIDE_FROM_ABI bool empty() const _NOEXCEPT {
     return __base::__before_begin()->__next_ == nullptr;
   }
-  _LIBCPP_CONSTEXPR_SINCE_CXX26 _LIBCPP_HIDE_FROM_ABI size_type max_size() const _NOEXCEPT {
+  [[__nodiscard__]] _LIBCPP_CONSTEXPR_SINCE_CXX26 _LIBCPP_HIDE_FROM_ABI size_type max_size() const _NOEXCEPT {
     return std::min<size_type>(__node_traits::max_size(this->__alloc_), numeric_limits<difference_type>::max());
   }
 
-  _LIBCPP_CONSTEXPR_SINCE_CXX26 _LIBCPP_HIDE_FROM_ABI reference front() {
+  [[__nodiscard__]] _LIBCPP_CONSTEXPR_SINCE_CXX26 _LIBCPP_HIDE_FROM_ABI reference front() {
     _LIBCPP_ASSERT_NON_NULL(!empty(), "forward_list::front called on an empty list");
     return __base::__before_begin()->__next_->__get_value();
   }
-  _LIBCPP_CONSTEXPR_SINCE_CXX26 _LIBCPP_HIDE_FROM_ABI const_reference front() const {
+  [[__nodiscard__]] _LIBCPP_CONSTEXPR_SINCE_CXX26 _LIBCPP_HIDE_FROM_ABI const_reference front() const {
     _LIBCPP_ASSERT_NON_NULL(!empty(), "forward_list::front called on an empty list");
     return __base::__before_begin()->__next_->__get_value();
   }
@@ -918,22 +908,22 @@ private:
 
 #  if _LIBCPP_STD_VER >= 17
 template <class _InputIterator,
-          class _Alloc = allocator<__iter_value_type<_InputIterator>>,
+          class _Alloc = allocator<__iterator_value_type<_InputIterator>>,
           class        = enable_if_t<__has_input_iterator_category<_InputIterator>::value>,
-          class        = enable_if_t<__is_allocator<_Alloc>::value> >
-forward_list(_InputIterator, _InputIterator) -> forward_list<__iter_value_type<_InputIterator>, _Alloc>;
+          class        = enable_if_t<__is_allocator_v<_Alloc>>>
+forward_list(_InputIterator, _InputIterator) -> forward_list<__iterator_value_type<_InputIterator>, _Alloc>;
 
 template <class _InputIterator,
           class _Alloc,
           class = enable_if_t<__has_input_iterator_category<_InputIterator>::value>,
-          class = enable_if_t<__is_allocator<_Alloc>::value> >
-forward_list(_InputIterator, _InputIterator, _Alloc) -> forward_list<__iter_value_type<_InputIterator>, _Alloc>;
+          class = enable_if_t<__is_allocator_v<_Alloc>>>
+forward_list(_InputIterator, _InputIterator, _Alloc) -> forward_list<__iterator_value_type<_InputIterator>, _Alloc>;
 #  endif
 
 #  if _LIBCPP_STD_VER >= 23
 template <ranges::input_range _Range,
           class _Alloc = allocator<ranges::range_value_t<_Range>>,
-          class        = enable_if_t<__is_allocator<_Alloc>::value> >
+          class        = enable_if_t<__is_allocator_v<_Alloc>>>
 forward_list(from_range_t, _Range&&, _Alloc = _Alloc()) -> forward_list<ranges::range_value_t<_Range>, _Alloc>;
 #  endif
 
@@ -1180,22 +1170,17 @@ forward_list<_Tp, _Alloc>::__insert_after(const_iterator __p, size_type __n, _Ar
   if (__n > 0) {
     __node_pointer __first = this->__create_node(/* next = */ nullptr, std::forward<_Args>(__args)...);
     __node_pointer __last  = __first;
-#  if _LIBCPP_HAS_EXCEPTIONS
-    try {
-#  endif // _LIBCPP_HAS_EXCEPTIONS
-      for (--__n; __n != 0; --__n, __last = __last->__next_) {
-        __last->__next_ = this->__create_node(/* next = */ nullptr, std::forward<_Args>(__args)...);
-      }
-#  if _LIBCPP_HAS_EXCEPTIONS
-    } catch (...) {
+    auto __guard           = std::__make_exception_guard([&] {
       while (__first != nullptr) {
         __node_pointer __next = __first->__next_;
         this->__delete_node(__first);
         __first = __next;
       }
-      throw;
+    });
+    for (--__n; __n != 0; --__n, __last = __last->__next_) {
+      __last->__next_ = this->__create_node(/* next = */ nullptr, std::forward<_Args>(__args)...);
     }
-#  endif // _LIBCPP_HAS_EXCEPTIONS
+    __guard.__complete();
     __last->__next_ = __r->__next_;
     __r->__next_    = __first;
     __r             = std::__static_fancy_pointer_cast<__begin_node_pointer>(__last);
@@ -1220,22 +1205,17 @@ forward_list<_Tp, _Alloc>::__insert_after_with_sentinel(const_iterator __p, _Inp
     __node_pointer __first = this->__create_node(/* next = */ nullptr, *__f);
     __node_pointer __last  = __first;
 
-#  if _LIBCPP_HAS_EXCEPTIONS
-    try {
-#  endif // _LIBCPP_HAS_EXCEPTIONS
-      for (++__f; __f != __l; ++__f, ((void)(__last = __last->__next_))) {
-        __last->__next_ = this->__create_node(/* next = */ nullptr, *__f);
-      }
-#  if _LIBCPP_HAS_EXCEPTIONS
-    } catch (...) {
+    auto __guard = std::__make_exception_guard([&] {
       while (__first != nullptr) {
         __node_pointer __next = __first->__next_;
         this->__delete_node(__first);
         __first = __next;
       }
-      throw;
+    });
+    for (++__f; __f != __l; ++__f, ((void)(__last = __last->__next_))) {
+      __last->__next_ = this->__create_node(/* next = */ nullptr, *__f);
     }
-#  endif // _LIBCPP_HAS_EXCEPTIONS
+    __guard.__complete();
 
     __last->__next_ = __r->__next_;
     __r->__next_    = __first;
diff --git a/lib/libcxx/include/fstream b/lib/libcxx/include/fstream
index dc5c47304f..ec7b633b86 100644
--- a/lib/libcxx/include/fstream
+++ b/lib/libcxx/include/fstream
@@ -221,7 +221,7 @@ _LIBCPP_PUSH_MACROS
 
 _LIBCPP_BEGIN_NAMESPACE_STD
 
-#    if _LIBCPP_STD_VER >= 26 && defined(_LIBCPP_WIN32API)
+#    if _LIBCPP_STD_VER >= 23 && defined(_LIBCPP_WIN32API)
 _LIBCPP_EXPORTED_FROM_ABI void* __filebuf_windows_native_handle(FILE* __file) noexcept;
 #    endif
 
@@ -254,7 +254,7 @@ public:
   void swap(basic_filebuf& __rhs);
 
   // 27.9.1.4 Members:
-  _LIBCPP_HIDE_FROM_ABI bool is_open() const;
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI bool is_open() const;
   basic_filebuf* open(const char* __s, ios_base::openmode __mode);
 #    if _LIBCPP_HAS_OPEN_WITH_WCHAR
   basic_filebuf* open(const wchar_t* __s, ios_base::openmode __mode);
@@ -262,15 +262,14 @@ public:
   _LIBCPP_HIDE_FROM_ABI basic_filebuf* open(const string& __s, ios_base::openmode __mode);
 
 #    if _LIBCPP_STD_VER >= 17
-  _LIBCPP_AVAILABILITY_FILESYSTEM_LIBRARY _LIBCPP_HIDE_FROM_ABI basic_filebuf*
-  open(const filesystem::path& __p, ios_base::openmode __mode) {
+  _LIBCPP_HIDE_FROM_ABI basic_filebuf* open(const filesystem::path& __p, ios_base::openmode __mode) {
     return open(__p.c_str(), __mode);
   }
 #    endif
   _LIBCPP_HIDE_FROM_ABI basic_filebuf* __open(int __fd, ios_base::openmode __mode);
   basic_filebuf* close();
 #    if _LIBCPP_STD_VER >= 26
-  _LIBCPP_HIDE_FROM_ABI native_handle_type native_handle() const noexcept {
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI native_handle_type native_handle() const noexcept {
     _LIBCPP_ASSERT_UNCATEGORIZED(this->is_open(), "File must be opened");
 #      if defined(_LIBCPP_WIN32API)
     return std::__filebuf_windows_native_handle(__file_);
@@ -299,6 +298,16 @@ protected:
   int sync() override;
   void imbue(const locale& __loc) override;
 
+  _LIBCPP_HIDE_FROM_ABI_VIRTUAL streamsize xsputn(const char_type* __str, streamsize __len) override {
+    if (__always_noconv_ && __len >= (this->epptr() - this->pbase())) {
+      if (traits_type::eq_int_type(overflow(), traits_type::eof()))
+        return 0;
+
+      return std::fwrite(__str, sizeof(char_type), __len, __file_);
+    }
+    return basic_streambuf<_CharT, _Traits>::xsputn(__str, __len);
+  }
+
 private:
   char* __extbuf_;
   const char* __extbufnext_;
@@ -401,6 +410,14 @@ private:
       }
     }
   }
+
+  _LIBCPP_HIDE_FROM_ABI typename traits_type::int_type __overflow_failed() {
+    if (this->pptr() == this->epptr() + 1) {
+      this->pbump(-1); // lose the character we overflowed above -- we don't really have a
+                       // choice since we couldn't commit the contents of the put area
+    }
+    return traits_type::eof();
+  }
 };
 
 template <class _CharT, class _Traits>
@@ -742,7 +759,12 @@ basic_filebuf<_CharT, _Traits>* basic_filebuf<_CharT, _Traits>::close() {
     if (fclose(__h.release()))
       __rt = nullptr;
     __file_ = nullptr;
-    setbuf(0, 0);
+    // Reset the get and the put areas without getting rid of the underlying buffers,
+    // which might have been configured by the user. Make sure to keep the buffers
+    // since the user may re-open the stream.
+    this->setg(nullptr, nullptr, nullptr);
+    this->setp(nullptr, nullptr);
+    __cm_ = __no_io_operations;
   }
   return __rt;
 }
@@ -821,14 +843,6 @@ typename basic_filebuf<_CharT, _Traits>::int_type basic_filebuf<_CharT, _Traits>
 
 template <class _CharT, class _Traits>
 typename basic_filebuf<_CharT, _Traits>::int_type basic_filebuf<_CharT, _Traits>::overflow(int_type __c) {
-  auto __failed = [this]() {
-    if (this->pptr() == this->epptr() + 1) {
-      this->pbump(-1); // lose the character we overflowed above -- we don't really have a
-                       // choice since we couldn't commit the contents of the put area
-    }
-    return traits_type::eof();
-  };
-
   if (__file_ == nullptr)
     return traits_type::eof();
   __write_mode();
@@ -850,7 +864,7 @@ typename basic_filebuf<_CharT, _Traits>::int_type basic_filebuf<_CharT, _Traits>
   if (__always_noconv_) {
     size_t __n = static_cast<size_t>(this->pptr() - this->pbase());
     if (std::fwrite(this->pbase(), sizeof(char_type), __n, __file_) != __n) {
-      return __failed();
+      return __overflow_failed();
     }
   } else {
     if (!__cv_)
@@ -864,14 +878,14 @@ typename basic_filebuf<_CharT, _Traits>::int_type basic_filebuf<_CharT, _Traits>
     do {
       codecvt_base::result __r = __cv_->out(__st_, __b, __p, __end, __extbuf_, __extbuf_ + __ebs_, __extbuf_end);
       if (__end == __b) {
-        return __failed();
+        return __overflow_failed();
       }
 
       // No conversion needed: output characters directly to the file, done.
       if (__r == codecvt_base::noconv) {
         size_t __n = static_cast<size_t>(__p - __b);
         if (std::fwrite(__b, 1, __n, __file_) != __n) {
-          return __failed();
+          return __overflow_failed();
         }
         break;
 
@@ -879,7 +893,7 @@ typename basic_filebuf<_CharT, _Traits>::int_type basic_filebuf<_CharT, _Traits>
       } else if (__r == codecvt_base::ok) {
         size_t __n = static_cast<size_t>(__extbuf_end - __extbuf_);
         if (std::fwrite(__extbuf_, 1, __n, __file_) != __n) {
-          return __failed();
+          return __overflow_failed();
         }
         break;
 
@@ -888,13 +902,13 @@ typename basic_filebuf<_CharT, _Traits>::int_type basic_filebuf<_CharT, _Traits>
       } else if (__r == codecvt_base::partial) {
         size_t __n = static_cast<size_t>(__extbuf_end - __extbuf_);
         if (std::fwrite(__extbuf_, 1, __n, __file_) != __n) {
-          return __failed();
+          return __overflow_failed();
         }
         __b = const_cast<char_type*>(__end);
         continue;
 
       } else {
-        return __failed();
+        return __overflow_failed();
       }
     } while (true);
   }
@@ -977,7 +991,7 @@ template <class _CharT, class _Traits>
 int basic_filebuf<_CharT, _Traits>::__fseek(FILE* __file, pos_type __offset, int __whence) {
 #    if defined(_LIBCPP_MSVCRT_LIKE)
   return _fseeki64(__file, __offset, __whence);
-#    elif defined(_NEWLIB_VERSION)
+#    elif _LIBCPP_LIBC_NEWLIB
   return fseek(__file, __offset, __whence);
 #    else
   return ::fseeko(__file, __offset, __whence);
@@ -988,7 +1002,7 @@ template <class _CharT, class _Traits>
 typename basic_filebuf<_CharT, _Traits>::pos_type basic_filebuf<_CharT, _Traits>::__ftell(FILE* __file) {
 #    if defined(_LIBCPP_MSVCRT_LIKE)
   return _ftelli64(__file);
-#    elif defined(_NEWLIB_VERSION)
+#    elif _LIBCPP_LIBC_NEWLIB
   return ftell(__file);
 #    else
   return ftello(__file);
@@ -1147,27 +1161,27 @@ public:
   _LIBCPP_HIDE_FROM_ABI explicit basic_ifstream(const string& __s, ios_base::openmode __mode = ios_base::in);
 #    if _LIBCPP_STD_VER >= 17
   template <class _Tp, class = enable_if_t<is_same_v<_Tp, filesystem::path>>>
-  _LIBCPP_AVAILABILITY_FILESYSTEM_LIBRARY
-      _LIBCPP_HIDE_FROM_ABI explicit basic_ifstream(const _Tp& __p, ios_base::openmode __mode = ios_base::in)
+  _LIBCPP_HIDE_FROM_ABI explicit basic_ifstream(const _Tp& __p, ios_base::openmode __mode = ios_base::in)
       : basic_ifstream(__p.c_str(), __mode) {}
 #    endif // _LIBCPP_STD_VER >= 17
   _LIBCPP_HIDE_FROM_ABI basic_ifstream(basic_ifstream&& __rhs);
   _LIBCPP_HIDE_FROM_ABI basic_ifstream& operator=(basic_ifstream&& __rhs);
   _LIBCPP_HIDE_FROM_ABI void swap(basic_ifstream& __rhs);
 
-  _LIBCPP_HIDE_FROM_ABI basic_filebuf<char_type, traits_type>* rdbuf() const;
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI basic_filebuf<char_type, traits_type>* rdbuf() const;
 #    if _LIBCPP_STD_VER >= 26
-  _LIBCPP_HIDE_FROM_ABI native_handle_type native_handle() const noexcept { return rdbuf()->native_handle(); }
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI native_handle_type native_handle() const noexcept {
+    return rdbuf()->native_handle();
+  }
 #    endif
-  _LIBCPP_HIDE_FROM_ABI bool is_open() const;
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI bool is_open() const;
   void open(const char* __s, ios_base::openmode __mode = ios_base::in);
 #    if _LIBCPP_HAS_OPEN_WITH_WCHAR
   void open(const wchar_t* __s, ios_base::openmode __mode = ios_base::in);
 #    endif
   void open(const string& __s, ios_base::openmode __mode = ios_base::in);
 #    if _LIBCPP_STD_VER >= 17
-  _LIBCPP_AVAILABILITY_FILESYSTEM_LIBRARY _LIBCPP_HIDE_FROM_ABI void
-  open(const filesystem::path& __p, ios_base::openmode __mode = ios_base::in) {
+  _LIBCPP_HIDE_FROM_ABI void open(const filesystem::path& __p, ios_base::openmode __mode = ios_base::in) {
     return open(__p.c_str(), __mode);
   }
 #    endif // _LIBCPP_STD_VER >= 17
@@ -1304,8 +1318,7 @@ public:
 
 #    if _LIBCPP_STD_VER >= 17
   template <class _Tp, class = enable_if_t<is_same_v<_Tp, filesystem::path>>>
-  _LIBCPP_AVAILABILITY_FILESYSTEM_LIBRARY
-      _LIBCPP_HIDE_FROM_ABI explicit basic_ofstream(const _Tp& __p, ios_base::openmode __mode = ios_base::out)
+  _LIBCPP_HIDE_FROM_ABI explicit basic_ofstream(const _Tp& __p, ios_base::openmode __mode = ios_base::out)
       : basic_ofstream(__p.c_str(), __mode) {}
 #    endif // _LIBCPP_STD_VER >= 17
 
@@ -1313,11 +1326,13 @@ public:
   _LIBCPP_HIDE_FROM_ABI basic_ofstream& operator=(basic_ofstream&& __rhs);
   _LIBCPP_HIDE_FROM_ABI void swap(basic_ofstream& __rhs);
 
-  _LIBCPP_HIDE_FROM_ABI basic_filebuf<char_type, traits_type>* rdbuf() const;
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI basic_filebuf<char_type, traits_type>* rdbuf() const;
 #    if _LIBCPP_STD_VER >= 26
-  _LIBCPP_HIDE_FROM_ABI native_handle_type native_handle() const noexcept { return rdbuf()->native_handle(); }
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI native_handle_type native_handle() const noexcept {
+    return rdbuf()->native_handle();
+  }
 #    endif
-  _LIBCPP_HIDE_FROM_ABI bool is_open() const;
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI bool is_open() const;
   void open(const char* __s, ios_base::openmode __mode = ios_base::out);
 #    if _LIBCPP_HAS_OPEN_WITH_WCHAR
   void open(const wchar_t* __s, ios_base::openmode __mode = ios_base::out);
@@ -1325,8 +1340,7 @@ public:
   void open(const string& __s, ios_base::openmode __mode = ios_base::out);
 
 #    if _LIBCPP_STD_VER >= 17
-  _LIBCPP_AVAILABILITY_FILESYSTEM_LIBRARY _LIBCPP_HIDE_FROM_ABI void
-  open(const filesystem::path& __p, ios_base::openmode __mode = ios_base::out) {
+  _LIBCPP_HIDE_FROM_ABI void open(const filesystem::path& __p, ios_base::openmode __mode = ios_base::out) {
     return open(__p.c_str(), __mode);
   }
 #    endif // _LIBCPP_STD_VER >= 17
@@ -1466,8 +1480,7 @@ public:
 
 #    if _LIBCPP_STD_VER >= 17
   template <class _Tp, class = enable_if_t<is_same_v<_Tp, filesystem::path>>>
-  _LIBCPP_AVAILABILITY_FILESYSTEM_LIBRARY _LIBCPP_HIDE_FROM_ABI explicit basic_fstream(
-      const _Tp& __p, ios_base::openmode __mode = ios_base::in | ios_base::out)
+  _LIBCPP_HIDE_FROM_ABI explicit basic_fstream(const _Tp& __p, ios_base::openmode __mode = ios_base::in | ios_base::out)
       : basic_fstream(__p.c_str(), __mode) {}
 #    endif // _LIBCPP_STD_VER >= 17
 
@@ -1477,11 +1490,13 @@ public:
 
   _LIBCPP_HIDE_FROM_ABI void swap(basic_fstream& __rhs);
 
-  _LIBCPP_HIDE_FROM_ABI basic_filebuf<char_type, traits_type>* rdbuf() const;
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI basic_filebuf<char_type, traits_type>* rdbuf() const;
 #    if _LIBCPP_STD_VER >= 26
-  _LIBCPP_HIDE_FROM_ABI native_handle_type native_handle() const noexcept { return rdbuf()->native_handle(); }
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI native_handle_type native_handle() const noexcept {
+    return rdbuf()->native_handle();
+  }
 #    endif
-  _LIBCPP_HIDE_FROM_ABI bool is_open() const;
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI bool is_open() const;
   _LIBCPP_HIDE_FROM_ABI void open(const char* __s, ios_base::openmode __mode = ios_base::in | ios_base::out);
 #    if _LIBCPP_HAS_OPEN_WITH_WCHAR
   void open(const wchar_t* __s, ios_base::openmode __mode = ios_base::in | ios_base::out);
@@ -1489,7 +1504,7 @@ public:
   _LIBCPP_HIDE_FROM_ABI void open(const string& __s, ios_base::openmode __mode = ios_base::in | ios_base::out);
 
 #    if _LIBCPP_STD_VER >= 17
-  _LIBCPP_AVAILABILITY_FILESYSTEM_LIBRARY _LIBCPP_HIDE_FROM_ABI void
+  _LIBCPP_HIDE_FROM_ABI void
   open(const filesystem::path& __p, ios_base::openmode __mode = ios_base::in | ios_base::out) {
     return open(__p.c_str(), __mode);
   }
diff --git a/lib/libcxx/include/future b/lib/libcxx/include/future
index abdd82dc95..4cd5e8ab2a 100644
--- a/lib/libcxx/include/future
+++ b/lib/libcxx/include/future
@@ -397,12 +397,14 @@ template <class R, class Alloc> struct uses_allocator<packaged_task<R>, Alloc>;
 #    include <__type_traits/decay.h>
 #    include <__type_traits/enable_if.h>
 #    include <__type_traits/invoke.h>
+#    include <__type_traits/is_constructible.h>
 #    include <__type_traits/is_same.h>
 #    include <__type_traits/remove_cvref.h>
 #    include <__type_traits/remove_reference.h>
 #    include <__type_traits/strip_signature.h>
 #    include <__type_traits/underlying_type.h>
 #    include <__utility/auto_cast.h>
+#    include <__utility/exception_guard.h>
 #    include <__utility/forward.h>
 #    include <__utility/move.h>
 #    include <__utility/swap.h>
@@ -477,13 +479,13 @@ inline _LIBCPP_HIDE_FROM_ABI launch& operator^=(launch& __x, launch __y) {
 _LIBCPP_DECLARE_STRONG_ENUM(future_status){ready, timeout, deferred};
 _LIBCPP_DECLARE_STRONG_ENUM_EPILOG(future_status)
 
-_LIBCPP_EXPORTED_FROM_ABI const error_category& future_category() _NOEXCEPT;
+[[__nodiscard__]] _LIBCPP_EXPORTED_FROM_ABI const error_category& future_category() _NOEXCEPT;
 
-inline _LIBCPP_HIDE_FROM_ABI error_code make_error_code(future_errc __e) _NOEXCEPT {
+[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI error_code make_error_code(future_errc __e) _NOEXCEPT {
   return error_code(static_cast<int>(__e), future_category());
 }
 
-inline _LIBCPP_HIDE_FROM_ABI error_condition make_error_condition(future_errc __e) _NOEXCEPT {
+[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI error_condition make_error_condition(future_errc __e) _NOEXCEPT {
   return error_condition(static_cast<int>(__e), future_category());
 }
 
@@ -502,7 +504,7 @@ public:
   _LIBCPP_HIDE_FROM_ABI explicit future_error(future_errc __ec) : future_error(std::make_error_code(__ec)) {}
 #    endif
 
-  _LIBCPP_HIDE_FROM_ABI const error_code& code() const _NOEXCEPT { return __ec_; }
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI const error_code& code() const _NOEXCEPT { return __ec_; }
 
   _LIBCPP_HIDE_FROM_ABI future_error(const future_error&) _NOEXCEPT = default;
   ~future_error() _NOEXCEPT override;
@@ -583,12 +585,9 @@ inline future_status __assoc_sub_state::wait_for(const chrono::duration<_Rep, _P
 template <class _Rp>
 class _LIBCPP_HIDDEN __assoc_state : public __assoc_sub_state {
   typedef __assoc_sub_state base;
-  _LIBCPP_SUPPRESS_DEPRECATED_PUSH
-  typedef typename aligned_storage<sizeof(_Rp), _LIBCPP_ALIGNOF(_Rp)>::type _Up;
-  _LIBCPP_SUPPRESS_DEPRECATED_POP
 
 protected:
-  _Up __value_;
+  _ALIGNAS_TYPE(_Rp) char __value_[sizeof(_Rp)];
 
   _LIBCPP_HIDE_FROM_ABI_VIRTUAL void __on_zero_shared() _NOEXCEPT override;
 
@@ -943,15 +942,15 @@ public:
   }
 
   _LIBCPP_HIDE_FROM_ABI ~future();
-  _LIBCPP_HIDE_FROM_ABI shared_future<_Rp> share() _NOEXCEPT;
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI shared_future<_Rp> share() _NOEXCEPT;
 
   // retrieving the value
-  _LIBCPP_HIDE_FROM_ABI _Rp get();
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI _Rp get();
 
   _LIBCPP_HIDE_FROM_ABI void swap(future& __rhs) _NOEXCEPT { std::swap(__state_, __rhs.__state_); }
 
   // functions to check state
-  _LIBCPP_HIDE_FROM_ABI bool valid() const _NOEXCEPT { return __state_ != nullptr; }
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI bool valid() const _NOEXCEPT { return __state_ != nullptr; }
 
   _LIBCPP_HIDE_FROM_ABI void wait() const { __state_->wait(); }
   template <class _Rep, class _Period>
@@ -1014,15 +1013,15 @@ public:
   }
 
   _LIBCPP_HIDE_FROM_ABI ~future();
-  _LIBCPP_HIDE_FROM_ABI shared_future<_Rp&> share() _NOEXCEPT;
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI shared_future<_Rp&> share() _NOEXCEPT;
 
   // retrieving the value
-  _LIBCPP_HIDE_FROM_ABI _Rp& get();
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI _Rp& get();
 
   _LIBCPP_HIDE_FROM_ABI void swap(future& __rhs) _NOEXCEPT { std::swap(__state_, __rhs.__state_); }
 
   // functions to check state
-  _LIBCPP_HIDE_FROM_ABI bool valid() const _NOEXCEPT { return __state_ != nullptr; }
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI bool valid() const _NOEXCEPT { return __state_ != nullptr; }
 
   _LIBCPP_HIDE_FROM_ABI void wait() const { __state_->wait(); }
   template <class _Rep, class _Period>
@@ -1089,7 +1088,7 @@ public:
   _LIBCPP_HIDE_FROM_ABI void swap(future& __rhs) _NOEXCEPT { std::swap(__state_, __rhs.__state_); }
 
   // functions to check state
-  _LIBCPP_HIDE_FROM_ABI bool valid() const _NOEXCEPT { return __state_ != nullptr; }
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI bool valid() const _NOEXCEPT { return __state_ != nullptr; }
 
   _LIBCPP_HIDE_FROM_ABI void wait() const { __state_->wait(); }
   template <class _Rep, class _Period>
@@ -1139,7 +1138,7 @@ public:
   _LIBCPP_HIDE_FROM_ABI void swap(promise& __rhs) _NOEXCEPT { std::swap(__state_, __rhs.__state_); }
 
   // retrieving the result
-  _LIBCPP_HIDE_FROM_ABI future<_Rp> get_future();
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI future<_Rp> get_future();
 
   // setting the result
   _LIBCPP_HIDE_FROM_ABI void set_value(const _Rp& __r);
@@ -1256,7 +1255,7 @@ public:
   _LIBCPP_HIDE_FROM_ABI void swap(promise& __rhs) _NOEXCEPT { std::swap(__state_, __rhs.__state_); }
 
   // retrieving the result
-  _LIBCPP_HIDE_FROM_ABI future<_Rp&> get_future();
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI future<_Rp&> get_future();
 
   // setting the result
   _LIBCPP_HIDE_FROM_ABI void set_value(_Rp& __r);
@@ -1366,7 +1365,7 @@ public:
   _LIBCPP_HIDE_FROM_ABI void swap(promise& __rhs) _NOEXCEPT { std::swap(__state_, __rhs.__state_); }
 
   // retrieving the result
-  future<void> get_future();
+  [[__nodiscard__]] future<void> get_future();
 
   // setting the result
   void set_value();
@@ -1639,10 +1638,10 @@ public:
     __p_.swap(__other.__p_);
   }
 
-  _LIBCPP_HIDE_FROM_ABI bool valid() const _NOEXCEPT { return __p_.__state_ != nullptr; }
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI bool valid() const _NOEXCEPT { return __p_.__state_ != nullptr; }
 
   // result retrieval
-  _LIBCPP_HIDE_FROM_ABI future<_Rp> get_future() { return __p_.get_future(); }
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI future<_Rp> get_future() { return __p_.get_future(); }
 
   // execution
   _LIBCPP_HIDE_FROM_ABI void operator()(_ArgTypes... __args);
@@ -1728,10 +1727,10 @@ public:
     __p_.swap(__other.__p_);
   }
 
-  _LIBCPP_HIDE_FROM_ABI bool valid() const _NOEXCEPT { return __p_.__state_ != nullptr; }
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI bool valid() const _NOEXCEPT { return __p_.__state_ != nullptr; }
 
   // result retrieval
-  _LIBCPP_HIDE_FROM_ABI future<void> get_future() { return __p_.get_future(); }
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI future<void> get_future() { return __p_.get_future(); }
 
   // execution
   _LIBCPP_HIDE_FROM_ABI void operator()(_ArgTypes... __args);
@@ -1815,16 +1814,9 @@ template <class _Rp, class _Fp>
 _LIBCPP_HIDE_FROM_ABI future<_Rp> __make_async_assoc_state(_Fp&& __f) {
   unique_ptr<__async_assoc_state<_Rp, _Fp>, __release_shared_count> __h(
       new __async_assoc_state<_Rp, _Fp>(std::forward<_Fp>(__f)));
-#    if _LIBCPP_HAS_EXCEPTIONS
-  try {
-#    endif
-    std::thread(&__async_assoc_state<_Rp, _Fp>::__execute, __h.get()).detach();
-#    if _LIBCPP_HAS_EXCEPTIONS
-  } catch (...) {
-    __h->__make_ready();
-    throw;
-  }
-#    endif
+  auto __guard = std::__make_exception_guard([&] { __h->__make_ready(); });
+  std::thread(&__async_assoc_state<_Rp, _Fp>::__execute, __h.get()).detach();
+  __guard.__complete();
   return future<_Rp>(__h.get());
 }
 
@@ -1837,20 +1829,16 @@ class _LIBCPP_HIDDEN __async_func {
 public:
   using _Rp _LIBCPP_NODEBUG = __invoke_result_t<_Fp, _Args...>;
 
-  _LIBCPP_HIDE_FROM_ABI explicit __async_func(_Fp&& __f, _Args&&... __args)
-      : __f_(std::move(__f), std::move(__args)...) {}
+  template <class _Gp, class... _BArgs>
+  _LIBCPP_HIDE_FROM_ABI explicit __async_func(_Gp&& __g, _BArgs&&... __bargs)
+      : __f_(std::forward<_Gp>(__g), std::forward<_BArgs>(__bargs)...) {}
 
   _LIBCPP_HIDE_FROM_ABI __async_func(__async_func&& __f) : __f_(std::move(__f.__f_)) {}
 
   _LIBCPP_HIDE_FROM_ABI _Rp operator()() {
-    typedef typename __make_tuple_indices<1 + sizeof...(_Args), 1>::type _Index;
-    return __execute(_Index());
-  }
-
-private:
-  template <size_t... _Indices>
-  _LIBCPP_HIDE_FROM_ABI _Rp __execute(__tuple_indices<_Indices...>) {
-    return std::__invoke(std::move(std::get<0>(__f_)), std::move(std::get<_Indices>(__f_))...);
+    return [&]<size_t... _Indices>(__index_sequence<_Indices...>) -> _Rp {
+      return std::__invoke(std::move(std::get<_Indices>(__f_))...);
+    }(__index_sequence_for<_Fp, _Args...>{});
   }
 };
 
@@ -1861,6 +1849,10 @@ inline _LIBCPP_HIDE_FROM_ABI bool __does_policy_contain(launch __policy, launch
 template <class _Fp, class... _Args>
 [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI future<__invoke_result_t<__decay_t<_Fp>, __decay_t<_Args>...> >
 async(launch __policy, _Fp&& __f, _Args&&... __args) {
+  static_assert(is_constructible<__decay_t<_Fp>, _Fp>::value, "");
+  static_assert(_And<is_constructible<__decay_t<_Args>, _Args>...>::value, "");
+  static_assert(__is_invocable_v<__decay_t<_Fp>, __decay_t<_Args>...>, "");
+
   typedef __async_func<__decay_t<_Fp>, __decay_t<_Args>...> _BF;
   typedef typename _BF::_Rp _Rp;
 
@@ -1868,8 +1860,7 @@ async(launch __policy, _Fp&& __f, _Args&&... __args) {
   try {
 #      endif
     if (__does_policy_contain(__policy, launch::async))
-      return std::__make_async_assoc_state<_Rp>(
-          _BF(_LIBCPP_AUTO_CAST(std::forward<_Fp>(__f)), _LIBCPP_AUTO_CAST(std::forward<_Args>(__args))...));
+      return std::__make_async_assoc_state<_Rp>(_BF(std::forward<_Fp>(__f), std::forward<_Args>(__args)...));
 #      if _LIBCPP_HAS_EXCEPTIONS
   } catch (...) {
     if (__policy == launch::async)
@@ -1878,8 +1869,7 @@ async(launch __policy, _Fp&& __f, _Args&&... __args) {
 #      endif
 
   if (__does_policy_contain(__policy, launch::deferred))
-    return std::__make_deferred_assoc_state<_Rp>(
-        _BF(_LIBCPP_AUTO_CAST(std::forward<_Fp>(__f)), _LIBCPP_AUTO_CAST(std::forward<_Args>(__args))...));
+    return std::__make_deferred_assoc_state<_Rp>(_BF(std::forward<_Fp>(__f), std::forward<_Args>(__args)...));
   return future<_Rp>{};
 }
 
@@ -1915,12 +1905,12 @@ public:
   }
 
   // retrieving the value
-  _LIBCPP_HIDE_FROM_ABI const _Rp& get() const { return __state_->copy(); }
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI const _Rp& get() const { return __state_->copy(); }
 
   _LIBCPP_HIDE_FROM_ABI void swap(shared_future& __rhs) _NOEXCEPT { std::swap(__state_, __rhs.__state_); }
 
   // functions to check state
-  _LIBCPP_HIDE_FROM_ABI bool valid() const _NOEXCEPT { return __state_ != nullptr; }
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI bool valid() const _NOEXCEPT { return __state_ != nullptr; }
 
   _LIBCPP_HIDE_FROM_ABI void wait() const { __state_->wait(); }
   template <class _Rep, class _Period>
@@ -1971,12 +1961,12 @@ public:
   }
 
   // retrieving the value
-  _LIBCPP_HIDE_FROM_ABI _Rp& get() const { return __state_->copy(); }
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI _Rp& get() const { return __state_->copy(); }
 
   _LIBCPP_HIDE_FROM_ABI void swap(shared_future& __rhs) _NOEXCEPT { std::swap(__state_, __rhs.__state_); }
 
   // functions to check state
-  _LIBCPP_HIDE_FROM_ABI bool valid() const _NOEXCEPT { return __state_ != nullptr; }
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI bool valid() const _NOEXCEPT { return __state_ != nullptr; }
 
   _LIBCPP_HIDE_FROM_ABI void wait() const { __state_->wait(); }
   template <class _Rep, class _Period>
@@ -2032,7 +2022,7 @@ public:
   _LIBCPP_HIDE_FROM_ABI void swap(shared_future& __rhs) _NOEXCEPT { std::swap(__state_, __rhs.__state_); }
 
   // functions to check state
-  _LIBCPP_HIDE_FROM_ABI bool valid() const _NOEXCEPT { return __state_ != nullptr; }
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI bool valid() const _NOEXCEPT { return __state_ != nullptr; }
 
   _LIBCPP_HIDE_FROM_ABI void wait() const { __state_->wait(); }
   template <class _Rep, class _Period>
diff --git a/lib/libcxx/include/initializer_list b/lib/libcxx/include/initializer_list
index 00e0d4ea4a..44cd456683 100644
--- a/lib/libcxx/include/initializer_list
+++ b/lib/libcxx/include/initializer_list
@@ -78,11 +78,17 @@ public:
 
   _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 initializer_list() _NOEXCEPT : __begin_(nullptr), __size_(0) {}
 
-  _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 size_t size() const _NOEXCEPT { return __size_; }
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 size_t size() const _NOEXCEPT {
+    return __size_;
+  }
 
-  _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 const _Ep* begin() const _NOEXCEPT { return __begin_; }
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 const _Ep* begin() const _NOEXCEPT {
+    return __begin_;
+  }
 
-  _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 const _Ep* end() const _NOEXCEPT { return __begin_ + __size_; }
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 const _Ep* end() const _NOEXCEPT {
+    return __begin_ + __size_;
+  }
 };
 
 template <class _Ep>
diff --git a/lib/libcxx/include/inttypes.h b/lib/libcxx/include/inttypes.h
index 1869284550..cd487a8408 100644
--- a/lib/libcxx/include/inttypes.h
+++ b/lib/libcxx/include/inttypes.h
@@ -236,33 +236,33 @@ uintmax_t wcstoumax(const wchar_t* restrict nptr, wchar_t** restrict endptr, int
 */
 
 #if defined(__cplusplus) && __cplusplus < 201103L && defined(_LIBCPP_USE_FROZEN_CXX03_HEADERS)
-#  include <__cxx03/inttypes.h>
+#  include <__cxx03/__config>
 #else
 #  include <__config>
+#endif
 
-#  if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
-#    pragma GCC system_header
-#  endif
+#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
+#  pragma GCC system_header
+#endif
 
 /* C99 stdlib (e.g. glibc < 2.18) does not provide format macros needed
    for C++11 unless __STDC_FORMAT_MACROS is defined
 */
-#  if defined(__cplusplus) && !defined(__STDC_FORMAT_MACROS)
-#    define __STDC_FORMAT_MACROS
-#  endif
+#if defined(__cplusplus) && !defined(__STDC_FORMAT_MACROS)
+#  define __STDC_FORMAT_MACROS
+#endif
 
-#  if __has_include_next(<inttypes.h>)
-#    include_next <inttypes.h>
-#  endif
+#if __has_include_next(<inttypes.h>)
+#  include_next <inttypes.h>
+#endif
 
-#  ifdef __cplusplus
+#ifdef __cplusplus
 
-#    include <stdint.h>
+#  include <stdint.h>
 
-#    undef imaxabs
-#    undef imaxdiv
+#  undef imaxabs
+#  undef imaxdiv
 
-#  endif // __cplusplus
-#endif   // defined(__cplusplus) && __cplusplus < 201103L && defined(_LIBCPP_USE_FROZEN_CXX03_HEADERS)
+#endif // __cplusplus
 
 #endif // _LIBCPP_INTTYPES_H
diff --git a/lib/libcxx/include/ios b/lib/libcxx/include/ios
index 78fc9d6109..9cf0aa8998 100644
--- a/lib/libcxx/include/ios
+++ b/lib/libcxx/include/ios
@@ -305,25 +305,25 @@ public:
   class _LIBCPP_EXPORTED_FROM_ABI Init;
 
   // 27.5.2.2 fmtflags state:
-  _LIBCPP_HIDE_FROM_ABI fmtflags flags() const;
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI fmtflags flags() const;
   _LIBCPP_HIDE_FROM_ABI fmtflags flags(fmtflags __fmtfl);
   _LIBCPP_HIDE_FROM_ABI fmtflags setf(fmtflags __fmtfl);
   _LIBCPP_HIDE_FROM_ABI fmtflags setf(fmtflags __fmtfl, fmtflags __mask);
   _LIBCPP_HIDE_FROM_ABI void unsetf(fmtflags __mask);
 
-  _LIBCPP_HIDE_FROM_ABI streamsize precision() const;
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI streamsize precision() const;
   _LIBCPP_HIDE_FROM_ABI streamsize precision(streamsize __prec);
-  _LIBCPP_HIDE_FROM_ABI streamsize width() const;
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI streamsize width() const;
   _LIBCPP_HIDE_FROM_ABI streamsize width(streamsize __wide);
 
   // 27.5.2.3 locales:
   locale imbue(const locale& __loc);
-  locale getloc() const;
+  [[__nodiscard__]] locale getloc() const;
 
   // 27.5.2.5 storage:
-  static int xalloc();
-  long& iword(int __index);
-  void*& pword(int __index);
+  [[__nodiscard__]] static int xalloc();
+  [[__nodiscard__]] long& iword(int __index);
+  [[__nodiscard__]] void*& pword(int __index);
 
   // destructor
   virtual ~ios_base();
@@ -338,16 +338,16 @@ public:
 
   static bool sync_with_stdio(bool __sync = true);
 
-  _LIBCPP_HIDE_FROM_ABI iostate rdstate() const;
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI iostate rdstate() const;
   void clear(iostate __state = goodbit);
   _LIBCPP_HIDE_FROM_ABI void setstate(iostate __state);
 
-  _LIBCPP_HIDE_FROM_ABI bool good() const;
-  _LIBCPP_HIDE_FROM_ABI bool eof() const;
-  _LIBCPP_HIDE_FROM_ABI bool fail() const;
-  _LIBCPP_HIDE_FROM_ABI bool bad() const;
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI bool good() const;
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI bool eof() const;
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI bool fail() const;
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI bool bad() const;
 
-  _LIBCPP_HIDE_FROM_ABI iostate exceptions() const;
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI iostate exceptions() const;
   _LIBCPP_HIDE_FROM_ABI void exceptions(iostate __iostate);
 
   void __set_badbit_and_consider_rethrow();
@@ -425,13 +425,13 @@ template <>
 struct is_error_code_enum<io_errc::__lx> : public true_type {};
 #    endif
 
-_LIBCPP_EXPORTED_FROM_ABI const error_category& iostream_category() _NOEXCEPT;
+[[__nodiscard__]] _LIBCPP_EXPORTED_FROM_ABI const error_category& iostream_category() _NOEXCEPT;
 
-inline _LIBCPP_HIDE_FROM_ABI error_code make_error_code(io_errc __e) _NOEXCEPT {
+[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI error_code make_error_code(io_errc __e) _NOEXCEPT {
   return error_code(static_cast<int>(__e), iostream_category());
 }
 
-inline _LIBCPP_HIDE_FROM_ABI error_condition make_error_condition(io_errc __e) _NOEXCEPT {
+[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI error_condition make_error_condition(io_errc __e) _NOEXCEPT {
   return error_condition(static_cast<int>(__e), iostream_category());
 }
 
@@ -580,16 +580,16 @@ public:
   _LIBCPP_HIDE_FROM_ABI explicit operator bool() const { return !fail(); }
 #    endif
 
-  _LIBCPP_HIDE_FROM_ABI bool operator!() const { return fail(); }
-  _LIBCPP_HIDE_FROM_ABI iostate rdstate() const { return ios_base::rdstate(); }
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI bool operator!() const { return fail(); }
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI iostate rdstate() const { return ios_base::rdstate(); }
   _LIBCPP_HIDE_FROM_ABI void clear(iostate __state = goodbit) { ios_base::clear(__state); }
   _LIBCPP_HIDE_FROM_ABI void setstate(iostate __state) { ios_base::setstate(__state); }
-  _LIBCPP_HIDE_FROM_ABI bool good() const { return ios_base::good(); }
-  _LIBCPP_HIDE_FROM_ABI bool eof() const { return ios_base::eof(); }
-  _LIBCPP_HIDE_FROM_ABI bool fail() const { return ios_base::fail(); }
-  _LIBCPP_HIDE_FROM_ABI bool bad() const { return ios_base::bad(); }
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI bool good() const { return ios_base::good(); }
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI bool eof() const { return ios_base::eof(); }
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI bool fail() const { return ios_base::fail(); }
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI bool bad() const { return ios_base::bad(); }
 
-  _LIBCPP_HIDE_FROM_ABI iostate exceptions() const { return ios_base::exceptions(); }
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI iostate exceptions() const { return ios_base::exceptions(); }
   _LIBCPP_HIDE_FROM_ABI void exceptions(iostate __iostate) { ios_base::exceptions(__iostate); }
 
   // 27.5.4.1 Constructor/destructor:
@@ -597,21 +597,21 @@ public:
   ~basic_ios() override;
 
   // 27.5.4.2 Members:
-  _LIBCPP_HIDE_FROM_ABI basic_ostream<char_type, traits_type>* tie() const;
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI basic_ostream<char_type, traits_type>* tie() const;
   _LIBCPP_HIDE_FROM_ABI basic_ostream<char_type, traits_type>* tie(basic_ostream<char_type, traits_type>* __tiestr);
 
-  _LIBCPP_HIDE_FROM_ABI basic_streambuf<char_type, traits_type>* rdbuf() const;
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI basic_streambuf<char_type, traits_type>* rdbuf() const;
   _LIBCPP_HIDE_FROM_ABI basic_streambuf<char_type, traits_type>* rdbuf(basic_streambuf<char_type, traits_type>* __sb);
 
   basic_ios& copyfmt(const basic_ios& __rhs);
 
-  _LIBCPP_HIDE_FROM_ABI char_type fill() const;
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI char_type fill() const;
   _LIBCPP_HIDE_FROM_ABI char_type fill(char_type __ch);
 
   _LIBCPP_HIDE_FROM_ABI locale imbue(const locale& __loc);
 
-  _LIBCPP_HIDE_FROM_ABI char narrow(char_type __c, char __dfault) const;
-  _LIBCPP_HIDE_FROM_ABI char_type widen(char __c) const;
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI char narrow(char_type __c, char __dfault) const;
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI char_type widen(char __c) const;
 
 protected:
   _LIBCPP_HIDE_FROM_ABI basic_ios() {
diff --git a/lib/libcxx/include/istream b/lib/libcxx/include/istream
index 93def61a8b..dfa22e9f3b 100644
--- a/lib/libcxx/include/istream
+++ b/lib/libcxx/include/istream
@@ -70,6 +70,7 @@ public:
     basic_istream& getline(char_type* s, streamsize n, char_type delim);
 
     basic_istream& ignore(streamsize n = 1, int_type delim = traits_type::eof());
+    basic_istream& ignore(streamsize n, char_type delim);                         // Since C++26, implemented as a DR
     int_type peek();
     basic_istream& read (char_type* s, streamsize n);
     streamsize readsome(char_type* s, streamsize n);
@@ -172,6 +173,7 @@ template <class Stream, class T>
 #    include <__type_traits/conjunction.h>
 #    include <__type_traits/enable_if.h>
 #    include <__type_traits/is_base_of.h>
+#    include <__type_traits/is_same.h>
 #    include <__type_traits/make_unsigned.h>
 #    include <__utility/declval.h>
 #    include <__utility/forward.h>
@@ -207,7 +209,7 @@ public:
   typedef typename traits_type::off_type off_type;
 
   // 27.7.1.1.1 Constructor/destructor:
-  inline _LIBCPP_HIDE_FROM_ABI_AFTER_V1 explicit basic_istream(basic_streambuf<char_type, traits_type>* __sb)
+  inline _LIBCPP_HIDE_FROM_ABI_SINCE_LLVM8 explicit basic_istream(basic_streambuf<char_type, traits_type>* __sb)
       : __gc_(0) {
     this->init(__sb);
   }
@@ -219,7 +221,7 @@ protected:
   // 27.7.1.1.2 Assign/swap:
   inline _LIBCPP_HIDE_FROM_ABI basic_istream& operator=(basic_istream&& __rhs);
 
-  inline _LIBCPP_HIDE_FROM_ABI_AFTER_V1 void swap(basic_istream& __rhs) {
+  inline _LIBCPP_HIDE_FROM_ABI_SINCE_LLVM8 void swap(basic_istream& __rhs) {
     std::swap(__gc_, __rhs.__gc_);
     basic_ios<char_type, traits_type>::swap(__rhs);
   }
@@ -232,17 +234,17 @@ public:
   class sentry;
 
   // 27.7.1.2 Formatted input:
-  inline _LIBCPP_HIDE_FROM_ABI_AFTER_V1 basic_istream& operator>>(basic_istream& (*__pf)(basic_istream&)) {
+  inline _LIBCPP_HIDE_FROM_ABI_SINCE_LLVM8 basic_istream& operator>>(basic_istream& (*__pf)(basic_istream&)) {
     return __pf(*this);
   }
 
-  inline _LIBCPP_HIDE_FROM_ABI_AFTER_V1 basic_istream&
+  inline _LIBCPP_HIDE_FROM_ABI_SINCE_LLVM8 basic_istream&
   operator>>(basic_ios<char_type, traits_type>& (*__pf)(basic_ios<char_type, traits_type>&)) {
     __pf(*this);
     return *this;
   }
 
-  inline _LIBCPP_HIDE_FROM_ABI_AFTER_V1 basic_istream& operator>>(ios_base& (*__pf)(ios_base&)) {
+  inline _LIBCPP_HIDE_FROM_ABI_SINCE_LLVM8 basic_istream& operator>>(ios_base& (*__pf)(ios_base&)) {
     __pf(*this);
     return *this;
   }
@@ -263,35 +265,39 @@ public:
   basic_istream& operator>>(void*& __p);
 
   // 27.7.1.3 Unformatted input:
-  _LIBCPP_HIDE_FROM_ABI streamsize gcount() const { return __gc_; }
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI streamsize gcount() const { return __gc_; }
   int_type get();
 
-  inline _LIBCPP_HIDE_FROM_ABI_AFTER_V1 basic_istream& get(char_type& __c) {
+  inline _LIBCPP_HIDE_FROM_ABI_SINCE_LLVM8 basic_istream& get(char_type& __c) {
     int_type __ch = get();
     if (__ch != traits_type::eof())
       __c = traits_type::to_char_type(__ch);
     return *this;
   }
 
-  inline _LIBCPP_HIDE_FROM_ABI_AFTER_V1 basic_istream& get(char_type* __s, streamsize __n) {
+  inline _LIBCPP_HIDE_FROM_ABI_SINCE_LLVM8 basic_istream& get(char_type* __s, streamsize __n) {
     return get(__s, __n, this->widen('\n'));
   }
 
   basic_istream& get(char_type* __s, streamsize __n, char_type __dlm);
 
-  inline _LIBCPP_HIDE_FROM_ABI_AFTER_V1 basic_istream& get(basic_streambuf<char_type, traits_type>& __sb) {
+  inline _LIBCPP_HIDE_FROM_ABI_SINCE_LLVM8 basic_istream& get(basic_streambuf<char_type, traits_type>& __sb) {
     return get(__sb, this->widen('\n'));
   }
 
   basic_istream& get(basic_streambuf<char_type, traits_type>& __sb, char_type __dlm);
 
-  inline _LIBCPP_HIDE_FROM_ABI_AFTER_V1 basic_istream& getline(char_type* __s, streamsize __n) {
+  inline _LIBCPP_HIDE_FROM_ABI_SINCE_LLVM8 basic_istream& getline(char_type* __s, streamsize __n) {
     return getline(__s, __n, this->widen('\n'));
   }
 
   basic_istream& getline(char_type* __s, streamsize __n, char_type __dlm);
 
   basic_istream& ignore(streamsize __n = 1, int_type __dlm = traits_type::eof());
+  template <class _Tp = char_type, __enable_if_t<is_same<_Tp, char>::value, int> = 0>
+  _LIBCPP_HIDE_FROM_ABI basic_istream& ignore(streamsize __n, char_type __delim) {
+    return ignore(__n, traits_type::to_int_type(__delim));
+  }
   int_type peek();
   basic_istream& read(char_type* __s, streamsize __n);
   streamsize readsome(char_type* __s, streamsize __n);
@@ -300,7 +306,7 @@ public:
   basic_istream& unget();
   int sync();
 
-  pos_type tellg();
+  [[__nodiscard__]] pos_type tellg();
   basic_istream& seekg(pos_type __pos);
   basic_istream& seekg(off_type __off, ios_base::seekdir __dir);
 };
@@ -1178,7 +1184,7 @@ public:
   typedef typename traits_type::off_type off_type;
 
   // constructor/destructor
-  inline _LIBCPP_HIDE_FROM_ABI_AFTER_V1 explicit basic_iostream(basic_streambuf<char_type, traits_type>* __sb)
+  inline _LIBCPP_HIDE_FROM_ABI_SINCE_LLVM8 explicit basic_iostream(basic_streambuf<char_type, traits_type>* __sb)
       : basic_istream<_CharT, _Traits>(__sb) {}
 
   ~basic_iostream() override;
@@ -1189,7 +1195,7 @@ protected:
   // assign/swap
   inline _LIBCPP_HIDE_FROM_ABI basic_iostream& operator=(basic_iostream&& __rhs);
 
-  inline _LIBCPP_HIDE_FROM_ABI_AFTER_V1 void swap(basic_iostream& __rhs) {
+  inline _LIBCPP_HIDE_FROM_ABI_SINCE_LLVM8 void swap(basic_iostream& __rhs) {
     basic_istream<char_type, traits_type>::swap(__rhs);
   }
 };
diff --git a/lib/libcxx/include/iterator b/lib/libcxx/include/iterator
index d25fdfd2a8..fc8bdc5e6b 100644
--- a/lib/libcxx/include/iterator
+++ b/lib/libcxx/include/iterator
@@ -737,6 +737,16 @@ template <class E> constexpr const E* data(initializer_list<E> il) noexcept;
 #  include <compare>
 #  include <concepts>
 
+// [range.access.general]
+#  if _LIBCPP_STD_VER >= 20
+#    include <__ranges/access.h>
+#    include <__ranges/data.h>
+#    include <__ranges/empty.h>
+#    include <__ranges/rbegin.h>
+#    include <__ranges/rend.h>
+#    include <__ranges/size.h>
+#  endif
+
 #  if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
 #    pragma GCC system_header
 #  endif
diff --git a/lib/libcxx/include/latch b/lib/libcxx/include/latch
index c98205b150..33268d9655 100644
--- a/lib/libcxx/include/latch
+++ b/lib/libcxx/include/latch
@@ -70,7 +70,9 @@ class latch {
   atomic<ptrdiff_t> __a_;
 
 public:
-  static _LIBCPP_HIDE_FROM_ABI constexpr ptrdiff_t max() noexcept { return numeric_limits<ptrdiff_t>::max(); }
+  [[nodiscard]] static _LIBCPP_HIDE_FROM_ABI constexpr ptrdiff_t max() noexcept {
+    return numeric_limits<ptrdiff_t>::max();
+  }
 
   inline _LIBCPP_HIDE_FROM_ABI constexpr explicit latch(ptrdiff_t __expected) : __a_(__expected) {
     _LIBCPP_ASSERT_ARGUMENT_WITHIN_DOMAIN(
@@ -87,7 +89,7 @@ public:
   latch(const latch&)            = delete;
   latch& operator=(const latch&) = delete;
 
-  inline _LIBCPP_AVAILABILITY_SYNC _LIBCPP_HIDE_FROM_ABI void count_down(ptrdiff_t __update = 1) {
+  inline _LIBCPP_HIDE_FROM_ABI void count_down(ptrdiff_t __update = 1) {
     _LIBCPP_ASSERT_ARGUMENT_WITHIN_DOMAIN(__update >= 0, "latch::count_down called with a negative value");
     auto const __old = __a_.fetch_sub(__update, memory_order_release);
     _LIBCPP_ASSERT_ARGUMENT_WITHIN_DOMAIN(
@@ -97,16 +99,16 @@ public:
     if (__old == __update)
       __a_.notify_all();
   }
-  inline _LIBCPP_HIDE_FROM_ABI bool try_wait() const noexcept {
+  [[nodiscard]] inline _LIBCPP_HIDE_FROM_ABI bool try_wait() const noexcept {
     auto __value = __a_.load(memory_order_acquire);
     return try_wait_impl(__value);
   }
-  inline _LIBCPP_AVAILABILITY_SYNC _LIBCPP_HIDE_FROM_ABI void wait() const {
+  inline _LIBCPP_HIDE_FROM_ABI void wait() const {
     std::__atomic_wait_unless(__a_, memory_order_acquire, [this](ptrdiff_t& __value) -> bool {
       return try_wait_impl(__value);
     });
   }
-  inline _LIBCPP_AVAILABILITY_SYNC _LIBCPP_HIDE_FROM_ABI void arrive_and_wait(ptrdiff_t __update = 1) {
+  inline _LIBCPP_HIDE_FROM_ABI void arrive_and_wait(ptrdiff_t __update = 1) {
     _LIBCPP_ASSERT_ARGUMENT_WITHIN_DOMAIN(__update >= 0, "latch::arrive_and_wait called with a negative value");
     // other preconditions on __update are checked in count_down()
 
diff --git a/lib/libcxx/include/limits b/lib/libcxx/include/limits
index 1205e6a0c2..ff40d2051d 100644
--- a/lib/libcxx/include/limits
+++ b/lib/libcxx/include/limits
@@ -107,7 +107,7 @@ template<> class numeric_limits<cv long double>;
 #else
 #  include <__config>
 #  include <__type_traits/is_arithmetic.h>
-#  include <__type_traits/is_signed.h>
+#  include <__type_traits/is_same.h>
 
 #  if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
 #    pragma GCC system_header
@@ -217,10 +217,10 @@ protected:
 
   static _LIBCPP_CONSTEXPR const bool is_iec559  = false;
   static _LIBCPP_CONSTEXPR const bool is_bounded = true;
-  static _LIBCPP_CONSTEXPR const bool is_modulo  = !std::is_signed<_Tp>::value;
+  static _LIBCPP_CONSTEXPR const bool is_modulo  = !is_signed;
 
-#  if defined(__i386__) || defined(__x86_64__) || defined(__pnacl__) || defined(__wasm__)
-  static _LIBCPP_CONSTEXPR const bool traps = true;
+#  if defined(__i386__) || defined(__x86_64__) || defined(__wasm__)
+  static _LIBCPP_CONSTEXPR const bool traps = is_same<decltype(+_Tp(0)), _Tp>::value;
 #  else
   static _LIBCPP_CONSTEXPR const bool traps = false;
 #  endif
diff --git a/lib/libcxx/include/list b/lib/libcxx/include/list
index 2896231203..a5c84cad51 100644
--- a/lib/libcxx/include/list
+++ b/lib/libcxx/include/list
@@ -228,15 +228,14 @@ template <class T, class Allocator, class Predicate>
 #  include <__ranges/concepts.h>
 #  include <__ranges/container_compatible_range.h>
 #  include <__ranges/from_range.h>
-#  include <__type_traits/conditional.h>
 #  include <__type_traits/container_traits.h>
 #  include <__type_traits/enable_if.h>
 #  include <__type_traits/is_allocator.h>
 #  include <__type_traits/is_nothrow_assignable.h>
 #  include <__type_traits/is_nothrow_constructible.h>
-#  include <__type_traits/is_pointer.h>
 #  include <__type_traits/is_same.h>
 #  include <__type_traits/type_identity.h>
+#  include <__utility/exception_guard.h>
 #  include <__utility/forward.h>
 #  include <__utility/move.h>
 #  include <__utility/swap.h>
@@ -275,17 +274,6 @@ template <class _Tp, class _VoidPtr>
 struct __list_node_pointer_traits {
   typedef __rebind_pointer_t<_VoidPtr, __list_node<_Tp, _VoidPtr> > __node_pointer;
   typedef __rebind_pointer_t<_VoidPtr, __list_node_base<_Tp, _VoidPtr> > __base_pointer;
-
-// TODO(LLVM 22): Remove this check
-#  ifndef _LIBCPP_ABI_LIST_REMOVE_NODE_POINTER_UB
-  static_assert(sizeof(__node_pointer) == sizeof(__node_pointer) && _LIBCPP_ALIGNOF(__base_pointer) ==
-                    _LIBCPP_ALIGNOF(__node_pointer),
-                "It looks like you are using std::list with a fancy pointer type that thas a different representation "
-                "depending on whether it points to a list base pointer or a list node pointer (both of which are "
-                "implementation details of the standard library). This means that your ABI is being broken between "
-                "LLVM 19 and LLVM 20. If you don't care about your ABI being broken, define the "
-                "_LIBCPP_ABI_LIST_REMOVE_NODE_POINTER_UB macro to silence this diagnostic.");
-#  endif
 };
 
 template <class _Tp, class _VoidPtr>
@@ -724,7 +712,7 @@ public:
   _LIBCPP_CONSTEXPR_SINCE_CXX26 _LIBCPP_HIDE_FROM_ABI explicit list(size_type __n, const allocator_type& __a);
 #  endif
   _LIBCPP_CONSTEXPR_SINCE_CXX26 _LIBCPP_HIDE_FROM_ABI list(size_type __n, const value_type& __x);
-  template <__enable_if_t<__is_allocator<_Alloc>::value, int> = 0>
+  template <__enable_if_t<__is_allocator_v<_Alloc>, int> = 0>
   _LIBCPP_CONSTEXPR_SINCE_CXX26 _LIBCPP_HIDE_FROM_ABI
   list(size_type __n, const value_type& __x, const allocator_type& __a)
       : __base(__a) {
@@ -786,57 +774,71 @@ public:
 
   _LIBCPP_CONSTEXPR_SINCE_CXX26 _LIBCPP_HIDE_FROM_ABI void assign(size_type __n, const value_type& __x);
 
-  _LIBCPP_CONSTEXPR_SINCE_CXX26 _LIBCPP_HIDE_FROM_ABI allocator_type get_allocator() const _NOEXCEPT;
+  [[__nodiscard__]] _LIBCPP_CONSTEXPR_SINCE_CXX26 _LIBCPP_HIDE_FROM_ABI allocator_type get_allocator() const _NOEXCEPT;
 
-  _LIBCPP_CONSTEXPR_SINCE_CXX26 _LIBCPP_HIDE_FROM_ABI size_type size() const _NOEXCEPT { return this->__size_; }
+  [[__nodiscard__]] _LIBCPP_CONSTEXPR_SINCE_CXX26 _LIBCPP_HIDE_FROM_ABI size_type size() const _NOEXCEPT {
+    return this->__size_;
+  }
   [[__nodiscard__]] _LIBCPP_CONSTEXPR_SINCE_CXX26 _LIBCPP_HIDE_FROM_ABI bool empty() const _NOEXCEPT {
     return __base::empty();
   }
-  _LIBCPP_CONSTEXPR_SINCE_CXX26 _LIBCPP_HIDE_FROM_ABI size_type max_size() const _NOEXCEPT {
+  [[__nodiscard__]] _LIBCPP_CONSTEXPR_SINCE_CXX26 _LIBCPP_HIDE_FROM_ABI size_type max_size() const _NOEXCEPT {
     return std::min<size_type>(this->__node_alloc_max_size(), numeric_limits<difference_type >::max());
   }
 
-  _LIBCPP_CONSTEXPR_SINCE_CXX26 _LIBCPP_HIDE_FROM_ABI iterator begin() _NOEXCEPT { return __base::begin(); }
-  _LIBCPP_CONSTEXPR_SINCE_CXX26 _LIBCPP_HIDE_FROM_ABI const_iterator begin() const _NOEXCEPT { return __base::begin(); }
-  _LIBCPP_CONSTEXPR_SINCE_CXX26 _LIBCPP_HIDE_FROM_ABI iterator end() _NOEXCEPT { return __base::end(); }
-  _LIBCPP_CONSTEXPR_SINCE_CXX26 _LIBCPP_HIDE_FROM_ABI const_iterator end() const _NOEXCEPT { return __base::end(); }
-  _LIBCPP_CONSTEXPR_SINCE_CXX26 _LIBCPP_HIDE_FROM_ABI const_iterator cbegin() const _NOEXCEPT {
+  [[__nodiscard__]] _LIBCPP_CONSTEXPR_SINCE_CXX26 _LIBCPP_HIDE_FROM_ABI iterator begin() _NOEXCEPT {
     return __base::begin();
   }
-  _LIBCPP_CONSTEXPR_SINCE_CXX26 _LIBCPP_HIDE_FROM_ABI const_iterator cend() const _NOEXCEPT { return __base::end(); }
+  [[__nodiscard__]] _LIBCPP_CONSTEXPR_SINCE_CXX26 _LIBCPP_HIDE_FROM_ABI const_iterator begin() const _NOEXCEPT {
+    return __base::begin();
+  }
+  [[__nodiscard__]] _LIBCPP_CONSTEXPR_SINCE_CXX26 _LIBCPP_HIDE_FROM_ABI iterator end() _NOEXCEPT {
+    return __base::end();
+  }
+  [[__nodiscard__]] _LIBCPP_CONSTEXPR_SINCE_CXX26 _LIBCPP_HIDE_FROM_ABI const_iterator end() const _NOEXCEPT {
+    return __base::end();
+  }
+  [[__nodiscard__]] _LIBCPP_CONSTEXPR_SINCE_CXX26 _LIBCPP_HIDE_FROM_ABI const_iterator cbegin() const _NOEXCEPT {
+    return __base::begin();
+  }
+  [[__nodiscard__]] _LIBCPP_CONSTEXPR_SINCE_CXX26 _LIBCPP_HIDE_FROM_ABI const_iterator cend() const _NOEXCEPT {
+    return __base::end();
+  }
 
-  _LIBCPP_CONSTEXPR_SINCE_CXX26 _LIBCPP_HIDE_FROM_ABI reverse_iterator rbegin() _NOEXCEPT {
+  [[__nodiscard__]] _LIBCPP_CONSTEXPR_SINCE_CXX26 _LIBCPP_HIDE_FROM_ABI reverse_iterator rbegin() _NOEXCEPT {
     return reverse_iterator(end());
   }
-  _LIBCPP_CONSTEXPR_SINCE_CXX26 _LIBCPP_HIDE_FROM_ABI const_reverse_iterator rbegin() const _NOEXCEPT {
+  [[__nodiscard__]] _LIBCPP_CONSTEXPR_SINCE_CXX26 _LIBCPP_HIDE_FROM_ABI const_reverse_iterator
+  rbegin() const _NOEXCEPT {
     return const_reverse_iterator(end());
   }
-  _LIBCPP_CONSTEXPR_SINCE_CXX26 _LIBCPP_HIDE_FROM_ABI reverse_iterator rend() _NOEXCEPT {
+  [[__nodiscard__]] _LIBCPP_CONSTEXPR_SINCE_CXX26 _LIBCPP_HIDE_FROM_ABI reverse_iterator rend() _NOEXCEPT {
     return reverse_iterator(begin());
   }
-  _LIBCPP_CONSTEXPR_SINCE_CXX26 _LIBCPP_HIDE_FROM_ABI const_reverse_iterator rend() const _NOEXCEPT {
+  [[__nodiscard__]] _LIBCPP_CONSTEXPR_SINCE_CXX26 _LIBCPP_HIDE_FROM_ABI const_reverse_iterator rend() const _NOEXCEPT {
     return const_reverse_iterator(begin());
   }
-  _LIBCPP_CONSTEXPR_SINCE_CXX26 _LIBCPP_HIDE_FROM_ABI const_reverse_iterator crbegin() const _NOEXCEPT {
+  [[__nodiscard__]] _LIBCPP_CONSTEXPR_SINCE_CXX26 _LIBCPP_HIDE_FROM_ABI const_reverse_iterator
+  crbegin() const _NOEXCEPT {
     return const_reverse_iterator(end());
   }
-  _LIBCPP_CONSTEXPR_SINCE_CXX26 _LIBCPP_HIDE_FROM_ABI const_reverse_iterator crend() const _NOEXCEPT {
+  [[__nodiscard__]] _LIBCPP_CONSTEXPR_SINCE_CXX26 _LIBCPP_HIDE_FROM_ABI const_reverse_iterator crend() const _NOEXCEPT {
     return const_reverse_iterator(begin());
   }
 
-  _LIBCPP_CONSTEXPR_SINCE_CXX26 _LIBCPP_HIDE_FROM_ABI reference front() {
+  [[__nodiscard__]] _LIBCPP_CONSTEXPR_SINCE_CXX26 _LIBCPP_HIDE_FROM_ABI reference front() {
     _LIBCPP_ASSERT_VALID_ELEMENT_ACCESS(!empty(), "list::front called on empty list");
     return __base::__end_.__next_->__as_node()->__get_value();
   }
-  _LIBCPP_CONSTEXPR_SINCE_CXX26 _LIBCPP_HIDE_FROM_ABI const_reference front() const {
+  [[__nodiscard__]] _LIBCPP_CONSTEXPR_SINCE_CXX26 _LIBCPP_HIDE_FROM_ABI const_reference front() const {
     _LIBCPP_ASSERT_VALID_ELEMENT_ACCESS(!empty(), "list::front called on empty list");
     return __base::__end_.__next_->__as_node()->__get_value();
   }
-  _LIBCPP_CONSTEXPR_SINCE_CXX26 _LIBCPP_HIDE_FROM_ABI reference back() {
+  [[__nodiscard__]] _LIBCPP_CONSTEXPR_SINCE_CXX26 _LIBCPP_HIDE_FROM_ABI reference back() {
     _LIBCPP_ASSERT_VALID_ELEMENT_ACCESS(!empty(), "list::back called on empty list");
     return __base::__end_.__prev_->__as_node()->__get_value();
   }
-  _LIBCPP_CONSTEXPR_SINCE_CXX26 _LIBCPP_HIDE_FROM_ABI const_reference back() const {
+  [[__nodiscard__]] _LIBCPP_CONSTEXPR_SINCE_CXX26 _LIBCPP_HIDE_FROM_ABI const_reference back() const {
     _LIBCPP_ASSERT_VALID_ELEMENT_ACCESS(!empty(), "list::back called on empty list");
     return __base::__end_.__prev_->__as_node()->__get_value();
   }
@@ -1000,22 +1002,22 @@ private:
 
 #  if _LIBCPP_STD_VER >= 17
 template <class _InputIterator,
-          class _Alloc = allocator<__iter_value_type<_InputIterator>>,
+          class _Alloc = allocator<__iterator_value_type<_InputIterator>>,
           class        = enable_if_t<__has_input_iterator_category<_InputIterator>::value>,
-          class        = enable_if_t<__is_allocator<_Alloc>::value> >
-list(_InputIterator, _InputIterator) -> list<__iter_value_type<_InputIterator>, _Alloc>;
+          class        = enable_if_t<__is_allocator_v<_Alloc>>>
+list(_InputIterator, _InputIterator) -> list<__iterator_value_type<_InputIterator>, _Alloc>;
 
 template <class _InputIterator,
           class _Alloc,
           class = enable_if_t<__has_input_iterator_category<_InputIterator>::value>,
-          class = enable_if_t<__is_allocator<_Alloc>::value> >
-list(_InputIterator, _InputIterator, _Alloc) -> list<__iter_value_type<_InputIterator>, _Alloc>;
+          class = enable_if_t<__is_allocator_v<_Alloc>>>
+list(_InputIterator, _InputIterator, _Alloc) -> list<__iterator_value_type<_InputIterator>, _Alloc>;
 #  endif
 
 #  if _LIBCPP_STD_VER >= 23
 template <ranges::input_range _Range,
           class _Alloc = allocator<ranges::range_value_t<_Range>>,
-          class        = enable_if_t<__is_allocator<_Alloc>::value> >
+          class        = enable_if_t<__is_allocator_v<_Alloc>>>
 list(from_range_t, _Range&&, _Alloc = _Alloc()) -> list<ranges::range_value_t<_Range>, _Alloc>;
 #  endif
 
@@ -1233,14 +1235,7 @@ list<_Tp, _Alloc>::insert(const_iterator __p, size_type __n, const value_type& _
     ++__ds;
     __r          = iterator(__node->__as_link());
     iterator __e = __r;
-#  if _LIBCPP_HAS_EXCEPTIONS
-    try {
-#  endif // _LIBCPP_HAS_EXCEPTIONS
-      for (--__n; __n != 0; --__n, (void)++__e, ++__ds) {
-        __e.__ptr_->__next_ = this->__create_node(/* prev = */ __e.__ptr_, /* next = */ nullptr, __x)->__as_link();
-      }
-#  if _LIBCPP_HAS_EXCEPTIONS
-    } catch (...) {
+    auto __guard = std::__make_exception_guard([&] {
       while (true) {
         __base_pointer __prev    = __e.__ptr_->__prev_;
         __node_pointer __current = __e.__ptr_->__as_node();
@@ -1249,9 +1244,11 @@ list<_Tp, _Alloc>::insert(const_iterator __p, size_type __n, const value_type& _
           break;
         __e = iterator(__prev);
       }
-      throw;
+    });
+    for (--__n; __n != 0; --__n, (void)++__e, ++__ds) {
+      __e.__ptr_->__next_ = this->__create_node(/* prev = */ __e.__ptr_, /* next = */ nullptr, __x)->__as_link();
     }
-#  endif // _LIBCPP_HAS_EXCEPTIONS
+    __guard.__complete();
     __link_nodes(__p.__ptr_, __r.__ptr_, __e.__ptr_);
     this->__size_ += __ds;
   }
@@ -1276,14 +1273,7 @@ list<_Tp, _Alloc>::__insert_with_sentinel(const_iterator __p, _Iterator __f, _Se
     ++__ds;
     __r          = iterator(__node->__as_link());
     iterator __e = __r;
-#  if _LIBCPP_HAS_EXCEPTIONS
-    try {
-#  endif // _LIBCPP_HAS_EXCEPTIONS
-      for (++__f; __f != __l; ++__f, (void)++__e, ++__ds) {
-        __e.__ptr_->__next_ = this->__create_node(/* prev = */ __e.__ptr_, /* next = */ nullptr, *__f)->__as_link();
-      }
-#  if _LIBCPP_HAS_EXCEPTIONS
-    } catch (...) {
+    auto __guard = std::__make_exception_guard([&] {
       while (true) {
         __base_pointer __prev    = __e.__ptr_->__prev_;
         __node_pointer __current = __e.__ptr_->__as_node();
@@ -1292,9 +1282,11 @@ list<_Tp, _Alloc>::__insert_with_sentinel(const_iterator __p, _Iterator __f, _Se
           break;
         __e = iterator(__prev);
       }
-      throw;
+    });
+    for (++__f; __f != __l; ++__f, (void)++__e, ++__ds) {
+      __e.__ptr_->__next_ = this->__create_node(/* prev = */ __e.__ptr_, /* next = */ nullptr, *__f)->__as_link();
     }
-#  endif // _LIBCPP_HAS_EXCEPTIONS
+    __guard.__complete();
     __link_nodes(__p.__ptr_, __r.__ptr_, __e.__ptr_);
     this->__size_ += __ds;
   }
@@ -1452,14 +1444,7 @@ _LIBCPP_CONSTEXPR_SINCE_CXX26 void list<_Tp, _Alloc>::resize(size_type __n) {
     ++__ds;
     iterator __r = iterator(__node->__as_link());
     iterator __e = __r;
-#  if _LIBCPP_HAS_EXCEPTIONS
-    try {
-#  endif // _LIBCPP_HAS_EXCEPTIONS
-      for (--__n; __n != 0; --__n, (void)++__e, ++__ds) {
-        __e.__ptr_->__next_ = this->__create_node(/* prev = */ __e.__ptr_, /* next = */ nullptr)->__as_link();
-      }
-#  if _LIBCPP_HAS_EXCEPTIONS
-    } catch (...) {
+    auto __guard = std::__make_exception_guard([&] {
       while (true) {
         __base_pointer __prev    = __e.__ptr_->__prev_;
         __node_pointer __current = __e.__ptr_->__as_node();
@@ -1468,9 +1453,11 @@ _LIBCPP_CONSTEXPR_SINCE_CXX26 void list<_Tp, _Alloc>::resize(size_type __n) {
           break;
         __e = iterator(__prev);
       }
-      throw;
+    });
+    for (--__n; __n != 0; --__n, (void)++__e, ++__ds) {
+      __e.__ptr_->__next_ = this->__create_node(/* prev = */ __e.__ptr_, /* next = */ nullptr)->__as_link();
     }
-#  endif // _LIBCPP_HAS_EXCEPTIONS
+    __guard.__complete();
     __link_nodes_at_back(__r.__ptr_, __e.__ptr_);
     this->__size_ += __ds;
   }
@@ -1488,14 +1475,7 @@ _LIBCPP_CONSTEXPR_SINCE_CXX26 void list<_Tp, _Alloc>::resize(size_type __n, cons
     __base_pointer __nl = __node->__as_link();
     iterator __r        = iterator(__nl);
     iterator __e        = __r;
-#  if _LIBCPP_HAS_EXCEPTIONS
-    try {
-#  endif // _LIBCPP_HAS_EXCEPTIONS
-      for (--__n; __n != 0; --__n, (void)++__e, ++__ds) {
-        __e.__ptr_->__next_ = this->__create_node(/* prev = */ __e.__ptr_, /* next = */ nullptr, __x)->__as_link();
-      }
-#  if _LIBCPP_HAS_EXCEPTIONS
-    } catch (...) {
+    auto __guard        = std::__make_exception_guard([&] {
       while (true) {
         __base_pointer __prev    = __e.__ptr_->__prev_;
         __node_pointer __current = __e.__ptr_->__as_node();
@@ -1504,9 +1484,11 @@ _LIBCPP_CONSTEXPR_SINCE_CXX26 void list<_Tp, _Alloc>::resize(size_type __n, cons
           break;
         __e = iterator(__prev);
       }
-      throw;
+    });
+    for (--__n; __n != 0; --__n, (void)++__e, ++__ds) {
+      __e.__ptr_->__next_ = this->__create_node(/* prev = */ __e.__ptr_, /* next = */ nullptr, __x)->__as_link();
     }
-#  endif // _LIBCPP_HAS_EXCEPTIONS
+    __guard.__complete();
     __link_nodes(__base::__end_as_link(), __r.__ptr_, __e.__ptr_);
     this->__size_ += __ds;
   }
diff --git a/lib/libcxx/include/map b/lib/libcxx/include/map
index 3d88b32dd4..27678b710f 100644
--- a/lib/libcxx/include/map
+++ b/lib/libcxx/include/map
@@ -577,12 +577,12 @@ erase_if(multimap<Key, T, Compare, Allocator>& c, Predicate pred);  // C++20
 #  include <__algorithm/equal.h>
 #  include <__algorithm/lexicographical_compare.h>
 #  include <__algorithm/lexicographical_compare_three_way.h>
+#  include <__algorithm/specialized_algorithms.h>
 #  include <__assert>
 #  include <__config>
 #  include <__functional/binary_function.h>
 #  include <__functional/is_transparent.h>
 #  include <__functional/operations.h>
-#  include <__fwd/map.h>
 #  include <__iterator/erase_if_container.h>
 #  include <__iterator/iterator_traits.h>
 #  include <__iterator/ranges_iterator_traits.h>
@@ -590,19 +590,23 @@ erase_if(multimap<Key, T, Compare, Allocator>& c, Predicate pred);  // C++20
 #  include <__memory/addressof.h>
 #  include <__memory/allocator.h>
 #  include <__memory/allocator_traits.h>
+#  include <__memory/compressed_pair.h>
 #  include <__memory/pointer_traits.h>
 #  include <__memory/unique_ptr.h>
 #  include <__memory_resource/polymorphic_allocator.h>
 #  include <__node_handle>
+#  include <__ranges/access.h>
 #  include <__ranges/concepts.h>
 #  include <__ranges/container_compatible_range.h>
 #  include <__ranges/from_range.h>
 #  include <__tree>
 #  include <__type_traits/container_traits.h>
 #  include <__type_traits/is_allocator.h>
+#  include <__type_traits/make_transparent.h>
 #  include <__type_traits/remove_const.h>
 #  include <__type_traits/type_identity.h>
 #  include <__utility/forward.h>
+#  include <__utility/lazy_synth_three_way_comparator.h>
 #  include <__utility/pair.h>
 #  include <__utility/piecewise_construct.h>
 #  include <__utility/swap.h>
@@ -632,47 +636,9 @@ _LIBCPP_PUSH_MACROS
 
 _LIBCPP_BEGIN_NAMESPACE_STD
 
-template <class _Key,
-          class _CP,
-          class _Compare,
-          bool = is_empty<_Compare>::value && !__libcpp_is_final<_Compare>::value>
-class __map_value_compare : private _Compare {
-public:
-  _LIBCPP_HIDE_FROM_ABI __map_value_compare() _NOEXCEPT_(is_nothrow_default_constructible<_Compare>::value)
-      : _Compare() {}
-  _LIBCPP_HIDE_FROM_ABI __map_value_compare(_Compare __c) _NOEXCEPT_(is_nothrow_copy_constructible<_Compare>::value)
-      : _Compare(__c) {}
-  _LIBCPP_HIDE_FROM_ABI const _Compare& key_comp() const _NOEXCEPT { return *this; }
-  _LIBCPP_HIDE_FROM_ABI bool operator()(const _CP& __x, const _CP& __y) const {
-    return static_cast<const _Compare&>(*this)(__x.first, __y.first);
-  }
-  _LIBCPP_HIDE_FROM_ABI bool operator()(const _CP& __x, const _Key& __y) const {
-    return static_cast<const _Compare&>(*this)(__x.first, __y);
-  }
-  _LIBCPP_HIDE_FROM_ABI bool operator()(const _Key& __x, const _CP& __y) const {
-    return static_cast<const _Compare&>(*this)(__x, __y.first);
-  }
-  _LIBCPP_HIDE_FROM_ABI void swap(__map_value_compare& __y) _NOEXCEPT_(__is_nothrow_swappable_v<_Compare>) {
-    using std::swap;
-    swap(static_cast<_Compare&>(*this), static_cast<_Compare&>(__y));
-  }
-
-#  if _LIBCPP_STD_VER >= 14
-  template <typename _K2>
-  _LIBCPP_HIDE_FROM_ABI bool operator()(const _K2& __x, const _CP& __y) const {
-    return static_cast<const _Compare&>(*this)(__x, __y.first);
-  }
-
-  template <typename _K2>
-  _LIBCPP_HIDE_FROM_ABI bool operator()(const _CP& __x, const _K2& __y) const {
-    return static_cast<const _Compare&>(*this)(__x.first, __y);
-  }
-#  endif
-};
-
 template <class _Key, class _CP, class _Compare>
-class __map_value_compare<_Key, _CP, _Compare, false> {
-  _Compare __comp_;
+class __map_value_compare {
+  _LIBCPP_COMPRESSED_ELEMENT(_Compare, __comp_);
 
 public:
   _LIBCPP_HIDE_FROM_ABI __map_value_compare() _NOEXCEPT_(is_nothrow_default_constructible<_Compare>::value)
@@ -684,7 +650,7 @@ public:
   _LIBCPP_HIDE_FROM_ABI bool operator()(const _CP& __x, const _CP& __y) const { return __comp_(__x.first, __y.first); }
   _LIBCPP_HIDE_FROM_ABI bool operator()(const _CP& __x, const _Key& __y) const { return __comp_(__x.first, __y); }
   _LIBCPP_HIDE_FROM_ABI bool operator()(const _Key& __x, const _CP& __y) const { return __comp_(__x, __y.first); }
-  void swap(__map_value_compare& __y) _NOEXCEPT_(__is_nothrow_swappable_v<_Compare>) {
+  _LIBCPP_HIDE_FROM_ABI void swap(__map_value_compare& __y) _NOEXCEPT_(__is_nothrow_swappable_v<_Compare>) {
     using std::swap;
     swap(__comp_, __y.__comp_);
   }
@@ -702,9 +668,58 @@ public:
 #  endif
 };
 
-template <class _Key, class _CP, class _Compare, bool __b>
+template <class _Key, class _MapValueT, class _Compare>
+struct __make_transparent<_Key, __map_value_compare<_Key, _MapValueT, _Compare> > {
+  using type _LIBCPP_NODEBUG = __map_value_compare<_Key, _MapValueT, __make_transparent_t<_Key, _Compare> >;
+};
+
+#  if _LIBCPP_STD_VER >= 14
+template <class _MapValueT, class _Key, class _Compare>
+struct __lazy_synth_three_way_comparator<__map_value_compare<_Key, _MapValueT, _Compare>, _MapValueT, _MapValueT> {
+  __lazy_synth_three_way_comparator<_Compare, _Key, _Key> __comp_;
+
+  __lazy_synth_three_way_comparator(
+      _LIBCPP_CTOR_LIFETIMEBOUND const __map_value_compare<_Key, _MapValueT, _Compare>& __comp)
+      : __comp_(__comp.key_comp()) {}
+
+  _LIBCPP_HIDE_FROM_ABI auto
+  operator()(_LIBCPP_LIFETIMEBOUND const _MapValueT& __lhs, _LIBCPP_LIFETIMEBOUND const _MapValueT& __rhs) const {
+    return __comp_(__lhs.first, __rhs.first);
+  }
+};
+
+template <class _MapValueT, class _Key, class _TransparentKey, class _Compare>
+struct __lazy_synth_three_way_comparator<__map_value_compare<_Key, _MapValueT, _Compare>, _TransparentKey, _MapValueT> {
+  __lazy_synth_three_way_comparator<_Compare, _TransparentKey, _Key> __comp_;
+
+  __lazy_synth_three_way_comparator(
+      _LIBCPP_CTOR_LIFETIMEBOUND const __map_value_compare<_Key, _MapValueT, _Compare>& __comp)
+      : __comp_(__comp.key_comp()) {}
+
+  _LIBCPP_HIDE_FROM_ABI auto
+  operator()(_LIBCPP_LIFETIMEBOUND const _TransparentKey& __lhs, _LIBCPP_LIFETIMEBOUND const _MapValueT& __rhs) const {
+    return __comp_(__lhs, __rhs.first);
+  }
+};
+
+template <class _MapValueT, class _Key, class _TransparentKey, class _Compare>
+struct __lazy_synth_three_way_comparator<__map_value_compare<_Key, _MapValueT, _Compare>, _MapValueT, _TransparentKey> {
+  __lazy_synth_three_way_comparator<_Compare, _Key, _TransparentKey> __comp_;
+
+  __lazy_synth_three_way_comparator(
+      _LIBCPP_CTOR_LIFETIMEBOUND const __map_value_compare<_Key, _MapValueT, _Compare>& __comp)
+      : __comp_(__comp.key_comp()) {}
+
+  _LIBCPP_HIDE_FROM_ABI auto
+  operator()(_LIBCPP_LIFETIMEBOUND const _MapValueT& __lhs, _LIBCPP_LIFETIMEBOUND const _TransparentKey& __rhs) const {
+    return __comp_(__lhs.first, __rhs);
+  }
+};
+#  endif // _LIBCPP_STD_VER >= 14
+
+template <class _Key, class _CP, class _Compare>
 inline _LIBCPP_HIDE_FROM_ABI void
-swap(__map_value_compare<_Key, _CP, _Compare, __b>& __x, __map_value_compare<_Key, _CP, _Compare, __b>& __y)
+swap(__map_value_compare<_Key, _CP, _Compare>& __x, __map_value_compare<_Key, _CP, _Compare>& __y)
     _NOEXCEPT_(_NOEXCEPT_(__x.swap(__y))) {
   __x.swap(__y);
 }
@@ -742,9 +757,9 @@ public:
 
   _LIBCPP_HIDE_FROM_ABI void operator()(pointer __p) _NOEXCEPT {
     if (__second_constructed)
-      __alloc_traits::destroy(__na_, std::addressof(__p->__value_.second));
+      __alloc_traits::destroy(__na_, std::addressof(__p->__get_value().second));
     if (__first_constructed)
-      __alloc_traits::destroy(__na_, std::addressof(__p->__value_.first));
+      __alloc_traits::destroy(__na_, std::addressof(__p->__get_value().first));
     if (__p)
       __alloc_traits::deallocate(__na_, __p, 1);
   }
@@ -804,8 +819,27 @@ public:
   friend class multimap;
   template <class>
   friend class __map_const_iterator;
+
+  template <class, class...>
+  friend struct __specialized_algorithm;
 };
 
+#  ifndef _LIBCPP_CXX03_LANG
+template <class _Alg, class _TreeIterator>
+struct __specialized_algorithm<_Alg, __iterator_pair<__map_iterator<_TreeIterator>, __map_iterator<_TreeIterator>>> {
+  using __base _LIBCPP_NODEBUG = __specialized_algorithm<_Alg, __iterator_pair<_TreeIterator, _TreeIterator>>;
+
+  static const bool __has_algorithm = __base::__has_algorithm;
+
+  using __iterator _LIBCPP_NODEBUG = __map_iterator<_TreeIterator>;
+
+  template <class... _Args>
+  _LIBCPP_HIDE_FROM_ABI static void operator()(__iterator __first, __iterator __last, _Args&&... __args) {
+    __base()(__first.__i_, __last.__i_, std::forward<_Args>(__args)...);
+  }
+};
+#  endif
+
 template <class _TreeIterator>
 class __map_const_iterator {
   _TreeIterator __i_;
@@ -859,9 +893,33 @@ public:
   friend class multimap;
   template <class, class, class>
   friend class __tree_const_iterator;
+
+  template <class, class...>
+  friend struct __specialized_algorithm;
 };
 
-template <class _Key, class _Tp, class _Compare, class _Allocator>
+#  ifndef _LIBCPP_CXX03_LANG
+template <class _Alg, class _TreeIterator>
+struct __specialized_algorithm<
+    _Alg,
+    __iterator_pair<__map_const_iterator<_TreeIterator>, __map_const_iterator<_TreeIterator>>> {
+  using __base _LIBCPP_NODEBUG = __specialized_algorithm<_Alg, __iterator_pair<_TreeIterator, _TreeIterator>>;
+
+  static const bool __has_algorithm = __base::__has_algorithm;
+
+  using __iterator _LIBCPP_NODEBUG = __map_const_iterator<_TreeIterator>;
+
+  template <class... _Args>
+  _LIBCPP_HIDE_FROM_ABI static void operator()(__iterator __first, __iterator __last, _Args&&... __args) {
+    __base()(__first.__i_, __last.__i_, std::forward<_Args>(__args)...);
+  }
+};
+#  endif
+
+template <class _Key, class _Tp, class _Compare = less<_Key>, class _Allocator = allocator<pair<const _Key, _Tp> > >
+class multimap;
+
+template <class _Key, class _Tp, class _Compare = less<_Key>, class _Allocator = allocator<pair<const _Key, _Tp> > >
 class map {
 public:
   // types:
@@ -970,17 +1028,17 @@ public:
       : map(from_range, std::forward<_Range>(__range), key_compare(), __a) {}
 #  endif
 
-  _LIBCPP_HIDE_FROM_ABI map(const map& __m) : __tree_(__m.__tree_) { insert(__m.begin(), __m.end()); }
+  _LIBCPP_HIDE_FROM_ABI map(const map& __m) = default;
 
   _LIBCPP_HIDE_FROM_ABI map& operator=(const map& __m) = default;
 
 #  ifndef _LIBCPP_CXX03_LANG
 
-  _LIBCPP_HIDE_FROM_ABI map(map&& __m) noexcept(is_nothrow_move_constructible<__base>::value) = default;
+  _LIBCPP_HIDE_FROM_ABI map(map&& __m) = default;
 
-  _LIBCPP_HIDE_FROM_ABI map(map&& __m, const allocator_type& __a);
+  _LIBCPP_HIDE_FROM_ABI map(map&& __m, const allocator_type& __a) : __tree_(std::move(__m.__tree_), __a) {}
 
-  _LIBCPP_HIDE_FROM_ABI map& operator=(map&& __m) noexcept(is_nothrow_move_assignable<__base>::value) = default;
+  _LIBCPP_HIDE_FROM_ABI map& operator=(map&& __m) = default;
 
   _LIBCPP_HIDE_FROM_ABI map(initializer_list<value_type> __il, const key_compare& __comp = key_compare())
       : __tree_(__vc(__comp)) {
@@ -998,7 +1056,8 @@ public:
 #    endif
 
   _LIBCPP_HIDE_FROM_ABI map& operator=(initializer_list<value_type> __il) {
-    __tree_.__assign_unique(__il.begin(), __il.end());
+    clear();
+    insert(__il.begin(), __il.end());
     return *this;
   }
 
@@ -1006,43 +1065,66 @@ public:
 
   _LIBCPP_HIDE_FROM_ABI explicit map(const allocator_type& __a) : __tree_(typename __base::allocator_type(__a)) {}
 
-  _LIBCPP_HIDE_FROM_ABI map(const map& __m, const allocator_type& __a)
-      : __tree_(__m.__tree_.value_comp(), typename __base::allocator_type(__a)) {
-    insert(__m.begin(), __m.end());
-  }
+  _LIBCPP_HIDE_FROM_ABI map(const map& __m, const allocator_type& __alloc) : __tree_(__m.__tree_, __alloc) {}
 
   _LIBCPP_HIDE_FROM_ABI ~map() { static_assert(sizeof(std::__diagnose_non_const_comparator<_Key, _Compare>()), ""); }
 
-  _LIBCPP_HIDE_FROM_ABI iterator begin() _NOEXCEPT { return __tree_.begin(); }
-  _LIBCPP_HIDE_FROM_ABI const_iterator begin() const _NOEXCEPT { return __tree_.begin(); }
-  _LIBCPP_HIDE_FROM_ABI iterator end() _NOEXCEPT { return __tree_.end(); }
-  _LIBCPP_HIDE_FROM_ABI const_iterator end() const _NOEXCEPT { return __tree_.end(); }
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI iterator begin() _NOEXCEPT { return __tree_.begin(); }
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI const_iterator begin() const _NOEXCEPT { return __tree_.begin(); }
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI iterator end() _NOEXCEPT { return __tree_.end(); }
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI const_iterator end() const _NOEXCEPT { return __tree_.end(); }
 
-  _LIBCPP_HIDE_FROM_ABI reverse_iterator rbegin() _NOEXCEPT { return reverse_iterator(end()); }
-  _LIBCPP_HIDE_FROM_ABI const_reverse_iterator rbegin() const _NOEXCEPT { return const_reverse_iterator(end()); }
-  _LIBCPP_HIDE_FROM_ABI reverse_iterator rend() _NOEXCEPT { return reverse_iterator(begin()); }
-  _LIBCPP_HIDE_FROM_ABI const_reverse_iterator rend() const _NOEXCEPT { return const_reverse_iterator(begin()); }
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI reverse_iterator rbegin() _NOEXCEPT { return reverse_iterator(end()); }
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI const_reverse_iterator rbegin() const _NOEXCEPT {
+    return const_reverse_iterator(end());
+  }
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI reverse_iterator rend() _NOEXCEPT { return reverse_iterator(begin()); }
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI const_reverse_iterator rend() const _NOEXCEPT {
+    return const_reverse_iterator(begin());
+  }
 
-  _LIBCPP_HIDE_FROM_ABI const_iterator cbegin() const _NOEXCEPT { return begin(); }
-  _LIBCPP_HIDE_FROM_ABI const_iterator cend() const _NOEXCEPT { return end(); }
-  _LIBCPP_HIDE_FROM_ABI const_reverse_iterator crbegin() const _NOEXCEPT { return rbegin(); }
-  _LIBCPP_HIDE_FROM_ABI const_reverse_iterator crend() const _NOEXCEPT { return rend(); }
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI const_iterator cbegin() const _NOEXCEPT { return begin(); }
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI const_iterator cend() const _NOEXCEPT { return end(); }
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI const_reverse_iterator crbegin() const _NOEXCEPT { return rbegin(); }
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI const_reverse_iterator crend() const _NOEXCEPT { return rend(); }
 
   [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI bool empty() const _NOEXCEPT { return __tree_.size() == 0; }
-  _LIBCPP_HIDE_FROM_ABI size_type size() const _NOEXCEPT { return __tree_.size(); }
-  _LIBCPP_HIDE_FROM_ABI size_type max_size() const _NOEXCEPT { return __tree_.max_size(); }
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI size_type size() const _NOEXCEPT { return __tree_.size(); }
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI size_type max_size() const _NOEXCEPT { return __tree_.max_size(); }
 
   _LIBCPP_HIDE_FROM_ABI mapped_type& operator[](const key_type& __k);
 #  ifndef _LIBCPP_CXX03_LANG
   _LIBCPP_HIDE_FROM_ABI mapped_type& operator[](key_type&& __k);
 #  endif
 
-  _LIBCPP_HIDE_FROM_ABI mapped_type& at(const key_type& __k);
-  _LIBCPP_HIDE_FROM_ABI const mapped_type& at(const key_type& __k) const;
+  template <class _Arg,
+            __enable_if_t<__is_transparently_comparable_v<_Compare, key_type, __remove_cvref_t<_Arg> >, int> = 0>
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI mapped_type& at(_Arg&& __arg) {
+    auto [_, __child] = __tree_.__find_equal(__arg);
+    if (__child == nullptr)
+      std::__throw_out_of_range("map::at:  key not found");
+    return static_cast<__node_pointer>(__child)->__get_value().second;
+  }
 
-  _LIBCPP_HIDE_FROM_ABI allocator_type get_allocator() const _NOEXCEPT { return allocator_type(__tree_.__alloc()); }
-  _LIBCPP_HIDE_FROM_ABI key_compare key_comp() const { return __tree_.value_comp().key_comp(); }
-  _LIBCPP_HIDE_FROM_ABI value_compare value_comp() const { return value_compare(__tree_.value_comp().key_comp()); }
+  template <class _Arg,
+            __enable_if_t<__is_transparently_comparable_v<_Compare, key_type, __remove_cvref_t<_Arg> >, int> = 0>
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI const mapped_type& at(_Arg&& __arg) const {
+    auto [_, __child] = __tree_.__find_equal(__arg);
+    if (__child == nullptr)
+      std::__throw_out_of_range("map::at:  key not found");
+    return static_cast<__node_pointer>(__child)->__get_value().second;
+  }
+
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI mapped_type& at(const key_type& __k);
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI const mapped_type& at(const key_type& __k) const;
+
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI allocator_type get_allocator() const _NOEXCEPT {
+    return allocator_type(__tree_.__alloc());
+  }
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI key_compare key_comp() const { return __tree_.value_comp().key_comp(); }
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI value_compare value_comp() const {
+    return value_compare(__tree_.value_comp().key_comp());
+  }
 
 #  ifndef _LIBCPP_CXX03_LANG
   template <class... _Args>
@@ -1052,7 +1134,7 @@ public:
 
   template <class... _Args>
   _LIBCPP_HIDE_FROM_ABI iterator emplace_hint(const_iterator __p, _Args&&... __args) {
-    return __tree_.__emplace_hint_unique(__p.__i_, std::forward<_Args>(__args)...);
+    return __tree_.__emplace_hint_unique(__p.__i_, std::forward<_Args>(__args)...).first;
   }
 
   template <class _Pp, __enable_if_t<is_constructible<value_type, _Pp>::value, int> = 0>
@@ -1062,7 +1144,7 @@ public:
 
   template <class _Pp, __enable_if_t<is_constructible<value_type, _Pp>::value, int> = 0>
   _LIBCPP_HIDE_FROM_ABI iterator insert(const_iterator __pos, _Pp&& __p) {
-    return __tree_.__emplace_hint_unique(__pos.__i_, std::forward<_Pp>(__p));
+    return __tree_.__emplace_hint_unique(__pos.__i_, std::forward<_Pp>(__p)).first;
   }
 
 #  endif // _LIBCPP_CXX03_LANG
@@ -1070,7 +1152,7 @@ public:
   _LIBCPP_HIDE_FROM_ABI pair<iterator, bool> insert(const value_type& __v) { return __tree_.__emplace_unique(__v); }
 
   _LIBCPP_HIDE_FROM_ABI iterator insert(const_iterator __p, const value_type& __v) {
-    return __tree_.__emplace_hint_unique(__p.__i_, __v);
+    return __tree_.__emplace_hint_unique(__p.__i_, __v).first;
   }
 
 #  ifndef _LIBCPP_CXX03_LANG
@@ -1079,25 +1161,21 @@ public:
   }
 
   _LIBCPP_HIDE_FROM_ABI iterator insert(const_iterator __p, value_type&& __v) {
-    return __tree_.__emplace_hint_unique(__p.__i_, std::move(__v));
+    return __tree_.__emplace_hint_unique(__p.__i_, std::move(__v)).first;
   }
 
   _LIBCPP_HIDE_FROM_ABI void insert(initializer_list<value_type> __il) { insert(__il.begin(), __il.end()); }
 #  endif
 
   template <class _InputIterator>
-  _LIBCPP_HIDE_FROM_ABI void insert(_InputIterator __f, _InputIterator __l) {
-    for (const_iterator __e = cend(); __f != __l; ++__f)
-      insert(__e.__i_, *__f);
+  _LIBCPP_HIDE_FROM_ABI void insert(_InputIterator __first, _InputIterator __last) {
+    __tree_.__insert_range_unique(__first, __last);
   }
 
 #  if _LIBCPP_STD_VER >= 23
   template <_ContainerCompatibleRange<value_type> _Range>
   _LIBCPP_HIDE_FROM_ABI void insert_range(_Range&& __range) {
-    const_iterator __end = cend();
-    for (auto&& __element : __range) {
-      insert(__end.__i_, std::forward<decltype(__element)>(__element));
-    }
+    __tree_.__insert_range_unique(ranges::begin(__range), ranges::end(__range));
   }
 #  endif
 
@@ -1105,17 +1183,13 @@ public:
 
   template <class... _Args>
   _LIBCPP_HIDE_FROM_ABI pair<iterator, bool> try_emplace(const key_type& __k, _Args&&... __args) {
-    return __tree_.__emplace_unique_key_args(
-        __k,
-        std::piecewise_construct,
-        std::forward_as_tuple(__k),
-        std::forward_as_tuple(std::forward<_Args>(__args)...));
+    return __tree_.__emplace_unique(
+        std::piecewise_construct, std::forward_as_tuple(__k), std::forward_as_tuple(std::forward<_Args>(__args)...));
   }
 
   template <class... _Args>
   _LIBCPP_HIDE_FROM_ABI pair<iterator, bool> try_emplace(key_type&& __k, _Args&&... __args) {
-    return __tree_.__emplace_unique_key_args(
-        __k,
+    return __tree_.__emplace_unique(
         std::piecewise_construct,
         std::forward_as_tuple(std::move(__k)),
         std::forward_as_tuple(std::forward<_Args>(__args)...));
@@ -1124,9 +1198,8 @@ public:
   template <class... _Args>
   _LIBCPP_HIDE_FROM_ABI iterator try_emplace(const_iterator __h, const key_type& __k, _Args&&... __args) {
     return __tree_
-        .__emplace_hint_unique_key_args(
+        .__emplace_hint_unique(
             __h.__i_,
-            __k,
             std::piecewise_construct,
             std::forward_as_tuple(__k),
             std::forward_as_tuple(std::forward<_Args>(__args)...))
@@ -1136,9 +1209,8 @@ public:
   template <class... _Args>
   _LIBCPP_HIDE_FROM_ABI iterator try_emplace(const_iterator __h, key_type&& __k, _Args&&... __args) {
     return __tree_
-        .__emplace_hint_unique_key_args(
+        .__emplace_hint_unique(
             __h.__i_,
-            __k,
             std::piecewise_construct,
             std::forward_as_tuple(std::move(__k)),
             std::forward_as_tuple(std::forward<_Args>(__args)...))
@@ -1147,27 +1219,25 @@ public:
 
   template <class _Vp>
   _LIBCPP_HIDE_FROM_ABI pair<iterator, bool> insert_or_assign(const key_type& __k, _Vp&& __v) {
-    iterator __p = lower_bound(__k);
-    if (__p != end() && !key_comp()(__k, __p->first)) {
-      __p->second = std::forward<_Vp>(__v);
-      return std::make_pair(__p, false);
-    }
-    return std::make_pair(emplace_hint(__p, __k, std::forward<_Vp>(__v)), true);
+    auto __result              = __tree_.__emplace_unique(__k, std::forward<_Vp>(__v));
+    auto& [__iter, __inserted] = __result;
+    if (!__inserted)
+      __iter->second = std::forward<_Vp>(__v);
+    return __result;
   }
 
   template <class _Vp>
   _LIBCPP_HIDE_FROM_ABI pair<iterator, bool> insert_or_assign(key_type&& __k, _Vp&& __v) {
-    iterator __p = lower_bound(__k);
-    if (__p != end() && !key_comp()(__k, __p->first)) {
-      __p->second = std::forward<_Vp>(__v);
-      return std::make_pair(__p, false);
-    }
-    return std::make_pair(emplace_hint(__p, std::move(__k), std::forward<_Vp>(__v)), true);
+    auto __result              = __tree_.__emplace_unique(std::move(__k), std::forward<_Vp>(__v));
+    auto& [__iter, __inserted] = __result;
+    if (!__inserted)
+      __iter->second = std::forward<_Vp>(__v);
+    return __result;
   }
 
   template <class _Vp>
   _LIBCPP_HIDE_FROM_ABI iterator insert_or_assign(const_iterator __h, const key_type& __k, _Vp&& __v) {
-    auto [__r, __inserted] = __tree_.__emplace_hint_unique_key_args(__h.__i_, __k, __k, std::forward<_Vp>(__v));
+    auto [__r, __inserted] = __tree_.__emplace_hint_unique(__h.__i_, __k, std::forward<_Vp>(__v));
 
     if (!__inserted)
       __r->second = std::forward<_Vp>(__v);
@@ -1177,8 +1247,7 @@ public:
 
   template <class _Vp>
   _LIBCPP_HIDE_FROM_ABI iterator insert_or_assign(const_iterator __h, key_type&& __k, _Vp&& __v) {
-    auto [__r, __inserted] =
-        __tree_.__emplace_hint_unique_key_args(__h.__i_, __k, std::move(__k), std::forward<_Vp>(__v));
+    auto [__r, __inserted] = __tree_.__emplace_hint_unique(__h.__i_, std::move(__k), std::forward<_Vp>(__v));
 
     if (!__inserted)
       __r->second = std::forward<_Vp>(__v);
@@ -1207,10 +1276,10 @@ public:
                                         "node_type with incompatible allocator passed to map::insert()");
     return __tree_.template __node_handle_insert_unique<node_type>(__hint.__i_, std::move(__nh));
   }
-  _LIBCPP_HIDE_FROM_ABI node_type extract(key_type const& __key) {
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI node_type extract(key_type const& __key) {
     return __tree_.template __node_handle_extract<node_type>(__key);
   }
-  _LIBCPP_HIDE_FROM_ABI node_type extract(const_iterator __it) {
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI node_type extract(const_iterator __it) {
     return __tree_.template __node_handle_extract<node_type>(__it.__i_);
   }
   template <class _Compare2>
@@ -1241,75 +1310,105 @@ public:
 
   _LIBCPP_HIDE_FROM_ABI void swap(map& __m) _NOEXCEPT_(__is_nothrow_swappable_v<__base>) { __tree_.swap(__m.__tree_); }
 
-  _LIBCPP_HIDE_FROM_ABI iterator find(const key_type& __k) { return __tree_.find(__k); }
-  _LIBCPP_HIDE_FROM_ABI const_iterator find(const key_type& __k) const { return __tree_.find(__k); }
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI iterator find(const key_type& __k) { return __tree_.find(__k); }
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI const_iterator find(const key_type& __k) const { return __tree_.find(__k); }
 #  if _LIBCPP_STD_VER >= 14
-  template <typename _K2, enable_if_t<__is_transparent_v<_Compare, _K2>, int> = 0>
-  _LIBCPP_HIDE_FROM_ABI iterator find(const _K2& __k) {
+  template <typename _K2,
+            enable_if_t<__is_transparent_v<_Compare, _K2> || __is_transparently_comparable_v<_Compare, key_type, _K2>,
+                        int> = 0>
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI iterator find(const _K2& __k) {
     return __tree_.find(__k);
   }
-  template <typename _K2, enable_if_t<__is_transparent_v<_Compare, _K2>, int> = 0>
-  _LIBCPP_HIDE_FROM_ABI const_iterator find(const _K2& __k) const {
+  template <typename _K2,
+            enable_if_t<__is_transparent_v<_Compare, _K2> || __is_transparently_comparable_v<_Compare, key_type, _K2>,
+                        int> = 0>
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI const_iterator find(const _K2& __k) const {
     return __tree_.find(__k);
   }
 #  endif
 
-  _LIBCPP_HIDE_FROM_ABI size_type count(const key_type& __k) const { return __tree_.__count_unique(__k); }
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI size_type count(const key_type& __k) const {
+    return __tree_.__count_unique(__k);
+  }
 #  if _LIBCPP_STD_VER >= 14
   template <typename _K2, enable_if_t<__is_transparent_v<_Compare, _K2>, int> = 0>
-  _LIBCPP_HIDE_FROM_ABI size_type count(const _K2& __k) const {
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI size_type count(const _K2& __k) const {
     return __tree_.__count_multi(__k);
   }
 #  endif
 
 #  if _LIBCPP_STD_VER >= 20
-  _LIBCPP_HIDE_FROM_ABI bool contains(const key_type& __k) const { return find(__k) != end(); }
-  template <typename _K2, enable_if_t<__is_transparent_v<_Compare, _K2>, int> = 0>
-  _LIBCPP_HIDE_FROM_ABI bool contains(const _K2& __k) const {
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI bool contains(const key_type& __k) const { return find(__k) != end(); }
+  template <typename _K2,
+            enable_if_t<__is_transparent_v<_Compare, _K2> || __is_transparently_comparable_v<_Compare, key_type, _K2>,
+                        int> = 0>
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI bool contains(const _K2& __k) const {
     return find(__k) != end();
   }
 #  endif // _LIBCPP_STD_VER >= 20
 
-  _LIBCPP_HIDE_FROM_ABI iterator lower_bound(const key_type& __k) { return __tree_.lower_bound(__k); }
-  _LIBCPP_HIDE_FROM_ABI const_iterator lower_bound(const key_type& __k) const { return __tree_.lower_bound(__k); }
-#  if _LIBCPP_STD_VER >= 14
-  template <typename _K2, enable_if_t<__is_transparent_v<_Compare, _K2>, int> = 0>
-  _LIBCPP_HIDE_FROM_ABI iterator lower_bound(const _K2& __k) {
-    return __tree_.lower_bound(__k);
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI iterator lower_bound(const key_type& __k) {
+    return __tree_.__lower_bound_unique(__k);
   }
 
-  template <typename _K2, enable_if_t<__is_transparent_v<_Compare, _K2>, int> = 0>
-  _LIBCPP_HIDE_FROM_ABI const_iterator lower_bound(const _K2& __k) const {
-    return __tree_.lower_bound(__k);
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI const_iterator lower_bound(const key_type& __k) const {
+    return __tree_.__lower_bound_unique(__k);
+  }
+
+  // The transparent versions of the lookup functions use the _multi version, since a non-element key is allowed to
+  // match multiple elements.
+#  if _LIBCPP_STD_VER >= 14
+  template <typename _K2,
+            enable_if_t<__is_transparent_v<_Compare, _K2> || __is_transparently_comparable_v<_Compare, key_type, _K2>,
+                        int> = 0>
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI iterator lower_bound(const _K2& __k) {
+    return __tree_.__lower_bound_multi(__k);
+  }
+
+  template <typename _K2,
+            enable_if_t<__is_transparent_v<_Compare, _K2> || __is_transparently_comparable_v<_Compare, key_type, _K2>,
+                        int> = 0>
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI const_iterator lower_bound(const _K2& __k) const {
+    return __tree_.__lower_bound_multi(__k);
   }
 #  endif
 
-  _LIBCPP_HIDE_FROM_ABI iterator upper_bound(const key_type& __k) { return __tree_.upper_bound(__k); }
-  _LIBCPP_HIDE_FROM_ABI const_iterator upper_bound(const key_type& __k) const { return __tree_.upper_bound(__k); }
-#  if _LIBCPP_STD_VER >= 14
-  template <typename _K2, enable_if_t<__is_transparent_v<_Compare, _K2>, int> = 0>
-  _LIBCPP_HIDE_FROM_ABI iterator upper_bound(const _K2& __k) {
-    return __tree_.upper_bound(__k);
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI iterator upper_bound(const key_type& __k) {
+    return __tree_.__upper_bound_unique(__k);
   }
-  template <typename _K2, enable_if_t<__is_transparent_v<_Compare, _K2>, int> = 0>
-  _LIBCPP_HIDE_FROM_ABI const_iterator upper_bound(const _K2& __k) const {
-    return __tree_.upper_bound(__k);
+
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI const_iterator upper_bound(const key_type& __k) const {
+    return __tree_.__upper_bound_unique(__k);
+  }
+
+#  if _LIBCPP_STD_VER >= 14
+  template <typename _K2,
+            enable_if_t<__is_transparent_v<_Compare, _K2> || __is_transparently_comparable_v<_Compare, key_type, _K2>,
+                        int> = 0>
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI iterator upper_bound(const _K2& __k) {
+    return __tree_.__upper_bound_multi(__k);
+  }
+  template <typename _K2,
+            enable_if_t<__is_transparent_v<_Compare, _K2> || __is_transparently_comparable_v<_Compare, key_type, _K2>,
+                        int> = 0>
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI const_iterator upper_bound(const _K2& __k) const {
+    return __tree_.__upper_bound_multi(__k);
   }
 #  endif
 
-  _LIBCPP_HIDE_FROM_ABI pair<iterator, iterator> equal_range(const key_type& __k) {
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI pair<iterator, iterator> equal_range(const key_type& __k) {
     return __tree_.__equal_range_unique(__k);
   }
-  _LIBCPP_HIDE_FROM_ABI pair<const_iterator, const_iterator> equal_range(const key_type& __k) const {
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI pair<const_iterator, const_iterator> equal_range(const key_type& __k) const {
     return __tree_.__equal_range_unique(__k);
   }
 #  if _LIBCPP_STD_VER >= 14
   template <typename _K2, enable_if_t<__is_transparent_v<_Compare, _K2>, int> = 0>
-  _LIBCPP_HIDE_FROM_ABI pair<iterator, iterator> equal_range(const _K2& __k) {
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI pair<iterator, iterator> equal_range(const _K2& __k) {
     return __tree_.__equal_range_multi(__k);
   }
   template <typename _K2, enable_if_t<__is_transparent_v<_Compare, _K2>, int> = 0>
-  _LIBCPP_HIDE_FROM_ABI pair<const_iterator, const_iterator> equal_range(const _K2& __k) const {
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI pair<const_iterator, const_iterator> equal_range(const _K2& __k) const {
     return __tree_.__equal_range_multi(__k);
   }
 #  endif
@@ -1319,7 +1418,6 @@ private:
   typedef typename __base::__node_allocator __node_allocator;
   typedef typename __base::__node_pointer __node_pointer;
   typedef typename __base::__node_base_pointer __node_base_pointer;
-  typedef typename __base::__parent_pointer __parent_pointer;
 
   typedef __map_node_destructor<__node_allocator> _Dp;
   typedef unique_ptr<__node, _Dp> __node_holder;
@@ -1327,6 +1425,8 @@ private:
 #  ifdef _LIBCPP_CXX03_LANG
   _LIBCPP_HIDE_FROM_ABI __node_holder __construct_node_with_key(const key_type& __k);
 #  endif
+
+  friend struct __specialized_algorithm<_Algorithm::__for_each, __single_range<map> >;
 };
 
 #  if _LIBCPP_STD_VER >= 17
@@ -1334,8 +1434,8 @@ template <class _InputIterator,
           class _Compare   = less<__iter_key_type<_InputIterator>>,
           class _Allocator = allocator<__iter_to_alloc_type<_InputIterator>>,
           class            = enable_if_t<__has_input_iterator_category<_InputIterator>::value, void>,
-          class            = enable_if_t<!__is_allocator<_Compare>::value, void>,
-          class            = enable_if_t<__is_allocator<_Allocator>::value, void>>
+          class            = enable_if_t<!__is_allocator_v<_Compare>>,
+          class            = enable_if_t<__is_allocator_v<_Allocator>>>
 map(_InputIterator, _InputIterator, _Compare = _Compare(), _Allocator = _Allocator())
     -> map<__iter_key_type<_InputIterator>, __iter_mapped_type<_InputIterator>, _Compare, _Allocator>;
 
@@ -1343,8 +1443,8 @@ map(_InputIterator, _InputIterator, _Compare = _Compare(), _Allocator = _Allocat
 template <ranges::input_range _Range,
           class _Compare   = less<__range_key_type<_Range>>,
           class _Allocator = allocator<__range_to_alloc_type<_Range>>,
-          class            = enable_if_t<!__is_allocator<_Compare>::value, void>,
-          class            = enable_if_t<__is_allocator<_Allocator>::value, void>>
+          class            = enable_if_t<!__is_allocator_v<_Compare>>,
+          class            = enable_if_t<__is_allocator_v<_Allocator>>>
 map(from_range_t, _Range&&, _Compare = _Compare(), _Allocator = _Allocator())
     -> map<__range_key_type<_Range>, __range_mapped_type<_Range>, _Compare, _Allocator>;
 #    endif
@@ -1353,16 +1453,15 @@ template <class _Key,
           class _Tp,
           class _Compare   = less<remove_const_t<_Key>>,
           class _Allocator = allocator<pair<const _Key, _Tp>>,
-          class            = enable_if_t<!__is_allocator<_Compare>::value, void>,
-          class            = enable_if_t<__is_allocator<_Allocator>::value, void>>
-map(initializer_list<pair<_Key, _Tp>>,
-    _Compare   = _Compare(),
-    _Allocator = _Allocator()) -> map<remove_const_t<_Key>, _Tp, _Compare, _Allocator>;
+          class            = enable_if_t<!__is_allocator_v<_Compare>>,
+          class            = enable_if_t<__is_allocator_v<_Allocator>>>
+map(initializer_list<pair<_Key, _Tp>>, _Compare = _Compare(), _Allocator = _Allocator())
+    -> map<remove_const_t<_Key>, _Tp, _Compare, _Allocator>;
 
 template <class _InputIterator,
           class _Allocator,
           class = enable_if_t<__has_input_iterator_category<_InputIterator>::value, void>,
-          class = enable_if_t<__is_allocator<_Allocator>::value, void>>
+          class = enable_if_t<__is_allocator_v<_Allocator>>>
 map(_InputIterator, _InputIterator, _Allocator)
     -> map<__iter_key_type<_InputIterator>,
            __iter_mapped_type<_InputIterator>,
@@ -1370,44 +1469,44 @@ map(_InputIterator, _InputIterator, _Allocator)
            _Allocator>;
 
 #    if _LIBCPP_STD_VER >= 23
-template <ranges::input_range _Range, class _Allocator, class = enable_if_t<__is_allocator<_Allocator>::value, void>>
+template <ranges::input_range _Range, class _Allocator, class = enable_if_t<__is_allocator_v<_Allocator>>>
 map(from_range_t, _Range&&, _Allocator)
     -> map<__range_key_type<_Range>, __range_mapped_type<_Range>, less<__range_key_type<_Range>>, _Allocator>;
 #    endif
 
-template <class _Key, class _Tp, class _Allocator, class = enable_if_t<__is_allocator<_Allocator>::value, void>>
-map(initializer_list<pair<_Key, _Tp>>,
-    _Allocator) -> map<remove_const_t<_Key>, _Tp, less<remove_const_t<_Key>>, _Allocator>;
+template <class _Key, class _Tp, class _Allocator, class = enable_if_t<__is_allocator_v<_Allocator>>>
+map(initializer_list<pair<_Key, _Tp>>, _Allocator)
+    -> map<remove_const_t<_Key>, _Tp, less<remove_const_t<_Key>>, _Allocator>;
+#  endif
+
+#  if _LIBCPP_STD_VER >= 14
+template <class _Key, class _Tp, class _Compare, class _Allocator>
+struct __specialized_algorithm<_Algorithm::__for_each, __single_range<map<_Key, _Tp, _Compare, _Allocator>>> {
+  using __map _LIBCPP_NODEBUG = map<_Key, _Tp, _Compare, _Allocator>;
+
+  static const bool __has_algorithm = true;
+
+  template <class _Map, class _Func, class _Proj>
+  _LIBCPP_HIDE_FROM_ABI static auto operator()(_Map&& __map, _Func __func, _Proj __proj) {
+    auto [_, __func2] = __specialized_algorithm<_Algorithm::__for_each, __single_range<typename __map::__base>>()(
+        __map.__tree_, std::move(__func), std::move(__proj));
+    return std::make_pair(__map.end(), std::move(__func2));
+  }
+};
 #  endif
 
 #  ifndef _LIBCPP_CXX03_LANG
-template <class _Key, class _Tp, class _Compare, class _Allocator>
-map<_Key, _Tp, _Compare, _Allocator>::map(map&& __m, const allocator_type& __a)
-    : __tree_(std::move(__m.__tree_), typename __base::allocator_type(__a)) {
-  if (__a != __m.get_allocator()) {
-    const_iterator __e = cend();
-    while (!__m.empty()) {
-      __tree_.__insert_unique_from_orphaned_node(__e.__i_, std::move(__m.__tree_.remove(__m.begin().__i_)->__value_));
-    }
-  }
-}
-
 template <class _Key, class _Tp, class _Compare, class _Allocator>
 _Tp& map<_Key, _Tp, _Compare, _Allocator>::operator[](const key_type& __k) {
-  return __tree_
-      .__emplace_unique_key_args(__k, std::piecewise_construct, std::forward_as_tuple(__k), std::forward_as_tuple())
+  return __tree_.__emplace_unique(std::piecewise_construct, std::forward_as_tuple(__k), std::forward_as_tuple())
       .first->second;
 }
 
 template <class _Key, class _Tp, class _Compare, class _Allocator>
 _Tp& map<_Key, _Tp, _Compare, _Allocator>::operator[](key_type&& __k) {
-  // TODO investigate this clang-tidy warning.
-  // NOLINTBEGIN(bugprone-use-after-move)
   return __tree_
-      .__emplace_unique_key_args(
-          __k, std::piecewise_construct, std::forward_as_tuple(std::move(__k)), std::forward_as_tuple())
+      .__emplace_unique(std::piecewise_construct, std::forward_as_tuple(std::move(__k)), std::forward_as_tuple())
       .first->second;
-  // NOLINTEND(bugprone-use-after-move)
 }
 
 #  else // _LIBCPP_CXX03_LANG
@@ -1417,44 +1516,41 @@ typename map<_Key, _Tp, _Compare, _Allocator>::__node_holder
 map<_Key, _Tp, _Compare, _Allocator>::__construct_node_with_key(const key_type& __k) {
   __node_allocator& __na = __tree_.__node_alloc();
   __node_holder __h(__node_traits::allocate(__na, 1), _Dp(__na));
-  __node_traits::construct(__na, std::addressof(__h->__value_.first), __k);
+  __node_traits::construct(__na, std::addressof(__h->__get_value().first), __k);
   __h.get_deleter().__first_constructed = true;
-  __node_traits::construct(__na, std::addressof(__h->__value_.second));
+  __node_traits::construct(__na, std::addressof(__h->__get_value().second));
   __h.get_deleter().__second_constructed = true;
   return __h;
 }
 
 template <class _Key, class _Tp, class _Compare, class _Allocator>
 _Tp& map<_Key, _Tp, _Compare, _Allocator>::operator[](const key_type& __k) {
-  __parent_pointer __parent;
-  __node_base_pointer& __child = __tree_.__find_equal(__parent, __k);
-  __node_pointer __r           = static_cast<__node_pointer>(__child);
+  auto [__parent, __child] = __tree_.__find_equal(__k);
+  __node_pointer __r       = static_cast<__node_pointer>(__child);
   if (__child == nullptr) {
     __node_holder __h = __construct_node_with_key(__k);
     __tree_.__insert_node_at(__parent, __child, static_cast<__node_base_pointer>(__h.get()));
     __r = __h.release();
   }
-  return __r->__value_.second;
+  return __r->__get_value().second;
 }
 
 #  endif // _LIBCPP_CXX03_LANG
 
 template <class _Key, class _Tp, class _Compare, class _Allocator>
 _Tp& map<_Key, _Tp, _Compare, _Allocator>::at(const key_type& __k) {
-  __parent_pointer __parent;
-  __node_base_pointer& __child = __tree_.__find_equal(__parent, __k);
+  auto [_, __child] = __tree_.__find_equal(__k);
   if (__child == nullptr)
     std::__throw_out_of_range("map::at:  key not found");
-  return static_cast<__node_pointer>(__child)->__value_.second;
+  return static_cast<__node_pointer>(__child)->__get_value().second;
 }
 
 template <class _Key, class _Tp, class _Compare, class _Allocator>
 const _Tp& map<_Key, _Tp, _Compare, _Allocator>::at(const key_type& __k) const {
-  __parent_pointer __parent;
-  __node_base_pointer __child = __tree_.__find_equal(__parent, __k);
+  auto [_, __child] = __tree_.__find_equal(__k);
   if (__child == nullptr)
     std::__throw_out_of_range("map::at:  key not found");
-  return static_cast<__node_pointer>(__child)->__value_.second;
+  return static_cast<__node_pointer>(__child)->__get_value().second;
 }
 
 template <class _Key, class _Tp, class _Compare, class _Allocator>
@@ -1637,22 +1733,17 @@ public:
       : multimap(from_range, std::forward<_Range>(__range), key_compare(), __a) {}
 #  endif
 
-  _LIBCPP_HIDE_FROM_ABI multimap(const multimap& __m)
-      : __tree_(__m.__tree_.value_comp(),
-                __alloc_traits::select_on_container_copy_construction(__m.__tree_.__alloc())) {
-    insert(__m.begin(), __m.end());
-  }
+  _LIBCPP_HIDE_FROM_ABI multimap(const multimap& __m) = default;
 
   _LIBCPP_HIDE_FROM_ABI multimap& operator=(const multimap& __m) = default;
 
 #  ifndef _LIBCPP_CXX03_LANG
 
-  _LIBCPP_HIDE_FROM_ABI multimap(multimap&& __m) noexcept(is_nothrow_move_constructible<__base>::value) = default;
+  _LIBCPP_HIDE_FROM_ABI multimap(multimap&& __m) = default;
 
-  _LIBCPP_HIDE_FROM_ABI multimap(multimap&& __m, const allocator_type& __a);
+  _LIBCPP_HIDE_FROM_ABI multimap(multimap&& __m, const allocator_type& __a) : __tree_(std::move(__m.__tree_), __a) {}
 
-  _LIBCPP_HIDE_FROM_ABI multimap&
-  operator=(multimap&& __m) noexcept(is_nothrow_move_assignable<__base>::value) = default;
+  _LIBCPP_HIDE_FROM_ABI multimap& operator=(multimap&& __m) = default;
 
   _LIBCPP_HIDE_FROM_ABI multimap(initializer_list<value_type> __il, const key_compare& __comp = key_compare())
       : __tree_(__vc(__comp)) {
@@ -1671,7 +1762,8 @@ public:
 #    endif
 
   _LIBCPP_HIDE_FROM_ABI multimap& operator=(initializer_list<value_type> __il) {
-    __tree_.__assign_multi(__il.begin(), __il.end());
+    clear();
+    insert(__il.begin(), __il.end());
     return *this;
   }
 
@@ -1679,37 +1771,42 @@ public:
 
   _LIBCPP_HIDE_FROM_ABI explicit multimap(const allocator_type& __a) : __tree_(typename __base::allocator_type(__a)) {}
 
-  _LIBCPP_HIDE_FROM_ABI multimap(const multimap& __m, const allocator_type& __a)
-      : __tree_(__m.__tree_.value_comp(), typename __base::allocator_type(__a)) {
-    insert(__m.begin(), __m.end());
-  }
+  _LIBCPP_HIDE_FROM_ABI multimap(const multimap& __m, const allocator_type& __a) : __tree_(__m.__tree_, __a) {}
 
   _LIBCPP_HIDE_FROM_ABI ~multimap() {
     static_assert(sizeof(std::__diagnose_non_const_comparator<_Key, _Compare>()), "");
   }
 
-  _LIBCPP_HIDE_FROM_ABI iterator begin() _NOEXCEPT { return __tree_.begin(); }
-  _LIBCPP_HIDE_FROM_ABI const_iterator begin() const _NOEXCEPT { return __tree_.begin(); }
-  _LIBCPP_HIDE_FROM_ABI iterator end() _NOEXCEPT { return __tree_.end(); }
-  _LIBCPP_HIDE_FROM_ABI const_iterator end() const _NOEXCEPT { return __tree_.end(); }
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI iterator begin() _NOEXCEPT { return __tree_.begin(); }
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI const_iterator begin() const _NOEXCEPT { return __tree_.begin(); }
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI iterator end() _NOEXCEPT { return __tree_.end(); }
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI const_iterator end() const _NOEXCEPT { return __tree_.end(); }
 
-  _LIBCPP_HIDE_FROM_ABI reverse_iterator rbegin() _NOEXCEPT { return reverse_iterator(end()); }
-  _LIBCPP_HIDE_FROM_ABI const_reverse_iterator rbegin() const _NOEXCEPT { return const_reverse_iterator(end()); }
-  _LIBCPP_HIDE_FROM_ABI reverse_iterator rend() _NOEXCEPT { return reverse_iterator(begin()); }
-  _LIBCPP_HIDE_FROM_ABI const_reverse_iterator rend() const _NOEXCEPT { return const_reverse_iterator(begin()); }
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI reverse_iterator rbegin() _NOEXCEPT { return reverse_iterator(end()); }
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI const_reverse_iterator rbegin() const _NOEXCEPT {
+    return const_reverse_iterator(end());
+  }
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI reverse_iterator rend() _NOEXCEPT { return reverse_iterator(begin()); }
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI const_reverse_iterator rend() const _NOEXCEPT {
+    return const_reverse_iterator(begin());
+  }
 
-  _LIBCPP_HIDE_FROM_ABI const_iterator cbegin() const _NOEXCEPT { return begin(); }
-  _LIBCPP_HIDE_FROM_ABI const_iterator cend() const _NOEXCEPT { return end(); }
-  _LIBCPP_HIDE_FROM_ABI const_reverse_iterator crbegin() const _NOEXCEPT { return rbegin(); }
-  _LIBCPP_HIDE_FROM_ABI const_reverse_iterator crend() const _NOEXCEPT { return rend(); }
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI const_iterator cbegin() const _NOEXCEPT { return begin(); }
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI const_iterator cend() const _NOEXCEPT { return end(); }
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI const_reverse_iterator crbegin() const _NOEXCEPT { return rbegin(); }
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI const_reverse_iterator crend() const _NOEXCEPT { return rend(); }
 
   [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI bool empty() const _NOEXCEPT { return __tree_.size() == 0; }
-  _LIBCPP_HIDE_FROM_ABI size_type size() const _NOEXCEPT { return __tree_.size(); }
-  _LIBCPP_HIDE_FROM_ABI size_type max_size() const _NOEXCEPT { return __tree_.max_size(); }
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI size_type size() const _NOEXCEPT { return __tree_.size(); }
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI size_type max_size() const _NOEXCEPT { return __tree_.max_size(); }
 
-  _LIBCPP_HIDE_FROM_ABI allocator_type get_allocator() const _NOEXCEPT { return allocator_type(__tree_.__alloc()); }
-  _LIBCPP_HIDE_FROM_ABI key_compare key_comp() const { return __tree_.value_comp().key_comp(); }
-  _LIBCPP_HIDE_FROM_ABI value_compare value_comp() const { return value_compare(__tree_.value_comp().key_comp()); }
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI allocator_type get_allocator() const _NOEXCEPT {
+    return allocator_type(__tree_.__alloc());
+  }
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI key_compare key_comp() const { return __tree_.value_comp().key_comp(); }
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI value_compare value_comp() const {
+    return value_compare(__tree_.value_comp().key_comp());
+  }
 
 #  ifndef _LIBCPP_CXX03_LANG
 
@@ -1751,17 +1848,13 @@ public:
 
   template <class _InputIterator>
   _LIBCPP_HIDE_FROM_ABI void insert(_InputIterator __f, _InputIterator __l) {
-    for (const_iterator __e = cend(); __f != __l; ++__f)
-      __tree_.__emplace_hint_multi(__e.__i_, *__f);
+    __tree_.__insert_range_multi(__f, __l);
   }
 
 #  if _LIBCPP_STD_VER >= 23
   template <_ContainerCompatibleRange<value_type> _Range>
   _LIBCPP_HIDE_FROM_ABI void insert_range(_Range&& __range) {
-    const_iterator __end = cend();
-    for (auto&& __element : __range) {
-      __tree_.__emplace_hint_multi(__end.__i_, std::forward<decltype(__element)>(__element));
-    }
+    __tree_.__insert_range_multi(ranges::begin(__range), ranges::end(__range));
   }
 #  endif
 
@@ -1783,10 +1876,10 @@ public:
                                         "node_type with incompatible allocator passed to multimap::insert()");
     return __tree_.template __node_handle_insert_multi<node_type>(__hint.__i_, std::move(__nh));
   }
-  _LIBCPP_HIDE_FROM_ABI node_type extract(key_type const& __key) {
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI node_type extract(key_type const& __key) {
     return __tree_.template __node_handle_extract<node_type>(__key);
   }
-  _LIBCPP_HIDE_FROM_ABI node_type extract(const_iterator __it) {
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI node_type extract(const_iterator __it) {
     return __tree_.template __node_handle_extract<node_type>(__it.__i_);
   }
   template <class _Compare2>
@@ -1821,75 +1914,89 @@ public:
     __tree_.swap(__m.__tree_);
   }
 
-  _LIBCPP_HIDE_FROM_ABI iterator find(const key_type& __k) { return __tree_.find(__k); }
-  _LIBCPP_HIDE_FROM_ABI const_iterator find(const key_type& __k) const { return __tree_.find(__k); }
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI iterator find(const key_type& __k) { return __tree_.find(__k); }
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI const_iterator find(const key_type& __k) const { return __tree_.find(__k); }
 #  if _LIBCPP_STD_VER >= 14
   template <typename _K2, enable_if_t<__is_transparent_v<_Compare, _K2>, int> = 0>
-  _LIBCPP_HIDE_FROM_ABI iterator find(const _K2& __k) {
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI iterator find(const _K2& __k) {
     return __tree_.find(__k);
   }
   template <typename _K2, enable_if_t<__is_transparent_v<_Compare, _K2>, int> = 0>
-  _LIBCPP_HIDE_FROM_ABI const_iterator find(const _K2& __k) const {
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI const_iterator find(const _K2& __k) const {
     return __tree_.find(__k);
   }
 #  endif
 
-  _LIBCPP_HIDE_FROM_ABI size_type count(const key_type& __k) const { return __tree_.__count_multi(__k); }
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI size_type count(const key_type& __k) const {
+    return __tree_.__count_multi(__k);
+  }
 #  if _LIBCPP_STD_VER >= 14
   template <typename _K2, enable_if_t<__is_transparent_v<_Compare, _K2>, int> = 0>
-  _LIBCPP_HIDE_FROM_ABI size_type count(const _K2& __k) const {
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI size_type count(const _K2& __k) const {
     return __tree_.__count_multi(__k);
   }
 #  endif
 
 #  if _LIBCPP_STD_VER >= 20
-  _LIBCPP_HIDE_FROM_ABI bool contains(const key_type& __k) const { return find(__k) != end(); }
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI bool contains(const key_type& __k) const { return find(__k) != end(); }
   template <typename _K2, enable_if_t<__is_transparent_v<_Compare, _K2>, int> = 0>
-  _LIBCPP_HIDE_FROM_ABI bool contains(const _K2& __k) const {
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI bool contains(const _K2& __k) const {
     return find(__k) != end();
   }
 #  endif // _LIBCPP_STD_VER >= 20
 
-  _LIBCPP_HIDE_FROM_ABI iterator lower_bound(const key_type& __k) { return __tree_.lower_bound(__k); }
-  _LIBCPP_HIDE_FROM_ABI const_iterator lower_bound(const key_type& __k) const { return __tree_.lower_bound(__k); }
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI iterator lower_bound(const key_type& __k) {
+    return __tree_.__lower_bound_multi(__k);
+  }
+
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI const_iterator lower_bound(const key_type& __k) const {
+    return __tree_.__lower_bound_multi(__k);
+  }
+
 #  if _LIBCPP_STD_VER >= 14
   template <typename _K2, enable_if_t<__is_transparent_v<_Compare, _K2>, int> = 0>
-  _LIBCPP_HIDE_FROM_ABI iterator lower_bound(const _K2& __k) {
-    return __tree_.lower_bound(__k);
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI iterator lower_bound(const _K2& __k) {
+    return __tree_.__lower_bound_multi(__k);
   }
 
   template <typename _K2, enable_if_t<__is_transparent_v<_Compare, _K2>, int> = 0>
-  _LIBCPP_HIDE_FROM_ABI const_iterator lower_bound(const _K2& __k) const {
-    return __tree_.lower_bound(__k);
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI const_iterator lower_bound(const _K2& __k) const {
+    return __tree_.__lower_bound_multi(__k);
   }
 #  endif
 
-  _LIBCPP_HIDE_FROM_ABI iterator upper_bound(const key_type& __k) { return __tree_.upper_bound(__k); }
-  _LIBCPP_HIDE_FROM_ABI const_iterator upper_bound(const key_type& __k) const { return __tree_.upper_bound(__k); }
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI iterator upper_bound(const key_type& __k) {
+    return __tree_.__upper_bound_multi(__k);
+  }
+
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI const_iterator upper_bound(const key_type& __k) const {
+    return __tree_.__upper_bound_multi(__k);
+  }
+
 #  if _LIBCPP_STD_VER >= 14
   template <typename _K2, enable_if_t<__is_transparent_v<_Compare, _K2>, int> = 0>
-  _LIBCPP_HIDE_FROM_ABI iterator upper_bound(const _K2& __k) {
-    return __tree_.upper_bound(__k);
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI iterator upper_bound(const _K2& __k) {
+    return __tree_.__upper_bound_multi(__k);
   }
   template <typename _K2, enable_if_t<__is_transparent_v<_Compare, _K2>, int> = 0>
-  _LIBCPP_HIDE_FROM_ABI const_iterator upper_bound(const _K2& __k) const {
-    return __tree_.upper_bound(__k);
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI const_iterator upper_bound(const _K2& __k) const {
+    return __tree_.__upper_bound_multi(__k);
   }
 #  endif
 
-  _LIBCPP_HIDE_FROM_ABI pair<iterator, iterator> equal_range(const key_type& __k) {
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI pair<iterator, iterator> equal_range(const key_type& __k) {
     return __tree_.__equal_range_multi(__k);
   }
-  _LIBCPP_HIDE_FROM_ABI pair<const_iterator, const_iterator> equal_range(const key_type& __k) const {
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI pair<const_iterator, const_iterator> equal_range(const key_type& __k) const {
     return __tree_.__equal_range_multi(__k);
   }
 #  if _LIBCPP_STD_VER >= 14
   template <typename _K2, enable_if_t<__is_transparent_v<_Compare, _K2>, int> = 0>
-  _LIBCPP_HIDE_FROM_ABI pair<iterator, iterator> equal_range(const _K2& __k) {
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI pair<iterator, iterator> equal_range(const _K2& __k) {
     return __tree_.__equal_range_multi(__k);
   }
   template <typename _K2, enable_if_t<__is_transparent_v<_Compare, _K2>, int> = 0>
-  _LIBCPP_HIDE_FROM_ABI pair<const_iterator, const_iterator> equal_range(const _K2& __k) const {
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI pair<const_iterator, const_iterator> equal_range(const _K2& __k) const {
     return __tree_.__equal_range_multi(__k);
   }
 #  endif
@@ -1901,6 +2008,8 @@ private:
 
   typedef __map_node_destructor<__node_allocator> _Dp;
   typedef unique_ptr<__node, _Dp> __node_holder;
+
+  friend struct __specialized_algorithm<_Algorithm::__for_each, __single_range<multimap> >;
 };
 
 #  if _LIBCPP_STD_VER >= 17
@@ -1908,8 +2017,8 @@ template <class _InputIterator,
           class _Compare   = less<__iter_key_type<_InputIterator>>,
           class _Allocator = allocator<__iter_to_alloc_type<_InputIterator>>,
           class            = enable_if_t<__has_input_iterator_category<_InputIterator>::value, void>,
-          class            = enable_if_t<!__is_allocator<_Compare>::value, void>,
-          class            = enable_if_t<__is_allocator<_Allocator>::value, void>>
+          class            = enable_if_t<!__is_allocator_v<_Compare>>,
+          class            = enable_if_t<__is_allocator_v<_Allocator>>>
 multimap(_InputIterator, _InputIterator, _Compare = _Compare(), _Allocator = _Allocator())
     -> multimap<__iter_key_type<_InputIterator>, __iter_mapped_type<_InputIterator>, _Compare, _Allocator>;
 
@@ -1917,8 +2026,8 @@ multimap(_InputIterator, _InputIterator, _Compare = _Compare(), _Allocator = _Al
 template <ranges::input_range _Range,
           class _Compare   = less<__range_key_type<_Range>>,
           class _Allocator = allocator<__range_to_alloc_type<_Range>>,
-          class            = enable_if_t<!__is_allocator<_Compare>::value, void>,
-          class            = enable_if_t<__is_allocator<_Allocator>::value, void>>
+          class            = enable_if_t<!__is_allocator_v<_Compare>>,
+          class            = enable_if_t<__is_allocator_v<_Allocator>>>
 multimap(from_range_t, _Range&&, _Compare = _Compare(), _Allocator = _Allocator())
     -> multimap<__range_key_type<_Range>, __range_mapped_type<_Range>, _Compare, _Allocator>;
 #    endif
@@ -1927,16 +2036,15 @@ template <class _Key,
           class _Tp,
           class _Compare   = less<remove_const_t<_Key>>,
           class _Allocator = allocator<pair<const _Key, _Tp>>,
-          class            = enable_if_t<!__is_allocator<_Compare>::value, void>,
-          class            = enable_if_t<__is_allocator<_Allocator>::value, void>>
-multimap(initializer_list<pair<_Key, _Tp>>,
-         _Compare   = _Compare(),
-         _Allocator = _Allocator()) -> multimap<remove_const_t<_Key>, _Tp, _Compare, _Allocator>;
+          class            = enable_if_t<!__is_allocator_v<_Compare>>,
+          class            = enable_if_t<__is_allocator_v<_Allocator>>>
+multimap(initializer_list<pair<_Key, _Tp>>, _Compare = _Compare(), _Allocator = _Allocator())
+    -> multimap<remove_const_t<_Key>, _Tp, _Compare, _Allocator>;
 
 template <class _InputIterator,
           class _Allocator,
           class = enable_if_t<__has_input_iterator_category<_InputIterator>::value, void>,
-          class = enable_if_t<__is_allocator<_Allocator>::value, void>>
+          class = enable_if_t<__is_allocator_v<_Allocator>>>
 multimap(_InputIterator, _InputIterator, _Allocator)
     -> multimap<__iter_key_type<_InputIterator>,
                 __iter_mapped_type<_InputIterator>,
@@ -1944,26 +2052,30 @@ multimap(_InputIterator, _InputIterator, _Allocator)
                 _Allocator>;
 
 #    if _LIBCPP_STD_VER >= 23
-template <ranges::input_range _Range, class _Allocator, class = enable_if_t<__is_allocator<_Allocator>::value, void>>
+template <ranges::input_range _Range, class _Allocator, class = enable_if_t<__is_allocator_v<_Allocator>>>
 multimap(from_range_t, _Range&&, _Allocator)
     -> multimap<__range_key_type<_Range>, __range_mapped_type<_Range>, less<__range_key_type<_Range>>, _Allocator>;
 #    endif
 
-template <class _Key, class _Tp, class _Allocator, class = enable_if_t<__is_allocator<_Allocator>::value, void>>
-multimap(initializer_list<pair<_Key, _Tp>>,
-         _Allocator) -> multimap<remove_const_t<_Key>, _Tp, less<remove_const_t<_Key>>, _Allocator>;
+template <class _Key, class _Tp, class _Allocator, class = enable_if_t<__is_allocator_v<_Allocator>>>
+multimap(initializer_list<pair<_Key, _Tp>>, _Allocator)
+    -> multimap<remove_const_t<_Key>, _Tp, less<remove_const_t<_Key>>, _Allocator>;
 #  endif
 
-#  ifndef _LIBCPP_CXX03_LANG
+#  if _LIBCPP_STD_VER >= 14
 template <class _Key, class _Tp, class _Compare, class _Allocator>
-multimap<_Key, _Tp, _Compare, _Allocator>::multimap(multimap&& __m, const allocator_type& __a)
-    : __tree_(std::move(__m.__tree_), typename __base::allocator_type(__a)) {
-  if (__a != __m.get_allocator()) {
-    const_iterator __e = cend();
-    while (!__m.empty())
-      __tree_.__insert_multi_from_orphaned_node(__e.__i_, std::move(__m.__tree_.remove(__m.begin().__i_)->__value_));
+struct __specialized_algorithm<_Algorithm::__for_each, __single_range<multimap<_Key, _Tp, _Compare, _Allocator>>> {
+  using __map _LIBCPP_NODEBUG = multimap<_Key, _Tp, _Compare, _Allocator>;
+
+  static const bool __has_algorithm = true;
+
+  template <class _Map, class _Func, class _Proj>
+  _LIBCPP_HIDE_FROM_ABI static auto operator()(_Map&& __map, _Func __func, _Proj __proj) {
+    auto [_, __func2] = __specialized_algorithm<_Algorithm::__for_each, __single_range<typename __map::__base>>()(
+        __map.__tree_, std::move(__func), std::move(__proj));
+    return std::make_pair(__map.end(), std::move(__func2));
   }
-}
+};
 #  endif
 
 template <class _Key, class _Tp, class _Compare, class _Allocator>
diff --git a/lib/libcxx/include/math.h b/lib/libcxx/include/math.h
index 929bef6385..1db61538e9 100644
--- a/lib/libcxx/include/math.h
+++ b/lib/libcxx/include/math.h
@@ -429,6 +429,25 @@ using std::__math::isnormal;
 using std::__math::isunordered;
 #      endif // _LIBCPP_MSVCRT
 
+#      if defined(_LIBCPP_MSVCRT) && _LIBCPP_STD_VER >= 20
+// MS UCRT incorrectly defines some functions in a way not working with integer types. Until C++20, this was worked
+// around by -fdelayed-template-parsing. Since C++20, we can use standard feature "requires" instead.
+
+// TODO: Remove the workaround once UCRT fixes these functions. Note that this doesn't seem planned as of 2025-07 per
+// https://developercommunity.visualstudio.com/t/10294165.
+
+using std::__math::__ucrt::isfinite;
+using std::__math::__ucrt::isgreater;
+using std::__math::__ucrt::isgreaterequal;
+using std::__math::__ucrt::isinf;
+using std::__math::__ucrt::isless;
+using std::__math::__ucrt::islessequal;
+using std::__math::__ucrt::islessgreater;
+using std::__math::__ucrt::isnan;
+using std::__math::__ucrt::isnormal;
+using std::__math::__ucrt::isunordered;
+#      endif // defined(_LIBCPP_MSVCRT) && _LIBCPP_STD_VER >= 20
+
 // We have to provide double overloads for <math.h> to work on platforms that don't provide the full set of math
 // functions. To make the overload set work with multiple functions that take the same arguments, we make our overloads
 // templates. Functions are preferred over function templates during overload resolution, which means that our overload
diff --git a/lib/libcxx/include/mdspan b/lib/libcxx/include/mdspan
index 5aeec1bcbf..32468a128d 100644
--- a/lib/libcxx/include/mdspan
+++ b/lib/libcxx/include/mdspan
@@ -450,11 +450,7 @@ namespace std {
 #  include <__config>
 
 #  if _LIBCPP_STD_VER >= 23
-#    include <__fwd/mdspan.h> // TODO(boomanaiden154): This is currently a
-                              // non-standard extension to include
-                              // std::dynamic_extent tracked by LWG issue 4275.
-                              // This comment should be deleted or the include
-                              // deleted upon resolution.
+#    include <__fwd/mdspan.h>
 #    include <__fwd/span.h>
 #    include <__mdspan/default_accessor.h>
 #    include <__mdspan/extents.h>
diff --git a/lib/libcxx/include/mutex b/lib/libcxx/include/mutex
index dc8e711f04..bec0185ede 100644
--- a/lib/libcxx/include/mutex
+++ b/lib/libcxx/include/mutex
@@ -229,12 +229,12 @@ public:
   recursive_mutex& operator=(const recursive_mutex&) = delete;
 
   void lock();
-  bool try_lock() _NOEXCEPT;
+  [[__nodiscard__]] bool try_lock() _NOEXCEPT;
   void unlock() _NOEXCEPT;
 
   typedef __libcpp_recursive_mutex_t* native_handle_type;
 
-  _LIBCPP_HIDE_FROM_ABI native_handle_type native_handle() { return &__m_; }
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI native_handle_type native_handle() { return &__m_; }
 };
 
 class _LIBCPP_EXPORTED_FROM_ABI timed_mutex {
@@ -251,14 +251,14 @@ public:
 
 public:
   void lock();
-  bool try_lock() _NOEXCEPT;
+  [[__nodiscard__]] bool try_lock() _NOEXCEPT;
   template <class _Rep, class _Period>
-  _LIBCPP_HIDE_FROM_ABI bool try_lock_for(const chrono::duration<_Rep, _Period>& __d) {
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI bool try_lock_for(const chrono::duration<_Rep, _Period>& __d) {
     return try_lock_until(chrono::steady_clock::now() + __d);
   }
 
   template <class _Clock, class _Duration>
-  _LIBCPP_HIDE_FROM_ABI bool try_lock_until(const chrono::time_point<_Clock, _Duration>& __t) {
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI bool try_lock_until(const chrono::time_point<_Clock, _Duration>& __t) {
     using namespace chrono;
     unique_lock<mutex> __lk(__m_);
     bool __no_timeout = _Clock::now() < __t;
@@ -288,14 +288,14 @@ public:
   recursive_timed_mutex& operator=(const recursive_timed_mutex&) = delete;
 
   void lock();
-  bool try_lock() _NOEXCEPT;
+  [[__nodiscard__]] bool try_lock() _NOEXCEPT;
   template <class _Rep, class _Period>
-  _LIBCPP_HIDE_FROM_ABI bool try_lock_for(const chrono::duration<_Rep, _Period>& __d) {
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI bool try_lock_for(const chrono::duration<_Rep, _Period>& __d) {
     return try_lock_until(chrono::steady_clock::now() + __d);
   }
 
   template <class _Clock, class _Duration>
-  _LIBCPP_HIDE_FROM_ABI bool try_lock_until(const chrono::time_point<_Clock, _Duration>& __t) {
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI bool try_lock_until(const chrono::time_point<_Clock, _Duration>& __t) {
     using namespace chrono;
     __thread_id __id = this_thread::get_id();
     unique_lock<mutex> __lk(__m_);
@@ -320,7 +320,7 @@ public:
 };
 
 template <class _L0, class _L1>
-_LIBCPP_HIDE_FROM_ABI int try_lock(_L0& __l0, _L1& __l1) {
+[[__nodiscard__]] _LIBCPP_NO_THREAD_SAFETY_ANALYSIS _LIBCPP_HIDE_FROM_ABI int try_lock(_L0& __l0, _L1& __l1) {
   unique_lock<_L0> __u0(__l0, try_to_lock_t());
   if (__u0.owns_lock()) {
     if (__l1.try_lock()) {
@@ -335,7 +335,8 @@ _LIBCPP_HIDE_FROM_ABI int try_lock(_L0& __l0, _L1& __l1) {
 #    ifndef _LIBCPP_CXX03_LANG
 
 template <class _L0, class _L1, class _L2, class... _L3>
-_LIBCPP_HIDE_FROM_ABI int try_lock(_L0& __l0, _L1& __l1, _L2& __l2, _L3&... __l3) {
+[[__nodiscard__]] _LIBCPP_NO_THREAD_SAFETY_ANALYSIS
+    _LIBCPP_HIDE_FROM_ABI int try_lock(_L0& __l0, _L1& __l1, _L2& __l2, _L3&... __l3) {
   int __r = 0;
   unique_lock<_L0> __u0(__l0, try_to_lock);
   if (__u0.owns_lock()) {
@@ -350,8 +351,11 @@ _LIBCPP_HIDE_FROM_ABI int try_lock(_L0& __l0, _L1& __l1, _L2& __l2, _L3&... __l3
 
 #    endif // _LIBCPP_CXX03_LANG
 
+// These functions are implemented in terms of unique_lock, which the thread safety annotations don't support, so the
+// thread safety analysis has to be disabled inside the function.
 template <class _L0, class _L1>
-_LIBCPP_HIDE_FROM_ABI void lock(_L0& __l0, _L1& __l1) {
+_LIBCPP_NO_THREAD_SAFETY_ANALYSIS _LIBCPP_HIDE_FROM_ABI void lock(_L0& __l0, _L1& __l1)
+    _LIBCPP_ACQUIRE_CAPABILITY(__l0, __l1) {
   while (true) {
     {
       unique_lock<_L0> __u0(__l0);
@@ -375,7 +379,7 @@ _LIBCPP_HIDE_FROM_ABI void lock(_L0& __l0, _L1& __l1) {
 #    ifndef _LIBCPP_CXX03_LANG
 
 template <class _L0, class _L1, class _L2, class... _L3>
-void __lock_first(int __i, _L0& __l0, _L1& __l1, _L2& __l2, _L3&... __l3) {
+_LIBCPP_NO_THREAD_SAFETY_ANALYSIS void __lock_first(int __i, _L0& __l0, _L1& __l1, _L2& __l2, _L3&... __l3) {
   while (true) {
     switch (__i) {
     case 0: {
@@ -410,8 +414,14 @@ void __lock_first(int __i, _L0& __l0, _L1& __l1, _L2& __l2, _L3&... __l3) {
   }
 }
 
+// These functions are implemented in terms of unique_lock, which the thread safety annotations don't support, so the
+// thread safety analysis has to be disabled inside the function.
 template <class _L0, class _L1, class _L2, class... _L3>
-inline _LIBCPP_HIDE_FROM_ABI void lock(_L0& __l0, _L1& __l1, _L2& __l2, _L3&... __l3) {
+_LIBCPP_NO_THREAD_SAFETY_ANALYSIS inline _LIBCPP_HIDE_FROM_ABI void lock(_L0& __l0, _L1& __l1, _L2& __l2, _L3&... __l3)
+#      if defined(_LIBCPP_CLANG_VER) && _LIBCPP_CLANG_VER >= 2101
+    _LIBCPP_ACQUIRE_CAPABILITY(__l0, __l1, __l2, __l3...)
+#      endif
+{
   std::__lock_first(0, __l0, __l1, __l2, __l3...);
 }
 
@@ -469,17 +479,14 @@ public:
 
   [[nodiscard]] _LIBCPP_HIDE_FROM_ABI scoped_lock(adopt_lock_t, _MArgs&... __margs) : __t_(__margs...) {}
 
-  _LIBCPP_HIDE_FROM_ABI ~scoped_lock() {
-    typedef typename __make_tuple_indices<sizeof...(_MArgs)>::type _Indices;
-    __unlock_unpack(_Indices{}, __t_);
-  }
+  _LIBCPP_HIDE_FROM_ABI ~scoped_lock() { __unlock_unpack(make_index_sequence<sizeof...(_MArgs)>(), __t_); }
 
   scoped_lock(scoped_lock const&)            = delete;
   scoped_lock& operator=(scoped_lock const&) = delete;
 
 private:
   template <size_t... _Indx>
-  _LIBCPP_HIDE_FROM_ABI static void __unlock_unpack(__tuple_indices<_Indx...>, _MutexTuple& __mt) {
+  _LIBCPP_HIDE_FROM_ABI static void __unlock_unpack(index_sequence<_Indx...>, _MutexTuple& __mt) {
     (std::get<_Indx>(__mt).unlock(), ...);
   }
 
@@ -494,6 +501,10 @@ _LIBCPP_END_NAMESPACE_STD
 
 _LIBCPP_POP_MACROS
 
+#  if !defined(_LIBCPP_REMOVE_TRANSITIVE_INCLUDES) && _LIBCPP_STD_VER <= 23
+#    include <typeinfo>
+#  endif
+
 #  if !defined(_LIBCPP_REMOVE_TRANSITIVE_INCLUDES) && _LIBCPP_STD_VER <= 20
 #    include <atomic>
 #    include <concepts>
@@ -507,7 +518,6 @@ _LIBCPP_POP_MACROS
 #    include <stdexcept>
 #    include <system_error>
 #    include <type_traits>
-#    include <typeinfo>
 #  endif
 #endif // __cplusplus < 201103L && defined(_LIBCPP_USE_FROZEN_CXX03_HEADERS)
 
diff --git a/lib/libcxx/include/optional b/lib/libcxx/include/optional
index e81bff50da..12fbcdfa5c 100644
--- a/lib/libcxx/include/optional
+++ b/lib/libcxx/include/optional
@@ -20,6 +20,11 @@ namespace std {
   template <class T>
     class optional;
 
+  template<class T>
+   constexpr bool ranges::enable_view<optional<T>> = true;
+  template<class T>
+   constexpr auto format_kind<optional<T>> = range_format::disabled;
+
   template<class T>
     concept is-derived-from-optional = requires(const T& t) {       // exposition only
       []<class U>(const optional<U>&){ }(t);
@@ -102,6 +107,8 @@ namespace std {
   class optional {
   public:
     using value_type = T;
+    using iterator               = implementation-defined; // see [optional.iterators]
+    using const_iterator         = implementation-defined; // see [optional.iterators]
 
     // [optional.ctor], constructors
     constexpr optional() noexcept;
@@ -112,7 +119,7 @@ namespace std {
       constexpr explicit optional(in_place_t, Args &&...);
     template<class U, class... Args>
       constexpr explicit optional(in_place_t, initializer_list<U>, Args &&...);
-    template<class U = T>
+    template<class U = remove_cv_t<T>>
       constexpr explicit(see-below) optional(U &&);
     template<class U>
       explicit(see-below) optional(const optional<U> &);                          // constexpr in C++20
@@ -126,7 +133,7 @@ namespace std {
     optional &operator=(nullopt_t) noexcept;                                      // constexpr in C++20
     constexpr optional &operator=(const optional &);
     constexpr optional &operator=(optional &&) noexcept(see below);
-    template<class U = T> optional &operator=(U &&);                              // constexpr in C++20
+    template<class U = remove_cv_t<T>> optional &operator=(U &&);                              // constexpr in C++20
     template<class U> optional &operator=(const optional<U> &);                   // constexpr in C++20
     template<class U> optional &operator=(optional<U> &&);                        // constexpr in C++20
     template<class... Args> T& emplace(Args &&...);                               // constexpr in C++20
@@ -135,6 +142,12 @@ namespace std {
     // [optional.swap], swap
     void swap(optional &) noexcept(see below ); // constexpr in C++20
 
+    // [optional.iterators], iterator support
+    constexpr iterator begin() noexcept;
+    constexpr const_iterator begin() const noexcept;
+    constexpr iterator end() noexcept;
+    constexpr const_iterator end() const noexcept;
+
     // [optional.observe], observers
     constexpr T const *operator->() const noexcept;
     constexpr T *operator->() noexcept;
@@ -148,8 +161,8 @@ namespace std {
     constexpr T &value() &;
     constexpr T &&value() &&;
     constexpr const T &&value() const &&;
-    template<class U> constexpr T value_or(U &&) const &;
-    template<class U> constexpr T value_or(U &&) &&;
+    template<class U = remove_cv_t<T>> constexpr T value_or(U &&) const &;
+    template<class U = remove_cv_t<T>> constexpr T value_or(U &&) &&;
 
     // [optional.monadic], monadic operations
     template<class F> constexpr auto and_then(F&& f) &;         // since C++23
@@ -173,6 +186,71 @@ namespace std {
   template<class T>
     optional(T) -> optional<T>;
 
+  template<class T>
+  class optional<T&> { // since C++26
+  public:
+    using value_type     = T;
+    using iterator       = implementation-defined;              // see [optional.ref.iterators]
+
+  public:
+    // [optional.ref.ctor], constructors
+    constexpr optional() noexcept = default;
+    constexpr optional(nullopt_t) noexcept : optional() {}
+    constexpr optional(const optional& rhs) noexcept = default;
+
+    template<class Arg>
+      constexpr explicit optional(in_place_t, Arg&& arg);
+    template<class U>
+      constexpr explicit(see below) optional(U&& u) noexcept(see below);
+    template<class U>
+      constexpr explicit(see below) optional(optional<U>& rhs) noexcept(see below);
+    template<class U>
+      constexpr explicit(see below) optional(const optional<U>& rhs) noexcept(see below);
+    template<class U>
+      constexpr explicit(see below) optional(optional<U>&& rhs) noexcept(see below);
+    template<class U>
+      constexpr explicit(see below) optional(const optional<U>&& rhs) noexcept(see below);
+
+    constexpr ~optional() = default;
+
+    // [optional.ref.assign], assignment
+    constexpr optional& operator=(nullopt_t) noexcept;
+    constexpr optional& operator=(const optional& rhs) noexcept = default;
+
+    template<class U> constexpr T& emplace(U&& u) noexcept(see below);
+
+    // [optional.ref.swap], swap
+    constexpr void swap(optional& rhs) noexcept;
+
+    // [optional.ref.iterators], iterator support
+    constexpr iterator begin() const noexcept;
+    constexpr iterator end() const noexcept;
+
+    // [optional.ref.observe], observers
+    constexpr T*       operator->() const noexcept;
+    constexpr T&       operator*() const noexcept;
+    constexpr explicit operator bool() const noexcept;
+    constexpr bool     has_value() const noexcept;
+    constexpr T&       value() const;                           // freestanding-deleted
+    template<class U = remove_cv_t<T>>
+      constexpr remove_cv_t<T> value_or(U&& u) const;
+
+    // [optional.ref.monadic], monadic operations
+    template<class F> constexpr auto and_then(F&& f) const;
+    template<class F> constexpr optional<invoke_result_t<F, T&>> transform(F&& f) const;
+    template<class F> constexpr optional or_else(F&& f) const;
+
+    // [optional.ref.mod], modifiers
+    constexpr void reset() noexcept;
+
+  private:
+    T* val = nullptr;                                           // exposition only
+
+    // [optional.ref.expos], exposition only helper functions
+    template<class U>
+      constexpr void convert-ref-init-val(U&& u);               // exposition only
+  };
+
 } // namespace std
 
 */
@@ -186,13 +264,19 @@ namespace std {
 #  include <__compare/three_way_comparable.h>
 #  include <__concepts/invocable.h>
 #  include <__config>
+#  include <__cstddef/ptrdiff_t.h>
 #  include <__exception/exception.h>
+#  include <__format/range_format.h>
 #  include <__functional/hash.h>
 #  include <__functional/invoke.h>
 #  include <__functional/unary_function.h>
 #  include <__fwd/functional.h>
+#  include <__iterator/bounded_iter.h>
+#  include <__iterator/wrap_iter.h>
 #  include <__memory/addressof.h>
 #  include <__memory/construct_at.h>
+#  include <__ranges/enable_borrowed_range.h>
+#  include <__ranges/enable_view.h>
 #  include <__tuple/sfinae_helpers.h>
 #  include <__type_traits/add_pointer.h>
 #  include <__type_traits/conditional.h>
@@ -200,6 +284,7 @@ namespace std {
 #  include <__type_traits/decay.h>
 #  include <__type_traits/disjunction.h>
 #  include <__type_traits/enable_if.h>
+#  include <__type_traits/integral_constant.h>
 #  include <__type_traits/invoke.h>
 #  include <__type_traits/is_array.h>
 #  include <__type_traits/is_assignable.h>
@@ -207,11 +292,11 @@ namespace std {
 #  include <__type_traits/is_convertible.h>
 #  include <__type_traits/is_core_convertible.h>
 #  include <__type_traits/is_destructible.h>
+#  include <__type_traits/is_function.h>
 #  include <__type_traits/is_nothrow_assignable.h>
 #  include <__type_traits/is_nothrow_constructible.h>
 #  include <__type_traits/is_object.h>
 #  include <__type_traits/is_reference.h>
-#  include <__type_traits/is_replaceable.h>
 #  include <__type_traits/is_same.h>
 #  include <__type_traits/is_scalar.h>
 #  include <__type_traits/is_swappable.h>
@@ -220,6 +305,7 @@ namespace std {
 #  include <__type_traits/is_trivially_destructible.h>
 #  include <__type_traits/is_trivially_relocatable.h>
 #  include <__type_traits/negation.h>
+#  include <__type_traits/reference_constructs_from_temporary.h>
 #  include <__type_traits/remove_const.h>
 #  include <__type_traits/remove_cv.h>
 #  include <__type_traits/remove_cvref.h>
@@ -255,7 +341,7 @@ public:
   _LIBCPP_HIDE_FROM_ABI bad_optional_access& operator=(const bad_optional_access&) _NOEXCEPT = default;
   // Get the key function ~bad_optional_access() into the dylib
   ~bad_optional_access() _NOEXCEPT override;
-  const char* what() const _NOEXCEPT override;
+  [[__nodiscard__]] const char* what() const _NOEXCEPT override;
 };
 
 } // namespace std
@@ -358,7 +444,7 @@ struct __optional_storage_base : __optional_destruct_base<_Tp> {
   using value_type             = _Tp;
   using __base::__base;
 
-  _LIBCPP_HIDE_FROM_ABI constexpr bool has_value() const noexcept { return this->__engaged_; }
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr bool has_value() const noexcept { return this->__engaged_; }
 
   _LIBCPP_HIDE_FROM_ABI constexpr value_type& __get() & noexcept { return this->__val_; }
   _LIBCPP_HIDE_FROM_ABI constexpr const value_type& __get() const& noexcept { return this->__val_; }
@@ -390,58 +476,60 @@ struct __optional_storage_base : __optional_destruct_base<_Tp> {
         __construct(std::forward<_That>(__opt).__get());
     }
   }
+
+  _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 void
+  __swap(__optional_storage_base& __rhs) noexcept(is_nothrow_move_constructible_v<_Tp> && is_nothrow_swappable_v<_Tp>) {
+    using std::swap;
+    if (this->has_value() == __rhs.has_value()) {
+      if (this->has_value())
+        swap(this->__get(), __rhs.__get());
+    } else {
+      if (this->has_value()) {
+        __rhs.__construct(std::move(this->__get()));
+        this->reset();
+      } else {
+        this->__construct(std::move(__rhs.__get()));
+        __rhs.reset();
+      }
+    }
+  }
 };
 
-// optional<T&> is currently required to be ill-formed. However, it may
-// be allowed in the future. For this reason, it has already been implemented
-// to ensure we can make the change in an ABI-compatible manner.
 template <class _Tp>
 struct __optional_storage_base<_Tp, true> {
   using value_type                 = _Tp;
   using __raw_type _LIBCPP_NODEBUG = remove_reference_t<_Tp>;
   __raw_type* __value_;
 
-  template <class _Up>
-  static _LIBCPP_HIDE_FROM_ABI constexpr bool __can_bind_reference() {
-    using _RawUp = __libcpp_remove_reference_t<_Up>;
-    using _UpPtr = _RawUp*;
-    using _RawTp = __libcpp_remove_reference_t<_Tp>;
-    using _TpPtr = _RawTp*;
-    using _CheckLValueArg =
-        integral_constant<bool,
-                          (is_lvalue_reference<_Up>::value && is_convertible<_UpPtr, _TpPtr>::value) ||
-                              is_same<_RawUp, reference_wrapper<_RawTp>>::value ||
-                              is_same<_RawUp, reference_wrapper<__remove_const_t<_RawTp>>>::value >;
-    return (is_lvalue_reference<_Tp>::value && _CheckLValueArg::value) ||
-           (is_rvalue_reference<_Tp>::value && !is_lvalue_reference<_Up>::value &&
-            is_convertible<_UpPtr, _TpPtr>::value);
-  }
-
   _LIBCPP_HIDE_FROM_ABI constexpr __optional_storage_base() noexcept : __value_(nullptr) {}
 
+  template <class _Up>
+  _LIBCPP_HIDE_FROM_ABI constexpr void __convert_init_ref_val(_Up&& __val) noexcept {
+    _Tp& __r(std::forward<_Up>(__val));
+    __value_ = std::addressof(__r);
+  }
+
   template <class _UArg>
-  _LIBCPP_HIDE_FROM_ABI constexpr explicit __optional_storage_base(in_place_t, _UArg&& __uarg)
-      : __value_(std::addressof(__uarg)) {
-    static_assert(__can_bind_reference<_UArg>(),
-                  "Attempted to construct a reference element in tuple from a "
+  _LIBCPP_HIDE_FROM_ABI constexpr explicit __optional_storage_base(in_place_t, _UArg&& __uarg) {
+    static_assert(!__reference_constructs_from_temporary_v<_Tp, _UArg>,
+                  "Attempted to construct a reference element in optional from a "
                   "possible temporary");
+    __convert_init_ref_val(std::forward<_UArg>(__uarg));
   }
 
   _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 void reset() noexcept { __value_ = nullptr; }
 
-  _LIBCPP_HIDE_FROM_ABI constexpr bool has_value() const noexcept { return __value_ != nullptr; }
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr bool has_value() const noexcept { return __value_ != nullptr; }
 
-  _LIBCPP_HIDE_FROM_ABI constexpr value_type& __get() const& noexcept { return *__value_; }
-
-  _LIBCPP_HIDE_FROM_ABI constexpr value_type&& __get() const&& noexcept { return std::forward<value_type>(*__value_); }
+  _LIBCPP_HIDE_FROM_ABI constexpr value_type& __get() const noexcept { return *__value_; }
 
   template <class _UArg>
   _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 void __construct(_UArg&& __val) {
     _LIBCPP_ASSERT_INTERNAL(!has_value(), "__construct called for engaged __optional_storage");
-    static_assert(__can_bind_reference<_UArg>(),
-                  "Attempted to construct a reference element in tuple from a "
+    static_assert(!__reference_constructs_from_temporary_v<_Tp, _UArg>,
+                  "Attempted to construct a reference element in optional from a "
                   "possible temporary");
-    __value_ = std::addressof(__val);
+    __convert_init_ref_val(std::forward<_UArg>(__val));
   }
 
   template <class _That>
@@ -462,9 +550,13 @@ struct __optional_storage_base<_Tp, true> {
         __construct(std::forward<_That>(__opt).__get());
     }
   }
+
+  _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 void __swap(__optional_storage_base& __rhs) noexcept {
+    std::swap(__value_, __rhs.__value_);
+  }
 };
 
-template <class _Tp, bool = is_trivially_copy_constructible<_Tp>::value>
+template <class _Tp, bool = is_trivially_copy_constructible_v<_Tp> || is_lvalue_reference_v<_Tp>>
 struct __optional_copy_base : __optional_storage_base<_Tp> {
   using __optional_storage_base<_Tp>::__optional_storage_base;
 };
@@ -484,7 +576,7 @@ struct __optional_copy_base<_Tp, false> : __optional_storage_base<_Tp> {
   _LIBCPP_HIDE_FROM_ABI __optional_copy_base& operator=(__optional_copy_base&&)      = default;
 };
 
-template <class _Tp, bool = is_trivially_move_constructible<_Tp>::value>
+template <class _Tp, bool = is_trivially_move_constructible_v<_Tp> || is_lvalue_reference_v<_Tp>>
 struct __optional_move_base : __optional_copy_base<_Tp> {
   using __optional_copy_base<_Tp>::__optional_copy_base;
 };
@@ -507,8 +599,9 @@ struct __optional_move_base<_Tp, false> : __optional_copy_base<_Tp> {
 };
 
 template <class _Tp,
-          bool = is_trivially_destructible<_Tp>::value && is_trivially_copy_constructible<_Tp>::value &&
-                 is_trivially_copy_assignable<_Tp>::value>
+          bool = (is_trivially_destructible_v<_Tp> && is_trivially_copy_constructible_v<_Tp> &&
+                  is_trivially_copy_assignable_v<_Tp>) ||
+                 is_lvalue_reference_v<_Tp>>
 struct __optional_copy_assign_base : __optional_move_base<_Tp> {
   using __optional_move_base<_Tp>::__optional_move_base;
 };
@@ -531,8 +624,9 @@ struct __optional_copy_assign_base<_Tp, false> : __optional_move_base<_Tp> {
 };
 
 template <class _Tp,
-          bool = is_trivially_destructible<_Tp>::value && is_trivially_move_constructible<_Tp>::value &&
-                 is_trivially_move_assignable<_Tp>::value>
+          bool = (is_trivially_destructible_v<_Tp> && is_trivially_move_constructible_v<_Tp> &&
+                  is_trivially_move_assignable_v<_Tp>) ||
+                 is_lvalue_reference_v<_Tp>>
 struct __optional_move_assign_base : __optional_copy_assign_base<_Tp> {
   using __optional_copy_assign_base<_Tp>::__optional_copy_assign_base;
 };
@@ -561,12 +655,24 @@ using __optional_sfinae_ctor_base_t _LIBCPP_NODEBUG =
 
 template <class _Tp>
 using __optional_sfinae_assign_base_t _LIBCPP_NODEBUG =
-    __sfinae_assign_base< (is_copy_constructible<_Tp>::value && is_copy_assignable<_Tp>::value),
-                          (is_move_constructible<_Tp>::value && is_move_assignable<_Tp>::value) >;
+    __sfinae_assign_base< (is_copy_constructible_v<_Tp> && is_copy_assignable_v<_Tp>) || is_lvalue_reference_v<_Tp>,
+                          (is_move_constructible_v<_Tp> && is_move_assignable_v<_Tp>) || is_lvalue_reference_v<_Tp>>;
 
 template <class _Tp>
 class optional;
 
+#    if _LIBCPP_STD_VER >= 26 && _LIBCPP_HAS_EXPERIMENTAL_OPTIONAL_ITERATOR
+template <class _Tp>
+constexpr bool ranges::enable_view<optional<_Tp>> = true;
+
+template <class _Tp>
+constexpr range_format format_kind<optional<_Tp>> = range_format::disabled;
+
+template <class _Tp>
+constexpr bool ranges::enable_borrowed_range<optional<_Tp&>> = true;
+
+#    endif
+
 #    if _LIBCPP_STD_VER >= 20
 
 template <class _Tp>
@@ -579,29 +685,150 @@ struct __is_std_optional : false_type {};
 template <class _Tp>
 struct __is_std_optional<optional<_Tp>> : true_type {};
 
+template <class _Tp, class... _Args>
+inline constexpr bool __is_constructible_for_optional_v = is_constructible_v<_Tp, _Args...>;
+
+template <class _Tp, class... _Args>
+struct __is_constructible_for_optional : bool_constant<__is_constructible_for_optional_v<_Tp, _Args...>> {};
+
+template <class _Tp, class _Up, class... _Args>
+inline constexpr bool __is_constructible_for_optional_initializer_list_v =
+    is_constructible_v<_Tp, initializer_list<_Up>&, _Args...>;
+
+#    if _LIBCPP_STD_VER >= 26
+template <class _Tp, class... _Args>
+inline constexpr bool __is_constructible_for_optional_v<_Tp&, _Args...> = false;
+template <class _Tp, class _Arg>
+inline constexpr bool __is_constructible_for_optional_v<_Tp&, _Arg> =
+    is_constructible_v<_Tp&, _Arg> && !reference_constructs_from_temporary_v<_Tp&, _Arg>;
+
+template <class _Tp, class _Up, class... _Args>
+inline constexpr bool __is_constructible_for_optional_initializer_list_v<_Tp&, _Up, _Args...> = false;
+#    endif
+
+template <class _Tp, class = void>
+struct __optional_iterator {};
+
+#    if _LIBCPP_STD_VER >= 26 && _LIBCPP_HAS_EXPERIMENTAL_OPTIONAL_ITERATOR
+
+template <class _Tp>
+struct __optional_iterator<_Tp, enable_if_t<is_object_v<_Tp>>> {
+private:
+  using __pointer _LIBCPP_NODEBUG       = add_pointer_t<_Tp>;
+  using __const_pointer _LIBCPP_NODEBUG = add_pointer_t<const _Tp>;
+
+public:
+#      ifdef _LIBCPP_ABI_BOUNDED_ITERATORS_IN_OPTIONAL
+  using iterator       = __bounded_iter<__wrap_iter<__pointer>>;
+  using const_iterator = __bounded_iter<__wrap_iter<__const_pointer>>;
+#      else
+  using iterator       = __wrap_iter<__pointer>;
+  using const_iterator = __wrap_iter<__const_pointer>;
+#      endif
+
+  // [optional.iterators], iterator support
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr iterator begin() noexcept {
+    auto& __derived_self = static_cast<optional<_Tp>&>(*this);
+    auto* __ptr          = std::addressof(__derived_self.__get());
+
+#      ifdef _LIBCPP_ABI_BOUNDED_ITERATORS_IN_OPTIONAL
+    return std::__make_bounded_iter(
+        __wrap_iter<__pointer>(__ptr),
+        __wrap_iter<__pointer>(__ptr),
+        __wrap_iter<__pointer>(__ptr) + (__derived_self.has_value() ? 1 : 0));
+#      else
+    return iterator(__ptr);
+#      endif
+  }
+
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr const_iterator begin() const noexcept {
+    auto& __derived_self = static_cast<const optional<_Tp>&>(*this);
+    auto* __ptr          = std::addressof(__derived_self.__get());
+
+#      ifdef _LIBCPP_ABI_BOUNDED_ITERATORS_IN_OPTIONAL
+    return std::__make_bounded_iter(
+        __wrap_iter<__const_pointer>(__ptr),
+        __wrap_iter<__const_pointer>(__ptr),
+        __wrap_iter<__const_pointer>(__ptr) + (__derived_self.has_value() ? 1 : 0));
+#      else
+    return const_iterator(__ptr);
+#      endif
+  }
+
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr iterator end() noexcept {
+    return begin() + (static_cast<optional<_Tp>&>(*this).has_value() ? 1 : 0);
+  }
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr const_iterator end() const noexcept {
+    return begin() + (static_cast<const optional<_Tp>&>(*this).has_value() ? 1 : 0);
+  }
+};
+
+template <class _Tp>
+struct __optional_iterator<_Tp&, enable_if_t<is_object_v<_Tp> && !__is_unbounded_array_v<_Tp> >> {
+private:
+  using __pointer _LIBCPP_NODEBUG = add_pointer_t<_Tp>;
+
+public:
+#      ifdef _LIBCPP_ABI_BOUNDED_ITERATORS_IN_OPTIONAL
+  using iterator = __bounded_iter<__wrap_iter<__pointer>>;
+#      else
+  using iterator = __wrap_iter<__pointer>;
+#      endif
+
+  // [optional.ref.iterators], iterator support
+
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr auto begin() const noexcept {
+    auto& __derived_self = static_cast<const optional<_Tp&>&>(*this);
+    auto* __ptr          = __derived_self.has_value() ? std::addressof(__derived_self.__get()) : nullptr;
+
+#      ifdef _LIBCPP_ABI_BOUNDED_ITERATORS_IN_OPTIONAL
+    return std::__make_bounded_iter(
+        __wrap_iter<__pointer>(__ptr),
+        __wrap_iter<__pointer>(__ptr),
+        __wrap_iter<__pointer>(__ptr) + (__derived_self.has_value() ? 1 : 0));
+#      else
+    return iterator(__ptr);
+#      endif
+  }
+
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr auto end() const noexcept {
+    return begin() + (static_cast<const optional<_Tp&>&>(*this).has_value() ? 1 : 0);
+  }
+};
+
+#    endif // _LIBCPP_STD_VER >= 26 && _LIBCPP_HAS_EXPERIMENTAL_OPTIONAL_ITERATOR
+
 template <class _Tp>
 class _LIBCPP_DECLSPEC_EMPTY_BASES optional
     : private __optional_move_assign_base<_Tp>,
       private __optional_sfinae_ctor_base_t<_Tp>,
-      private __optional_sfinae_assign_base_t<_Tp> {
+      private __optional_sfinae_assign_base_t<_Tp>,
+      public __optional_iterator<_Tp> {
   using __base _LIBCPP_NODEBUG = __optional_move_assign_base<_Tp>;
 
 public:
-  using value_type = _Tp;
+  using value_type = __libcpp_remove_reference_t<_Tp>;
 
   using __trivially_relocatable _LIBCPP_NODEBUG =
       conditional_t<__libcpp_is_trivially_relocatable<_Tp>::value, optional, void>;
-  using __replaceable _LIBCPP_NODEBUG = conditional_t<__is_replaceable_v<_Tp>, optional, void>;
 
 private:
-  // Disable the reference extension using this static assert.
-  static_assert(!is_same_v<__remove_cvref_t<value_type>, in_place_t>,
+  static_assert(!is_same_v<__remove_cvref_t<_Tp>, in_place_t>,
                 "instantiation of optional with in_place_t is ill-formed");
-  static_assert(!is_same_v<__remove_cvref_t<value_type>, nullopt_t>,
-                "instantiation of optional with nullopt_t is ill-formed");
-  static_assert(!is_reference_v<value_type>, "instantiation of optional with a reference type is ill-formed");
-  static_assert(is_destructible_v<value_type>, "instantiation of optional with a non-destructible type is ill-formed");
-  static_assert(!is_array_v<value_type>, "instantiation of optional with an array type is ill-formed");
+  static_assert(!is_same_v<__remove_cvref_t<_Tp>, nullopt_t>, "instantiation of optional with nullopt_t is ill-formed");
+#    if _LIBCPP_STD_VER >= 26
+  static_assert(!is_rvalue_reference_v<_Tp>, "instantiation of optional with an rvalue reference type is ill-formed");
+#    else
+  static_assert(!is_reference_v<_Tp>, "instantiation of optional with a reference type is ill-formed");
+#    endif
+  static_assert(is_destructible_v<_Tp>, "instantiation of optional with a non-destructible type is ill-formed");
+  static_assert(!is_array_v<_Tp>, "instantiation of optional with an array type is ill-formed");
+
+#    if _LIBCPP_STD_VER >= 26
+  template <class _Up>
+  constexpr static bool __libcpp_opt_ref_ctor_deleted =
+      is_lvalue_reference_v<_Tp> && reference_constructs_from_temporary_v<_Tp, _Up>;
+#    endif
 
   // LWG2756: conditionally explicit conversion from _Up
   struct _CheckOptionalArgsConstructor {
@@ -674,45 +901,131 @@ public:
   _LIBCPP_HIDE_FROM_ABI constexpr optional(optional&&)      = default;
   _LIBCPP_HIDE_FROM_ABI constexpr optional(nullopt_t) noexcept {}
 
-  template <class _InPlaceT,
-            class... _Args,
-            enable_if_t<_And<_IsSame<_InPlaceT, in_place_t>, is_constructible<value_type, _Args...>>::value, int> = 0>
+  template <
+      class _InPlaceT,
+      class... _Args,
+      enable_if_t<_And<_IsSame<_InPlaceT, in_place_t>, __is_constructible_for_optional<_Tp, _Args...>>::value, int> = 0>
   _LIBCPP_HIDE_FROM_ABI constexpr explicit optional(_InPlaceT, _Args&&... __args)
       : __base(in_place, std::forward<_Args>(__args)...) {}
 
   template <class _Up,
             class... _Args,
-            enable_if_t<is_constructible_v<value_type, initializer_list<_Up>&, _Args...>, int> = 0>
+            enable_if_t<__is_constructible_for_optional_initializer_list_v<_Tp, _Up, _Args...>, int> = 0>
   _LIBCPP_HIDE_FROM_ABI constexpr explicit optional(in_place_t, initializer_list<_Up> __il, _Args&&... __args)
       : __base(in_place, __il, std::forward<_Args>(__args)...) {}
 
-  template <class _Up                                                                        = value_type,
-            enable_if_t<_CheckOptionalArgsCtor<_Up>::template __enable_implicit<_Up>(), int> = 0>
-  _LIBCPP_HIDE_FROM_ABI constexpr optional(_Up&& __v) : __base(in_place, std::forward<_Up>(__v)) {}
+  template <class _Up = _Tp, enable_if_t<_CheckOptionalArgsCtor<_Up>::template __enable_implicit<_Up>(), int> = 0>
+  _LIBCPP_HIDE_FROM_ABI constexpr optional(_Up&& __v)
+#    if _LIBCPP_STD_VER >= 26
+      noexcept(is_lvalue_reference_v<_Tp> && is_nothrow_constructible_v<_Tp&, _Up>)
+#    endif
+      : __base(in_place, std::forward<_Up>(__v)) {
+  }
 
-  template <class _Up, enable_if_t<_CheckOptionalArgsCtor<_Up>::template __enable_explicit<_Up>(), int> = 0>
-  _LIBCPP_HIDE_FROM_ABI constexpr explicit optional(_Up&& __v) : __base(in_place, std::forward<_Up>(__v)) {}
+  template <class _Up                                                                        = remove_cv_t<_Tp>,
+            enable_if_t<_CheckOptionalArgsCtor<_Up>::template __enable_explicit<_Up>(), int> = 0>
+  _LIBCPP_HIDE_FROM_ABI constexpr explicit optional(_Up&& __v)
+#    if _LIBCPP_STD_VER >= 26
+      noexcept(is_lvalue_reference_v<_Tp> && is_nothrow_constructible_v<_Tp&, _Up>)
+#    endif
+      : __base(in_place, std::forward<_Up>(__v)) {
+  }
 
   // LWG2756: conditionally explicit conversion from const optional<_Up>&
   template <class _Up, enable_if_t<_CheckOptionalLikeCtor<_Up, _Up const&>::template __enable_implicit<_Up>(), int> = 0>
-  _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 optional(const optional<_Up>& __v) {
+  _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 optional(const optional<_Up>& __v)
+#    if _LIBCPP_STD_VER >= 26
+      noexcept(is_lvalue_reference_v<_Tp> && is_nothrow_constructible_v<_Tp&, _Up&>)
+#    endif
+  {
     this->__construct_from(__v);
   }
   template <class _Up, enable_if_t<_CheckOptionalLikeCtor<_Up, _Up const&>::template __enable_explicit<_Up>(), int> = 0>
-  _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 explicit optional(const optional<_Up>& __v) {
+  _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 explicit optional(const optional<_Up>& __v)
+#    if _LIBCPP_STD_VER >= 26
+      noexcept(is_lvalue_reference_v<_Tp> && is_nothrow_constructible_v<_Tp&, _Up&>)
+#    endif
+  {
     this->__construct_from(__v);
   }
 
   // LWG2756: conditionally explicit conversion from optional<_Up>&&
   template <class _Up, enable_if_t<_CheckOptionalLikeCtor<_Up, _Up&&>::template __enable_implicit<_Up>(), int> = 0>
-  _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 optional(optional<_Up>&& __v) {
+  _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 optional(optional<_Up>&& __v)
+#    if _LIBCPP_STD_VER >= 26
+      noexcept(is_lvalue_reference_v<_Tp> && is_nothrow_constructible_v<_Tp&, _Up>)
+#    endif
+  {
     this->__construct_from(std::move(__v));
   }
   template <class _Up, enable_if_t<_CheckOptionalLikeCtor<_Up, _Up&&>::template __enable_explicit<_Up>(), int> = 0>
-  _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 explicit optional(optional<_Up>&& __v) {
+  _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 explicit optional(optional<_Up>&& __v)
+#    if _LIBCPP_STD_VER >= 26
+      noexcept(is_lvalue_reference_v<_Tp> && is_nothrow_constructible_v<_Tp&, _Up>)
+#    endif
+  {
     this->__construct_from(std::move(__v));
   }
 
+  // deleted optional<T&> constructors and additional optional<T&> constructors
+#    if _LIBCPP_STD_VER >= 26
+  // optional(U&&)
+  template <class _Up = _Tp, enable_if_t<_CheckOptionalArgsCtor<_Up>::template __enable_implicit<_Up>(), int> = 0>
+    requires __libcpp_opt_ref_ctor_deleted<_Up>
+  optional(_Up&&) = delete;
+
+  template <class _Up                                                                        = remove_cv_t<_Tp>,
+            enable_if_t<_CheckOptionalArgsCtor<_Up>::template __enable_explicit<_Up>(), int> = 0>
+    requires __libcpp_opt_ref_ctor_deleted<_Up>
+  explicit optional(_Up&&) = delete;
+
+  // optional(optional<U>& rhs)
+  template <class _Up>
+    requires(!__libcpp_opt_ref_ctor_deleted<_Up>) && (!is_same_v<remove_cvref_t<_Tp>, optional<_Up>>) &&
+            (!is_same_v<_Tp&, _Up>) && is_constructible_v<_Tp&, _Up&>
+  _LIBCPP_HIDE_FROM_ABI constexpr explicit(!is_convertible_v<_Up&, _Tp&>)
+      optional(optional<_Up>& __rhs) noexcept(is_nothrow_constructible_v<_Tp&, _Up&>) {
+    this->__construct_from(__rhs);
+  }
+
+  template <class _Up>
+    requires __libcpp_opt_ref_ctor_deleted<_Up> && (!is_same_v<remove_cvref_t<_Tp>, optional<_Up>>) &&
+                 (!is_same_v<_Tp&, _Up>) && is_constructible_v<_Tp&, _Up&>
+  constexpr explicit optional(optional<_Up>& __rhs) noexcept = delete;
+
+  // optional(const optional<U>&)
+  template <class _Up, enable_if_t<_CheckOptionalLikeCtor<_Up, _Up const&>::template __enable_implicit<_Up>(), int> = 0>
+    requires __libcpp_opt_ref_ctor_deleted<_Up>
+  optional(const optional<_Up>&) = delete;
+
+  template <class _Up, enable_if_t<_CheckOptionalLikeCtor<_Up, _Up const&>::template __enable_explicit<_Up>(), int> = 0>
+    requires __libcpp_opt_ref_ctor_deleted<_Up>
+  explicit optional(const optional<_Up>&) = delete;
+
+  // optional(optional<U>&&)
+  template <class _Up, enable_if_t<_CheckOptionalLikeCtor<_Up, _Up&&>::template __enable_implicit<_Up>(), int> = 0>
+    requires __libcpp_opt_ref_ctor_deleted<_Up>
+  optional(optional<_Up>&&) = delete;
+
+  template <class _Up, enable_if_t<_CheckOptionalLikeCtor<_Up, _Up&&>::template __enable_explicit<_Up>(), int> = 0>
+    requires __libcpp_opt_ref_ctor_deleted<_Up>
+  explicit optional(optional<_Up>&&) = delete;
+
+  // optional(const optional<U>&&)
+  template <class _Up>
+    requires(!__libcpp_opt_ref_ctor_deleted<_Up>) && (!is_same_v<remove_cvref_t<_Tp>, optional<_Up>>) &&
+            (!is_same_v<_Tp&, _Up>) && is_constructible_v<_Tp&, _Up>
+  _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 explicit(!is_convertible_v<const _Up, _Tp&>)
+      optional(const optional<_Up>&& __v) noexcept(is_nothrow_constructible_v<_Tp&, const _Up>) {
+    this->__construct_from(std::move(__v));
+  }
+
+  template <class _Up>
+    requires __libcpp_opt_ref_ctor_deleted<_Up> && (!is_same_v<remove_cvref_t<_Tp>, optional<_Up>>) &&
+                 (!is_same_v<_Tp&, _Up>) && is_constructible_v<_Tp&, _Up>
+  _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 optional(const optional<_Up>&& __v) noexcept = delete;
+#    endif
+
 #    if _LIBCPP_STD_VER >= 23
   template <class _Tag,
             class _Fp,
@@ -731,11 +1044,12 @@ public:
   _LIBCPP_HIDE_FROM_ABI constexpr optional& operator=(optional&&)      = default;
 
   // LWG2756
-  template <class _Up        = value_type,
+  template <class _Up        = remove_cv_t<_Tp>,
             enable_if_t<_And<_IsNotSame<__remove_cvref_t<_Up>, optional>,
-                             _Or<_IsNotSame<__remove_cvref_t<_Up>, value_type>, _Not<is_scalar<value_type>>>,
-                             is_constructible<value_type, _Up>,
-                             is_assignable<value_type&, _Up>>::value,
+                             _Or<_IsNotSame<__remove_cvref_t<_Up>, _Tp>, _Not<is_scalar<_Tp>>>,
+                             is_constructible<_Tp, _Up>,
+                             is_assignable<_Tp&, _Up>,
+                             is_object<_Tp>>::value,
                         int> = 0>
   _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 optional& operator=(_Up&& __v) {
     if (this->has_value())
@@ -759,8 +1073,12 @@ public:
     return *this;
   }
 
-  template <class... _Args, enable_if_t<is_constructible_v<value_type, _Args...>, int> = 0>
-  _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 _Tp& emplace(_Args&&... __args) {
+  template <class... _Args, enable_if_t<__is_constructible_for_optional_v<_Tp, _Args...>, int> = 0>
+  _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 _Tp& emplace(_Args&&... __args)
+#    if _LIBCPP_STD_VER >= 26
+      noexcept(is_lvalue_reference_v<_Tp> && is_nothrow_constructible_v<_Tp, _Args...>)
+#    endif
+  {
     reset();
     this->__construct(std::forward<_Args>(__args)...);
     return this->__get();
@@ -768,56 +1086,72 @@ public:
 
   template <class _Up,
             class... _Args,
-            enable_if_t<is_constructible_v<value_type, initializer_list<_Up>&, _Args...>, int> = 0>
+            enable_if_t<__is_constructible_for_optional_initializer_list_v<_Tp, _Up, _Args...>, int> = 0>
   _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 _Tp& emplace(initializer_list<_Up> __il, _Args&&... __args) {
     reset();
     this->__construct(__il, std::forward<_Args>(__args)...);
     return this->__get();
   }
 
-  _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 void
-  swap(optional& __opt) noexcept(is_nothrow_move_constructible_v<value_type> && is_nothrow_swappable_v<value_type>) {
-    if (this->has_value() == __opt.has_value()) {
-      using std::swap;
-      if (this->has_value())
-        swap(this->__get(), __opt.__get());
-    } else {
-      if (this->has_value()) {
-        __opt.__construct(std::move(this->__get()));
-        reset();
-      } else {
-        this->__construct(std::move(__opt.__get()));
-        __opt.reset();
-      }
-    }
+  _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 void swap(optional& __opt) noexcept(
+      (is_nothrow_move_constructible_v<_Tp> && is_nothrow_swappable_v<_Tp>)
+#    if _LIBCPP_STD_VER >= 26
+      || is_lvalue_reference_v<_Tp>
+#    endif
+  ) {
+    this->__swap(__opt);
   }
 
-  _LIBCPP_HIDE_FROM_ABI constexpr add_pointer_t<value_type const> operator->() const noexcept {
+  _LIBCPP_HIDE_FROM_ABI constexpr add_pointer_t<_Tp const> operator->() const noexcept
+#    if _LIBCPP_STD_VER >= 26
+    requires(is_object_v<_Tp>)
+#    endif
+  {
     _LIBCPP_ASSERT_VALID_ELEMENT_ACCESS(this->has_value(), "optional operator-> called on a disengaged value");
     return std::addressof(this->__get());
   }
 
-  _LIBCPP_HIDE_FROM_ABI constexpr add_pointer_t<value_type> operator->() noexcept {
+  _LIBCPP_HIDE_FROM_ABI constexpr add_pointer_t<_Tp> operator->() noexcept
+#    if _LIBCPP_STD_VER >= 26
+    requires(is_object_v<_Tp>)
+#    endif
+  {
     _LIBCPP_ASSERT_VALID_ELEMENT_ACCESS(this->has_value(), "optional operator-> called on a disengaged value");
     return std::addressof(this->__get());
   }
 
-  _LIBCPP_HIDE_FROM_ABI constexpr const value_type& operator*() const& noexcept {
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr const _Tp& operator*() const& noexcept
+#    if _LIBCPP_STD_VER >= 26
+    requires(is_object_v<_Tp>)
+#    endif
+  {
     _LIBCPP_ASSERT_VALID_ELEMENT_ACCESS(this->has_value(), "optional operator* called on a disengaged value");
     return this->__get();
   }
 
-  _LIBCPP_HIDE_FROM_ABI constexpr value_type& operator*() & noexcept {
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr _Tp& operator*() & noexcept
+#    if _LIBCPP_STD_VER >= 26
+    requires(is_object_v<_Tp>)
+#    endif
+  {
     _LIBCPP_ASSERT_VALID_ELEMENT_ACCESS(this->has_value(), "optional operator* called on a disengaged value");
     return this->__get();
   }
 
-  _LIBCPP_HIDE_FROM_ABI constexpr value_type&& operator*() && noexcept {
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr _Tp&& operator*() && noexcept
+#    if _LIBCPP_STD_VER >= 26
+    requires(is_object_v<_Tp>)
+#    endif
+  {
     _LIBCPP_ASSERT_VALID_ELEMENT_ACCESS(this->has_value(), "optional operator* called on a disengaged value");
     return std::move(this->__get());
   }
 
-  _LIBCPP_HIDE_FROM_ABI constexpr const value_type&& operator*() const&& noexcept {
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr const _Tp&& operator*() const&& noexcept
+#    if _LIBCPP_STD_VER >= 26
+    requires(is_object_v<_Tp>)
+#    endif
+  {
     _LIBCPP_ASSERT_VALID_ELEMENT_ACCESS(this->has_value(), "optional operator* called on a disengaged value");
     return std::move(this->__get());
   }
@@ -827,48 +1161,73 @@ public:
   using __base::__get;
   using __base::has_value;
 
-  _LIBCPP_HIDE_FROM_ABI constexpr value_type const& value() const& {
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr _Tp const& value() const&
+#    if _LIBCPP_STD_VER >= 26
+    requires(is_object_v<_Tp>)
+#    endif
+  {
     if (!this->has_value())
       std::__throw_bad_optional_access();
     return this->__get();
   }
 
-  _LIBCPP_HIDE_FROM_ABI constexpr value_type& value() & {
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr _Tp& value() &
+#    if _LIBCPP_STD_VER >= 26
+    requires(is_object_v<_Tp>)
+#    endif
+  {
     if (!this->has_value())
       std::__throw_bad_optional_access();
     return this->__get();
   }
 
-  _LIBCPP_HIDE_FROM_ABI constexpr value_type&& value() && {
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr _Tp&& value() &&
+#    if _LIBCPP_STD_VER >= 26
+    requires(is_object_v<_Tp>)
+#    endif
+  {
     if (!this->has_value())
       std::__throw_bad_optional_access();
     return std::move(this->__get());
   }
 
-  _LIBCPP_HIDE_FROM_ABI constexpr value_type const&& value() const&& {
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr _Tp const&& value() const&&
+#    if _LIBCPP_STD_VER >= 26
+    requires(is_object_v<_Tp>)
+#    endif
+  {
     if (!this->has_value())
       std::__throw_bad_optional_access();
     return std::move(this->__get());
   }
 
-  template <class _Up>
-  _LIBCPP_HIDE_FROM_ABI constexpr value_type value_or(_Up&& __v) const& {
-    static_assert(is_copy_constructible_v<value_type>, "optional<T>::value_or: T must be copy constructible");
-    static_assert(is_convertible_v<_Up, value_type>, "optional<T>::value_or: U must be convertible to T");
-    return this->has_value() ? this->__get() : static_cast<value_type>(std::forward<_Up>(__v));
+  template <class _Up = remove_cv_t<_Tp>>
+#    if _LIBCPP_STD_VER >= 26
+    requires(is_object_v<_Tp>)
+#    endif
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr _Tp value_or(_Up&& __v) const& {
+    static_assert(is_copy_constructible_v<_Tp>, "optional<T>::value_or: T must be copy constructible");
+    static_assert(is_convertible_v<_Up, _Tp>, "optional<T>::value_or: U must be convertible to T");
+    return this->has_value() ? this->__get() : static_cast<_Tp>(std::forward<_Up>(__v));
   }
 
-  template <class _Up>
-  _LIBCPP_HIDE_FROM_ABI constexpr value_type value_or(_Up&& __v) && {
-    static_assert(is_move_constructible_v<value_type>, "optional<T>::value_or: T must be move constructible");
-    static_assert(is_convertible_v<_Up, value_type>, "optional<T>::value_or: U must be convertible to T");
-    return this->has_value() ? std::move(this->__get()) : static_cast<value_type>(std::forward<_Up>(__v));
+  template <class _Up = remove_cv_t<_Tp>>
+#    if _LIBCPP_STD_VER >= 26
+    requires(is_object_v<_Tp>)
+#    endif
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr _Tp value_or(_Up&& __v) && {
+    static_assert(is_move_constructible_v<_Tp>, "optional<T>::value_or: T must be move constructible");
+    static_assert(is_convertible_v<_Up, _Tp>, "optional<T>::value_or: U must be convertible to T");
+    return this->has_value() ? std::move(this->__get()) : static_cast<_Tp>(std::forward<_Up>(__v));
   }
 
 #    if _LIBCPP_STD_VER >= 23
   template <class _Func>
-  _LIBCPP_HIDE_FROM_ABI constexpr auto and_then(_Func&& __f) & {
-    using _Up = invoke_result_t<_Func, value_type&>;
+#      if _LIBCPP_STD_VER >= 26
+    requires(is_object_v<_Tp>)
+#      endif
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr auto and_then(_Func&& __f) & {
+    using _Up = invoke_result_t<_Func, _Tp&>;
     static_assert(__is_std_optional<remove_cvref_t<_Up>>::value,
                   "Result of f(value()) must be a specialization of std::optional");
     if (*this)
@@ -877,8 +1236,11 @@ public:
   }
 
   template <class _Func>
-  _LIBCPP_HIDE_FROM_ABI constexpr auto and_then(_Func&& __f) const& {
-    using _Up = invoke_result_t<_Func, const value_type&>;
+#      if _LIBCPP_STD_VER >= 26
+    requires(is_object_v<_Tp>)
+#      endif
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr auto and_then(_Func&& __f) const& {
+    using _Up = invoke_result_t<_Func, const _Tp&>;
     static_assert(__is_std_optional<remove_cvref_t<_Up>>::value,
                   "Result of f(value()) must be a specialization of std::optional");
     if (*this)
@@ -887,8 +1249,11 @@ public:
   }
 
   template <class _Func>
-  _LIBCPP_HIDE_FROM_ABI constexpr auto and_then(_Func&& __f) && {
-    using _Up = invoke_result_t<_Func, value_type&&>;
+#      if _LIBCPP_STD_VER >= 26
+    requires(is_object_v<_Tp>)
+#      endif
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr auto and_then(_Func&& __f) && {
+    using _Up = invoke_result_t<_Func, _Tp&&>;
     static_assert(__is_std_optional<remove_cvref_t<_Up>>::value,
                   "Result of f(std::move(value())) must be a specialization of std::optional");
     if (*this)
@@ -897,8 +1262,11 @@ public:
   }
 
   template <class _Func>
-  _LIBCPP_HIDE_FROM_ABI constexpr auto and_then(_Func&& __f) const&& {
-    using _Up = invoke_result_t<_Func, const value_type&&>;
+#      if _LIBCPP_STD_VER >= 26
+    requires(is_object_v<_Tp>)
+#      endif
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr auto and_then(_Func&& __f) const&& {
+    using _Up = invoke_result_t<_Func, const _Tp&&>;
     static_assert(__is_std_optional<remove_cvref_t<_Up>>::value,
                   "Result of f(std::move(value())) must be a specialization of std::optional");
     if (*this)
@@ -907,8 +1275,11 @@ public:
   }
 
   template <class _Func>
-  _LIBCPP_HIDE_FROM_ABI constexpr auto transform(_Func&& __f) & {
-    using _Up = remove_cv_t<invoke_result_t<_Func, value_type&>>;
+#      if _LIBCPP_STD_VER >= 26
+    requires(is_object_v<_Tp>)
+#      endif
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr auto transform(_Func&& __f) & {
+    using _Up = remove_cv_t<invoke_result_t<_Func, _Tp&>>;
     static_assert(!is_array_v<_Up>, "Result of f(value()) should not be an Array");
     static_assert(!is_same_v<_Up, in_place_t>, "Result of f(value()) should not be std::in_place_t");
     static_assert(!is_same_v<_Up, nullopt_t>, "Result of f(value()) should not be std::nullopt_t");
@@ -919,8 +1290,11 @@ public:
   }
 
   template <class _Func>
-  _LIBCPP_HIDE_FROM_ABI constexpr auto transform(_Func&& __f) const& {
-    using _Up = remove_cv_t<invoke_result_t<_Func, const value_type&>>;
+#      if _LIBCPP_STD_VER >= 26
+    requires(is_object_v<_Tp>)
+#      endif
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr auto transform(_Func&& __f) const& {
+    using _Up = remove_cv_t<invoke_result_t<_Func, const _Tp&>>;
     static_assert(!is_array_v<_Up>, "Result of f(value()) should not be an Array");
     static_assert(!is_same_v<_Up, in_place_t>, "Result of f(value()) should not be std::in_place_t");
     static_assert(!is_same_v<_Up, nullopt_t>, "Result of f(value()) should not be std::nullopt_t");
@@ -931,8 +1305,11 @@ public:
   }
 
   template <class _Func>
-  _LIBCPP_HIDE_FROM_ABI constexpr auto transform(_Func&& __f) && {
-    using _Up = remove_cv_t<invoke_result_t<_Func, value_type&&>>;
+#      if _LIBCPP_STD_VER >= 26
+    requires(is_object_v<_Tp>)
+#      endif
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr auto transform(_Func&& __f) && {
+    using _Up = remove_cv_t<invoke_result_t<_Func, _Tp&&>>;
     static_assert(!is_array_v<_Up>, "Result of f(std::move(value())) should not be an Array");
     static_assert(!is_same_v<_Up, in_place_t>, "Result of f(std::move(value())) should not be std::in_place_t");
     static_assert(!is_same_v<_Up, nullopt_t>, "Result of f(std::move(value())) should not be std::nullopt_t");
@@ -943,8 +1320,11 @@ public:
   }
 
   template <class _Func>
-  _LIBCPP_HIDE_FROM_ABI constexpr auto transform(_Func&& __f) const&& {
-    using _Up = remove_cvref_t<invoke_result_t<_Func, const value_type&&>>;
+#      if _LIBCPP_STD_VER >= 26
+    requires(is_object_v<_Tp>)
+#      endif
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr auto transform(_Func&& __f) const&& {
+    using _Up = remove_cv_t<invoke_result_t<_Func, const _Tp&&>>;
     static_assert(!is_array_v<_Up>, "Result of f(std::move(value())) should not be an Array");
     static_assert(!is_same_v<_Up, in_place_t>, "Result of f(std::move(value())) should not be std::in_place_t");
     static_assert(!is_same_v<_Up, nullopt_t>, "Result of f(std::move(value())) should not be std::nullopt_t");
@@ -955,8 +1335,11 @@ public:
   }
 
   template <invocable _Func>
-  _LIBCPP_HIDE_FROM_ABI constexpr optional or_else(_Func&& __f) const&
-    requires is_copy_constructible_v<value_type>
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr optional or_else(_Func&& __f) const&
+    requires is_copy_constructible_v<_Tp>
+#      if _LIBCPP_STD_VER >= 26
+             && (is_object_v<_Tp>)
+#      endif
   {
     static_assert(is_same_v<remove_cvref_t<invoke_result_t<_Func>>, optional>,
                   "Result of f() should be the same type as this optional");
@@ -966,8 +1349,11 @@ public:
   }
 
   template <invocable _Func>
-  _LIBCPP_HIDE_FROM_ABI constexpr optional or_else(_Func&& __f) &&
-    requires is_move_constructible_v<value_type>
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr optional or_else(_Func&& __f) &&
+    requires is_move_constructible_v<_Tp>
+#      if _LIBCPP_STD_VER >= 26
+             && (is_object_v<_Tp>)
+#      endif
   {
     static_assert(is_same_v<remove_cvref_t<invoke_result_t<_Func>>, optional>,
                   "Result of f() should be the same type as this optional");
@@ -978,6 +1364,78 @@ public:
 #    endif // _LIBCPP_STD_VER >= 23
 
   using __base::reset;
+
+// optional<T&> overloads
+#    if _LIBCPP_STD_VER >= 26
+
+  _LIBCPP_HIDE_FROM_ABI constexpr add_pointer_t<_Tp> operator->() const noexcept
+    requires(is_lvalue_reference_v<_Tp>)
+  {
+    _LIBCPP_ASSERT_VALID_ELEMENT_ACCESS(this->has_value(), "optional operator-> called on a disengaged value");
+    return std::addressof(this->__get());
+  }
+
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr _Tp& operator*() const noexcept
+    requires(is_lvalue_reference_v<_Tp>)
+  {
+    _LIBCPP_ASSERT_VALID_ELEMENT_ACCESS(this->has_value(), "optional operator* called on a disengaged value");
+    return this->__get();
+  }
+
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr _Tp& value() const
+    requires(is_lvalue_reference_v<_Tp>)
+  {
+    if (!this->has_value())
+      std::__throw_bad_optional_access();
+    return this->__get();
+  }
+
+  template <class _Up = remove_cvref_t<_Tp>>
+    requires(is_lvalue_reference_v<_Tp> && is_object_v<__libcpp_remove_reference_t<_Tp>> &&
+             !is_array_v<__libcpp_remove_reference_t<_Tp>>)
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr decay_t<_Tp> value_or(_Up&& __v) const {
+    static_assert(
+        is_constructible_v<remove_cvref_t<_Tp>, _Tp&>, "optional<T&>::value_or: remove_cv_t<T> must be constructible");
+    static_assert(
+        is_convertible_v<_Up, remove_cvref_t<_Tp>>, "optional<T&>::value_or: U must be convertible to remove_cv_t<T>");
+    return this->has_value() ? this->__get() : static_cast<remove_cvref_t<_Tp>>(std::forward<_Up>(__v));
+  }
+
+  template <class _Func>
+    requires(is_lvalue_reference_v<_Tp>)
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr auto and_then(_Func&& __f) const {
+    using _Up = invoke_result_t<_Func, _Tp&>;
+    static_assert(__is_std_optional<remove_cvref_t<_Up>>::value,
+                  "Result of f(value()) must be a specialization of std::optional");
+    if (*this)
+      return std::invoke(std::forward<_Func>(__f), value());
+    return remove_cvref_t<_Up>();
+  }
+
+  template <class _Func>
+    requires(is_lvalue_reference_v<_Tp>)
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr optional<remove_cv_t<invoke_result_t<_Func, _Tp&>>>
+  transform(_Func&& __f) const {
+    using _Up = remove_cvref_t<invoke_result_t<_Func, _Tp&>>;
+    static_assert(!is_array_v<_Up>, "Result of f(value()) should not be an Array");
+    static_assert(!is_same_v<_Up, in_place_t>, "Result of f(value()) should not be std::in_place_t");
+    static_assert(!is_same_v<_Up, nullopt_t>, "Result of f(value()) should not be std::nullopt_t");
+    static_assert(is_object_v<_Up>, "Result of f(value()) should be an object type");
+    if (*this)
+      return optional<_Up>(__optional_construct_from_invoke_tag{}, std::forward<_Func>(__f), value());
+    return optional<_Up>();
+  }
+
+  template <invocable _Func>
+    requires(is_lvalue_reference_v<_Tp>)
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr optional or_else(_Func&& __f) const {
+    static_assert(is_same_v<remove_cvref_t<invoke_result_t<_Func>>, optional>,
+                  "Result of f() should be the same type as this optional");
+    if (*this)
+      return *this;
+    return std::forward<_Func>(__f)();
+  }
+#    endif
 };
 
 template <class _Tp>
@@ -1154,7 +1612,9 @@ template <
     enable_if_t<__is_core_convertible_v<decltype(std::declval<const _Tp&>() == std::declval<const _Up&>()), bool>,
                 int> = 0>
 _LIBCPP_HIDE_FROM_ABI constexpr bool operator==(const optional<_Tp>& __x, const _Up& __v) {
-  return static_cast<bool>(__x) ? *__x == __v : false;
+  if (__x.has_value())
+    return *__x == __v;
+  return false;
 }
 
 template <
@@ -1163,7 +1623,9 @@ template <
     enable_if_t<__is_core_convertible_v<decltype(std::declval<const _Tp&>() == std::declval<const _Up&>()), bool>,
                 int> = 0>
 _LIBCPP_HIDE_FROM_ABI constexpr bool operator==(const _Tp& __v, const optional<_Up>& __x) {
-  return static_cast<bool>(__x) ? __v == *__x : false;
+  if (__x.has_value())
+    return __v == *__x;
+  return false;
 }
 
 template <
@@ -1172,7 +1634,9 @@ template <
     enable_if_t<__is_core_convertible_v<decltype(std::declval<const _Tp&>() != std::declval<const _Up&>()), bool>,
                 int> = 0>
 _LIBCPP_HIDE_FROM_ABI constexpr bool operator!=(const optional<_Tp>& __x, const _Up& __v) {
-  return static_cast<bool>(__x) ? *__x != __v : true;
+  if (__x.has_value())
+    return *__x != __v;
+  return true;
 }
 
 template <
@@ -1181,7 +1645,9 @@ template <
     enable_if_t<__is_core_convertible_v<decltype(std::declval<const _Tp&>() != std::declval<const _Up&>()), bool>,
                 int> = 0>
 _LIBCPP_HIDE_FROM_ABI constexpr bool operator!=(const _Tp& __v, const optional<_Up>& __x) {
-  return static_cast<bool>(__x) ? __v != *__x : true;
+  if (__x.has_value())
+    return __v != *__x;
+  return true;
 }
 
 template < class _Tp,
@@ -1189,7 +1655,9 @@ template < class _Tp,
            enable_if_t<__is_core_convertible_v<decltype(std::declval<const _Tp&>() < std::declval<const _Up&>()), bool>,
                        int> = 0>
 _LIBCPP_HIDE_FROM_ABI constexpr bool operator<(const optional<_Tp>& __x, const _Up& __v) {
-  return static_cast<bool>(__x) ? *__x < __v : true;
+  if (__x.has_value())
+    return *__x < __v;
+  return true;
 }
 
 template < class _Tp,
@@ -1197,7 +1665,9 @@ template < class _Tp,
            enable_if_t<__is_core_convertible_v<decltype(std::declval<const _Tp&>() < std::declval<const _Up&>()), bool>,
                        int> = 0>
 _LIBCPP_HIDE_FROM_ABI constexpr bool operator<(const _Tp& __v, const optional<_Up>& __x) {
-  return static_cast<bool>(__x) ? __v < *__x : false;
+  if (__x.has_value())
+    return __v < *__x;
+  return false;
 }
 
 template <
@@ -1206,7 +1676,9 @@ template <
     enable_if_t<__is_core_convertible_v<decltype(std::declval<const _Tp&>() <= std::declval<const _Up&>()), bool>,
                 int> = 0>
 _LIBCPP_HIDE_FROM_ABI constexpr bool operator<=(const optional<_Tp>& __x, const _Up& __v) {
-  return static_cast<bool>(__x) ? *__x <= __v : true;
+  if (__x.has_value())
+    return *__x <= __v;
+  return true;
 }
 
 template <
@@ -1215,7 +1687,9 @@ template <
     enable_if_t<__is_core_convertible_v<decltype(std::declval<const _Tp&>() <= std::declval<const _Up&>()), bool>,
                 int> = 0>
 _LIBCPP_HIDE_FROM_ABI constexpr bool operator<=(const _Tp& __v, const optional<_Up>& __x) {
-  return static_cast<bool>(__x) ? __v <= *__x : false;
+  if (__x.has_value())
+    return __v <= *__x;
+  return false;
 }
 
 template < class _Tp,
@@ -1223,7 +1697,9 @@ template < class _Tp,
            enable_if_t<__is_core_convertible_v<decltype(std::declval<const _Tp&>() > std::declval<const _Up&>()), bool>,
                        int> = 0>
 _LIBCPP_HIDE_FROM_ABI constexpr bool operator>(const optional<_Tp>& __x, const _Up& __v) {
-  return static_cast<bool>(__x) ? *__x > __v : false;
+  if (__x.has_value())
+    return *__x > __v;
+  return false;
 }
 
 template < class _Tp,
@@ -1231,7 +1707,9 @@ template < class _Tp,
            enable_if_t<__is_core_convertible_v<decltype(std::declval<const _Tp&>() > std::declval<const _Up&>()), bool>,
                        int> = 0>
 _LIBCPP_HIDE_FROM_ABI constexpr bool operator>(const _Tp& __v, const optional<_Up>& __x) {
-  return static_cast<bool>(__x) ? __v > *__x : true;
+  if (__x.has_value())
+    return __v > *__x;
+  return true;
 }
 
 template <
@@ -1240,7 +1718,9 @@ template <
     enable_if_t<__is_core_convertible_v<decltype(std::declval<const _Tp&>() >= std::declval<const _Up&>()), bool>,
                 int> = 0>
 _LIBCPP_HIDE_FROM_ABI constexpr bool operator>=(const optional<_Tp>& __x, const _Up& __v) {
-  return static_cast<bool>(__x) ? *__x >= __v : false;
+  if (__x.has_value())
+    return *__x >= __v;
+  return false;
 }
 
 template <
@@ -1249,7 +1729,9 @@ template <
     enable_if_t<__is_core_convertible_v<decltype(std::declval<const _Tp&>() >= std::declval<const _Up&>()), bool>,
                 int> = 0>
 _LIBCPP_HIDE_FROM_ABI constexpr bool operator>=(const _Tp& __v, const optional<_Up>& __x) {
-  return static_cast<bool>(__x) ? __v >= *__x : true;
+  if (__x.has_value())
+    return __v >= *__x;
+  return true;
 }
 
 #    if _LIBCPP_STD_VER >= 20
@@ -1264,23 +1746,36 @@ operator<=>(const optional<_Tp>& __x, const _Up& __v) {
 #    endif // _LIBCPP_STD_VER >= 20
 
 template <class _Tp, enable_if_t< is_move_constructible_v<_Tp> && is_swappable_v<_Tp>, int> = 0>
-inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 void
-swap(optional<_Tp>& __x, optional<_Tp>& __y) noexcept(noexcept(__x.swap(__y))) {
+inline _LIBCPP_HIDE_FROM_ABI
+_LIBCPP_CONSTEXPR_SINCE_CXX20 void swap(optional<_Tp>& __x, optional<_Tp>& __y) noexcept(noexcept(__x.swap(__y))) {
   __x.swap(__y);
 }
 
-template <class _Tp>
-_LIBCPP_HIDE_FROM_ABI constexpr optional<decay_t<_Tp>> make_optional(_Tp&& __v) {
+struct __make_optional_barrier_tag {
+  explicit __make_optional_barrier_tag() = default;
+};
+
+template <
+#    if _LIBCPP_STD_VER >= 26
+    __make_optional_barrier_tag = __make_optional_barrier_tag{},
+#    endif
+    class _Tp,
+    enable_if_t<is_constructible_v<decay_t<_Tp>, _Tp>, int> = 0>
+[[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr optional<decay_t<_Tp>> make_optional(_Tp&& __v) {
   return optional<decay_t<_Tp>>(std::forward<_Tp>(__v));
 }
 
-template <class _Tp, class... _Args>
-_LIBCPP_HIDE_FROM_ABI constexpr optional<_Tp> make_optional(_Args&&... __args) {
+template <class _Tp, class... _Args, enable_if_t<__is_constructible_for_optional_v<_Tp, _Args...>, int> = 0>
+[[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr optional<_Tp> make_optional(_Args&&... __args) {
   return optional<_Tp>(in_place, std::forward<_Args>(__args)...);
 }
 
-template <class _Tp, class _Up, class... _Args>
-_LIBCPP_HIDE_FROM_ABI constexpr optional<_Tp> make_optional(initializer_list<_Up> __il, _Args&&... __args) {
+template <class _Tp,
+          class _Up,
+          class... _Args,
+          enable_if_t<__is_constructible_for_optional_initializer_list_v<_Tp, _Up, _Args...>, int> = 0>
+[[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr optional<_Tp>
+make_optional(initializer_list<_Up> __il, _Args&&... __args) {
   return optional<_Tp>(in_place, __il, std::forward<_Args>(__args)...);
 }
 
@@ -1291,7 +1786,7 @@ struct hash< __enable_hash_helper<optional<_Tp>, remove_const_t<_Tp>> > {
   _LIBCPP_DEPRECATED_IN_CXX17 typedef size_t result_type;
 #    endif
 
-  _LIBCPP_HIDE_FROM_ABI size_t operator()(const optional<_Tp>& __opt) const {
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI size_t operator()(const optional<_Tp>& __opt) const {
     return static_cast<bool>(__opt) ? hash<remove_const_t<_Tp>>()(*__opt) : 0;
   }
 };
diff --git a/lib/libcxx/include/print b/lib/libcxx/include/print
index be05d30e01..0ff314c22d 100644
--- a/lib/libcxx/include/print
+++ b/lib/libcxx/include/print
@@ -329,7 +329,8 @@ __vprint_unicode([[maybe_unused]] FILE* __stream,
 } // namespace __print
 
 template <class... _Args>
-_LIBCPP_HIDE_FROM_ABI void print(FILE* __stream, format_string<_Args...> __fmt, _Args&&... __args) {
+_LIBCPP_HIDE_FROM_ABI void
+print(FILE* _LIBCPP_DIAGNOSE_NULLPTR __stream, format_string<_Args...> __fmt, _Args&&... __args) {
 #    if _LIBCPP_HAS_UNICODE
   if constexpr (__print::__use_unicode_execution_charset)
     __print::__vprint_unicode(__stream, __fmt.get(), std::make_format_args(__args...), false);
@@ -346,7 +347,8 @@ _LIBCPP_HIDE_FROM_ABI void print(format_string<_Args...> __fmt, _Args&&... __arg
 }
 
 template <class... _Args>
-_LIBCPP_HIDE_FROM_ABI void println(FILE* __stream, format_string<_Args...> __fmt, _Args&&... __args) {
+_LIBCPP_HIDE_FROM_ABI void
+println(FILE* _LIBCPP_DIAGNOSE_NULLPTR __stream, format_string<_Args...> __fmt, _Args&&... __args) {
 #    if _LIBCPP_HAS_UNICODE
   // Note the wording in the Standard is inefficient. The output of
   // std::format is a std::string which is then copied. This solution
@@ -361,7 +363,7 @@ _LIBCPP_HIDE_FROM_ABI void println(FILE* __stream, format_string<_Args...> __fmt
 }
 
 template <class = void> // TODO PRINT template or availability markup fires too eagerly (http://llvm.org/PR61563).
-_LIBCPP_HIDE_FROM_ABI inline void println(FILE* __stream) {
+_LIBCPP_HIDE_FROM_ABI inline void println(FILE* _LIBCPP_DIAGNOSE_NULLPTR __stream) {
   std::print(__stream, "\n");
 }
 
@@ -377,7 +379,8 @@ _LIBCPP_HIDE_FROM_ABI void println(format_string<_Args...> __fmt, _Args&&... __a
 
 #    if _LIBCPP_HAS_UNICODE
 template <class = void> // TODO PRINT template or availability markup fires too eagerly (http://llvm.org/PR61563).
-_LIBCPP_HIDE_FROM_ABI inline void vprint_unicode(FILE* __stream, string_view __fmt, format_args __args) {
+_LIBCPP_HIDE_FROM_ABI inline void
+vprint_unicode(FILE* _LIBCPP_DIAGNOSE_NULLPTR __stream, string_view __fmt, format_args __args) {
   __print::__vprint_unicode(__stream, __fmt, __args, false);
 }
 
@@ -389,7 +392,8 @@ _LIBCPP_HIDE_FROM_ABI inline void vprint_unicode(string_view __fmt, format_args
 #    endif // _LIBCPP_HAS_UNICODE
 
 template <class = void> // TODO PRINT template or availability markup fires too eagerly (http://llvm.org/PR61563).
-_LIBCPP_HIDE_FROM_ABI inline void vprint_nonunicode(FILE* __stream, string_view __fmt, format_args __args) {
+_LIBCPP_HIDE_FROM_ABI inline void
+vprint_nonunicode(FILE* _LIBCPP_DIAGNOSE_NULLPTR __stream, string_view __fmt, format_args __args) {
   __print::__vprint_nonunicode(__stream, __fmt, __args, false);
 }
 
diff --git a/lib/libcxx/include/queue b/lib/libcxx/include/queue
index c33afc892d..a1686bc7c5 100644
--- a/lib/libcxx/include/queue
+++ b/lib/libcxx/include/queue
@@ -376,12 +376,12 @@ public:
 #  endif // _LIBCPP_CXX03_LANG
 
   [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI bool empty() const { return c.empty(); }
-  _LIBCPP_HIDE_FROM_ABI size_type size() const { return c.size(); }
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI size_type size() const { return c.size(); }
 
-  _LIBCPP_HIDE_FROM_ABI reference front() { return c.front(); }
-  _LIBCPP_HIDE_FROM_ABI const_reference front() const { return c.front(); }
-  _LIBCPP_HIDE_FROM_ABI reference back() { return c.back(); }
-  _LIBCPP_HIDE_FROM_ABI const_reference back() const { return c.back(); }
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI reference front() { return c.front(); }
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI const_reference front() const { return c.front(); }
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI reference back() { return c.back(); }
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI const_reference back() const { return c.back(); }
 
   _LIBCPP_HIDE_FROM_ABI void push(const value_type& __v) { c.push_back(__v); }
 #  ifndef _LIBCPP_CXX03_LANG
@@ -437,19 +437,19 @@ public:
 };
 
 #  if _LIBCPP_STD_VER >= 17
-template <class _Container, class = enable_if_t<!__is_allocator<_Container>::value> >
+template <class _Container, class = enable_if_t<!__is_allocator_v<_Container>>>
 queue(_Container) -> queue<typename _Container::value_type, _Container>;
 
 template <class _Container,
           class _Alloc,
-          class = enable_if_t<!__is_allocator<_Container>::value>,
+          class = enable_if_t<!__is_allocator_v<_Container>>,
           class = enable_if_t<uses_allocator<_Container, _Alloc>::value> >
 queue(_Container, _Alloc) -> queue<typename _Container::value_type, _Container>;
 #  endif
 
 #  if _LIBCPP_STD_VER >= 23
 template <class _InputIterator, __enable_if_t<__has_input_iterator_category<_InputIterator>::value, int> = 0>
-queue(_InputIterator, _InputIterator) -> queue<__iter_value_type<_InputIterator>>;
+queue(_InputIterator, _InputIterator) -> queue<__iterator_value_type<_InputIterator>>;
 
 template <ranges::input_range _Range>
 queue(from_range_t, _Range&&) -> queue<ranges::range_value_t<_Range>>;
@@ -457,11 +457,11 @@ queue(from_range_t, _Range&&) -> queue<ranges::range_value_t<_Range>>;
 template <class _InputIterator,
           class _Alloc,
           __enable_if_t<__has_input_iterator_category<_InputIterator>::value, int> = 0,
-          __enable_if_t<__is_allocator<_Alloc>::value, int>                        = 0>
+          __enable_if_t<__is_allocator_v<_Alloc>, int>                             = 0>
 queue(_InputIterator, _InputIterator, _Alloc)
-    -> queue<__iter_value_type<_InputIterator>, deque<__iter_value_type<_InputIterator>, _Alloc>>;
+    -> queue<__iterator_value_type<_InputIterator>, deque<__iterator_value_type<_InputIterator>, _Alloc>>;
 
-template <ranges::input_range _Range, class _Alloc, __enable_if_t<__is_allocator<_Alloc>::value, int> = 0>
+template <ranges::input_range _Range, class _Alloc, __enable_if_t<__is_allocator_v<_Alloc>, int> = 0>
 queue(from_range_t, _Range&&, _Alloc)
     -> queue<ranges::range_value_t<_Range>, deque<ranges::range_value_t<_Range>, _Alloc>>;
 #  endif
@@ -664,8 +664,10 @@ public:
 #  endif
 
   [[__nodiscard__]] _LIBCPP_CONSTEXPR_SINCE_CXX26 _LIBCPP_HIDE_FROM_ABI bool empty() const { return c.empty(); }
-  _LIBCPP_CONSTEXPR_SINCE_CXX26 _LIBCPP_HIDE_FROM_ABI size_type size() const { return c.size(); }
-  _LIBCPP_CONSTEXPR_SINCE_CXX26 _LIBCPP_HIDE_FROM_ABI const_reference top() const { return c.front(); }
+  [[__nodiscard__]] _LIBCPP_CONSTEXPR_SINCE_CXX26 _LIBCPP_HIDE_FROM_ABI size_type size() const { return c.size(); }
+  [[__nodiscard__]] _LIBCPP_CONSTEXPR_SINCE_CXX26 _LIBCPP_HIDE_FROM_ABI const_reference top() const {
+    return c.front();
+  }
 
   _LIBCPP_CONSTEXPR_SINCE_CXX26 _LIBCPP_HIDE_FROM_ABI void push(const value_type& __v);
 #  ifndef _LIBCPP_CXX03_LANG
@@ -700,45 +702,45 @@ public:
 #  if _LIBCPP_STD_VER >= 17
 template <class _Compare,
           class _Container,
-          class = enable_if_t<!__is_allocator<_Compare>::value>,
-          class = enable_if_t<!__is_allocator<_Container>::value> >
+          class = enable_if_t<!__is_allocator_v<_Compare>>,
+          class = enable_if_t<!__is_allocator_v<_Container>>>
 priority_queue(_Compare, _Container) -> priority_queue<typename _Container::value_type, _Container, _Compare>;
 
 template <class _InputIterator,
-          class _Compare   = less<__iter_value_type<_InputIterator>>,
-          class _Container = vector<__iter_value_type<_InputIterator>>,
+          class _Compare   = less<__iterator_value_type<_InputIterator>>,
+          class _Container = vector<__iterator_value_type<_InputIterator>>,
           class            = enable_if_t<__has_input_iterator_category<_InputIterator>::value>,
-          class            = enable_if_t<!__is_allocator<_Compare>::value>,
-          class            = enable_if_t<!__is_allocator<_Container>::value> >
+          class            = enable_if_t<!__is_allocator_v<_Compare>>,
+          class            = enable_if_t<!__is_allocator_v<_Container>>>
 priority_queue(_InputIterator, _InputIterator, _Compare = _Compare(), _Container = _Container())
-    -> priority_queue<__iter_value_type<_InputIterator>, _Container, _Compare>;
+    -> priority_queue<__iterator_value_type<_InputIterator>, _Container, _Compare>;
 
 template <class _Compare,
           class _Container,
           class _Alloc,
-          class = enable_if_t<!__is_allocator<_Compare>::value>,
-          class = enable_if_t<!__is_allocator<_Container>::value>,
-          class = enable_if_t<uses_allocator<_Container, _Alloc>::value> >
+          class = enable_if_t<!__is_allocator_v<_Compare>>,
+          class = enable_if_t<!__is_allocator_v<_Container>>,
+          class = enable_if_t<uses_allocator<_Container, _Alloc>::value>>
 priority_queue(_Compare, _Container, _Alloc) -> priority_queue<typename _Container::value_type, _Container, _Compare>;
 
 template <class _InputIterator,
           class _Allocator,
           class = enable_if_t<__has_input_iterator_category<_InputIterator>::value>,
-          class = enable_if_t<__is_allocator<_Allocator>::value> >
+          class = enable_if_t<__is_allocator_v<_Allocator>>>
 priority_queue(_InputIterator, _InputIterator, _Allocator)
-    -> priority_queue<__iter_value_type<_InputIterator>,
-                      vector<__iter_value_type<_InputIterator>, _Allocator>,
-                      less<__iter_value_type<_InputIterator>>>;
+    -> priority_queue<__iterator_value_type<_InputIterator>,
+                      vector<__iterator_value_type<_InputIterator>, _Allocator>,
+                      less<__iterator_value_type<_InputIterator>>>;
 
 template <class _InputIterator,
           class _Compare,
           class _Allocator,
           class = enable_if_t<__has_input_iterator_category<_InputIterator>::value>,
-          class = enable_if_t<!__is_allocator<_Compare>::value>,
-          class = enable_if_t<__is_allocator<_Allocator>::value> >
+          class = enable_if_t<!__is_allocator_v<_Compare>>,
+          class = enable_if_t<__is_allocator_v<_Allocator>>>
 priority_queue(_InputIterator, _InputIterator, _Compare, _Allocator)
-    -> priority_queue<__iter_value_type<_InputIterator>,
-                      vector<__iter_value_type<_InputIterator>, _Allocator>,
+    -> priority_queue<__iterator_value_type<_InputIterator>,
+                      vector<__iterator_value_type<_InputIterator>, _Allocator>,
                       _Compare>;
 
 template <class _InputIterator,
@@ -746,8 +748,8 @@ template <class _InputIterator,
           class _Container,
           class _Alloc,
           class = enable_if_t<__has_input_iterator_category<_InputIterator>::value>,
-          class = enable_if_t<!__is_allocator<_Compare>::value>,
-          class = enable_if_t<!__is_allocator<_Container>::value>,
+          class = enable_if_t<!__is_allocator_v<_Compare>>,
+          class = enable_if_t<!__is_allocator_v<_Container>>,
           class = enable_if_t<uses_allocator<_Container, _Alloc>::value> >
 priority_queue(_InputIterator, _InputIterator, _Compare, _Container, _Alloc)
     -> priority_queue<typename _Container::value_type, _Container, _Compare>;
@@ -757,19 +759,19 @@ priority_queue(_InputIterator, _InputIterator, _Compare, _Container, _Alloc)
 
 template <ranges::input_range _Range,
           class _Compare = less<ranges::range_value_t<_Range>>,
-          class          = enable_if_t<!__is_allocator<_Compare>::value>>
+          class          = enable_if_t<!__is_allocator_v<_Compare>>>
 priority_queue(from_range_t, _Range&&, _Compare = _Compare())
     -> priority_queue<ranges::range_value_t<_Range>, vector<ranges::range_value_t<_Range>>, _Compare>;
 
 template <ranges::input_range _Range,
           class _Compare,
           class _Alloc,
-          class = enable_if_t<!__is_allocator<_Compare>::value>,
-          class = enable_if_t<__is_allocator<_Alloc>::value>>
+          class = enable_if_t<!__is_allocator_v<_Compare>>,
+          class = enable_if_t<__is_allocator_v<_Alloc>>>
 priority_queue(from_range_t, _Range&&, _Compare, _Alloc)
     -> priority_queue<ranges::range_value_t<_Range>, vector<ranges::range_value_t<_Range>, _Alloc>, _Compare>;
 
-template <ranges::input_range _Range, class _Alloc, class = enable_if_t<__is_allocator<_Alloc>::value>>
+template <ranges::input_range _Range, class _Alloc, class = enable_if_t<__is_allocator_v<_Alloc>>>
 priority_queue(from_range_t, _Range&&, _Alloc)
     -> priority_queue<ranges::range_value_t<_Range>, vector<ranges::range_value_t<_Range>, _Alloc>>;
 
diff --git a/lib/libcxx/include/ranges b/lib/libcxx/include/ranges
index 2a6321bd2c..9f725b12ac 100644
--- a/lib/libcxx/include/ranges
+++ b/lib/libcxx/include/ranges
@@ -116,6 +116,10 @@ namespace std::ranges {
   // [range.dangling], dangling iterator handling
   struct dangling;
 
+  // [range.elementsof], class template elements_of
+  template<range R, class Allocator = allocator<byte>>
+    struct elements_of;
+
   template<range R>
     using borrowed_iterator_t = see below;
 
@@ -267,6 +271,11 @@ namespace std::ranges {
   template<class W, class Bound>
     inline constexpr bool enable_borrowed_range<iota_view<W, Bound>> = true;
 
+  namespace views {
+    inline constexpr unspecified iota = unspecified;
+    inline constexpr unspecified indices = unspecified; // Since C++26
+  }
+
   // [range.repeat], repeat view
   template<class T>
     concept integer-like-with-usable-difference-type =  // exposition only
@@ -339,6 +348,41 @@ namespace std::ranges {
 
   namespace views { inline constexpr unspecified zip = unspecified; }       // C++23
 
+  // [range.zip.transform], zip transform view
+  template<move_constructible F, input_range... Views>
+    requires (view<Views> && ...) && (sizeof...(Views) > 0) && is_object_v<F> &&
+             regular_invocable<F&, range_reference_t<Views>...> &&
+             can-reference<invoke_result_t<F&, range_reference_t<Views>...>>
+  class zip_transform_view;                                                         // C++23
+
+  namespace views { inline constexpr unspecified zip_transform = unspecified; }     // C++23
+
+  // [range.adjacent], adjacent view
+  template<forward_range V, size_t N>
+    requires view<V> && (N > 0)
+  class adjacent_view;
+
+  template<class V, size_t N>
+    constexpr bool enable_borrowed_range<adjacent_view<V, N>> =
+      enable_borrowed_range<V>;
+
+  namespace views {
+    template<size_t N>
+      constexpr unspecified adjacent = unspecified;
+    inline constexpr auto pairwise = adjacent<2>;
+  }
+
+  // [range.adjacent.transform], adjacent transform view
+  template<forward_range V, move_constructible F, size_t N>
+    requires see below
+  class adjacent_transform_view;
+
+  namespace views {
+    template<size_t N>
+      constexpr unspecified adjacent_transform = unspecified;
+    inline constexpr auto pairwise_transform = adjacent_transform<2>;
+  }
+
   // [range.as.rvalue]
   template <view V>
     requires input_range<V>
@@ -404,6 +448,7 @@ namespace std {
 #    include <__ranges/data.h>
 #    include <__ranges/drop_view.h>
 #    include <__ranges/drop_while_view.h>
+#    include <__ranges/elements_of.h>
 #    include <__ranges/elements_view.h>
 #    include <__ranges/empty.h>
 #    include <__ranges/empty_view.h>
@@ -433,12 +478,15 @@ namespace std {
 #  endif
 
 #  if _LIBCPP_STD_VER >= 23
+#    include <__ranges/adjacent_transform_view.h>
+#    include <__ranges/adjacent_view.h>
 #    include <__ranges/as_rvalue_view.h>
 #    include <__ranges/chunk_by_view.h>
 #    include <__ranges/from_range.h>
 #    include <__ranges/join_with_view.h>
 #    include <__ranges/repeat_view.h>
 #    include <__ranges/to.h>
+#    include <__ranges/zip_transform_view.h>
 #    include <__ranges/zip_view.h>
 #  endif
 
diff --git a/lib/libcxx/include/regex b/lib/libcxx/include/regex
index bbc21e244d..620a75f35d 100644
--- a/lib/libcxx/include/regex
+++ b/lib/libcxx/include/regex
@@ -986,7 +986,7 @@ public:
   explicit regex_error(regex_constants::error_type __ecode);
   _LIBCPP_HIDE_FROM_ABI regex_error(const regex_error&) _NOEXCEPT = default;
   ~regex_error() _NOEXCEPT override;
-  _LIBCPP_HIDE_FROM_ABI regex_constants::error_type code() const { return __code_; }
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI regex_constants::error_type code() const { return __code_; }
 };
 
 template <regex_constants::error_type _Ev>
@@ -1004,7 +1004,7 @@ public:
   typedef _CharT char_type;
   typedef basic_string<char_type> string_type;
   typedef locale locale_type;
-#    if defined(__BIONIC__) || defined(_NEWLIB_VERSION)
+#    if defined(__BIONIC__) || _LIBCPP_LIBC_NEWLIB
   // Originally bionic's ctype_base used its own ctype masks because the
   // builtin ctype implementation wasn't in libc++ yet. Bionic's ctype mask
   // was only 8 bits wide and already saturated, so it used a wider type here
@@ -1013,9 +1013,7 @@ public:
   // implementation, but this was not updated to match. Since then Android has
   // needed to maintain a stable libc++ ABI, and this can't be changed without
   // an ABI break.
-  // We also need this workaround for newlib since _NEWLIB_VERSION is not
-  // defined yet inside __config, so we can't set the
-  // _LIBCPP_PROVIDES_DEFAULT_RUNE_TABLE macro. Additionally, newlib is
+  // We also need this workaround for newlib since newlib is
   // often used for space constrained environments, so it makes sense not to
   // duplicate the ctype table.
   typedef uint16_t char_class_type;
@@ -2120,7 +2118,7 @@ public:
       __ranges_.push_back(
           std::make_pair(__traits_.transform(__b.begin(), __b.end()), __traits_.transform(__e.begin(), __e.end())));
     } else {
-      if (__b.size() != 1 || __e.size() != 1)
+      if (__b.size() != 1 || __e.size() != 1 || char_traits<typename string_type::value_type>::lt(__e[0], __b[0]))
         std::__throw_regex_error<regex_constants::error_range>();
       if (__icase_) {
         __b[0] = __traits_.translate_nocase(__b[0]);
@@ -2414,8 +2412,8 @@ public:
 #    endif // _LIBCPP_CXX03_LANG
 
   // const operations:
-  _LIBCPP_HIDE_FROM_ABI unsigned mark_count() const { return __marked_count_; }
-  _LIBCPP_HIDE_FROM_ABI flag_type flags() const { return __flags_; }
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI unsigned mark_count() const { return __marked_count_; }
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI flag_type flags() const { return __flags_; }
 
   // locale:
   _LIBCPP_HIDE_FROM_ABI locale_type imbue(locale_type __loc) {
@@ -2423,7 +2421,7 @@ public:
     __start_.reset();
     return __traits_.imbue(__loc);
   }
-  _LIBCPP_HIDE_FROM_ABI locale_type getloc() const { return __traits_.getloc(); }
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI locale_type getloc() const { return __traits_.getloc(); }
 
   // swap:
   void swap(basic_regex& __r);
@@ -4208,17 +4206,17 @@ public:
 
   _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR sub_match() : matched() {}
 
-  _LIBCPP_HIDE_FROM_ABI difference_type length() const {
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI difference_type length() const {
     return matched ? std::distance(this->first, this->second) : 0;
   }
-  _LIBCPP_HIDE_FROM_ABI string_type str() const {
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI string_type str() const {
     return matched ? string_type(this->first, this->second) : string_type();
   }
   _LIBCPP_HIDE_FROM_ABI operator string_type() const { return str(); }
 
-  _LIBCPP_HIDE_FROM_ABI int compare(const sub_match& __s) const { return str().compare(__s.str()); }
-  _LIBCPP_HIDE_FROM_ABI int compare(const string_type& __s) const { return str().compare(__s); }
-  _LIBCPP_HIDE_FROM_ABI int compare(const value_type* __s) const { return str().compare(__s); }
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI int compare(const sub_match& __s) const { return str().compare(__s.str()); }
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI int compare(const string_type& __s) const { return str().compare(__s); }
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI int compare(const value_type* __s) const { return str().compare(__s); }
 
   _LIBCPP_HIDE_FROM_ABI void swap(sub_match& __s) _NOEXCEPT_(__is_nothrow_swappable_v<_BidirectionalIterator>) {
     this->pair<_BidirectionalIterator, _BidirectionalIterator>::swap(__s);
@@ -4583,49 +4581,53 @@ public:
   _LIBCPP_HIDE_FROM_ABI bool ready() const { return __ready_; }
 
   // size:
-  _LIBCPP_HIDE_FROM_ABI size_type size() const _NOEXCEPT { return __matches_.size(); }
-  _LIBCPP_HIDE_FROM_ABI size_type max_size() const _NOEXCEPT { return __matches_.max_size(); }
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI size_type size() const _NOEXCEPT { return __matches_.size(); }
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI size_type max_size() const _NOEXCEPT { return __matches_.max_size(); }
   [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI bool empty() const _NOEXCEPT { return size() == 0; }
 
   // element access:
-  _LIBCPP_HIDE_FROM_ABI difference_type length(size_type __sub = 0) const {
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI difference_type length(size_type __sub = 0) const {
     // If the match results are not ready, this will return `0`.
     _LIBCPP_ASSERT_PEDANTIC(ready(), "match_results::length() called when not ready");
     return (*this)[__sub].length();
   }
-  _LIBCPP_HIDE_FROM_ABI difference_type position(size_type __sub = 0) const {
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI difference_type position(size_type __sub = 0) const {
     // If the match results are not ready, this will return the result of subtracting two default-constructed iterators
     // (which is typically a well-defined operation).
     _LIBCPP_ASSERT_PEDANTIC(ready(), "match_results::position() called when not ready");
     return std::distance(__position_start_, (*this)[__sub].first);
   }
-  _LIBCPP_HIDE_FROM_ABI string_type str(size_type __sub = 0) const {
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI string_type str(size_type __sub = 0) const {
     // If the match results are not ready, this will return an empty string.
     _LIBCPP_ASSERT_PEDANTIC(ready(), "match_results::str() called when not ready");
     return (*this)[__sub].str();
   }
-  _LIBCPP_HIDE_FROM_ABI const_reference operator[](size_type __n) const {
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI const_reference operator[](size_type __n) const {
     // If the match results are not ready, this call will be equivalent to calling this function with `__n >= size()`,
     // returning an empty subrange.
     _LIBCPP_ASSERT_PEDANTIC(ready(), "match_results::operator[]() called when not ready");
     return __n < __matches_.size() ? __matches_[__n] : __unmatched_;
   }
 
-  _LIBCPP_HIDE_FROM_ABI const_reference prefix() const {
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI const_reference prefix() const {
     // If the match results are not ready, this will return a default-constructed empty `__suffix_`.
     _LIBCPP_ASSERT_PEDANTIC(ready(), "match_results::prefix() called when not ready");
     return __prefix_;
   }
-  _LIBCPP_HIDE_FROM_ABI const_reference suffix() const {
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI const_reference suffix() const {
     // If the match results are not ready, this will return a default-constructed empty `__suffix_`.
     _LIBCPP_ASSERT_PEDANTIC(ready(), "match_results::suffix() called when not ready");
     return __suffix_;
   }
 
-  _LIBCPP_HIDE_FROM_ABI const_iterator begin() const { return empty() ? __matches_.end() : __matches_.begin(); }
-  _LIBCPP_HIDE_FROM_ABI const_iterator end() const { return __matches_.end(); }
-  _LIBCPP_HIDE_FROM_ABI const_iterator cbegin() const { return empty() ? __matches_.end() : __matches_.begin(); }
-  _LIBCPP_HIDE_FROM_ABI const_iterator cend() const { return __matches_.end(); }
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI const_iterator begin() const {
+    return empty() ? __matches_.end() : __matches_.begin();
+  }
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI const_iterator end() const { return __matches_.end(); }
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI const_iterator cbegin() const {
+    return empty() ? __matches_.end() : __matches_.begin();
+  }
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI const_iterator cend() const { return __matches_.end(); }
 
   // format:
   template <class _OutputIter>
@@ -4641,14 +4643,14 @@ public:
     return format(__output_iter, __fmt.data(), __fmt.data() + __fmt.size(), __flags);
   }
   template <class _ST, class _SA>
-  _LIBCPP_HIDE_FROM_ABI basic_string<char_type, _ST, _SA>
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI basic_string<char_type, _ST, _SA>
   format(const basic_string<char_type, _ST, _SA>& __fmt,
          regex_constants::match_flag_type __flags = regex_constants::format_default) const {
     basic_string<char_type, _ST, _SA> __r;
     format(std::back_inserter(__r), __fmt.data(), __fmt.data() + __fmt.size(), __flags);
     return __r;
   }
-  _LIBCPP_HIDE_FROM_ABI string_type
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI string_type
   format(const char_type* __fmt, regex_constants::match_flag_type __flags = regex_constants::format_default) const {
     string_type __r;
     format(std::back_inserter(__r), __fmt, __fmt + char_traits<char_type>::length(__fmt), __flags);
@@ -4656,7 +4658,7 @@ public:
   }
 
   // allocator:
-  _LIBCPP_HIDE_FROM_ABI allocator_type get_allocator() const { return __matches_.get_allocator(); }
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI allocator_type get_allocator() const { return __matches_.get_allocator(); }
 
   // swap:
   void swap(match_results& __m);
@@ -5377,7 +5379,7 @@ public:
   _LIBCPP_HIDE_FROM_ABI bool operator!=(const regex_iterator& __x) const { return !(*this == __x); }
 #    endif
 
-  _LIBCPP_HIDE_FROM_ABI reference operator*() const { return __match_; }
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI reference operator*() const { return __match_; }
   _LIBCPP_HIDE_FROM_ABI pointer operator->() const { return std::addressof(__match_); }
 
   regex_iterator& operator++();
@@ -5558,7 +5560,7 @@ public:
   _LIBCPP_HIDE_FROM_ABI bool operator!=(const regex_token_iterator& __x) const { return !(*this == __x); }
 #    endif
 
-  _LIBCPP_HIDE_FROM_ABI const value_type& operator*() const { return *__result_; }
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI const value_type& operator*() const { return *__result_; }
   _LIBCPP_HIDE_FROM_ABI const value_type* operator->() const { return __result_; }
 
   regex_token_iterator& operator++();
diff --git a/lib/libcxx/include/scoped_allocator b/lib/libcxx/include/scoped_allocator
index 7b8a9c9739..a469d4afea 100644
--- a/lib/libcxx/include/scoped_allocator
+++ b/lib/libcxx/include/scoped_allocator
@@ -382,13 +382,17 @@ public:
   // scoped_allocator_adaptor& operator=(scoped_allocator_adaptor&&) = default;
   // ~scoped_allocator_adaptor() = default;
 
-  _LIBCPP_HIDE_FROM_ABI inner_allocator_type& inner_allocator() _NOEXCEPT { return _Base::inner_allocator(); }
-  _LIBCPP_HIDE_FROM_ABI const inner_allocator_type& inner_allocator() const _NOEXCEPT {
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI inner_allocator_type& inner_allocator() _NOEXCEPT {
+    return _Base::inner_allocator();
+  }
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI const inner_allocator_type& inner_allocator() const _NOEXCEPT {
     return _Base::inner_allocator();
   }
 
-  _LIBCPP_HIDE_FROM_ABI outer_allocator_type& outer_allocator() _NOEXCEPT { return _Base::outer_allocator(); }
-  _LIBCPP_HIDE_FROM_ABI const outer_allocator_type& outer_allocator() const _NOEXCEPT {
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI outer_allocator_type& outer_allocator() _NOEXCEPT {
+    return _Base::outer_allocator();
+  }
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI const outer_allocator_type& outer_allocator() const _NOEXCEPT {
     return _Base::outer_allocator();
   }
 
@@ -403,7 +407,7 @@ public:
     allocator_traits<outer_allocator_type>::deallocate(outer_allocator(), __p, __n);
   }
 
-  _LIBCPP_HIDE_FROM_ABI size_type max_size() const {
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI size_type max_size() const {
     return allocator_traits<outer_allocator_type>::max_size(outer_allocator());
   }
 
@@ -434,10 +438,10 @@ public:
         piecewise_construct,
         __transform_tuple(typename __uses_alloc_ctor< _T1, inner_allocator_type&, _Args1... >::type(),
                           std::move(__x),
-                          typename __make_tuple_indices<sizeof...(_Args1)>::type{}),
+                          __index_sequence_for<_Args1...>()),
         __transform_tuple(typename __uses_alloc_ctor< _T2, inner_allocator_type&, _Args2... >::type(),
                           std::move(__y),
-                          typename __make_tuple_indices<sizeof...(_Args2)>::type{}));
+                          __index_sequence_for<_Args2...>()));
   }
 
   template <class _T1, class _T2>
@@ -473,7 +477,8 @@ public:
     allocator_traits<typename _OM::type>::destroy(_OM()(outer_allocator()), __p);
   }
 
-  _LIBCPP_HIDE_FROM_ABI scoped_allocator_adaptor select_on_container_copy_construction() const _NOEXCEPT {
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI scoped_allocator_adaptor
+  select_on_container_copy_construction() const _NOEXCEPT {
     return _Base::select_on_container_copy_construction();
   }
 
@@ -503,20 +508,20 @@ private:
 
   template <class... _Args, size_t... _Idx>
   _LIBCPP_HIDE_FROM_ABI tuple<_Args&&...>
-  __transform_tuple(integral_constant<int, 0>, tuple<_Args...>&& __t, __tuple_indices<_Idx...>) {
+  __transform_tuple(integral_constant<int, 0>, tuple<_Args...>&& __t, __index_sequence<_Idx...>) {
     return std::forward_as_tuple(std::get<_Idx>(std::move(__t))...);
   }
 
   template <class... _Args, size_t... _Idx>
   _LIBCPP_HIDE_FROM_ABI tuple<allocator_arg_t, inner_allocator_type&, _Args&&...>
-  __transform_tuple(integral_constant<int, 1>, tuple<_Args...>&& __t, __tuple_indices<_Idx...>) {
+  __transform_tuple(integral_constant<int, 1>, tuple<_Args...>&& __t, __index_sequence<_Idx...>) {
     using _Tup = tuple<allocator_arg_t, inner_allocator_type&, _Args&&...>;
     return _Tup(allocator_arg, inner_allocator(), std::get<_Idx>(std::move(__t))...);
   }
 
   template <class... _Args, size_t... _Idx>
   _LIBCPP_HIDE_FROM_ABI tuple<_Args&&..., inner_allocator_type&>
-  __transform_tuple(integral_constant<int, 2>, tuple<_Args...>&& __t, __tuple_indices<_Idx...>) {
+  __transform_tuple(integral_constant<int, 2>, tuple<_Args...>&& __t, __index_sequence<_Idx...>) {
     using _Tup = tuple<_Args&&..., inner_allocator_type&>;
     return _Tup(std::get<_Idx>(std::move(__t))..., inner_allocator());
   }
diff --git a/lib/libcxx/include/semaphore b/lib/libcxx/include/semaphore
index fb3bcfd209..d411d205cd 100644
--- a/lib/libcxx/include/semaphore
+++ b/lib/libcxx/include/semaphore
@@ -55,12 +55,11 @@ using binary_semaphore = counting_semaphore<1>; // since C++20
 #    include <__assert>
 #    include <__atomic/atomic.h>
 #    include <__atomic/atomic_sync.h>
+#    include <__atomic/atomic_sync_timed.h>
 #    include <__atomic/memory_order.h>
 #    include <__chrono/time_point.h>
 #    include <__cstddef/ptrdiff_t.h>
-#    include <__thread/poll_with_backoff.h>
 #    include <__thread/support.h>
-#    include <__thread/timed_backoff_policy.h>
 #    include <limits>
 #    include <version>
 
@@ -90,7 +89,7 @@ class __atomic_semaphore_base {
 
 public:
   _LIBCPP_HIDE_FROM_ABI constexpr explicit __atomic_semaphore_base(ptrdiff_t __count) : __a_(__count) {}
-  _LIBCPP_AVAILABILITY_SYNC _LIBCPP_HIDE_FROM_ABI void release(ptrdiff_t __update = 1) {
+  _LIBCPP_HIDE_FROM_ABI void release(ptrdiff_t __update = 1) {
     auto __old = __a_.fetch_add(__update, memory_order_release);
     _LIBCPP_ASSERT_ARGUMENT_WITHIN_DOMAIN(
         __update <= _LIBCPP_SEMAPHORE_MAX - __old, "update is greater than the expected value");
@@ -98,26 +97,26 @@ public:
       __a_.notify_all();
     }
   }
-  _LIBCPP_AVAILABILITY_SYNC _LIBCPP_HIDE_FROM_ABI void acquire() {
+  _LIBCPP_HIDE_FROM_ABI void acquire() {
     std::__atomic_wait_unless(__a_, memory_order_relaxed, [this](ptrdiff_t& __old) {
       return __try_acquire_impl(__old);
     });
   }
   template <class _Rep, class _Period>
-  _LIBCPP_AVAILABILITY_SYNC _LIBCPP_HIDE_FROM_ABI bool
-  try_acquire_for(chrono::duration<_Rep, _Period> const& __rel_time) {
+  _LIBCPP_HIDE_FROM_ABI bool try_acquire_for(chrono::duration<_Rep, _Period> const& __rel_time) {
     if (__rel_time == chrono::duration<_Rep, _Period>::zero())
       return try_acquire();
-    auto const __poll_fn = [this]() { return try_acquire(); };
-    return std::__libcpp_thread_poll_with_backoff(__poll_fn, __libcpp_timed_backoff_policy(), __rel_time);
+
+    return std::__atomic_wait_unless_with_timeout(
+        __a_, memory_order_relaxed, [this](ptrdiff_t& __old) { return __try_acquire_impl(__old); }, __rel_time);
   }
-  _LIBCPP_AVAILABILITY_SYNC _LIBCPP_HIDE_FROM_ABI bool try_acquire() {
+  _LIBCPP_HIDE_FROM_ABI bool try_acquire() {
     auto __old = __a_.load(memory_order_relaxed);
     return __try_acquire_impl(__old);
   }
 
 private:
-  _LIBCPP_AVAILABILITY_SYNC _LIBCPP_HIDE_FROM_ABI bool __try_acquire_impl(ptrdiff_t& __old) {
+  _LIBCPP_HIDE_FROM_ABI bool __try_acquire_impl(ptrdiff_t& __old) {
     while (true) {
       if (__old == 0)
         return false;
@@ -134,7 +133,7 @@ class counting_semaphore {
 public:
   static_assert(__least_max_value >= 0, "The least maximum value must be a positive number");
 
-  static constexpr ptrdiff_t max() noexcept { return __least_max_value; }
+  [[nodiscard]] static constexpr ptrdiff_t max() noexcept { return __least_max_value; }
 
   _LIBCPP_HIDE_FROM_ABI constexpr explicit counting_semaphore(ptrdiff_t __count) : __semaphore_(__count) {
     _LIBCPP_ASSERT_ARGUMENT_WITHIN_DOMAIN(
@@ -151,20 +150,18 @@ public:
   counting_semaphore(const counting_semaphore&)            = delete;
   counting_semaphore& operator=(const counting_semaphore&) = delete;
 
-  _LIBCPP_AVAILABILITY_SYNC _LIBCPP_HIDE_FROM_ABI void release(ptrdiff_t __update = 1) {
+  _LIBCPP_HIDE_FROM_ABI void release(ptrdiff_t __update = 1) {
     _LIBCPP_ASSERT_ARGUMENT_WITHIN_DOMAIN(__update >= 0, "counting_semaphore:release called with a negative value");
     __semaphore_.release(__update);
   }
-  _LIBCPP_AVAILABILITY_SYNC _LIBCPP_HIDE_FROM_ABI void acquire() { __semaphore_.acquire(); }
+  _LIBCPP_HIDE_FROM_ABI void acquire() { __semaphore_.acquire(); }
   template <class _Rep, class _Period>
-  _LIBCPP_AVAILABILITY_SYNC _LIBCPP_HIDE_FROM_ABI bool
-  try_acquire_for(chrono::duration<_Rep, _Period> const& __rel_time) {
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI bool try_acquire_for(chrono::duration<_Rep, _Period> const& __rel_time) {
     return __semaphore_.try_acquire_for(chrono::duration_cast<chrono::nanoseconds>(__rel_time));
   }
-  _LIBCPP_AVAILABILITY_SYNC _LIBCPP_HIDE_FROM_ABI bool try_acquire() { return __semaphore_.try_acquire(); }
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI bool try_acquire() { return __semaphore_.try_acquire(); }
   template <class _Clock, class _Duration>
-  _LIBCPP_AVAILABILITY_SYNC _LIBCPP_HIDE_FROM_ABI bool
-  try_acquire_until(chrono::time_point<_Clock, _Duration> const& __abs_time) {
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI bool try_acquire_until(chrono::time_point<_Clock, _Duration> const& __abs_time) {
     auto const __current = _Clock::now();
     if (__current >= __abs_time)
       return try_acquire();
diff --git a/lib/libcxx/include/set b/lib/libcxx/include/set
index 1f2fd7fc91..265c2f6cb6 100644
--- a/lib/libcxx/include/set
+++ b/lib/libcxx/include/set
@@ -518,19 +518,19 @@ erase_if(multiset<Key, Compare, Allocator>& c, Predicate pred);  // C++20
 #  include <__algorithm/equal.h>
 #  include <__algorithm/lexicographical_compare.h>
 #  include <__algorithm/lexicographical_compare_three_way.h>
+#  include <__algorithm/specialized_algorithms.h>
 #  include <__assert>
 #  include <__config>
 #  include <__functional/is_transparent.h>
 #  include <__functional/operations.h>
-#  include <__fwd/set.h>
 #  include <__iterator/erase_if_container.h>
 #  include <__iterator/iterator_traits.h>
-#  include <__iterator/ranges_iterator_traits.h>
 #  include <__iterator/reverse_iterator.h>
 #  include <__memory/allocator.h>
 #  include <__memory/allocator_traits.h>
 #  include <__memory_resource/polymorphic_allocator.h>
 #  include <__node_handle>
+#  include <__ranges/access.h>
 #  include <__ranges/concepts.h>
 #  include <__ranges/container_compatible_range.h>
 #  include <__ranges/from_range.h>
@@ -538,7 +538,6 @@ erase_if(multiset<Key, Compare, Allocator>& c, Predicate pred);  // C++20
 #  include <__type_traits/container_traits.h>
 #  include <__type_traits/enable_if.h>
 #  include <__type_traits/is_allocator.h>
-#  include <__type_traits/is_nothrow_assignable.h>
 #  include <__type_traits/is_nothrow_constructible.h>
 #  include <__type_traits/is_same.h>
 #  include <__type_traits/is_swappable.h>
@@ -570,7 +569,10 @@ _LIBCPP_PUSH_MACROS
 
 _LIBCPP_BEGIN_NAMESPACE_STD
 
-template <class _Key, class _Compare, class _Allocator>
+template <class _Key, class _Compare = less<_Key>, class _Allocator = allocator<_Key> >
+class multiset;
+
+template <class _Key, class _Compare = less<_Key>, class _Allocator = allocator<_Key> >
 class set {
 public:
   // types:
@@ -660,22 +662,20 @@ public:
       : set(from_range, std::forward<_Range>(__range), key_compare(), __a) {}
 #  endif
 
-  _LIBCPP_HIDE_FROM_ABI set(const set& __s) : __tree_(__s.__tree_) { insert(__s.begin(), __s.end()); }
+  _LIBCPP_HIDE_FROM_ABI set(const set& __s) = default;
 
   _LIBCPP_HIDE_FROM_ABI set& operator=(const set& __s) = default;
 
 #  ifndef _LIBCPP_CXX03_LANG
-  _LIBCPP_HIDE_FROM_ABI set(set&& __s) noexcept(is_nothrow_move_constructible<__base>::value) = default;
+  _LIBCPP_HIDE_FROM_ABI set(set&& __s) = default;
 #  endif // _LIBCPP_CXX03_LANG
 
   _LIBCPP_HIDE_FROM_ABI explicit set(const allocator_type& __a) : __tree_(__a) {}
 
-  _LIBCPP_HIDE_FROM_ABI set(const set& __s, const allocator_type& __a) : __tree_(__s.__tree_.value_comp(), __a) {
-    insert(__s.begin(), __s.end());
-  }
+  _LIBCPP_HIDE_FROM_ABI set(const set& __s, const allocator_type& __alloc) : __tree_(__s.__tree_, __alloc) {}
 
 #  ifndef _LIBCPP_CXX03_LANG
-  _LIBCPP_HIDE_FROM_ABI set(set&& __s, const allocator_type& __a);
+  _LIBCPP_HIDE_FROM_ABI set(set&& __s, const allocator_type& __alloc) : __tree_(std::move(__s.__tree_), __alloc) {}
 
   _LIBCPP_HIDE_FROM_ABI set(initializer_list<value_type> __il, const value_compare& __comp = value_compare())
       : __tree_(__comp) {
@@ -693,36 +693,38 @@ public:
 #    endif
 
   _LIBCPP_HIDE_FROM_ABI set& operator=(initializer_list<value_type> __il) {
-    __tree_.__assign_unique(__il.begin(), __il.end());
+    clear();
+    insert(__il.begin(), __il.end());
     return *this;
   }
 
-  _LIBCPP_HIDE_FROM_ABI set& operator=(set&& __s) noexcept(is_nothrow_move_assignable<__base>::value) {
-    __tree_ = std::move(__s.__tree_);
-    return *this;
-  }
+  _LIBCPP_HIDE_FROM_ABI set& operator=(set&& __s) = default;
 #  endif // _LIBCPP_CXX03_LANG
 
   _LIBCPP_HIDE_FROM_ABI ~set() { static_assert(sizeof(std::__diagnose_non_const_comparator<_Key, _Compare>()), ""); }
 
-  _LIBCPP_HIDE_FROM_ABI iterator begin() _NOEXCEPT { return __tree_.begin(); }
-  _LIBCPP_HIDE_FROM_ABI const_iterator begin() const _NOEXCEPT { return __tree_.begin(); }
-  _LIBCPP_HIDE_FROM_ABI iterator end() _NOEXCEPT { return __tree_.end(); }
-  _LIBCPP_HIDE_FROM_ABI const_iterator end() const _NOEXCEPT { return __tree_.end(); }
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI iterator begin() _NOEXCEPT { return __tree_.begin(); }
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI const_iterator begin() const _NOEXCEPT { return __tree_.begin(); }
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI iterator end() _NOEXCEPT { return __tree_.end(); }
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI const_iterator end() const _NOEXCEPT { return __tree_.end(); }
 
-  _LIBCPP_HIDE_FROM_ABI reverse_iterator rbegin() _NOEXCEPT { return reverse_iterator(end()); }
-  _LIBCPP_HIDE_FROM_ABI const_reverse_iterator rbegin() const _NOEXCEPT { return const_reverse_iterator(end()); }
-  _LIBCPP_HIDE_FROM_ABI reverse_iterator rend() _NOEXCEPT { return reverse_iterator(begin()); }
-  _LIBCPP_HIDE_FROM_ABI const_reverse_iterator rend() const _NOEXCEPT { return const_reverse_iterator(begin()); }
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI reverse_iterator rbegin() _NOEXCEPT { return reverse_iterator(end()); }
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI const_reverse_iterator rbegin() const _NOEXCEPT {
+    return const_reverse_iterator(end());
+  }
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI reverse_iterator rend() _NOEXCEPT { return reverse_iterator(begin()); }
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI const_reverse_iterator rend() const _NOEXCEPT {
+    return const_reverse_iterator(begin());
+  }
 
-  _LIBCPP_HIDE_FROM_ABI const_iterator cbegin() const _NOEXCEPT { return begin(); }
-  _LIBCPP_HIDE_FROM_ABI const_iterator cend() const _NOEXCEPT { return end(); }
-  _LIBCPP_HIDE_FROM_ABI const_reverse_iterator crbegin() const _NOEXCEPT { return rbegin(); }
-  _LIBCPP_HIDE_FROM_ABI const_reverse_iterator crend() const _NOEXCEPT { return rend(); }
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI const_iterator cbegin() const _NOEXCEPT { return begin(); }
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI const_iterator cend() const _NOEXCEPT { return end(); }
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI const_reverse_iterator crbegin() const _NOEXCEPT { return rbegin(); }
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI const_reverse_iterator crend() const _NOEXCEPT { return rend(); }
 
   [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI bool empty() const _NOEXCEPT { return __tree_.size() == 0; }
-  _LIBCPP_HIDE_FROM_ABI size_type size() const _NOEXCEPT { return __tree_.size(); }
-  _LIBCPP_HIDE_FROM_ABI size_type max_size() const _NOEXCEPT { return __tree_.max_size(); }
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI size_type size() const _NOEXCEPT { return __tree_.size(); }
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI size_type max_size() const _NOEXCEPT { return __tree_.max_size(); }
 
   // modifiers:
 #  ifndef _LIBCPP_CXX03_LANG
@@ -732,28 +734,24 @@ public:
   }
   template <class... _Args>
   _LIBCPP_HIDE_FROM_ABI iterator emplace_hint(const_iterator __p, _Args&&... __args) {
-    return __tree_.__emplace_hint_unique(__p, std::forward<_Args>(__args)...);
+    return __tree_.__emplace_hint_unique(__p, std::forward<_Args>(__args)...).first;
   }
 #  endif // _LIBCPP_CXX03_LANG
 
   _LIBCPP_HIDE_FROM_ABI pair<iterator, bool> insert(const value_type& __v) { return __tree_.__emplace_unique(__v); }
   _LIBCPP_HIDE_FROM_ABI iterator insert(const_iterator __p, const value_type& __v) {
-    return __tree_.__emplace_hint_unique(__p, __v);
+    return __tree_.__emplace_hint_unique(__p, __v).first;
   }
 
   template <class _InputIterator>
-  _LIBCPP_HIDE_FROM_ABI void insert(_InputIterator __f, _InputIterator __l) {
-    for (const_iterator __e = cend(); __f != __l; ++__f)
-      __tree_.__emplace_hint_unique(__e, *__f);
+  _LIBCPP_HIDE_FROM_ABI void insert(_InputIterator __first, _InputIterator __last) {
+    __tree_.__insert_range_unique(__first, __last);
   }
 
 #  if _LIBCPP_STD_VER >= 23
   template <_ContainerCompatibleRange<value_type> _Range>
   _LIBCPP_HIDE_FROM_ABI void insert_range(_Range&& __range) {
-    const_iterator __end = cend();
-    for (auto&& __element : __range) {
-      __tree_.__emplace_hint_unique(__end, std::forward<decltype(__element)>(__element));
-    }
+    __tree_.__insert_range_unique(ranges::begin(__range), ranges::end(__range));
   }
 #  endif
 
@@ -763,7 +761,7 @@ public:
   }
 
   _LIBCPP_HIDE_FROM_ABI iterator insert(const_iterator __p, value_type&& __v) {
-    return __tree_.__emplace_hint_unique(__p, std::move(__v));
+    return __tree_.__emplace_hint_unique(__p, std::move(__v)).first;
   }
 
   _LIBCPP_HIDE_FROM_ABI void insert(initializer_list<value_type> __il) { insert(__il.begin(), __il.end()); }
@@ -785,10 +783,10 @@ public:
                                         "node_type with incompatible allocator passed to set::insert()");
     return __tree_.template __node_handle_insert_unique<node_type>(__hint, std::move(__nh));
   }
-  _LIBCPP_HIDE_FROM_ABI node_type extract(key_type const& __key) {
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI node_type extract(key_type const& __key) {
     return __tree_.template __node_handle_extract<node_type>(__key);
   }
-  _LIBCPP_HIDE_FROM_ABI node_type extract(const_iterator __it) {
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI node_type extract(const_iterator __it) {
     return __tree_.template __node_handle_extract<node_type>(__it);
   }
   template <class _Compare2>
@@ -819,101 +817,120 @@ public:
 
   _LIBCPP_HIDE_FROM_ABI void swap(set& __s) _NOEXCEPT_(__is_nothrow_swappable_v<__base>) { __tree_.swap(__s.__tree_); }
 
-  _LIBCPP_HIDE_FROM_ABI allocator_type get_allocator() const _NOEXCEPT { return __tree_.__alloc(); }
-  _LIBCPP_HIDE_FROM_ABI key_compare key_comp() const { return __tree_.value_comp(); }
-  _LIBCPP_HIDE_FROM_ABI value_compare value_comp() const { return __tree_.value_comp(); }
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI allocator_type get_allocator() const _NOEXCEPT { return __tree_.__alloc(); }
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI key_compare key_comp() const { return __tree_.value_comp(); }
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI value_compare value_comp() const { return __tree_.value_comp(); }
 
   // set operations:
-  _LIBCPP_HIDE_FROM_ABI iterator find(const key_type& __k) { return __tree_.find(__k); }
-  _LIBCPP_HIDE_FROM_ABI const_iterator find(const key_type& __k) const { return __tree_.find(__k); }
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI iterator find(const key_type& __k) { return __tree_.find(__k); }
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI const_iterator find(const key_type& __k) const { return __tree_.find(__k); }
 #  if _LIBCPP_STD_VER >= 14
   template <typename _K2, enable_if_t<__is_transparent_v<_Compare, _K2>, int> = 0>
-  _LIBCPP_HIDE_FROM_ABI iterator find(const _K2& __k) {
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI iterator find(const _K2& __k) {
     return __tree_.find(__k);
   }
   template <typename _K2, enable_if_t<__is_transparent_v<_Compare, _K2>, int> = 0>
-  _LIBCPP_HIDE_FROM_ABI const_iterator find(const _K2& __k) const {
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI const_iterator find(const _K2& __k) const {
     return __tree_.find(__k);
   }
 #  endif
 
-  _LIBCPP_HIDE_FROM_ABI size_type count(const key_type& __k) const { return __tree_.__count_unique(__k); }
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI size_type count(const key_type& __k) const {
+    return __tree_.__count_unique(__k);
+  }
 #  if _LIBCPP_STD_VER >= 14
   template <typename _K2, enable_if_t<__is_transparent_v<_Compare, _K2>, int> = 0>
-  _LIBCPP_HIDE_FROM_ABI size_type count(const _K2& __k) const {
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI size_type count(const _K2& __k) const {
     return __tree_.__count_multi(__k);
   }
 #  endif
 
 #  if _LIBCPP_STD_VER >= 20
-  _LIBCPP_HIDE_FROM_ABI bool contains(const key_type& __k) const { return find(__k) != end(); }
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI bool contains(const key_type& __k) const { return find(__k) != end(); }
   template <typename _K2, enable_if_t<__is_transparent_v<_Compare, _K2>, int> = 0>
-  _LIBCPP_HIDE_FROM_ABI bool contains(const _K2& __k) const {
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI bool contains(const _K2& __k) const {
     return find(__k) != end();
   }
 #  endif // _LIBCPP_STD_VER >= 20
 
-  _LIBCPP_HIDE_FROM_ABI iterator lower_bound(const key_type& __k) { return __tree_.lower_bound(__k); }
-  _LIBCPP_HIDE_FROM_ABI const_iterator lower_bound(const key_type& __k) const { return __tree_.lower_bound(__k); }
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI iterator lower_bound(const key_type& __k) {
+    return __tree_.__lower_bound_unique(__k);
+  }
+
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI const_iterator lower_bound(const key_type& __k) const {
+    return __tree_.__lower_bound_unique(__k);
+  }
+
+  // The transparent versions of the lookup functions use the _multi version, since a non-element key is allowed to
+  // match multiple elements.
 #  if _LIBCPP_STD_VER >= 14
   template <typename _K2, enable_if_t<__is_transparent_v<_Compare, _K2>, int> = 0>
-  _LIBCPP_HIDE_FROM_ABI iterator lower_bound(const _K2& __k) {
-    return __tree_.lower_bound(__k);
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI iterator lower_bound(const _K2& __k) {
+    return __tree_.__lower_bound_multi(__k);
   }
 
   template <typename _K2, enable_if_t<__is_transparent_v<_Compare, _K2>, int> = 0>
-  _LIBCPP_HIDE_FROM_ABI const_iterator lower_bound(const _K2& __k) const {
-    return __tree_.lower_bound(__k);
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI const_iterator lower_bound(const _K2& __k) const {
+    return __tree_.__lower_bound_multi(__k);
   }
 #  endif
 
-  _LIBCPP_HIDE_FROM_ABI iterator upper_bound(const key_type& __k) { return __tree_.upper_bound(__k); }
-  _LIBCPP_HIDE_FROM_ABI const_iterator upper_bound(const key_type& __k) const { return __tree_.upper_bound(__k); }
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI iterator upper_bound(const key_type& __k) {
+    return __tree_.__upper_bound_unique(__k);
+  }
+
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI const_iterator upper_bound(const key_type& __k) const {
+    return __tree_.__upper_bound_unique(__k);
+  }
+
 #  if _LIBCPP_STD_VER >= 14
   template <typename _K2, enable_if_t<__is_transparent_v<_Compare, _K2>, int> = 0>
-  _LIBCPP_HIDE_FROM_ABI iterator upper_bound(const _K2& __k) {
-    return __tree_.upper_bound(__k);
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI iterator upper_bound(const _K2& __k) {
+    return __tree_.__upper_bound_multi(__k);
   }
   template <typename _K2, enable_if_t<__is_transparent_v<_Compare, _K2>, int> = 0>
-  _LIBCPP_HIDE_FROM_ABI const_iterator upper_bound(const _K2& __k) const {
-    return __tree_.upper_bound(__k);
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI const_iterator upper_bound(const _K2& __k) const {
+    return __tree_.__upper_bound_multi(__k);
   }
 #  endif
 
-  _LIBCPP_HIDE_FROM_ABI pair<iterator, iterator> equal_range(const key_type& __k) {
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI pair<iterator, iterator> equal_range(const key_type& __k) {
     return __tree_.__equal_range_unique(__k);
   }
-  _LIBCPP_HIDE_FROM_ABI pair<const_iterator, const_iterator> equal_range(const key_type& __k) const {
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI pair<const_iterator, const_iterator> equal_range(const key_type& __k) const {
     return __tree_.__equal_range_unique(__k);
   }
 #  if _LIBCPP_STD_VER >= 14
   template <typename _K2, enable_if_t<__is_transparent_v<_Compare, _K2>, int> = 0>
-  _LIBCPP_HIDE_FROM_ABI pair<iterator, iterator> equal_range(const _K2& __k) {
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI pair<iterator, iterator> equal_range(const _K2& __k) {
     return __tree_.__equal_range_multi(__k);
   }
   template <typename _K2, enable_if_t<__is_transparent_v<_Compare, _K2>, int> = 0>
-  _LIBCPP_HIDE_FROM_ABI pair<const_iterator, const_iterator> equal_range(const _K2& __k) const {
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI pair<const_iterator, const_iterator> equal_range(const _K2& __k) const {
     return __tree_.__equal_range_multi(__k);
   }
 #  endif
+
+  template <class, class...>
+  friend struct __specialized_algorithm;
 };
 
 #  if _LIBCPP_STD_VER >= 17
 template <class _InputIterator,
-          class _Compare   = less<__iter_value_type<_InputIterator>>,
-          class _Allocator = allocator<__iter_value_type<_InputIterator>>,
+          class _Compare   = less<__iterator_value_type<_InputIterator>>,
+          class _Allocator = allocator<__iterator_value_type<_InputIterator>>,
           class            = enable_if_t<__has_input_iterator_category<_InputIterator>::value, void>,
-          class            = enable_if_t<__is_allocator<_Allocator>::value, void>,
-          class            = enable_if_t<!__is_allocator<_Compare>::value, void>>
+          class            = enable_if_t<__is_allocator_v<_Allocator>>,
+          class            = enable_if_t<!__is_allocator_v<_Compare>>>
 set(_InputIterator, _InputIterator, _Compare = _Compare(), _Allocator = _Allocator())
-    -> set<__iter_value_type<_InputIterator>, _Compare, _Allocator>;
+    -> set<__iterator_value_type<_InputIterator>, _Compare, _Allocator>;
 
 #    if _LIBCPP_STD_VER >= 23
 template <ranges::input_range _Range,
           class _Compare   = less<ranges::range_value_t<_Range>>,
           class _Allocator = allocator<ranges::range_value_t<_Range>>,
-          class            = enable_if_t<__is_allocator<_Allocator>::value, void>,
-          class            = enable_if_t<!__is_allocator<_Compare>::value, void>>
+          class            = enable_if_t<__is_allocator_v<_Allocator>>,
+          class            = enable_if_t<!__is_allocator_v<_Compare>>>
 set(from_range_t, _Range&&, _Compare = _Compare(), _Allocator = _Allocator())
     -> set<ranges::range_value_t<_Range>, _Compare, _Allocator>;
 #    endif
@@ -921,41 +938,43 @@ set(from_range_t, _Range&&, _Compare = _Compare(), _Allocator = _Allocator())
 template <class _Key,
           class _Compare   = less<_Key>,
           class _Allocator = allocator<_Key>,
-          class            = enable_if_t<!__is_allocator<_Compare>::value, void>,
-          class            = enable_if_t<__is_allocator<_Allocator>::value, void>>
+          class            = enable_if_t<!__is_allocator_v<_Compare>>,
+          class            = enable_if_t<__is_allocator_v<_Allocator>>>
 set(initializer_list<_Key>, _Compare = _Compare(), _Allocator = _Allocator()) -> set<_Key, _Compare, _Allocator>;
 
 template <class _InputIterator,
           class _Allocator,
           class = enable_if_t<__has_input_iterator_category<_InputIterator>::value, void>,
-          class = enable_if_t<__is_allocator<_Allocator>::value, void>>
-set(_InputIterator,
-    _InputIterator,
-    _Allocator) -> set<__iter_value_type<_InputIterator>, less<__iter_value_type<_InputIterator>>, _Allocator>;
+          class = enable_if_t<__is_allocator_v<_Allocator>>>
+set(_InputIterator, _InputIterator, _Allocator)
+    -> set<__iterator_value_type<_InputIterator>, less<__iterator_value_type<_InputIterator>>, _Allocator>;
 
 #    if _LIBCPP_STD_VER >= 23
-template <ranges::input_range _Range, class _Allocator, class = enable_if_t<__is_allocator<_Allocator>::value, void>>
-set(from_range_t,
-    _Range&&,
-    _Allocator) -> set<ranges::range_value_t<_Range>, less<ranges::range_value_t<_Range>>, _Allocator>;
+template <ranges::input_range _Range, class _Allocator, class = enable_if_t<__is_allocator_v<_Allocator>>>
+set(from_range_t, _Range&&, _Allocator)
+    -> set<ranges::range_value_t<_Range>, less<ranges::range_value_t<_Range>>, _Allocator>;
 #    endif
 
-template <class _Key, class _Allocator, class = enable_if_t<__is_allocator<_Allocator>::value, void>>
+template <class _Key, class _Allocator, class = enable_if_t<__is_allocator_v<_Allocator>>>
 set(initializer_list<_Key>, _Allocator) -> set<_Key, less<_Key>, _Allocator>;
 #  endif
 
-#  ifndef _LIBCPP_CXX03_LANG
+#  if _LIBCPP_STD_VER >= 14
+template <class _Alg, class _Key, class _Compare, class _Allocator>
+struct __specialized_algorithm<_Alg, __single_range<set<_Key, _Compare, _Allocator>>> {
+  using __set _LIBCPP_NODEBUG = set<_Key, _Compare, _Allocator>;
 
-template <class _Key, class _Compare, class _Allocator>
-set<_Key, _Compare, _Allocator>::set(set&& __s, const allocator_type& __a) : __tree_(std::move(__s.__tree_), __a) {
-  if (__a != __s.get_allocator()) {
-    const_iterator __e = cend();
-    while (!__s.empty())
-      insert(__e, std::move(__s.__tree_.remove(__s.begin())->__value_));
+  static const bool __has_algorithm =
+      __specialized_algorithm<_Alg, __single_range<typename __set::__base>>::__has_algorithm;
+
+  // set's begin() and end() are identical with and without const qualification
+  template <class... _Args>
+  _LIBCPP_HIDE_FROM_ABI static auto operator()(const __set& __set, _Args&&... __args) {
+    return __specialized_algorithm<_Alg, __single_range<typename __set::__base>>()(
+        __set.__tree_, std::forward<_Args>(__args)...);
   }
-}
-
-#  endif // _LIBCPP_CXX03_LANG
+};
+#  endif
 
 template <class _Key, class _Compare, class _Allocator>
 inline _LIBCPP_HIDE_FROM_ABI bool
@@ -1119,24 +1138,17 @@ public:
       : multiset(from_range, std::forward<_Range>(__range), key_compare(), __a) {}
 #  endif
 
-  _LIBCPP_HIDE_FROM_ABI multiset(const multiset& __s)
-      : __tree_(__s.__tree_.value_comp(),
-                __alloc_traits::select_on_container_copy_construction(__s.__tree_.__alloc())) {
-    insert(__s.begin(), __s.end());
-  }
+  _LIBCPP_HIDE_FROM_ABI multiset(const multiset& __s) = default;
 
   _LIBCPP_HIDE_FROM_ABI multiset& operator=(const multiset& __s) = default;
 
 #  ifndef _LIBCPP_CXX03_LANG
-  _LIBCPP_HIDE_FROM_ABI multiset(multiset&& __s) noexcept(is_nothrow_move_constructible<__base>::value) = default;
+  _LIBCPP_HIDE_FROM_ABI multiset(multiset&& __s) = default;
 
-  _LIBCPP_HIDE_FROM_ABI multiset(multiset&& __s, const allocator_type& __a);
+  _LIBCPP_HIDE_FROM_ABI multiset(multiset&& __s, const allocator_type& __a) : __tree_(std::move(__s.__tree_), __a) {}
 #  endif // _LIBCPP_CXX03_LANG
   _LIBCPP_HIDE_FROM_ABI explicit multiset(const allocator_type& __a) : __tree_(__a) {}
-  _LIBCPP_HIDE_FROM_ABI multiset(const multiset& __s, const allocator_type& __a)
-      : __tree_(__s.__tree_.value_comp(), __a) {
-    insert(__s.begin(), __s.end());
-  }
+  _LIBCPP_HIDE_FROM_ABI multiset(const multiset& __s, const allocator_type& __a) : __tree_(__s.__tree_, __a) {}
 
 #  ifndef _LIBCPP_CXX03_LANG
   _LIBCPP_HIDE_FROM_ABI multiset(initializer_list<value_type> __il, const value_compare& __comp = value_compare())
@@ -1156,38 +1168,40 @@ public:
 #    endif
 
   _LIBCPP_HIDE_FROM_ABI multiset& operator=(initializer_list<value_type> __il) {
-    __tree_.__assign_multi(__il.begin(), __il.end());
+    clear();
+    insert(__il.begin(), __il.end());
     return *this;
   }
 
-  _LIBCPP_HIDE_FROM_ABI multiset& operator=(multiset&& __s) _NOEXCEPT_(is_nothrow_move_assignable<__base>::value) {
-    __tree_ = std::move(__s.__tree_);
-    return *this;
-  }
+  _LIBCPP_HIDE_FROM_ABI multiset& operator=(multiset&& __s) = default;
 #  endif // _LIBCPP_CXX03_LANG
 
   _LIBCPP_HIDE_FROM_ABI ~multiset() {
     static_assert(sizeof(std::__diagnose_non_const_comparator<_Key, _Compare>()), "");
   }
 
-  _LIBCPP_HIDE_FROM_ABI iterator begin() _NOEXCEPT { return __tree_.begin(); }
-  _LIBCPP_HIDE_FROM_ABI const_iterator begin() const _NOEXCEPT { return __tree_.begin(); }
-  _LIBCPP_HIDE_FROM_ABI iterator end() _NOEXCEPT { return __tree_.end(); }
-  _LIBCPP_HIDE_FROM_ABI const_iterator end() const _NOEXCEPT { return __tree_.end(); }
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI iterator begin() _NOEXCEPT { return __tree_.begin(); }
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI const_iterator begin() const _NOEXCEPT { return __tree_.begin(); }
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI iterator end() _NOEXCEPT { return __tree_.end(); }
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI const_iterator end() const _NOEXCEPT { return __tree_.end(); }
 
-  _LIBCPP_HIDE_FROM_ABI reverse_iterator rbegin() _NOEXCEPT { return reverse_iterator(end()); }
-  _LIBCPP_HIDE_FROM_ABI const_reverse_iterator rbegin() const _NOEXCEPT { return const_reverse_iterator(end()); }
-  _LIBCPP_HIDE_FROM_ABI reverse_iterator rend() _NOEXCEPT { return reverse_iterator(begin()); }
-  _LIBCPP_HIDE_FROM_ABI const_reverse_iterator rend() const _NOEXCEPT { return const_reverse_iterator(begin()); }
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI reverse_iterator rbegin() _NOEXCEPT { return reverse_iterator(end()); }
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI const_reverse_iterator rbegin() const _NOEXCEPT {
+    return const_reverse_iterator(end());
+  }
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI reverse_iterator rend() _NOEXCEPT { return reverse_iterator(begin()); }
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI const_reverse_iterator rend() const _NOEXCEPT {
+    return const_reverse_iterator(begin());
+  }
 
-  _LIBCPP_HIDE_FROM_ABI const_iterator cbegin() const _NOEXCEPT { return begin(); }
-  _LIBCPP_HIDE_FROM_ABI const_iterator cend() const _NOEXCEPT { return end(); }
-  _LIBCPP_HIDE_FROM_ABI const_reverse_iterator crbegin() const _NOEXCEPT { return rbegin(); }
-  _LIBCPP_HIDE_FROM_ABI const_reverse_iterator crend() const _NOEXCEPT { return rend(); }
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI const_iterator cbegin() const _NOEXCEPT { return begin(); }
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI const_iterator cend() const _NOEXCEPT { return end(); }
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI const_reverse_iterator crbegin() const _NOEXCEPT { return rbegin(); }
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI const_reverse_iterator crend() const _NOEXCEPT { return rend(); }
 
   [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI bool empty() const _NOEXCEPT { return __tree_.size() == 0; }
-  _LIBCPP_HIDE_FROM_ABI size_type size() const _NOEXCEPT { return __tree_.size(); }
-  _LIBCPP_HIDE_FROM_ABI size_type max_size() const _NOEXCEPT { return __tree_.max_size(); }
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI size_type size() const _NOEXCEPT { return __tree_.size(); }
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI size_type max_size() const _NOEXCEPT { return __tree_.max_size(); }
 
   // modifiers:
 #  ifndef _LIBCPP_CXX03_LANG
@@ -1207,18 +1221,14 @@ public:
   }
 
   template <class _InputIterator>
-  _LIBCPP_HIDE_FROM_ABI void insert(_InputIterator __f, _InputIterator __l) {
-    for (const_iterator __e = cend(); __f != __l; ++__f)
-      __tree_.__emplace_hint_multi(__e, *__f);
+  _LIBCPP_HIDE_FROM_ABI void insert(_InputIterator __first, _InputIterator __last) {
+    __tree_.__insert_range_multi(__first, __last);
   }
 
 #  if _LIBCPP_STD_VER >= 23
   template <_ContainerCompatibleRange<value_type> _Range>
   _LIBCPP_HIDE_FROM_ABI void insert_range(_Range&& __range) {
-    const_iterator __end = cend();
-    for (auto&& __element : __range) {
-      __tree_.__emplace_hint_multi(__end, std::forward<decltype(__element)>(__element));
-    }
+    __tree_.__insert_range_multi(ranges::begin(__range), ranges::end(__range));
   }
 #  endif
 
@@ -1248,10 +1258,10 @@ public:
                                         "node_type with incompatible allocator passed to multiset::insert()");
     return __tree_.template __node_handle_insert_multi<node_type>(__hint, std::move(__nh));
   }
-  _LIBCPP_HIDE_FROM_ABI node_type extract(key_type const& __key) {
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI node_type extract(key_type const& __key) {
     return __tree_.template __node_handle_extract<node_type>(__key);
   }
-  _LIBCPP_HIDE_FROM_ABI node_type extract(const_iterator __it) {
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI node_type extract(const_iterator __it) {
     return __tree_.template __node_handle_extract<node_type>(__it);
   }
   template <class _Compare2>
@@ -1284,101 +1294,118 @@ public:
     __tree_.swap(__s.__tree_);
   }
 
-  _LIBCPP_HIDE_FROM_ABI allocator_type get_allocator() const _NOEXCEPT { return __tree_.__alloc(); }
-  _LIBCPP_HIDE_FROM_ABI key_compare key_comp() const { return __tree_.value_comp(); }
-  _LIBCPP_HIDE_FROM_ABI value_compare value_comp() const { return __tree_.value_comp(); }
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI allocator_type get_allocator() const _NOEXCEPT { return __tree_.__alloc(); }
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI key_compare key_comp() const { return __tree_.value_comp(); }
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI value_compare value_comp() const { return __tree_.value_comp(); }
 
   // set operations:
-  _LIBCPP_HIDE_FROM_ABI iterator find(const key_type& __k) { return __tree_.find(__k); }
-  _LIBCPP_HIDE_FROM_ABI const_iterator find(const key_type& __k) const { return __tree_.find(__k); }
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI iterator find(const key_type& __k) { return __tree_.find(__k); }
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI const_iterator find(const key_type& __k) const { return __tree_.find(__k); }
 #  if _LIBCPP_STD_VER >= 14
   template <typename _K2, enable_if_t<__is_transparent_v<_Compare, _K2>, int> = 0>
-  _LIBCPP_HIDE_FROM_ABI iterator find(const _K2& __k) {
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI iterator find(const _K2& __k) {
     return __tree_.find(__k);
   }
   template <typename _K2, enable_if_t<__is_transparent_v<_Compare, _K2>, int> = 0>
-  _LIBCPP_HIDE_FROM_ABI const_iterator find(const _K2& __k) const {
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI const_iterator find(const _K2& __k) const {
     return __tree_.find(__k);
   }
 #  endif
 
-  _LIBCPP_HIDE_FROM_ABI size_type count(const key_type& __k) const { return __tree_.__count_multi(__k); }
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI size_type count(const key_type& __k) const {
+    return __tree_.__count_multi(__k);
+  }
 #  if _LIBCPP_STD_VER >= 14
   template <typename _K2, enable_if_t<__is_transparent_v<_Compare, _K2>, int> = 0>
-  _LIBCPP_HIDE_FROM_ABI size_type count(const _K2& __k) const {
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI size_type count(const _K2& __k) const {
     return __tree_.__count_multi(__k);
   }
 #  endif
 
 #  if _LIBCPP_STD_VER >= 20
-  _LIBCPP_HIDE_FROM_ABI bool contains(const key_type& __k) const { return find(__k) != end(); }
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI bool contains(const key_type& __k) const { return find(__k) != end(); }
   template <typename _K2, enable_if_t<__is_transparent_v<_Compare, _K2>, int> = 0>
-  _LIBCPP_HIDE_FROM_ABI bool contains(const _K2& __k) const {
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI bool contains(const _K2& __k) const {
     return find(__k) != end();
   }
 #  endif // _LIBCPP_STD_VER >= 20
 
-  _LIBCPP_HIDE_FROM_ABI iterator lower_bound(const key_type& __k) { return __tree_.lower_bound(__k); }
-  _LIBCPP_HIDE_FROM_ABI const_iterator lower_bound(const key_type& __k) const { return __tree_.lower_bound(__k); }
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI iterator lower_bound(const key_type& __k) {
+    return __tree_.__lower_bound_multi(__k);
+  }
+
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI const_iterator lower_bound(const key_type& __k) const {
+    return __tree_.__lower_bound_multi(__k);
+  }
+
 #  if _LIBCPP_STD_VER >= 14
   template <typename _K2, enable_if_t<__is_transparent_v<_Compare, _K2>, int> = 0>
-  _LIBCPP_HIDE_FROM_ABI iterator lower_bound(const _K2& __k) {
-    return __tree_.lower_bound(__k);
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI iterator lower_bound(const _K2& __k) {
+    return __tree_.__lower_bound_multi(__k);
   }
 
   template <typename _K2, enable_if_t<__is_transparent_v<_Compare, _K2>, int> = 0>
-  _LIBCPP_HIDE_FROM_ABI const_iterator lower_bound(const _K2& __k) const {
-    return __tree_.lower_bound(__k);
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI const_iterator lower_bound(const _K2& __k) const {
+    return __tree_.__lower_bound_multi(__k);
   }
 #  endif
 
-  _LIBCPP_HIDE_FROM_ABI iterator upper_bound(const key_type& __k) { return __tree_.upper_bound(__k); }
-  _LIBCPP_HIDE_FROM_ABI const_iterator upper_bound(const key_type& __k) const { return __tree_.upper_bound(__k); }
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI iterator upper_bound(const key_type& __k) {
+    return __tree_.__upper_bound_multi(__k);
+  }
+
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI const_iterator upper_bound(const key_type& __k) const {
+    return __tree_.__upper_bound_multi(__k);
+  }
+
 #  if _LIBCPP_STD_VER >= 14
   template <typename _K2, enable_if_t<__is_transparent_v<_Compare, _K2>, int> = 0>
-  _LIBCPP_HIDE_FROM_ABI iterator upper_bound(const _K2& __k) {
-    return __tree_.upper_bound(__k);
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI iterator upper_bound(const _K2& __k) {
+    return __tree_.__upper_bound_multi(__k);
   }
   template <typename _K2, enable_if_t<__is_transparent_v<_Compare, _K2>, int> = 0>
-  _LIBCPP_HIDE_FROM_ABI const_iterator upper_bound(const _K2& __k) const {
-    return __tree_.upper_bound(__k);
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI const_iterator upper_bound(const _K2& __k) const {
+    return __tree_.__upper_bound_multi(__k);
   }
 #  endif
 
-  _LIBCPP_HIDE_FROM_ABI pair<iterator, iterator> equal_range(const key_type& __k) {
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI pair<iterator, iterator> equal_range(const key_type& __k) {
     return __tree_.__equal_range_multi(__k);
   }
-  _LIBCPP_HIDE_FROM_ABI pair<const_iterator, const_iterator> equal_range(const key_type& __k) const {
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI pair<const_iterator, const_iterator> equal_range(const key_type& __k) const {
     return __tree_.__equal_range_multi(__k);
   }
 #  if _LIBCPP_STD_VER >= 14
   template <typename _K2, enable_if_t<__is_transparent_v<_Compare, _K2>, int> = 0>
-  _LIBCPP_HIDE_FROM_ABI pair<iterator, iterator> equal_range(const _K2& __k) {
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI pair<iterator, iterator> equal_range(const _K2& __k) {
     return __tree_.__equal_range_multi(__k);
   }
   template <typename _K2, enable_if_t<__is_transparent_v<_Compare, _K2>, int> = 0>
-  _LIBCPP_HIDE_FROM_ABI pair<const_iterator, const_iterator> equal_range(const _K2& __k) const {
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI pair<const_iterator, const_iterator> equal_range(const _K2& __k) const {
     return __tree_.__equal_range_multi(__k);
   }
 #  endif
+
+  template <class, class...>
+  friend struct __specialized_algorithm;
 };
 
 #  if _LIBCPP_STD_VER >= 17
 template <class _InputIterator,
-          class _Compare   = less<__iter_value_type<_InputIterator>>,
-          class _Allocator = allocator<__iter_value_type<_InputIterator>>,
+          class _Compare   = less<__iterator_value_type<_InputIterator>>,
+          class _Allocator = allocator<__iterator_value_type<_InputIterator>>,
           class            = enable_if_t<__has_input_iterator_category<_InputIterator>::value, void>,
-          class            = enable_if_t<__is_allocator<_Allocator>::value, void>,
-          class            = enable_if_t<!__is_allocator<_Compare>::value, void>>
+          class            = enable_if_t<__is_allocator_v<_Allocator>>,
+          class            = enable_if_t<!__is_allocator_v<_Compare>>>
 multiset(_InputIterator, _InputIterator, _Compare = _Compare(), _Allocator = _Allocator())
-    -> multiset<__iter_value_type<_InputIterator>, _Compare, _Allocator>;
+    -> multiset<__iterator_value_type<_InputIterator>, _Compare, _Allocator>;
 
 #    if _LIBCPP_STD_VER >= 23
 template <ranges::input_range _Range,
           class _Compare   = less<ranges::range_value_t<_Range>>,
           class _Allocator = allocator<ranges::range_value_t<_Range>>,
-          class            = enable_if_t<__is_allocator<_Allocator>::value, void>,
-          class            = enable_if_t<!__is_allocator<_Compare>::value, void>>
+          class            = enable_if_t<__is_allocator_v<_Allocator>>,
+          class            = enable_if_t<!__is_allocator_v<_Compare>>>
 multiset(from_range_t, _Range&&, _Compare = _Compare(), _Allocator = _Allocator())
     -> multiset<ranges::range_value_t<_Range>, _Compare, _Allocator>;
 #    endif
@@ -1386,43 +1413,44 @@ multiset(from_range_t, _Range&&, _Compare = _Compare(), _Allocator = _Allocator(
 template <class _Key,
           class _Compare   = less<_Key>,
           class _Allocator = allocator<_Key>,
-          class            = enable_if_t<__is_allocator<_Allocator>::value, void>,
-          class            = enable_if_t<!__is_allocator<_Compare>::value, void>>
-multiset(initializer_list<_Key>,
-         _Compare   = _Compare(),
-         _Allocator = _Allocator()) -> multiset<_Key, _Compare, _Allocator>;
+          class            = enable_if_t<__is_allocator_v<_Allocator>>,
+          class            = enable_if_t<!__is_allocator_v<_Compare>>>
+multiset(initializer_list<_Key>, _Compare = _Compare(), _Allocator = _Allocator())
+    -> multiset<_Key, _Compare, _Allocator>;
 
 template <class _InputIterator,
           class _Allocator,
           class = enable_if_t<__has_input_iterator_category<_InputIterator>::value, void>,
-          class = enable_if_t<__is_allocator<_Allocator>::value, void>>
+          class = enable_if_t<__is_allocator_v<_Allocator>>>
 multiset(_InputIterator, _InputIterator, _Allocator)
-    -> multiset<__iter_value_type<_InputIterator>, less<__iter_value_type<_InputIterator>>, _Allocator>;
+    -> multiset<__iterator_value_type<_InputIterator>, less<__iterator_value_type<_InputIterator>>, _Allocator>;
 
 #    if _LIBCPP_STD_VER >= 23
-template <ranges::input_range _Range, class _Allocator, class = enable_if_t<__is_allocator<_Allocator>::value, void>>
-multiset(from_range_t,
-         _Range&&,
-         _Allocator) -> multiset<ranges::range_value_t<_Range>, less<ranges::range_value_t<_Range>>, _Allocator>;
+template <ranges::input_range _Range, class _Allocator, class = enable_if_t<__is_allocator_v<_Allocator>>>
+multiset(from_range_t, _Range&&, _Allocator)
+    -> multiset<ranges::range_value_t<_Range>, less<ranges::range_value_t<_Range>>, _Allocator>;
 #    endif
 
-template <class _Key, class _Allocator, class = enable_if_t<__is_allocator<_Allocator>::value, void>>
+template <class _Key, class _Allocator, class = enable_if_t<__is_allocator_v<_Allocator>>>
 multiset(initializer_list<_Key>, _Allocator) -> multiset<_Key, less<_Key>, _Allocator>;
 #  endif
 
-#  ifndef _LIBCPP_CXX03_LANG
+#  if _LIBCPP_STD_VER >= 14
+template <class _Alg, class _Key, class _Compare, class _Allocator>
+struct __specialized_algorithm<_Alg, __single_range<multiset<_Key, _Compare, _Allocator>>> {
+  using __set _LIBCPP_NODEBUG = multiset<_Key, _Compare, _Allocator>;
 
-template <class _Key, class _Compare, class _Allocator>
-multiset<_Key, _Compare, _Allocator>::multiset(multiset&& __s, const allocator_type& __a)
-    : __tree_(std::move(__s.__tree_), __a) {
-  if (__a != __s.get_allocator()) {
-    const_iterator __e = cend();
-    while (!__s.empty())
-      insert(__e, std::move(__s.__tree_.remove(__s.begin())->__value_));
+  static const bool __has_algorithm =
+      __specialized_algorithm<_Alg, __single_range<typename __set::__base>>::__has_algorithm;
+
+  // set's begin() and end() are identical with and without const qualification
+  template <class... _Args>
+  _LIBCPP_HIDE_FROM_ABI static auto operator()(const __set& __set, _Args&&... __args) {
+    return __specialized_algorithm<_Alg, __single_range<typename __set::__base>>()(
+        __set.__tree_, std::forward<_Args>(__args)...);
   }
-}
-
-#  endif // _LIBCPP_CXX03_LANG
+};
+#  endif
 
 template <class _Key, class _Compare, class _Allocator>
 inline _LIBCPP_HIDE_FROM_ABI bool
diff --git a/lib/libcxx/include/shared_mutex b/lib/libcxx/include/shared_mutex
index 8c02e348e4..028bbf5650 100644
--- a/lib/libcxx/include/shared_mutex
+++ b/lib/libcxx/include/shared_mutex
@@ -138,6 +138,7 @@ template <class Mutex>
 #    include <__mutex/tag_types.h>
 #    include <__mutex/unique_lock.h>
 #    include <__system_error/throw_system_error.h>
+#    include <__utility/move.h>
 #    include <__utility/swap.h>
 #    include <cerrno>
 #    include <version>
@@ -340,14 +341,8 @@ public:
   }
 
   _LIBCPP_HIDE_FROM_ABI shared_lock& operator=(shared_lock&& __u) _NOEXCEPT {
-    if (__owns_)
-      __m_->unlock_shared();
-    __m_        = nullptr;
-    __owns_     = false;
-    __m_        = __u.__m_;
-    __owns_     = __u.__owns_;
-    __u.__m_    = nullptr;
-    __u.__owns_ = false;
+    if (this != std::addressof(__u))
+      shared_lock(std::move(__u)).swap(*this);
     return *this;
   }
 
diff --git a/lib/libcxx/include/span b/lib/libcxx/include/span
index 3d4f9e4ba7..1911badd88 100644
--- a/lib/libcxx/include/span
+++ b/lib/libcxx/include/span
@@ -310,30 +310,32 @@ public:
   }
 
   template <size_t _Count>
-  _LIBCPP_HIDE_FROM_ABI constexpr span<element_type, _Count> first() const noexcept {
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr span<element_type, _Count> first() const noexcept {
     static_assert(_Count <= _Extent, "span<T, N>::first<Count>(): Count out of range");
     return span<element_type, _Count>{data(), _Count};
   }
 
   template <size_t _Count>
-  _LIBCPP_HIDE_FROM_ABI constexpr span<element_type, _Count> last() const noexcept {
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr span<element_type, _Count> last() const noexcept {
     static_assert(_Count <= _Extent, "span<T, N>::last<Count>(): Count out of range");
     return span<element_type, _Count>{data() + size() - _Count, _Count};
   }
 
-  _LIBCPP_HIDE_FROM_ABI constexpr span<element_type, dynamic_extent> first(size_type __count) const noexcept {
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr span<element_type, dynamic_extent>
+  first(size_type __count) const noexcept {
     _LIBCPP_ASSERT_VALID_ELEMENT_ACCESS(__count <= size(), "span<T, N>::first(count): count out of range");
     return {data(), __count};
   }
 
-  _LIBCPP_HIDE_FROM_ABI constexpr span<element_type, dynamic_extent> last(size_type __count) const noexcept {
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr span<element_type, dynamic_extent>
+  last(size_type __count) const noexcept {
     _LIBCPP_ASSERT_VALID_ELEMENT_ACCESS(__count <= size(), "span<T, N>::last(count): count out of range");
     return {data() + size() - __count, __count};
   }
 
   template <size_t _Offset, size_t _Count = dynamic_extent>
-  _LIBCPP_HIDE_FROM_ABI constexpr auto
-  subspan() const noexcept -> span<element_type, _Count != dynamic_extent ? _Count : _Extent - _Offset> {
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr auto subspan() const noexcept
+      -> span<element_type, _Count != dynamic_extent ? _Count : _Extent - _Offset> {
     static_assert(_Offset <= _Extent, "span<T, N>::subspan<Offset, Count>(): Offset out of range");
     static_assert(_Count == dynamic_extent || _Count <= _Extent - _Offset,
                   "span<T, N>::subspan<Offset, Count>(): Offset + Count out of range");
@@ -342,7 +344,7 @@ public:
     return _ReturnType{data() + _Offset, _Count == dynamic_extent ? size() - _Offset : _Count};
   }
 
-  _LIBCPP_HIDE_FROM_ABI constexpr span<element_type, dynamic_extent>
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr span<element_type, dynamic_extent>
   subspan(size_type __offset, size_type __count = dynamic_extent) const noexcept {
     _LIBCPP_ASSERT_VALID_ELEMENT_ACCESS(__offset <= size(), "span<T, N>::subspan(offset, count): offset out of range");
     if (__count == dynamic_extent)
@@ -352,52 +354,58 @@ public:
     return {data() + __offset, __count};
   }
 
-  _LIBCPP_HIDE_FROM_ABI constexpr size_type size() const noexcept { return _Extent; }
-  _LIBCPP_HIDE_FROM_ABI constexpr size_type size_bytes() const noexcept { return _Extent * sizeof(element_type); }
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr size_type size() const noexcept { return _Extent; }
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr size_type size_bytes() const noexcept {
+    return _Extent * sizeof(element_type);
+  }
   [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr bool empty() const noexcept { return _Extent == 0; }
 
-  _LIBCPP_HIDE_FROM_ABI constexpr reference operator[](size_type __idx) const noexcept {
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr reference operator[](size_type __idx) const noexcept {
     _LIBCPP_ASSERT_VALID_ELEMENT_ACCESS(__idx < size(), "span<T, N>::operator[](index): index out of range");
     return __data_[__idx];
   }
 
 #    if _LIBCPP_STD_VER >= 26
-  _LIBCPP_HIDE_FROM_ABI constexpr reference at(size_type __index) const {
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr reference at(size_type __index) const {
     if (__index >= size())
       std::__throw_out_of_range("span");
     return __data_[__index];
   }
 #    endif
 
-  _LIBCPP_HIDE_FROM_ABI constexpr reference front() const noexcept {
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr reference front() const noexcept {
     _LIBCPP_ASSERT_VALID_ELEMENT_ACCESS(!empty(), "span<T, N>::front() on empty span");
     return __data_[0];
   }
 
-  _LIBCPP_HIDE_FROM_ABI constexpr reference back() const noexcept {
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr reference back() const noexcept {
     _LIBCPP_ASSERT_VALID_ELEMENT_ACCESS(!empty(), "span<T, N>::back() on empty span");
     return __data_[size() - 1];
   }
 
-  _LIBCPP_HIDE_FROM_ABI constexpr pointer data() const noexcept { return __data_; }
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr pointer data() const noexcept { return __data_; }
 
   // [span.iter], span iterator support
-  _LIBCPP_HIDE_FROM_ABI constexpr iterator begin() const noexcept {
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr iterator begin() const noexcept {
 #    ifdef _LIBCPP_ABI_BOUNDED_ITERATORS
     return std::__make_bounded_iter(data(), data(), data() + size());
 #    else
     return iterator(data());
 #    endif
   }
-  _LIBCPP_HIDE_FROM_ABI constexpr iterator end() const noexcept {
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr iterator end() const noexcept {
 #    ifdef _LIBCPP_ABI_BOUNDED_ITERATORS
     return std::__make_bounded_iter(data() + size(), data(), data() + size());
 #    else
     return iterator(data() + size());
 #    endif
   }
-  _LIBCPP_HIDE_FROM_ABI constexpr reverse_iterator rbegin() const noexcept { return reverse_iterator(end()); }
-  _LIBCPP_HIDE_FROM_ABI constexpr reverse_iterator rend() const noexcept { return reverse_iterator(begin()); }
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr reverse_iterator rbegin() const noexcept {
+    return reverse_iterator(end());
+  }
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr reverse_iterator rend() const noexcept {
+    return reverse_iterator(begin());
+  }
 
   _LIBCPP_HIDE_FROM_ABI span<const byte, _Extent * sizeof(element_type)> __as_bytes() const noexcept {
     return span<const byte, _Extent * sizeof(element_type)>{reinterpret_cast<const byte*>(data()), size_bytes()};
@@ -478,36 +486,38 @@ public:
       : __data_{__other.data()}, __size_{__other.size()} {}
 
   template <size_t _Count>
-  _LIBCPP_HIDE_FROM_ABI constexpr span<element_type, _Count> first() const noexcept {
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr span<element_type, _Count> first() const noexcept {
     _LIBCPP_ASSERT_VALID_ELEMENT_ACCESS(_Count <= size(), "span<T>::first<Count>(): Count out of range");
     return span<element_type, _Count>{data(), _Count};
   }
 
   template <size_t _Count>
-  _LIBCPP_HIDE_FROM_ABI constexpr span<element_type, _Count> last() const noexcept {
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr span<element_type, _Count> last() const noexcept {
     _LIBCPP_ASSERT_VALID_ELEMENT_ACCESS(_Count <= size(), "span<T>::last<Count>(): Count out of range");
     return span<element_type, _Count>{data() + size() - _Count, _Count};
   }
 
-  _LIBCPP_HIDE_FROM_ABI constexpr span<element_type, dynamic_extent> first(size_type __count) const noexcept {
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr span<element_type, dynamic_extent>
+  first(size_type __count) const noexcept {
     _LIBCPP_ASSERT_VALID_ELEMENT_ACCESS(__count <= size(), "span<T>::first(count): count out of range");
     return {data(), __count};
   }
 
-  _LIBCPP_HIDE_FROM_ABI constexpr span<element_type, dynamic_extent> last(size_type __count) const noexcept {
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr span<element_type, dynamic_extent>
+  last(size_type __count) const noexcept {
     _LIBCPP_ASSERT_VALID_ELEMENT_ACCESS(__count <= size(), "span<T>::last(count): count out of range");
     return {data() + size() - __count, __count};
   }
 
   template <size_t _Offset, size_t _Count = dynamic_extent>
-  _LIBCPP_HIDE_FROM_ABI constexpr span<element_type, _Count> subspan() const noexcept {
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr span<element_type, _Count> subspan() const noexcept {
     _LIBCPP_ASSERT_VALID_ELEMENT_ACCESS(_Offset <= size(), "span<T>::subspan<Offset, Count>(): Offset out of range");
     _LIBCPP_ASSERT_VALID_ELEMENT_ACCESS(_Count == dynamic_extent || _Count <= size() - _Offset,
                                         "span<T>::subspan<Offset, Count>(): Offset + Count out of range");
     return span<element_type, _Count>{data() + _Offset, _Count == dynamic_extent ? size() - _Offset : _Count};
   }
 
-  constexpr span<element_type, dynamic_extent> _LIBCPP_HIDE_FROM_ABI
+  [[nodiscard]] constexpr span<element_type, dynamic_extent> _LIBCPP_HIDE_FROM_ABI
   subspan(size_type __offset, size_type __count = dynamic_extent) const noexcept {
     _LIBCPP_ASSERT_VALID_ELEMENT_ACCESS(__offset <= size(), "span<T>::subspan(offset, count): offset out of range");
     if (__count == dynamic_extent)
@@ -517,52 +527,58 @@ public:
     return {data() + __offset, __count};
   }
 
-  _LIBCPP_HIDE_FROM_ABI constexpr size_type size() const noexcept { return __size_; }
-  _LIBCPP_HIDE_FROM_ABI constexpr size_type size_bytes() const noexcept { return __size_ * sizeof(element_type); }
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr size_type size() const noexcept { return __size_; }
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr size_type size_bytes() const noexcept {
+    return __size_ * sizeof(element_type);
+  }
   [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr bool empty() const noexcept { return __size_ == 0; }
 
-  _LIBCPP_HIDE_FROM_ABI constexpr reference operator[](size_type __idx) const noexcept {
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr reference operator[](size_type __idx) const noexcept {
     _LIBCPP_ASSERT_VALID_ELEMENT_ACCESS(__idx < size(), "span<T>::operator[](index): index out of range");
     return __data_[__idx];
   }
 
 #    if _LIBCPP_STD_VER >= 26
-  _LIBCPP_HIDE_FROM_ABI constexpr reference at(size_type __index) const {
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr reference at(size_type __index) const {
     if (__index >= size())
       std::__throw_out_of_range("span");
     return __data_[__index];
   }
 #    endif
 
-  _LIBCPP_HIDE_FROM_ABI constexpr reference front() const noexcept {
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr reference front() const noexcept {
     _LIBCPP_ASSERT_VALID_ELEMENT_ACCESS(!empty(), "span<T>::front() on empty span");
     return __data_[0];
   }
 
-  _LIBCPP_HIDE_FROM_ABI constexpr reference back() const noexcept {
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr reference back() const noexcept {
     _LIBCPP_ASSERT_VALID_ELEMENT_ACCESS(!empty(), "span<T>::back() on empty span");
     return __data_[size() - 1];
   }
 
-  _LIBCPP_HIDE_FROM_ABI constexpr pointer data() const noexcept { return __data_; }
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr pointer data() const noexcept { return __data_; }
 
   // [span.iter], span iterator support
-  _LIBCPP_HIDE_FROM_ABI constexpr iterator begin() const noexcept {
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr iterator begin() const noexcept {
 #    ifdef _LIBCPP_ABI_BOUNDED_ITERATORS
     return std::__make_bounded_iter(data(), data(), data() + size());
 #    else
     return iterator(data());
 #    endif
   }
-  _LIBCPP_HIDE_FROM_ABI constexpr iterator end() const noexcept {
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr iterator end() const noexcept {
 #    ifdef _LIBCPP_ABI_BOUNDED_ITERATORS
     return std::__make_bounded_iter(data() + size(), data(), data() + size());
 #    else
     return iterator(data() + size());
 #    endif
   }
-  _LIBCPP_HIDE_FROM_ABI constexpr reverse_iterator rbegin() const noexcept { return reverse_iterator(end()); }
-  _LIBCPP_HIDE_FROM_ABI constexpr reverse_iterator rend() const noexcept { return reverse_iterator(begin()); }
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr reverse_iterator rbegin() const noexcept {
+    return reverse_iterator(end());
+  }
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr reverse_iterator rend() const noexcept {
+    return reverse_iterator(begin());
+  }
 
   _LIBCPP_HIDE_FROM_ABI span<const byte, dynamic_extent> __as_bytes() const noexcept {
     return {reinterpret_cast<const byte*>(data()), size_bytes()};
@@ -585,13 +601,13 @@ inline constexpr bool ranges::enable_view<span<_ElementType, _Extent>> = true;
 
 //  as_bytes & as_writable_bytes
 template <class _Tp, size_t _Extent>
-_LIBCPP_HIDE_FROM_ABI auto as_bytes(span<_Tp, _Extent> __s) noexcept {
+[[nodiscard]] _LIBCPP_HIDE_FROM_ABI auto as_bytes(span<_Tp, _Extent> __s) noexcept {
   return __s.__as_bytes();
 }
 
 template <class _Tp, size_t _Extent>
   requires(!is_const_v<_Tp>)
-_LIBCPP_HIDE_FROM_ABI auto as_writable_bytes(span<_Tp, _Extent> __s) noexcept {
+[[nodiscard]] _LIBCPP_HIDE_FROM_ABI auto as_writable_bytes(span<_Tp, _Extent> __s) noexcept {
   return __s.__as_writable_bytes();
 }
 
diff --git a/lib/libcxx/include/sstream b/lib/libcxx/include/sstream
index 682a28fd4d..a42e8fbc9b 100644
--- a/lib/libcxx/include/sstream
+++ b/lib/libcxx/include/sstream
@@ -461,15 +461,15 @@ public:
   // [stringbuf.members] Member functions:
 
 #    if _LIBCPP_STD_VER >= 20
-  _LIBCPP_HIDE_FROM_ABI allocator_type get_allocator() const noexcept { return __str_.get_allocator(); }
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI allocator_type get_allocator() const noexcept { return __str_.get_allocator(); }
 #    endif
 
 #    if _LIBCPP_STD_VER <= 17 || defined(_LIBCPP_BUILDING_LIBRARY)
-  string_type str() const;
+  [[__nodiscard__]] string_type str() const;
 #    else
-  _LIBCPP_HIDE_FROM_ABI string_type str() const& { return str(__str_.get_allocator()); }
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI string_type str() const& { return str(__str_.get_allocator()); }
 
-  _LIBCPP_HIDE_FROM_ABI string_type str() && {
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI string_type str() && {
     const basic_string_view<_CharT, _Traits> __view = view();
     typename string_type::size_type __pos           = __view.empty() ? 0 : __view.data() - __str_.data();
     // In C++23, this is just string_type(std::move(__str_), __pos, __view.size(), __str_.get_allocator());
@@ -484,12 +484,12 @@ public:
 
 #    if _LIBCPP_STD_VER >= 20
   template <class _SAlloc>
-    requires __is_allocator<_SAlloc>::value
-  _LIBCPP_HIDE_FROM_ABI basic_string<char_type, traits_type, _SAlloc> str(const _SAlloc& __sa) const {
+    requires __is_allocator_v<_SAlloc>
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI basic_string<char_type, traits_type, _SAlloc> str(const _SAlloc& __sa) const {
     return basic_string<_CharT, _Traits, _SAlloc>(view(), __sa);
   }
 
-  _LIBCPP_HIDE_FROM_ABI basic_string_view<char_type, traits_type> view() const noexcept;
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI basic_string_view<char_type, traits_type> view() const noexcept;
 #    endif // _LIBCPP_STD_VER >= 20
 
   void str(const string_type& __s) {
@@ -949,26 +949,28 @@ public:
   }
 
   // [istringstream.members] Member functions:
-  _LIBCPP_HIDE_FROM_ABI basic_stringbuf<char_type, traits_type, allocator_type>* rdbuf() const {
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI basic_stringbuf<char_type, traits_type, allocator_type>* rdbuf() const {
     return const_cast<basic_stringbuf<char_type, traits_type, allocator_type>*>(std::addressof(__sb_));
   }
 
 #    if _LIBCPP_STD_VER <= 17 || defined(_LIBCPP_BUILDING_LIBRARY)
-  _LIBCPP_HIDE_FROM_ABI string_type str() const { return __sb_.str(); }
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI string_type str() const { return __sb_.str(); }
 #    else
-  _LIBCPP_HIDE_FROM_ABI string_type str() const& { return __sb_.str(); }
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI string_type str() const& { return __sb_.str(); }
 
-  _LIBCPP_HIDE_FROM_ABI string_type str() && { return std::move(__sb_).str(); }
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI string_type str() && { return std::move(__sb_).str(); }
 #    endif
 
 #    if _LIBCPP_STD_VER >= 20
   template <class _SAlloc>
-    requires __is_allocator<_SAlloc>::value
-  _LIBCPP_HIDE_FROM_ABI basic_string<char_type, traits_type, _SAlloc> str(const _SAlloc& __sa) const {
+    requires __is_allocator_v<_SAlloc>
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI basic_string<char_type, traits_type, _SAlloc> str(const _SAlloc& __sa) const {
     return __sb_.str(__sa);
   }
 
-  _LIBCPP_HIDE_FROM_ABI basic_string_view<char_type, traits_type> view() const noexcept { return __sb_.view(); }
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI basic_string_view<char_type, traits_type> view() const noexcept {
+    return __sb_.view();
+  }
 #    endif // _LIBCPP_STD_VER >= 20
 
   _LIBCPP_HIDE_FROM_ABI void str(const string_type& __s) { __sb_.str(__s); }
@@ -1087,26 +1089,28 @@ public:
   }
 
   // [ostringstream.members] Member functions:
-  _LIBCPP_HIDE_FROM_ABI basic_stringbuf<char_type, traits_type, allocator_type>* rdbuf() const {
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI basic_stringbuf<char_type, traits_type, allocator_type>* rdbuf() const {
     return const_cast<basic_stringbuf<char_type, traits_type, allocator_type>*>(std::addressof(__sb_));
   }
 
 #    if _LIBCPP_STD_VER <= 17 || defined(_LIBCPP_BUILDING_LIBRARY)
-  _LIBCPP_HIDE_FROM_ABI string_type str() const { return __sb_.str(); }
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI string_type str() const { return __sb_.str(); }
 #    else
-  _LIBCPP_HIDE_FROM_ABI string_type str() const& { return __sb_.str(); }
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI string_type str() const& { return __sb_.str(); }
 
-  _LIBCPP_HIDE_FROM_ABI string_type str() && { return std::move(__sb_).str(); }
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI string_type str() && { return std::move(__sb_).str(); }
 #    endif
 
 #    if _LIBCPP_STD_VER >= 20
   template <class _SAlloc>
-    requires __is_allocator<_SAlloc>::value
-  _LIBCPP_HIDE_FROM_ABI basic_string<char_type, traits_type, _SAlloc> str(const _SAlloc& __sa) const {
+    requires __is_allocator_v<_SAlloc>
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI basic_string<char_type, traits_type, _SAlloc> str(const _SAlloc& __sa) const {
     return __sb_.str(__sa);
   }
 
-  _LIBCPP_HIDE_FROM_ABI basic_string_view<char_type, traits_type> view() const noexcept { return __sb_.view(); }
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI basic_string_view<char_type, traits_type> view() const noexcept {
+    return __sb_.view();
+  }
 #    endif // _LIBCPP_STD_VER >= 20
 
   _LIBCPP_HIDE_FROM_ABI void str(const string_type& __s) { __sb_.str(__s); }
@@ -1227,26 +1231,28 @@ public:
   }
 
   // [stringstream.members] Member functions:
-  _LIBCPP_HIDE_FROM_ABI basic_stringbuf<char_type, traits_type, allocator_type>* rdbuf() const {
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI basic_stringbuf<char_type, traits_type, allocator_type>* rdbuf() const {
     return const_cast<basic_stringbuf<char_type, traits_type, allocator_type>*>(std::addressof(__sb_));
   }
 
 #    if _LIBCPP_STD_VER <= 17 || defined(_LIBCPP_BUILDING_LIBRARY)
-  _LIBCPP_HIDE_FROM_ABI string_type str() const { return __sb_.str(); }
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI string_type str() const { return __sb_.str(); }
 #    else
-  _LIBCPP_HIDE_FROM_ABI string_type str() const& { return __sb_.str(); }
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI string_type str() const& { return __sb_.str(); }
 
-  _LIBCPP_HIDE_FROM_ABI string_type str() && { return std::move(__sb_).str(); }
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI string_type str() && { return std::move(__sb_).str(); }
 #    endif
 
 #    if _LIBCPP_STD_VER >= 20
   template <class _SAlloc>
-    requires __is_allocator<_SAlloc>::value
-  _LIBCPP_HIDE_FROM_ABI basic_string<char_type, traits_type, _SAlloc> str(const _SAlloc& __sa) const {
+    requires __is_allocator_v<_SAlloc>
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI basic_string<char_type, traits_type, _SAlloc> str(const _SAlloc& __sa) const {
     return __sb_.str(__sa);
   }
 
-  _LIBCPP_HIDE_FROM_ABI basic_string_view<char_type, traits_type> view() const noexcept { return __sb_.view(); }
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI basic_string_view<char_type, traits_type> view() const noexcept {
+    return __sb_.view();
+  }
 #    endif // _LIBCPP_STD_VER >= 20
 
   _LIBCPP_HIDE_FROM_ABI void str(const string_type& __s) { __sb_.str(__s); }
diff --git a/lib/libcxx/include/stack b/lib/libcxx/include/stack
index 19d09373e2..537b82210b 100644
--- a/lib/libcxx/include/stack
+++ b/lib/libcxx/include/stack
@@ -235,9 +235,9 @@ public:
 #  endif
 
   [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI bool empty() const { return c.empty(); }
-  _LIBCPP_HIDE_FROM_ABI size_type size() const { return c.size(); }
-  _LIBCPP_HIDE_FROM_ABI reference top() { return c.back(); }
-  _LIBCPP_HIDE_FROM_ABI const_reference top() const { return c.back(); }
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI size_type size() const { return c.size(); }
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI reference top() { return c.back(); }
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI const_reference top() const { return c.back(); }
 
   _LIBCPP_HIDE_FROM_ABI void push(const value_type& __v) { c.push_back(__v); }
 #  ifndef _LIBCPP_CXX03_LANG
@@ -294,19 +294,19 @@ public:
 };
 
 #  if _LIBCPP_STD_VER >= 17
-template <class _Container, class = enable_if_t<!__is_allocator<_Container>::value> >
+template <class _Container, class = enable_if_t<!__is_allocator_v<_Container>>>
 stack(_Container) -> stack<typename _Container::value_type, _Container>;
 
 template <class _Container,
           class _Alloc,
-          class = enable_if_t<!__is_allocator<_Container>::value>,
+          class = enable_if_t<!__is_allocator_v<_Container>>,
           class = enable_if_t<uses_allocator<_Container, _Alloc>::value> >
 stack(_Container, _Alloc) -> stack<typename _Container::value_type, _Container>;
 #  endif
 
 #  if _LIBCPP_STD_VER >= 23
 template <class _InputIterator, __enable_if_t<__has_input_iterator_category<_InputIterator>::value, int> = 0>
-stack(_InputIterator, _InputIterator) -> stack<__iter_value_type<_InputIterator>>;
+stack(_InputIterator, _InputIterator) -> stack<__iterator_value_type<_InputIterator>>;
 
 template <ranges::input_range _Range>
 stack(from_range_t, _Range&&) -> stack<ranges::range_value_t<_Range>>;
@@ -314,15 +314,13 @@ stack(from_range_t, _Range&&) -> stack<ranges::range_value_t<_Range>>;
 template <class _InputIterator,
           class _Alloc,
           __enable_if_t<__has_input_iterator_category<_InputIterator>::value, int> = 0,
-          __enable_if_t<__is_allocator<_Alloc>::value, int>                        = 0>
-stack(_InputIterator,
-      _InputIterator,
-      _Alloc) -> stack<__iter_value_type<_InputIterator>, deque<__iter_value_type<_InputIterator>, _Alloc>>;
+          __enable_if_t<__is_allocator_v<_Alloc>, int>                             = 0>
+stack(_InputIterator, _InputIterator, _Alloc)
+    -> stack<__iterator_value_type<_InputIterator>, deque<__iterator_value_type<_InputIterator>, _Alloc>>;
 
-template <ranges::input_range _Range, class _Alloc, __enable_if_t<__is_allocator<_Alloc>::value, int> = 0>
-stack(from_range_t,
-      _Range&&,
-      _Alloc) -> stack<ranges::range_value_t<_Range>, deque<ranges::range_value_t<_Range>, _Alloc>>;
+template <ranges::input_range _Range, class _Alloc, __enable_if_t<__is_allocator_v<_Alloc>, int> = 0>
+stack(from_range_t, _Range&&, _Alloc)
+    -> stack<ranges::range_value_t<_Range>, deque<ranges::range_value_t<_Range>, _Alloc>>;
 
 #  endif
 
diff --git a/lib/libcxx/include/stddef.h b/lib/libcxx/include/stddef.h
index 46bed2a1e1..e9e259da15 100644
--- a/lib/libcxx/include/stddef.h
+++ b/lib/libcxx/include/stddef.h
@@ -25,24 +25,24 @@ Types:
 */
 
 #if defined(__cplusplus) && __cplusplus < 201103L && defined(_LIBCPP_USE_FROZEN_CXX03_HEADERS)
-#  include <__cxx03/stddef.h>
+#  include <__cxx03/__config>
 #else
 #  include <__config>
+#endif
 
-#  if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
-#    pragma GCC system_header
-#  endif
+#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
+#  pragma GCC system_header
+#endif
 
 // Note: This include is outside of header guards because we sometimes get included multiple times
 //       with different defines and the underlying <stddef.h> will know how to deal with that.
-#  include_next <stddef.h>
+#include_next <stddef.h>
 
-#  ifndef _LIBCPP_STDDEF_H
-#    define _LIBCPP_STDDEF_H
+#ifndef _LIBCPP_STDDEF_H
+#  define _LIBCPP_STDDEF_H
 
-#    ifdef __cplusplus
+#  ifdef __cplusplus
 typedef decltype(nullptr) nullptr_t;
-#    endif
-#  endif // defined(__cplusplus) && __cplusplus < 201103L && defined(_LIBCPP_USE_FROZEN_CXX03_HEADERS)
+#  endif
 
 #endif // _LIBCPP_STDDEF_H
diff --git a/lib/libcxx/include/stdexcept b/lib/libcxx/include/stdexcept
index 85e11629bd..d01de5c462 100644
--- a/lib/libcxx/include/stdexcept
+++ b/lib/libcxx/include/stdexcept
@@ -91,7 +91,7 @@ public:
 
   ~logic_error() _NOEXCEPT override;
 
-  const char* what() const _NOEXCEPT override;
+  [[__nodiscard__]] const char* what() const _NOEXCEPT override;
 #  else
 
 public:
@@ -115,7 +115,7 @@ public:
 
   ~runtime_error() _NOEXCEPT override;
 
-  const char* what() const _NOEXCEPT override;
+  [[__nodiscard__]] const char* what() const _NOEXCEPT override;
 #  else
 
 public:
diff --git a/lib/libcxx/include/stdio.h b/lib/libcxx/include/stdio.h
index 20b845a00b..4ce98d178e 100644
--- a/lib/libcxx/include/stdio.h
+++ b/lib/libcxx/include/stdio.h
@@ -88,35 +88,34 @@ void perror(const char* s);
 */
 
 #if defined(__cplusplus) && __cplusplus < 201103L && defined(_LIBCPP_USE_FROZEN_CXX03_HEADERS)
-#  include <__cxx03/stdio.h>
+#  include <__cxx03/__config>
 #else
 #  include <__config>
+#endif
 
-#  if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
-#    pragma GCC system_header
-#  endif
+#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
+#  pragma GCC system_header
+#endif
 
 // The inclusion of the system's <stdio.h> is intentionally done once outside of any include
 // guards because some code expects to be able to include the underlying system header multiple
 // times to get different definitions based on the macros that are set before inclusion.
-#  if __has_include_next(<stdio.h>)
-#    include_next <stdio.h>
-#  endif
+#if __has_include_next(<stdio.h>)
+#  include_next <stdio.h>
+#endif
 
-#  ifndef _LIBCPP_STDIO_H
-#    define _LIBCPP_STDIO_H
+#ifndef _LIBCPP_STDIO_H
+#  define _LIBCPP_STDIO_H
 
-#    ifdef __cplusplus
+#  ifdef __cplusplus
 
-#      undef getc
-#      undef putc
-#      undef clearerr
-#      undef feof
-#      undef ferror
-#      undef putchar
-#      undef getchar
+#    undef getc
+#    undef putc
+#    undef clearerr
+#    undef feof
+#    undef ferror
+#    undef putchar
+#    undef getchar
 
-#    endif // __cplusplus
-#  endif   // _LIBCPP_STDIO_H
-
-#endif // defined(__cplusplus) && __cplusplus < 201103L && defined(_LIBCPP_USE_FROZEN_CXX03_HEADERS)
+#  endif // __cplusplus
+#endif   // _LIBCPP_STDIO_H
diff --git a/lib/libcxx/include/streambuf b/lib/libcxx/include/streambuf
index 585ae7af65..f9dd5b7451 100644
--- a/lib/libcxx/include/streambuf
+++ b/lib/libcxx/include/streambuf
@@ -119,6 +119,7 @@ protected:
 #    include <__locale>
 #    include <__type_traits/is_same.h>
 #    include <__utility/is_valid_range.h>
+#    include <__utility/scope_guard.h>
 #    include <climits>
 #    include <ios>
 #    include <iosfwd>
@@ -149,47 +150,56 @@ public:
   virtual ~basic_streambuf() {}
 
   // 27.6.2.2.1 locales:
-  inline _LIBCPP_HIDE_FROM_ABI_AFTER_V1 locale pubimbue(const locale& __loc) {
+  inline _LIBCPP_HIDE_FROM_ABI_SINCE_LLVM8 locale pubimbue(const locale& __loc) {
     imbue(__loc);
     locale __r = __loc_;
     __loc_     = __loc;
     return __r;
   }
 
-  inline _LIBCPP_HIDE_FROM_ABI_AFTER_V1 locale getloc() const { return __loc_; }
+  [[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI_SINCE_LLVM8 locale getloc() const { return __loc_; }
 
   // 27.6.2.2.2 buffer and positioning:
-  inline _LIBCPP_HIDE_FROM_ABI_AFTER_V1 basic_streambuf* pubsetbuf(char_type* __s, streamsize __n) {
+  inline _LIBCPP_HIDE_FROM_ABI_SINCE_LLVM8 basic_streambuf* pubsetbuf(char_type* __s, streamsize __n) {
     return setbuf(__s, __n);
   }
 
-  inline _LIBCPP_HIDE_FROM_ABI_AFTER_V1 pos_type
+  inline _LIBCPP_HIDE_FROM_ABI_SINCE_LLVM8 pos_type
   pubseekoff(off_type __off, ios_base::seekdir __way, ios_base::openmode __which = ios_base::in | ios_base::out) {
     return seekoff(__off, __way, __which);
   }
 
-  inline _LIBCPP_HIDE_FROM_ABI_AFTER_V1 pos_type
+  inline _LIBCPP_HIDE_FROM_ABI_SINCE_LLVM8 pos_type
   pubseekpos(pos_type __sp, ios_base::openmode __which = ios_base::in | ios_base::out) {
     return seekpos(__sp, __which);
   }
 
-  inline _LIBCPP_HIDE_FROM_ABI_AFTER_V1 int pubsync() { return sync(); }
+  inline _LIBCPP_HIDE_FROM_ABI_SINCE_LLVM8 int pubsync() { return sync(); }
 
   // Get and put areas:
   // 27.6.2.2.3 Get area:
-  inline _LIBCPP_HIDE_FROM_ABI_AFTER_V1 streamsize in_avail() {
+  [[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI_SINCE_LLVM8 streamsize in_avail() {
+    __check_invariants();
+    auto __guard = std::__make_scope_guard([this] { this->__check_invariants(); });
+
     if (gptr() < egptr())
       return static_cast<streamsize>(egptr() - gptr());
     return showmanyc();
   }
 
-  inline _LIBCPP_HIDE_FROM_ABI_AFTER_V1 int_type snextc() {
+  [[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI_SINCE_LLVM8 int_type snextc() {
+    __check_invariants();
+    auto __guard = std::__make_scope_guard([this] { this->__check_invariants(); });
+
     if (sbumpc() == traits_type::eof())
       return traits_type::eof();
     return sgetc();
   }
 
-  inline _LIBCPP_HIDE_FROM_ABI_AFTER_V1 int_type sbumpc() {
+  inline _LIBCPP_HIDE_FROM_ABI_SINCE_LLVM8 int_type sbumpc() {
+    __check_invariants();
+    auto __guard = std::__make_scope_guard([this] { this->__check_invariants(); });
+
     if (gptr() == egptr())
       return uflow();
     int_type __c = traits_type::to_int_type(*gptr());
@@ -197,23 +207,32 @@ public:
     return __c;
   }
 
-  inline _LIBCPP_HIDE_FROM_ABI_AFTER_V1 int_type sgetc() {
+  [[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI_SINCE_LLVM8 int_type sgetc() {
+    __check_invariants();
+    auto __guard = std::__make_scope_guard([this] { this->__check_invariants(); });
+
     if (gptr() == egptr())
       return underflow();
     return traits_type::to_int_type(*gptr());
   }
 
-  inline _LIBCPP_HIDE_FROM_ABI_AFTER_V1 streamsize sgetn(char_type* __s, streamsize __n) { return xsgetn(__s, __n); }
+  inline _LIBCPP_HIDE_FROM_ABI_SINCE_LLVM8 streamsize sgetn(char_type* __s, streamsize __n) { return xsgetn(__s, __n); }
 
   // 27.6.2.2.4 Putback:
-  inline _LIBCPP_HIDE_FROM_ABI_AFTER_V1 int_type sputbackc(char_type __c) {
+  inline _LIBCPP_HIDE_FROM_ABI_SINCE_LLVM8 int_type sputbackc(char_type __c) {
+    __check_invariants();
+    auto __guard = std::__make_scope_guard([this] { this->__check_invariants(); });
+
     if (eback() == gptr() || !traits_type::eq(__c, *(gptr() - 1)))
       return pbackfail(traits_type::to_int_type(__c));
     this->gbump(-1);
     return traits_type::to_int_type(*gptr());
   }
 
-  inline _LIBCPP_HIDE_FROM_ABI_AFTER_V1 int_type sungetc() {
+  inline _LIBCPP_HIDE_FROM_ABI_SINCE_LLVM8 int_type sungetc() {
+    __check_invariants();
+    auto __guard = std::__make_scope_guard([this] { this->__check_invariants(); });
+
     if (eback() == gptr())
       return pbackfail();
     this->gbump(-1);
@@ -221,7 +240,10 @@ public:
   }
 
   // 27.6.2.2.5 Put area:
-  inline _LIBCPP_HIDE_FROM_ABI_AFTER_V1 int_type sputc(char_type __c) {
+  inline _LIBCPP_HIDE_FROM_ABI_SINCE_LLVM8 int_type sputc(char_type __c) {
+    __check_invariants();
+    auto __guard = std::__make_scope_guard([this] { this->__check_invariants(); });
+
     if (pptr() == epptr())
       return overflow(traits_type::to_int_type(__c));
     *pptr() = __c;
@@ -229,7 +251,7 @@ public:
     return traits_type::to_int_type(__c);
   }
 
-  inline _LIBCPP_HIDE_FROM_ABI_AFTER_V1 streamsize sputn(const char_type* __s, streamsize __n) {
+  inline _LIBCPP_HIDE_FROM_ABI_SINCE_LLVM8 streamsize sputn(const char_type* __s, streamsize __n) {
     return xsputn(__s, __n);
   }
 
@@ -270,12 +292,12 @@ protected:
   _LIBCPP_HIDE_FROM_ABI char_type* gptr() const { return __ninp_; }
   _LIBCPP_HIDE_FROM_ABI char_type* egptr() const { return __einp_; }
 
-  inline _LIBCPP_HIDE_FROM_ABI_AFTER_V1 void gbump(int __n) { __ninp_ += __n; }
+  inline _LIBCPP_HIDE_FROM_ABI_SINCE_LLVM8 void gbump(int __n) { __ninp_ += __n; }
 
   // gbump takes an int, so it might not be able to represent the offset we want to add.
   _LIBCPP_HIDE_FROM_ABI void __gbump_ptrdiff(ptrdiff_t __n) { __ninp_ += __n; }
 
-  inline _LIBCPP_HIDE_FROM_ABI_AFTER_V1 void setg(char_type* __gbeg, char_type* __gnext, char_type* __gend) {
+  inline _LIBCPP_HIDE_FROM_ABI_SINCE_LLVM8 void setg(char_type* __gbeg, char_type* __gnext, char_type* __gend) {
     _LIBCPP_ASSERT_VALID_INPUT_RANGE(std::__is_valid_range(__gbeg, __gnext), "[gbeg, gnext) must be a valid range");
     _LIBCPP_ASSERT_VALID_INPUT_RANGE(std::__is_valid_range(__gbeg, __gend), "[gbeg, gend) must be a valid range");
     _LIBCPP_ASSERT_VALID_INPUT_RANGE(std::__is_valid_range(__gnext, __gend), "[gnext, gend) must be a valid range");
@@ -289,11 +311,11 @@ protected:
   _LIBCPP_HIDE_FROM_ABI char_type* pptr() const { return __nout_; }
   _LIBCPP_HIDE_FROM_ABI char_type* epptr() const { return __eout_; }
 
-  inline _LIBCPP_HIDE_FROM_ABI_AFTER_V1 void pbump(int __n) { __nout_ += __n; }
+  inline _LIBCPP_HIDE_FROM_ABI_SINCE_LLVM8 void pbump(int __n) { __nout_ += __n; }
 
   _LIBCPP_HIDE_FROM_ABI void __pbump(streamsize __n) { __nout_ += __n; }
 
-  inline _LIBCPP_HIDE_FROM_ABI_AFTER_V1 void setp(char_type* __pbeg, char_type* __pend) {
+  inline _LIBCPP_HIDE_FROM_ABI_SINCE_LLVM8 void setp(char_type* __pbeg, char_type* __pend) {
     _LIBCPP_ASSERT_VALID_INPUT_RANGE(std::__is_valid_range(__pbeg, __pend), "[pbeg, pend) must be a valid range");
     __bout_ = __nout_ = __pbeg;
     __eout_           = __pend;
@@ -317,6 +339,9 @@ protected:
   virtual streamsize showmanyc() { return 0; }
 
   virtual streamsize xsgetn(char_type* __s, streamsize __n) {
+    __check_invariants();
+    auto __guard = std::__make_scope_guard([this] { this->__check_invariants(); });
+
     int_type __c;
     streamsize __i = 0;
     while (__i < __n) {
@@ -338,6 +363,9 @@ protected:
 
   virtual int_type underflow() { return traits_type::eof(); }
   virtual int_type uflow() {
+    __check_invariants();
+    auto __guard = std::__make_scope_guard([this] { this->__check_invariants(); });
+
     if (underflow() == traits_type::eof())
       return traits_type::eof();
     int_type __c = traits_type::to_int_type(*gptr());
@@ -350,6 +378,9 @@ protected:
 
   // 27.6.2.4.5 Put area:
   virtual streamsize xsputn(const char_type* __s, streamsize __n) {
+    __check_invariants();
+    auto __guard = std::__make_scope_guard([this] { this->__check_invariants(); });
+
     streamsize __i = 0;
     while (__i < __n) {
       if (pptr() >= epptr()) {
@@ -370,6 +401,15 @@ protected:
 
   virtual int_type overflow(int_type = traits_type::eof()) { return traits_type::eof(); }
 
+  // This function checks some invariants of the class (it isn't exhaustive).
+  _LIBCPP_HIDE_FROM_ABI void __check_invariants() const {
+    _LIBCPP_ASSERT_INTERNAL(pbase() <= pptr(), "this is an invariant of the class");
+    _LIBCPP_ASSERT_INTERNAL(pptr() <= epptr(), "this is an invariant of the class");
+
+    _LIBCPP_ASSERT_INTERNAL(eback() <= gptr(), "this is an invariant of the class");
+    _LIBCPP_ASSERT_INTERNAL(gptr() <= egptr(), "this is an invariant of the class");
+  }
+
 private:
   locale __loc_;
   char_type* __binp_ = nullptr;
diff --git a/lib/libcxx/include/string b/lib/libcxx/include/string
index d282071abf..34af7efb56 100644
--- a/lib/libcxx/include/string
+++ b/lib/libcxx/include/string
@@ -280,6 +280,8 @@ public:
     basic_string substr(size_type pos = 0, size_type n = npos) const;                           // constexpr in C++20, removed in C++23
     basic_string substr(size_type pos = 0, size_type n = npos) const&;                          // since C++23
     constexpr basic_string substr(size_type pos = 0, size_type n = npos) &&;                    // since C++23
+    constexpr basic_string_view<charT, traits> subview(size_type pos = 0,
+                                                       size_type n = npos) const;               // since C++26
     void swap(basic_string& str)
         noexcept(allocator_traits<allocator_type>::propagate_on_container_swap::value ||
                  allocator_traits<allocator_type>::is_always_equal::value);                     // C++17, constexpr since C++20
@@ -598,9 +600,9 @@ basic_string<char32_t> operator""s( const char32_t *str, size_t len );
 #  include <__debug_utils/sanitizers.h>
 #  include <__format/enable_insertable.h>
 #  include <__functional/hash.h>
+#  include <__functional/is_transparent.h>
 #  include <__functional/unary_function.h>
 #  include <__fwd/string.h>
-#  include <__ios/fpos.h>
 #  include <__iterator/bounded_iter.h>
 #  include <__iterator/distance.h>
 #  include <__iterator/iterator_traits.h>
@@ -620,7 +622,6 @@ basic_string<char32_t> operator""s( const char32_t *str, size_t len );
 #  include <__ranges/concepts.h>
 #  include <__ranges/container_compatible_range.h>
 #  include <__ranges/from_range.h>
-#  include <__ranges/size.h>
 #  include <__string/char_traits.h>
 #  include <__string/extern_template_lists.h>
 #  include <__type_traits/conditional.h>
@@ -628,24 +629,23 @@ basic_string<char32_t> operator""s( const char32_t *str, size_t len );
 #  include <__type_traits/is_allocator.h>
 #  include <__type_traits/is_array.h>
 #  include <__type_traits/is_convertible.h>
+#  include <__type_traits/is_generic_transparent_comparator.h>
 #  include <__type_traits/is_nothrow_assignable.h>
 #  include <__type_traits/is_nothrow_constructible.h>
-#  include <__type_traits/is_replaceable.h>
 #  include <__type_traits/is_same.h>
 #  include <__type_traits/is_standard_layout.h>
 #  include <__type_traits/is_trivially_constructible.h>
 #  include <__type_traits/is_trivially_copyable.h>
 #  include <__type_traits/is_trivially_relocatable.h>
 #  include <__type_traits/remove_cvref.h>
-#  include <__type_traits/void_t.h>
-#  include <__utility/auto_cast.h>
-#  include <__utility/declval.h>
+#  include <__utility/default_three_way_comparator.h>
+#  include <__utility/exception_guard.h>
 #  include <__utility/forward.h>
 #  include <__utility/is_pointer_in_range.h>
 #  include <__utility/move.h>
+#  include <__utility/no_destroy.h>
 #  include <__utility/scope_guard.h>
 #  include <__utility/swap.h>
-#  include <__utility/unreachable.h>
 #  include <climits>
 #  include <cstdio> // EOF
 #  include <cstring>
@@ -700,18 +700,18 @@ __concatenate_strings(const _Allocator& __alloc,
                       __type_identity_t<basic_string_view<_CharT, _Traits> > __str2);
 
 template <class _Iter>
-struct __string_is_trivial_iterator : public false_type {};
+inline const bool __string_is_trivial_iterator_v = false;
 
 template <class _Tp>
-struct __string_is_trivial_iterator<_Tp*> : public is_arithmetic<_Tp> {};
+inline const bool __string_is_trivial_iterator_v<_Tp*> = is_arithmetic<_Tp>::value;
 
 template <class _Iter>
-struct __string_is_trivial_iterator<__wrap_iter<_Iter> > : public __string_is_trivial_iterator<_Iter> {};
+inline const bool __string_is_trivial_iterator_v<__wrap_iter<_Iter> > = __string_is_trivial_iterator_v<_Iter>;
 
 template <class _CharT, class _Traits, class _Tp>
-struct __can_be_converted_to_string_view
-    : public _BoolConstant< is_convertible<const _Tp&, basic_string_view<_CharT, _Traits> >::value &&
-                            !is_convertible<const _Tp&, const _CharT*>::value > {};
+inline const bool __can_be_converted_to_string_view_v =
+    is_convertible<const _Tp&, basic_string_view<_CharT, _Traits> >::value &&
+    !is_convertible<const _Tp&, const _CharT*>::value;
 
 struct __uninitialized_size_tag {};
 struct __init_with_sentinel_tag {};
@@ -756,20 +756,13 @@ public:
   // external memory. In such cases, the destructor is responsible for unpoisoning
   // the memory to avoid triggering false positives.
   // Therefore it's crucial to ensure the destructor is called.
-  //
-  // However, it is replaceable since implementing move-assignment as a destroy + move-construct
-  // will maintain the right ASAN state.
-  using __trivially_relocatable = void;
+  using __trivially_relocatable _LIBCPP_NODEBUG = void;
 #  else
   using __trivially_relocatable _LIBCPP_NODEBUG = __conditional_t<
       __libcpp_is_trivially_relocatable<allocator_type>::value && __libcpp_is_trivially_relocatable<pointer>::value,
       basic_string,
       void>;
 #  endif
-  using __replaceable _LIBCPP_NODEBUG =
-      __conditional_t<__is_replaceable_v<pointer> && __container_allocator_is_replaceable<__alloc_traits>::value,
-                      basic_string,
-                      void>;
 
 #  if __has_feature(address_sanitizer) && _LIBCPP_INSTRUMENTED_WITH_ASAN
   _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 pointer __asan_volatile_wrapper(pointer const& __ptr) const {
@@ -819,12 +812,21 @@ public:
   using reverse_iterator       = std::reverse_iterator<iterator>;
   using const_reverse_iterator = std::reverse_iterator<const_iterator>;
 
+  using __alloc_result _LIBCPP_NODEBUG = __allocation_result<pointer, size_type>;
+
 private:
   static_assert(CHAR_BIT == 8, "This implementation assumes that one byte contains 8 bits");
 
 #  ifdef _LIBCPP_ABI_ALTERNATE_STRING_LAYOUT
 
   struct __long {
+    __long() = default;
+
+    _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 __long(__alloc_result __alloc, size_type __size)
+        : __data_(__alloc.ptr), __size_(__size), __cap_(__alloc.count / __endian_factor), __is_long_(true) {
+      _LIBCPP_ASSERT_INTERNAL(!__fits_in_sso(__alloc.count), "Long capacity should always be larger than the SSO");
+    }
+
     pointer __data_;
     size_type __size_;
     size_type __cap_ : sizeof(size_type) * CHAR_BIT - 1;
@@ -872,6 +874,13 @@ private:
   // some platforms bit fields have a default size rather than the actual
   // size used, e.g., it is 4 bytes on AIX. See D128285 for details.
   struct __long {
+    __long() = default;
+
+    _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 __long(__alloc_result __alloc, size_type __size)
+        : __is_long_(true), __cap_(__alloc.count / __endian_factor), __size_(__size), __data_(__alloc.ptr) {
+      _LIBCPP_ASSERT_INTERNAL(!__fits_in_sso(__alloc.count), "Long capacity should always be larger than the SSO");
+    }
+
     struct _LIBCPP_PACKED {
       size_type __is_long_ : 1;
       size_type __cap_ : sizeof(size_type) * CHAR_BIT - 1;
@@ -898,6 +907,11 @@ private:
   union __rep {
     __short __s;
     __long __l;
+
+    __rep() = default;
+    _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 __rep(__short __r) : __s(__r) {}
+    _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 __rep(__long __r) : __l(__r) {}
+    _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 __rep(__uninitialized_tag) {}
   };
 
   _LIBCPP_COMPRESSED_PAIR(__rep, __rep_, allocator_type, __alloc_);
@@ -917,20 +931,7 @@ private:
   _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 explicit basic_string(
       __uninitialized_size_tag, size_type __size, const allocator_type& __a)
       : __alloc_(__a) {
-    if (__size > max_size())
-      this->__throw_length_error();
-    if (__fits_in_sso(__size)) {
-      __rep_ = __rep();
-      __set_short_size(__size);
-    } else {
-      auto __capacity   = __recommend(__size) + 1;
-      auto __allocation = __alloc_traits::allocate(__alloc_, __capacity);
-      __begin_lifetime(__allocation, __capacity);
-      __set_long_cap(__capacity);
-      __set_long_pointer(__allocation);
-      __set_long_size(__size);
-    }
-    __annotate_new(__size);
+    __init_internal_buffer(__size);
   }
 
   template <class _Iter, class _Sent>
@@ -966,7 +967,7 @@ private:
         std::__wrap_iter<const_pointer>(__get_pointer() + size()));
 #  else
     return const_iterator(__p);
-#  endif // _LIBCPP_ABI_BOUNDED_ITERATORS_IN_STRING
+#  endif                    // _LIBCPP_ABI_BOUNDED_ITERATORS_IN_STRING
   }
 
 public:
@@ -1057,13 +1058,13 @@ public:
   }
 #  endif // _LIBCPP_CXX03_LANG
 
-  template <__enable_if_t<__is_allocator<_Allocator>::value, int> = 0>
+  template <__enable_if_t<__is_allocator_v<_Allocator>, int> = 0>
   _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 basic_string(const _CharT* _LIBCPP_DIAGNOSE_NULLPTR __s) {
     _LIBCPP_ASSERT_NON_NULL(__s != nullptr, "basic_string(const char*) detected nullptr");
     __init(__s, traits_type::length(__s));
   }
 
-  template <__enable_if_t<__is_allocator<_Allocator>::value, int> = 0>
+  template <__enable_if_t<__is_allocator_v<_Allocator>, int> = 0>
   _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20
   basic_string(const _CharT* _LIBCPP_DIAGNOSE_NULLPTR __s, const _Allocator& __a)
       : __alloc_(__a) {
@@ -1075,13 +1076,15 @@ public:
   basic_string(nullptr_t) = delete;
 #  endif
 
-  _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 basic_string(const _CharT* __s, size_type __n) {
+  _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 basic_string(const _CharT* __s, size_type __n)
+      _LIBCPP_DIAGNOSE_NULLPTR_IF(__n != 0 && __s == nullptr, " if n is not zero") {
     _LIBCPP_ASSERT_NON_NULL(__n == 0 || __s != nullptr, "basic_string(const char*, n) detected nullptr");
     __init(__s, __n);
   }
 
   _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20
   basic_string(const _CharT* __s, size_type __n, const _Allocator& __a)
+      _LIBCPP_DIAGNOSE_NULLPTR_IF(__n != 0 && __s == nullptr, " if n is not zero")
       : __alloc_(__a) {
     _LIBCPP_ASSERT_NON_NULL(__n == 0 || __s != nullptr, "basic_string(const char*, n, allocator) detected nullptr");
     __init(__s, __n);
@@ -1110,7 +1113,7 @@ public:
   }
 #  endif
 
-  template <__enable_if_t<__is_allocator<_Allocator>::value, int> = 0>
+  template <__enable_if_t<__is_allocator_v<_Allocator>, int> = 0>
   _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 basic_string(size_type __n, _CharT __c, const _Allocator& __a)
       : __alloc_(__a) {
     __init(__n, __c);
@@ -1135,7 +1138,7 @@ public:
   }
 
   template <class _Tp,
-            __enable_if_t<__can_be_converted_to_string_view<_CharT, _Traits, _Tp>::value &&
+            __enable_if_t<__can_be_converted_to_string_view_v<_CharT, _Traits, _Tp> &&
                               !is_same<__remove_cvref_t<_Tp>, basic_string>::value,
                           int> = 0>
   _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20
@@ -1147,7 +1150,7 @@ public:
   }
 
   template <class _Tp,
-            __enable_if_t<__can_be_converted_to_string_view<_CharT, _Traits, _Tp>::value &&
+            __enable_if_t<__can_be_converted_to_string_view_v<_CharT, _Traits, _Tp> &&
                               !is_same<__remove_cvref_t<_Tp>, basic_string>::value,
                           int> = 0>
   _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 explicit basic_string(const _Tp& __t) {
@@ -1156,7 +1159,7 @@ public:
   }
 
   template <class _Tp,
-            __enable_if_t<__can_be_converted_to_string_view<_CharT, _Traits, _Tp>::value &&
+            __enable_if_t<__can_be_converted_to_string_view_v<_CharT, _Traits, _Tp> &&
                               !is_same<__remove_cvref_t<_Tp>, basic_string>::value,
                           int> = 0>
   _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 explicit basic_string(const _Tp& __t, const allocator_type& __a)
@@ -1201,11 +1204,10 @@ public:
   }
 #  endif // _LIBCPP_CXX03_LANG
 
-  inline _LIBCPP_CONSTEXPR_SINCE_CXX20 ~basic_string() {
-    __annotate_delete();
-    if (__is_long())
-      __alloc_traits::deallocate(__alloc_, __get_long_pointer(), __get_long_cap());
-  }
+  // TODO(boomanaiden154): Once we mark this in destructors as dead on return,
+  // we can use a normal call to __reset_internal_buffer and remove the extra
+  // __rep constructor.
+  inline _LIBCPP_CONSTEXPR_SINCE_CXX20 ~basic_string() { __reset_internal_buffer(__rep(__uninitialized_tag())); }
 
   _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 operator __self_view() const _NOEXCEPT {
     return __self_view(typename __self_view::__assume_valid(), data(), size());
@@ -1215,7 +1217,7 @@ public:
   operator=(const basic_string& __str);
 
   template <class _Tp,
-            __enable_if_t<__can_be_converted_to_string_view<_CharT, _Traits, _Tp>::value &&
+            __enable_if_t<__can_be_converted_to_string_view_v<_CharT, _Traits, _Tp> &&
                               !is_same<__remove_cvref_t<_Tp>, basic_string>::value,
                           int> = 0>
   _LIBCPP_CONSTEXPR_SINCE_CXX20 basic_string& operator=(const _Tp& __t) {
@@ -1243,45 +1245,55 @@ public:
 #  endif
   _LIBCPP_CONSTEXPR_SINCE_CXX20 basic_string& operator=(value_type __c);
 
-  _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 iterator begin() _NOEXCEPT {
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 iterator begin() _NOEXCEPT {
     return __make_iterator(__get_pointer());
   }
-  _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 const_iterator begin() const _NOEXCEPT {
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 const_iterator begin() const _NOEXCEPT {
     return __make_const_iterator(__get_pointer());
   }
-  _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 iterator end() _NOEXCEPT {
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 iterator end() _NOEXCEPT {
     return __make_iterator(__get_pointer() + size());
   }
-  _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 const_iterator end() const _NOEXCEPT {
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 const_iterator end() const _NOEXCEPT {
     return __make_const_iterator(__get_pointer() + size());
   }
 
-  _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 reverse_iterator rbegin() _NOEXCEPT {
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 reverse_iterator rbegin() _NOEXCEPT {
     return reverse_iterator(end());
   }
-  _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 const_reverse_iterator rbegin() const _NOEXCEPT {
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 const_reverse_iterator
+  rbegin() const _NOEXCEPT {
     return const_reverse_iterator(end());
   }
-  _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 reverse_iterator rend() _NOEXCEPT {
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 reverse_iterator rend() _NOEXCEPT {
     return reverse_iterator(begin());
   }
-  _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 const_reverse_iterator rend() const _NOEXCEPT {
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 const_reverse_iterator rend() const _NOEXCEPT {
     return const_reverse_iterator(begin());
   }
 
-  _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 const_iterator cbegin() const _NOEXCEPT { return begin(); }
-  _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 const_iterator cend() const _NOEXCEPT { return end(); }
-  _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 const_reverse_iterator crbegin() const _NOEXCEPT {
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 const_iterator cbegin() const _NOEXCEPT {
+    return begin();
+  }
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 const_iterator cend() const _NOEXCEPT {
+    return end();
+  }
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 const_reverse_iterator
+  crbegin() const _NOEXCEPT {
     return rbegin();
   }
-  _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 const_reverse_iterator crend() const _NOEXCEPT { return rend(); }
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 const_reverse_iterator crend() const _NOEXCEPT {
+    return rend();
+  }
 
-  _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 size_type size() const _NOEXCEPT {
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 size_type size() const _NOEXCEPT {
     return __is_long() ? __get_long_size() : __get_short_size();
   }
-  _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 size_type length() const _NOEXCEPT { return size(); }
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 size_type length() const _NOEXCEPT {
+    return size();
+  }
 
-  _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 size_type max_size() const _NOEXCEPT {
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 size_type max_size() const _NOEXCEPT {
     if (size_type __m = __alloc_traits::max_size(__alloc_); __m <= std::numeric_limits<size_type>::max() / 2) {
       size_type __res = __m - __alignment;
 
@@ -1299,7 +1311,7 @@ public:
     }
   }
 
-  _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 size_type capacity() const _NOEXCEPT {
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 size_type capacity() const _NOEXCEPT {
     return (__is_long() ? __get_long_cap() : static_cast<size_type>(__min_cap)) - 1;
   }
 
@@ -1311,13 +1323,19 @@ public:
 #  if _LIBCPP_STD_VER >= 23
   template <class _Op>
   _LIBCPP_HIDE_FROM_ABI constexpr void resize_and_overwrite(size_type __n, _Op __op) {
-    __resize_default_init(__n);
-    __erase_to_end(std::move(__op)(data(), _LIBCPP_AUTO_CAST(__n)));
+    using __result_type = decltype(std::move(__op)(data(), auto(__n)));
+    static_assert(__integer_like<__result_type>, "Operation return type must be integer-like");
+    size_type __sz  = size();
+    size_type __cap = capacity();
+    if (__n > __cap)
+      __grow_by_without_replace(__cap, __n - __cap, __sz, __sz, 0);
+    __annotate_delete();
+    __set_size(__n);
+    __annotate_new(__n);
+    __erase_to_end(std::move(__op)(data(), auto(__n)));
   }
 #  endif
 
-  _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 void __resize_default_init(size_type __n);
-
 #  if _LIBCPP_STD_VER < 26 || defined(_LIBCPP_ENABLE_CXX26_REMOVED_STRING_RESERVE)
   _LIBCPP_DEPRECATED_IN_CXX20 _LIBCPP_HIDE_FROM_ABI void reserve() _NOEXCEPT { shrink_to_fit(); }
 #  endif
@@ -1328,7 +1346,8 @@ public:
     return size() == 0;
   }
 
-  _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 const_reference operator[](size_type __pos) const _NOEXCEPT {
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 const_reference
+  operator[](size_type __pos) const _NOEXCEPT {
     _LIBCPP_ASSERT_VALID_ELEMENT_ACCESS(__pos <= size(), "string index out of bounds");
     if (__builtin_constant_p(__pos) && !__fits_in_sso(__pos)) {
       return *(__get_long_pointer() + __pos);
@@ -1336,7 +1355,8 @@ public:
     return *(data() + __pos);
   }
 
-  _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 reference operator[](size_type __pos) _NOEXCEPT {
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 reference
+  operator[](size_type __pos) _NOEXCEPT {
     _LIBCPP_ASSERT_VALID_ELEMENT_ACCESS(__pos <= size(), "string index out of bounds");
     if (__builtin_constant_p(__pos) && !__fits_in_sso(__pos)) {
       return *(__get_long_pointer() + __pos);
@@ -1344,15 +1364,15 @@ public:
     return *(__get_pointer() + __pos);
   }
 
-  _LIBCPP_CONSTEXPR_SINCE_CXX20 const_reference at(size_type __n) const;
-  _LIBCPP_CONSTEXPR_SINCE_CXX20 reference at(size_type __n);
+  [[__nodiscard__]] _LIBCPP_CONSTEXPR_SINCE_CXX20 const_reference at(size_type __n) const;
+  [[__nodiscard__]] _LIBCPP_CONSTEXPR_SINCE_CXX20 reference at(size_type __n);
 
   _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 basic_string& operator+=(const basic_string& __str) {
     return append(__str);
   }
 
   template <class _Tp,
-            __enable_if_t<__can_be_converted_to_string_view<_CharT, _Traits, _Tp>::value &&
+            __enable_if_t<__can_be_converted_to_string_view_v<_CharT, _Traits, _Tp> &&
                               !is_same<__remove_cvref_t<_Tp>, basic_string >::value,
                           int> = 0>
   _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 basic_string& operator+=(const _Tp& __t) {
@@ -1381,7 +1401,7 @@ public:
   }
 
   template <class _Tp,
-            __enable_if_t<__can_be_converted_to_string_view<_CharT, _Traits, _Tp>::value &&
+            __enable_if_t<__can_be_converted_to_string_view_v<_CharT, _Traits, _Tp> &&
                               !is_same<__remove_cvref_t<_Tp>, basic_string>::value,
                           int> = 0>
   _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 basic_string& append(const _Tp& __t) {
@@ -1392,7 +1412,7 @@ public:
   _LIBCPP_CONSTEXPR_SINCE_CXX20 basic_string& append(const basic_string& __str, size_type __pos, size_type __n = npos);
 
   template <class _Tp,
-            __enable_if_t<__can_be_converted_to_string_view<_CharT, _Traits, _Tp>::value &&
+            __enable_if_t<__can_be_converted_to_string_view_v<_CharT, _Traits, _Tp> &&
                               !is_same<__remove_cvref_t<_Tp>, basic_string>::value,
                           int> = 0>
   _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 basic_string&
@@ -1404,12 +1424,11 @@ public:
     return append(__sv.data() + __pos, std::min(__n, __sz - __pos));
   }
 
-  _LIBCPP_CONSTEXPR_SINCE_CXX20 basic_string& append(const value_type* __s, size_type __n);
+  _LIBCPP_CONSTEXPR_SINCE_CXX20 basic_string& append(const value_type* __s, size_type __n)
+      _LIBCPP_DIAGNOSE_NULLPTR_IF(__n != 0 && __s == nullptr, " if n is not zero");
   _LIBCPP_CONSTEXPR_SINCE_CXX20 basic_string& append(const value_type* _LIBCPP_DIAGNOSE_NULLPTR __s);
   _LIBCPP_CONSTEXPR_SINCE_CXX20 basic_string& append(size_type __n, value_type __c);
 
-  _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 void __append_default_init(size_type __n);
-
   template <class _InputIterator, __enable_if_t<__has_exactly_input_iterator_category<_InputIterator>::value, int> = 0>
   _LIBCPP_HIDE_FROM_ABI _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 basic_string&
   append(_InputIterator __first, _InputIterator __last) {
@@ -1424,20 +1443,21 @@ public:
     size_type __sz  = size();
     size_type __cap = capacity();
     size_type __n   = static_cast<size_type>(std::distance(__first, __last));
-    if (__n) {
-      if (__string_is_trivial_iterator<_ForwardIterator>::value && !__addr_in_range(*__first)) {
-        if (__cap - __sz < __n)
-          __grow_by_without_replace(__cap, __sz + __n - __cap, __sz, __sz, 0);
-        __annotate_increase(__n);
-        auto __end = __copy_non_overlapping_range(__first, __last, std::__to_address(__get_pointer() + __sz));
-        traits_type::assign(*__end, value_type());
-        __set_size(__sz + __n);
-      } else {
-        const basic_string __temp(__first, __last, __alloc_);
-        append(__temp.data(), __temp.size());
-      }
+    if (__n == 0)
+      return *this;
+
+    if (__string_is_trivial_iterator_v<_ForwardIterator> && !__addr_in_range(*__first)) {
+      if (__cap - __sz < __n)
+        __grow_by_without_replace(__cap, __sz + __n - __cap, __sz, __sz, 0);
+      __annotate_increase(__n);
+      auto __end = __copy_non_overlapping_range(__first, __last, std::__to_address(__get_pointer() + __sz));
+      traits_type::assign(*__end, value_type());
+      __set_size(__sz + __n);
+      return *this;
+    } else {
+      const basic_string __temp(__first, __last, __alloc_);
+      return append(__temp.data(), __temp.size());
     }
-    return *this;
   }
 
 #  if _LIBCPP_STD_VER >= 23
@@ -1457,27 +1477,27 @@ public:
   _LIBCPP_CONSTEXPR_SINCE_CXX20 void push_back(value_type __c);
   _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 void pop_back();
 
-  _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 reference front() _NOEXCEPT {
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 reference front() _NOEXCEPT {
     _LIBCPP_ASSERT_VALID_ELEMENT_ACCESS(!empty(), "string::front(): string is empty");
     return *__get_pointer();
   }
 
-  _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 const_reference front() const _NOEXCEPT {
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 const_reference front() const _NOEXCEPT {
     _LIBCPP_ASSERT_VALID_ELEMENT_ACCESS(!empty(), "string::front(): string is empty");
     return *data();
   }
 
-  _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 reference back() _NOEXCEPT {
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 reference back() _NOEXCEPT {
     _LIBCPP_ASSERT_VALID_ELEMENT_ACCESS(!empty(), "string::back(): string is empty");
     return *(__get_pointer() + size() - 1);
   }
 
-  _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 const_reference back() const _NOEXCEPT {
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 const_reference back() const _NOEXCEPT {
     _LIBCPP_ASSERT_VALID_ELEMENT_ACCESS(!empty(), "string::back(): string is empty");
     return *(data() + size() - 1);
   }
 
-  template <class _Tp, __enable_if_t<__can_be_converted_to_string_view<_CharT, _Traits, _Tp>::value, int> = 0>
+  template <class _Tp, __enable_if_t<__can_be_converted_to_string_view_v<_CharT, _Traits, _Tp>, int> = 0>
   _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 basic_string& assign(const _Tp& __t) {
     __self_view __sv = __t;
     return assign(__sv.data(), __sv.size());
@@ -1519,7 +1539,7 @@ public:
   _LIBCPP_CONSTEXPR_SINCE_CXX20 basic_string& assign(const basic_string& __str, size_type __pos, size_type __n = npos);
 
   template <class _Tp,
-            __enable_if_t<__can_be_converted_to_string_view<_CharT, _Traits, _Tp>::value &&
+            __enable_if_t<__can_be_converted_to_string_view_v<_CharT, _Traits, _Tp> &&
                               !is_same<__remove_cvref_t<_Tp>, basic_string>::value,
                           int> = 0>
   _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 basic_string&
@@ -1531,8 +1551,9 @@ public:
     return assign(__sv.data() + __pos, std::min(__n, __sz - __pos));
   }
 
-  _LIBCPP_CONSTEXPR_SINCE_CXX20 basic_string& assign(const value_type* __s, size_type __n);
-  _LIBCPP_CONSTEXPR_SINCE_CXX20 basic_string& assign(const value_type* __s);
+  _LIBCPP_CONSTEXPR_SINCE_CXX20 basic_string& assign(const value_type* __s, size_type __n)
+      _LIBCPP_DIAGNOSE_NULLPTR_IF(__n != 0 && __s == nullptr, " if n is not zero");
+  _LIBCPP_CONSTEXPR_SINCE_CXX20 basic_string& assign(const value_type* _LIBCPP_DIAGNOSE_NULLPTR __s);
   _LIBCPP_CONSTEXPR_SINCE_CXX20 basic_string& assign(size_type __n, value_type __c);
 
   template <class _InputIterator, __enable_if_t<__has_exactly_input_iterator_category<_InputIterator>::value, int> = 0>
@@ -1545,7 +1566,7 @@ public:
   template <class _ForwardIterator, __enable_if_t<__has_forward_iterator_category<_ForwardIterator>::value, int> = 0>
   _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 basic_string&
   assign(_ForwardIterator __first, _ForwardIterator __last) {
-    if (__string_is_trivial_iterator<_ForwardIterator>::value) {
+    if (__string_is_trivial_iterator_v<_ForwardIterator>) {
       size_type __n = static_cast<size_type>(std::distance(__first, __last));
       __assign_trivial(__first, __last, __n);
     } else {
@@ -1558,7 +1579,7 @@ public:
 #  if _LIBCPP_STD_VER >= 23
   template <_ContainerCompatibleRange<_CharT> _Range>
   _LIBCPP_HIDE_FROM_ABI constexpr basic_string& assign_range(_Range&& __range) {
-    if constexpr (__string_is_trivial_iterator<ranges::iterator_t<_Range>>::value &&
+    if constexpr (__string_is_trivial_iterator_v<ranges::iterator_t<_Range>> &&
                   (ranges::forward_range<_Range> || ranges::sized_range<_Range>)) {
       size_type __n = static_cast<size_type>(ranges::distance(__range));
       __assign_trivial(ranges::begin(__range), ranges::end(__range), __n);
@@ -1582,14 +1603,14 @@ public:
     return insert(__pos1, __str.data(), __str.size());
   }
 
-  template <class _Tp, __enable_if_t<__can_be_converted_to_string_view<_CharT, _Traits, _Tp>::value, int> = 0>
+  template <class _Tp, __enable_if_t<__can_be_converted_to_string_view_v<_CharT, _Traits, _Tp>, int> = 0>
   _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 basic_string& insert(size_type __pos1, const _Tp& __t) {
     __self_view __sv = __t;
     return insert(__pos1, __sv.data(), __sv.size());
   }
 
   template <class _Tp,
-            __enable_if_t<__can_be_converted_to_string_view<_CharT, _Traits, _Tp>::value &&
+            __enable_if_t<__can_be_converted_to_string_view_v<_CharT, _Traits, _Tp> &&
                               !is_same<__remove_cvref_t<_Tp>, basic_string>::value,
                           int> = 0>
   _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 basic_string&
@@ -1603,7 +1624,8 @@ public:
 
   _LIBCPP_CONSTEXPR_SINCE_CXX20 basic_string&
   insert(size_type __pos1, const basic_string& __str, size_type __pos2, size_type __n = npos);
-  _LIBCPP_CONSTEXPR_SINCE_CXX20 basic_string& insert(size_type __pos, const value_type* __s, size_type __n);
+  _LIBCPP_CONSTEXPR_SINCE_CXX20 basic_string& insert(size_type __pos, const value_type* __s, size_type __n)
+      _LIBCPP_DIAGNOSE_NULLPTR_IF(__n != 0 && __s == nullptr, " if n is not zero");
   _LIBCPP_CONSTEXPR_SINCE_CXX20 basic_string& insert(size_type __pos, const value_type* _LIBCPP_DIAGNOSE_NULLPTR __s);
   _LIBCPP_CONSTEXPR_SINCE_CXX20 basic_string& insert(size_type __pos, size_type __n, value_type __c);
   _LIBCPP_CONSTEXPR_SINCE_CXX20 iterator insert(const_iterator __pos, value_type __c);
@@ -1659,7 +1681,7 @@ public:
     return replace(__pos1, __n1, __str.data(), __str.size());
   }
 
-  template <class _Tp, __enable_if_t<__can_be_converted_to_string_view<_CharT, _Traits, _Tp>::value, int> = 0>
+  template <class _Tp, __enable_if_t<__can_be_converted_to_string_view_v<_CharT, _Traits, _Tp>, int> = 0>
   _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 basic_string&
   replace(size_type __pos1, size_type __n1, const _Tp& __t) {
     __self_view __sv = __t;
@@ -1670,7 +1692,7 @@ public:
   replace(size_type __pos1, size_type __n1, const basic_string& __str, size_type __pos2, size_type __n2 = npos);
 
   template <class _Tp,
-            __enable_if_t<__can_be_converted_to_string_view<_CharT, _Traits, _Tp>::value &&
+            __enable_if_t<__can_be_converted_to_string_view_v<_CharT, _Traits, _Tp> &&
                               !is_same<__remove_cvref_t<_Tp>, basic_string>::value,
                           int> = 0>
   _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 basic_string&
@@ -1683,8 +1705,10 @@ public:
   }
 
   _LIBCPP_CONSTEXPR_SINCE_CXX20 basic_string&
-  replace(size_type __pos, size_type __n1, const value_type* __s, size_type __n2);
-  _LIBCPP_CONSTEXPR_SINCE_CXX20 basic_string& replace(size_type __pos, size_type __n1, const value_type* __s);
+  replace(size_type __pos, size_type __n1, const value_type* __s, size_type __n2)
+      _LIBCPP_DIAGNOSE_NULLPTR_IF(__n2 != 0 && __s == nullptr, " if n2 is not zero");
+  _LIBCPP_CONSTEXPR_SINCE_CXX20 basic_string&
+  replace(size_type __pos, size_type __n1, const value_type* _LIBCPP_DIAGNOSE_NULLPTR __s);
   _LIBCPP_CONSTEXPR_SINCE_CXX20 basic_string& replace(size_type __pos, size_type __n1, size_type __n2, value_type __c);
 
   _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 basic_string&
@@ -1693,7 +1717,7 @@ public:
         static_cast<size_type>(__i1 - begin()), static_cast<size_type>(__i2 - __i1), __str.data(), __str.size());
   }
 
-  template <class _Tp, __enable_if_t<__can_be_converted_to_string_view<_CharT, _Traits, _Tp>::value, int> = 0>
+  template <class _Tp, __enable_if_t<__can_be_converted_to_string_view_v<_CharT, _Traits, _Tp>, int> = 0>
   _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 basic_string&
   replace(const_iterator __i1, const_iterator __i2, const _Tp& __t) {
     __self_view __sv = __t;
@@ -1741,19 +1765,24 @@ public:
   _LIBCPP_CONSTEXPR_SINCE_CXX20 size_type copy(value_type* __s, size_type __n, size_type __pos = 0) const;
 
 #  if _LIBCPP_STD_VER <= 20
-  _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 basic_string
-  substr(size_type __pos = 0, size_type __n = npos) const {
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI
+  _LIBCPP_CONSTEXPR_SINCE_CXX20 basic_string substr(size_type __pos = 0, size_type __n = npos) const {
     return basic_string(*this, __pos, __n);
   }
 #  else
-  _LIBCPP_HIDE_FROM_ABI constexpr basic_string substr(size_type __pos = 0, size_type __n = npos) const& {
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr basic_string substr(size_type __pos = 0, size_type __n = npos) const& {
     return basic_string(*this, __pos, __n);
   }
 
-  _LIBCPP_HIDE_FROM_ABI constexpr basic_string substr(size_type __pos = 0, size_type __n = npos) && {
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr basic_string substr(size_type __pos = 0, size_type __n = npos) && {
     return basic_string(std::move(*this), __pos, __n);
   }
 #  endif
+#  if _LIBCPP_STD_VER >= 26
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr __self_view subview(size_type __pos = 0, size_type __n = npos) const {
+    return __self_view(*this).subview(__pos, __n);
+  }
+#  endif
 
   _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 void swap(basic_string& __str)
 #  if _LIBCPP_STD_VER >= 14
@@ -1765,225 +1794,238 @@ public:
   // [string.ops]
   // ------------
 
-  _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 const value_type* c_str() const _NOEXCEPT { return data(); }
-  _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 const value_type* data() const _NOEXCEPT {
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 const value_type* c_str() const _NOEXCEPT {
+    return data();
+  }
+
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 const value_type* data() const _NOEXCEPT {
     return std::__to_address(__get_pointer());
   }
 #  if _LIBCPP_STD_VER >= 17
-  _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 value_type* data() _NOEXCEPT {
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 value_type* data() _NOEXCEPT {
     return std::__to_address(__get_pointer());
   }
 #  endif
 
-  _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 allocator_type get_allocator() const _NOEXCEPT {
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 allocator_type get_allocator() const _NOEXCEPT {
     return __alloc_;
   }
 
   // find
 
-  _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 size_type
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 size_type
   find(const basic_string& __str, size_type __pos = 0) const _NOEXCEPT {
     return std::__str_find<value_type, size_type, traits_type, npos>(data(), size(), __str.data(), __pos, __str.size());
   }
 
-  template <class _Tp, __enable_if_t<__can_be_converted_to_string_view<_CharT, _Traits, _Tp>::value, int> = 0>
-  _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 size_type
+  template <class _Tp, __enable_if_t<__can_be_converted_to_string_view_v<_CharT, _Traits, _Tp>, int> = 0>
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 size_type
   find(const _Tp& __t, size_type __pos = 0) const _NOEXCEPT {
     __self_view __sv = __t;
     return std::__str_find<value_type, size_type, traits_type, npos>(data(), size(), __sv.data(), __pos, __sv.size());
   }
 
-  _LIBCPP_CONSTEXPR_SINCE_CXX20 size_type find(const value_type* __s, size_type __pos, size_type __n) const _NOEXCEPT {
+  [[__nodiscard__]] _LIBCPP_CONSTEXPR_SINCE_CXX20 size_type
+  find(const value_type* __s, size_type __pos, size_type __n) const _NOEXCEPT
+      _LIBCPP_DIAGNOSE_NULLPTR_IF(__n != 0 && __s == nullptr, " if n is not zero") {
     _LIBCPP_ASSERT_NON_NULL(__n == 0 || __s != nullptr, "string::find(): received nullptr");
     return std::__str_find<value_type, size_type, traits_type, npos>(data(), size(), __s, __pos, __n);
   }
 
-  _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 size_type
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 size_type
   find(const value_type* _LIBCPP_DIAGNOSE_NULLPTR __s, size_type __pos = 0) const _NOEXCEPT {
     _LIBCPP_ASSERT_NON_NULL(__s != nullptr, "string::find(): received nullptr");
     return std::__str_find<value_type, size_type, traits_type, npos>(
         data(), size(), __s, __pos, traits_type::length(__s));
   }
 
-  _LIBCPP_CONSTEXPR_SINCE_CXX20 size_type find(value_type __c, size_type __pos = 0) const _NOEXCEPT {
+  [[__nodiscard__]] _LIBCPP_CONSTEXPR_SINCE_CXX20 size_type find(value_type __c, size_type __pos = 0) const _NOEXCEPT {
     return std::__str_find<value_type, size_type, traits_type, npos>(data(), size(), __c, __pos);
   }
 
   // rfind
 
-  _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 size_type
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 size_type
   rfind(const basic_string& __str, size_type __pos = npos) const _NOEXCEPT {
     return std::__str_rfind<value_type, size_type, traits_type, npos>(
         data(), size(), __str.data(), __pos, __str.size());
   }
 
-  template <class _Tp, __enable_if_t<__can_be_converted_to_string_view<_CharT, _Traits, _Tp>::value, int> = 0>
-  _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 size_type
+  template <class _Tp, __enable_if_t<__can_be_converted_to_string_view_v<_CharT, _Traits, _Tp>, int> = 0>
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 size_type
   rfind(const _Tp& __t, size_type __pos = npos) const _NOEXCEPT {
     __self_view __sv = __t;
     return std::__str_rfind<value_type, size_type, traits_type, npos>(data(), size(), __sv.data(), __pos, __sv.size());
   }
 
-  _LIBCPP_CONSTEXPR_SINCE_CXX20 size_type rfind(const value_type* __s, size_type __pos, size_type __n) const _NOEXCEPT {
+  [[__nodiscard__]] _LIBCPP_CONSTEXPR_SINCE_CXX20 size_type
+  rfind(const value_type* __s, size_type __pos, size_type __n) const _NOEXCEPT
+      _LIBCPP_DIAGNOSE_NULLPTR_IF(__n != 0 && __s == nullptr, " if n is not zero") {
     _LIBCPP_ASSERT_NON_NULL(__n == 0 || __s != nullptr, "string::rfind(): received nullptr");
     return std::__str_rfind<value_type, size_type, traits_type, npos>(data(), size(), __s, __pos, __n);
   }
 
-  _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 size_type
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 size_type
   rfind(const value_type* _LIBCPP_DIAGNOSE_NULLPTR __s, size_type __pos = npos) const _NOEXCEPT {
     _LIBCPP_ASSERT_NON_NULL(__s != nullptr, "string::rfind(): received nullptr");
     return std::__str_rfind<value_type, size_type, traits_type, npos>(
         data(), size(), __s, __pos, traits_type::length(__s));
   }
 
-  _LIBCPP_CONSTEXPR_SINCE_CXX20 size_type rfind(value_type __c, size_type __pos = npos) const _NOEXCEPT {
+  [[__nodiscard__]] _LIBCPP_CONSTEXPR_SINCE_CXX20 size_type
+  rfind(value_type __c, size_type __pos = npos) const _NOEXCEPT {
     return std::__str_rfind<value_type, size_type, traits_type, npos>(data(), size(), __c, __pos);
   }
 
   // find_first_of
 
-  _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 size_type
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 size_type
   find_first_of(const basic_string& __str, size_type __pos = 0) const _NOEXCEPT {
     return std::__str_find_first_of<value_type, size_type, traits_type, npos>(
         data(), size(), __str.data(), __pos, __str.size());
   }
 
-  template <class _Tp, __enable_if_t<__can_be_converted_to_string_view<_CharT, _Traits, _Tp>::value, int> = 0>
-  _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 size_type
+  template <class _Tp, __enable_if_t<__can_be_converted_to_string_view_v<_CharT, _Traits, _Tp>, int> = 0>
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 size_type
   find_first_of(const _Tp& __t, size_type __pos = 0) const _NOEXCEPT {
     __self_view __sv = __t;
     return std::__str_find_first_of<value_type, size_type, traits_type, npos>(
         data(), size(), __sv.data(), __pos, __sv.size());
   }
 
-  _LIBCPP_CONSTEXPR_SINCE_CXX20 size_type
-  find_first_of(const value_type* __s, size_type __pos, size_type __n) const _NOEXCEPT {
+  [[__nodiscard__]] _LIBCPP_CONSTEXPR_SINCE_CXX20 size_type
+  find_first_of(const value_type* __s, size_type __pos, size_type __n) const _NOEXCEPT
+      _LIBCPP_DIAGNOSE_NULLPTR_IF(__n != 0 && __s == nullptr, " if n is not zero") {
     _LIBCPP_ASSERT_NON_NULL(__n == 0 || __s != nullptr, "string::find_first_of(): received nullptr");
     return std::__str_find_first_of<value_type, size_type, traits_type, npos>(data(), size(), __s, __pos, __n);
   }
 
-  _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 size_type
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 size_type
   find_first_of(const value_type* _LIBCPP_DIAGNOSE_NULLPTR __s, size_type __pos = 0) const _NOEXCEPT {
     _LIBCPP_ASSERT_NON_NULL(__s != nullptr, "string::find_first_of(): received nullptr");
     return std::__str_find_first_of<value_type, size_type, traits_type, npos>(
         data(), size(), __s, __pos, traits_type::length(__s));
   }
 
-  _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 size_type
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 size_type
   find_first_of(value_type __c, size_type __pos = 0) const _NOEXCEPT {
     return find(__c, __pos);
   }
 
   // find_last_of
 
-  _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 size_type
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 size_type
   find_last_of(const basic_string& __str, size_type __pos = npos) const _NOEXCEPT {
     return std::__str_find_last_of<value_type, size_type, traits_type, npos>(
         data(), size(), __str.data(), __pos, __str.size());
   }
 
-  template <class _Tp, __enable_if_t<__can_be_converted_to_string_view<_CharT, _Traits, _Tp>::value, int> = 0>
-  _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 size_type
+  template <class _Tp, __enable_if_t<__can_be_converted_to_string_view_v<_CharT, _Traits, _Tp>, int> = 0>
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 size_type
   find_last_of(const _Tp& __t, size_type __pos = npos) const _NOEXCEPT {
     __self_view __sv = __t;
     return std::__str_find_last_of<value_type, size_type, traits_type, npos>(
         data(), size(), __sv.data(), __pos, __sv.size());
   }
 
-  _LIBCPP_CONSTEXPR_SINCE_CXX20 size_type
-  find_last_of(const value_type* __s, size_type __pos, size_type __n) const _NOEXCEPT {
+  [[__nodiscard__]] _LIBCPP_CONSTEXPR_SINCE_CXX20 size_type
+  find_last_of(const value_type* __s, size_type __pos, size_type __n) const _NOEXCEPT
+      _LIBCPP_DIAGNOSE_NULLPTR_IF(__n != 0 && __s == nullptr, " if n is not zero") {
     _LIBCPP_ASSERT_NON_NULL(__n == 0 || __s != nullptr, "string::find_last_of(): received nullptr");
     return std::__str_find_last_of<value_type, size_type, traits_type, npos>(data(), size(), __s, __pos, __n);
   }
 
-  _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 size_type
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 size_type
   find_last_of(const value_type* _LIBCPP_DIAGNOSE_NULLPTR __s, size_type __pos = npos) const _NOEXCEPT {
     _LIBCPP_ASSERT_NON_NULL(__s != nullptr, "string::find_last_of(): received nullptr");
     return std::__str_find_last_of<value_type, size_type, traits_type, npos>(
         data(), size(), __s, __pos, traits_type::length(__s));
   }
 
-  _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 size_type
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 size_type
   find_last_of(value_type __c, size_type __pos = npos) const _NOEXCEPT {
     return rfind(__c, __pos);
   }
 
   // find_first_not_of
 
-  _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 size_type
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 size_type
   find_first_not_of(const basic_string& __str, size_type __pos = 0) const _NOEXCEPT {
     return std::__str_find_first_not_of<value_type, size_type, traits_type, npos>(
         data(), size(), __str.data(), __pos, __str.size());
   }
 
-  template <class _Tp, __enable_if_t<__can_be_converted_to_string_view<_CharT, _Traits, _Tp>::value, int> = 0>
-  _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 size_type
+  template <class _Tp, __enable_if_t<__can_be_converted_to_string_view_v<_CharT, _Traits, _Tp>, int> = 0>
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 size_type
   find_first_not_of(const _Tp& __t, size_type __pos = 0) const _NOEXCEPT {
     __self_view __sv = __t;
     return std::__str_find_first_not_of<value_type, size_type, traits_type, npos>(
         data(), size(), __sv.data(), __pos, __sv.size());
   }
 
-  _LIBCPP_CONSTEXPR_SINCE_CXX20 size_type
-  find_first_not_of(const value_type* __s, size_type __pos, size_type __n) const _NOEXCEPT {
+  [[__nodiscard__]] _LIBCPP_CONSTEXPR_SINCE_CXX20 size_type
+  find_first_not_of(const value_type* __s, size_type __pos, size_type __n) const _NOEXCEPT
+      _LIBCPP_DIAGNOSE_NULLPTR_IF(__n != 0 && __s == nullptr, " if n is not zero") {
     _LIBCPP_ASSERT_NON_NULL(__n == 0 || __s != nullptr, "string::find_first_not_of(): received nullptr");
     return std::__str_find_first_not_of<value_type, size_type, traits_type, npos>(data(), size(), __s, __pos, __n);
   }
 
-  _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 size_type
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 size_type
   find_first_not_of(const value_type* _LIBCPP_DIAGNOSE_NULLPTR __s, size_type __pos = 0) const _NOEXCEPT {
     _LIBCPP_ASSERT_NON_NULL(__s != nullptr, "string::find_first_not_of(): received nullptr");
     return std::__str_find_first_not_of<value_type, size_type, traits_type, npos>(
         data(), size(), __s, __pos, traits_type::length(__s));
   }
 
-  _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 size_type
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 size_type
   find_first_not_of(value_type __c, size_type __pos = 0) const _NOEXCEPT {
     return std::__str_find_first_not_of<value_type, size_type, traits_type, npos>(data(), size(), __c, __pos);
   }
 
   // find_last_not_of
 
-  _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 size_type
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 size_type
   find_last_not_of(const basic_string& __str, size_type __pos = npos) const _NOEXCEPT {
     return std::__str_find_last_not_of<value_type, size_type, traits_type, npos>(
         data(), size(), __str.data(), __pos, __str.size());
   }
 
-  template <class _Tp, __enable_if_t<__can_be_converted_to_string_view<_CharT, _Traits, _Tp>::value, int> = 0>
-  _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 size_type
+  template <class _Tp, __enable_if_t<__can_be_converted_to_string_view_v<_CharT, _Traits, _Tp>, int> = 0>
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 size_type
   find_last_not_of(const _Tp& __t, size_type __pos = npos) const _NOEXCEPT {
     __self_view __sv = __t;
     return std::__str_find_last_not_of<value_type, size_type, traits_type, npos>(
         data(), size(), __sv.data(), __pos, __sv.size());
   }
 
-  _LIBCPP_CONSTEXPR_SINCE_CXX20 size_type
-  find_last_not_of(const value_type* __s, size_type __pos, size_type __n) const _NOEXCEPT {
+  [[__nodiscard__]] _LIBCPP_CONSTEXPR_SINCE_CXX20 size_type
+  find_last_not_of(const value_type* __s, size_type __pos, size_type __n) const _NOEXCEPT
+      _LIBCPP_DIAGNOSE_NULLPTR_IF(__n != 0 && __s == nullptr, " if n is not zero") {
     _LIBCPP_ASSERT_NON_NULL(__n == 0 || __s != nullptr, "string::find_last_not_of(): received nullptr");
     return std::__str_find_last_not_of<value_type, size_type, traits_type, npos>(data(), size(), __s, __pos, __n);
   }
 
-  _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 size_type
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 size_type
   find_last_not_of(const value_type* _LIBCPP_DIAGNOSE_NULLPTR __s, size_type __pos = npos) const _NOEXCEPT {
     _LIBCPP_ASSERT_NON_NULL(__s != nullptr, "string::find_last_not_of(): received nullptr");
     return std::__str_find_last_not_of<value_type, size_type, traits_type, npos>(
         data(), size(), __s, __pos, traits_type::length(__s));
   }
 
-  _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 size_type
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 size_type
   find_last_not_of(value_type __c, size_type __pos = npos) const _NOEXCEPT {
     return std::__str_find_last_not_of<value_type, size_type, traits_type, npos>(data(), size(), __c, __pos);
   }
 
   // compare
 
-  _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 int compare(const basic_string& __str) const _NOEXCEPT {
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 int
+  compare(const basic_string& __str) const _NOEXCEPT {
     return compare(__self_view(__str));
   }
 
-  template <class _Tp, __enable_if_t<__can_be_converted_to_string_view<_CharT, _Traits, _Tp>::value, int> = 0>
-  _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 int compare(const _Tp& __t) const _NOEXCEPT {
+  template <class _Tp, __enable_if_t<__can_be_converted_to_string_view_v<_CharT, _Traits, _Tp>, int> = 0>
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 int compare(const _Tp& __t) const _NOEXCEPT {
     __self_view __sv = __t;
     size_t __lhs_sz  = size();
     size_t __rhs_sz  = __sv.size();
@@ -1997,73 +2039,77 @@ public:
     return 0;
   }
 
-  template <class _Tp, __enable_if_t<__can_be_converted_to_string_view<_CharT, _Traits, _Tp>::value, int> = 0>
-  _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 int
+  template <class _Tp, __enable_if_t<__can_be_converted_to_string_view_v<_CharT, _Traits, _Tp>, int> = 0>
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 int
   compare(size_type __pos1, size_type __n1, const _Tp& __t) const {
     __self_view __sv = __t;
     return compare(__pos1, __n1, __sv.data(), __sv.size());
   }
 
-  _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 int
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 int
   compare(size_type __pos1, size_type __n1, const basic_string& __str) const {
     return compare(__pos1, __n1, __str.data(), __str.size());
   }
 
-  _LIBCPP_CONSTEXPR_SINCE_CXX20 int
+  [[__nodiscard__]] _LIBCPP_CONSTEXPR_SINCE_CXX20 int
   compare(size_type __pos1, size_type __n1, const basic_string& __str, size_type __pos2, size_type __n2 = npos) const {
     return compare(__pos1, __n1, __self_view(__str), __pos2, __n2);
   }
 
   template <class _Tp,
-            __enable_if_t<__can_be_converted_to_string_view<_CharT, _Traits, _Tp>::value &&
+            __enable_if_t<__can_be_converted_to_string_view_v<_CharT, _Traits, _Tp> &&
                               !is_same<__remove_cvref_t<_Tp>, basic_string>::value,
                           int> = 0>
-  inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 int
+  [[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 int
   compare(size_type __pos1, size_type __n1, const _Tp& __t, size_type __pos2, size_type __n2 = npos) const {
     __self_view __sv = __t;
     return __self_view(*this).substr(__pos1, __n1).compare(__sv.substr(__pos2, __n2));
   }
 
-  _LIBCPP_CONSTEXPR_SINCE_CXX20 int compare(const value_type* _LIBCPP_DIAGNOSE_NULLPTR __s) const _NOEXCEPT {
+  [[__nodiscard__]] _LIBCPP_CONSTEXPR_SINCE_CXX20 int
+  compare(const value_type* _LIBCPP_DIAGNOSE_NULLPTR __s) const _NOEXCEPT {
     _LIBCPP_ASSERT_NON_NULL(__s != nullptr, "string::compare(): received nullptr");
     return compare(0, npos, __s, traits_type::length(__s));
   }
 
-  _LIBCPP_CONSTEXPR_SINCE_CXX20 int
+  [[__nodiscard__]] _LIBCPP_CONSTEXPR_SINCE_CXX20 int
   compare(size_type __pos1, size_type __n1, const value_type* _LIBCPP_DIAGNOSE_NULLPTR __s) const {
     _LIBCPP_ASSERT_NON_NULL(__s != nullptr, "string::compare(): received nullptr");
     return compare(__pos1, __n1, __s, traits_type::length(__s));
   }
 
-  _LIBCPP_CONSTEXPR_SINCE_CXX20 int
-  compare(size_type __pos1, size_type __n1, const value_type* __s, size_type __n2) const;
+  [[__nodiscard__]] _LIBCPP_CONSTEXPR_SINCE_CXX20 int
+  compare(size_type __pos1, size_type __n1, const value_type* __s, size_type __n2) const
+      _LIBCPP_DIAGNOSE_NULLPTR_IF(__n2 != 0 && __s == nullptr, " if n2 is not zero");
 
   // starts_with
 
 #  if _LIBCPP_STD_VER >= 20
-  constexpr _LIBCPP_HIDE_FROM_ABI bool starts_with(__self_view __sv) const noexcept {
+  [[__nodiscard__]] constexpr _LIBCPP_HIDE_FROM_ABI bool starts_with(__self_view __sv) const noexcept {
     return __self_view(typename __self_view::__assume_valid(), data(), size()).starts_with(__sv);
   }
 
-  constexpr _LIBCPP_HIDE_FROM_ABI bool starts_with(value_type __c) const noexcept {
+  [[__nodiscard__]] constexpr _LIBCPP_HIDE_FROM_ABI bool starts_with(value_type __c) const noexcept {
     return !empty() && _Traits::eq(front(), __c);
   }
 
-  constexpr _LIBCPP_HIDE_FROM_ABI bool starts_with(const value_type* _LIBCPP_DIAGNOSE_NULLPTR __s) const noexcept {
+  [[__nodiscard__]] constexpr _LIBCPP_HIDE_FROM_ABI bool
+  starts_with(const value_type* _LIBCPP_DIAGNOSE_NULLPTR __s) const noexcept {
     return starts_with(__self_view(__s));
   }
 
   // ends_with
 
-  constexpr _LIBCPP_HIDE_FROM_ABI bool ends_with(__self_view __sv) const noexcept {
+  [[__nodiscard__]] constexpr _LIBCPP_HIDE_FROM_ABI bool ends_with(__self_view __sv) const noexcept {
     return __self_view(typename __self_view::__assume_valid(), data(), size()).ends_with(__sv);
   }
 
-  constexpr _LIBCPP_HIDE_FROM_ABI bool ends_with(value_type __c) const noexcept {
+  [[__nodiscard__]] constexpr _LIBCPP_HIDE_FROM_ABI bool ends_with(value_type __c) const noexcept {
     return !empty() && _Traits::eq(back(), __c);
   }
 
-  constexpr _LIBCPP_HIDE_FROM_ABI bool ends_with(const value_type* _LIBCPP_DIAGNOSE_NULLPTR __s) const noexcept {
+  [[__nodiscard__]] constexpr _LIBCPP_HIDE_FROM_ABI bool
+  ends_with(const value_type* _LIBCPP_DIAGNOSE_NULLPTR __s) const noexcept {
     return ends_with(__self_view(__s));
   }
 #  endif
@@ -2071,15 +2117,16 @@ public:
   // contains
 
 #  if _LIBCPP_STD_VER >= 23
-  constexpr _LIBCPP_HIDE_FROM_ABI bool contains(__self_view __sv) const noexcept {
+  [[__nodiscard__]] constexpr _LIBCPP_HIDE_FROM_ABI bool contains(__self_view __sv) const noexcept {
     return __self_view(typename __self_view::__assume_valid(), data(), size()).contains(__sv);
   }
 
-  constexpr _LIBCPP_HIDE_FROM_ABI bool contains(value_type __c) const noexcept {
+  [[__nodiscard__]] constexpr _LIBCPP_HIDE_FROM_ABI bool contains(value_type __c) const noexcept {
     return __self_view(typename __self_view::__assume_valid(), data(), size()).contains(__c);
   }
 
-  constexpr _LIBCPP_HIDE_FROM_ABI bool contains(const value_type* _LIBCPP_DIAGNOSE_NULLPTR __s) const {
+  [[__nodiscard__]] constexpr _LIBCPP_HIDE_FROM_ABI bool
+  contains(const value_type* _LIBCPP_DIAGNOSE_NULLPTR __s) const {
     return __self_view(typename __self_view::__assume_valid(), data(), size()).contains(__s);
   }
 #  endif
@@ -2095,18 +2142,6 @@ private:
     return __rep_.__s.__is_long_;
   }
 
-  static _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 void __begin_lifetime(pointer __begin, size_type __n) {
-#  if _LIBCPP_STD_VER >= 20
-    if (__libcpp_is_constant_evaluated()) {
-      for (size_type __i = 0; __i != __n; ++__i)
-        std::construct_at(std::addressof(__begin[__i]));
-    }
-#  else
-    (void)__begin;
-    (void)__n;
-#  endif // _LIBCPP_STD_VER >= 20
-  }
-
   _LIBCPP_CONSTEXPR _LIBCPP_HIDE_FROM_ABI static bool __fits_in_sso(size_type __sz) { return __sz < __min_cap; }
 
   template <class _Iterator, class _Sentinel>
@@ -2165,6 +2200,9 @@ private:
   _LIBCPP_CONSTEXPR_SINCE_CXX20 iterator
   __insert_with_size(const_iterator __pos, _Iterator __first, _Sentinel __last, size_type __n);
 
+  // internal buffer accessors
+  // -------------------------
+
   _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_STRING_INTERNAL_MEMORY_ACCESS void
   __set_short_size(size_type __s) _NOEXCEPT {
     _LIBCPP_ASSERT_INTERNAL(__s < __min_cap, "__s should never be greater than or equal to the short string capacity");
@@ -2194,21 +2232,11 @@ private:
       __set_short_size(__s);
   }
 
-  _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 void __set_long_cap(size_type __s) _NOEXCEPT {
-    _LIBCPP_ASSERT_INTERNAL(!__fits_in_sso(__s), "Long capacity should always be larger than the SSO");
-    __rep_.__l.__cap_     = __s / __endian_factor;
-    __rep_.__l.__is_long_ = true;
-  }
-
   _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 size_type __get_long_cap() const _NOEXCEPT {
     _LIBCPP_ASSERT_INTERNAL(__rep_.__l.__is_long_, "String has to be long when trying to get the long capacity");
     return __rep_.__l.__cap_ * __endian_factor;
   }
 
-  _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 void __set_long_pointer(pointer __p) _NOEXCEPT {
-    __rep_.__l.__data_ = __p;
-  }
-
   _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 pointer __get_long_pointer() _NOEXCEPT {
     _LIBCPP_ASSERT_INTERNAL(__rep_.__l.__is_long_, "String has to be long when trying to get the long pointer");
     return _LIBCPP_ASAN_VOLATILE_WRAPPER(__rep_.__l.__data_);
@@ -2236,6 +2264,58 @@ private:
     return __is_long() ? __get_long_pointer() : __get_short_pointer();
   }
 
+  // Internal buffer management
+  // --------------------------
+  //
+  // These functions are only responsible for managing the buffer itself, not the value inside the buffer. As such,
+  // none of these facilities ensure that there is a null terminator at `data()[size()]`.
+
+  // Allocate a buffer of __capacity size with __alloc and return it
+  _LIBCPP_HIDE_FROM_ABI static _LIBCPP_CONSTEXPR_SINCE_CXX20 __long
+  __allocate_long_buffer(_Allocator& __alloc, size_type __capacity) {
+    _LIBCPP_ASSERT_INTERNAL(!__fits_in_sso(__capacity),
+                            "Trying to allocate long buffer for a capacity what would fit into the small buffer");
+    auto __buffer = std::__allocate_at_least(__alloc, __align_allocation_size(__capacity));
+
+    if (__libcpp_is_constant_evaluated()) {
+      for (size_type __i = 0; __i != __buffer.count; ++__i)
+        std::__construct_at(std::addressof(__buffer.ptr[__i]));
+    }
+
+    return __long(__buffer, __capacity);
+  }
+
+  // Replace the current buffer with __new_rep. Deallocate the old long buffer if it exists.
+  _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 void __reset_internal_buffer(__rep __new_rep = __short()) {
+    __annotate_delete();
+    if (__is_long())
+      __alloc_traits::deallocate(__alloc_, __get_long_pointer(), __get_long_cap());
+    __rep_ = __new_rep;
+  }
+
+  // Initialize the internal buffer to hold __size elements.
+  // The elements and the null terminator have to be set by the caller.
+  _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 pointer __init_internal_buffer(size_type __size) {
+    if (__libcpp_is_constant_evaluated())
+      __rep_ = __rep();
+
+    if (__size > max_size())
+      __throw_length_error();
+
+    if (__fits_in_sso(__size)) {
+      __set_short_size(__size);
+      __annotate_new(__size);
+      return __get_short_pointer();
+    } else {
+      __rep_.__l = __allocate_long_buffer(__alloc_, __size);
+      __annotate_new(__size);
+      return __get_long_pointer();
+    }
+  }
+
+  // ASan annotation helpers
+  // -----------------------
+
   // The following functions are no-ops outside of AddressSanitizer mode.
   _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 void
   __annotate_contiguous_container(const void* __old_mid, const void* __new_mid) const {
@@ -2243,7 +2323,7 @@ private:
     (void)__new_mid;
 #  if _LIBCPP_INSTRUMENTED_WITH_ASAN
 #    if defined(__APPLE__)
-    // TODO: remove after addressing issue #96099 (https://github.com/llvm/llvm-project/issues/96099)
+    // TODO: remove after addressing issue #96099 (https://llvm.org/PR96099)
     if (!__is_long())
       return;
 #    endif
@@ -2287,19 +2367,36 @@ private:
     return (__s + (__a - 1)) & ~(__a - 1);
   }
   enum { __alignment = 8 };
-  static _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 size_type __recommend(size_type __s) _NOEXCEPT {
-    if (__s < __min_cap) {
-      return static_cast<size_type>(__min_cap) - 1;
-    }
+
+  // This rounds the allocation size up to some extra alignment, since allocators almost always over-align
+  // allocations anyway, which improves memory usage. More importantly, it ensures that the lowest bit is never set
+  // if __endian_factor == 2, allowing us to store whether the string is long inside the lowest bit.
+  _LIBCPP_HIDE_FROM_ABI static _LIBCPP_CONSTEXPR_SINCE_CXX20 size_type
+  __align_allocation_size(size_type __size) _NOEXCEPT {
+    _LIBCPP_ASSERT_INTERNAL(
+        !__fits_in_sso(__size), "Trying to align allocation of a size which would fit into the SSO");
     const size_type __boundary = sizeof(value_type) < __alignment ? __alignment / sizeof(value_type) : __endian_factor;
-    size_type __guess          = __align_it<__boundary>(__s + 1) - 1;
-    if (__guess == __min_cap)
+    size_type __guess          = __align_it<__boundary>(__size + 1);
+    if (__guess == __min_cap + 1)
       __guess += __endian_factor;
 
-    _LIBCPP_ASSERT_INTERNAL(__guess >= __s, "recommendation is below the requested size");
+    _LIBCPP_ASSERT_INTERNAL(__guess >= __size, "aligned allocation size is below the requested size");
     return __guess;
   }
 
+  _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 size_type
+  __get_amortized_growth_capacity(size_type __required_capacity) {
+    size_type __max_size = max_size();
+    if (__required_capacity > __max_size)
+      __throw_length_error();
+    size_type __current_cap = capacity();
+    _LIBCPP_ASSERT_INTERNAL(
+        __current_cap < __required_capacity, "Trying to grow string even though there is enough capacity already?");
+    if (__current_cap > __max_size / 2 - __alignment)
+      return __max_size;
+    return std::max(__required_capacity, 2 * __current_cap);
+  }
+
   inline _LIBCPP_CONSTEXPR_SINCE_CXX20 void __init(const value_type* __s, size_type __sz);
   inline _LIBCPP_CONSTEXPR_SINCE_CXX20 void __init(size_type __n, value_type __c);
 
@@ -2366,7 +2463,8 @@ private:
 
   // __erase_external_with_move is invoked for erase() invocations where
   // `n ~= npos`, likely requiring memory moves on the string data.
-  _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_NOINLINE void __erase_external_with_move(size_type __pos, size_type __n);
+  _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_NOINLINE void
+  __erase_external_with_move(size_type __pos, size_type __n) _NOEXCEPT;
 
   _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 void __copy_assign_alloc(const basic_string& __str) {
     __copy_assign_alloc(
@@ -2378,24 +2476,14 @@ private:
       __alloc_ = __str.__alloc_;
     else {
       if (!__str.__is_long()) {
-        if (__is_long()) {
-          __annotate_delete();
-          __alloc_traits::deallocate(__alloc_, __get_long_pointer(), __get_long_cap());
-          __rep_ = __rep();
-        }
+        __reset_internal_buffer();
         __alloc_ = __str.__alloc_;
       } else {
         __annotate_delete();
-        auto __guard       = std::__make_scope_guard(__annotate_new_size(*this));
-        allocator_type __a = __str.__alloc_;
-        auto __allocation  = std::__allocate_at_least(__a, __str.__get_long_cap());
-        __begin_lifetime(__allocation.ptr, __allocation.count);
-        if (__is_long())
-          __alloc_traits::deallocate(__alloc_, __get_long_pointer(), __get_long_cap());
-        __alloc_ = std::move(__a);
-        __set_long_pointer(__allocation.ptr);
-        __set_long_cap(__allocation.count);
-        __set_long_size(__str.__get_long_size());
+        auto __guard = std::__make_scope_guard(__annotate_new_size(*this));
+        auto __alloc = __str.__alloc_;
+        __reset_internal_buffer(__allocate_long_buffer(__alloc, __str.size()));
+        __alloc_ = std::move(__alloc);
       }
     }
   }
@@ -2507,26 +2595,55 @@ _LIBCPP_STRING_V1_EXTERN_TEMPLATE_LIST(_LIBCPP_DECLARE, wchar_t)
 #  endif
 #  undef _LIBCPP_DECLARE
 
+#  if _LIBCPP_STD_VER <= 17 || !__has_builtin(__builtin_lt_synthesizes_from_spaceship)
+template <class _CharT, class _Traits, class _Alloc>
+struct __default_three_way_comparator<basic_string<_CharT, _Traits, _Alloc>, basic_string<_CharT, _Traits, _Alloc> > {
+  using __string_t _LIBCPP_NODEBUG = basic_string<_CharT, _Traits, _Alloc>;
+
+  _LIBCPP_HIDE_FROM_ABI static int operator()(const __string_t& __lhs, const __string_t& __rhs) {
+    auto __min_len = std::min(__lhs.size(), __rhs.size());
+    auto __ret     = _Traits::compare(__lhs.data(), __rhs.data(), __min_len);
+    if (__ret == 0)
+      return __lhs.size() == __rhs.size() ? 0 : __lhs.size() < __rhs.size() ? -1 : 1;
+    return __ret;
+  }
+};
+#  endif
+
+template <class _Comparator, class _CharT, class _Traits, class _Alloc>
+inline const bool __is_transparently_comparable_v<_Comparator,
+                                                  basic_string<_CharT, _Traits, _Alloc>,
+                                                  const _CharT*,
+                                                  __enable_if_t<__is_generic_transparent_comparator_v<_Comparator> > > =
+    true;
+
+template <class _Comparator, class _CharT, class _Traits, class _Alloc, size_t _Np>
+inline const bool __is_transparently_comparable_v<_Comparator,
+                                                  basic_string<_CharT, _Traits, _Alloc>,
+                                                  _CharT[_Np],
+                                                  __enable_if_t<__is_generic_transparent_comparator_v<_Comparator> > > =
+    true;
+
 #  if _LIBCPP_STD_VER >= 17
 template <class _InputIterator,
-          class _CharT     = __iter_value_type<_InputIterator>,
+          class _CharT     = __iterator_value_type<_InputIterator>,
           class _Allocator = allocator<_CharT>,
           class            = enable_if_t<__has_input_iterator_category<_InputIterator>::value>,
-          class            = enable_if_t<__is_allocator<_Allocator>::value> >
+          class            = enable_if_t<__is_allocator_v<_Allocator>>>
 basic_string(_InputIterator, _InputIterator, _Allocator = _Allocator())
     -> basic_string<_CharT, char_traits<_CharT>, _Allocator>;
 
 template <class _CharT,
           class _Traits,
           class _Allocator = allocator<_CharT>,
-          class            = enable_if_t<__is_allocator<_Allocator>::value> >
+          class            = enable_if_t<__is_allocator_v<_Allocator>>>
 explicit basic_string(basic_string_view<_CharT, _Traits>, const _Allocator& = _Allocator())
     -> basic_string<_CharT, _Traits, _Allocator>;
 
 template <class _CharT,
           class _Traits,
           class _Allocator = allocator<_CharT>,
-          class            = enable_if_t<__is_allocator<_Allocator>::value>,
+          class            = enable_if_t<__is_allocator_v<_Allocator>>,
           class _Sz        = typename allocator_traits<_Allocator>::size_type >
 basic_string(basic_string_view<_CharT, _Traits>, _Sz, _Sz, const _Allocator& = _Allocator())
     -> basic_string<_CharT, _Traits, _Allocator>;
@@ -2535,7 +2652,7 @@ basic_string(basic_string_view<_CharT, _Traits>, _Sz, _Sz, const _Allocator& = _
 #  if _LIBCPP_STD_VER >= 23
 template <ranges::input_range _Range,
           class _Allocator = allocator<ranges::range_value_t<_Range>>,
-          class            = enable_if_t<__is_allocator<_Allocator>::value> >
+          class            = enable_if_t<__is_allocator_v<_Allocator>>>
 basic_string(from_range_t, _Range&&, _Allocator = _Allocator())
     -> basic_string<ranges::range_value_t<_Range>, char_traits<ranges::range_value_t<_Range>>, _Allocator>;
 #  endif
@@ -2543,73 +2660,23 @@ basic_string(from_range_t, _Range&&, _Allocator = _Allocator())
 template <class _CharT, class _Traits, class _Allocator>
 _LIBCPP_CONSTEXPR_SINCE_CXX20 void
 basic_string<_CharT, _Traits, _Allocator>::__init(const value_type* __s, size_type __sz) {
-  if (__libcpp_is_constant_evaluated())
-    __rep_ = __rep();
-  if (__sz > max_size())
-    this->__throw_length_error();
-  pointer __p;
-  if (__fits_in_sso(__sz)) {
-    __set_short_size(__sz);
-    __p = __get_short_pointer();
-  } else {
-    auto __allocation = std::__allocate_at_least(__alloc_, __recommend(__sz) + 1);
-    __p               = __allocation.ptr;
-    __begin_lifetime(__p, __allocation.count);
-    __set_long_pointer(__p);
-    __set_long_cap(__allocation.count);
-    __set_long_size(__sz);
-  }
+  pointer __p = __init_internal_buffer(__sz);
   traits_type::copy(std::__to_address(__p), __s, __sz);
   traits_type::assign(__p[__sz], value_type());
-  __annotate_new(__sz);
 }
 
 template <class _CharT, class _Traits, class _Allocator>
 _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_NOINLINE void
 basic_string<_CharT, _Traits, _Allocator>::__init_copy_ctor_external(const value_type* __s, size_type __sz) {
-  if (__libcpp_is_constant_evaluated())
-    __rep_ = __rep();
-
-  pointer __p;
-  if (__fits_in_sso(__sz)) {
-    __p = __get_short_pointer();
-    __set_short_size(__sz);
-  } else {
-    if (__sz > max_size())
-      this->__throw_length_error();
-    auto __allocation = std::__allocate_at_least(__alloc_, __recommend(__sz) + 1);
-    __p               = __allocation.ptr;
-    __begin_lifetime(__p, __allocation.count);
-    __set_long_pointer(__p);
-    __set_long_cap(__allocation.count);
-    __set_long_size(__sz);
-  }
+  pointer __p = __init_internal_buffer(__sz);
   traits_type::copy(std::__to_address(__p), __s, __sz + 1);
-  __annotate_new(__sz);
 }
 
 template <class _CharT, class _Traits, class _Allocator>
 _LIBCPP_CONSTEXPR_SINCE_CXX20 void basic_string<_CharT, _Traits, _Allocator>::__init(size_type __n, value_type __c) {
-  if (__libcpp_is_constant_evaluated())
-    __rep_ = __rep();
-
-  if (__n > max_size())
-    this->__throw_length_error();
-  pointer __p;
-  if (__fits_in_sso(__n)) {
-    __set_short_size(__n);
-    __p = __get_short_pointer();
-  } else {
-    auto __allocation = std::__allocate_at_least(__alloc_, __recommend(__n) + 1);
-    __p               = __allocation.ptr;
-    __begin_lifetime(__p, __allocation.count);
-    __set_long_pointer(__p);
-    __set_long_cap(__allocation.count);
-    __set_long_size(__n);
-  }
+  pointer __p = __init_internal_buffer(__n);
   traits_type::assign(std::__to_address(__p), __n, __c);
   traits_type::assign(__p[__n], value_type());
-  __annotate_new(__n);
 }
 
 template <class _CharT, class _Traits, class _Allocator>
@@ -2626,19 +2693,10 @@ basic_string<_CharT, _Traits, _Allocator>::__init_with_sentinel(_InputIterator _
   __rep_ = __rep();
   __annotate_new(0);
 
-#  if _LIBCPP_HAS_EXCEPTIONS
-  try {
-#  endif // _LIBCPP_HAS_EXCEPTIONS
-    for (; __first != __last; ++__first)
-      push_back(*__first);
-#  if _LIBCPP_HAS_EXCEPTIONS
-  } catch (...) {
-    __annotate_delete();
-    if (__is_long())
-      __alloc_traits::deallocate(__alloc_, __get_long_pointer(), __get_long_cap());
-    throw;
-  }
-#  endif // _LIBCPP_HAS_EXCEPTIONS
+  auto __guard = std::__make_exception_guard([this] { __reset_internal_buffer(); });
+  for (; __first != __last; ++__first)
+    push_back(*__first);
+  __guard.__complete();
 }
 
 template <class _CharT, class _Traits, class _Allocator>
@@ -2653,39 +2711,12 @@ template <class _CharT, class _Traits, class _Allocator>
 template <class _InputIterator, class _Sentinel>
 _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 void
 basic_string<_CharT, _Traits, _Allocator>::__init_with_size(_InputIterator __first, _Sentinel __last, size_type __sz) {
-  if (__libcpp_is_constant_evaluated())
-    __rep_ = __rep();
+  pointer __p = __init_internal_buffer(__sz);
 
-  if (__sz > max_size())
-    this->__throw_length_error();
-
-  pointer __p;
-  if (__fits_in_sso(__sz)) {
-    __set_short_size(__sz);
-    __p = __get_short_pointer();
-
-  } else {
-    auto __allocation = std::__allocate_at_least(__alloc_, __recommend(__sz) + 1);
-    __p               = __allocation.ptr;
-    __begin_lifetime(__p, __allocation.count);
-    __set_long_pointer(__p);
-    __set_long_cap(__allocation.count);
-    __set_long_size(__sz);
-  }
-
-#  if _LIBCPP_HAS_EXCEPTIONS
-  try {
-#  endif // _LIBCPP_HAS_EXCEPTIONS
-    auto __end = __copy_non_overlapping_range(std::move(__first), std::move(__last), std::__to_address(__p));
-    traits_type::assign(*__end, value_type());
-#  if _LIBCPP_HAS_EXCEPTIONS
-  } catch (...) {
-    if (__is_long())
-      __alloc_traits::deallocate(__alloc_, __get_long_pointer(), __get_long_cap());
-    throw;
-  }
-#  endif                       // _LIBCPP_HAS_EXCEPTIONS
-  __annotate_new(__sz);
+  auto __guard = std::__make_exception_guard([this] { __reset_internal_buffer(); });
+  auto __end   = __copy_non_overlapping_range(std::move(__first), std::move(__last), std::__to_address(__p));
+  traits_type::assign(*__end, value_type());
+  __guard.__complete();
 }
 
 template <class _CharT, class _Traits, class _Allocator>
@@ -2697,32 +2728,22 @@ _LIBCPP_CONSTEXPR_SINCE_CXX20 void basic_string<_CharT, _Traits, _Allocator>::__
     size_type __n_del,
     size_type __n_add,
     const value_type* __p_new_stuff) {
-  size_type __ms = max_size();
-  if (__delta_cap > __ms - __old_cap)
-    __throw_length_error();
+  __long __buffer = __allocate_long_buffer(__alloc_, __get_amortized_growth_capacity(__old_cap + __delta_cap));
   pointer __old_p = __get_pointer();
-  size_type __cap =
-      __old_cap < __ms / 2 - __alignment ? __recommend(std::max(__old_cap + __delta_cap, 2 * __old_cap)) : __ms;
   __annotate_delete();
-  auto __guard      = std::__make_scope_guard(__annotate_new_size(*this));
-  auto __allocation = std::__allocate_at_least(__alloc_, __cap + 1);
-  pointer __p       = __allocation.ptr;
-  __begin_lifetime(__p, __allocation.count);
+  auto __guard = std::__make_scope_guard(__annotate_new_size(*this));
   if (__n_copy != 0)
-    traits_type::copy(std::__to_address(__p), std::__to_address(__old_p), __n_copy);
+    traits_type::copy(std::__to_address(__buffer.__data_), std::__to_address(__old_p), __n_copy);
   if (__n_add != 0)
-    traits_type::copy(std::__to_address(__p) + __n_copy, __p_new_stuff, __n_add);
+    traits_type::copy(std::__to_address(__buffer.__data_) + __n_copy, __p_new_stuff, __n_add);
   size_type __sec_cp_sz = __old_sz - __n_del - __n_copy;
   if (__sec_cp_sz != 0)
-    traits_type::copy(
-        std::__to_address(__p) + __n_copy + __n_add, std::__to_address(__old_p) + __n_copy + __n_del, __sec_cp_sz);
-  if (__old_cap + 1 != __min_cap)
-    __alloc_traits::deallocate(__alloc_, __old_p, __old_cap + 1);
-  __set_long_pointer(__p);
-  __set_long_cap(__allocation.count);
-  __old_sz = __n_copy + __n_add + __sec_cp_sz;
-  __set_long_size(__old_sz);
-  traits_type::assign(__p[__old_sz], value_type());
+    traits_type::copy(std::__to_address(__buffer.__data_) + __n_copy + __n_add,
+                      std::__to_address(__old_p) + __n_copy + __n_del,
+                      __sec_cp_sz);
+  __buffer.__size_ = __n_copy + __n_add + __sec_cp_sz;
+  traits_type::assign(__buffer.__data_[__buffer.__size_], value_type());
+  __reset_internal_buffer(__buffer);
 }
 
 // __grow_by is deprecated because it does not set the size. It may not update the size when the size is changed, and it
@@ -2740,25 +2761,20 @@ _LIBCPP_DEPRECATED_("use __grow_by_without_replace") basic_string<_CharT, _Trait
     size_type __n_copy,
     size_type __n_del,
     size_type __n_add) {
-  size_type __ms = max_size();
-  if (__delta_cap > __ms - __old_cap)
-    this->__throw_length_error();
+  __long __buffer = __allocate_long_buffer(__alloc_, __get_amortized_growth_capacity(__old_cap + __delta_cap));
   pointer __old_p = __get_pointer();
-  size_type __cap =
-      __old_cap < __ms / 2 - __alignment ? __recommend(std::max(__old_cap + __delta_cap, 2 * __old_cap)) : __ms;
-  auto __allocation = std::__allocate_at_least(__alloc_, __cap + 1);
-  pointer __p       = __allocation.ptr;
-  __begin_lifetime(__p, __allocation.count);
   if (__n_copy != 0)
-    traits_type::copy(std::__to_address(__p), std::__to_address(__old_p), __n_copy);
+    traits_type::copy(std::__to_address(__buffer.__data_), std::__to_address(__old_p), __n_copy);
   size_type __sec_cp_sz = __old_sz - __n_del - __n_copy;
   if (__sec_cp_sz != 0)
-    traits_type::copy(
-        std::__to_address(__p) + __n_copy + __n_add, std::__to_address(__old_p) + __n_copy + __n_del, __sec_cp_sz);
-  if (__old_cap + 1 != __min_cap)
-    __alloc_traits::deallocate(__alloc_, __old_p, __old_cap + 1);
-  __set_long_pointer(__p);
-  __set_long_cap(__allocation.count);
+    traits_type::copy(std::__to_address(__buffer.__data_) + __n_copy + __n_add,
+                      std::__to_address(__old_p) + __n_copy + __n_del,
+                      __sec_cp_sz);
+
+  // This is -1 to make sure the caller sets the size properly, since old versions of this function didn't set the size
+  // at all.
+  __buffer.__size_ = -1;
+  __reset_internal_buffer(__buffer);
 }
 
 template <class _CharT, class _Traits, class _Allocator>
@@ -2775,6 +2791,7 @@ basic_string<_CharT, _Traits, _Allocator>::__grow_by_without_replace(
   _LIBCPP_SUPPRESS_DEPRECATED_PUSH
   __grow_by(__old_cap, __delta_cap, __old_sz, __n_copy, __n_del, __n_add);
   _LIBCPP_SUPPRESS_DEPRECATED_POP
+  // Due to the ABI of __grow_by we have to set the size after calling it.
   __set_long_size(__old_sz - __n_del + __n_add);
 }
 
@@ -2786,24 +2803,23 @@ _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_NOINLINE basic_string<_CharT, _Traits, _Al
 basic_string<_CharT, _Traits, _Allocator>::__assign_no_alias(const value_type* __s, size_type __n) {
   const auto __cap  = __is_short ? static_cast<size_type>(__min_cap) : __get_long_cap();
   const auto __size = __is_short ? __get_short_size() : __get_long_size();
-  if (__n < __cap) {
-    if (__n > __size)
-      __annotate_increase(__n - __size);
-    pointer __p;
-    if (__is_short) {
-      __p = __get_short_pointer();
-      __set_short_size(__n);
-    } else {
-      __p = __get_long_pointer();
-      __set_long_size(__n);
-    }
-    traits_type::copy(std::__to_address(__p), __s, __n);
-    traits_type::assign(__p[__n], value_type());
-    if (__size > __n)
-      __annotate_shrink(__size);
-  } else {
+  if (__n >= __cap) {
     __grow_by_and_replace(__cap - 1, __n - __cap + 1, __size, 0, __size, __n, __s);
+    return *this;
   }
+
+  __annotate_delete();
+  auto __guard = std::__make_scope_guard(__annotate_new_size(*this));
+  pointer __p;
+  if (__is_short) {
+    __p = __get_short_pointer();
+    __set_short_size(__n);
+  } else {
+    __p = __get_long_pointer();
+    __set_long_size(__n);
+  }
+  traits_type::copy(std::__to_address(__p), __s, __n);
+  traits_type::assign(__p[__n], value_type());
   return *this;
 }
 
@@ -2910,7 +2926,7 @@ basic_string<_CharT, _Traits, _Allocator>::__move_assign(basic_string& __str, tr
 {
   __annotate_delete();
   if (__is_long()) {
-    __alloc_traits::deallocate(__alloc_, __get_long_pointer(), __get_long_cap());
+    __reset_internal_buffer();
 #    if _LIBCPP_STD_VER <= 14
     if (!is_nothrow_move_assignable<allocator_type>::value) {
       __set_short_size(0);
@@ -2961,7 +2977,7 @@ template <class _Iterator, class _Sentinel>
 _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI void
 basic_string<_CharT, _Traits, _Allocator>::__assign_trivial(_Iterator __first, _Sentinel __last, size_type __n) {
   _LIBCPP_ASSERT_INTERNAL(
-      __string_is_trivial_iterator<_Iterator>::value, "The iterator type given to `__assign_trivial` must be trivial");
+      __string_is_trivial_iterator_v<_Iterator>, "The iterator type given to `__assign_trivial` must be trivial");
 
   size_type __old_size = size();
   size_type __cap      = capacity();
@@ -3017,52 +3033,40 @@ basic_string<_CharT, _Traits, _Allocator>::append(const value_type* __s, size_ty
   _LIBCPP_ASSERT_NON_NULL(__n == 0 || __s != nullptr, "string::append received nullptr");
   size_type __cap = capacity();
   size_type __sz  = size();
-  if (__cap - __sz >= __n) {
-    if (__n) {
-      __annotate_increase(__n);
-      value_type* __p = std::__to_address(__get_pointer());
-      traits_type::copy(__p + __sz, __s, __n);
-      __sz += __n;
-      __set_size(__sz);
-      traits_type::assign(__p[__sz], value_type());
-    }
-  } else
+  if (__cap - __sz < __n) {
     __grow_by_and_replace(__cap, __sz + __n - __cap, __sz, __sz, 0, __n, __s);
+    return *this;
+  }
+
+  if (__n == 0)
+    return *this;
+
+  __annotate_increase(__n);
+  value_type* __p = std::__to_address(__get_pointer());
+  traits_type::copy(__p + __sz, __s, __n);
+  __sz += __n;
+  __set_size(__sz);
+  traits_type::assign(__p[__sz], value_type());
   return *this;
 }
 
 template <class _CharT, class _Traits, class _Allocator>
 _LIBCPP_CONSTEXPR_SINCE_CXX20 basic_string<_CharT, _Traits, _Allocator>&
 basic_string<_CharT, _Traits, _Allocator>::append(size_type __n, value_type __c) {
-  if (__n) {
-    size_type __cap = capacity();
-    size_type __sz  = size();
-    if (__cap - __sz < __n)
-      __grow_by_without_replace(__cap, __sz + __n - __cap, __sz, __sz, 0);
-    __annotate_increase(__n);
-    pointer __p = __get_pointer();
-    traits_type::assign(std::__to_address(__p) + __sz, __n, __c);
-    __sz += __n;
-    __set_size(__sz);
-    traits_type::assign(__p[__sz], value_type());
-  }
-  return *this;
-}
+  if (__n == 0)
+    return *this;
 
-template <class _CharT, class _Traits, class _Allocator>
-_LIBCPP_CONSTEXPR_SINCE_CXX20 inline void
-basic_string<_CharT, _Traits, _Allocator>::__append_default_init(size_type __n) {
-  if (__n) {
-    size_type __cap = capacity();
-    size_type __sz  = size();
-    if (__cap - __sz < __n)
-      __grow_by_without_replace(__cap, __sz + __n - __cap, __sz, __sz, 0);
-    __annotate_increase(__n);
-    pointer __p = __get_pointer();
-    __sz += __n;
-    __set_size(__sz);
-    traits_type::assign(__p[__sz], value_type());
-  }
+  size_type __cap = capacity();
+  size_type __sz  = size();
+  if (__cap - __sz < __n)
+    __grow_by_without_replace(__cap, __sz + __n - __cap, __sz, __sz, 0);
+  __annotate_increase(__n);
+  pointer __p = __get_pointer();
+  traits_type::assign(std::__to_address(__p) + __sz, __n, __c);
+  __sz += __n;
+  __set_size(__sz);
+  traits_type::assign(__p[__sz], value_type());
+  return *this;
 }
 
 template <class _CharT, class _Traits, class _Allocator>
@@ -3120,23 +3124,27 @@ basic_string<_CharT, _Traits, _Allocator>::insert(size_type __pos, const value_t
   if (__pos > __sz)
     this->__throw_out_of_range();
   size_type __cap = capacity();
-  if (__cap - __sz >= __n) {
-    if (__n) {
-      __annotate_increase(__n);
-      value_type* __p    = std::__to_address(__get_pointer());
-      size_type __n_move = __sz - __pos;
-      if (__n_move != 0) {
-        if (std::__is_pointer_in_range(__p + __pos, __p + __sz, __s))
-          __s += __n;
-        traits_type::move(__p + __pos + __n, __p + __pos, __n_move);
-      }
-      traits_type::move(__p + __pos, __s, __n);
-      __sz += __n;
-      __set_size(__sz);
-      traits_type::assign(__p[__sz], value_type());
-    }
-  } else
+
+  if (__cap - __sz < __n) {
     __grow_by_and_replace(__cap, __sz + __n - __cap, __sz, __pos, 0, __n, __s);
+    return *this;
+  }
+
+  if (__n == 0)
+    return *this;
+
+  __annotate_increase(__n);
+  value_type* __p    = std::__to_address(__get_pointer());
+  size_type __n_move = __sz - __pos;
+  if (__n_move != 0) {
+    if (std::__is_pointer_in_range(__p + __pos, __p + __sz, __s))
+      __s += __n;
+    traits_type::move(__p + __pos + __n, __p + __pos, __n_move);
+  }
+  traits_type::move(__p + __pos, __s, __n);
+  __sz += __n;
+  __set_size(__sz);
+  traits_type::assign(__p[__sz], value_type());
   return *this;
 }
 
@@ -3146,24 +3154,26 @@ basic_string<_CharT, _Traits, _Allocator>::insert(size_type __pos, size_type __n
   size_type __sz = size();
   if (__pos > __sz)
     this->__throw_out_of_range();
-  if (__n) {
-    size_type __cap = capacity();
-    value_type* __p;
-    if (__cap - __sz >= __n) {
-      __annotate_increase(__n);
-      __p                = std::__to_address(__get_pointer());
-      size_type __n_move = __sz - __pos;
-      if (__n_move != 0)
-        traits_type::move(__p + __pos + __n, __p + __pos, __n_move);
-    } else {
-      __grow_by_without_replace(__cap, __sz + __n - __cap, __sz, __pos, 0, __n);
-      __p = std::__to_address(__get_long_pointer());
-    }
-    traits_type::assign(__p + __pos, __n, __c);
-    __sz += __n;
-    __set_size(__sz);
-    traits_type::assign(__p[__sz], value_type());
+
+  if (__n == 0)
+    return *this;
+
+  size_type __cap = capacity();
+  value_type* __p;
+  if (__cap - __sz >= __n) {
+    __annotate_increase(__n);
+    __p                = std::__to_address(__get_pointer());
+    size_type __n_move = __sz - __pos;
+    if (__n_move != 0)
+      traits_type::move(__p + __pos + __n, __p + __pos, __n_move);
+  } else {
+    __grow_by_without_replace(__cap, __sz + __n - __cap, __sz, __pos, 0, __n);
+    __p = std::__to_address(__get_long_pointer());
   }
+  traits_type::assign(__p + __pos, __n, __c);
+  __sz += __n;
+  __set_size(__sz);
+  traits_type::assign(__p[__sz], value_type());
   return *this;
 }
 
@@ -3176,7 +3186,7 @@ basic_string<_CharT, _Traits, _Allocator>::__insert_with_size(
   if (__n == 0)
     return begin() + __ip;
 
-  if (__string_is_trivial_iterator<_Iterator>::value && !__addr_in_range(*__first)) {
+  if (__string_is_trivial_iterator_v<_Iterator> && !__addr_in_range(*__first)) {
     return __insert_from_safe_copy(__n, __ip, std::move(__first), std::move(__last));
   } else {
     const basic_string __temp(__init_with_sentinel_tag(), std::move(__first), std::move(__last), __alloc_);
@@ -3237,38 +3247,38 @@ basic_string<_CharT, _Traits, _Allocator>::replace(
     this->__throw_out_of_range();
   __n1            = std::min(__n1, __sz - __pos);
   size_type __cap = capacity();
-  if (__cap - __sz + __n1 >= __n2) {
-    value_type* __p = std::__to_address(__get_pointer());
-    if (__n1 != __n2) {
-      if (__n2 > __n1)
-        __annotate_increase(__n2 - __n1);
-      size_type __n_move = __sz - __pos - __n1;
-      if (__n_move != 0) {
-        if (__n1 > __n2) {
-          traits_type::move(__p + __pos, __s, __n2);
-          traits_type::move(__p + __pos + __n2, __p + __pos + __n1, __n_move);
-          return __null_terminate_at(__p, __sz + (__n2 - __n1));
-        }
-        if (std::__is_pointer_in_range(__p + __pos + 1, __p + __sz, __s)) {
-          if (__p + __pos + __n1 <= __s)
-            __s += __n2 - __n1;
-          else // __p + __pos < __s < __p + __pos + __n1
-          {
-            traits_type::move(__p + __pos, __s, __n1);
-            __pos += __n1;
-            __s += __n2;
-            __n2 -= __n1;
-            __n1 = 0;
-          }
-        }
-        traits_type::move(__p + __pos + __n2, __p + __pos + __n1, __n_move);
-      }
-    }
-    traits_type::move(__p + __pos, __s, __n2);
-    return __null_terminate_at(__p, __sz + (__n2 - __n1));
-  } else
+  if (__cap - __sz + __n1 < __n2) {
     __grow_by_and_replace(__cap, __sz - __n1 + __n2 - __cap, __sz, __pos, __n1, __n2, __s);
-  return *this;
+    return *this;
+  }
+
+  value_type* __p = std::__to_address(__get_pointer());
+  if (__n1 != __n2) {
+    if (__n2 > __n1)
+      __annotate_increase(__n2 - __n1);
+    size_type __n_move = __sz - __pos - __n1;
+    if (__n_move != 0) {
+      if (__n1 > __n2) {
+        traits_type::move(__p + __pos, __s, __n2);
+        traits_type::move(__p + __pos + __n2, __p + __pos + __n1, __n_move);
+        return __null_terminate_at(__p, __sz + (__n2 - __n1));
+      }
+      if (std::__is_pointer_in_range(__p + __pos + 1, __p + __sz, __s)) {
+        if (__p + __pos + __n1 <= __s) {
+          __s += __n2 - __n1;
+        } else { // __p + __pos < __s < __p + __pos + __n1
+          traits_type::move(__p + __pos, __s, __n1);
+          __pos += __n1;
+          __s += __n2;
+          __n2 -= __n1;
+          __n1 = 0;
+        }
+      }
+      traits_type::move(__p + __pos + __n2, __p + __pos + __n1, __n_move);
+    }
+  }
+  traits_type::move(__p + __pos, __s, __n2);
+  return __null_terminate_at(__p, __sz + (__n2 - __n1));
 }
 
 template <class _CharT, class _Traits, class _Allocator>
@@ -3320,16 +3330,17 @@ basic_string<_CharT, _Traits, _Allocator>::replace(size_type __pos, size_type __
 // Does not check __pos against size()
 template <class _CharT, class _Traits, class _Allocator>
 _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_NOINLINE void
-basic_string<_CharT, _Traits, _Allocator>::__erase_external_with_move(size_type __pos, size_type __n) {
-  if (__n) {
-    size_type __sz     = size();
-    value_type* __p    = std::__to_address(__get_pointer());
-    __n                = std::min(__n, __sz - __pos);
-    size_type __n_move = __sz - __pos - __n;
-    if (__n_move != 0)
-      traits_type::move(__p + __pos, __p + __pos + __n, __n_move);
-    __null_terminate_at(__p, __sz - __n);
-  }
+basic_string<_CharT, _Traits, _Allocator>::__erase_external_with_move(size_type __pos, size_type __n) _NOEXCEPT {
+  if (__n == 0)
+    return;
+
+  size_type __sz     = size();
+  value_type* __p    = std::__to_address(__get_pointer());
+  __n                = std::min(__n, __sz - __pos);
+  size_type __n_move = __sz - __pos - __n;
+  if (__n_move != 0)
+    traits_type::move(__p + __pos, __p + __pos + __n, __n_move);
+  __null_terminate_at(__p, __sz - __n);
 }
 
 template <class _CharT, class _Traits, class _Allocator>
@@ -3396,16 +3407,6 @@ _LIBCPP_CONSTEXPR_SINCE_CXX20 void basic_string<_CharT, _Traits, _Allocator>::re
     __erase_to_end(__n);
 }
 
-template <class _CharT, class _Traits, class _Allocator>
-_LIBCPP_CONSTEXPR_SINCE_CXX20 inline void
-basic_string<_CharT, _Traits, _Allocator>::__resize_default_init(size_type __n) {
-  size_type __sz = size();
-  if (__n > __sz) {
-    __append_default_init(__n - __sz);
-  } else
-    __erase_to_end(__n);
-}
-
 template <class _CharT, class _Traits, class _Allocator>
 _LIBCPP_CONSTEXPR_SINCE_CXX20 void basic_string<_CharT, _Traits, _Allocator>::reserve(size_type __requested_capacity) {
   if (__requested_capacity > max_size())
@@ -3418,31 +3419,23 @@ _LIBCPP_CONSTEXPR_SINCE_CXX20 void basic_string<_CharT, _Traits, _Allocator>::re
     return;
 
   __annotation_guard __g(*this);
-  auto __allocation = std::__allocate_at_least(__alloc_, __recommend(__requested_capacity) + 1);
-  auto __size       = size();
-  __begin_lifetime(__allocation.ptr, __allocation.count);
-  traits_type::copy(std::__to_address(__allocation.ptr), data(), __size + 1);
-  if (__is_long())
-    __alloc_traits::deallocate(__alloc_, __get_long_pointer(), __get_long_cap());
-  __set_long_cap(__allocation.count);
-  __set_long_size(__size);
-  __set_long_pointer(__allocation.ptr);
+  __long __buffer  = __allocate_long_buffer(__alloc_, __requested_capacity);
+  __buffer.__size_ = size();
+  traits_type::copy(std::__to_address(__buffer.__data_), data(), __buffer.__size_ + 1);
+  __reset_internal_buffer(__buffer);
 }
 
 template <class _CharT, class _Traits, class _Allocator>
 inline _LIBCPP_CONSTEXPR_SINCE_CXX20 void basic_string<_CharT, _Traits, _Allocator>::shrink_to_fit() _NOEXCEPT {
-  size_type __target_capacity = __recommend(size());
-  if (__target_capacity == capacity())
+  if (!__is_long())
     return;
 
-  _LIBCPP_ASSERT_INTERNAL(__is_long(), "Trying to shrink small string");
-
-  // We're a long string and we're shrinking into the small buffer.
   const auto __ptr  = __get_long_pointer();
   const auto __size = __get_long_size();
   const auto __cap  = __get_long_cap();
 
-  if (__fits_in_sso(__target_capacity)) {
+  // We're a long string and we're shrinking into the small buffer.
+  if (__fits_in_sso(__size)) {
     __annotation_guard __g(*this);
     __set_short_size(__size);
     traits_type::copy(std::__to_address(__get_short_pointer()), std::__to_address(__ptr), __size + 1);
@@ -3450,25 +3443,25 @@ inline _LIBCPP_CONSTEXPR_SINCE_CXX20 void basic_string<_CharT, _Traits, _Allocat
     return;
   }
 
+  if (__align_allocation_size(__size) == __cap)
+    return;
+
 #  if _LIBCPP_HAS_EXCEPTIONS
   try {
 #  endif // _LIBCPP_HAS_EXCEPTIONS
     __annotation_guard __g(*this);
-    auto __allocation = std::__allocate_at_least(__alloc_, __target_capacity + 1);
+    __long __buffer = __allocate_long_buffer(__alloc_, __size);
 
     // The Standard mandates shrink_to_fit() does not increase the capacity.
     // With equal capacity keep the existing buffer. This avoids extra work
     // due to swapping the elements.
-    if (__allocation.count - 1 >= capacity()) {
-      __alloc_traits::deallocate(__alloc_, __allocation.ptr, __allocation.count);
+    if (__buffer.__cap_ * __endian_factor - 1 >= capacity()) {
+      __alloc_traits::deallocate(__alloc_, __buffer.__data_, __buffer.__cap_ * __endian_factor);
       return;
     }
 
-    __begin_lifetime(__allocation.ptr, __allocation.count);
-    traits_type::copy(std::__to_address(__allocation.ptr), std::__to_address(__ptr), __size + 1);
-    __alloc_traits::deallocate(__alloc_, __ptr, __cap);
-    __set_long_cap(__allocation.count);
-    __set_long_pointer(__allocation.ptr);
+    traits_type::copy(std::__to_address(__buffer.__data_), std::__to_address(__get_long_pointer()), __size + 1);
+    __reset_internal_buffer(__buffer);
 #  if _LIBCPP_HAS_EXCEPTIONS
   } catch (...) {
     return;
@@ -3574,7 +3567,8 @@ operator==(const basic_string<_CharT, _Traits, _Allocator>& __lhs,
 
 template <class _CharT, class _Traits, class _Allocator>
 inline _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI bool
-operator==(const basic_string<_CharT, _Traits, _Allocator>& __lhs, const _CharT* __rhs) _NOEXCEPT {
+operator==(const basic_string<_CharT, _Traits, _Allocator>& __lhs,
+           const _CharT* _LIBCPP_DIAGNOSE_NULLPTR __rhs) _NOEXCEPT {
   _LIBCPP_ASSERT_NON_NULL(__rhs != nullptr, "operator==(basic_string, char*): received nullptr");
 
   using _String = basic_string<_CharT, _Traits, _Allocator>;
@@ -3858,46 +3852,52 @@ swap(basic_string<_CharT, _Traits, _Allocator>& __lhs, basic_string<_CharT, _Tra
   __lhs.swap(__rhs);
 }
 
-_LIBCPP_EXPORTED_FROM_ABI int stoi(const string& __str, size_t* __idx = nullptr, int __base = 10);
-_LIBCPP_EXPORTED_FROM_ABI long stol(const string& __str, size_t* __idx = nullptr, int __base = 10);
-_LIBCPP_EXPORTED_FROM_ABI unsigned long stoul(const string& __str, size_t* __idx = nullptr, int __base = 10);
-_LIBCPP_EXPORTED_FROM_ABI long long stoll(const string& __str, size_t* __idx = nullptr, int __base = 10);
-_LIBCPP_EXPORTED_FROM_ABI unsigned long long stoull(const string& __str, size_t* __idx = nullptr, int __base = 10);
+[[__nodiscard__]] _LIBCPP_EXPORTED_FROM_ABI int stoi(const string& __str, size_t* __idx = nullptr, int __base = 10);
+[[__nodiscard__]] _LIBCPP_EXPORTED_FROM_ABI unsigned long
+stoul(const string& __str, size_t* __idx = nullptr, int __base = 10);
+[[__nodiscard__]] _LIBCPP_EXPORTED_FROM_ABI long stol(const string& __str, size_t* __idx = nullptr, int __base = 10);
+[[__nodiscard__]] _LIBCPP_EXPORTED_FROM_ABI long long
+stoll(const string& __str, size_t* __idx = nullptr, int __base = 10);
+[[__nodiscard__]] _LIBCPP_EXPORTED_FROM_ABI unsigned long long
+stoull(const string& __str, size_t* __idx = nullptr, int __base = 10);
 
-_LIBCPP_EXPORTED_FROM_ABI float stof(const string& __str, size_t* __idx = nullptr);
-_LIBCPP_EXPORTED_FROM_ABI double stod(const string& __str, size_t* __idx = nullptr);
-_LIBCPP_EXPORTED_FROM_ABI long double stold(const string& __str, size_t* __idx = nullptr);
+[[__nodiscard__]] _LIBCPP_EXPORTED_FROM_ABI float stof(const string& __str, size_t* __idx = nullptr);
+[[__nodiscard__]] _LIBCPP_EXPORTED_FROM_ABI double stod(const string& __str, size_t* __idx = nullptr);
+[[__nodiscard__]] _LIBCPP_EXPORTED_FROM_ABI long double stold(const string& __str, size_t* __idx = nullptr);
 
-_LIBCPP_EXPORTED_FROM_ABI string to_string(int __val);
-_LIBCPP_EXPORTED_FROM_ABI string to_string(unsigned __val);
-_LIBCPP_EXPORTED_FROM_ABI string to_string(long __val);
-_LIBCPP_EXPORTED_FROM_ABI string to_string(unsigned long __val);
-_LIBCPP_EXPORTED_FROM_ABI string to_string(long long __val);
-_LIBCPP_EXPORTED_FROM_ABI string to_string(unsigned long long __val);
-_LIBCPP_EXPORTED_FROM_ABI string to_string(float __val);
-_LIBCPP_EXPORTED_FROM_ABI string to_string(double __val);
-_LIBCPP_EXPORTED_FROM_ABI string to_string(long double __val);
+[[__nodiscard__]] _LIBCPP_EXPORTED_FROM_ABI string to_string(int __val);
+[[__nodiscard__]] _LIBCPP_EXPORTED_FROM_ABI string to_string(unsigned __val);
+[[__nodiscard__]] _LIBCPP_EXPORTED_FROM_ABI string to_string(long __val);
+[[__nodiscard__]] _LIBCPP_EXPORTED_FROM_ABI string to_string(unsigned long __val);
+[[__nodiscard__]] _LIBCPP_EXPORTED_FROM_ABI string to_string(long long __val);
+[[__nodiscard__]] _LIBCPP_EXPORTED_FROM_ABI string to_string(unsigned long long __val);
+[[__nodiscard__]] _LIBCPP_EXPORTED_FROM_ABI string to_string(float __val);
+[[__nodiscard__]] _LIBCPP_EXPORTED_FROM_ABI string to_string(double __val);
+[[__nodiscard__]] _LIBCPP_EXPORTED_FROM_ABI string to_string(long double __val);
 
 #  if _LIBCPP_HAS_WIDE_CHARACTERS
-_LIBCPP_EXPORTED_FROM_ABI int stoi(const wstring& __str, size_t* __idx = nullptr, int __base = 10);
-_LIBCPP_EXPORTED_FROM_ABI long stol(const wstring& __str, size_t* __idx = nullptr, int __base = 10);
-_LIBCPP_EXPORTED_FROM_ABI unsigned long stoul(const wstring& __str, size_t* __idx = nullptr, int __base = 10);
-_LIBCPP_EXPORTED_FROM_ABI long long stoll(const wstring& __str, size_t* __idx = nullptr, int __base = 10);
-_LIBCPP_EXPORTED_FROM_ABI unsigned long long stoull(const wstring& __str, size_t* __idx = nullptr, int __base = 10);
+[[__nodiscard__]] _LIBCPP_EXPORTED_FROM_ABI int stoi(const wstring& __str, size_t* __idx = nullptr, int __base = 10);
+[[__nodiscard__]] _LIBCPP_EXPORTED_FROM_ABI long stol(const wstring& __str, size_t* __idx = nullptr, int __base = 10);
+[[__nodiscard__]] _LIBCPP_EXPORTED_FROM_ABI unsigned long
+stoul(const wstring& __str, size_t* __idx = nullptr, int __base = 10);
+[[__nodiscard__]] _LIBCPP_EXPORTED_FROM_ABI long long
+stoll(const wstring& __str, size_t* __idx = nullptr, int __base = 10);
+[[__nodiscard__]] _LIBCPP_EXPORTED_FROM_ABI unsigned long long
+stoull(const wstring& __str, size_t* __idx = nullptr, int __base = 10);
 
-_LIBCPP_EXPORTED_FROM_ABI float stof(const wstring& __str, size_t* __idx = nullptr);
-_LIBCPP_EXPORTED_FROM_ABI double stod(const wstring& __str, size_t* __idx = nullptr);
-_LIBCPP_EXPORTED_FROM_ABI long double stold(const wstring& __str, size_t* __idx = nullptr);
+[[__nodiscard__]] _LIBCPP_EXPORTED_FROM_ABI float stof(const wstring& __str, size_t* __idx = nullptr);
+[[__nodiscard__]] _LIBCPP_EXPORTED_FROM_ABI double stod(const wstring& __str, size_t* __idx = nullptr);
+[[__nodiscard__]] _LIBCPP_EXPORTED_FROM_ABI long double stold(const wstring& __str, size_t* __idx = nullptr);
 
-_LIBCPP_EXPORTED_FROM_ABI wstring to_wstring(int __val);
-_LIBCPP_EXPORTED_FROM_ABI wstring to_wstring(unsigned __val);
-_LIBCPP_EXPORTED_FROM_ABI wstring to_wstring(long __val);
-_LIBCPP_EXPORTED_FROM_ABI wstring to_wstring(unsigned long __val);
-_LIBCPP_EXPORTED_FROM_ABI wstring to_wstring(long long __val);
-_LIBCPP_EXPORTED_FROM_ABI wstring to_wstring(unsigned long long __val);
-_LIBCPP_EXPORTED_FROM_ABI wstring to_wstring(float __val);
-_LIBCPP_EXPORTED_FROM_ABI wstring to_wstring(double __val);
-_LIBCPP_EXPORTED_FROM_ABI wstring to_wstring(long double __val);
+[[__nodiscard__]] _LIBCPP_EXPORTED_FROM_ABI wstring to_wstring(int __val);
+[[__nodiscard__]] _LIBCPP_EXPORTED_FROM_ABI wstring to_wstring(unsigned __val);
+[[__nodiscard__]] _LIBCPP_EXPORTED_FROM_ABI wstring to_wstring(long __val);
+[[__nodiscard__]] _LIBCPP_EXPORTED_FROM_ABI wstring to_wstring(unsigned long __val);
+[[__nodiscard__]] _LIBCPP_EXPORTED_FROM_ABI wstring to_wstring(long long __val);
+[[__nodiscard__]] _LIBCPP_EXPORTED_FROM_ABI wstring to_wstring(unsigned long long __val);
+[[__nodiscard__]] _LIBCPP_EXPORTED_FROM_ABI wstring to_wstring(float __val);
+[[__nodiscard__]] _LIBCPP_EXPORTED_FROM_ABI wstring to_wstring(double __val);
+[[__nodiscard__]] _LIBCPP_EXPORTED_FROM_ABI wstring to_wstring(long double __val);
 #  endif // _LIBCPP_HAS_WIDE_CHARACTERS
 
 template <class _CharT, class _Traits, class _Allocator>
@@ -3906,7 +3906,7 @@ _LIBCPP_TEMPLATE_DATA_VIS const typename basic_string<_CharT, _Traits, _Allocato
 
 template <class _CharT, class _Allocator>
 struct __string_hash : public __unary_function<basic_string<_CharT, char_traits<_CharT>, _Allocator>, size_t> {
-  _LIBCPP_HIDE_FROM_ABI size_t
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI size_t
   operator()(const basic_string<_CharT, char_traits<_CharT>, _Allocator>& __val) const _NOEXCEPT {
     return std::__do_string_hash(__val.data(), __val.data() + __val.size());
   }
@@ -3977,30 +3977,31 @@ erase_if(basic_string<_CharT, _Traits, _Allocator>& __str, _Predicate __pred) {
 // Literal suffixes for basic_string [basic.string.literals]
 inline namespace literals {
 inline namespace string_literals {
-inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 basic_string<char>
+[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 basic_string<char>
 operator""s(const char* __str, size_t __len) {
   return basic_string<char>(__str, __len);
 }
 
 #    if _LIBCPP_HAS_WIDE_CHARACTERS
-inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 basic_string<wchar_t>
-operator""s(const wchar_t* __str, size_t __len) {
+[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI
+_LIBCPP_CONSTEXPR_SINCE_CXX20 basic_string<wchar_t> operator""s(const wchar_t* __str, size_t __len) {
   return basic_string<wchar_t>(__str, __len);
 }
 #    endif
 
 #    if _LIBCPP_HAS_CHAR8_T
-inline _LIBCPP_HIDE_FROM_ABI constexpr basic_string<char8_t> operator""s(const char8_t* __str, size_t __len) {
+[[__nodiscard__]] inline
+    _LIBCPP_HIDE_FROM_ABI constexpr basic_string<char8_t> operator""s(const char8_t* __str, size_t __len) {
   return basic_string<char8_t>(__str, __len);
 }
 #    endif
 
-inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 basic_string<char16_t>
-operator""s(const char16_t* __str, size_t __len) {
+[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI
+_LIBCPP_CONSTEXPR_SINCE_CXX20 basic_string<char16_t> operator""s(const char16_t* __str, size_t __len) {
   return basic_string<char16_t>(__str, __len);
 }
 
-inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 basic_string<char32_t>
+[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 basic_string<char32_t>
 operator""s(const char32_t* __str, size_t __len) {
   return basic_string<char32_t>(__str, __len);
 }
diff --git a/lib/libcxx/include/string_view b/lib/libcxx/include/string_view
index 861187c064..5dd04a9ba8 100644
--- a/lib/libcxx/include/string_view
+++ b/lib/libcxx/include/string_view
@@ -130,6 +130,8 @@ namespace std {
       size_type copy(charT* s, size_type n, size_type pos = 0) const;  // constexpr in C++20
 
       constexpr basic_string_view substr(size_type pos = 0, size_type n = npos) const;
+      constexpr basic_string_view subview(size_type pos = 0,
+                                          size_type n = npos) const;      // freestanding-deleted, since C++26
       constexpr int compare(basic_string_view s) const noexcept;
       constexpr int compare(size_type pos1, size_type n1, basic_string_view s) const;
       constexpr int compare(size_type pos1, size_type n1,
@@ -318,9 +320,9 @@ public:
   _LIBCPP_HIDE_FROM_ABI basic_string_view& operator=(const basic_string_view&) _NOEXCEPT = default;
 
   _LIBCPP_CONSTEXPR _LIBCPP_HIDE_FROM_ABI basic_string_view(const _CharT* __s, size_type __len) _NOEXCEPT
-      : __data_(__s),
-        __size_(__len) {
-#  if _LIBCPP_STD_VER >= 14
+      _LIBCPP_DIAGNOSE_NULLPTR_IF(__len != 0 && __s == nullptr, " if len is not zero")
+      : __data_(__s), __size_(__len) {
+#  if !defined(_LIBCPP_CXX03_LANG) && (!defined(_LIBCPP_COMPILER_GCC) || _LIBCPP_STD_VER >= 14)
     // Allocations must fit in `ptrdiff_t` for pointer arithmetic to work. If `__len` exceeds it, the input
     // range could not have been valid. Most likely the caller underflowed some arithmetic and inadvertently
     // passed in a negative length.
@@ -352,7 +354,7 @@ public:
       : __data_(ranges::data(__r)), __size_(ranges::size(__r)) {}
 #  endif // _LIBCPP_STD_VER >= 23
 
-  _LIBCPP_CONSTEXPR _LIBCPP_HIDE_FROM_ABI basic_string_view(const _CharT* __s)
+  _LIBCPP_CONSTEXPR _LIBCPP_HIDE_FROM_ABI basic_string_view(const _CharT* _LIBCPP_DIAGNOSE_NULLPTR __s)
       : __data_(__s), __size_(std::__char_traits_length_checked<_Traits>(__s)) {}
 
 #  if _LIBCPP_STD_VER >= 23
@@ -360,11 +362,11 @@ public:
 #  endif
 
   // [string.view.iterators], iterators
-  _LIBCPP_CONSTEXPR _LIBCPP_HIDE_FROM_ABI const_iterator begin() const _NOEXCEPT { return cbegin(); }
+  [[__nodiscard__]] _LIBCPP_CONSTEXPR _LIBCPP_HIDE_FROM_ABI const_iterator begin() const _NOEXCEPT { return cbegin(); }
 
-  _LIBCPP_CONSTEXPR _LIBCPP_HIDE_FROM_ABI const_iterator end() const _NOEXCEPT { return cend(); }
+  [[__nodiscard__]] _LIBCPP_CONSTEXPR _LIBCPP_HIDE_FROM_ABI const_iterator end() const _NOEXCEPT { return cend(); }
 
-  _LIBCPP_CONSTEXPR _LIBCPP_HIDE_FROM_ABI const_iterator cbegin() const _NOEXCEPT {
+  [[__nodiscard__]] _LIBCPP_CONSTEXPR _LIBCPP_HIDE_FROM_ABI const_iterator cbegin() const _NOEXCEPT {
 #  ifdef _LIBCPP_ABI_BOUNDED_ITERATORS
     return std::__make_bounded_iter(data(), data(), data() + size());
 #  else
@@ -372,7 +374,7 @@ public:
 #  endif
   }
 
-  _LIBCPP_CONSTEXPR _LIBCPP_HIDE_FROM_ABI const_iterator cend() const _NOEXCEPT {
+  [[__nodiscard__]] _LIBCPP_CONSTEXPR _LIBCPP_HIDE_FROM_ABI const_iterator cend() const _NOEXCEPT {
 #  ifdef _LIBCPP_ABI_BOUNDED_ITERATORS
     return std::__make_bounded_iter(data() + size(), data(), data() + size());
 #  else
@@ -380,51 +382,54 @@ public:
 #  endif
   }
 
-  _LIBCPP_CONSTEXPR_SINCE_CXX17 _LIBCPP_HIDE_FROM_ABI const_reverse_iterator rbegin() const _NOEXCEPT {
+  [[__nodiscard__]] _LIBCPP_CONSTEXPR_SINCE_CXX17 _LIBCPP_HIDE_FROM_ABI const_reverse_iterator
+  rbegin() const _NOEXCEPT {
     return const_reverse_iterator(cend());
   }
 
-  _LIBCPP_CONSTEXPR_SINCE_CXX17 _LIBCPP_HIDE_FROM_ABI const_reverse_iterator rend() const _NOEXCEPT {
+  [[__nodiscard__]] _LIBCPP_CONSTEXPR_SINCE_CXX17 _LIBCPP_HIDE_FROM_ABI const_reverse_iterator rend() const _NOEXCEPT {
     return const_reverse_iterator(cbegin());
   }
 
-  _LIBCPP_CONSTEXPR_SINCE_CXX17 _LIBCPP_HIDE_FROM_ABI const_reverse_iterator crbegin() const _NOEXCEPT {
+  [[__nodiscard__]] _LIBCPP_CONSTEXPR_SINCE_CXX17 _LIBCPP_HIDE_FROM_ABI const_reverse_iterator
+  crbegin() const _NOEXCEPT {
     return const_reverse_iterator(cend());
   }
 
-  _LIBCPP_CONSTEXPR_SINCE_CXX17 _LIBCPP_HIDE_FROM_ABI const_reverse_iterator crend() const _NOEXCEPT {
+  [[__nodiscard__]] _LIBCPP_CONSTEXPR_SINCE_CXX17 _LIBCPP_HIDE_FROM_ABI const_reverse_iterator crend() const _NOEXCEPT {
     return const_reverse_iterator(cbegin());
   }
 
   // [string.view.capacity], capacity
-  _LIBCPP_CONSTEXPR _LIBCPP_HIDE_FROM_ABI size_type size() const _NOEXCEPT { return __size_; }
+  [[__nodiscard__]] _LIBCPP_CONSTEXPR _LIBCPP_HIDE_FROM_ABI size_type size() const _NOEXCEPT { return __size_; }
 
-  _LIBCPP_CONSTEXPR _LIBCPP_HIDE_FROM_ABI size_type length() const _NOEXCEPT { return __size_; }
+  [[__nodiscard__]] _LIBCPP_CONSTEXPR _LIBCPP_HIDE_FROM_ABI size_type length() const _NOEXCEPT { return __size_; }
 
-  _LIBCPP_CONSTEXPR _LIBCPP_HIDE_FROM_ABI size_type max_size() const _NOEXCEPT {
+  [[__nodiscard__]] _LIBCPP_CONSTEXPR _LIBCPP_HIDE_FROM_ABI size_type max_size() const _NOEXCEPT {
     return numeric_limits<size_type>::max() / sizeof(value_type);
   }
 
   [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR bool empty() const _NOEXCEPT { return __size_ == 0; }
 
   // [string.view.access], element access
-  _LIBCPP_CONSTEXPR _LIBCPP_HIDE_FROM_ABI const_reference operator[](size_type __pos) const _NOEXCEPT {
+  [[__nodiscard__]] _LIBCPP_CONSTEXPR _LIBCPP_HIDE_FROM_ABI const_reference
+  operator[](size_type __pos) const _NOEXCEPT {
     return _LIBCPP_ASSERT_VALID_ELEMENT_ACCESS(__pos < size(), "string_view[] index out of bounds"), __data_[__pos];
   }
 
-  _LIBCPP_CONSTEXPR _LIBCPP_HIDE_FROM_ABI const_reference at(size_type __pos) const {
+  [[__nodiscard__]] _LIBCPP_CONSTEXPR _LIBCPP_HIDE_FROM_ABI const_reference at(size_type __pos) const {
     return __pos >= size() ? (__throw_out_of_range("string_view::at"), __data_[0]) : __data_[__pos];
   }
 
-  _LIBCPP_CONSTEXPR _LIBCPP_HIDE_FROM_ABI const_reference front() const _NOEXCEPT {
+  [[__nodiscard__]] _LIBCPP_CONSTEXPR _LIBCPP_HIDE_FROM_ABI const_reference front() const _NOEXCEPT {
     return _LIBCPP_ASSERT_VALID_ELEMENT_ACCESS(!empty(), "string_view::front(): string is empty"), __data_[0];
   }
 
-  _LIBCPP_CONSTEXPR _LIBCPP_HIDE_FROM_ABI const_reference back() const _NOEXCEPT {
+  [[__nodiscard__]] _LIBCPP_CONSTEXPR _LIBCPP_HIDE_FROM_ABI const_reference back() const _NOEXCEPT {
     return _LIBCPP_ASSERT_VALID_ELEMENT_ACCESS(!empty(), "string_view::back(): string is empty"), __data_[__size_ - 1];
   }
 
-  _LIBCPP_CONSTEXPR _LIBCPP_HIDE_FROM_ABI const_pointer data() const _NOEXCEPT { return __data_; }
+  [[__nodiscard__]] _LIBCPP_CONSTEXPR _LIBCPP_HIDE_FROM_ABI const_pointer data() const _NOEXCEPT { return __data_; }
 
   // [string.view.modifiers], modifiers:
   _LIBCPP_CONSTEXPR_SINCE_CXX14 _LIBCPP_HIDE_FROM_ABI void remove_prefix(size_type __n) _NOEXCEPT {
@@ -457,15 +462,23 @@ public:
     return __rlen;
   }
 
-  _LIBCPP_CONSTEXPR _LIBCPP_HIDE_FROM_ABI basic_string_view substr(size_type __pos = 0, size_type __n = npos) const {
+  [[__nodiscard__]] _LIBCPP_CONSTEXPR _LIBCPP_HIDE_FROM_ABI basic_string_view
+  substr(size_type __pos = 0, size_type __n = npos) const {
     // Use the `__assume_valid` form of the constructor to avoid an unnecessary check. Any substring of a view is a
     // valid view. In particular, `size()` is known to be smaller than `numeric_limits<difference_type>::max()`, so the
-    // new size is also smaller. See also https://github.com/llvm/llvm-project/issues/91634.
+    // new size is also smaller. See also https://llvm.org/PR91634.
     return __pos > size() ? (__throw_out_of_range("string_view::substr"), basic_string_view())
                           : basic_string_view(__assume_valid(), data() + __pos, std::min(__n, size() - __pos));
   }
 
-  _LIBCPP_CONSTEXPR_SINCE_CXX14 int compare(basic_string_view __sv) const _NOEXCEPT {
+#  if _LIBCPP_STD_VER >= 26
+  [[nodiscard]]
+  _LIBCPP_HIDE_FROM_ABI constexpr basic_string_view subview(size_type __pos = 0, size_type __n = npos) const {
+    return substr(__pos, __n);
+  }
+#  endif
+
+  [[__nodiscard__]] _LIBCPP_CONSTEXPR_SINCE_CXX14 int compare(basic_string_view __sv) const _NOEXCEPT {
     size_type __rlen = std::min(size(), __sv.size());
     int __retval     = _Traits::compare(data(), __sv.data(), __rlen);
     if (__retval == 0) // first __rlen chars matched
@@ -473,217 +486,226 @@ public:
     return __retval;
   }
 
-  _LIBCPP_CONSTEXPR_SINCE_CXX14 _LIBCPP_HIDE_FROM_ABI int
+  [[__nodiscard__]] _LIBCPP_CONSTEXPR_SINCE_CXX14 _LIBCPP_HIDE_FROM_ABI int
   compare(size_type __pos1, size_type __n1, basic_string_view __sv) const {
     return substr(__pos1, __n1).compare(__sv);
   }
 
-  _LIBCPP_CONSTEXPR_SINCE_CXX14 _LIBCPP_HIDE_FROM_ABI int
+  [[__nodiscard__]] _LIBCPP_CONSTEXPR_SINCE_CXX14 _LIBCPP_HIDE_FROM_ABI int
   compare(size_type __pos1, size_type __n1, basic_string_view __sv, size_type __pos2, size_type __n2) const {
     return substr(__pos1, __n1).compare(__sv.substr(__pos2, __n2));
   }
 
-  _LIBCPP_CONSTEXPR_SINCE_CXX14 _LIBCPP_HIDE_FROM_ABI int compare(const _CharT* __s) const _NOEXCEPT {
+  [[__nodiscard__]] _LIBCPP_CONSTEXPR_SINCE_CXX14 _LIBCPP_HIDE_FROM_ABI int
+  compare(const _CharT* _LIBCPP_DIAGNOSE_NULLPTR __s) const _NOEXCEPT {
     return compare(basic_string_view(__s));
   }
 
-  _LIBCPP_CONSTEXPR_SINCE_CXX14 _LIBCPP_HIDE_FROM_ABI int
-  compare(size_type __pos1, size_type __n1, const _CharT* __s) const {
+  [[__nodiscard__]] _LIBCPP_CONSTEXPR_SINCE_CXX14 _LIBCPP_HIDE_FROM_ABI int
+  compare(size_type __pos1, size_type __n1, const _CharT* _LIBCPP_DIAGNOSE_NULLPTR __s) const {
     return substr(__pos1, __n1).compare(basic_string_view(__s));
   }
 
-  _LIBCPP_CONSTEXPR_SINCE_CXX14 _LIBCPP_HIDE_FROM_ABI int
-  compare(size_type __pos1, size_type __n1, const _CharT* __s, size_type __n2) const {
+  [[__nodiscard__]] _LIBCPP_CONSTEXPR_SINCE_CXX14 _LIBCPP_HIDE_FROM_ABI int
+  compare(size_type __pos1, size_type __n1, const _CharT* __s, size_type __n2) const
+      _LIBCPP_DIAGNOSE_NULLPTR_IF(__n2 != 0 && __s == nullptr, " if n2 is not zero") {
     return substr(__pos1, __n1).compare(basic_string_view(__s, __n2));
   }
 
   // find
-  _LIBCPP_CONSTEXPR_SINCE_CXX14 _LIBCPP_HIDE_FROM_ABI size_type
+  [[__nodiscard__]] _LIBCPP_CONSTEXPR_SINCE_CXX14 _LIBCPP_HIDE_FROM_ABI size_type
   find(basic_string_view __s, size_type __pos = 0) const _NOEXCEPT {
-    _LIBCPP_ASSERT_NON_NULL(__s.size() == 0 || __s.data() != nullptr, "string_view::find(): received nullptr");
     return std::__str_find<value_type, size_type, traits_type, npos>(data(), size(), __s.data(), __pos, __s.size());
   }
 
-  _LIBCPP_CONSTEXPR_SINCE_CXX14 _LIBCPP_HIDE_FROM_ABI size_type find(_CharT __c, size_type __pos = 0) const _NOEXCEPT {
+  [[__nodiscard__]] _LIBCPP_CONSTEXPR_SINCE_CXX14 _LIBCPP_HIDE_FROM_ABI size_type
+  find(_CharT __c, size_type __pos = 0) const _NOEXCEPT {
     return std::__str_find<value_type, size_type, traits_type, npos>(data(), size(), __c, __pos);
   }
 
-  _LIBCPP_CONSTEXPR_SINCE_CXX14 _LIBCPP_HIDE_FROM_ABI size_type
-  find(const _CharT* __s, size_type __pos, size_type __n) const _NOEXCEPT {
+  [[__nodiscard__]] _LIBCPP_CONSTEXPR_SINCE_CXX14 _LIBCPP_HIDE_FROM_ABI size_type
+  find(const _CharT* __s, size_type __pos, size_type __n) const _NOEXCEPT
+      _LIBCPP_DIAGNOSE_NULLPTR_IF(__n != 0 && __s == nullptr, " if n is not zero") {
     _LIBCPP_ASSERT_NON_NULL(__n == 0 || __s != nullptr, "string_view::find(): received nullptr");
     return std::__str_find<value_type, size_type, traits_type, npos>(data(), size(), __s, __pos, __n);
   }
 
-  _LIBCPP_CONSTEXPR_SINCE_CXX14 _LIBCPP_HIDE_FROM_ABI size_type
-  find(const _CharT* __s, size_type __pos = 0) const _NOEXCEPT {
+  [[__nodiscard__]] _LIBCPP_CONSTEXPR_SINCE_CXX14 _LIBCPP_HIDE_FROM_ABI size_type
+  find(const _CharT* _LIBCPP_DIAGNOSE_NULLPTR __s, size_type __pos = 0) const _NOEXCEPT {
     _LIBCPP_ASSERT_NON_NULL(__s != nullptr, "string_view::find(): received nullptr");
     return std::__str_find<value_type, size_type, traits_type, npos>(
         data(), size(), __s, __pos, traits_type::length(__s));
   }
 
   // rfind
-  _LIBCPP_CONSTEXPR_SINCE_CXX14 _LIBCPP_HIDE_FROM_ABI size_type
+  [[__nodiscard__]] _LIBCPP_CONSTEXPR_SINCE_CXX14 _LIBCPP_HIDE_FROM_ABI size_type
   rfind(basic_string_view __s, size_type __pos = npos) const _NOEXCEPT {
-    _LIBCPP_ASSERT_NON_NULL(__s.size() == 0 || __s.data() != nullptr, "string_view::find(): received nullptr");
     return std::__str_rfind<value_type, size_type, traits_type, npos>(data(), size(), __s.data(), __pos, __s.size());
   }
 
-  _LIBCPP_CONSTEXPR_SINCE_CXX14 _LIBCPP_HIDE_FROM_ABI size_type
+  [[__nodiscard__]] _LIBCPP_CONSTEXPR_SINCE_CXX14 _LIBCPP_HIDE_FROM_ABI size_type
   rfind(_CharT __c, size_type __pos = npos) const _NOEXCEPT {
     return std::__str_rfind<value_type, size_type, traits_type, npos>(data(), size(), __c, __pos);
   }
 
-  _LIBCPP_CONSTEXPR_SINCE_CXX14 _LIBCPP_HIDE_FROM_ABI size_type
-  rfind(const _CharT* __s, size_type __pos, size_type __n) const _NOEXCEPT {
+  [[__nodiscard__]] _LIBCPP_CONSTEXPR_SINCE_CXX14 _LIBCPP_HIDE_FROM_ABI size_type
+  rfind(const _CharT* __s, size_type __pos, size_type __n) const _NOEXCEPT
+      _LIBCPP_DIAGNOSE_NULLPTR_IF(__n != 0 && __s == nullptr, " if n is not zero") {
     _LIBCPP_ASSERT_NON_NULL(__n == 0 || __s != nullptr, "string_view::rfind(): received nullptr");
     return std::__str_rfind<value_type, size_type, traits_type, npos>(data(), size(), __s, __pos, __n);
   }
 
-  _LIBCPP_CONSTEXPR_SINCE_CXX14 _LIBCPP_HIDE_FROM_ABI size_type
-  rfind(const _CharT* __s, size_type __pos = npos) const _NOEXCEPT {
+  [[__nodiscard__]] _LIBCPP_CONSTEXPR_SINCE_CXX14 _LIBCPP_HIDE_FROM_ABI size_type
+  rfind(const _CharT* _LIBCPP_DIAGNOSE_NULLPTR __s, size_type __pos = npos) const _NOEXCEPT {
     _LIBCPP_ASSERT_NON_NULL(__s != nullptr, "string_view::rfind(): received nullptr");
     return std::__str_rfind<value_type, size_type, traits_type, npos>(
         data(), size(), __s, __pos, traits_type::length(__s));
   }
 
   // find_first_of
-  _LIBCPP_CONSTEXPR_SINCE_CXX14 _LIBCPP_HIDE_FROM_ABI size_type
+  [[__nodiscard__]] _LIBCPP_CONSTEXPR_SINCE_CXX14 _LIBCPP_HIDE_FROM_ABI size_type
   find_first_of(basic_string_view __s, size_type __pos = 0) const _NOEXCEPT {
-    _LIBCPP_ASSERT_NON_NULL(__s.size() == 0 || __s.data() != nullptr, "string_view::find_first_of(): received nullptr");
     return std::__str_find_first_of<value_type, size_type, traits_type, npos>(
         data(), size(), __s.data(), __pos, __s.size());
   }
 
-  _LIBCPP_CONSTEXPR_SINCE_CXX14 _LIBCPP_HIDE_FROM_ABI size_type
+  [[__nodiscard__]] _LIBCPP_CONSTEXPR_SINCE_CXX14 _LIBCPP_HIDE_FROM_ABI size_type
   find_first_of(_CharT __c, size_type __pos = 0) const _NOEXCEPT {
     return find(__c, __pos);
   }
 
-  _LIBCPP_CONSTEXPR_SINCE_CXX14 _LIBCPP_HIDE_FROM_ABI size_type
-  find_first_of(const _CharT* __s, size_type __pos, size_type __n) const _NOEXCEPT {
+  [[__nodiscard__]] _LIBCPP_CONSTEXPR_SINCE_CXX14 _LIBCPP_HIDE_FROM_ABI size_type
+  find_first_of(const _CharT* __s, size_type __pos, size_type __n) const _NOEXCEPT
+      _LIBCPP_DIAGNOSE_NULLPTR_IF(__n != 0 && __s == nullptr, " if n is not zero") {
     _LIBCPP_ASSERT_NON_NULL(__n == 0 || __s != nullptr, "string_view::find_first_of(): received nullptr");
     return std::__str_find_first_of<value_type, size_type, traits_type, npos>(data(), size(), __s, __pos, __n);
   }
 
-  _LIBCPP_CONSTEXPR_SINCE_CXX14 _LIBCPP_HIDE_FROM_ABI size_type
-  find_first_of(const _CharT* __s, size_type __pos = 0) const _NOEXCEPT {
+  [[__nodiscard__]] _LIBCPP_CONSTEXPR_SINCE_CXX14 _LIBCPP_HIDE_FROM_ABI size_type
+  find_first_of(const _CharT* _LIBCPP_DIAGNOSE_NULLPTR __s, size_type __pos = 0) const _NOEXCEPT {
     _LIBCPP_ASSERT_NON_NULL(__s != nullptr, "string_view::find_first_of(): received nullptr");
     return std::__str_find_first_of<value_type, size_type, traits_type, npos>(
         data(), size(), __s, __pos, traits_type::length(__s));
   }
 
   // find_last_of
-  _LIBCPP_CONSTEXPR_SINCE_CXX14 _LIBCPP_HIDE_FROM_ABI size_type
+  [[__nodiscard__]] _LIBCPP_CONSTEXPR_SINCE_CXX14 _LIBCPP_HIDE_FROM_ABI size_type
   find_last_of(basic_string_view __s, size_type __pos = npos) const _NOEXCEPT {
-    _LIBCPP_ASSERT_NON_NULL(__s.size() == 0 || __s.data() != nullptr, "string_view::find_last_of(): received nullptr");
     return std::__str_find_last_of<value_type, size_type, traits_type, npos>(
         data(), size(), __s.data(), __pos, __s.size());
   }
 
-  _LIBCPP_CONSTEXPR_SINCE_CXX14 _LIBCPP_HIDE_FROM_ABI size_type
+  [[__nodiscard__]] _LIBCPP_CONSTEXPR_SINCE_CXX14 _LIBCPP_HIDE_FROM_ABI size_type
   find_last_of(_CharT __c, size_type __pos = npos) const _NOEXCEPT {
     return rfind(__c, __pos);
   }
 
-  _LIBCPP_CONSTEXPR_SINCE_CXX14 _LIBCPP_HIDE_FROM_ABI size_type
-  find_last_of(const _CharT* __s, size_type __pos, size_type __n) const _NOEXCEPT {
+  [[__nodiscard__]] _LIBCPP_CONSTEXPR_SINCE_CXX14 _LIBCPP_HIDE_FROM_ABI size_type
+  find_last_of(const _CharT* __s, size_type __pos, size_type __n) const _NOEXCEPT
+      _LIBCPP_DIAGNOSE_NULLPTR_IF(__n != 0 && __s == nullptr, " if n is not zero") {
     _LIBCPP_ASSERT_NON_NULL(__n == 0 || __s != nullptr, "string_view::find_last_of(): received nullptr");
     return std::__str_find_last_of<value_type, size_type, traits_type, npos>(data(), size(), __s, __pos, __n);
   }
 
-  _LIBCPP_CONSTEXPR_SINCE_CXX14 _LIBCPP_HIDE_FROM_ABI size_type
-  find_last_of(const _CharT* __s, size_type __pos = npos) const _NOEXCEPT {
+  [[__nodiscard__]] _LIBCPP_CONSTEXPR_SINCE_CXX14 _LIBCPP_HIDE_FROM_ABI size_type
+  find_last_of(const _CharT* _LIBCPP_DIAGNOSE_NULLPTR __s, size_type __pos = npos) const _NOEXCEPT {
     _LIBCPP_ASSERT_NON_NULL(__s != nullptr, "string_view::find_last_of(): received nullptr");
     return std::__str_find_last_of<value_type, size_type, traits_type, npos>(
         data(), size(), __s, __pos, traits_type::length(__s));
   }
 
   // find_first_not_of
-  _LIBCPP_CONSTEXPR_SINCE_CXX14 _LIBCPP_HIDE_FROM_ABI size_type
+  [[__nodiscard__]] _LIBCPP_CONSTEXPR_SINCE_CXX14 _LIBCPP_HIDE_FROM_ABI size_type
   find_first_not_of(basic_string_view __s, size_type __pos = 0) const _NOEXCEPT {
-    _LIBCPP_ASSERT_NON_NULL(
-        __s.size() == 0 || __s.data() != nullptr, "string_view::find_first_not_of(): received nullptr");
     return std::__str_find_first_not_of<value_type, size_type, traits_type, npos>(
         data(), size(), __s.data(), __pos, __s.size());
   }
 
-  _LIBCPP_CONSTEXPR_SINCE_CXX14 _LIBCPP_HIDE_FROM_ABI size_type
+  [[__nodiscard__]] _LIBCPP_CONSTEXPR_SINCE_CXX14 _LIBCPP_HIDE_FROM_ABI size_type
   find_first_not_of(_CharT __c, size_type __pos = 0) const _NOEXCEPT {
     return std::__str_find_first_not_of<value_type, size_type, traits_type, npos>(data(), size(), __c, __pos);
   }
 
-  _LIBCPP_CONSTEXPR_SINCE_CXX14 _LIBCPP_HIDE_FROM_ABI size_type
-  find_first_not_of(const _CharT* __s, size_type __pos, size_type __n) const _NOEXCEPT {
+  [[__nodiscard__]] _LIBCPP_CONSTEXPR_SINCE_CXX14 _LIBCPP_HIDE_FROM_ABI size_type
+  find_first_not_of(const _CharT* __s, size_type __pos, size_type __n) const _NOEXCEPT
+      _LIBCPP_DIAGNOSE_NULLPTR_IF(__n != 0 && __s == nullptr, " if n is not zero") {
     _LIBCPP_ASSERT_NON_NULL(__n == 0 || __s != nullptr, "string_view::find_first_not_of(): received nullptr");
     return std::__str_find_first_not_of<value_type, size_type, traits_type, npos>(data(), size(), __s, __pos, __n);
   }
 
-  _LIBCPP_CONSTEXPR_SINCE_CXX14 _LIBCPP_HIDE_FROM_ABI size_type
-  find_first_not_of(const _CharT* __s, size_type __pos = 0) const _NOEXCEPT {
+  [[__nodiscard__]] _LIBCPP_CONSTEXPR_SINCE_CXX14 _LIBCPP_HIDE_FROM_ABI size_type
+  find_first_not_of(const _CharT* _LIBCPP_DIAGNOSE_NULLPTR __s, size_type __pos = 0) const _NOEXCEPT {
     _LIBCPP_ASSERT_NON_NULL(__s != nullptr, "string_view::find_first_not_of(): received nullptr");
     return std::__str_find_first_not_of<value_type, size_type, traits_type, npos>(
         data(), size(), __s, __pos, traits_type::length(__s));
   }
 
   // find_last_not_of
-  _LIBCPP_CONSTEXPR_SINCE_CXX14 _LIBCPP_HIDE_FROM_ABI size_type
+  [[__nodiscard__]] _LIBCPP_CONSTEXPR_SINCE_CXX14 _LIBCPP_HIDE_FROM_ABI size_type
   find_last_not_of(basic_string_view __s, size_type __pos = npos) const _NOEXCEPT {
-    _LIBCPP_ASSERT_NON_NULL(
-        __s.size() == 0 || __s.data() != nullptr, "string_view::find_last_not_of(): received nullptr");
     return std::__str_find_last_not_of<value_type, size_type, traits_type, npos>(
         data(), size(), __s.data(), __pos, __s.size());
   }
 
-  _LIBCPP_CONSTEXPR_SINCE_CXX14 _LIBCPP_HIDE_FROM_ABI size_type
+  [[__nodiscard__]] _LIBCPP_CONSTEXPR_SINCE_CXX14 _LIBCPP_HIDE_FROM_ABI size_type
   find_last_not_of(_CharT __c, size_type __pos = npos) const _NOEXCEPT {
     return std::__str_find_last_not_of<value_type, size_type, traits_type, npos>(data(), size(), __c, __pos);
   }
 
-  _LIBCPP_CONSTEXPR_SINCE_CXX14 _LIBCPP_HIDE_FROM_ABI size_type
-  find_last_not_of(const _CharT* __s, size_type __pos, size_type __n) const _NOEXCEPT {
+  [[__nodiscard__]] _LIBCPP_CONSTEXPR_SINCE_CXX14 _LIBCPP_HIDE_FROM_ABI size_type
+  find_last_not_of(const _CharT* __s, size_type __pos, size_type __n) const _NOEXCEPT
+      _LIBCPP_DIAGNOSE_NULLPTR_IF(__n != 0 && __s == nullptr, " if n is not zero") {
     _LIBCPP_ASSERT_NON_NULL(__n == 0 || __s != nullptr, "string_view::find_last_not_of(): received nullptr");
     return std::__str_find_last_not_of<value_type, size_type, traits_type, npos>(data(), size(), __s, __pos, __n);
   }
 
-  _LIBCPP_CONSTEXPR_SINCE_CXX14 _LIBCPP_HIDE_FROM_ABI size_type
-  find_last_not_of(const _CharT* __s, size_type __pos = npos) const _NOEXCEPT {
+  [[__nodiscard__]] _LIBCPP_CONSTEXPR_SINCE_CXX14 _LIBCPP_HIDE_FROM_ABI size_type
+  find_last_not_of(const _CharT* _LIBCPP_DIAGNOSE_NULLPTR __s, size_type __pos = npos) const _NOEXCEPT {
     _LIBCPP_ASSERT_NON_NULL(__s != nullptr, "string_view::find_last_not_of(): received nullptr");
     return std::__str_find_last_not_of<value_type, size_type, traits_type, npos>(
         data(), size(), __s, __pos, traits_type::length(__s));
   }
 
 #  if _LIBCPP_STD_VER >= 20
-  constexpr _LIBCPP_HIDE_FROM_ABI bool starts_with(basic_string_view __s) const noexcept {
+  [[nodiscard]] constexpr _LIBCPP_HIDE_FROM_ABI bool starts_with(basic_string_view __s) const noexcept {
     return size() >= __s.size() && compare(0, __s.size(), __s) == 0;
   }
 
-  constexpr _LIBCPP_HIDE_FROM_ABI bool starts_with(value_type __c) const noexcept {
+  [[nodiscard]] constexpr _LIBCPP_HIDE_FROM_ABI bool starts_with(value_type __c) const noexcept {
     return !empty() && _Traits::eq(front(), __c);
   }
 
-  constexpr _LIBCPP_HIDE_FROM_ABI bool starts_with(const value_type* __s) const noexcept {
+  [[nodiscard]] constexpr _LIBCPP_HIDE_FROM_ABI bool
+  starts_with(const value_type* _LIBCPP_DIAGNOSE_NULLPTR __s) const noexcept {
     return starts_with(basic_string_view(__s));
   }
 
-  constexpr _LIBCPP_HIDE_FROM_ABI bool ends_with(basic_string_view __s) const noexcept {
+  [[nodiscard]] constexpr _LIBCPP_HIDE_FROM_ABI bool ends_with(basic_string_view __s) const noexcept {
     return size() >= __s.size() && compare(size() - __s.size(), npos, __s) == 0;
   }
 
-  constexpr _LIBCPP_HIDE_FROM_ABI bool ends_with(value_type __c) const noexcept {
+  [[nodiscard]] constexpr _LIBCPP_HIDE_FROM_ABI bool ends_with(value_type __c) const noexcept {
     return !empty() && _Traits::eq(back(), __c);
   }
 
-  constexpr _LIBCPP_HIDE_FROM_ABI bool ends_with(const value_type* __s) const noexcept {
+  [[nodiscard]] constexpr _LIBCPP_HIDE_FROM_ABI bool
+  ends_with(const value_type* _LIBCPP_DIAGNOSE_NULLPTR __s) const noexcept {
     return ends_with(basic_string_view(__s));
   }
 #  endif
 
 #  if _LIBCPP_STD_VER >= 23
-  constexpr _LIBCPP_HIDE_FROM_ABI bool contains(basic_string_view __sv) const noexcept { return find(__sv) != npos; }
+  [[nodiscard]] constexpr _LIBCPP_HIDE_FROM_ABI bool contains(basic_string_view __sv) const noexcept {
+    return find(__sv) != npos;
+  }
 
-  constexpr _LIBCPP_HIDE_FROM_ABI bool contains(value_type __c) const noexcept { return find(__c) != npos; }
+  [[nodiscard]] constexpr _LIBCPP_HIDE_FROM_ABI bool contains(value_type __c) const noexcept {
+    return find(__c) != npos;
+  }
 
-  constexpr _LIBCPP_HIDE_FROM_ABI bool contains(const value_type* __s) const { return find(__s) != npos; }
+  [[nodiscard]] constexpr _LIBCPP_HIDE_FROM_ABI bool contains(const value_type* _LIBCPP_DIAGNOSE_NULLPTR __s) const {
+    return find(__s) != npos;
+  }
 #  endif
 
 private:
@@ -886,7 +908,8 @@ operator<<(basic_ostream<_CharT, _Traits>& __os, basic_string_view<_CharT, _Trai
 // [string.view.hash]
 template <class _CharT>
 struct __string_view_hash : public __unary_function<basic_string_view<_CharT, char_traits<_CharT> >, size_t> {
-  _LIBCPP_HIDE_FROM_ABI size_t operator()(const basic_string_view<_CharT, char_traits<_CharT> > __val) const _NOEXCEPT {
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI size_t
+  operator()(const basic_string_view<_CharT, char_traits<_CharT> > __val) const _NOEXCEPT {
     return std::__do_string_hash(__val.data(), __val.data() + __val.size());
   }
 };
@@ -913,30 +936,31 @@ struct hash<basic_string_view<wchar_t, char_traits<wchar_t> > > : __string_view_
 #  if _LIBCPP_STD_VER >= 14
 inline namespace literals {
 inline namespace string_view_literals {
-inline _LIBCPP_HIDE_FROM_ABI constexpr basic_string_view<char> operator""sv(const char* __str, size_t __len) noexcept {
+[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI constexpr basic_string_view<char>
+operator""sv(const char* __str, size_t __len) noexcept {
   return basic_string_view<char>(__str, __len);
 }
 
 #    if _LIBCPP_HAS_WIDE_CHARACTERS
-inline _LIBCPP_HIDE_FROM_ABI constexpr basic_string_view<wchar_t>
+[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI constexpr basic_string_view<wchar_t>
 operator""sv(const wchar_t* __str, size_t __len) noexcept {
   return basic_string_view<wchar_t>(__str, __len);
 }
 #    endif
 
 #    if _LIBCPP_HAS_CHAR8_T
-inline _LIBCPP_HIDE_FROM_ABI constexpr basic_string_view<char8_t>
+[[nodiscard]] inline _LIBCPP_HIDE_FROM_ABI constexpr basic_string_view<char8_t>
 operator""sv(const char8_t* __str, size_t __len) noexcept {
   return basic_string_view<char8_t>(__str, __len);
 }
 #    endif
 
-inline _LIBCPP_HIDE_FROM_ABI constexpr basic_string_view<char16_t>
+[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI constexpr basic_string_view<char16_t>
 operator""sv(const char16_t* __str, size_t __len) noexcept {
   return basic_string_view<char16_t>(__str, __len);
 }
 
-inline _LIBCPP_HIDE_FROM_ABI constexpr basic_string_view<char32_t>
+[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI constexpr basic_string_view<char32_t>
 operator""sv(const char32_t* __str, size_t __len) noexcept {
   return basic_string_view<char32_t>(__str, __len);
 }
diff --git a/lib/libcxx/include/strstream b/lib/libcxx/include/strstream
index 1a17f8389c..b33977ff66 100644
--- a/lib/libcxx/include/strstream
+++ b/lib/libcxx/include/strstream
@@ -179,8 +179,8 @@ public:
   void swap(strstreambuf& __rhs);
 
   void freeze(bool __freezefl = true);
-  char* str();
-  int pcount() const;
+  [[__nodiscard__]] char* str();
+  [[__nodiscard__]] int pcount() const;
 
 protected:
   int_type overflow(int_type __c = EOF) override;
@@ -264,8 +264,8 @@ public:
     __sb_.swap(__rhs.__sb_);
   }
 
-  _LIBCPP_HIDE_FROM_ABI strstreambuf* rdbuf() const { return const_cast<strstreambuf*>(&__sb_); }
-  _LIBCPP_HIDE_FROM_ABI char* str() { return __sb_.str(); }
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI strstreambuf* rdbuf() const { return const_cast<strstreambuf*>(&__sb_); }
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI char* str() { return __sb_.str(); }
 
 private:
   strstreambuf __sb_;
@@ -297,10 +297,10 @@ public:
     __sb_.swap(__rhs.__sb_);
   }
 
-  _LIBCPP_HIDE_FROM_ABI strstreambuf* rdbuf() const { return const_cast<strstreambuf*>(&__sb_); }
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI strstreambuf* rdbuf() const { return const_cast<strstreambuf*>(&__sb_); }
   _LIBCPP_HIDE_FROM_ABI void freeze(bool __freezefl = true) { __sb_.freeze(__freezefl); }
-  _LIBCPP_HIDE_FROM_ABI char* str() { return __sb_.str(); }
-  _LIBCPP_HIDE_FROM_ABI int pcount() const { return __sb_.pcount(); }
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI char* str() { return __sb_.str(); }
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI int pcount() const { return __sb_.pcount(); }
 
 private:
   strstreambuf __sb_; // exposition only
@@ -340,10 +340,10 @@ public:
   }
 
   // Members:
-  _LIBCPP_HIDE_FROM_ABI strstreambuf* rdbuf() const { return const_cast<strstreambuf*>(&__sb_); }
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI strstreambuf* rdbuf() const { return const_cast<strstreambuf*>(&__sb_); }
   _LIBCPP_HIDE_FROM_ABI void freeze(bool __freezefl = true) { __sb_.freeze(__freezefl); }
-  _LIBCPP_HIDE_FROM_ABI int pcount() const { return __sb_.pcount(); }
-  _LIBCPP_HIDE_FROM_ABI char* str() { return __sb_.str(); }
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI int pcount() const { return __sb_.pcount(); }
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI char* str() { return __sb_.str(); }
 
 private:
   strstreambuf __sb_; // exposition only
diff --git a/lib/libcxx/include/syncstream b/lib/libcxx/include/syncstream
index 1f7605e06a..bd64453683 100644
--- a/lib/libcxx/include/syncstream
+++ b/lib/libcxx/include/syncstream
@@ -317,9 +317,9 @@ public:
 
   _LIBCPP_HIDE_FROM_ABI bool emit() { return emit(false); }
 
-  _LIBCPP_HIDE_FROM_ABI streambuf_type* get_wrapped() const noexcept { return __wrapped_; }
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI streambuf_type* get_wrapped() const noexcept { return __wrapped_; }
 
-  _LIBCPP_HIDE_FROM_ABI allocator_type get_allocator() const noexcept { return __str_.get_allocator(); }
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI allocator_type get_allocator() const noexcept { return __str_.get_allocator(); }
 
   _LIBCPP_HIDE_FROM_ABI void set_emit_on_sync(bool __b) noexcept { __emit_on_sync_ = __b; }
 
@@ -496,9 +496,9 @@ public:
     }
   }
 
-  _LIBCPP_HIDE_FROM_ABI streambuf_type* get_wrapped() const noexcept { return __sb_.get_wrapped(); }
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI streambuf_type* get_wrapped() const noexcept { return __sb_.get_wrapped(); }
 
-  _LIBCPP_HIDE_FROM_ABI syncbuf_type* rdbuf() const noexcept {
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI syncbuf_type* rdbuf() const noexcept {
     return const_cast<syncbuf_type*>(std::addressof(__sb_));
   }
 
diff --git a/lib/libcxx/include/tgmath.h b/lib/libcxx/include/tgmath.h
index 3f8f14fd57..6ed7a2bd68 100644
--- a/lib/libcxx/include/tgmath.h
+++ b/lib/libcxx/include/tgmath.h
@@ -18,22 +18,22 @@
 */
 
 #if defined(__cplusplus) && __cplusplus < 201103L && defined(_LIBCPP_USE_FROZEN_CXX03_HEADERS)
-#  include <__cxx03/tgmath.h>
+#  include <__cxx03/__config>
 #else
 #  include <__config>
+#endif
 
-#  if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
-#    pragma GCC system_header
-#  endif
+#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
+#  pragma GCC system_header
+#endif
 
-#  ifdef __cplusplus
-#    include <cmath>
-#    include <complex>
-#  else
-#    if __has_include_next(<tgmath.h>)
-#      include_next <tgmath.h>
-#    endif
+#ifdef __cplusplus
+#  include <cmath>
+#  include <complex>
+#else
+#  if __has_include_next(<tgmath.h>)
+#    include_next <tgmath.h>
 #  endif
-#endif // defined(__cplusplus) && __cplusplus < 201103L && defined(_LIBCPP_USE_FROZEN_CXX03_HEADERS)
+#endif
 
 #endif // _LIBCPP_TGMATH_H
diff --git a/lib/libcxx/include/tuple b/lib/libcxx/include/tuple
index 75021f0ea5..e02e45f425 100644
--- a/lib/libcxx/include/tuple
+++ b/lib/libcxx/include/tuple
@@ -106,6 +106,11 @@ public:
 
     void swap(tuple&) noexcept(AND(swap(declval<T&>(), declval<T&>())...));               // constexpr in C++20
     constexpr void swap(const tuple&) const noexcept(see-below);                          // C++23
+
+    template<tuple-like UTuple>
+        friend constexpr bool operator==(const tuple& t, const UTuple& u);                // C++23
+    template<tuple-like UTuple>
+        friend constexpr auto operator<=>(const tuple& t, const UTuple& u);               // C++23
 };
 
 
@@ -220,19 +225,16 @@ template <class... Types>
 #  include <__config>
 #  include <__cstddef/size_t.h>
 #  include <__fwd/array.h>
+#  include <__fwd/get.h>
 #  include <__fwd/pair.h>
 #  include <__fwd/tuple.h>
 #  include <__memory/allocator_arg_t.h>
 #  include <__memory/uses_allocator.h>
 #  include <__tuple/find_index.h>
 #  include <__tuple/ignore.h>
-#  include <__tuple/make_tuple_types.h>
-#  include <__tuple/sfinae_helpers.h>
 #  include <__tuple/tuple_element.h>
-#  include <__tuple/tuple_indices.h>
-#  include <__tuple/tuple_like_ext.h>
+#  include <__tuple/tuple_like.h>
 #  include <__tuple/tuple_size.h>
-#  include <__tuple/tuple_types.h>
 #  include <__type_traits/common_reference.h>
 #  include <__type_traits/common_type.h>
 #  include <__type_traits/conditional.h>
@@ -241,7 +243,6 @@ template <class... Types>
 #  include <__type_traits/disjunction.h>
 #  include <__type_traits/enable_if.h>
 #  include <__type_traits/invoke.h>
-#  include <__type_traits/is_arithmetic.h>
 #  include <__type_traits/is_assignable.h>
 #  include <__type_traits/is_constructible.h>
 #  include <__type_traits/is_convertible.h>
@@ -251,7 +252,6 @@ template <class... Types>
 #  include <__type_traits/is_nothrow_assignable.h>
 #  include <__type_traits/is_nothrow_constructible.h>
 #  include <__type_traits/is_reference.h>
-#  include <__type_traits/is_replaceable.h>
 #  include <__type_traits/is_same.h>
 #  include <__type_traits/is_swappable.h>
 #  include <__type_traits/is_trivially_relocatable.h>
@@ -263,12 +263,12 @@ template <class... Types>
 #  include <__type_traits/remove_cv.h>
 #  include <__type_traits/remove_cvref.h>
 #  include <__type_traits/remove_reference.h>
+#  include <__type_traits/type_list.h>
 #  include <__type_traits/unwrap_ref.h>
 #  include <__utility/declval.h>
 #  include <__utility/forward.h>
 #  include <__utility/integer_sequence.h>
 #  include <__utility/move.h>
-#  include <__utility/piecewise_construct.h>
 #  include <__utility/swap.h>
 #  include <version>
 
@@ -288,9 +288,65 @@ _LIBCPP_BEGIN_NAMESPACE_STD
 
 #  ifndef _LIBCPP_CXX03_LANG
 
+template <size_t _Ip, class _Tp, class _Up>
+_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 bool __tuple_compare_equal(const _Tp& __x, const _Up& __y) {
+  if constexpr (_Ip == 0)
+    return true;
+  else
+    return std::__tuple_compare_equal<_Ip - 1>(__x, __y) && std::get<_Ip - 1>(__x) == std::get<_Ip - 1>(__y);
+}
+
+#    if _LIBCPP_STD_VER >= 26
+template <class _Tp, class _Up, class _IndexSeq = make_index_sequence<tuple_size_v<_Tp>>>
+inline constexpr bool __can_tuple_compare_equal = false;
+
+// TODO(LLVM 23): Remove `tuple_size_v<_Tp> == tuple_size_v<_Up>` here once LLVM-20 support ends
+// because the resolution of CWG2369 landed in LLVM-21.
+template <class _Tp, class _Up, size_t... _Is>
+  requires(tuple_size_v<_Tp> == tuple_size_v<_Up>)
+inline constexpr bool __can_tuple_compare_equal<_Tp, _Up, index_sequence<_Is...>> =
+    __all<requires(const tuple_element_t<_Is, _Tp>& __t, const tuple_element_t<_Is, _Up>& __u) {
+      { __t == __u } -> __boolean_testable;
+    }...>::value;
+#    endif // _LIBCPP_STD_VER >= 26
+
+#    if _LIBCPP_STD_VER >= 20
+template <class _Ret, class _Tp, class _Up, size_t... _Is>
+_LIBCPP_HIDE_FROM_ABI constexpr _Ret __tuple_compare_three_way(const _Tp& __x, const _Up& __y, index_sequence<_Is...>) {
+  _Ret __result = strong_ordering::equal;
+  static_cast<void>(
+      ((__result = std::__synth_three_way(std::get<_Is>(__x), std::get<_Is>(__y)), __result != 0) || ...));
+  return __result;
+}
+#    endif // _LIBCPP_STD_VER >= 20
+
+#    if _LIBCPP_STD_VER >= 23
+template <class _Tp>
+concept __tuple_like_no_tuple = __tuple_like<_Tp> && !__is_tuple_v<_Tp>;
+
+template <class _Tp, class _Up, class _IndexSeq>
+struct __tuple_common_comparison_category_impl {};
+
+// TODO(LLVM 23): Remove `tuple_size_v<_Tp> == tuple_size_v<_Up>` here once LLVM-20 support ends
+// because the resolution of CWG2369 landed in LLVM-21.
+template <class _Tp, class _Up, size_t... _Is>
+  requires(tuple_size_v<_Tp> == tuple_size_v<_Up>) && requires {
+    typename common_comparison_category_t<
+        __synth_three_way_result<tuple_element_t<_Is, _Tp>, tuple_element_t<_Is, _Up>>...>;
+  }
+struct __tuple_common_comparison_category_impl<_Tp, _Up, index_sequence<_Is...>> {
+  using type _LIBCPP_NODEBUG =
+      common_comparison_category_t<__synth_three_way_result<tuple_element_t<_Is, _Tp>, tuple_element_t<_Is, _Up>>...>;
+};
+
+template <__tuple_like _Tp, __tuple_like _Up>
+using __tuple_common_comparison_category _LIBCPP_NODEBUG =
+    __tuple_common_comparison_category_impl<_Tp, _Up, make_index_sequence<tuple_size_v<_Tp>>>::type;
+#    endif // _LIBCPP_STD_VER >= 23
+
 // __tuple_leaf
 
-template <size_t _Ip, class _Hp, bool = is_empty<_Hp>::value && !__libcpp_is_final<_Hp>::value >
+template <size_t _Ip, class _Hp, bool = is_empty<_Hp>::value && !__is_final_v<_Hp> >
 class __tuple_leaf;
 
 template <size_t _Ip, class _Hp, bool _Ep>
@@ -444,62 +500,52 @@ public:
 template <class... _Tp>
 _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 void __swallow(_Tp&&...) _NOEXCEPT {}
 
-template <class _Tp>
-struct __all_default_constructible;
-
-template <class... _Tp>
-struct __all_default_constructible<__tuple_types<_Tp...>> : __all<is_default_constructible<_Tp>::value...> {};
-
 // __tuple_impl
 
 template <class _Indx, class... _Tp>
 struct __tuple_impl;
 
+struct __forward_args {};
+struct __value_init {};
+struct __from_tuple {};
+
 template <size_t... _Indx, class... _Tp>
 struct _LIBCPP_DECLSPEC_EMPTY_BASES
-    __tuple_impl<__tuple_indices<_Indx...>, _Tp...> : public __tuple_leaf<_Indx, _Tp>... {
+    __tuple_impl<__index_sequence<_Indx...>, _Tp...> : public __tuple_leaf<_Indx, _Tp>... {
   _LIBCPP_HIDE_FROM_ABI constexpr __tuple_impl() noexcept(
       __all<is_nothrow_default_constructible<_Tp>::value...>::value) {}
 
-  template <size_t... _Uf, class... _Tf, size_t... _Ul, class... _Tl, class... _Up>
-  _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 explicit __tuple_impl(
-      __tuple_indices<_Uf...>,
-      __tuple_types<_Tf...>,
-      __tuple_indices<_Ul...>,
-      __tuple_types<_Tl...>,
-      _Up&&... __u) noexcept(__all<is_nothrow_constructible<_Tf, _Up>::value...>::value &&
-                             __all<is_nothrow_default_constructible<_Tl>::value...>::value)
-      : __tuple_leaf<_Uf, _Tf>(std::forward<_Up>(__u))..., __tuple_leaf<_Ul, _Tl>()... {}
+  template <class... _Args>
+  _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 explicit __tuple_impl(__forward_args, _Args&&... __args)
+      : __tuple_leaf<_Indx, _Tp>(std::forward<_Args>(__args))... {}
 
-  template <class _Alloc, size_t... _Uf, class... _Tf, size_t... _Ul, class... _Tl, class... _Up>
-  _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 explicit __tuple_impl(
-      allocator_arg_t,
-      const _Alloc& __a,
-      __tuple_indices<_Uf...>,
-      __tuple_types<_Tf...>,
-      __tuple_indices<_Ul...>,
-      __tuple_types<_Tl...>,
-      _Up&&... __u)
-      : __tuple_leaf<_Uf, _Tf>(__uses_alloc_ctor<_Tf, _Alloc, _Up>(), __a, std::forward<_Up>(__u))...,
-        __tuple_leaf<_Ul, _Tl>(__uses_alloc_ctor<_Tl, _Alloc>(), __a)... {}
+  template <class _Alloc>
+  _LIBCPP_HIDE_FROM_ABI
+  _LIBCPP_CONSTEXPR_SINCE_CXX14 explicit __tuple_impl(allocator_arg_t, const _Alloc& __alloc, __value_init)
+      : __tuple_leaf<_Indx, _Tp>(__uses_alloc_ctor<_Tp, _Alloc>(), __alloc)... {}
 
-  template <class _Tuple, __enable_if_t<__tuple_constructible<_Tuple, tuple<_Tp...> >::value, int> = 0>
-  _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 __tuple_impl(_Tuple&& __t) noexcept(
-      (__all<is_nothrow_constructible<
-           _Tp,
-           typename tuple_element<_Indx, typename __make_tuple_types<_Tuple>::type>::type>::value...>::value))
+  template <class _Alloc, class... _Args>
+  _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 explicit __tuple_impl(
+      allocator_arg_t, const _Alloc& __alloc, __forward_args, _Args&&... __args)
+      : __tuple_leaf<_Indx, _Tp>(__uses_alloc_ctor<_Tp, _Alloc, _Args>(), __alloc, std::forward<_Args>(__args))... {}
+
+  template <class _Tuple>
+  _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 __tuple_impl(__from_tuple, _Tuple&& __t) noexcept(
+      (__all<is_nothrow_constructible<_Tp, __copy_cvref_t<_Tuple, __tuple_element_t<_Indx, __remove_cvref_t<_Tuple>>>>::
+                 value...>::value))
       : __tuple_leaf<_Indx, _Tp>(
-            std::forward<typename tuple_element<_Indx, typename __make_tuple_types<_Tuple>::type>::type>(
+            std::forward<__copy_cvref_t<_Tuple, __tuple_element_t<_Indx, __remove_cvref_t<_Tuple>>>>(
                 std::get<_Indx>(__t)))... {}
 
-  template <class _Alloc, class _Tuple, __enable_if_t<__tuple_constructible<_Tuple, tuple<_Tp...> >::value, int> = 0>
-  _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 __tuple_impl(allocator_arg_t, const _Alloc& __a, _Tuple&& __t)
+  template <class _Alloc, class _Tuple>
+  _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14
+  __tuple_impl(allocator_arg_t, const _Alloc& __a, __from_tuple, _Tuple&& __t)
       : __tuple_leaf<_Indx, _Tp>(
             __uses_alloc_ctor<_Tp,
                               _Alloc,
-                              typename tuple_element<_Indx, typename __make_tuple_types<_Tuple>::type>::type>(),
+                              __copy_cvref_t<_Tuple, __tuple_element_t<_Indx, __remove_cvref_t<_Tuple>>>>(),
             __a,
-            std::forward<typename tuple_element<_Indx, typename __make_tuple_types<_Tuple>::type>::type>(
+            std::forward<__copy_cvref_t<_Tuple, __tuple_element_t<_Indx, __remove_cvref_t<_Tuple>>>>(
                 std::get<_Indx>(__t)))... {}
 
   __tuple_impl(const __tuple_impl&) = default;
@@ -518,19 +564,19 @@ struct _LIBCPP_DECLSPEC_EMPTY_BASES
 
 template <class _Dest, class _Source, size_t... _Np>
 _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 void
-__memberwise_copy_assign(_Dest& __dest, _Source const& __source, __tuple_indices<_Np...>) {
+__memberwise_copy_assign(_Dest& __dest, _Source const& __source, __index_sequence<_Np...>) {
   std::__swallow(((std::get<_Np>(__dest) = std::get<_Np>(__source)), void(), 0)...);
 }
 
 template <class _Dest, class _Source, class... _Up, size_t... _Np>
 _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 void
-__memberwise_forward_assign(_Dest& __dest, _Source&& __source, __tuple_types<_Up...>, __tuple_indices<_Np...>) {
+__memberwise_forward_assign(_Dest& __dest, _Source&& __source, __type_list<_Up...>, __index_sequence<_Np...>) {
   std::__swallow(((std::get<_Np>(__dest) = std::forward<_Up>(std::get<_Np>(__source))), void(), 0)...);
 }
 
 template <class... _Tp>
 class _LIBCPP_NO_SPECIALIZATIONS tuple {
-  typedef __tuple_impl<typename __make_tuple_indices<sizeof...(_Tp)>::type, _Tp...> _BaseT;
+  typedef __tuple_impl<__index_sequence_for<_Tp...>, _Tp...> _BaseT;
 
   _BaseT __base_;
 
@@ -549,7 +595,6 @@ class _LIBCPP_NO_SPECIALIZATIONS tuple {
 public:
   using __trivially_relocatable _LIBCPP_NODEBUG =
       __conditional_t<_And<__libcpp_is_trivially_relocatable<_Tp>...>::value, tuple, void>;
-  using __replaceable _LIBCPP_NODEBUG = __conditional_t<_And<__is_replaceable<_Tp>...>::value, tuple, void>;
 
   // [tuple.cnstr]
 
@@ -566,12 +611,7 @@ public:
             __enable_if_t< _And< _IsDefault<_Tp>... >::value, int> = 0>
   _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 explicit(_Not<_Lazy<_And, _IsImpDefault<_Tp>...> >::value)
       tuple(allocator_arg_t, _Alloc const& __a)
-      : __base_(allocator_arg_t(),
-                __a,
-                __tuple_indices<>(),
-                __tuple_types<>(),
-                typename __make_tuple_indices<sizeof...(_Tp), 0>::type(),
-                __tuple_types<_Tp...>()) {}
+      : __base_(allocator_arg_t(), __a, __value_init{}) {}
 
   // tuple(const T&...) constructors (including allocator_arg_t variants)
   template <template <class...> class _And = _And,
@@ -579,11 +619,7 @@ public:
   _LIBCPP_HIDE_FROM_ABI
   _LIBCPP_CONSTEXPR_SINCE_CXX14 explicit(_Not<_Lazy<_And, is_convertible<const _Tp&, _Tp>...> >::value)
       tuple(const _Tp&... __t) noexcept(_And<is_nothrow_copy_constructible<_Tp>...>::value)
-      : __base_(typename __make_tuple_indices<sizeof...(_Tp)>::type(),
-                typename __make_tuple_types<tuple, sizeof...(_Tp)>::type(),
-                typename __make_tuple_indices<0>::type(),
-                typename __make_tuple_types<tuple, 0>::type(),
-                __t...) {}
+      : __base_(__forward_args{}, __t...) {}
 
   template <class _Alloc,
             template <class...> class _And = _And,
@@ -591,13 +627,7 @@ public:
   _LIBCPP_HIDE_FROM_ABI
   _LIBCPP_CONSTEXPR_SINCE_CXX20 explicit(_Not<_Lazy<_And, is_convertible<const _Tp&, _Tp>...> >::value)
       tuple(allocator_arg_t, const _Alloc& __a, const _Tp&... __t)
-      : __base_(allocator_arg_t(),
-                __a,
-                typename __make_tuple_indices<sizeof...(_Tp)>::type(),
-                typename __make_tuple_types<tuple, sizeof...(_Tp)>::type(),
-                typename __make_tuple_indices<0>::type(),
-                typename __make_tuple_types<tuple, 0>::type(),
-                __t...) {}
+      : __base_(allocator_arg_t(), __a, __forward_args{}, __t...) {}
 
   // tuple(U&& ...) constructors (including allocator_arg_t variants)
   template <class... _Up>
@@ -616,11 +646,7 @@ public:
                            int> = 0>
   _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 explicit(_Not<_Lazy<_And, is_convertible<_Up, _Tp>...> >::value)
       tuple(_Up&&... __u) noexcept(_And<is_nothrow_constructible<_Tp, _Up>...>::value)
-      : __base_(typename __make_tuple_indices<sizeof...(_Up)>::type(),
-                typename __make_tuple_types<tuple, sizeof...(_Up)>::type(),
-                typename __make_tuple_indices<sizeof...(_Tp), sizeof...(_Up)>::type(),
-                typename __make_tuple_types<tuple, sizeof...(_Tp), sizeof...(_Up)>::type(),
-                std::forward<_Up>(__u)...) {}
+      : __base_(__forward_args{}, std::forward<_Up>(__u)...) {}
 
   template <class _Alloc,
             class... _Up,
@@ -628,13 +654,7 @@ public:
                            int> = 0>
   _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 explicit(_Not<_Lazy<_And, is_convertible<_Up, _Tp>...> >::value)
       tuple(allocator_arg_t, const _Alloc& __a, _Up&&... __u)
-      : __base_(allocator_arg_t(),
-                __a,
-                typename __make_tuple_indices<sizeof...(_Up)>::type(),
-                typename __make_tuple_types<tuple, sizeof...(_Up)>::type(),
-                typename __make_tuple_indices<sizeof...(_Tp), sizeof...(_Up)>::type(),
-                typename __make_tuple_types<tuple, sizeof...(_Tp), sizeof...(_Up)>::type(),
-                std::forward<_Up>(__u)...) {}
+      : __base_(allocator_arg_t(), __a, __forward_args{}, std::forward<_Up>(__u)...) {}
 
   // Copy and move constructors (including the allocator_arg_t variants)
   tuple(const tuple&) = default;
@@ -644,13 +664,13 @@ public:
             template <class...> class _And                                  = _And,
             __enable_if_t< _And<is_copy_constructible<_Tp>...>::value, int> = 0>
   _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 tuple(allocator_arg_t, const _Alloc& __alloc, const tuple& __t)
-      : __base_(allocator_arg_t(), __alloc, __t) {}
+      : __base_(allocator_arg_t(), __alloc, __from_tuple(), __t) {}
 
   template <class _Alloc,
             template <class...> class _And                                  = _And,
             __enable_if_t< _And<is_move_constructible<_Tp>...>::value, int> = 0>
   _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 tuple(allocator_arg_t, const _Alloc& __alloc, tuple&& __t)
-      : __base_(allocator_arg_t(), __alloc, std::move(__t)) {}
+      : __base_(allocator_arg_t(), __alloc, __from_tuple(), std::move(__t)) {}
 
   // tuple(const tuple<U...>&) constructors (including allocator_arg_t variants)
 
@@ -683,7 +703,7 @@ public:
   _LIBCPP_HIDE_FROM_ABI
   _LIBCPP_CONSTEXPR_SINCE_CXX14 explicit(_Not<_Lazy<_And, is_convertible<const _Up&, _Tp>...> >::value)
       tuple(const tuple<_Up...>& __t) noexcept(_And<is_nothrow_constructible<_Tp, const _Up&>...>::value)
-      : __base_(__t) {}
+      : __base_(__from_tuple(), __t) {}
 
   template <class... _Up,
             class _Alloc,
@@ -691,33 +711,33 @@ public:
   _LIBCPP_HIDE_FROM_ABI
   _LIBCPP_CONSTEXPR_SINCE_CXX20 explicit(_Not<_Lazy<_And, is_convertible<const _Up&, _Tp>...> >::value)
       tuple(allocator_arg_t, const _Alloc& __a, const tuple<_Up...>& __t)
-      : __base_(allocator_arg_t(), __a, __t) {}
+      : __base_(allocator_arg_t(), __a, __from_tuple(), __t) {}
 
 #    if _LIBCPP_STD_VER >= 23
   // tuple(tuple<U...>&) constructors (including allocator_arg_t variants)
 
   template <class... _Up, enable_if_t< _EnableCtorFromUTypesTuple<tuple<_Up...>&>::value>* = nullptr>
   _LIBCPP_HIDE_FROM_ABI constexpr explicit(!_Lazy<_And, is_convertible<_Up&, _Tp>...>::value) tuple(tuple<_Up...>& __t)
-      : __base_(__t) {}
+      : __base_(__from_tuple(), __t) {}
 
   template <class _Alloc, class... _Up, enable_if_t< _EnableCtorFromUTypesTuple<tuple<_Up...>&>::value>* = nullptr>
   _LIBCPP_HIDE_FROM_ABI constexpr explicit(!_Lazy<_And, is_convertible<_Up&, _Tp>...>::value)
       tuple(allocator_arg_t, const _Alloc& __alloc, tuple<_Up...>& __t)
-      : __base_(allocator_arg_t(), __alloc, __t) {}
+      : __base_(allocator_arg_t(), __alloc, __from_tuple(), __t) {}
 #    endif // _LIBCPP_STD_VER >= 23
 
   // tuple(tuple<U...>&&) constructors (including allocator_arg_t variants)
   template <class... _Up, __enable_if_t< _And< _EnableCtorFromUTypesTuple<tuple<_Up...>&&> >::value, int> = 0>
   _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 explicit(_Not<_Lazy<_And, is_convertible<_Up, _Tp>...> >::value)
       tuple(tuple<_Up...>&& __t) noexcept(_And<is_nothrow_constructible<_Tp, _Up>...>::value)
-      : __base_(std::move(__t)) {}
+      : __base_(__from_tuple(), std::move(__t)) {}
 
   template <class _Alloc,
             class... _Up,
             __enable_if_t< _And< _EnableCtorFromUTypesTuple<tuple<_Up...>&&> >::value, int> = 0>
   _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 explicit(_Not<_Lazy<_And, is_convertible<_Up, _Tp>...> >::value)
       tuple(allocator_arg_t, const _Alloc& __a, tuple<_Up...>&& __t)
-      : __base_(allocator_arg_t(), __a, std::move(__t)) {}
+      : __base_(allocator_arg_t(), __a, __from_tuple(), std::move(__t)) {}
 
 #    if _LIBCPP_STD_VER >= 23
   // tuple(const tuple<U...>&&) constructors (including allocator_arg_t variants)
@@ -725,14 +745,14 @@ public:
   template <class... _Up, enable_if_t< _EnableCtorFromUTypesTuple<const tuple<_Up...>&&>::value>* = nullptr>
   _LIBCPP_HIDE_FROM_ABI constexpr explicit(!_Lazy<_And, is_convertible<const _Up&&, _Tp>...>::value)
       tuple(const tuple<_Up...>&& __t)
-      : __base_(std::move(__t)) {}
+      : __base_(__from_tuple(), std::move(__t)) {}
 
   template <class _Alloc,
             class... _Up,
             enable_if_t< _EnableCtorFromUTypesTuple<const tuple<_Up...>&&>::value>* = nullptr>
   _LIBCPP_HIDE_FROM_ABI constexpr explicit(!_Lazy<_And, is_convertible<const _Up&&, _Tp>...>::value)
       tuple(allocator_arg_t, const _Alloc& __alloc, const tuple<_Up...>&& __t)
-      : __base_(allocator_arg_t(), __alloc, std::move(__t)) {}
+      : __base_(allocator_arg_t(), __alloc, __from_tuple(), std::move(__t)) {}
 #    endif // _LIBCPP_STD_VER >= 23
 
   // tuple(const pair<U1, U2>&) constructors (including allocator_arg_t variants)
@@ -767,7 +787,7 @@ public:
   _LIBCPP_HIDE_FROM_ABI
   _LIBCPP_CONSTEXPR_SINCE_CXX14 explicit(_Not<_BothImplicitlyConvertible<const pair<_Up1, _Up2>&> >::value)
       tuple(const pair<_Up1, _Up2>& __p) noexcept(_NothrowConstructibleFromPair<const pair<_Up1, _Up2>&>::value)
-      : __base_(__p) {}
+      : __base_(__from_tuple(), __p) {}
 
   template <class _Alloc,
             class _Up1,
@@ -777,7 +797,7 @@ public:
   _LIBCPP_HIDE_FROM_ABI
   _LIBCPP_CONSTEXPR_SINCE_CXX20 explicit(_Not<_BothImplicitlyConvertible<const pair<_Up1, _Up2>&> >::value)
       tuple(allocator_arg_t, const _Alloc& __a, const pair<_Up1, _Up2>& __p)
-      : __base_(allocator_arg_t(), __a, __p) {}
+      : __base_(allocator_arg_t(), __a, __from_tuple(), __p) {}
 
 #    if _LIBCPP_STD_VER >= 23
   // tuple(pair<U1, U2>&) constructors (including allocator_arg_t variants)
@@ -785,7 +805,7 @@ public:
   template <class _U1, class _U2, enable_if_t< _EnableCtorFromPair<pair<_U1, _U2>&>::value>* = nullptr>
   _LIBCPP_HIDE_FROM_ABI constexpr explicit(!_BothImplicitlyConvertible<pair<_U1, _U2>&>::value)
       tuple(pair<_U1, _U2>& __p)
-      : __base_(__p) {}
+      : __base_(__from_tuple(), __p) {}
 
   template <class _Alloc,
             class _U1,
@@ -793,7 +813,7 @@ public:
             enable_if_t< _EnableCtorFromPair<std::pair<_U1, _U2>&>::value>* = nullptr>
   _LIBCPP_HIDE_FROM_ABI constexpr explicit(!_BothImplicitlyConvertible<pair<_U1, _U2>&>::value)
       tuple(allocator_arg_t, const _Alloc& __alloc, pair<_U1, _U2>& __p)
-      : __base_(allocator_arg_t(), __alloc, __p) {}
+      : __base_(allocator_arg_t(), __alloc, __from_tuple(), __p) {}
 #    endif
 
   // tuple(pair<U1, U2>&&) constructors (including allocator_arg_t variants)
@@ -805,7 +825,7 @@ public:
   _LIBCPP_HIDE_FROM_ABI
   _LIBCPP_CONSTEXPR_SINCE_CXX14 explicit(_Not<_BothImplicitlyConvertible<pair<_Up1, _Up2>&&> >::value)
       tuple(pair<_Up1, _Up2>&& __p) noexcept(_NothrowConstructibleFromPair<pair<_Up1, _Up2>&&>::value)
-      : __base_(std::move(__p)) {}
+      : __base_(__from_tuple(), std::move(__p)) {}
 
   template <class _Alloc,
             class _Up1,
@@ -815,7 +835,7 @@ public:
   _LIBCPP_HIDE_FROM_ABI
   _LIBCPP_CONSTEXPR_SINCE_CXX20 explicit(_Not<_BothImplicitlyConvertible<pair<_Up1, _Up2>&&> >::value)
       tuple(allocator_arg_t, const _Alloc& __a, pair<_Up1, _Up2>&& __p)
-      : __base_(allocator_arg_t(), __a, std::move(__p)) {}
+      : __base_(allocator_arg_t(), __a, __from_tuple(), std::move(__p)) {}
 
 #    if _LIBCPP_STD_VER >= 23
   // tuple(const pair<U1, U2>&&) constructors (including allocator_arg_t variants)
@@ -823,7 +843,7 @@ public:
   template <class _U1, class _U2, enable_if_t< _EnableCtorFromPair<const pair<_U1, _U2>&&>::value>* = nullptr>
   _LIBCPP_HIDE_FROM_ABI constexpr explicit(!_BothImplicitlyConvertible<const pair<_U1, _U2>&&>::value)
       tuple(const pair<_U1, _U2>&& __p)
-      : __base_(std::move(__p)) {}
+      : __base_(__from_tuple(), std::move(__p)) {}
 
   template <class _Alloc,
             class _U1,
@@ -831,14 +851,14 @@ public:
             enable_if_t< _EnableCtorFromPair<const pair<_U1, _U2>&&>::value>* = nullptr>
   _LIBCPP_HIDE_FROM_ABI constexpr explicit(!_BothImplicitlyConvertible<const pair<_U1, _U2>&&>::value)
       tuple(allocator_arg_t, const _Alloc& __alloc, const pair<_U1, _U2>&& __p)
-      : __base_(allocator_arg_t(), __alloc, std::move(__p)) {}
+      : __base_(allocator_arg_t(), __alloc, __from_tuple(), std::move(__p)) {}
 #    endif // _LIBCPP_STD_VER >= 23
 
   // [tuple.assign]
   _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 tuple&
   operator=(_If<_And<is_copy_assignable<_Tp>...>::value, tuple, __nat> const& __tuple) noexcept(
       _And<is_nothrow_copy_assignable<_Tp>...>::value) {
-    std::__memberwise_copy_assign(*this, __tuple, typename __make_tuple_indices<sizeof...(_Tp)>::type());
+    std::__memberwise_copy_assign(*this, __tuple, __index_sequence_for<_Tp...>());
     return *this;
   }
 
@@ -846,15 +866,14 @@ public:
   _LIBCPP_HIDE_FROM_ABI constexpr const tuple& operator=(tuple const& __tuple) const
     requires(_And<is_copy_assignable<const _Tp>...>::value)
   {
-    std::__memberwise_copy_assign(*this, __tuple, typename __make_tuple_indices<sizeof...(_Tp)>::type());
+    std::__memberwise_copy_assign(*this, __tuple, __index_sequence_for<_Tp...>());
     return *this;
   }
 
   _LIBCPP_HIDE_FROM_ABI constexpr const tuple& operator=(tuple&& __tuple) const
     requires(_And<is_assignable<const _Tp&, _Tp>...>::value)
   {
-    std::__memberwise_forward_assign(
-        *this, std::move(__tuple), __tuple_types<_Tp...>(), typename __make_tuple_indices<sizeof...(_Tp)>::type());
+    std::__memberwise_forward_assign(*this, std::move(__tuple), __type_list<_Tp...>(), __index_sequence_for<_Tp...>());
     return *this;
   }
 #    endif // _LIBCPP_STD_VER >= 23
@@ -862,8 +881,7 @@ public:
   _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 tuple&
   operator=(_If<_And<is_move_assignable<_Tp>...>::value, tuple, __nat>&& __tuple) noexcept(
       _And<is_nothrow_move_assignable<_Tp>...>::value) {
-    std::__memberwise_forward_assign(
-        *this, std::move(__tuple), __tuple_types<_Tp...>(), typename __make_tuple_indices<sizeof...(_Tp)>::type());
+    std::__memberwise_forward_assign(*this, std::move(__tuple), __type_list<_Tp...>(), __index_sequence_for<_Tp...>());
     return *this;
   }
 
@@ -873,7 +891,7 @@ public:
                      int> = 0>
   _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 tuple&
   operator=(tuple<_Up...> const& __tuple) noexcept(_And<is_nothrow_assignable<_Tp&, _Up const&>...>::value) {
-    std::__memberwise_copy_assign(*this, __tuple, typename __make_tuple_indices<sizeof...(_Tp)>::type());
+    std::__memberwise_copy_assign(*this, __tuple, __index_sequence_for<_Tp...>());
     return *this;
   }
 
@@ -882,8 +900,7 @@ public:
                            int> = 0>
   _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 tuple&
   operator=(tuple<_Up...>&& __tuple) noexcept(_And<is_nothrow_assignable<_Tp&, _Up>...>::value) {
-    std::__memberwise_forward_assign(
-        *this, std::move(__tuple), __tuple_types<_Up...>(), typename __make_tuple_indices<sizeof...(_Tp)>::type());
+    std::__memberwise_forward_assign(*this, std::move(__tuple), __type_list<_Up...>(), __index_sequence_for<_Tp...>());
     return *this;
   }
 
@@ -892,7 +909,7 @@ public:
             enable_if_t< _And<_BoolConstant<sizeof...(_Tp) == sizeof...(_UTypes)>,
                               is_assignable<const _Tp&, const _UTypes&>...>::value>* = nullptr>
   _LIBCPP_HIDE_FROM_ABI constexpr const tuple& operator=(const tuple<_UTypes...>& __u) const {
-    std::__memberwise_copy_assign(*this, __u, typename __make_tuple_indices<sizeof...(_Tp)>::type());
+    std::__memberwise_copy_assign(*this, __u, index_sequence_for<_Tp...>());
     return *this;
   }
 
@@ -900,8 +917,7 @@ public:
             enable_if_t< _And<_BoolConstant<sizeof...(_Tp) == sizeof...(_UTypes)>,
                               is_assignable<const _Tp&, _UTypes>...>::value>* = nullptr>
   _LIBCPP_HIDE_FROM_ABI constexpr const tuple& operator=(tuple<_UTypes...>&& __u) const {
-    std::__memberwise_forward_assign(
-        *this, __u, __tuple_types<_UTypes...>(), typename __make_tuple_indices<sizeof...(_Tp)>::type());
+    std::__memberwise_forward_assign(*this, __u, __type_list<_UTypes...>(), index_sequence_for<_Tp...>());
     return *this;
   }
 #    endif // _LIBCPP_STD_VER >= 23
@@ -967,7 +983,7 @@ public:
       __enable_if_t< _And< _BoolConstant<_Np == sizeof...(_Tp)>, is_assignable<_Tp&, _Up const&>... >::value, int> = 0>
   _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 tuple&
   operator=(array<_Up, _Np> const& __array) noexcept(_And<is_nothrow_assignable<_Tp&, _Up const&>...>::value) {
-    std::__memberwise_copy_assign(*this, __array, typename __make_tuple_indices<sizeof...(_Tp)>::type());
+    std::__memberwise_copy_assign(*this, __array, __index_sequence_for<_Tp...>());
     return *this;
   }
 
@@ -979,10 +995,7 @@ public:
   _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 tuple&
   operator=(array<_Up, _Np>&& __array) noexcept(_And<is_nothrow_assignable<_Tp&, _Up>...>::value) {
     std::__memberwise_forward_assign(
-        *this,
-        std::move(__array),
-        __tuple_types<_If<true, _Up, _Tp>...>(),
-        typename __make_tuple_indices<sizeof...(_Tp)>::type());
+        *this, std::move(__array), __type_list<_If<true, _Up, _Tp>...>(), __index_sequence_for<_Tp...>());
     return *this;
   }
 
@@ -997,7 +1010,24 @@ public:
       noexcept(__all<is_nothrow_swappable_v<const _Tp&>...>::value) {
     __base_.swap(__t.__base_);
   }
-#    endif // _LIBCPP_STD_VER >= 23
+
+  template <__tuple_like_no_tuple _UTuple>
+#      if _LIBCPP_STD_VER >= 26
+    requires __can_tuple_compare_equal<tuple, _UTuple> && (sizeof...(_Tp) == tuple_size_v<_UTuple>)
+#      endif // _LIBCPP_STD_VER >= 26
+  _LIBCPP_HIDE_FROM_ABI friend constexpr bool operator==(const tuple& __x, const _UTuple& __y) {
+    static_assert(sizeof...(_Tp) == tuple_size_v<_UTuple>, "Can't compare tuple-like values of different sizes");
+    return std::__tuple_compare_equal<sizeof...(_Tp)>(__x, __y);
+  }
+
+  template <__tuple_like_no_tuple _UTuple>
+    requires(sizeof...(_Tp) == tuple_size_v<_UTuple>)
+  _LIBCPP_HIDE_FROM_ABI friend constexpr __tuple_common_comparison_category<tuple, _UTuple>
+  operator<=>(const tuple& __x, const _UTuple& __y) {
+    return std::__tuple_compare_three_way<__tuple_common_comparison_category<tuple, _UTuple>>(
+        __x, __y, index_sequence_for<_Tp...>{});
+  }
+#    endif   // _LIBCPP_STD_VER >= 23
 };
 
 _LIBCPP_DIAGNOSTIC_PUSH
@@ -1019,6 +1049,21 @@ public:
   _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 void swap(tuple&) _NOEXCEPT {}
 #    if _LIBCPP_STD_VER >= 23
   _LIBCPP_HIDE_FROM_ABI constexpr void swap(const tuple&) const noexcept {}
+
+  template <__tuple_like_no_tuple _UTuple>
+#      if _LIBCPP_STD_VER >= 26
+    requires(tuple_size_v<_UTuple> == 0)
+#      endif // _LIBCPP_STD_VER >= 26
+  _LIBCPP_HIDE_FROM_ABI friend constexpr bool operator==(const tuple&, const _UTuple&) {
+    static_assert(tuple_size_v<_UTuple> == 0, "Can't compare tuple-like values of different sizes");
+    return true;
+  }
+
+  template <__tuple_like_no_tuple _UTuple>
+    requires(tuple_size_v<_UTuple> == 0)
+  _LIBCPP_HIDE_FROM_ABI friend constexpr strong_ordering operator<=>(const tuple&, const _UTuple&) {
+    return strong_ordering::equal;
+  }
 #    endif
 };
 _LIBCPP_DIAGNOSTIC_POP
@@ -1068,28 +1113,32 @@ swap(const tuple<_Tp...>& __lhs,
 // get
 
 template <size_t _Ip, class... _Tp>
-inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 typename tuple_element<_Ip, tuple<_Tp...> >::type&
+[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14
+typename tuple_element<_Ip, tuple<_Tp...> >::type&
 get(tuple<_Tp...>& __t) _NOEXCEPT {
   using type _LIBCPP_NODEBUG = typename tuple_element<_Ip, tuple<_Tp...> >::type;
   return static_cast<__tuple_leaf<_Ip, type>&>(__t.__base_).get();
 }
 
 template <size_t _Ip, class... _Tp>
-inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 const typename tuple_element<_Ip, tuple<_Tp...> >::type&
+[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI
+_LIBCPP_CONSTEXPR_SINCE_CXX14 const typename tuple_element<_Ip, tuple<_Tp...> >::type&
 get(const tuple<_Tp...>& __t) _NOEXCEPT {
   using type _LIBCPP_NODEBUG = typename tuple_element<_Ip, tuple<_Tp...> >::type;
   return static_cast<const __tuple_leaf<_Ip, type>&>(__t.__base_).get();
 }
 
 template <size_t _Ip, class... _Tp>
-inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 typename tuple_element<_Ip, tuple<_Tp...> >::type&&
+[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14
+typename tuple_element<_Ip, tuple<_Tp...> >::type&&
 get(tuple<_Tp...>&& __t) _NOEXCEPT {
   using type _LIBCPP_NODEBUG = typename tuple_element<_Ip, tuple<_Tp...> >::type;
   return static_cast<type&&>(static_cast<__tuple_leaf<_Ip, type>&&>(__t.__base_).get());
 }
 
 template <size_t _Ip, class... _Tp>
-inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 const typename tuple_element<_Ip, tuple<_Tp...> >::type&&
+[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI
+_LIBCPP_CONSTEXPR_SINCE_CXX14 const typename tuple_element<_Ip, tuple<_Tp...> >::type&&
 get(const tuple<_Tp...>&& __t) _NOEXCEPT {
   using type _LIBCPP_NODEBUG = typename tuple_element<_Ip, tuple<_Tp...> >::type;
   return static_cast<const type&&>(static_cast<const __tuple_leaf<_Ip, type>&&>(__t.__base_).get());
@@ -1098,22 +1147,22 @@ get(const tuple<_Tp...>&& __t) _NOEXCEPT {
 #    if _LIBCPP_STD_VER >= 14
 
 template <class _T1, class... _Args>
-inline _LIBCPP_HIDE_FROM_ABI constexpr _T1& get(tuple<_Args...>& __tup) noexcept {
+[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI constexpr _T1& get(tuple<_Args...>& __tup) noexcept {
   return std::get<__find_exactly_one_t<_T1, _Args...>::value>(__tup);
 }
 
 template <class _T1, class... _Args>
-inline _LIBCPP_HIDE_FROM_ABI constexpr _T1 const& get(tuple<_Args...> const& __tup) noexcept {
+[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI constexpr _T1 const& get(tuple<_Args...> const& __tup) noexcept {
   return std::get<__find_exactly_one_t<_T1, _Args...>::value>(__tup);
 }
 
 template <class _T1, class... _Args>
-inline _LIBCPP_HIDE_FROM_ABI constexpr _T1&& get(tuple<_Args...>&& __tup) noexcept {
+[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI constexpr _T1&& get(tuple<_Args...>&& __tup) noexcept {
   return std::get<__find_exactly_one_t<_T1, _Args...>::value>(std::move(__tup));
 }
 
 template <class _T1, class... _Args>
-inline _LIBCPP_HIDE_FROM_ABI constexpr _T1 const&& get(tuple<_Args...> const&& __tup) noexcept {
+[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI constexpr _T1 const&& get(tuple<_Args...> const&& __tup) noexcept {
   return std::get<__find_exactly_one_t<_T1, _Args...>::value>(std::move(__tup));
 }
 
@@ -1122,37 +1171,22 @@ inline _LIBCPP_HIDE_FROM_ABI constexpr _T1 const&& get(tuple<_Args...> const&& _
 // tie
 
 template <class... _Tp>
-inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 tuple<_Tp&...> tie(_Tp&... __t) _NOEXCEPT {
+[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 tuple<_Tp&...> tie(_Tp&... __t) _NOEXCEPT {
   return tuple<_Tp&...>(__t...);
 }
 
 template <class... _Tp>
-inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 tuple<__unwrap_ref_decay_t<_Tp>...>
+[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 tuple<__unwrap_ref_decay_t<_Tp>...>
 make_tuple(_Tp&&... __t) {
   return tuple<__unwrap_ref_decay_t<_Tp>...>(std::forward<_Tp>(__t)...);
 }
 
 template <class... _Tp>
-inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 tuple<_Tp&&...> forward_as_tuple(_Tp&&... __t) _NOEXCEPT {
+[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 tuple<_Tp&&...>
+forward_as_tuple(_Tp&&... __t) _NOEXCEPT {
   return tuple<_Tp&&...>(std::forward<_Tp>(__t)...);
 }
 
-template <size_t _Ip>
-struct __tuple_equal {
-  template <class _Tp, class _Up>
-  _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 bool operator()(const _Tp& __x, const _Up& __y) {
-    return __tuple_equal<_Ip - 1>()(__x, __y) && std::get<_Ip - 1>(__x) == std::get<_Ip - 1>(__y);
-  }
-};
-
-template <>
-struct __tuple_equal<0> {
-  template <class _Tp, class _Up>
-  _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 bool operator()(const _Tp&, const _Up&) {
-    return true;
-  }
-};
-
 template <class... _Tp, class... _Up>
 #    if _LIBCPP_STD_VER >= 26
   requires(__all<requires(const _Tp& __t, const _Up& __u) {
@@ -1162,27 +1196,19 @@ template <class... _Tp, class... _Up>
 inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 bool
 operator==(const tuple<_Tp...>& __x, const tuple<_Up...>& __y) {
   static_assert(sizeof...(_Tp) == sizeof...(_Up), "Can't compare tuples of different sizes");
-  return __tuple_equal<sizeof...(_Tp)>()(__x, __y);
+  return std::__tuple_compare_equal<sizeof...(_Tp)>(__x, __y);
 }
 
 #    if _LIBCPP_STD_VER >= 20
 
 // operator<=>
 
-template <class... _Tp, class... _Up, size_t... _Is>
-_LIBCPP_HIDE_FROM_ABI constexpr auto
-__tuple_compare_three_way(const tuple<_Tp...>& __x, const tuple<_Up...>& __y, index_sequence<_Is...>) {
-  common_comparison_category_t<__synth_three_way_result<_Tp, _Up>...> __result = strong_ordering::equal;
-  static_cast<void>(
-      ((__result = std::__synth_three_way(std::get<_Is>(__x), std::get<_Is>(__y)), __result != 0) || ...));
-  return __result;
-}
-
 template <class... _Tp, class... _Up>
   requires(sizeof...(_Tp) == sizeof...(_Up))
 _LIBCPP_HIDE_FROM_ABI constexpr common_comparison_category_t<__synth_three_way_result<_Tp, _Up>...>
 operator<=>(const tuple<_Tp...>& __x, const tuple<_Up...>& __y) {
-  return std::__tuple_compare_three_way(__x, __y, index_sequence_for<_Tp...>{});
+  return std::__tuple_compare_three_way<common_comparison_category_t<__synth_three_way_result<_Tp, _Up>...>>(
+      __x, __y, index_sequence_for<_Tp...>{});
 }
 
 #    else // _LIBCPP_STD_VER >= 20
@@ -1243,65 +1269,59 @@ operator<=(const tuple<_Tp...>& __x, const tuple<_Up...>& __y) {
 
 // tuple_cat
 
-template <class _Tp, class _Up>
-struct __tuple_cat_type;
+template <class... _Tuples>
+struct __tuple_cat_return_impl;
 
-template <class... _Ttypes, class... _Utypes>
-struct __tuple_cat_type<tuple<_Ttypes...>, __tuple_types<_Utypes...> > {
-  using type _LIBCPP_NODEBUG = tuple<_Ttypes..., _Utypes...>;
+template <class... _Types>
+struct __tuple_cat_return_impl<tuple<_Types...>> {
+  using type _LIBCPP_NODEBUG = tuple<_Types...>;
 };
 
-template <class _ResultTuple, bool _Is_Tuple0TupleLike, class... _Tuples>
-struct __tuple_cat_return_1 {};
+template <class... _Types0, class... _Types1, class... _Tuples>
+struct __tuple_cat_return_impl<tuple<_Types0...>, tuple<_Types1...>, _Tuples...>
+    : __tuple_cat_return_impl<tuple<_Types0..., _Types1...>, _Tuples...> {};
 
-template <class... _Types, class _Tuple0>
-struct __tuple_cat_return_1<tuple<_Types...>, true, _Tuple0> {
-  using type _LIBCPP_NODEBUG =
-      typename __tuple_cat_type< tuple<_Types...>,
-                                 typename __make_tuple_types<__remove_cvref_t<_Tuple0> >::type >::type;
+template <class... _Types0, class _Tp, class _Up, class... _Tuples>
+struct __tuple_cat_return_impl<tuple<_Types0...>, pair<_Tp, _Up>, _Tuples...>
+    : __tuple_cat_return_impl<tuple<_Types0..., _Tp, _Up>, _Tuples...> {};
+
+template <class, class, class>
+struct __tuple_cat_array;
+
+template <class... _Types, class _ValueT, size_t... _Indices>
+struct __tuple_cat_array<tuple<_Types...>, _ValueT, __index_sequence<_Indices...>> {
+  template <size_t>
+  using __value_type _LIBCPP_NODEBUG = _ValueT;
+
+  using type _LIBCPP_NODEBUG = tuple<_Types..., __value_type<_Indices>...>;
 };
 
-template <class... _Types, class _Tuple0, class _Tuple1, class... _Tuples>
-struct __tuple_cat_return_1<tuple<_Types...>, true, _Tuple0, _Tuple1, _Tuples...>
-    : public __tuple_cat_return_1<
-          typename __tuple_cat_type< tuple<_Types...>,
-                                     typename __make_tuple_types<__remove_cvref_t<_Tuple0> >::type >::type,
-          __tuple_like_ext<__libcpp_remove_reference_t<_Tuple1> >::value,
-          _Tuple1,
-          _Tuples...> {};
+template <class... _Types, class _ValueT, size_t _Np, class... _Tuples>
+struct __tuple_cat_return_impl<tuple<_Types...>, array<_ValueT, _Np>, _Tuples...>
+    : __tuple_cat_return_impl<typename __tuple_cat_array<tuple<_Types...>, _ValueT, __make_index_sequence<_Np>>::type,
+                              _Tuples...> {};
 
 template <class... _Tuples>
-struct __tuple_cat_return;
+using __tuple_cat_return_t _LIBCPP_NODEBUG =
+    typename __tuple_cat_return_impl<tuple<>, __remove_cvref_t<_Tuples>...>::type;
 
-template <class _Tuple0, class... _Tuples>
-struct __tuple_cat_return<_Tuple0, _Tuples...>
-    : public __tuple_cat_return_1<tuple<>,
-                                  __tuple_like_ext<__libcpp_remove_reference_t<_Tuple0> >::value,
-                                  _Tuple0,
-                                  _Tuples...> {};
-
-template <>
-struct __tuple_cat_return<> {
-  using type _LIBCPP_NODEBUG = tuple<>;
-};
-
-inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 tuple<> tuple_cat() { return tuple<>(); }
+[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 tuple<> tuple_cat() { return tuple<>(); }
 
 template <class _Rp, class _Indices, class _Tuple0, class... _Tuples>
 struct __tuple_cat_return_ref_imp;
 
 template <class... _Types, size_t... _I0, class _Tuple0>
-struct __tuple_cat_return_ref_imp<tuple<_Types...>, __tuple_indices<_I0...>, _Tuple0> {
+struct __tuple_cat_return_ref_imp<tuple<_Types...>, __index_sequence<_I0...>, _Tuple0> {
   using _T0 _LIBCPP_NODEBUG = __libcpp_remove_reference_t<_Tuple0>;
   typedef tuple<_Types..., __copy_cvref_t<_Tuple0, typename tuple_element<_I0, _T0>::type>&&...> type;
 };
 
 template <class... _Types, size_t... _I0, class _Tuple0, class _Tuple1, class... _Tuples>
-struct __tuple_cat_return_ref_imp<tuple<_Types...>, __tuple_indices<_I0...>, _Tuple0, _Tuple1, _Tuples...>
+struct __tuple_cat_return_ref_imp<tuple<_Types...>, __index_sequence<_I0...>, _Tuple0, _Tuple1, _Tuples...>
     : public __tuple_cat_return_ref_imp<
           tuple<_Types...,
                 __copy_cvref_t<_Tuple0, typename tuple_element<_I0, __libcpp_remove_reference_t<_Tuple0>>::type>&&...>,
-          typename __make_tuple_indices<tuple_size<__libcpp_remove_reference_t<_Tuple1> >::value>::type,
+          __make_index_sequence<tuple_size<__libcpp_remove_reference_t<_Tuple1> >::value>,
           _Tuple1,
           _Tuples...> {};
 
@@ -1309,7 +1329,7 @@ template <class _Tuple0, class... _Tuples>
 struct __tuple_cat_return_ref
     : public __tuple_cat_return_ref_imp<
           tuple<>,
-          typename __make_tuple_indices< tuple_size<__libcpp_remove_reference_t<_Tuple0> >::value >::type,
+          __make_index_sequence< tuple_size<__libcpp_remove_reference_t<_Tuple0> >::value >,
           _Tuple0,
           _Tuples...> {};
 
@@ -1317,7 +1337,7 @@ template <class _Types, class _I0, class _J0>
 struct __tuple_cat;
 
 template <class... _Types, size_t... _I0, size_t... _J0>
-struct __tuple_cat<tuple<_Types...>, __tuple_indices<_I0...>, __tuple_indices<_J0...> > {
+struct __tuple_cat<tuple<_Types...>, __index_sequence<_I0...>, __index_sequence<_J0...>> {
   template <class _Tuple0>
   _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14
   typename __tuple_cat_return_ref<tuple<_Types...>&&, _Tuple0&&>::type
@@ -1335,8 +1355,8 @@ struct __tuple_cat<tuple<_Types...>, __tuple_indices<_I0...>, __tuple_indices<_J
     using _T0 _LIBCPP_NODEBUG = __libcpp_remove_reference_t<_Tuple0>;
     using _T1 _LIBCPP_NODEBUG = __libcpp_remove_reference_t<_Tuple1>;
     return __tuple_cat<tuple<_Types..., __copy_cvref_t<_Tuple0, typename tuple_element<_J0, _T0>::type>&&...>,
-                       typename __make_tuple_indices<sizeof...(_Types) + tuple_size<_T0>::value>::type,
-                       typename __make_tuple_indices<tuple_size<_T1>::value>::type>()(
+                       __make_index_sequence<sizeof...(_Types) + tuple_size<_T0>::value>,
+                       __make_index_sequence<tuple_size<_T1>::value>>()(
         std::forward_as_tuple(
             std::forward<_Types>(std::get<_I0>(__t))..., std::get<_J0>(std::forward<_Tuple0>(__t0))...),
         std::forward<_Tuple1>(__t1),
@@ -1346,21 +1366,21 @@ struct __tuple_cat<tuple<_Types...>, __tuple_indices<_I0...>, __tuple_indices<_J
 
 template <class _TupleDst, class _TupleSrc, size_t... _Indices>
 inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 _TupleDst
-__tuple_cat_select_element_wise(_TupleSrc&& __src, __tuple_indices<_Indices...>) {
+__tuple_cat_select_element_wise(_TupleSrc&& __src, __index_sequence<_Indices...>) {
   static_assert(tuple_size<_TupleDst>::value == tuple_size<_TupleSrc>::value,
                 "misuse of __tuple_cat_select_element_wise with tuples of different sizes");
   return _TupleDst(std::get<_Indices>(std::forward<_TupleSrc>(__src))...);
 }
 
 template <class _Tuple0, class... _Tuples>
-inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 typename __tuple_cat_return<_Tuple0, _Tuples...>::type
+[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 __tuple_cat_return_t<_Tuple0, _Tuples...>
 tuple_cat(_Tuple0&& __t0, _Tuples&&... __tpls) {
   using _T0 _LIBCPP_NODEBUG          = __libcpp_remove_reference_t<_Tuple0>;
-  using _TRet _LIBCPP_NODEBUG        = typename __tuple_cat_return<_Tuple0, _Tuples...>::type;
-  using _T0Indices _LIBCPP_NODEBUG   = typename __make_tuple_indices<tuple_size<_T0>::value>::type;
-  using _TRetIndices _LIBCPP_NODEBUG = typename __make_tuple_indices<tuple_size<_TRet>::value>::type;
+  using _TRet _LIBCPP_NODEBUG        = __tuple_cat_return_t<_Tuple0, _Tuples...>;
+  using _T0Indices _LIBCPP_NODEBUG   = __make_index_sequence<tuple_size<_T0>::value>;
+  using _TRetIndices _LIBCPP_NODEBUG = __make_index_sequence<tuple_size<_TRet>::value>;
   return std::__tuple_cat_select_element_wise<_TRet>(
-      __tuple_cat<tuple<>, __tuple_indices<>, _T0Indices>()(
+      __tuple_cat<tuple<>, __index_sequence<>, _T0Indices>()(
           tuple<>(), std::forward<_Tuple0>(__t0), std::forward<_Tuples>(__tpls)...),
       _TRetIndices());
 }
@@ -1376,7 +1396,7 @@ struct uses_allocator<tuple<_Tp...>, _Alloc> : true_type {};
 // clang-format off
 template <class _Fn, class _Tuple, size_t... _Id>
 inline _LIBCPP_HIDE_FROM_ABI constexpr decltype(auto)
-__apply_tuple_impl(_Fn&& __f, _Tuple&& __t, __tuple_indices<_Id...>)
+__apply_tuple_impl(_Fn&& __f, _Tuple&& __t, index_sequence<_Id...>)
     _LIBCPP_NOEXCEPT_RETURN(std::__invoke(std::forward<_Fn>(__f), std::get<_Id>(std::forward<_Tuple>(__t))...))
 
 template <class _Fn, class _Tuple>
@@ -1384,28 +1404,29 @@ inline _LIBCPP_HIDE_FROM_ABI constexpr decltype(auto) apply(_Fn&& __f, _Tuple&&
     _LIBCPP_NOEXCEPT_RETURN(std::__apply_tuple_impl(
         std::forward<_Fn>(__f),
         std::forward<_Tuple>(__t),
-        typename __make_tuple_indices<tuple_size_v<remove_reference_t<_Tuple>>>::type{}))
+        make_index_sequence<tuple_size_v<remove_reference_t<_Tuple>>>()))
 
 #if _LIBCPP_STD_VER >= 20
 template <class _Tp, class _Tuple, size_t... _Idx>
-inline _LIBCPP_HIDE_FROM_ABI constexpr _Tp __make_from_tuple_impl(_Tuple&& __t, __tuple_indices<_Idx...>)
+inline _LIBCPP_HIDE_FROM_ABI constexpr _Tp __make_from_tuple_impl(_Tuple&& __t, index_sequence<_Idx...>)
   noexcept(noexcept(_Tp(std::get<_Idx>(std::forward<_Tuple>(__t))...)))
   requires is_constructible_v<_Tp, decltype(std::get<_Idx>(std::forward<_Tuple>(__t)))...> {
   return _Tp(std::get<_Idx>(std::forward<_Tuple>(__t))...);
 }
 #else
 template <class _Tp, class _Tuple, size_t... _Idx>
-inline _LIBCPP_HIDE_FROM_ABI constexpr _Tp __make_from_tuple_impl(_Tuple&& __t, __tuple_indices<_Idx...>,
+inline _LIBCPP_HIDE_FROM_ABI constexpr _Tp __make_from_tuple_impl(_Tuple&& __t, index_sequence<_Idx...>,
     enable_if_t<is_constructible_v<_Tp, decltype(std::get<_Idx>(std::forward<_Tuple>(__t)))...>> * = nullptr)
     _LIBCPP_NOEXCEPT_RETURN(_Tp(std::get<_Idx>(std::forward<_Tuple>(__t))...))
 #endif // _LIBCPP_STD_VER >= 20
+#undef _LIBCPP_NOEXCEPT_RETURN
 
 template <class _Tp, class _Tuple,
-          class _Seq = typename __make_tuple_indices<tuple_size_v<remove_reference_t<_Tuple>>>::type, class = void>
+          class _Seq = make_index_sequence<tuple_size_v<remove_reference_t<_Tuple>>>, class = void>
 inline constexpr bool __can_make_from_tuple = false;
 
 template <class _Tp, class _Tuple, size_t... _Idx>
-inline constexpr bool __can_make_from_tuple<_Tp, _Tuple, __tuple_indices<_Idx...>,
+inline constexpr bool __can_make_from_tuple<_Tp, _Tuple, index_sequence<_Idx...>,
     enable_if_t<is_constructible_v<_Tp, decltype(std::get<_Idx>(std::declval<_Tuple>()))...>>> = true;
 
 // Based on LWG3528(https://wg21.link/LWG3528) and http://eel.is/c++draft/description#structure.requirements-9,
@@ -1418,10 +1439,19 @@ template <class _Tp, class _Tuple>
 #else
 template <class _Tp, class _Tuple, class = enable_if_t<__can_make_from_tuple<_Tp, _Tuple>>> // strengthen
 #endif // _LIBCPP_STD_VER >= 20
-inline _LIBCPP_HIDE_FROM_ABI constexpr _Tp make_from_tuple(_Tuple&& __t)
-    _LIBCPP_NOEXCEPT_RETURN(std::__make_from_tuple_impl<_Tp>(
-        std::forward<_Tuple>(__t), typename __make_tuple_indices<tuple_size_v<remove_reference_t<_Tuple>>>::type{}))
-#    undef _LIBCPP_NOEXCEPT_RETURN
+
+[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI constexpr _Tp make_from_tuple(_Tuple&& __t)
+  noexcept(noexcept(std::__make_from_tuple_impl<_Tp>(std::forward<_Tuple>(__t),
+                    make_index_sequence<tuple_size_v<remove_reference_t<_Tuple>>>()))) {
+#if _LIBCPP_STD_VER >= 23
+  if constexpr (tuple_size_v<remove_reference_t<_Tuple>> == 1) {
+    static_assert(!std::reference_constructs_from_temporary_v<_Tp, decltype(std::get<0>(std::declval<_Tuple>()))>,
+                  "Attempted construction of reference element binds to a temporary whose lifetime has ended");
+  }
+#endif // _LIBCPP_STD_VER >= 23
+  return std::__make_from_tuple_impl<_Tp>(
+        std::forward<_Tuple>(__t), make_index_sequence<tuple_size_v<remove_reference_t<_Tuple>>>());
+}
 
 #  endif // _LIBCPP_STD_VER >= 17
 
diff --git a/lib/libcxx/include/type_traits b/lib/libcxx/include/type_traits
index a6e0c18675..f3e397e4df 100644
--- a/lib/libcxx/include/type_traits
+++ b/lib/libcxx/include/type_traits
@@ -454,6 +454,10 @@ namespace std
       template<class B> inline constexpr bool negation_v
         = negation<B>::value;                                   // since C++17
 
+      // [meta.const.eval], constant evaluation context
+      constexpr bool is_constant_evaluated() noexcept;                   // C++20
+      template<class T>
+        consteval bool is_within_lifetime(const T*) noexcept;            // C++26
 }
 
 */
@@ -546,9 +550,7 @@ namespace std
 
 #  if _LIBCPP_STD_VER >= 20
 #    include <__type_traits/common_reference.h>
-#    include <__type_traits/is_bounded_array.h>
 #    include <__type_traits/is_constant_evaluated.h>
-#    include <__type_traits/is_unbounded_array.h>
 #    include <__type_traits/type_identity.h>
 #    include <__type_traits/unwrap_ref.h>
 #  endif
@@ -559,6 +561,10 @@ namespace std
 #    include <__type_traits/reference_converts_from_temporary.h>
 #  endif
 
+#  if _LIBCPP_STD_VER >= 26
+#    include <__type_traits/is_within_lifetime.h>
+#  endif
+
 #  include <version>
 
 #  if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
diff --git a/lib/libcxx/include/typeindex b/lib/libcxx/include/typeindex
index e32cb07431..82ea3d616f 100644
--- a/lib/libcxx/include/typeindex
+++ b/lib/libcxx/include/typeindex
@@ -86,8 +86,8 @@ public:
   }
 #  endif
 
-  _LIBCPP_HIDE_FROM_ABI size_t hash_code() const _NOEXCEPT { return __t_->hash_code(); }
-  _LIBCPP_HIDE_FROM_ABI const char* name() const _NOEXCEPT { return __t_->name(); }
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI size_t hash_code() const _NOEXCEPT { return __t_->hash_code(); }
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI const char* name() const _NOEXCEPT { return __t_->name(); }
 };
 
 template <class _Tp>
@@ -95,7 +95,9 @@ struct hash;
 
 template <>
 struct hash<type_index> : public __unary_function<type_index, size_t> {
-  _LIBCPP_HIDE_FROM_ABI size_t operator()(type_index __index) const _NOEXCEPT { return __index.hash_code(); }
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI size_t operator()(type_index __index) const _NOEXCEPT {
+    return __index.hash_code();
+  }
 };
 
 _LIBCPP_END_NAMESPACE_STD
diff --git a/lib/libcxx/include/typeinfo b/lib/libcxx/include/typeinfo
index 24aaabf0a8..f67d61e368 100644
--- a/lib/libcxx/include/typeinfo
+++ b/lib/libcxx/include/typeinfo
@@ -95,11 +95,13 @@ class _LIBCPP_EXPORTED_FROM_ABI type_info {
 public:
   virtual ~type_info();
 
-  const char* name() const _NOEXCEPT;
+  [[__nodiscard__]] const char* name() const _NOEXCEPT;
 
-  _LIBCPP_HIDE_FROM_ABI bool before(const type_info& __arg) const _NOEXCEPT { return __compare(__arg) < 0; }
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI bool before(const type_info& __arg) const _NOEXCEPT {
+    return __compare(__arg) < 0;
+  }
 
-  size_t hash_code() const _NOEXCEPT;
+  [[__nodiscard__]] size_t hash_code() const _NOEXCEPT;
 
   _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX23 bool operator==(const type_info& __arg) const _NOEXCEPT {
     // When evaluated in a constant expression, both type infos simply can't come
@@ -186,99 +188,99 @@ public:
 #        endif
 #      endif
 
-struct __type_info_implementations {
-  struct __string_impl_base {
-    typedef const char* __type_name_t;
-    _LIBCPP_HIDE_FROM_ABI _LIBCPP_ALWAYS_INLINE _LIBCPP_CONSTEXPR static const char*
-    __type_name_to_string(__type_name_t __v) _NOEXCEPT {
-      return __v;
-    }
-    _LIBCPP_HIDE_FROM_ABI _LIBCPP_ALWAYS_INLINE _LIBCPP_CONSTEXPR static __type_name_t
-    __string_to_type_name(const char* __v) _NOEXCEPT {
-      return __v;
-    }
-  };
+namespace __type_info_implementations {
+struct __string_impl_base {
+  typedef const char* __type_name_t;
+  _LIBCPP_HIDE_FROM_ABI _LIBCPP_ALWAYS_INLINE _LIBCPP_CONSTEXPR static const char*
+  __type_name_to_string(__type_name_t __v) _NOEXCEPT {
+    return __v;
+  }
+  _LIBCPP_HIDE_FROM_ABI _LIBCPP_ALWAYS_INLINE _LIBCPP_CONSTEXPR static __type_name_t
+  __string_to_type_name(const char* __v) _NOEXCEPT {
+    return __v;
+  }
+};
 
-  struct __unique_impl : __string_impl_base {
-    _LIBCPP_HIDE_FROM_ABI _LIBCPP_ALWAYS_INLINE static size_t __hash(__type_name_t __v) _NOEXCEPT {
-      return reinterpret_cast<size_t>(__v);
-    }
-    _LIBCPP_HIDE_FROM_ABI _LIBCPP_ALWAYS_INLINE static bool __eq(__type_name_t __lhs, __type_name_t __rhs) _NOEXCEPT {
-      return __lhs == __rhs;
-    }
-    _LIBCPP_HIDE_FROM_ABI _LIBCPP_ALWAYS_INLINE static bool __lt(__type_name_t __lhs, __type_name_t __rhs) _NOEXCEPT {
+struct __unique_impl : __string_impl_base {
+  _LIBCPP_HIDE_FROM_ABI _LIBCPP_ALWAYS_INLINE static size_t __hash(__type_name_t __v) _NOEXCEPT {
+    return reinterpret_cast<size_t>(__v);
+  }
+  _LIBCPP_HIDE_FROM_ABI _LIBCPP_ALWAYS_INLINE static bool __eq(__type_name_t __lhs, __type_name_t __rhs) _NOEXCEPT {
+    return __lhs == __rhs;
+  }
+  _LIBCPP_HIDE_FROM_ABI _LIBCPP_ALWAYS_INLINE static bool __lt(__type_name_t __lhs, __type_name_t __rhs) _NOEXCEPT {
+    return __lhs < __rhs;
+  }
+};
+
+struct __non_unique_impl : __string_impl_base {
+  _LIBCPP_HIDE_FROM_ABI _LIBCPP_ALWAYS_INLINE static size_t __hash(__type_name_t __ptr) _NOEXCEPT {
+    size_t __hash = 5381;
+    while (unsigned char __c = static_cast<unsigned char>(*__ptr++))
+      __hash = (__hash * 33) ^ __c;
+    return __hash;
+  }
+  _LIBCPP_HIDE_FROM_ABI _LIBCPP_ALWAYS_INLINE static bool __eq(__type_name_t __lhs, __type_name_t __rhs) _NOEXCEPT {
+    return __lhs == __rhs || __builtin_strcmp(__lhs, __rhs) == 0;
+  }
+  _LIBCPP_HIDE_FROM_ABI _LIBCPP_ALWAYS_INLINE static bool __lt(__type_name_t __lhs, __type_name_t __rhs) _NOEXCEPT {
+    return __builtin_strcmp(__lhs, __rhs) < 0;
+  }
+};
+
+struct __non_unique_arm_rtti_bit_impl {
+  typedef uintptr_t __type_name_t;
+
+  _LIBCPP_HIDE_FROM_ABI _LIBCPP_ALWAYS_INLINE static const char* __type_name_to_string(__type_name_t __v) _NOEXCEPT {
+    return reinterpret_cast<const char*>(__v & ~__non_unique_rtti_bit::value);
+  }
+  _LIBCPP_HIDE_FROM_ABI _LIBCPP_ALWAYS_INLINE static __type_name_t __string_to_type_name(const char* __v) _NOEXCEPT {
+    return reinterpret_cast<__type_name_t>(__v);
+  }
+
+  _LIBCPP_HIDE_FROM_ABI _LIBCPP_ALWAYS_INLINE static size_t __hash(__type_name_t __v) _NOEXCEPT {
+    if (__is_type_name_unique(__v))
+      return __v;
+    return __non_unique_impl::__hash(__type_name_to_string(__v));
+  }
+  _LIBCPP_HIDE_FROM_ABI _LIBCPP_ALWAYS_INLINE static bool __eq(__type_name_t __lhs, __type_name_t __rhs) _NOEXCEPT {
+    if (__lhs == __rhs)
+      return true;
+    if (__is_type_name_unique(__lhs) || __is_type_name_unique(__rhs))
+      // Either both are unique and have a different address, or one of them
+      // is unique and the other one isn't. In both cases they are unequal.
+      return false;
+    return __builtin_strcmp(__type_name_to_string(__lhs), __type_name_to_string(__rhs)) == 0;
+  }
+  _LIBCPP_HIDE_FROM_ABI _LIBCPP_ALWAYS_INLINE static bool __lt(__type_name_t __lhs, __type_name_t __rhs) _NOEXCEPT {
+    if (__is_type_name_unique(__lhs) || __is_type_name_unique(__rhs))
       return __lhs < __rhs;
-    }
-  };
+    return __builtin_strcmp(__type_name_to_string(__lhs), __type_name_to_string(__rhs)) < 0;
+  }
 
-  struct __non_unique_impl : __string_impl_base {
-    _LIBCPP_HIDE_FROM_ABI _LIBCPP_ALWAYS_INLINE static size_t __hash(__type_name_t __ptr) _NOEXCEPT {
-      size_t __hash = 5381;
-      while (unsigned char __c = static_cast<unsigned char>(*__ptr++))
-        __hash = (__hash * 33) ^ __c;
-      return __hash;
-    }
-    _LIBCPP_HIDE_FROM_ABI _LIBCPP_ALWAYS_INLINE static bool __eq(__type_name_t __lhs, __type_name_t __rhs) _NOEXCEPT {
-      return __lhs == __rhs || __builtin_strcmp(__lhs, __rhs) == 0;
-    }
-    _LIBCPP_HIDE_FROM_ABI _LIBCPP_ALWAYS_INLINE static bool __lt(__type_name_t __lhs, __type_name_t __rhs) _NOEXCEPT {
-      return __builtin_strcmp(__lhs, __rhs) < 0;
-    }
-  };
+private:
+  // The unique bit is the top bit. It is expected that __type_name_t is 64 bits when
+  // this implementation is actually used.
+  typedef integral_constant<__type_name_t, (1ULL << ((__CHAR_BIT__ * sizeof(__type_name_t)) - 1))>
+      __non_unique_rtti_bit;
 
-  struct __non_unique_arm_rtti_bit_impl {
-    typedef uintptr_t __type_name_t;
+  _LIBCPP_HIDE_FROM_ABI static bool __is_type_name_unique(__type_name_t __lhs) _NOEXCEPT {
+    return !(__lhs & __non_unique_rtti_bit::value);
+  }
+};
 
-    _LIBCPP_HIDE_FROM_ABI _LIBCPP_ALWAYS_INLINE static const char* __type_name_to_string(__type_name_t __v) _NOEXCEPT {
-      return reinterpret_cast<const char*>(__v & ~__non_unique_rtti_bit::value);
-    }
-    _LIBCPP_HIDE_FROM_ABI _LIBCPP_ALWAYS_INLINE static __type_name_t __string_to_type_name(const char* __v) _NOEXCEPT {
-      return reinterpret_cast<__type_name_t>(__v);
-    }
-
-    _LIBCPP_HIDE_FROM_ABI _LIBCPP_ALWAYS_INLINE static size_t __hash(__type_name_t __v) _NOEXCEPT {
-      if (__is_type_name_unique(__v))
-        return __v;
-      return __non_unique_impl::__hash(__type_name_to_string(__v));
-    }
-    _LIBCPP_HIDE_FROM_ABI _LIBCPP_ALWAYS_INLINE static bool __eq(__type_name_t __lhs, __type_name_t __rhs) _NOEXCEPT {
-      if (__lhs == __rhs)
-        return true;
-      if (__is_type_name_unique(__lhs) || __is_type_name_unique(__rhs))
-        // Either both are unique and have a different address, or one of them
-        // is unique and the other one isn't. In both cases they are unequal.
-        return false;
-      return __builtin_strcmp(__type_name_to_string(__lhs), __type_name_to_string(__rhs)) == 0;
-    }
-    _LIBCPP_HIDE_FROM_ABI _LIBCPP_ALWAYS_INLINE static bool __lt(__type_name_t __lhs, __type_name_t __rhs) _NOEXCEPT {
-      if (__is_type_name_unique(__lhs) || __is_type_name_unique(__rhs))
-        return __lhs < __rhs;
-      return __builtin_strcmp(__type_name_to_string(__lhs), __type_name_to_string(__rhs)) < 0;
-    }
-
-  private:
-    // The unique bit is the top bit. It is expected that __type_name_t is 64 bits when
-    // this implementation is actually used.
-    typedef integral_constant<__type_name_t, (1ULL << ((__CHAR_BIT__ * sizeof(__type_name_t)) - 1))>
-        __non_unique_rtti_bit;
-
-    _LIBCPP_HIDE_FROM_ABI static bool __is_type_name_unique(__type_name_t __lhs) _NOEXCEPT {
-      return !(__lhs & __non_unique_rtti_bit::value);
-    }
-  };
-
-  typedef
+typedef
 #      if _LIBCPP_TYPEINFO_COMPARISON_IMPLEMENTATION == 1
-      __unique_impl
+    __unique_impl
 #      elif _LIBCPP_TYPEINFO_COMPARISON_IMPLEMENTATION == 2
-      __non_unique_impl
+    __non_unique_impl
 #      elif _LIBCPP_TYPEINFO_COMPARISON_IMPLEMENTATION == 3
-      __non_unique_arm_rtti_bit_impl
+    __non_unique_arm_rtti_bit_impl
 #      else
 #        error invalid configuration for _LIBCPP_TYPEINFO_COMPARISON_IMPLEMENTATION
 #      endif
-          __impl;
-};
+        __impl;
+} // namespace __type_info_implementations
 
 #      if __has_cpp_attribute(_Clang::__ptrauth_vtable_pointer__)
 #        if __has_feature(ptrauth_type_info_vtable_pointer_discrimination)
@@ -306,14 +308,15 @@ protected:
 
 public:
   virtual ~type_info();
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI const char* name() const _NOEXCEPT {
+    return __impl::__type_name_to_string(__type_name);
+  }
 
-  _LIBCPP_HIDE_FROM_ABI const char* name() const _NOEXCEPT { return __impl::__type_name_to_string(__type_name); }
-
-  _LIBCPP_HIDE_FROM_ABI bool before(const type_info& __arg) const _NOEXCEPT {
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI bool before(const type_info& __arg) const _NOEXCEPT {
     return __impl::__lt(__type_name, __arg.__type_name);
   }
 
-  _LIBCPP_HIDE_FROM_ABI size_t hash_code() const _NOEXCEPT { return __impl::__hash(__type_name); }
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI size_t hash_code() const _NOEXCEPT { return __impl::__hash(__type_name); }
 
   _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX23 bool operator==(const type_info& __arg) const _NOEXCEPT {
     // When evaluated in a constant expression, both type infos simply can't come
@@ -336,7 +339,7 @@ public:
   _LIBCPP_HIDE_FROM_ABI bad_cast(const bad_cast&) _NOEXCEPT            = default;
   _LIBCPP_HIDE_FROM_ABI bad_cast& operator=(const bad_cast&) _NOEXCEPT = default;
   ~bad_cast() _NOEXCEPT override;
-  const char* what() const _NOEXCEPT override;
+  [[__nodiscard__]] const char* what() const _NOEXCEPT override;
 };
 
 class _LIBCPP_EXPORTED_FROM_ABI bad_typeid : public exception {
@@ -345,7 +348,7 @@ public:
   _LIBCPP_HIDE_FROM_ABI bad_typeid(const bad_typeid&) _NOEXCEPT            = default;
   _LIBCPP_HIDE_FROM_ABI bad_typeid& operator=(const bad_typeid&) _NOEXCEPT = default;
   ~bad_typeid() _NOEXCEPT override;
-  const char* what() const _NOEXCEPT override;
+  [[__nodiscard__]] const char* what() const _NOEXCEPT override;
 };
 
 } // namespace std
diff --git a/lib/libcxx/include/unordered_map b/lib/libcxx/include/unordered_map
index 5b70cdeae1..ca53348eb5 100644
--- a/lib/libcxx/include/unordered_map
+++ b/lib/libcxx/include/unordered_map
@@ -600,6 +600,7 @@ template <class Key, class T, class Hash, class Pred, class Alloc>
 #  include <__memory/addressof.h>
 #  include <__memory/allocator.h>
 #  include <__memory/allocator_traits.h>
+#  include <__memory/compressed_pair.h>
 #  include <__memory/pointer_traits.h>
 #  include <__memory/unique_ptr.h>
 #  include <__memory_resource/polymorphic_allocator.h>
@@ -643,34 +644,9 @@ _LIBCPP_PUSH_MACROS
 
 _LIBCPP_BEGIN_NAMESPACE_STD
 
-template <class _Key,
-          class _Cp,
-          class _Hash,
-          class _Pred,
-          bool = is_empty<_Hash>::value && !__libcpp_is_final<_Hash>::value>
-class __unordered_map_hasher : private _Hash {
-public:
-  _LIBCPP_HIDE_FROM_ABI __unordered_map_hasher() _NOEXCEPT_(is_nothrow_default_constructible<_Hash>::value) : _Hash() {}
-  _LIBCPP_HIDE_FROM_ABI __unordered_map_hasher(const _Hash& __h) _NOEXCEPT_(is_nothrow_copy_constructible<_Hash>::value)
-      : _Hash(__h) {}
-  _LIBCPP_HIDE_FROM_ABI const _Hash& hash_function() const _NOEXCEPT { return *this; }
-  _LIBCPP_HIDE_FROM_ABI size_t operator()(const _Cp& __x) const { return static_cast<const _Hash&>(*this)(__x.first); }
-  _LIBCPP_HIDE_FROM_ABI size_t operator()(const _Key& __x) const { return static_cast<const _Hash&>(*this)(__x); }
-#  if _LIBCPP_STD_VER >= 20
-  template <typename _K2>
-  _LIBCPP_HIDE_FROM_ABI size_t operator()(const _K2& __x) const {
-    return static_cast<const _Hash&>(*this)(__x);
-  }
-#  endif
-  _LIBCPP_HIDE_FROM_ABI void swap(__unordered_map_hasher& __y) _NOEXCEPT_(__is_nothrow_swappable_v<_Hash>) {
-    using std::swap;
-    swap(static_cast<_Hash&>(*this), static_cast<_Hash&>(__y));
-  }
-};
-
 template <class _Key, class _Cp, class _Hash, class _Pred>
-class __unordered_map_hasher<_Key, _Cp, _Hash, _Pred, false> {
-  _Hash __hash_;
+class __unordered_map_hasher {
+  _LIBCPP_COMPRESSED_ELEMENT(_Hash, __hash_);
 
 public:
   _LIBCPP_HIDE_FROM_ABI __unordered_map_hasher() _NOEXCEPT_(is_nothrow_default_constructible<_Hash>::value)
@@ -692,60 +668,16 @@ public:
   }
 };
 
-template <class _Key, class _Cp, class _Hash, class _Pred, bool __b>
+template <class _Key, class _Cp, class _Hash, class _Pred>
 inline _LIBCPP_HIDE_FROM_ABI void
-swap(__unordered_map_hasher<_Key, _Cp, _Hash, _Pred, __b>& __x,
-     __unordered_map_hasher<_Key, _Cp, _Hash, _Pred, __b>& __y) _NOEXCEPT_(_NOEXCEPT_(__x.swap(__y))) {
+swap(__unordered_map_hasher<_Key, _Cp, _Hash, _Pred>& __x, __unordered_map_hasher<_Key, _Cp, _Hash, _Pred>& __y)
+    _NOEXCEPT_(_NOEXCEPT_(__x.swap(__y))) {
   __x.swap(__y);
 }
 
-template <class _Key,
-          class _Cp,
-          class _Pred,
-          class _Hash,
-          bool = is_empty<_Pred>::value && !__libcpp_is_final<_Pred>::value>
-class __unordered_map_equal : private _Pred {
-public:
-  _LIBCPP_HIDE_FROM_ABI __unordered_map_equal() _NOEXCEPT_(is_nothrow_default_constructible<_Pred>::value) : _Pred() {}
-  _LIBCPP_HIDE_FROM_ABI __unordered_map_equal(const _Pred& __p) _NOEXCEPT_(is_nothrow_copy_constructible<_Pred>::value)
-      : _Pred(__p) {}
-  _LIBCPP_HIDE_FROM_ABI const _Pred& key_eq() const _NOEXCEPT { return *this; }
-  _LIBCPP_HIDE_FROM_ABI bool operator()(const _Cp& __x, const _Cp& __y) const {
-    return static_cast<const _Pred&>(*this)(__x.first, __y.first);
-  }
-  _LIBCPP_HIDE_FROM_ABI bool operator()(const _Cp& __x, const _Key& __y) const {
-    return static_cast<const _Pred&>(*this)(__x.first, __y);
-  }
-  _LIBCPP_HIDE_FROM_ABI bool operator()(const _Key& __x, const _Cp& __y) const {
-    return static_cast<const _Pred&>(*this)(__x, __y.__get_value().first);
-  }
-#  if _LIBCPP_STD_VER >= 20
-  template <typename _K2>
-  _LIBCPP_HIDE_FROM_ABI bool operator()(const _Cp& __x, const _K2& __y) const {
-    return static_cast<const _Pred&>(*this)(__x.first, __y);
-  }
-  template <typename _K2>
-  _LIBCPP_HIDE_FROM_ABI bool operator()(const _K2& __x, const _Cp& __y) const {
-    return static_cast<const _Pred&>(*this)(__x, __y.__get_value().first);
-  }
-  template <typename _K2>
-  _LIBCPP_HIDE_FROM_ABI bool operator()(const _Key& __x, const _K2& __y) const {
-    return static_cast<const _Pred&>(*this)(__x, __y);
-  }
-  template <typename _K2>
-  _LIBCPP_HIDE_FROM_ABI bool operator()(const _K2& __x, const _Key& __y) const {
-    return static_cast<const _Pred&>(*this)(__x, __y);
-  }
-#  endif
-  _LIBCPP_HIDE_FROM_ABI void swap(__unordered_map_equal& __y) _NOEXCEPT_(__is_nothrow_swappable_v<_Pred>) {
-    using std::swap;
-    swap(static_cast<_Pred&>(*this), static_cast<_Pred&>(__y));
-  }
-};
-
 template <class _Key, class _Cp, class _Pred, class _Hash>
-class __unordered_map_equal<_Key, _Cp, _Pred, _Hash, false> {
-  _Pred __pred_;
+class __unordered_map_equal {
+  _LIBCPP_COMPRESSED_ELEMENT(_Pred, __pred_);
 
 public:
   _LIBCPP_HIDE_FROM_ABI __unordered_map_equal() _NOEXCEPT_(is_nothrow_default_constructible<_Pred>::value)
@@ -780,9 +712,9 @@ public:
   }
 };
 
-template <class _Key, class _Cp, class _Pred, class _Hash, bool __b>
+template <class _Key, class _Cp, class _Pred, class _Hash>
 inline _LIBCPP_HIDE_FROM_ABI void
-swap(__unordered_map_equal<_Key, _Cp, _Pred, _Hash, __b>& __x, __unordered_map_equal<_Key, _Cp, _Pred, _Hash, __b>& __y)
+swap(__unordered_map_equal<_Key, _Cp, _Pred, _Hash>& __x, __unordered_map_equal<_Key, _Cp, _Pred, _Hash>& __y)
     _NOEXCEPT_(_NOEXCEPT_(__x.swap(__y))) {
   __x.swap(__y);
 }
@@ -844,10 +776,10 @@ class __hash_map_iterator {
 
 public:
   typedef forward_iterator_tag iterator_category;
-  typedef typename _NodeTypes::__map_value_type value_type;
-  typedef typename _NodeTypes::difference_type difference_type;
+  using value_type      = typename _HashIterator::value_type;
+  using difference_type = ptrdiff_t;
   typedef value_type& reference;
-  typedef typename _NodeTypes::__map_value_type_pointer pointer;
+  using pointer = typename _HashIterator::pointer;
 
   _LIBCPP_HIDE_FROM_ABI __hash_map_iterator() _NOEXCEPT {}
 
@@ -895,10 +827,10 @@ class __hash_map_const_iterator {
 
 public:
   typedef forward_iterator_tag iterator_category;
-  typedef typename _NodeTypes::__map_value_type value_type;
-  typedef typename _NodeTypes::difference_type difference_type;
+  using value_type      = typename _HashIterator::value_type;
+  using difference_type = ptrdiff_t;
   typedef const value_type& reference;
-  typedef typename _NodeTypes::__const_map_value_type_pointer pointer;
+  using pointer = typename _HashIterator::pointer;
 
   _LIBCPP_HIDE_FROM_ABI __hash_map_const_iterator() _NOEXCEPT {}
 
@@ -972,9 +904,6 @@ private:
 
   __table __table_;
 
-  typedef typename __table::_NodeTypes _NodeTypes;
-  typedef typename __table::__node_pointer __node_pointer;
-  typedef typename __table::__node_const_pointer __node_const_pointer;
   typedef typename __table::__node_traits __node_traits;
   typedef typename __table::__node_allocator __node_allocator;
   typedef typename __table::__node __node;
@@ -1046,10 +975,10 @@ public:
 #  endif
 
   _LIBCPP_HIDE_FROM_ABI explicit unordered_map(const allocator_type& __a);
-  _LIBCPP_HIDE_FROM_ABI unordered_map(const unordered_map& __u);
+  _LIBCPP_HIDE_FROM_ABI unordered_map(const unordered_map& __u) = default;
   _LIBCPP_HIDE_FROM_ABI unordered_map(const unordered_map& __u, const allocator_type& __a);
 #  ifndef _LIBCPP_CXX03_LANG
-  _LIBCPP_HIDE_FROM_ABI unordered_map(unordered_map&& __u) _NOEXCEPT_(is_nothrow_move_constructible<__table>::value);
+  _LIBCPP_HIDE_FROM_ABI unordered_map(unordered_map&& __u) = default;
   _LIBCPP_HIDE_FROM_ABI unordered_map(unordered_map&& __u, const allocator_type& __a);
   _LIBCPP_HIDE_FROM_ABI unordered_map(initializer_list<value_type> __il);
   _LIBCPP_HIDE_FROM_ABI
@@ -1099,41 +1028,26 @@ public:
     static_assert(sizeof(std::__diagnose_unordered_container_requirements<_Key, _Hash, _Pred>(0)), "");
   }
 
-  _LIBCPP_HIDE_FROM_ABI unordered_map& operator=(const unordered_map& __u) {
+  _LIBCPP_HIDE_FROM_ABI unordered_map& operator=(const unordered_map& __u) = default;
 #  ifndef _LIBCPP_CXX03_LANG
-    __table_ = __u.__table_;
-#  else
-    if (this != std::addressof(__u)) {
-      __table_.clear();
-      __table_.hash_function()   = __u.__table_.hash_function();
-      __table_.key_eq()          = __u.__table_.key_eq();
-      __table_.max_load_factor() = __u.__table_.max_load_factor();
-      __table_.__copy_assign_alloc(__u.__table_);
-      insert(__u.begin(), __u.end());
-    }
-#  endif
-    return *this;
-  }
-#  ifndef _LIBCPP_CXX03_LANG
-  _LIBCPP_HIDE_FROM_ABI unordered_map& operator=(unordered_map&& __u)
-      _NOEXCEPT_(is_nothrow_move_assignable<__table>::value);
+  _LIBCPP_HIDE_FROM_ABI unordered_map& operator=(unordered_map&& __u) = default;
   _LIBCPP_HIDE_FROM_ABI unordered_map& operator=(initializer_list<value_type> __il);
 #  endif // _LIBCPP_CXX03_LANG
 
-  _LIBCPP_HIDE_FROM_ABI allocator_type get_allocator() const _NOEXCEPT {
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI allocator_type get_allocator() const _NOEXCEPT {
     return allocator_type(__table_.__node_alloc());
   }
 
   [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI bool empty() const _NOEXCEPT { return __table_.size() == 0; }
-  _LIBCPP_HIDE_FROM_ABI size_type size() const _NOEXCEPT { return __table_.size(); }
-  _LIBCPP_HIDE_FROM_ABI size_type max_size() const _NOEXCEPT { return __table_.max_size(); }
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI size_type size() const _NOEXCEPT { return __table_.size(); }
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI size_type max_size() const _NOEXCEPT { return __table_.max_size(); }
 
-  _LIBCPP_HIDE_FROM_ABI iterator begin() _NOEXCEPT { return __table_.begin(); }
-  _LIBCPP_HIDE_FROM_ABI iterator end() _NOEXCEPT { return __table_.end(); }
-  _LIBCPP_HIDE_FROM_ABI const_iterator begin() const _NOEXCEPT { return __table_.begin(); }
-  _LIBCPP_HIDE_FROM_ABI const_iterator end() const _NOEXCEPT { return __table_.end(); }
-  _LIBCPP_HIDE_FROM_ABI const_iterator cbegin() const _NOEXCEPT { return __table_.begin(); }
-  _LIBCPP_HIDE_FROM_ABI const_iterator cend() const _NOEXCEPT { return __table_.end(); }
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI iterator begin() _NOEXCEPT { return __table_.begin(); }
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI iterator end() _NOEXCEPT { return __table_.end(); }
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI const_iterator begin() const _NOEXCEPT { return __table_.begin(); }
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI const_iterator end() const _NOEXCEPT { return __table_.end(); }
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI const_iterator cbegin() const _NOEXCEPT { return __table_.begin(); }
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI const_iterator cend() const _NOEXCEPT { return __table_.end(); }
 
   _LIBCPP_HIDE_FROM_ABI pair<iterator, bool> insert(const value_type& __x) { return __table_.__emplace_unique(__x); }
 
@@ -1187,14 +1101,13 @@ public:
 #  if _LIBCPP_STD_VER >= 17
   template <class... _Args>
   _LIBCPP_HIDE_FROM_ABI pair<iterator, bool> try_emplace(const key_type& __k, _Args&&... __args) {
-    return __table_.__emplace_unique_key_args(
-        __k, piecewise_construct, std::forward_as_tuple(__k), std::forward_as_tuple(std::forward<_Args>(__args)...));
+    return __table_.__emplace_unique(
+        piecewise_construct, std::forward_as_tuple(__k), std::forward_as_tuple(std::forward<_Args>(__args)...));
   }
 
   template <class... _Args>
   _LIBCPP_HIDE_FROM_ABI pair<iterator, bool> try_emplace(key_type&& __k, _Args&&... __args) {
-    return __table_.__emplace_unique_key_args(
-        __k,
+    return __table_.__emplace_unique(
         piecewise_construct,
         std::forward_as_tuple(std::move(__k)),
         std::forward_as_tuple(std::forward<_Args>(__args)...));
@@ -1212,7 +1125,7 @@ public:
 
   template <class _Vp>
   _LIBCPP_HIDE_FROM_ABI pair<iterator, bool> insert_or_assign(const key_type& __k, _Vp&& __v) {
-    pair<iterator, bool> __res = __table_.__emplace_unique_key_args(__k, __k, std::forward<_Vp>(__v));
+    pair<iterator, bool> __res = __table_.__emplace_unique(__k, std::forward<_Vp>(__v));
     if (!__res.second) {
       __res.first->second = std::forward<_Vp>(__v);
     }
@@ -1221,7 +1134,7 @@ public:
 
   template <class _Vp>
   _LIBCPP_HIDE_FROM_ABI pair<iterator, bool> insert_or_assign(key_type&& __k, _Vp&& __v) {
-    pair<iterator, bool> __res = __table_.__emplace_unique_key_args(__k, std::move(__k), std::forward<_Vp>(__v));
+    pair<iterator, bool> __res = __table_.__emplace_unique(std::move(__k), std::forward<_Vp>(__v));
     if (!__res.second) {
       __res.first->second = std::forward<_Vp>(__v);
     }
@@ -1258,10 +1171,10 @@ public:
                                         "node_type with incompatible allocator passed to unordered_map::insert()");
     return __table_.template __node_handle_insert_unique<node_type>(__hint.__i_, std::move(__nh));
   }
-  _LIBCPP_HIDE_FROM_ABI node_type extract(key_type const& __key) {
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI node_type extract(key_type const& __key) {
     return __table_.template __node_handle_extract<node_type>(__key);
   }
-  _LIBCPP_HIDE_FROM_ABI node_type extract(const_iterator __it) {
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI node_type extract(const_iterator __it) {
     return __table_.template __node_handle_extract<node_type>(__it.__i_);
   }
 
@@ -1295,52 +1208,56 @@ public:
     __table_.swap(__u.__table_);
   }
 
-  _LIBCPP_HIDE_FROM_ABI hasher hash_function() const { return __table_.hash_function().hash_function(); }
-  _LIBCPP_HIDE_FROM_ABI key_equal key_eq() const { return __table_.key_eq().key_eq(); }
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI hasher hash_function() const {
+    return __table_.hash_function().hash_function();
+  }
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI key_equal key_eq() const { return __table_.key_eq().key_eq(); }
 
-  _LIBCPP_HIDE_FROM_ABI iterator find(const key_type& __k) { return __table_.find(__k); }
-  _LIBCPP_HIDE_FROM_ABI const_iterator find(const key_type& __k) const { return __table_.find(__k); }
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI iterator find(const key_type& __k) { return __table_.find(__k); }
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI const_iterator find(const key_type& __k) const { return __table_.find(__k); }
 #  if _LIBCPP_STD_VER >= 20
   template <class _K2, enable_if_t<__is_transparent_v<hasher, _K2> && __is_transparent_v<key_equal, _K2>>* = nullptr>
-  _LIBCPP_HIDE_FROM_ABI iterator find(const _K2& __k) {
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI iterator find(const _K2& __k) {
     return __table_.find(__k);
   }
   template <class _K2, enable_if_t<__is_transparent_v<hasher, _K2> && __is_transparent_v<key_equal, _K2>>* = nullptr>
-  _LIBCPP_HIDE_FROM_ABI const_iterator find(const _K2& __k) const {
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI const_iterator find(const _K2& __k) const {
     return __table_.find(__k);
   }
 #  endif // _LIBCPP_STD_VER >= 20
 
-  _LIBCPP_HIDE_FROM_ABI size_type count(const key_type& __k) const { return __table_.__count_unique(__k); }
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI size_type count(const key_type& __k) const {
+    return __table_.__count_unique(__k);
+  }
 #  if _LIBCPP_STD_VER >= 20
   template <class _K2, enable_if_t<__is_transparent_v<hasher, _K2> && __is_transparent_v<key_equal, _K2>>* = nullptr>
-  _LIBCPP_HIDE_FROM_ABI size_type count(const _K2& __k) const {
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI size_type count(const _K2& __k) const {
     return __table_.__count_unique(__k);
   }
 #  endif // _LIBCPP_STD_VER >= 20
 
 #  if _LIBCPP_STD_VER >= 20
-  _LIBCPP_HIDE_FROM_ABI bool contains(const key_type& __k) const { return find(__k) != end(); }
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI bool contains(const key_type& __k) const { return find(__k) != end(); }
 
   template <class _K2, enable_if_t<__is_transparent_v<hasher, _K2> && __is_transparent_v<key_equal, _K2>>* = nullptr>
-  _LIBCPP_HIDE_FROM_ABI bool contains(const _K2& __k) const {
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI bool contains(const _K2& __k) const {
     return find(__k) != end();
   }
 #  endif // _LIBCPP_STD_VER >= 20
 
-  _LIBCPP_HIDE_FROM_ABI pair<iterator, iterator> equal_range(const key_type& __k) {
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI pair<iterator, iterator> equal_range(const key_type& __k) {
     return __table_.__equal_range_unique(__k);
   }
-  _LIBCPP_HIDE_FROM_ABI pair<const_iterator, const_iterator> equal_range(const key_type& __k) const {
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI pair<const_iterator, const_iterator> equal_range(const key_type& __k) const {
     return __table_.__equal_range_unique(__k);
   }
 #  if _LIBCPP_STD_VER >= 20
   template <class _K2, enable_if_t<__is_transparent_v<hasher, _K2> && __is_transparent_v<key_equal, _K2>>* = nullptr>
-  _LIBCPP_HIDE_FROM_ABI pair<iterator, iterator> equal_range(const _K2& __k) {
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI pair<iterator, iterator> equal_range(const _K2& __k) {
     return __table_.__equal_range_unique(__k);
   }
   template <class _K2, enable_if_t<__is_transparent_v<hasher, _K2> && __is_transparent_v<key_equal, _K2>>* = nullptr>
-  _LIBCPP_HIDE_FROM_ABI pair<const_iterator, const_iterator> equal_range(const _K2& __k) const {
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI pair<const_iterator, const_iterator> equal_range(const _K2& __k) const {
     return __table_.__equal_range_unique(__k);
   }
 #  endif // _LIBCPP_STD_VER >= 20
@@ -1350,24 +1267,32 @@ public:
   _LIBCPP_HIDE_FROM_ABI mapped_type& operator[](key_type&& __k);
 #  endif
 
-  _LIBCPP_HIDE_FROM_ABI mapped_type& at(const key_type& __k);
-  _LIBCPP_HIDE_FROM_ABI const mapped_type& at(const key_type& __k) const;
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI mapped_type& at(const key_type& __k);
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI const mapped_type& at(const key_type& __k) const;
 
-  _LIBCPP_HIDE_FROM_ABI size_type bucket_count() const _NOEXCEPT { return __table_.bucket_count(); }
-  _LIBCPP_HIDE_FROM_ABI size_type max_bucket_count() const _NOEXCEPT { return __table_.max_bucket_count(); }
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI size_type bucket_count() const _NOEXCEPT { return __table_.bucket_count(); }
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI size_type max_bucket_count() const _NOEXCEPT {
+    return __table_.max_bucket_count();
+  }
 
-  _LIBCPP_HIDE_FROM_ABI size_type bucket_size(size_type __n) const { return __table_.bucket_size(__n); }
-  _LIBCPP_HIDE_FROM_ABI size_type bucket(const key_type& __k) const { return __table_.bucket(__k); }
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI size_type bucket_size(size_type __n) const {
+    return __table_.bucket_size(__n);
+  }
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI size_type bucket(const key_type& __k) const { return __table_.bucket(__k); }
 
-  _LIBCPP_HIDE_FROM_ABI local_iterator begin(size_type __n) { return __table_.begin(__n); }
-  _LIBCPP_HIDE_FROM_ABI local_iterator end(size_type __n) { return __table_.end(__n); }
-  _LIBCPP_HIDE_FROM_ABI const_local_iterator begin(size_type __n) const { return __table_.cbegin(__n); }
-  _LIBCPP_HIDE_FROM_ABI const_local_iterator end(size_type __n) const { return __table_.cend(__n); }
-  _LIBCPP_HIDE_FROM_ABI const_local_iterator cbegin(size_type __n) const { return __table_.cbegin(__n); }
-  _LIBCPP_HIDE_FROM_ABI const_local_iterator cend(size_type __n) const { return __table_.cend(__n); }
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI local_iterator begin(size_type __n) { return __table_.begin(__n); }
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI local_iterator end(size_type __n) { return __table_.end(__n); }
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI const_local_iterator begin(size_type __n) const {
+    return __table_.cbegin(__n);
+  }
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI const_local_iterator end(size_type __n) const { return __table_.cend(__n); }
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI const_local_iterator cbegin(size_type __n) const {
+    return __table_.cbegin(__n);
+  }
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI const_local_iterator cend(size_type __n) const { return __table_.cend(__n); }
 
-  _LIBCPP_HIDE_FROM_ABI float load_factor() const _NOEXCEPT { return __table_.load_factor(); }
-  _LIBCPP_HIDE_FROM_ABI float max_load_factor() const _NOEXCEPT { return __table_.max_load_factor(); }
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI float load_factor() const _NOEXCEPT { return __table_.load_factor(); }
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI float max_load_factor() const _NOEXCEPT { return __table_.max_load_factor(); }
   _LIBCPP_HIDE_FROM_ABI void max_load_factor(float __mlf) { __table_.max_load_factor(__mlf); }
   _LIBCPP_HIDE_FROM_ABI void rehash(size_type __n) { __table_.__rehash_unique(__n); }
   _LIBCPP_HIDE_FROM_ABI void reserve(size_type __n) { __table_.__reserve_unique(__n); }
@@ -1384,10 +1309,10 @@ template <class _InputIterator,
           class _Pred      = equal_to<__iter_key_type<_InputIterator>>,
           class _Allocator = allocator<__iter_to_alloc_type<_InputIterator>>,
           class            = enable_if_t<__has_input_iterator_category<_InputIterator>::value>,
-          class            = enable_if_t<!__is_allocator<_Hash>::value>,
+          class            = enable_if_t<!__is_allocator_v<_Hash>>,
           class            = enable_if_t<!is_integral<_Hash>::value>,
-          class            = enable_if_t<!__is_allocator<_Pred>::value>,
-          class            = enable_if_t<__is_allocator<_Allocator>::value>>
+          class            = enable_if_t<!__is_allocator_v<_Pred>>,
+          class            = enable_if_t<__is_allocator_v<_Allocator>>>
 unordered_map(_InputIterator,
               _InputIterator,
               typename allocator_traits<_Allocator>::size_type = 0,
@@ -1401,10 +1326,10 @@ template <ranges::input_range _Range,
           class _Hash      = hash<__range_key_type<_Range>>,
           class _Pred      = equal_to<__range_key_type<_Range>>,
           class _Allocator = allocator<__range_to_alloc_type<_Range>>,
-          class            = enable_if_t<!__is_allocator<_Hash>::value>,
+          class            = enable_if_t<!__is_allocator_v<_Hash>>,
           class            = enable_if_t<!is_integral<_Hash>::value>,
-          class            = enable_if_t<!__is_allocator<_Pred>::value>,
-          class            = enable_if_t<__is_allocator<_Allocator>::value>>
+          class            = enable_if_t<!__is_allocator_v<_Pred>>,
+          class            = enable_if_t<__is_allocator_v<_Allocator>>>
 unordered_map(from_range_t,
               _Range&&,
               typename allocator_traits<_Allocator>::size_type = 0,
@@ -1419,10 +1344,10 @@ template <class _Key,
           class _Hash      = hash<remove_const_t<_Key>>,
           class _Pred      = equal_to<remove_const_t<_Key>>,
           class _Allocator = allocator<pair<const _Key, _Tp>>,
-          class            = enable_if_t<!__is_allocator<_Hash>::value>,
+          class            = enable_if_t<!__is_allocator_v<_Hash>>,
           class            = enable_if_t<!is_integral<_Hash>::value>,
-          class            = enable_if_t<!__is_allocator<_Pred>::value>,
-          class            = enable_if_t<__is_allocator<_Allocator>::value>>
+          class            = enable_if_t<!__is_allocator_v<_Pred>>,
+          class            = enable_if_t<__is_allocator_v<_Allocator>>>
 unordered_map(initializer_list<pair<_Key, _Tp>>,
               typename allocator_traits<_Allocator>::size_type = 0,
               _Hash                                            = _Hash(),
@@ -1432,7 +1357,7 @@ unordered_map(initializer_list<pair<_Key, _Tp>>,
 template <class _InputIterator,
           class _Allocator,
           class = enable_if_t<__has_input_iterator_category<_InputIterator>::value>,
-          class = enable_if_t<__is_allocator<_Allocator>::value>>
+          class = enable_if_t<__is_allocator_v<_Allocator>>>
 unordered_map(_InputIterator, _InputIterator, typename allocator_traits<_Allocator>::size_type, _Allocator)
     -> unordered_map<__iter_key_type<_InputIterator>,
                      __iter_mapped_type<_InputIterator>,
@@ -1443,7 +1368,7 @@ unordered_map(_InputIterator, _InputIterator, typename allocator_traits<_Allocat
 template <class _InputIterator,
           class _Allocator,
           class = enable_if_t<__has_input_iterator_category<_InputIterator>::value>,
-          class = enable_if_t<__is_allocator<_Allocator>::value>>
+          class = enable_if_t<__is_allocator_v<_Allocator>>>
 unordered_map(_InputIterator, _InputIterator, _Allocator)
     -> unordered_map<__iter_key_type<_InputIterator>,
                      __iter_mapped_type<_InputIterator>,
@@ -1455,9 +1380,9 @@ template <class _InputIterator,
           class _Hash,
           class _Allocator,
           class = enable_if_t<__has_input_iterator_category<_InputIterator>::value>,
-          class = enable_if_t<!__is_allocator<_Hash>::value>,
+          class = enable_if_t<!__is_allocator_v<_Hash>>,
           class = enable_if_t<!is_integral<_Hash>::value>,
-          class = enable_if_t<__is_allocator<_Allocator>::value>>
+          class = enable_if_t<__is_allocator_v<_Allocator>>>
 unordered_map(_InputIterator, _InputIterator, typename allocator_traits<_Allocator>::size_type, _Hash, _Allocator)
     -> unordered_map<__iter_key_type<_InputIterator>,
                      __iter_mapped_type<_InputIterator>,
@@ -1467,7 +1392,7 @@ unordered_map(_InputIterator, _InputIterator, typename allocator_traits<_Allocat
 
 #    if _LIBCPP_STD_VER >= 23
 
-template <ranges::input_range _Range, class _Allocator, class = enable_if_t<__is_allocator<_Allocator>::value>>
+template <ranges::input_range _Range, class _Allocator, class = enable_if_t<__is_allocator_v<_Allocator>>>
 unordered_map(from_range_t, _Range&&, typename allocator_traits<_Allocator>::size_type, _Allocator)
     -> unordered_map<__range_key_type<_Range>,
                      __range_mapped_type<_Range>,
@@ -1475,7 +1400,7 @@ unordered_map(from_range_t, _Range&&, typename allocator_traits<_Allocator>::siz
                      equal_to<__range_key_type<_Range>>,
                      _Allocator>;
 
-template <ranges::input_range _Range, class _Allocator, class = enable_if_t<__is_allocator<_Allocator>::value>>
+template <ranges::input_range _Range, class _Allocator, class = enable_if_t<__is_allocator_v<_Allocator>>>
 unordered_map(from_range_t, _Range&&, _Allocator)
     -> unordered_map<__range_key_type<_Range>,
                      __range_mapped_type<_Range>,
@@ -1486,9 +1411,9 @@ unordered_map(from_range_t, _Range&&, _Allocator)
 template <ranges::input_range _Range,
           class _Hash,
           class _Allocator,
-          class = enable_if_t<!__is_allocator<_Hash>::value>,
+          class = enable_if_t<!__is_allocator_v<_Hash>>,
           class = enable_if_t<!is_integral<_Hash>::value>,
-          class = enable_if_t<__is_allocator<_Allocator>::value>>
+          class = enable_if_t<__is_allocator_v<_Allocator>>>
 unordered_map(from_range_t, _Range&&, typename allocator_traits<_Allocator>::size_type, _Hash, _Allocator)
     -> unordered_map<__range_key_type<_Range>,
                      __range_mapped_type<_Range>,
@@ -1498,11 +1423,11 @@ unordered_map(from_range_t, _Range&&, typename allocator_traits<_Allocator>::siz
 
 #    endif
 
-template <class _Key, class _Tp, class _Allocator, class = enable_if_t<__is_allocator<_Allocator>::value>>
+template <class _Key, class _Tp, class _Allocator, class = enable_if_t<__is_allocator_v<_Allocator>>>
 unordered_map(initializer_list<pair<_Key, _Tp>>, typename allocator_traits<_Allocator>::size_type, _Allocator)
     -> unordered_map<remove_const_t<_Key>, _Tp, hash<remove_const_t<_Key>>, equal_to<remove_const_t<_Key>>, _Allocator>;
 
-template <class _Key, class _Tp, class _Allocator, class = enable_if_t<__is_allocator<_Allocator>::value>>
+template <class _Key, class _Tp, class _Allocator, class = enable_if_t<__is_allocator_v<_Allocator>>>
 unordered_map(initializer_list<pair<_Key, _Tp>>, _Allocator)
     -> unordered_map<remove_const_t<_Key>, _Tp, hash<remove_const_t<_Key>>, equal_to<remove_const_t<_Key>>, _Allocator>;
 
@@ -1510,9 +1435,9 @@ template <class _Key,
           class _Tp,
           class _Hash,
           class _Allocator,
-          class = enable_if_t<!__is_allocator<_Hash>::value>,
+          class = enable_if_t<!__is_allocator_v<_Hash>>,
           class = enable_if_t<!is_integral<_Hash>::value>,
-          class = enable_if_t<__is_allocator<_Allocator>::value>>
+          class = enable_if_t<__is_allocator_v<_Allocator>>>
 unordered_map(initializer_list<pair<_Key, _Tp>>, typename allocator_traits<_Allocator>::size_type, _Hash, _Allocator)
     -> unordered_map<remove_const_t<_Key>, _Tp, _Hash, equal_to<remove_const_t<_Key>>, _Allocator>;
 #  endif
@@ -1563,12 +1488,6 @@ unordered_map<_Key, _Tp, _Hash, _Pred, _Alloc>::unordered_map(
   insert(__first, __last);
 }
 
-template <class _Key, class _Tp, class _Hash, class _Pred, class _Alloc>
-unordered_map<_Key, _Tp, _Hash, _Pred, _Alloc>::unordered_map(const unordered_map& __u) : __table_(__u.__table_) {
-  __table_.__rehash_unique(__u.bucket_count());
-  insert(__u.begin(), __u.end());
-}
-
 template <class _Key, class _Tp, class _Hash, class _Pred, class _Alloc>
 unordered_map<_Key, _Tp, _Hash, _Pred, _Alloc>::unordered_map(const unordered_map& __u, const allocator_type& __a)
     : __table_(__u.__table_, typename __table::allocator_type(__a)) {
@@ -1578,11 +1497,6 @@ unordered_map<_Key, _Tp, _Hash, _Pred, _Alloc>::unordered_map(const unordered_ma
 
 #  ifndef _LIBCPP_CXX03_LANG
 
-template <class _Key, class _Tp, class _Hash, class _Pred, class _Alloc>
-inline unordered_map<_Key, _Tp, _Hash, _Pred, _Alloc>::unordered_map(unordered_map&& __u)
-    _NOEXCEPT_(is_nothrow_move_constructible<__table>::value)
-    : __table_(std::move(__u.__table_)) {}
-
 template <class _Key, class _Tp, class _Hash, class _Pred, class _Alloc>
 unordered_map<_Key, _Tp, _Hash, _Pred, _Alloc>::unordered_map(unordered_map&& __u, const allocator_type& __a)
     : __table_(std::move(__u.__table_), typename __table::allocator_type(__a)) {
@@ -1618,14 +1532,6 @@ unordered_map<_Key, _Tp, _Hash, _Pred, _Alloc>::unordered_map(
   insert(__il.begin(), __il.end());
 }
 
-template <class _Key, class _Tp, class _Hash, class _Pred, class _Alloc>
-inline unordered_map<_Key, _Tp, _Hash, _Pred, _Alloc>&
-unordered_map<_Key, _Tp, _Hash, _Pred, _Alloc>::operator=(unordered_map&& __u)
-    _NOEXCEPT_(is_nothrow_move_assignable<__table>::value) {
-  __table_ = std::move(__u.__table_);
-  return *this;
-}
-
 template <class _Key, class _Tp, class _Hash, class _Pred, class _Alloc>
 inline unordered_map<_Key, _Tp, _Hash, _Pred, _Alloc>&
 unordered_map<_Key, _Tp, _Hash, _Pred, _Alloc>::operator=(initializer_list<value_type> __il) {
@@ -1646,16 +1552,13 @@ inline void unordered_map<_Key, _Tp, _Hash, _Pred, _Alloc>::insert(_InputIterato
 
 template <class _Key, class _Tp, class _Hash, class _Pred, class _Alloc>
 _Tp& unordered_map<_Key, _Tp, _Hash, _Pred, _Alloc>::operator[](const key_type& __k) {
-  return __table_
-      .__emplace_unique_key_args(__k, piecewise_construct, std::forward_as_tuple(__k), std::forward_as_tuple())
+  return __table_.__emplace_unique(piecewise_construct, std::forward_as_tuple(__k), std::forward_as_tuple())
       .first->second;
 }
 
 template <class _Key, class _Tp, class _Hash, class _Pred, class _Alloc>
 _Tp& unordered_map<_Key, _Tp, _Hash, _Pred, _Alloc>::operator[](key_type&& __k) {
-  return __table_
-      .__emplace_unique_key_args(
-          __k, piecewise_construct, std::forward_as_tuple(std::move(__k)), std::forward_as_tuple())
+  return __table_.__emplace_unique(piecewise_construct, std::forward_as_tuple(std::move(__k)), std::forward_as_tuple())
       .first->second;
 }
 #  else // _LIBCPP_CXX03_LANG
@@ -1781,12 +1684,8 @@ private:
 
   __table __table_;
 
-  typedef typename __table::_NodeTypes _NodeTypes;
   typedef typename __table::__node_traits __node_traits;
-  typedef typename __table::__node_allocator __node_allocator;
   typedef typename __table::__node __node;
-  typedef __hash_map_node_destructor<__node_allocator> _Dp;
-  typedef unique_ptr<__node, _Dp> __node_holder;
   typedef allocator_traits<allocator_type> __alloc_traits;
   static_assert(is_same<typename __node_traits::size_type, typename __alloc_traits::size_type>::value,
                 "Allocator uses different size_type for different types");
@@ -1852,11 +1751,10 @@ public:
 #  endif
 
   _LIBCPP_HIDE_FROM_ABI explicit unordered_multimap(const allocator_type& __a);
-  _LIBCPP_HIDE_FROM_ABI unordered_multimap(const unordered_multimap& __u);
+  _LIBCPP_HIDE_FROM_ABI unordered_multimap(const unordered_multimap& __u) = default;
   _LIBCPP_HIDE_FROM_ABI unordered_multimap(const unordered_multimap& __u, const allocator_type& __a);
 #  ifndef _LIBCPP_CXX03_LANG
-  _LIBCPP_HIDE_FROM_ABI unordered_multimap(unordered_multimap&& __u)
-      _NOEXCEPT_(is_nothrow_move_constructible<__table>::value);
+  _LIBCPP_HIDE_FROM_ABI unordered_multimap(unordered_multimap&& __u) = default;
   _LIBCPP_HIDE_FROM_ABI unordered_multimap(unordered_multimap&& __u, const allocator_type& __a);
   _LIBCPP_HIDE_FROM_ABI unordered_multimap(initializer_list<value_type> __il);
   _LIBCPP_HIDE_FROM_ABI unordered_multimap(
@@ -1906,41 +1804,26 @@ public:
     static_assert(sizeof(std::__diagnose_unordered_container_requirements<_Key, _Hash, _Pred>(0)), "");
   }
 
-  _LIBCPP_HIDE_FROM_ABI unordered_multimap& operator=(const unordered_multimap& __u) {
+  _LIBCPP_HIDE_FROM_ABI unordered_multimap& operator=(const unordered_multimap& __u) = default;
 #  ifndef _LIBCPP_CXX03_LANG
-    __table_ = __u.__table_;
-#  else
-    if (this != std::addressof(__u)) {
-      __table_.clear();
-      __table_.hash_function()   = __u.__table_.hash_function();
-      __table_.key_eq()          = __u.__table_.key_eq();
-      __table_.max_load_factor() = __u.__table_.max_load_factor();
-      __table_.__copy_assign_alloc(__u.__table_);
-      insert(__u.begin(), __u.end());
-    }
-#  endif
-    return *this;
-  }
-#  ifndef _LIBCPP_CXX03_LANG
-  _LIBCPP_HIDE_FROM_ABI unordered_multimap& operator=(unordered_multimap&& __u)
-      _NOEXCEPT_(is_nothrow_move_assignable<__table>::value);
+  _LIBCPP_HIDE_FROM_ABI unordered_multimap& operator=(unordered_multimap&& __u) = default;
   _LIBCPP_HIDE_FROM_ABI unordered_multimap& operator=(initializer_list<value_type> __il);
 #  endif // _LIBCPP_CXX03_LANG
 
-  _LIBCPP_HIDE_FROM_ABI allocator_type get_allocator() const _NOEXCEPT {
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI allocator_type get_allocator() const _NOEXCEPT {
     return allocator_type(__table_.__node_alloc());
   }
 
   [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI bool empty() const _NOEXCEPT { return __table_.size() == 0; }
-  _LIBCPP_HIDE_FROM_ABI size_type size() const _NOEXCEPT { return __table_.size(); }
-  _LIBCPP_HIDE_FROM_ABI size_type max_size() const _NOEXCEPT { return __table_.max_size(); }
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI size_type size() const _NOEXCEPT { return __table_.size(); }
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI size_type max_size() const _NOEXCEPT { return __table_.max_size(); }
 
-  _LIBCPP_HIDE_FROM_ABI iterator begin() _NOEXCEPT { return __table_.begin(); }
-  _LIBCPP_HIDE_FROM_ABI iterator end() _NOEXCEPT { return __table_.end(); }
-  _LIBCPP_HIDE_FROM_ABI const_iterator begin() const _NOEXCEPT { return __table_.begin(); }
-  _LIBCPP_HIDE_FROM_ABI const_iterator end() const _NOEXCEPT { return __table_.end(); }
-  _LIBCPP_HIDE_FROM_ABI const_iterator cbegin() const _NOEXCEPT { return __table_.begin(); }
-  _LIBCPP_HIDE_FROM_ABI const_iterator cend() const _NOEXCEPT { return __table_.end(); }
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI iterator begin() _NOEXCEPT { return __table_.begin(); }
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI iterator end() _NOEXCEPT { return __table_.end(); }
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI const_iterator begin() const _NOEXCEPT { return __table_.begin(); }
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI const_iterator end() const _NOEXCEPT { return __table_.end(); }
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI const_iterator cbegin() const _NOEXCEPT { return __table_.begin(); }
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI const_iterator cend() const _NOEXCEPT { return __table_.end(); }
 
   _LIBCPP_HIDE_FROM_ABI iterator insert(const value_type& __x) { return __table_.__emplace_multi(__x); }
 
@@ -2008,10 +1891,10 @@ public:
                                         "node_type with incompatible allocator passed to unordered_multimap::insert()");
     return __table_.template __node_handle_insert_multi<node_type>(__hint.__i_, std::move(__nh));
   }
-  _LIBCPP_HIDE_FROM_ABI node_type extract(key_type const& __key) {
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI node_type extract(key_type const& __key) {
     return __table_.template __node_handle_extract<node_type>(__key);
   }
-  _LIBCPP_HIDE_FROM_ABI node_type extract(const_iterator __it) {
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI node_type extract(const_iterator __it) {
     return __table_.template __node_handle_extract<node_type>(__it.__i_);
   }
 
@@ -2045,71 +1928,83 @@ public:
     __table_.swap(__u.__table_);
   }
 
-  _LIBCPP_HIDE_FROM_ABI hasher hash_function() const { return __table_.hash_function().hash_function(); }
-  _LIBCPP_HIDE_FROM_ABI key_equal key_eq() const { return __table_.key_eq().key_eq(); }
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI hasher hash_function() const {
+    return __table_.hash_function().hash_function();
+  }
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI key_equal key_eq() const { return __table_.key_eq().key_eq(); }
 
-  _LIBCPP_HIDE_FROM_ABI iterator find(const key_type& __k) { return __table_.find(__k); }
-  _LIBCPP_HIDE_FROM_ABI const_iterator find(const key_type& __k) const { return __table_.find(__k); }
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI iterator find(const key_type& __k) { return __table_.find(__k); }
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI const_iterator find(const key_type& __k) const { return __table_.find(__k); }
 #  if _LIBCPP_STD_VER >= 20
   template <class _K2, enable_if_t<__is_transparent_v<hasher, _K2> && __is_transparent_v<key_equal, _K2>>* = nullptr>
-  _LIBCPP_HIDE_FROM_ABI iterator find(const _K2& __k) {
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI iterator find(const _K2& __k) {
     return __table_.find(__k);
   }
   template <class _K2, enable_if_t<__is_transparent_v<hasher, _K2> && __is_transparent_v<key_equal, _K2>>* = nullptr>
-  _LIBCPP_HIDE_FROM_ABI const_iterator find(const _K2& __k) const {
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI const_iterator find(const _K2& __k) const {
     return __table_.find(__k);
   }
 #  endif // _LIBCPP_STD_VER >= 20
 
-  _LIBCPP_HIDE_FROM_ABI size_type count(const key_type& __k) const { return __table_.__count_multi(__k); }
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI size_type count(const key_type& __k) const {
+    return __table_.__count_multi(__k);
+  }
 #  if _LIBCPP_STD_VER >= 20
   template <class _K2, enable_if_t<__is_transparent_v<hasher, _K2> && __is_transparent_v<key_equal, _K2>>* = nullptr>
-  _LIBCPP_HIDE_FROM_ABI size_type count(const _K2& __k) const {
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI size_type count(const _K2& __k) const {
     return __table_.__count_multi(__k);
   }
 #  endif // _LIBCPP_STD_VER >= 20
 
 #  if _LIBCPP_STD_VER >= 20
-  _LIBCPP_HIDE_FROM_ABI bool contains(const key_type& __k) const { return find(__k) != end(); }
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI bool contains(const key_type& __k) const { return find(__k) != end(); }
 
   template <class _K2, enable_if_t<__is_transparent_v<hasher, _K2> && __is_transparent_v<key_equal, _K2>>* = nullptr>
-  _LIBCPP_HIDE_FROM_ABI bool contains(const _K2& __k) const {
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI bool contains(const _K2& __k) const {
     return find(__k) != end();
   }
 #  endif // _LIBCPP_STD_VER >= 20
 
-  _LIBCPP_HIDE_FROM_ABI pair<iterator, iterator> equal_range(const key_type& __k) {
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI pair<iterator, iterator> equal_range(const key_type& __k) {
     return __table_.__equal_range_multi(__k);
   }
-  _LIBCPP_HIDE_FROM_ABI pair<const_iterator, const_iterator> equal_range(const key_type& __k) const {
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI pair<const_iterator, const_iterator> equal_range(const key_type& __k) const {
     return __table_.__equal_range_multi(__k);
   }
 #  if _LIBCPP_STD_VER >= 20
   template <class _K2, enable_if_t<__is_transparent_v<hasher, _K2> && __is_transparent_v<key_equal, _K2>>* = nullptr>
-  _LIBCPP_HIDE_FROM_ABI pair<iterator, iterator> equal_range(const _K2& __k) {
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI pair<iterator, iterator> equal_range(const _K2& __k) {
     return __table_.__equal_range_multi(__k);
   }
   template <class _K2, enable_if_t<__is_transparent_v<hasher, _K2> && __is_transparent_v<key_equal, _K2>>* = nullptr>
-  _LIBCPP_HIDE_FROM_ABI pair<const_iterator, const_iterator> equal_range(const _K2& __k) const {
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI pair<const_iterator, const_iterator> equal_range(const _K2& __k) const {
     return __table_.__equal_range_multi(__k);
   }
 #  endif // _LIBCPP_STD_VER >= 20
 
-  _LIBCPP_HIDE_FROM_ABI size_type bucket_count() const _NOEXCEPT { return __table_.bucket_count(); }
-  _LIBCPP_HIDE_FROM_ABI size_type max_bucket_count() const _NOEXCEPT { return __table_.max_bucket_count(); }
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI size_type bucket_count() const _NOEXCEPT { return __table_.bucket_count(); }
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI size_type max_bucket_count() const _NOEXCEPT {
+    return __table_.max_bucket_count();
+  }
 
-  _LIBCPP_HIDE_FROM_ABI size_type bucket_size(size_type __n) const { return __table_.bucket_size(__n); }
-  _LIBCPP_HIDE_FROM_ABI size_type bucket(const key_type& __k) const { return __table_.bucket(__k); }
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI size_type bucket_size(size_type __n) const {
+    return __table_.bucket_size(__n);
+  }
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI size_type bucket(const key_type& __k) const { return __table_.bucket(__k); }
 
-  _LIBCPP_HIDE_FROM_ABI local_iterator begin(size_type __n) { return __table_.begin(__n); }
-  _LIBCPP_HIDE_FROM_ABI local_iterator end(size_type __n) { return __table_.end(__n); }
-  _LIBCPP_HIDE_FROM_ABI const_local_iterator begin(size_type __n) const { return __table_.cbegin(__n); }
-  _LIBCPP_HIDE_FROM_ABI const_local_iterator end(size_type __n) const { return __table_.cend(__n); }
-  _LIBCPP_HIDE_FROM_ABI const_local_iterator cbegin(size_type __n) const { return __table_.cbegin(__n); }
-  _LIBCPP_HIDE_FROM_ABI const_local_iterator cend(size_type __n) const { return __table_.cend(__n); }
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI local_iterator begin(size_type __n) { return __table_.begin(__n); }
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI local_iterator end(size_type __n) { return __table_.end(__n); }
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI const_local_iterator begin(size_type __n) const {
+    return __table_.cbegin(__n);
+  }
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI const_local_iterator end(size_type __n) const { return __table_.cend(__n); }
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI const_local_iterator cbegin(size_type __n) const {
+    return __table_.cbegin(__n);
+  }
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI const_local_iterator cend(size_type __n) const { return __table_.cend(__n); }
 
-  _LIBCPP_HIDE_FROM_ABI float load_factor() const _NOEXCEPT { return __table_.load_factor(); }
-  _LIBCPP_HIDE_FROM_ABI float max_load_factor() const _NOEXCEPT { return __table_.max_load_factor(); }
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI float load_factor() const _NOEXCEPT { return __table_.load_factor(); }
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI float max_load_factor() const _NOEXCEPT { return __table_.max_load_factor(); }
   _LIBCPP_HIDE_FROM_ABI void max_load_factor(float __mlf) { __table_.max_load_factor(__mlf); }
   _LIBCPP_HIDE_FROM_ABI void rehash(size_type __n) { __table_.__rehash_multi(__n); }
   _LIBCPP_HIDE_FROM_ABI void reserve(size_type __n) { __table_.__reserve_multi(__n); }
@@ -2121,10 +2016,10 @@ template <class _InputIterator,
           class _Pred      = equal_to<__iter_key_type<_InputIterator>>,
           class _Allocator = allocator<__iter_to_alloc_type<_InputIterator>>,
           class            = enable_if_t<__has_input_iterator_category<_InputIterator>::value>,
-          class            = enable_if_t<!__is_allocator<_Hash>::value>,
+          class            = enable_if_t<!__is_allocator_v<_Hash>>,
           class            = enable_if_t<!is_integral<_Hash>::value>,
-          class            = enable_if_t<!__is_allocator<_Pred>::value>,
-          class            = enable_if_t<__is_allocator<_Allocator>::value>>
+          class            = enable_if_t<!__is_allocator_v<_Pred>>,
+          class            = enable_if_t<__is_allocator_v<_Allocator>>>
 unordered_multimap(_InputIterator,
                    _InputIterator,
                    typename allocator_traits<_Allocator>::size_type = 0,
@@ -2142,10 +2037,10 @@ template <ranges::input_range _Range,
           class _Hash      = hash<__range_key_type<_Range>>,
           class _Pred      = equal_to<__range_key_type<_Range>>,
           class _Allocator = allocator<__range_to_alloc_type<_Range>>,
-          class            = enable_if_t<!__is_allocator<_Hash>::value>,
+          class            = enable_if_t<!__is_allocator_v<_Hash>>,
           class            = enable_if_t<!is_integral<_Hash>::value>,
-          class            = enable_if_t<!__is_allocator<_Pred>::value>,
-          class            = enable_if_t<__is_allocator<_Allocator>::value>>
+          class            = enable_if_t<!__is_allocator_v<_Pred>>,
+          class            = enable_if_t<__is_allocator_v<_Allocator>>>
 unordered_multimap(from_range_t,
                    _Range&&,
                    typename allocator_traits<_Allocator>::size_type = 0,
@@ -2160,21 +2055,21 @@ template <class _Key,
           class _Hash      = hash<remove_const_t<_Key>>,
           class _Pred      = equal_to<remove_const_t<_Key>>,
           class _Allocator = allocator<pair<const _Key, _Tp>>,
-          class            = enable_if_t<!__is_allocator<_Hash>::value>,
+          class            = enable_if_t<!__is_allocator_v<_Hash>>,
           class            = enable_if_t<!is_integral<_Hash>::value>,
-          class            = enable_if_t<!__is_allocator<_Pred>::value>,
-          class            = enable_if_t<__is_allocator<_Allocator>::value>>
-unordered_multimap(
-    initializer_list<pair<_Key, _Tp>>,
-    typename allocator_traits<_Allocator>::size_type = 0,
-    _Hash                                            = _Hash(),
-    _Pred                                            = _Pred(),
-    _Allocator = _Allocator()) -> unordered_multimap<remove_const_t<_Key>, _Tp, _Hash, _Pred, _Allocator>;
+          class            = enable_if_t<!__is_allocator_v<_Pred>>,
+          class            = enable_if_t<__is_allocator_v<_Allocator>>>
+unordered_multimap(initializer_list<pair<_Key, _Tp>>,
+                   typename allocator_traits<_Allocator>::size_type = 0,
+                   _Hash                                            = _Hash(),
+                   _Pred                                            = _Pred(),
+                   _Allocator                                       = _Allocator())
+    -> unordered_multimap<remove_const_t<_Key>, _Tp, _Hash, _Pred, _Allocator>;
 
 template <class _InputIterator,
           class _Allocator,
           class = enable_if_t<__has_input_iterator_category<_InputIterator>::value>,
-          class = enable_if_t<__is_allocator<_Allocator>::value>>
+          class = enable_if_t<__is_allocator_v<_Allocator>>>
 unordered_multimap(_InputIterator, _InputIterator, typename allocator_traits<_Allocator>::size_type, _Allocator)
     -> unordered_multimap<__iter_key_type<_InputIterator>,
                           __iter_mapped_type<_InputIterator>,
@@ -2185,7 +2080,7 @@ unordered_multimap(_InputIterator, _InputIterator, typename allocator_traits<_Al
 template <class _InputIterator,
           class _Allocator,
           class = enable_if_t<__has_input_iterator_category<_InputIterator>::value>,
-          class = enable_if_t<__is_allocator<_Allocator>::value>>
+          class = enable_if_t<__is_allocator_v<_Allocator>>>
 unordered_multimap(_InputIterator, _InputIterator, _Allocator)
     -> unordered_multimap<__iter_key_type<_InputIterator>,
                           __iter_mapped_type<_InputIterator>,
@@ -2197,9 +2092,9 @@ template <class _InputIterator,
           class _Hash,
           class _Allocator,
           class = enable_if_t<__has_input_iterator_category<_InputIterator>::value>,
-          class = enable_if_t<!__is_allocator<_Hash>::value>,
+          class = enable_if_t<!__is_allocator_v<_Hash>>,
           class = enable_if_t<!is_integral<_Hash>::value>,
-          class = enable_if_t<__is_allocator<_Allocator>::value>>
+          class = enable_if_t<__is_allocator_v<_Allocator>>>
 unordered_multimap(_InputIterator, _InputIterator, typename allocator_traits<_Allocator>::size_type, _Hash, _Allocator)
     -> unordered_multimap<__iter_key_type<_InputIterator>,
                           __iter_mapped_type<_InputIterator>,
@@ -2209,7 +2104,7 @@ unordered_multimap(_InputIterator, _InputIterator, typename allocator_traits<_Al
 
 #    if _LIBCPP_STD_VER >= 23
 
-template <ranges::input_range _Range, class _Allocator, class = enable_if_t<__is_allocator<_Allocator>::value>>
+template <ranges::input_range _Range, class _Allocator, class = enable_if_t<__is_allocator_v<_Allocator>>>
 unordered_multimap(from_range_t, _Range&&, typename allocator_traits<_Allocator>::size_type, _Allocator)
     -> unordered_multimap<__range_key_type<_Range>,
                           __range_mapped_type<_Range>,
@@ -2217,7 +2112,7 @@ unordered_multimap(from_range_t, _Range&&, typename allocator_traits<_Allocator>
                           equal_to<__range_key_type<_Range>>,
                           _Allocator>;
 
-template <ranges::input_range _Range, class _Allocator, class = enable_if_t<__is_allocator<_Allocator>::value>>
+template <ranges::input_range _Range, class _Allocator, class = enable_if_t<__is_allocator_v<_Allocator>>>
 unordered_multimap(from_range_t, _Range&&, _Allocator)
     -> unordered_multimap<__range_key_type<_Range>,
                           __range_mapped_type<_Range>,
@@ -2228,9 +2123,9 @@ unordered_multimap(from_range_t, _Range&&, _Allocator)
 template <ranges::input_range _Range,
           class _Hash,
           class _Allocator,
-          class = enable_if_t<!__is_allocator<_Hash>::value>,
+          class = enable_if_t<!__is_allocator_v<_Hash>>,
           class = enable_if_t<!is_integral<_Hash>::value>,
-          class = enable_if_t<__is_allocator<_Allocator>::value>>
+          class = enable_if_t<__is_allocator_v<_Allocator>>>
 unordered_multimap(from_range_t, _Range&&, typename allocator_traits<_Allocator>::size_type, _Hash, _Allocator)
     -> unordered_multimap<__range_key_type<_Range>,
                           __range_mapped_type<_Range>,
@@ -2240,7 +2135,7 @@ unordered_multimap(from_range_t, _Range&&, typename allocator_traits<_Allocator>
 
 #    endif
 
-template <class _Key, class _Tp, class _Allocator, class = enable_if_t<__is_allocator<_Allocator>::value>>
+template <class _Key, class _Tp, class _Allocator, class = enable_if_t<__is_allocator_v<_Allocator>>>
 unordered_multimap(initializer_list<pair<_Key, _Tp>>, typename allocator_traits<_Allocator>::size_type, _Allocator)
     -> unordered_multimap<remove_const_t<_Key>,
                           _Tp,
@@ -2248,7 +2143,7 @@ unordered_multimap(initializer_list<pair<_Key, _Tp>>, typename allocator_traits<
                           equal_to<remove_const_t<_Key>>,
                           _Allocator>;
 
-template <class _Key, class _Tp, class _Allocator, class = enable_if_t<__is_allocator<_Allocator>::value>>
+template <class _Key, class _Tp, class _Allocator, class = enable_if_t<__is_allocator_v<_Allocator>>>
 unordered_multimap(initializer_list<pair<_Key, _Tp>>, _Allocator)
     -> unordered_multimap<remove_const_t<_Key>,
                           _Tp,
@@ -2260,9 +2155,9 @@ template <class _Key,
           class _Tp,
           class _Hash,
           class _Allocator,
-          class = enable_if_t<!__is_allocator<_Hash>::value>,
+          class = enable_if_t<!__is_allocator_v<_Hash>>,
           class = enable_if_t<!is_integral<_Hash>::value>,
-          class = enable_if_t<__is_allocator<_Allocator>::value>>
+          class = enable_if_t<__is_allocator_v<_Allocator>>>
 unordered_multimap(
     initializer_list<pair<_Key, _Tp>>, typename allocator_traits<_Allocator>::size_type, _Hash, _Allocator)
     -> unordered_multimap<remove_const_t<_Key>, _Tp, _Hash, equal_to<remove_const_t<_Key>>, _Allocator>;
@@ -2315,13 +2210,6 @@ template <class _Key, class _Tp, class _Hash, class _Pred, class _Alloc>
 inline unordered_multimap<_Key, _Tp, _Hash, _Pred, _Alloc>::unordered_multimap(const allocator_type& __a)
     : __table_(typename __table::allocator_type(__a)) {}
 
-template <class _Key, class _Tp, class _Hash, class _Pred, class _Alloc>
-unordered_multimap<_Key, _Tp, _Hash, _Pred, _Alloc>::unordered_multimap(const unordered_multimap& __u)
-    : __table_(__u.__table_) {
-  __table_.__rehash_multi(__u.bucket_count());
-  insert(__u.begin(), __u.end());
-}
-
 template <class _Key, class _Tp, class _Hash, class _Pred, class _Alloc>
 unordered_multimap<_Key, _Tp, _Hash, _Pred, _Alloc>::unordered_multimap(
     const unordered_multimap& __u, const allocator_type& __a)
@@ -2332,11 +2220,6 @@ unordered_multimap<_Key, _Tp, _Hash, _Pred, _Alloc>::unordered_multimap(
 
 #  ifndef _LIBCPP_CXX03_LANG
 
-template <class _Key, class _Tp, class _Hash, class _Pred, class _Alloc>
-inline unordered_multimap<_Key, _Tp, _Hash, _Pred, _Alloc>::unordered_multimap(unordered_multimap&& __u)
-    _NOEXCEPT_(is_nothrow_move_constructible<__table>::value)
-    : __table_(std::move(__u.__table_)) {}
-
 template <class _Key, class _Tp, class _Hash, class _Pred, class _Alloc>
 unordered_multimap<_Key, _Tp, _Hash, _Pred, _Alloc>::unordered_multimap(
     unordered_multimap&& __u, const allocator_type& __a)
@@ -2373,14 +2256,6 @@ unordered_multimap<_Key, _Tp, _Hash, _Pred, _Alloc>::unordered_multimap(
   insert(__il.begin(), __il.end());
 }
 
-template <class _Key, class _Tp, class _Hash, class _Pred, class _Alloc>
-inline unordered_multimap<_Key, _Tp, _Hash, _Pred, _Alloc>&
-unordered_multimap<_Key, _Tp, _Hash, _Pred, _Alloc>::operator=(unordered_multimap&& __u)
-    _NOEXCEPT_(is_nothrow_move_assignable<__table>::value) {
-  __table_ = std::move(__u.__table_);
-  return *this;
-}
-
 template <class _Key, class _Tp, class _Hash, class _Pred, class _Alloc>
 inline unordered_multimap<_Key, _Tp, _Hash, _Pred, _Alloc>&
 unordered_multimap<_Key, _Tp, _Hash, _Pred, _Alloc>::operator=(initializer_list<value_type> __il) {
diff --git a/lib/libcxx/include/unordered_set b/lib/libcxx/include/unordered_set
index 475715db62..760f145091 100644
--- a/lib/libcxx/include/unordered_set
+++ b/lib/libcxx/include/unordered_set
@@ -544,8 +544,6 @@ template <class Value, class Hash, class Pred, class Alloc>
 #  include <__iterator/distance.h>
 #  include <__iterator/erase_if_container.h>
 #  include <__iterator/iterator_traits.h>
-#  include <__iterator/ranges_iterator_traits.h>
-#  include <__memory/addressof.h>
 #  include <__memory/allocator.h>
 #  include <__memory/allocator_traits.h>
 #  include <__memory_resource/polymorphic_allocator.h>
@@ -558,7 +556,6 @@ template <class Value, class Hash, class Pred, class Alloc>
 #  include <__type_traits/invoke.h>
 #  include <__type_traits/is_allocator.h>
 #  include <__type_traits/is_integral.h>
-#  include <__type_traits/is_nothrow_assignable.h>
 #  include <__type_traits/is_nothrow_constructible.h>
 #  include <__type_traits/is_same.h>
 #  include <__type_traits/is_swappable.h>
@@ -703,10 +700,10 @@ public:
 #  endif
 
   _LIBCPP_HIDE_FROM_ABI explicit unordered_set(const allocator_type& __a);
-  _LIBCPP_HIDE_FROM_ABI unordered_set(const unordered_set& __u);
+  _LIBCPP_HIDE_FROM_ABI unordered_set(const unordered_set& __u) = default;
   _LIBCPP_HIDE_FROM_ABI unordered_set(const unordered_set& __u, const allocator_type& __a);
 #  ifndef _LIBCPP_CXX03_LANG
-  _LIBCPP_HIDE_FROM_ABI unordered_set(unordered_set&& __u) _NOEXCEPT_(is_nothrow_move_constructible<__table>::value);
+  _LIBCPP_HIDE_FROM_ABI unordered_set(unordered_set&& __u) = default;
   _LIBCPP_HIDE_FROM_ABI unordered_set(unordered_set&& __u, const allocator_type& __a);
   _LIBCPP_HIDE_FROM_ABI unordered_set(initializer_list<value_type> __il);
   _LIBCPP_HIDE_FROM_ABI
@@ -733,30 +730,26 @@ public:
     static_assert(sizeof(std::__diagnose_unordered_container_requirements<_Value, _Hash, _Pred>(0)), "");
   }
 
-  _LIBCPP_HIDE_FROM_ABI unordered_set& operator=(const unordered_set& __u) {
-    __table_ = __u.__table_;
-    return *this;
-  }
+  _LIBCPP_HIDE_FROM_ABI unordered_set& operator=(const unordered_set& __u) = default;
 #  ifndef _LIBCPP_CXX03_LANG
-  _LIBCPP_HIDE_FROM_ABI unordered_set& operator=(unordered_set&& __u)
-      _NOEXCEPT_(is_nothrow_move_assignable<__table>::value);
+  _LIBCPP_HIDE_FROM_ABI unordered_set& operator=(unordered_set&& __u) = default;
   _LIBCPP_HIDE_FROM_ABI unordered_set& operator=(initializer_list<value_type> __il);
 #  endif // _LIBCPP_CXX03_LANG
 
-  _LIBCPP_HIDE_FROM_ABI allocator_type get_allocator() const _NOEXCEPT {
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI allocator_type get_allocator() const _NOEXCEPT {
     return allocator_type(__table_.__node_alloc());
   }
 
   [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI bool empty() const _NOEXCEPT { return __table_.size() == 0; }
-  _LIBCPP_HIDE_FROM_ABI size_type size() const _NOEXCEPT { return __table_.size(); }
-  _LIBCPP_HIDE_FROM_ABI size_type max_size() const _NOEXCEPT { return __table_.max_size(); }
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI size_type size() const _NOEXCEPT { return __table_.size(); }
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI size_type max_size() const _NOEXCEPT { return __table_.max_size(); }
 
-  _LIBCPP_HIDE_FROM_ABI iterator begin() _NOEXCEPT { return __table_.begin(); }
-  _LIBCPP_HIDE_FROM_ABI iterator end() _NOEXCEPT { return __table_.end(); }
-  _LIBCPP_HIDE_FROM_ABI const_iterator begin() const _NOEXCEPT { return __table_.begin(); }
-  _LIBCPP_HIDE_FROM_ABI const_iterator end() const _NOEXCEPT { return __table_.end(); }
-  _LIBCPP_HIDE_FROM_ABI const_iterator cbegin() const _NOEXCEPT { return __table_.begin(); }
-  _LIBCPP_HIDE_FROM_ABI const_iterator cend() const _NOEXCEPT { return __table_.end(); }
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI iterator begin() _NOEXCEPT { return __table_.begin(); }
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI iterator end() _NOEXCEPT { return __table_.end(); }
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI const_iterator begin() const _NOEXCEPT { return __table_.begin(); }
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI const_iterator end() const _NOEXCEPT { return __table_.end(); }
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI const_iterator cbegin() const _NOEXCEPT { return __table_.begin(); }
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI const_iterator cend() const _NOEXCEPT { return __table_.end(); }
 
 #  ifndef _LIBCPP_CXX03_LANG
   template <class... _Args>
@@ -808,10 +801,10 @@ public:
                                         "node_type with incompatible allocator passed to unordered_set::insert()");
     return __table_.template __node_handle_insert_unique<node_type>(__h, std::move(__nh));
   }
-  _LIBCPP_HIDE_FROM_ABI node_type extract(key_type const& __key) {
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI node_type extract(key_type const& __key) {
     return __table_.template __node_handle_extract<node_type>(__key);
   }
-  _LIBCPP_HIDE_FROM_ABI node_type extract(const_iterator __it) {
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI node_type extract(const_iterator __it) {
     return __table_.template __node_handle_extract<node_type>(__it);
   }
 
@@ -845,71 +838,81 @@ public:
     __table_.swap(__u.__table_);
   }
 
-  _LIBCPP_HIDE_FROM_ABI hasher hash_function() const { return __table_.hash_function(); }
-  _LIBCPP_HIDE_FROM_ABI key_equal key_eq() const { return __table_.key_eq(); }
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI hasher hash_function() const { return __table_.hash_function(); }
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI key_equal key_eq() const { return __table_.key_eq(); }
 
-  _LIBCPP_HIDE_FROM_ABI iterator find(const key_type& __k) { return __table_.find(__k); }
-  _LIBCPP_HIDE_FROM_ABI const_iterator find(const key_type& __k) const { return __table_.find(__k); }
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI iterator find(const key_type& __k) { return __table_.find(__k); }
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI const_iterator find(const key_type& __k) const { return __table_.find(__k); }
 #  if _LIBCPP_STD_VER >= 20
   template <class _K2, enable_if_t<__is_transparent_v<hasher, _K2> && __is_transparent_v<key_equal, _K2>>* = nullptr>
-  _LIBCPP_HIDE_FROM_ABI iterator find(const _K2& __k) {
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI iterator find(const _K2& __k) {
     return __table_.find(__k);
   }
   template <class _K2, enable_if_t<__is_transparent_v<hasher, _K2> && __is_transparent_v<key_equal, _K2>>* = nullptr>
-  _LIBCPP_HIDE_FROM_ABI const_iterator find(const _K2& __k) const {
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI const_iterator find(const _K2& __k) const {
     return __table_.find(__k);
   }
 #  endif // _LIBCPP_STD_VER >= 20
 
-  _LIBCPP_HIDE_FROM_ABI size_type count(const key_type& __k) const { return __table_.__count_unique(__k); }
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI size_type count(const key_type& __k) const {
+    return __table_.__count_unique(__k);
+  }
 #  if _LIBCPP_STD_VER >= 20
   template <class _K2, enable_if_t<__is_transparent_v<hasher, _K2> && __is_transparent_v<key_equal, _K2>>* = nullptr>
-  _LIBCPP_HIDE_FROM_ABI size_type count(const _K2& __k) const {
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI size_type count(const _K2& __k) const {
     return __table_.__count_unique(__k);
   }
 #  endif // _LIBCPP_STD_VER >= 20
 
 #  if _LIBCPP_STD_VER >= 20
-  _LIBCPP_HIDE_FROM_ABI bool contains(const key_type& __k) const { return find(__k) != end(); }
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI bool contains(const key_type& __k) const { return find(__k) != end(); }
 
   template <class _K2, enable_if_t<__is_transparent_v<hasher, _K2> && __is_transparent_v<key_equal, _K2>>* = nullptr>
-  _LIBCPP_HIDE_FROM_ABI bool contains(const _K2& __k) const {
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI bool contains(const _K2& __k) const {
     return find(__k) != end();
   }
 #  endif // _LIBCPP_STD_VER >= 20
 
-  _LIBCPP_HIDE_FROM_ABI pair<iterator, iterator> equal_range(const key_type& __k) {
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI pair<iterator, iterator> equal_range(const key_type& __k) {
     return __table_.__equal_range_unique(__k);
   }
-  _LIBCPP_HIDE_FROM_ABI pair<const_iterator, const_iterator> equal_range(const key_type& __k) const {
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI pair<const_iterator, const_iterator> equal_range(const key_type& __k) const {
     return __table_.__equal_range_unique(__k);
   }
 #  if _LIBCPP_STD_VER >= 20
   template <class _K2, enable_if_t<__is_transparent_v<hasher, _K2> && __is_transparent_v<key_equal, _K2>>* = nullptr>
-  _LIBCPP_HIDE_FROM_ABI pair<iterator, iterator> equal_range(const _K2& __k) {
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI pair<iterator, iterator> equal_range(const _K2& __k) {
     return __table_.__equal_range_unique(__k);
   }
   template <class _K2, enable_if_t<__is_transparent_v<hasher, _K2> && __is_transparent_v<key_equal, _K2>>* = nullptr>
-  _LIBCPP_HIDE_FROM_ABI pair<const_iterator, const_iterator> equal_range(const _K2& __k) const {
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI pair<const_iterator, const_iterator> equal_range(const _K2& __k) const {
     return __table_.__equal_range_unique(__k);
   }
 #  endif // _LIBCPP_STD_VER >= 20
 
-  _LIBCPP_HIDE_FROM_ABI size_type bucket_count() const _NOEXCEPT { return __table_.bucket_count(); }
-  _LIBCPP_HIDE_FROM_ABI size_type max_bucket_count() const _NOEXCEPT { return __table_.max_bucket_count(); }
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI size_type bucket_count() const _NOEXCEPT { return __table_.bucket_count(); }
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI size_type max_bucket_count() const _NOEXCEPT {
+    return __table_.max_bucket_count();
+  }
 
-  _LIBCPP_HIDE_FROM_ABI size_type bucket_size(size_type __n) const { return __table_.bucket_size(__n); }
-  _LIBCPP_HIDE_FROM_ABI size_type bucket(const key_type& __k) const { return __table_.bucket(__k); }
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI size_type bucket_size(size_type __n) const {
+    return __table_.bucket_size(__n);
+  }
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI size_type bucket(const key_type& __k) const { return __table_.bucket(__k); }
 
-  _LIBCPP_HIDE_FROM_ABI local_iterator begin(size_type __n) { return __table_.begin(__n); }
-  _LIBCPP_HIDE_FROM_ABI local_iterator end(size_type __n) { return __table_.end(__n); }
-  _LIBCPP_HIDE_FROM_ABI const_local_iterator begin(size_type __n) const { return __table_.cbegin(__n); }
-  _LIBCPP_HIDE_FROM_ABI const_local_iterator end(size_type __n) const { return __table_.cend(__n); }
-  _LIBCPP_HIDE_FROM_ABI const_local_iterator cbegin(size_type __n) const { return __table_.cbegin(__n); }
-  _LIBCPP_HIDE_FROM_ABI const_local_iterator cend(size_type __n) const { return __table_.cend(__n); }
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI local_iterator begin(size_type __n) { return __table_.begin(__n); }
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI local_iterator end(size_type __n) { return __table_.end(__n); }
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI const_local_iterator begin(size_type __n) const {
+    return __table_.cbegin(__n);
+  }
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI const_local_iterator end(size_type __n) const { return __table_.cend(__n); }
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI const_local_iterator cbegin(size_type __n) const {
+    return __table_.cbegin(__n);
+  }
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI const_local_iterator cend(size_type __n) const { return __table_.cend(__n); }
 
-  _LIBCPP_HIDE_FROM_ABI float load_factor() const _NOEXCEPT { return __table_.load_factor(); }
-  _LIBCPP_HIDE_FROM_ABI float max_load_factor() const _NOEXCEPT { return __table_.max_load_factor(); }
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI float load_factor() const _NOEXCEPT { return __table_.load_factor(); }
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI float max_load_factor() const _NOEXCEPT { return __table_.max_load_factor(); }
   _LIBCPP_HIDE_FROM_ABI void max_load_factor(float __mlf) { __table_.max_load_factor(__mlf); }
   _LIBCPP_HIDE_FROM_ABI void rehash(size_type __n) { __table_.__rehash_unique(__n); }
   _LIBCPP_HIDE_FROM_ABI void reserve(size_type __n) { __table_.__reserve_unique(__n); }
@@ -917,47 +920,48 @@ public:
 
 #  if _LIBCPP_STD_VER >= 17
 template <class _InputIterator,
-          class _Hash      = hash<__iter_value_type<_InputIterator>>,
-          class _Pred      = equal_to<__iter_value_type<_InputIterator>>,
-          class _Allocator = allocator<__iter_value_type<_InputIterator>>,
+          class _Hash      = hash<__iterator_value_type<_InputIterator>>,
+          class _Pred      = equal_to<__iterator_value_type<_InputIterator>>,
+          class _Allocator = allocator<__iterator_value_type<_InputIterator>>,
           class            = enable_if_t<__has_input_iterator_category<_InputIterator>::value>,
-          class            = enable_if_t<!__is_allocator<_Hash>::value>,
+          class            = enable_if_t<!__is_allocator_v<_Hash>>,
           class            = enable_if_t<!is_integral<_Hash>::value>,
-          class            = enable_if_t<!__is_allocator<_Pred>::value>,
-          class            = enable_if_t<__is_allocator<_Allocator>::value>>
+          class            = enable_if_t<!__is_allocator_v<_Pred>>,
+          class            = enable_if_t<__is_allocator_v<_Allocator>>>
 unordered_set(_InputIterator,
               _InputIterator,
               typename allocator_traits<_Allocator>::size_type = 0,
               _Hash                                            = _Hash(),
               _Pred                                            = _Pred(),
-              _Allocator = _Allocator()) -> unordered_set<__iter_value_type<_InputIterator>, _Hash, _Pred, _Allocator>;
+              _Allocator                                       = _Allocator())
+    -> unordered_set<__iterator_value_type<_InputIterator>, _Hash, _Pred, _Allocator>;
 
 #    if _LIBCPP_STD_VER >= 23
 template <ranges::input_range _Range,
           class _Hash      = hash<ranges::range_value_t<_Range>>,
           class _Pred      = equal_to<ranges::range_value_t<_Range>>,
           class _Allocator = allocator<ranges::range_value_t<_Range>>,
-          class            = enable_if_t<!__is_allocator<_Hash>::value>,
+          class            = enable_if_t<!__is_allocator_v<_Hash>>,
           class            = enable_if_t<!is_integral<_Hash>::value>,
-          class            = enable_if_t<!__is_allocator<_Pred>::value>,
-          class            = enable_if_t<__is_allocator<_Allocator>::value>>
-unordered_set(
-    from_range_t,
-    _Range&&,
-    typename allocator_traits<_Allocator>::size_type = 0,
-    _Hash                                            = _Hash(),
-    _Pred                                            = _Pred(),
-    _Allocator = _Allocator()) -> unordered_set<ranges::range_value_t<_Range>, _Hash, _Pred, _Allocator>; // C++23
+          class            = enable_if_t<!__is_allocator_v<_Pred>>,
+          class            = enable_if_t<__is_allocator_v<_Allocator>>>
+unordered_set(from_range_t,
+              _Range&&,
+              typename allocator_traits<_Allocator>::size_type = 0,
+              _Hash                                            = _Hash(),
+              _Pred                                            = _Pred(),
+              _Allocator                                       = _Allocator())
+    -> unordered_set<ranges::range_value_t<_Range>, _Hash, _Pred, _Allocator>; // C++23
 #    endif
 
 template <class _Tp,
           class _Hash      = hash<_Tp>,
           class _Pred      = equal_to<_Tp>,
           class _Allocator = allocator<_Tp>,
-          class            = enable_if_t<!__is_allocator<_Hash>::value>,
+          class            = enable_if_t<!__is_allocator_v<_Hash>>,
           class            = enable_if_t<!is_integral<_Hash>::value>,
-          class            = enable_if_t<!__is_allocator<_Pred>::value>,
-          class            = enable_if_t<__is_allocator<_Allocator>::value>>
+          class            = enable_if_t<!__is_allocator_v<_Pred>>,
+          class            = enable_if_t<__is_allocator_v<_Allocator>>>
 unordered_set(initializer_list<_Tp>,
               typename allocator_traits<_Allocator>::size_type = 0,
               _Hash                                            = _Hash(),
@@ -967,33 +971,36 @@ unordered_set(initializer_list<_Tp>,
 template <class _InputIterator,
           class _Allocator,
           class = enable_if_t<__has_input_iterator_category<_InputIterator>::value>,
-          class = enable_if_t<__is_allocator<_Allocator>::value>>
+          class = enable_if_t<__is_allocator_v<_Allocator>>>
 unordered_set(_InputIterator, _InputIterator, typename allocator_traits<_Allocator>::size_type, _Allocator)
-    -> unordered_set<__iter_value_type<_InputIterator>,
-                     hash<__iter_value_type<_InputIterator>>,
-                     equal_to<__iter_value_type<_InputIterator>>,
+    -> unordered_set<__iterator_value_type<_InputIterator>,
+                     hash<__iterator_value_type<_InputIterator>>,
+                     equal_to<__iterator_value_type<_InputIterator>>,
                      _Allocator>;
 
 template <class _InputIterator,
           class _Hash,
           class _Allocator,
           class = enable_if_t<__has_input_iterator_category<_InputIterator>::value>,
-          class = enable_if_t<!__is_allocator<_Hash>::value>,
+          class = enable_if_t<!__is_allocator_v<_Hash>>,
           class = enable_if_t<!is_integral<_Hash>::value>,
-          class = enable_if_t<__is_allocator<_Allocator>::value>>
+          class = enable_if_t<__is_allocator_v<_Allocator>>>
 unordered_set(_InputIterator, _InputIterator, typename allocator_traits<_Allocator>::size_type, _Hash, _Allocator)
-    -> unordered_set<__iter_value_type<_InputIterator>, _Hash, equal_to<__iter_value_type<_InputIterator>>, _Allocator>;
+    -> unordered_set<__iterator_value_type<_InputIterator>,
+                     _Hash,
+                     equal_to<__iterator_value_type<_InputIterator>>,
+                     _Allocator>;
 
 #    if _LIBCPP_STD_VER >= 23
 
-template <ranges::input_range _Range, class _Allocator, class = enable_if_t<__is_allocator<_Allocator>::value>>
+template <ranges::input_range _Range, class _Allocator, class = enable_if_t<__is_allocator_v<_Allocator>>>
 unordered_set(from_range_t, _Range&&, typename allocator_traits<_Allocator>::size_type, _Allocator)
     -> unordered_set<ranges::range_value_t<_Range>,
                      hash<ranges::range_value_t<_Range>>,
                      equal_to<ranges::range_value_t<_Range>>,
                      _Allocator>;
 
-template <ranges::input_range _Range, class _Allocator, class = enable_if_t<__is_allocator<_Allocator>::value>>
+template <ranges::input_range _Range, class _Allocator, class = enable_if_t<__is_allocator_v<_Allocator>>>
 unordered_set(from_range_t, _Range&&, _Allocator)
     -> unordered_set<ranges::range_value_t<_Range>,
                      hash<ranges::range_value_t<_Range>>,
@@ -1003,24 +1010,24 @@ unordered_set(from_range_t, _Range&&, _Allocator)
 template <ranges::input_range _Range,
           class _Hash,
           class _Allocator,
-          class = enable_if_t<!__is_allocator<_Hash>::value>,
+          class = enable_if_t<!__is_allocator_v<_Hash>>,
           class = enable_if_t<!is_integral<_Hash>::value>,
-          class = enable_if_t<__is_allocator<_Allocator>::value>>
+          class = enable_if_t<__is_allocator_v<_Allocator>>>
 unordered_set(from_range_t, _Range&&, typename allocator_traits<_Allocator>::size_type, _Hash, _Allocator)
     -> unordered_set<ranges::range_value_t<_Range>, _Hash, equal_to<ranges::range_value_t<_Range>>, _Allocator>;
 
 #    endif
 
-template <class _Tp, class _Allocator, class = enable_if_t<__is_allocator<_Allocator>::value>>
+template <class _Tp, class _Allocator, class = enable_if_t<__is_allocator_v<_Allocator>>>
 unordered_set(initializer_list<_Tp>, typename allocator_traits<_Allocator>::size_type, _Allocator)
     -> unordered_set<_Tp, hash<_Tp>, equal_to<_Tp>, _Allocator>;
 
 template <class _Tp,
           class _Hash,
           class _Allocator,
-          class = enable_if_t<!__is_allocator<_Hash>::value>,
+          class = enable_if_t<!__is_allocator_v<_Hash>>,
           class = enable_if_t<!is_integral<_Hash>::value>,
-          class = enable_if_t<__is_allocator<_Allocator>::value>>
+          class = enable_if_t<__is_allocator_v<_Allocator>>>
 unordered_set(initializer_list<_Tp>, typename allocator_traits<_Allocator>::size_type, _Hash, _Allocator)
     -> unordered_set<_Tp, _Hash, equal_to<_Tp>, _Allocator>;
 #  endif
@@ -1070,12 +1077,6 @@ unordered_set<_Value, _Hash, _Pred, _Alloc>::unordered_set(
 template <class _Value, class _Hash, class _Pred, class _Alloc>
 inline unordered_set<_Value, _Hash, _Pred, _Alloc>::unordered_set(const allocator_type& __a) : __table_(__a) {}
 
-template <class _Value, class _Hash, class _Pred, class _Alloc>
-unordered_set<_Value, _Hash, _Pred, _Alloc>::unordered_set(const unordered_set& __u) : __table_(__u.__table_) {
-  __table_.__rehash_unique(__u.bucket_count());
-  insert(__u.begin(), __u.end());
-}
-
 template <class _Value, class _Hash, class _Pred, class _Alloc>
 unordered_set<_Value, _Hash, _Pred, _Alloc>::unordered_set(const unordered_set& __u, const allocator_type& __a)
     : __table_(__u.__table_, __a) {
@@ -1085,11 +1086,6 @@ unordered_set<_Value, _Hash, _Pred, _Alloc>::unordered_set(const unordered_set&
 
 #  ifndef _LIBCPP_CXX03_LANG
 
-template <class _Value, class _Hash, class _Pred, class _Alloc>
-inline unordered_set<_Value, _Hash, _Pred, _Alloc>::unordered_set(unordered_set&& __u)
-    _NOEXCEPT_(is_nothrow_move_constructible<__table>::value)
-    : __table_(std::move(__u.__table_)) {}
-
 template <class _Value, class _Hash, class _Pred, class _Alloc>
 unordered_set<_Value, _Hash, _Pred, _Alloc>::unordered_set(unordered_set&& __u, const allocator_type& __a)
     : __table_(std::move(__u.__table_), __a) {
@@ -1125,14 +1121,6 @@ unordered_set<_Value, _Hash, _Pred, _Alloc>::unordered_set(
   insert(__il.begin(), __il.end());
 }
 
-template <class _Value, class _Hash, class _Pred, class _Alloc>
-inline unordered_set<_Value, _Hash, _Pred, _Alloc>&
-unordered_set<_Value, _Hash, _Pred, _Alloc>::operator=(unordered_set&& __u)
-    _NOEXCEPT_(is_nothrow_move_assignable<__table>::value) {
-  __table_ = std::move(__u.__table_);
-  return *this;
-}
-
 template <class _Value, class _Hash, class _Pred, class _Alloc>
 inline unordered_set<_Value, _Hash, _Pred, _Alloc>&
 unordered_set<_Value, _Hash, _Pred, _Alloc>::operator=(initializer_list<value_type> __il) {
@@ -1308,11 +1296,10 @@ public:
 #  endif
 
   _LIBCPP_HIDE_FROM_ABI explicit unordered_multiset(const allocator_type& __a);
-  _LIBCPP_HIDE_FROM_ABI unordered_multiset(const unordered_multiset& __u);
+  _LIBCPP_HIDE_FROM_ABI unordered_multiset(const unordered_multiset& __u) = default;
   _LIBCPP_HIDE_FROM_ABI unordered_multiset(const unordered_multiset& __u, const allocator_type& __a);
 #  ifndef _LIBCPP_CXX03_LANG
-  _LIBCPP_HIDE_FROM_ABI unordered_multiset(unordered_multiset&& __u)
-      _NOEXCEPT_(is_nothrow_move_constructible<__table>::value);
+  _LIBCPP_HIDE_FROM_ABI unordered_multiset(unordered_multiset&& __u) = default;
   _LIBCPP_HIDE_FROM_ABI unordered_multiset(unordered_multiset&& __u, const allocator_type& __a);
   _LIBCPP_HIDE_FROM_ABI unordered_multiset(initializer_list<value_type> __il);
   _LIBCPP_HIDE_FROM_ABI unordered_multiset(
@@ -1339,30 +1326,26 @@ public:
     static_assert(sizeof(std::__diagnose_unordered_container_requirements<_Value, _Hash, _Pred>(0)), "");
   }
 
-  _LIBCPP_HIDE_FROM_ABI unordered_multiset& operator=(const unordered_multiset& __u) {
-    __table_ = __u.__table_;
-    return *this;
-  }
+  _LIBCPP_HIDE_FROM_ABI unordered_multiset& operator=(const unordered_multiset& __u) = default;
 #  ifndef _LIBCPP_CXX03_LANG
-  _LIBCPP_HIDE_FROM_ABI unordered_multiset& operator=(unordered_multiset&& __u)
-      _NOEXCEPT_(is_nothrow_move_assignable<__table>::value);
+  _LIBCPP_HIDE_FROM_ABI unordered_multiset& operator=(unordered_multiset&& __u) = default;
   _LIBCPP_HIDE_FROM_ABI unordered_multiset& operator=(initializer_list<value_type> __il);
 #  endif // _LIBCPP_CXX03_LANG
 
-  _LIBCPP_HIDE_FROM_ABI allocator_type get_allocator() const _NOEXCEPT {
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI allocator_type get_allocator() const _NOEXCEPT {
     return allocator_type(__table_.__node_alloc());
   }
 
   [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI bool empty() const _NOEXCEPT { return __table_.size() == 0; }
-  _LIBCPP_HIDE_FROM_ABI size_type size() const _NOEXCEPT { return __table_.size(); }
-  _LIBCPP_HIDE_FROM_ABI size_type max_size() const _NOEXCEPT { return __table_.max_size(); }
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI size_type size() const _NOEXCEPT { return __table_.size(); }
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI size_type max_size() const _NOEXCEPT { return __table_.max_size(); }
 
-  _LIBCPP_HIDE_FROM_ABI iterator begin() _NOEXCEPT { return __table_.begin(); }
-  _LIBCPP_HIDE_FROM_ABI iterator end() _NOEXCEPT { return __table_.end(); }
-  _LIBCPP_HIDE_FROM_ABI const_iterator begin() const _NOEXCEPT { return __table_.begin(); }
-  _LIBCPP_HIDE_FROM_ABI const_iterator end() const _NOEXCEPT { return __table_.end(); }
-  _LIBCPP_HIDE_FROM_ABI const_iterator cbegin() const _NOEXCEPT { return __table_.begin(); }
-  _LIBCPP_HIDE_FROM_ABI const_iterator cend() const _NOEXCEPT { return __table_.end(); }
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI iterator begin() _NOEXCEPT { return __table_.begin(); }
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI iterator end() _NOEXCEPT { return __table_.end(); }
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI const_iterator begin() const _NOEXCEPT { return __table_.begin(); }
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI const_iterator end() const _NOEXCEPT { return __table_.end(); }
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI const_iterator cbegin() const _NOEXCEPT { return __table_.begin(); }
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI const_iterator cend() const _NOEXCEPT { return __table_.end(); }
 
 #  ifndef _LIBCPP_CXX03_LANG
   template <class... _Args>
@@ -1410,10 +1393,10 @@ public:
                                         "node_type with incompatible allocator passed to unordered_multiset::insert()");
     return __table_.template __node_handle_insert_multi<node_type>(__hint, std::move(__nh));
   }
-  _LIBCPP_HIDE_FROM_ABI node_type extract(const_iterator __position) {
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI node_type extract(const_iterator __position) {
     return __table_.template __node_handle_extract<node_type>(__position);
   }
-  _LIBCPP_HIDE_FROM_ABI node_type extract(key_type const& __key) {
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI node_type extract(key_type const& __key) {
     return __table_.template __node_handle_extract<node_type>(__key);
   }
 
@@ -1454,71 +1437,81 @@ public:
     __table_.swap(__u.__table_);
   }
 
-  _LIBCPP_HIDE_FROM_ABI hasher hash_function() const { return __table_.hash_function(); }
-  _LIBCPP_HIDE_FROM_ABI key_equal key_eq() const { return __table_.key_eq(); }
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI hasher hash_function() const { return __table_.hash_function(); }
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI key_equal key_eq() const { return __table_.key_eq(); }
 
-  _LIBCPP_HIDE_FROM_ABI iterator find(const key_type& __k) { return __table_.find(__k); }
-  _LIBCPP_HIDE_FROM_ABI const_iterator find(const key_type& __k) const { return __table_.find(__k); }
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI iterator find(const key_type& __k) { return __table_.find(__k); }
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI const_iterator find(const key_type& __k) const { return __table_.find(__k); }
 #  if _LIBCPP_STD_VER >= 20
   template <class _K2, enable_if_t<__is_transparent_v<hasher, _K2> && __is_transparent_v<key_equal, _K2>>* = nullptr>
-  _LIBCPP_HIDE_FROM_ABI iterator find(const _K2& __k) {
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI iterator find(const _K2& __k) {
     return __table_.find(__k);
   }
   template <class _K2, enable_if_t<__is_transparent_v<hasher, _K2> && __is_transparent_v<key_equal, _K2>>* = nullptr>
-  _LIBCPP_HIDE_FROM_ABI const_iterator find(const _K2& __k) const {
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI const_iterator find(const _K2& __k) const {
     return __table_.find(__k);
   }
 #  endif // _LIBCPP_STD_VER >= 20
 
-  _LIBCPP_HIDE_FROM_ABI size_type count(const key_type& __k) const { return __table_.__count_multi(__k); }
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI size_type count(const key_type& __k) const {
+    return __table_.__count_multi(__k);
+  }
 #  if _LIBCPP_STD_VER >= 20
   template <class _K2, enable_if_t<__is_transparent_v<hasher, _K2> && __is_transparent_v<key_equal, _K2>>* = nullptr>
-  _LIBCPP_HIDE_FROM_ABI size_type count(const _K2& __k) const {
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI size_type count(const _K2& __k) const {
     return __table_.__count_multi(__k);
   }
 #  endif // _LIBCPP_STD_VER >= 20
 
 #  if _LIBCPP_STD_VER >= 20
-  _LIBCPP_HIDE_FROM_ABI bool contains(const key_type& __k) const { return find(__k) != end(); }
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI bool contains(const key_type& __k) const { return find(__k) != end(); }
 
   template <class _K2, enable_if_t<__is_transparent_v<hasher, _K2> && __is_transparent_v<key_equal, _K2>>* = nullptr>
-  _LIBCPP_HIDE_FROM_ABI bool contains(const _K2& __k) const {
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI bool contains(const _K2& __k) const {
     return find(__k) != end();
   }
 #  endif // _LIBCPP_STD_VER >= 20
 
-  _LIBCPP_HIDE_FROM_ABI pair<iterator, iterator> equal_range(const key_type& __k) {
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI pair<iterator, iterator> equal_range(const key_type& __k) {
     return __table_.__equal_range_multi(__k);
   }
-  _LIBCPP_HIDE_FROM_ABI pair<const_iterator, const_iterator> equal_range(const key_type& __k) const {
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI pair<const_iterator, const_iterator> equal_range(const key_type& __k) const {
     return __table_.__equal_range_multi(__k);
   }
 #  if _LIBCPP_STD_VER >= 20
   template <class _K2, enable_if_t<__is_transparent_v<hasher, _K2> && __is_transparent_v<key_equal, _K2>>* = nullptr>
-  _LIBCPP_HIDE_FROM_ABI pair<iterator, iterator> equal_range(const _K2& __k) {
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI pair<iterator, iterator> equal_range(const _K2& __k) {
     return __table_.__equal_range_multi(__k);
   }
   template <class _K2, enable_if_t<__is_transparent_v<hasher, _K2> && __is_transparent_v<key_equal, _K2>>* = nullptr>
-  _LIBCPP_HIDE_FROM_ABI pair<const_iterator, const_iterator> equal_range(const _K2& __k) const {
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI pair<const_iterator, const_iterator> equal_range(const _K2& __k) const {
     return __table_.__equal_range_multi(__k);
   }
 #  endif // _LIBCPP_STD_VER >= 20
 
-  _LIBCPP_HIDE_FROM_ABI size_type bucket_count() const _NOEXCEPT { return __table_.bucket_count(); }
-  _LIBCPP_HIDE_FROM_ABI size_type max_bucket_count() const _NOEXCEPT { return __table_.max_bucket_count(); }
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI size_type bucket_count() const _NOEXCEPT { return __table_.bucket_count(); }
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI size_type max_bucket_count() const _NOEXCEPT {
+    return __table_.max_bucket_count();
+  }
 
-  _LIBCPP_HIDE_FROM_ABI size_type bucket_size(size_type __n) const { return __table_.bucket_size(__n); }
-  _LIBCPP_HIDE_FROM_ABI size_type bucket(const key_type& __k) const { return __table_.bucket(__k); }
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI size_type bucket_size(size_type __n) const {
+    return __table_.bucket_size(__n);
+  }
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI size_type bucket(const key_type& __k) const { return __table_.bucket(__k); }
 
-  _LIBCPP_HIDE_FROM_ABI local_iterator begin(size_type __n) { return __table_.begin(__n); }
-  _LIBCPP_HIDE_FROM_ABI local_iterator end(size_type __n) { return __table_.end(__n); }
-  _LIBCPP_HIDE_FROM_ABI const_local_iterator begin(size_type __n) const { return __table_.cbegin(__n); }
-  _LIBCPP_HIDE_FROM_ABI const_local_iterator end(size_type __n) const { return __table_.cend(__n); }
-  _LIBCPP_HIDE_FROM_ABI const_local_iterator cbegin(size_type __n) const { return __table_.cbegin(__n); }
-  _LIBCPP_HIDE_FROM_ABI const_local_iterator cend(size_type __n) const { return __table_.cend(__n); }
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI local_iterator begin(size_type __n) { return __table_.begin(__n); }
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI local_iterator end(size_type __n) { return __table_.end(__n); }
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI const_local_iterator begin(size_type __n) const {
+    return __table_.cbegin(__n);
+  }
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI const_local_iterator end(size_type __n) const { return __table_.cend(__n); }
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI const_local_iterator cbegin(size_type __n) const {
+    return __table_.cbegin(__n);
+  }
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI const_local_iterator cend(size_type __n) const { return __table_.cend(__n); }
 
-  _LIBCPP_HIDE_FROM_ABI float load_factor() const _NOEXCEPT { return __table_.load_factor(); }
-  _LIBCPP_HIDE_FROM_ABI float max_load_factor() const _NOEXCEPT { return __table_.max_load_factor(); }
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI float load_factor() const _NOEXCEPT { return __table_.load_factor(); }
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI float max_load_factor() const _NOEXCEPT { return __table_.max_load_factor(); }
   _LIBCPP_HIDE_FROM_ABI void max_load_factor(float __mlf) { __table_.max_load_factor(__mlf); }
   _LIBCPP_HIDE_FROM_ABI void rehash(size_type __n) { __table_.__rehash_multi(__n); }
   _LIBCPP_HIDE_FROM_ABI void reserve(size_type __n) { __table_.__reserve_multi(__n); }
@@ -1526,31 +1519,31 @@ public:
 
 #  if _LIBCPP_STD_VER >= 17
 template <class _InputIterator,
-          class _Hash      = hash<__iter_value_type<_InputIterator>>,
-          class _Pred      = equal_to<__iter_value_type<_InputIterator>>,
-          class _Allocator = allocator<__iter_value_type<_InputIterator>>,
+          class _Hash      = hash<__iterator_value_type<_InputIterator>>,
+          class _Pred      = equal_to<__iterator_value_type<_InputIterator>>,
+          class _Allocator = allocator<__iterator_value_type<_InputIterator>>,
           class            = enable_if_t<__has_input_iterator_category<_InputIterator>::value>,
-          class            = enable_if_t<!__is_allocator<_Hash>::value>,
+          class            = enable_if_t<!__is_allocator_v<_Hash>>,
           class            = enable_if_t<!is_integral<_Hash>::value>,
-          class            = enable_if_t<!__is_allocator<_Pred>::value>,
-          class            = enable_if_t<__is_allocator<_Allocator>::value>>
+          class            = enable_if_t<!__is_allocator_v<_Pred>>,
+          class            = enable_if_t<__is_allocator_v<_Allocator>>>
 unordered_multiset(
     _InputIterator,
     _InputIterator,
     typename allocator_traits<_Allocator>::size_type = 0,
     _Hash                                            = _Hash(),
     _Pred                                            = _Pred(),
-    _Allocator = _Allocator()) -> unordered_multiset<__iter_value_type<_InputIterator>, _Hash, _Pred, _Allocator>;
+    _Allocator = _Allocator()) -> unordered_multiset<__iterator_value_type<_InputIterator>, _Hash, _Pred, _Allocator>;
 
 #    if _LIBCPP_STD_VER >= 23
 template <ranges::input_range _Range,
           class _Hash      = hash<ranges::range_value_t<_Range>>,
           class _Pred      = equal_to<ranges::range_value_t<_Range>>,
           class _Allocator = allocator<ranges::range_value_t<_Range>>,
-          class            = enable_if_t<!__is_allocator<_Hash>::value>,
+          class            = enable_if_t<!__is_allocator_v<_Hash>>,
           class            = enable_if_t<!is_integral<_Hash>::value>,
-          class            = enable_if_t<!__is_allocator<_Pred>::value>,
-          class            = enable_if_t<__is_allocator<_Allocator>::value>>
+          class            = enable_if_t<!__is_allocator_v<_Pred>>,
+          class            = enable_if_t<__is_allocator_v<_Allocator>>>
 unordered_multiset(
     from_range_t,
     _Range&&,
@@ -1564,10 +1557,10 @@ template <class _Tp,
           class _Hash      = hash<_Tp>,
           class _Pred      = equal_to<_Tp>,
           class _Allocator = allocator<_Tp>,
-          class            = enable_if_t<!__is_allocator<_Hash>::value>,
+          class            = enable_if_t<!__is_allocator_v<_Hash>>,
           class            = enable_if_t<!is_integral<_Hash>::value>,
-          class            = enable_if_t<!__is_allocator<_Pred>::value>,
-          class            = enable_if_t<__is_allocator<_Allocator>::value>>
+          class            = enable_if_t<!__is_allocator_v<_Pred>>,
+          class            = enable_if_t<__is_allocator_v<_Allocator>>>
 unordered_multiset(initializer_list<_Tp>,
                    typename allocator_traits<_Allocator>::size_type = 0,
                    _Hash                                            = _Hash(),
@@ -1577,36 +1570,36 @@ unordered_multiset(initializer_list<_Tp>,
 template <class _InputIterator,
           class _Allocator,
           class = enable_if_t<__has_input_iterator_category<_InputIterator>::value>,
-          class = enable_if_t<__is_allocator<_Allocator>::value>>
+          class = enable_if_t<__is_allocator_v<_Allocator>>>
 unordered_multiset(_InputIterator, _InputIterator, typename allocator_traits<_Allocator>::size_type, _Allocator)
-    -> unordered_multiset<__iter_value_type<_InputIterator>,
-                          hash<__iter_value_type<_InputIterator>>,
-                          equal_to<__iter_value_type<_InputIterator>>,
+    -> unordered_multiset<__iterator_value_type<_InputIterator>,
+                          hash<__iterator_value_type<_InputIterator>>,
+                          equal_to<__iterator_value_type<_InputIterator>>,
                           _Allocator>;
 
 template <class _InputIterator,
           class _Hash,
           class _Allocator,
           class = enable_if_t<__has_input_iterator_category<_InputIterator>::value>,
-          class = enable_if_t<!__is_allocator<_Hash>::value>,
+          class = enable_if_t<!__is_allocator_v<_Hash>>,
           class = enable_if_t<!is_integral<_Hash>::value>,
-          class = enable_if_t<__is_allocator<_Allocator>::value>>
+          class = enable_if_t<__is_allocator_v<_Allocator>>>
 unordered_multiset(_InputIterator, _InputIterator, typename allocator_traits<_Allocator>::size_type, _Hash, _Allocator)
-    -> unordered_multiset<__iter_value_type<_InputIterator>,
+    -> unordered_multiset<__iterator_value_type<_InputIterator>,
                           _Hash,
-                          equal_to<__iter_value_type<_InputIterator>>,
+                          equal_to<__iterator_value_type<_InputIterator>>,
                           _Allocator>;
 
 #    if _LIBCPP_STD_VER >= 23
 
-template <ranges::input_range _Range, class _Allocator, class = enable_if_t<__is_allocator<_Allocator>::value>>
+template <ranges::input_range _Range, class _Allocator, class = enable_if_t<__is_allocator_v<_Allocator>>>
 unordered_multiset(from_range_t, _Range&&, typename allocator_traits<_Allocator>::size_type, _Allocator)
     -> unordered_multiset<ranges::range_value_t<_Range>,
                           hash<ranges::range_value_t<_Range>>,
                           equal_to<ranges::range_value_t<_Range>>,
                           _Allocator>;
 
-template <ranges::input_range _Range, class _Allocator, class = enable_if_t<__is_allocator<_Allocator>::value>>
+template <ranges::input_range _Range, class _Allocator, class = enable_if_t<__is_allocator_v<_Allocator>>>
 unordered_multiset(from_range_t, _Range&&, _Allocator)
     -> unordered_multiset<ranges::range_value_t<_Range>,
                           hash<ranges::range_value_t<_Range>>,
@@ -1616,24 +1609,24 @@ unordered_multiset(from_range_t, _Range&&, _Allocator)
 template <ranges::input_range _Range,
           class _Hash,
           class _Allocator,
-          class = enable_if_t<!__is_allocator<_Hash>::value>,
+          class = enable_if_t<!__is_allocator_v<_Hash>>,
           class = enable_if_t<!is_integral<_Hash>::value>,
-          class = enable_if_t<__is_allocator<_Allocator>::value>>
+          class = enable_if_t<__is_allocator_v<_Allocator>>>
 unordered_multiset(from_range_t, _Range&&, typename allocator_traits<_Allocator>::size_type, _Hash, _Allocator)
     -> unordered_multiset<ranges::range_value_t<_Range>, _Hash, equal_to<ranges::range_value_t<_Range>>, _Allocator>;
 
 #    endif
 
-template <class _Tp, class _Allocator, class = enable_if_t<__is_allocator<_Allocator>::value>>
+template <class _Tp, class _Allocator, class = enable_if_t<__is_allocator_v<_Allocator>>>
 unordered_multiset(initializer_list<_Tp>, typename allocator_traits<_Allocator>::size_type, _Allocator)
     -> unordered_multiset<_Tp, hash<_Tp>, equal_to<_Tp>, _Allocator>;
 
 template <class _Tp,
           class _Hash,
           class _Allocator,
-          class = enable_if_t<!__is_allocator<_Hash>::value>,
+          class = enable_if_t<!__is_allocator_v<_Hash>>,
           class = enable_if_t<!is_integral<_Hash>::value>,
-          class = enable_if_t<__is_allocator<_Allocator>::value>>
+          class = enable_if_t<__is_allocator_v<_Allocator>>>
 unordered_multiset(initializer_list<_Tp>, typename allocator_traits<_Allocator>::size_type, _Hash, _Allocator)
     -> unordered_multiset<_Tp, _Hash, equal_to<_Tp>, _Allocator>;
 #  endif
@@ -1685,13 +1678,6 @@ template <class _Value, class _Hash, class _Pred, class _Alloc>
 inline unordered_multiset<_Value, _Hash, _Pred, _Alloc>::unordered_multiset(const allocator_type& __a)
     : __table_(__a) {}
 
-template <class _Value, class _Hash, class _Pred, class _Alloc>
-unordered_multiset<_Value, _Hash, _Pred, _Alloc>::unordered_multiset(const unordered_multiset& __u)
-    : __table_(__u.__table_) {
-  __table_.__rehash_multi(__u.bucket_count());
-  insert(__u.begin(), __u.end());
-}
-
 template <class _Value, class _Hash, class _Pred, class _Alloc>
 unordered_multiset<_Value, _Hash, _Pred, _Alloc>::unordered_multiset(
     const unordered_multiset& __u, const allocator_type& __a)
@@ -1702,11 +1688,6 @@ unordered_multiset<_Value, _Hash, _Pred, _Alloc>::unordered_multiset(
 
 #  ifndef _LIBCPP_CXX03_LANG
 
-template <class _Value, class _Hash, class _Pred, class _Alloc>
-inline unordered_multiset<_Value, _Hash, _Pred, _Alloc>::unordered_multiset(unordered_multiset&& __u)
-    _NOEXCEPT_(is_nothrow_move_constructible<__table>::value)
-    : __table_(std::move(__u.__table_)) {}
-
 template <class _Value, class _Hash, class _Pred, class _Alloc>
 unordered_multiset<_Value, _Hash, _Pred, _Alloc>::unordered_multiset(
     unordered_multiset&& __u, const allocator_type& __a)
@@ -1743,14 +1724,6 @@ unordered_multiset<_Value, _Hash, _Pred, _Alloc>::unordered_multiset(
   insert(__il.begin(), __il.end());
 }
 
-template <class _Value, class _Hash, class _Pred, class _Alloc>
-inline unordered_multiset<_Value, _Hash, _Pred, _Alloc>&
-unordered_multiset<_Value, _Hash, _Pred, _Alloc>::operator=(unordered_multiset&& __u)
-    _NOEXCEPT_(is_nothrow_move_assignable<__table>::value) {
-  __table_ = std::move(__u.__table_);
-  return *this;
-}
-
 template <class _Value, class _Hash, class _Pred, class _Alloc>
 inline unordered_multiset<_Value, _Hash, _Pred, _Alloc>&
 unordered_multiset<_Value, _Hash, _Pred, _Alloc>::operator=(initializer_list<value_type> __il) {
diff --git a/lib/libcxx/include/utility b/lib/libcxx/include/utility
index bc4eaf6a0c..1b19243afc 100644
--- a/lib/libcxx/include/utility
+++ b/lib/libcxx/include/utility
@@ -216,6 +216,18 @@ template<size_t N>
 template<class... T>
   using index_sequence_for = make_index_sequence<sizeof...(T)>;
 
+template<class T, T... Values>                                                  // C++26
+  struct tuple_size<integer_sequence<T, Values...>>;
+
+template<size_t I, class T, T... Values>                                        // C++26
+  struct tuple_element<I, integer_sequence<T, Values...>>;
+
+template<size_t I, class T, T... Values>                                        // C++26
+  struct tuple_element<I, const integer_sequence<T, Values...>>;
+
+template<size_t I, class T, T... Values>                                        // C++26
+  constexpr T get(integer_sequence<T, Values...>) noexcept;
+
 template<class T, class U=T>
     constexpr T exchange(T& obj, U&& new_value)                                 // constexpr in C++17, noexcept in C++23
       noexcept(is_nothrow_move_constructible<T>::value && is_nothrow_assignable<T&, U>::value);
diff --git a/lib/libcxx/include/valarray b/lib/libcxx/include/valarray
index 96501caaff..58287b60dd 100644
--- a/lib/libcxx/include/valarray
+++ b/lib/libcxx/include/valarray
@@ -362,6 +362,7 @@ template <class T> unspecified2 end(const valarray<T>& v);
 #  include <__memory/uninitialized_algorithms.h>
 #  include <__type_traits/decay.h>
 #  include <__type_traits/remove_reference.h>
+#  include <__utility/exception_guard.h>
 #  include <__utility/move.h>
 #  include <__utility/swap.h>
 #  include <cmath>
@@ -395,9 +396,9 @@ public:
   _LIBCPP_HIDE_FROM_ABI slice(size_t __start, size_t __size, size_t __stride)
       : __start_(__start), __size_(__size), __stride_(__stride) {}
 
-  _LIBCPP_HIDE_FROM_ABI size_t start() const { return __start_; }
-  _LIBCPP_HIDE_FROM_ABI size_t size() const { return __size_; }
-  _LIBCPP_HIDE_FROM_ABI size_t stride() const { return __stride_; }
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI size_t start() const { return __start_; }
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI size_t size() const { return __size_; }
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI size_t stride() const { return __stride_; }
 
 #  if _LIBCPP_STD_VER >= 20
 
@@ -792,7 +793,7 @@ private:
 public:
   // construct/destroy:
   _LIBCPP_HIDE_FROM_ABI valarray() : __begin_(nullptr), __end_(nullptr) {}
-  inline _LIBCPP_HIDE_FROM_ABI_AFTER_V1 explicit valarray(size_t __n);
+  inline _LIBCPP_HIDE_FROM_ABI_SINCE_LLVM8 explicit valarray(size_t __n);
   _LIBCPP_HIDE_FROM_ABI valarray(const value_type& __x, size_t __n);
   valarray(const value_type* __p, size_t __n);
   valarray(const valarray& __v);
@@ -804,7 +805,7 @@ public:
   valarray(const gslice_array<value_type>& __ga);
   valarray(const mask_array<value_type>& __ma);
   valarray(const indirect_array<value_type>& __ia);
-  inline _LIBCPP_HIDE_FROM_ABI_AFTER_V1 ~valarray();
+  inline _LIBCPP_HIDE_FROM_ABI_SINCE_LLVM8 ~valarray();
 
   // assignment:
   valarray& operator=(const valarray& __v);
@@ -821,36 +822,41 @@ public:
   _LIBCPP_HIDE_FROM_ABI valarray& operator=(const __val_expr<_ValExpr>& __v);
 
   // element access:
-  _LIBCPP_HIDE_FROM_ABI const value_type& operator[](size_t __i) const {
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI const value_type& operator[](size_t __i) const {
     _LIBCPP_ASSERT_VALID_ELEMENT_ACCESS(__i < size(), "valarray::operator[] index out of bounds");
     return __begin_[__i];
   }
 
-  _LIBCPP_HIDE_FROM_ABI value_type& operator[](size_t __i) {
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI value_type& operator[](size_t __i) {
     _LIBCPP_ASSERT_VALID_ELEMENT_ACCESS(__i < size(), "valarray::operator[] index out of bounds");
     return __begin_[__i];
   }
 
   // subset operations:
-  _LIBCPP_HIDE_FROM_ABI __val_expr<__slice_expr<const valarray&> > operator[](slice __s) const;
-  _LIBCPP_HIDE_FROM_ABI slice_array<value_type> operator[](slice __s);
-  _LIBCPP_HIDE_FROM_ABI __val_expr<__indirect_expr<const valarray&> > operator[](const gslice& __gs) const;
-  _LIBCPP_HIDE_FROM_ABI gslice_array<value_type> operator[](const gslice& __gs);
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI __val_expr<__slice_expr<const valarray&> > operator[](slice __s) const;
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI slice_array<value_type> operator[](slice __s);
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI __val_expr<__indirect_expr<const valarray&> >
+  operator[](const gslice& __gs) const;
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI gslice_array<value_type> operator[](const gslice& __gs);
 #  ifndef _LIBCPP_CXX03_LANG
-  _LIBCPP_HIDE_FROM_ABI __val_expr<__indirect_expr<const valarray&> > operator[](gslice&& __gs) const;
-  _LIBCPP_HIDE_FROM_ABI gslice_array<value_type> operator[](gslice&& __gs);
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI __val_expr<__indirect_expr<const valarray&> > operator[](gslice&& __gs) const;
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI gslice_array<value_type> operator[](gslice&& __gs);
 #  endif // _LIBCPP_CXX03_LANG
+  [[__nodiscard__]]
   _LIBCPP_HIDE_FROM_ABI __val_expr<__mask_expr<const valarray&> > operator[](const valarray<bool>& __vb) const;
-  _LIBCPP_HIDE_FROM_ABI mask_array<value_type> operator[](const valarray<bool>& __vb);
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI mask_array<value_type> operator[](const valarray<bool>& __vb);
 #  ifndef _LIBCPP_CXX03_LANG
+  [[__nodiscard__]]
   _LIBCPP_HIDE_FROM_ABI __val_expr<__mask_expr<const valarray&> > operator[](valarray<bool>&& __vb) const;
-  _LIBCPP_HIDE_FROM_ABI mask_array<value_type> operator[](valarray<bool>&& __vb);
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI mask_array<value_type> operator[](valarray<bool>&& __vb);
 #  endif // _LIBCPP_CXX03_LANG
+  [[__nodiscard__]]
   _LIBCPP_HIDE_FROM_ABI __val_expr<__indirect_expr<const valarray&> > operator[](const valarray<size_t>& __vs) const;
-  _LIBCPP_HIDE_FROM_ABI indirect_array<value_type> operator[](const valarray<size_t>& __vs);
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI indirect_array<value_type> operator[](const valarray<size_t>& __vs);
 #  ifndef _LIBCPP_CXX03_LANG
+  [[__nodiscard__]]
   _LIBCPP_HIDE_FROM_ABI __val_expr<__indirect_expr<const valarray&> > operator[](valarray<size_t>&& __vs) const;
-  _LIBCPP_HIDE_FROM_ABI indirect_array<value_type> operator[](valarray<size_t>&& __vs);
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI indirect_array<value_type> operator[](valarray<size_t>&& __vs);
 #  endif // _LIBCPP_CXX03_LANG
 
   // unary operators:
@@ -904,16 +910,16 @@ public:
   // member functions:
   _LIBCPP_HIDE_FROM_ABI void swap(valarray& __v) _NOEXCEPT;
 
-  _LIBCPP_HIDE_FROM_ABI size_t size() const { return static_cast<size_t>(__end_ - __begin_); }
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI size_t size() const { return static_cast<size_t>(__end_ - __begin_); }
 
-  _LIBCPP_HIDE_FROM_ABI value_type sum() const;
-  _LIBCPP_HIDE_FROM_ABI value_type min() const;
-  _LIBCPP_HIDE_FROM_ABI value_type max() const;
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI value_type sum() const;
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI value_type min() const;
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI value_type max() const;
 
-  valarray shift(int __i) const;
-  valarray cshift(int __i) const;
-  valarray apply(value_type __f(value_type)) const;
-  valarray apply(value_type __f(const value_type&)) const;
+  [[__nodiscard__]] valarray shift(int __i) const;
+  [[__nodiscard__]] valarray cshift(int __i) const;
+  [[__nodiscard__]] valarray apply(value_type __f(value_type)) const;
+  [[__nodiscard__]] valarray apply(value_type __f(const value_type&)) const;
   void resize(size_t __n, value_type __x = value_type());
 
 private:
@@ -1248,11 +1254,11 @@ public:
 
 #  endif // _LIBCPP_CXX03_LANG
 
-  _LIBCPP_HIDE_FROM_ABI size_t start() const { return __1d_.size() ? __1d_[0] : 0; }
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI size_t start() const { return __1d_.size() ? __1d_[0] : 0; }
 
-  _LIBCPP_HIDE_FROM_ABI valarray<size_t> size() const { return __size_; }
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI valarray<size_t> size() const { return __size_; }
 
-  _LIBCPP_HIDE_FROM_ABI valarray<size_t> stride() const { return __stride_; }
+  [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI valarray<size_t> stride() const { return __stride_; }
 
 private:
   void __init(size_t __start);
@@ -1992,17 +1998,10 @@ template <class _Tp>
 inline valarray<_Tp>::valarray(size_t __n) : __begin_(nullptr), __end_(nullptr) {
   if (__n) {
     __begin_ = __end_ = allocator<value_type>().allocate(__n);
-#  if _LIBCPP_HAS_EXCEPTIONS
-    try {
-#  endif // _LIBCPP_HAS_EXCEPTIONS
-      for (size_t __n_left = __n; __n_left; --__n_left, ++__end_)
-        ::new ((void*)__end_) value_type();
-#  if _LIBCPP_HAS_EXCEPTIONS
-    } catch (...) {
-      __clear(__n);
-      throw;
-    }
-#  endif // _LIBCPP_HAS_EXCEPTIONS
+    auto __guard      = std::__make_exception_guard([&] { __clear(__n); });
+    for (size_t __n_left = __n; __n_left; --__n_left, ++__end_)
+      ::new ((void*)__end_) value_type();
+    __guard.__complete();
   }
 }
 
@@ -2015,17 +2014,10 @@ template <class _Tp>
 valarray<_Tp>::valarray(const value_type* __p, size_t __n) : __begin_(nullptr), __end_(nullptr) {
   if (__n) {
     __begin_ = __end_ = allocator<value_type>().allocate(__n);
-#  if _LIBCPP_HAS_EXCEPTIONS
-    try {
-#  endif // _LIBCPP_HAS_EXCEPTIONS
-      for (size_t __n_left = __n; __n_left; ++__end_, ++__p, --__n_left)
-        ::new ((void*)__end_) value_type(*__p);
-#  if _LIBCPP_HAS_EXCEPTIONS
-    } catch (...) {
-      __clear(__n);
-      throw;
-    }
-#  endif // _LIBCPP_HAS_EXCEPTIONS
+    auto __guard      = std::__make_exception_guard([&] { __clear(__n); });
+    for (size_t __n_left = __n; __n_left; ++__end_, ++__p, --__n_left)
+      ::new ((void*)__end_) value_type(*__p);
+    __guard.__complete();
   }
 }
 
@@ -2033,17 +2025,10 @@ template <class _Tp>
 valarray<_Tp>::valarray(const valarray& __v) : __begin_(nullptr), __end_(nullptr) {
   if (__v.size()) {
     __begin_ = __end_ = allocator<value_type>().allocate(__v.size());
-#  if _LIBCPP_HAS_EXCEPTIONS
-    try {
-#  endif // _LIBCPP_HAS_EXCEPTIONS
-      for (value_type* __p = __v.__begin_; __p != __v.__end_; ++__end_, ++__p)
-        ::new ((void*)__end_) value_type(*__p);
-#  if _LIBCPP_HAS_EXCEPTIONS
-    } catch (...) {
-      __clear(__v.size());
-      throw;
-    }
-#  endif // _LIBCPP_HAS_EXCEPTIONS
+    auto __guard      = std::__make_exception_guard([&] { __clear(__v.size()); });
+    for (value_type* __p = __v.__begin_; __p != __v.__end_; ++__end_, ++__p)
+      ::new ((void*)__end_) value_type(*__p);
+    __guard.__complete();
   }
 }
 
@@ -2059,18 +2044,11 @@ valarray<_Tp>::valarray(initializer_list<value_type> __il) : __begin_(nullptr),
   const size_t __n = __il.size();
   if (__n) {
     __begin_ = __end_ = allocator<value_type>().allocate(__n);
-#    if _LIBCPP_HAS_EXCEPTIONS
-    try {
-#    endif // _LIBCPP_HAS_EXCEPTIONS
-      size_t __n_left = __n;
-      for (const value_type* __p = __il.begin(); __n_left; ++__end_, ++__p, --__n_left)
-        ::new ((void*)__end_) value_type(*__p);
-#    if _LIBCPP_HAS_EXCEPTIONS
-    } catch (...) {
-      __clear(__n);
-      throw;
-    }
-#    endif // _LIBCPP_HAS_EXCEPTIONS
+    auto __guard      = std::__make_exception_guard([&] { __clear(__n); });
+    size_t __n_left   = __n;
+    for (const value_type* __p = __il.begin(); __n_left; ++__end_, ++__p, --__n_left)
+      ::new ((void*)__end_) value_type(*__p);
+    __guard.__complete();
   }
 }
 
@@ -2081,18 +2059,11 @@ valarray<_Tp>::valarray(const slice_array<value_type>& __sa) : __begin_(nullptr)
   const size_t __n = __sa.__size_;
   if (__n) {
     __begin_ = __end_ = allocator<value_type>().allocate(__n);
-#  if _LIBCPP_HAS_EXCEPTIONS
-    try {
-#  endif // _LIBCPP_HAS_EXCEPTIONS
-      size_t __n_left = __n;
-      for (const value_type* __p = __sa.__vp_; __n_left; ++__end_, __p += __sa.__stride_, --__n_left)
-        ::new ((void*)__end_) value_type(*__p);
-#  if _LIBCPP_HAS_EXCEPTIONS
-    } catch (...) {
-      __clear(__n);
-      throw;
-    }
-#  endif // _LIBCPP_HAS_EXCEPTIONS
+    auto __guard      = std::__make_exception_guard([&] { __clear(__n); });
+    size_t __n_left   = __n;
+    for (const value_type* __p = __sa.__vp_; __n_left; ++__end_, __p += __sa.__stride_, --__n_left)
+      ::new ((void*)__end_) value_type(*__p);
+    __guard.__complete();
   }
 }
 
@@ -2101,19 +2072,12 @@ valarray<_Tp>::valarray(const gslice_array<value_type>& __ga) : __begin_(nullptr
   const size_t __n = __ga.__1d_.size();
   if (__n) {
     __begin_ = __end_ = allocator<value_type>().allocate(__n);
-#  if _LIBCPP_HAS_EXCEPTIONS
-    try {
-#  endif // _LIBCPP_HAS_EXCEPTIONS
-      typedef const size_t* _Ip;
-      const value_type* __s = __ga.__vp_;
-      for (_Ip __i = __ga.__1d_.__begin_, __e = __ga.__1d_.__end_; __i != __e; ++__i, ++__end_)
-        ::new ((void*)__end_) value_type(__s[*__i]);
-#  if _LIBCPP_HAS_EXCEPTIONS
-    } catch (...) {
-      __clear(__n);
-      throw;
-    }
-#  endif // _LIBCPP_HAS_EXCEPTIONS
+    auto __guard      = std::__make_exception_guard([&] { __clear(__n); });
+    typedef const size_t* _Ip;
+    const value_type* __s = __ga.__vp_;
+    for (_Ip __i = __ga.__1d_.__begin_, __e = __ga.__1d_.__end_; __i != __e; ++__i, ++__end_)
+      ::new ((void*)__end_) value_type(__s[*__i]);
+    __guard.__complete();
   }
 }
 
@@ -2122,19 +2086,12 @@ valarray<_Tp>::valarray(const mask_array<value_type>& __ma) : __begin_(nullptr),
   const size_t __n = __ma.__1d_.size();
   if (__n) {
     __begin_ = __end_ = allocator<value_type>().allocate(__n);
-#  if _LIBCPP_HAS_EXCEPTIONS
-    try {
-#  endif // _LIBCPP_HAS_EXCEPTIONS
-      typedef const size_t* _Ip;
-      const value_type* __s = __ma.__vp_;
-      for (_Ip __i = __ma.__1d_.__begin_, __e = __ma.__1d_.__end_; __i != __e; ++__i, ++__end_)
-        ::new ((void*)__end_) value_type(__s[*__i]);
-#  if _LIBCPP_HAS_EXCEPTIONS
-    } catch (...) {
-      __clear(__n);
-      throw;
-    }
-#  endif // _LIBCPP_HAS_EXCEPTIONS
+    auto __guard      = std::__make_exception_guard([&] { __clear(__n); });
+    typedef const size_t* _Ip;
+    const value_type* __s = __ma.__vp_;
+    for (_Ip __i = __ma.__1d_.__begin_, __e = __ma.__1d_.__end_; __i != __e; ++__i, ++__end_)
+      ::new ((void*)__end_) value_type(__s[*__i]);
+    __guard.__complete();
   }
 }
 
@@ -2143,19 +2100,12 @@ valarray<_Tp>::valarray(const indirect_array<value_type>& __ia) : __begin_(nullp
   const size_t __n = __ia.__1d_.size();
   if (__n) {
     __begin_ = __end_ = allocator<value_type>().allocate(__n);
-#  if _LIBCPP_HAS_EXCEPTIONS
-    try {
-#  endif // _LIBCPP_HAS_EXCEPTIONS
-      typedef const size_t* _Ip;
-      const value_type* __s = __ia.__vp_;
-      for (_Ip __i = __ia.__1d_.__begin_, __e = __ia.__1d_.__end_; __i != __e; ++__i, ++__end_)
-        ::new ((void*)__end_) value_type(__s[*__i]);
-#  if _LIBCPP_HAS_EXCEPTIONS
-    } catch (...) {
-      __clear(__n);
-      throw;
-    }
-#  endif // _LIBCPP_HAS_EXCEPTIONS
+    auto __guard      = std::__make_exception_guard([&] { __clear(__n); });
+    typedef const size_t* _Ip;
+    const value_type* __s = __ia.__vp_;
+    for (_Ip __i = __ia.__1d_.__begin_, __e = __ia.__1d_.__end_; __i != __e; ++__i, ++__end_)
+      ::new ((void*)__end_) value_type(__s[*__i]);
+    __guard.__complete();
   }
 }
 
@@ -2644,17 +2594,10 @@ void valarray<_Tp>::resize(size_t __n, value_type __x) {
   __clear(size());
   if (__n) {
     __begin_ = __end_ = allocator<value_type>().allocate(__n);
-#  if _LIBCPP_HAS_EXCEPTIONS
-    try {
-#  endif // _LIBCPP_HAS_EXCEPTIONS
-      for (size_t __n_left = __n; __n_left; --__n_left, ++__end_)
-        ::new ((void*)__end_) value_type(__x);
-#  if _LIBCPP_HAS_EXCEPTIONS
-    } catch (...) {
-      __clear(__n);
-      throw;
-    }
-#  endif // _LIBCPP_HAS_EXCEPTIONS
+    auto __guard      = std::__make_exception_guard([&] { __clear(__n); });
+    for (size_t __n_left = __n; __n_left; --__n_left, ++__end_)
+      ::new ((void*)__end_) value_type(__x);
+    __guard.__complete();
   }
 }
 
@@ -3168,7 +3111,7 @@ operator>=(const typename _Expr::value_type& __x, const _Expr& __y) {
 }
 
 template <class _Expr, __enable_if_t<__is_val_expr<_Expr>::value, int> = 0>
-inline _LIBCPP_HIDE_FROM_ABI __val_expr<_UnaryOp<__abs_expr<typename _Expr::value_type>, _Expr> >
+[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI __val_expr<_UnaryOp<__abs_expr<typename _Expr::value_type>, _Expr> >
 abs(const _Expr& __x) {
   typedef typename _Expr::value_type value_type;
   typedef _UnaryOp<__abs_expr<value_type>, _Expr> _Op;
@@ -3176,7 +3119,7 @@ abs(const _Expr& __x) {
 }
 
 template <class _Expr, __enable_if_t<__is_val_expr<_Expr>::value, int> = 0>
-inline _LIBCPP_HIDE_FROM_ABI __val_expr<_UnaryOp<__acos_expr<typename _Expr::value_type>, _Expr> >
+[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI __val_expr<_UnaryOp<__acos_expr<typename _Expr::value_type>, _Expr> >
 acos(const _Expr& __x) {
   typedef typename _Expr::value_type value_type;
   typedef _UnaryOp<__acos_expr<value_type>, _Expr> _Op;
@@ -3184,7 +3127,7 @@ acos(const _Expr& __x) {
 }
 
 template <class _Expr, __enable_if_t<__is_val_expr<_Expr>::value, int> = 0>
-inline _LIBCPP_HIDE_FROM_ABI __val_expr<_UnaryOp<__asin_expr<typename _Expr::value_type>, _Expr> >
+[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI __val_expr<_UnaryOp<__asin_expr<typename _Expr::value_type>, _Expr> >
 asin(const _Expr& __x) {
   typedef typename _Expr::value_type value_type;
   typedef _UnaryOp<__asin_expr<value_type>, _Expr> _Op;
@@ -3192,7 +3135,7 @@ asin(const _Expr& __x) {
 }
 
 template <class _Expr, __enable_if_t<__is_val_expr<_Expr>::value, int> = 0>
-inline _LIBCPP_HIDE_FROM_ABI __val_expr<_UnaryOp<__atan_expr<typename _Expr::value_type>, _Expr> >
+[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI __val_expr<_UnaryOp<__atan_expr<typename _Expr::value_type>, _Expr> >
 atan(const _Expr& __x) {
   typedef typename _Expr::value_type value_type;
   typedef _UnaryOp<__atan_expr<value_type>, _Expr> _Op;
@@ -3202,15 +3145,16 @@ atan(const _Expr& __x) {
 template <class _Expr1,
           class _Expr2,
           __enable_if_t<__is_val_expr<_Expr1>::value && __is_val_expr<_Expr2>::value, int> = 0>
-inline _LIBCPP_HIDE_FROM_ABI __val_expr<_BinaryOp<__atan2_expr<typename _Expr1::value_type>, _Expr1, _Expr2> >
-atan2(const _Expr1& __x, const _Expr2& __y) {
+[[__nodiscard__]] inline
+    _LIBCPP_HIDE_FROM_ABI __val_expr<_BinaryOp<__atan2_expr<typename _Expr1::value_type>, _Expr1, _Expr2> >
+    atan2(const _Expr1& __x, const _Expr2& __y) {
   typedef typename _Expr1::value_type value_type;
   typedef _BinaryOp<__atan2_expr<value_type>, _Expr1, _Expr2> _Op;
   return __val_expr<_Op>(_Op(__atan2_expr<value_type>(), __x, __y));
 }
 
 template <class _Expr, __enable_if_t<__is_val_expr<_Expr>::value, int> = 0>
-inline _LIBCPP_HIDE_FROM_ABI
+[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI
 __val_expr<_BinaryOp<__atan2_expr<typename _Expr::value_type>, _Expr, __scalar_expr<typename _Expr::value_type> > >
 atan2(const _Expr& __x, const typename _Expr::value_type& __y) {
   typedef typename _Expr::value_type value_type;
@@ -3219,7 +3163,7 @@ atan2(const _Expr& __x, const typename _Expr::value_type& __y) {
 }
 
 template <class _Expr, __enable_if_t<__is_val_expr<_Expr>::value, int> = 0>
-inline _LIBCPP_HIDE_FROM_ABI
+[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI
 __val_expr<_BinaryOp<__atan2_expr<typename _Expr::value_type>, __scalar_expr<typename _Expr::value_type>, _Expr> >
 atan2(const typename _Expr::value_type& __x, const _Expr& __y) {
   typedef typename _Expr::value_type value_type;
@@ -3228,7 +3172,7 @@ atan2(const typename _Expr::value_type& __x, const _Expr& __y) {
 }
 
 template <class _Expr, __enable_if_t<__is_val_expr<_Expr>::value, int> = 0>
-inline _LIBCPP_HIDE_FROM_ABI __val_expr<_UnaryOp<__cos_expr<typename _Expr::value_type>, _Expr> >
+[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI __val_expr<_UnaryOp<__cos_expr<typename _Expr::value_type>, _Expr> >
 cos(const _Expr& __x) {
   typedef typename _Expr::value_type value_type;
   typedef _UnaryOp<__cos_expr<value_type>, _Expr> _Op;
@@ -3236,7 +3180,7 @@ cos(const _Expr& __x) {
 }
 
 template <class _Expr, __enable_if_t<__is_val_expr<_Expr>::value, int> = 0>
-inline _LIBCPP_HIDE_FROM_ABI __val_expr<_UnaryOp<__cosh_expr<typename _Expr::value_type>, _Expr> >
+[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI __val_expr<_UnaryOp<__cosh_expr<typename _Expr::value_type>, _Expr> >
 cosh(const _Expr& __x) {
   typedef typename _Expr::value_type value_type;
   typedef _UnaryOp<__cosh_expr<value_type>, _Expr> _Op;
@@ -3244,7 +3188,7 @@ cosh(const _Expr& __x) {
 }
 
 template <class _Expr, __enable_if_t<__is_val_expr<_Expr>::value, int> = 0>
-inline _LIBCPP_HIDE_FROM_ABI __val_expr<_UnaryOp<__exp_expr<typename _Expr::value_type>, _Expr> >
+[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI __val_expr<_UnaryOp<__exp_expr<typename _Expr::value_type>, _Expr> >
 exp(const _Expr& __x) {
   typedef typename _Expr::value_type value_type;
   typedef _UnaryOp<__exp_expr<value_type>, _Expr> _Op;
@@ -3252,7 +3196,7 @@ exp(const _Expr& __x) {
 }
 
 template <class _Expr, __enable_if_t<__is_val_expr<_Expr>::value, int> = 0>
-inline _LIBCPP_HIDE_FROM_ABI __val_expr<_UnaryOp<__log_expr<typename _Expr::value_type>, _Expr> >
+[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI __val_expr<_UnaryOp<__log_expr<typename _Expr::value_type>, _Expr> >
 log(const _Expr& __x) {
   typedef typename _Expr::value_type value_type;
   typedef _UnaryOp<__log_expr<value_type>, _Expr> _Op;
@@ -3260,7 +3204,7 @@ log(const _Expr& __x) {
 }
 
 template <class _Expr, __enable_if_t<__is_val_expr<_Expr>::value, int> = 0>
-inline _LIBCPP_HIDE_FROM_ABI __val_expr<_UnaryOp<__log10_expr<typename _Expr::value_type>, _Expr> >
+[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI __val_expr<_UnaryOp<__log10_expr<typename _Expr::value_type>, _Expr> >
 log10(const _Expr& __x) {
   typedef typename _Expr::value_type value_type;
   typedef _UnaryOp<__log10_expr<value_type>, _Expr> _Op;
@@ -3270,15 +3214,16 @@ log10(const _Expr& __x) {
 template <class _Expr1,
           class _Expr2,
           __enable_if_t<__is_val_expr<_Expr1>::value && __is_val_expr<_Expr2>::value, int> = 0>
-inline _LIBCPP_HIDE_FROM_ABI __val_expr<_BinaryOp<__pow_expr<typename _Expr1::value_type>, _Expr1, _Expr2> >
-pow(const _Expr1& __x, const _Expr2& __y) {
+[[__nodiscard__]] inline
+    _LIBCPP_HIDE_FROM_ABI __val_expr<_BinaryOp<__pow_expr<typename _Expr1::value_type>, _Expr1, _Expr2> >
+    pow(const _Expr1& __x, const _Expr2& __y) {
   typedef typename _Expr1::value_type value_type;
   typedef _BinaryOp<__pow_expr<value_type>, _Expr1, _Expr2> _Op;
   return __val_expr<_Op>(_Op(__pow_expr<value_type>(), __x, __y));
 }
 
 template <class _Expr, __enable_if_t<__is_val_expr<_Expr>::value, int> = 0>
-inline _LIBCPP_HIDE_FROM_ABI
+[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI
 __val_expr<_BinaryOp<__pow_expr<typename _Expr::value_type>, _Expr, __scalar_expr<typename _Expr::value_type> > >
 pow(const _Expr& __x, const typename _Expr::value_type& __y) {
   typedef typename _Expr::value_type value_type;
@@ -3287,7 +3232,7 @@ pow(const _Expr& __x, const typename _Expr::value_type& __y) {
 }
 
 template <class _Expr, __enable_if_t<__is_val_expr<_Expr>::value, int> = 0>
-inline _LIBCPP_HIDE_FROM_ABI
+[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI
 __val_expr<_BinaryOp<__pow_expr<typename _Expr::value_type>, __scalar_expr<typename _Expr::value_type>, _Expr> >
 pow(const typename _Expr::value_type& __x, const _Expr& __y) {
   typedef typename _Expr::value_type value_type;
@@ -3296,7 +3241,7 @@ pow(const typename _Expr::value_type& __x, const _Expr& __y) {
 }
 
 template <class _Expr, __enable_if_t<__is_val_expr<_Expr>::value, int> = 0>
-inline _LIBCPP_HIDE_FROM_ABI __val_expr<_UnaryOp<__sin_expr<typename _Expr::value_type>, _Expr> >
+[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI __val_expr<_UnaryOp<__sin_expr<typename _Expr::value_type>, _Expr> >
 sin(const _Expr& __x) {
   typedef typename _Expr::value_type value_type;
   typedef _UnaryOp<__sin_expr<value_type>, _Expr> _Op;
@@ -3304,7 +3249,7 @@ sin(const _Expr& __x) {
 }
 
 template <class _Expr, __enable_if_t<__is_val_expr<_Expr>::value, int> = 0>
-inline _LIBCPP_HIDE_FROM_ABI __val_expr<_UnaryOp<__sinh_expr<typename _Expr::value_type>, _Expr> >
+[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI __val_expr<_UnaryOp<__sinh_expr<typename _Expr::value_type>, _Expr> >
 sinh(const _Expr& __x) {
   typedef typename _Expr::value_type value_type;
   typedef _UnaryOp<__sinh_expr<value_type>, _Expr> _Op;
@@ -3312,7 +3257,7 @@ sinh(const _Expr& __x) {
 }
 
 template <class _Expr, __enable_if_t<__is_val_expr<_Expr>::value, int> = 0>
-inline _LIBCPP_HIDE_FROM_ABI __val_expr<_UnaryOp<__sqrt_expr<typename _Expr::value_type>, _Expr> >
+[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI __val_expr<_UnaryOp<__sqrt_expr<typename _Expr::value_type>, _Expr> >
 sqrt(const _Expr& __x) {
   typedef typename _Expr::value_type value_type;
   typedef _UnaryOp<__sqrt_expr<value_type>, _Expr> _Op;
@@ -3320,7 +3265,7 @@ sqrt(const _Expr& __x) {
 }
 
 template <class _Expr, __enable_if_t<__is_val_expr<_Expr>::value, int> = 0>
-inline _LIBCPP_HIDE_FROM_ABI __val_expr<_UnaryOp<__tan_expr<typename _Expr::value_type>, _Expr> >
+[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI __val_expr<_UnaryOp<__tan_expr<typename _Expr::value_type>, _Expr> >
 tan(const _Expr& __x) {
   typedef typename _Expr::value_type value_type;
   typedef _UnaryOp<__tan_expr<value_type>, _Expr> _Op;
@@ -3328,7 +3273,7 @@ tan(const _Expr& __x) {
 }
 
 template <class _Expr, __enable_if_t<__is_val_expr<_Expr>::value, int> = 0>
-inline _LIBCPP_HIDE_FROM_ABI __val_expr<_UnaryOp<__tanh_expr<typename _Expr::value_type>, _Expr> >
+[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI __val_expr<_UnaryOp<__tanh_expr<typename _Expr::value_type>, _Expr> >
 tanh(const _Expr& __x) {
   typedef typename _Expr::value_type value_type;
   typedef _UnaryOp<__tanh_expr<value_type>, _Expr> _Op;
@@ -3336,22 +3281,22 @@ tanh(const _Expr& __x) {
 }
 
 template <class _Tp>
-inline _LIBCPP_HIDE_FROM_ABI _Tp* begin(valarray<_Tp>& __v) {
+[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI _Tp* begin(valarray<_Tp>& __v) {
   return __v.__begin_;
 }
 
 template <class _Tp>
-inline _LIBCPP_HIDE_FROM_ABI const _Tp* begin(const valarray<_Tp>& __v) {
+[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI const _Tp* begin(const valarray<_Tp>& __v) {
   return __v.__begin_;
 }
 
 template <class _Tp>
-inline _LIBCPP_HIDE_FROM_ABI _Tp* end(valarray<_Tp>& __v) {
+[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI _Tp* end(valarray<_Tp>& __v) {
   return __v.__end_;
 }
 
 template <class _Tp>
-inline _LIBCPP_HIDE_FROM_ABI const _Tp* end(const valarray<_Tp>& __v) {
+[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI const _Tp* end(const valarray<_Tp>& __v) {
   return __v.__end_;
 }
 
diff --git a/lib/libcxx/include/variant b/lib/libcxx/include/variant
index ede9f486ec..56c5efe92b 100644
--- a/lib/libcxx/include/variant
+++ b/lib/libcxx/include/variant
@@ -247,7 +247,6 @@ namespace std {
 #  include <__type_traits/is_nothrow_assignable.h>
 #  include <__type_traits/is_nothrow_constructible.h>
 #  include <__type_traits/is_reference.h>
-#  include <__type_traits/is_replaceable.h>
 #  include <__type_traits/is_same.h>
 #  include <__type_traits/is_swappable.h>
 #  include <__type_traits/is_trivially_assignable.h>
@@ -289,7 +288,7 @@ _LIBCPP_BEGIN_UNVERSIONED_NAMESPACE_STD
 
 class _LIBCPP_EXPORTED_FROM_ABI bad_variant_access : public exception {
 public:
-  const char* what() const _NOEXCEPT override;
+  [[__nodiscard__]] const char* what() const _NOEXCEPT override;
 };
 
 _LIBCPP_END_UNVERSIONED_NAMESPACE_STD
@@ -918,18 +917,10 @@ protected:
   _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 void __assign_alt(__alt<_Ip, _Tp>& __a, _Arg&& __arg) {
     if (this->index() == _Ip) {
       __a.__value = std::forward<_Arg>(__arg);
+    } else if constexpr (is_nothrow_constructible_v<_Tp, _Arg> || !is_nothrow_move_constructible_v<_Tp>) {
+      this->__emplace<_Ip>(std::forward<_Arg>(__arg));
     } else {
-      struct {
-        _LIBCPP_HIDDEN _LIBCPP_CONSTEXPR_SINCE_CXX20 void operator()(true_type) const {
-          __this->__emplace<_Ip>(std::forward<_Arg>(__arg));
-        }
-        _LIBCPP_HIDDEN _LIBCPP_CONSTEXPR_SINCE_CXX20 void operator()(false_type) const {
-          __this->__emplace<_Ip>(_Tp(std::forward<_Arg>(__arg)));
-        }
-        __assignment* __this;
-        _Arg&& __arg;
-      } __impl{this, std::forward<_Arg>(__arg)};
-      __impl(bool_constant < is_nothrow_constructible_v<_Tp, _Arg> || !is_nothrow_move_constructible_v < _Tp >> {});
+      this->__emplace<_Ip>(_Tp(std::forward<_Arg>(__arg)));
     }
   }
 
@@ -1127,14 +1118,14 @@ template <class _IdxSeq>
 struct __make_overloads_imp;
 
 template <size_t... _Idx>
-struct __make_overloads_imp<__tuple_indices<_Idx...> > {
+struct __make_overloads_imp<index_sequence<_Idx...> > {
   template <class... _Types>
   using _Apply _LIBCPP_NODEBUG = __all_overloads<__overload<_Types, _Idx>...>;
 };
 
 template <class... _Types>
 using _MakeOverloads _LIBCPP_NODEBUG =
-    typename __make_overloads_imp< __make_indices_imp<sizeof...(_Types), 0> >::template _Apply<_Types...>;
+    typename __make_overloads_imp<make_index_sequence<sizeof...(_Types)>>::template _Apply<_Types...>;
 
 template <class _Tp, class... _Types>
 using __best_match_t _LIBCPP_NODEBUG = typename invoke_result_t<_MakeOverloads<_Types...>, _Tp, _Tp>::type;
@@ -1172,7 +1163,6 @@ class _LIBCPP_DECLSPEC_EMPTY_BASES _LIBCPP_NO_SPECIALIZATIONS variant
 public:
   using __trivially_relocatable _LIBCPP_NODEBUG =
       conditional_t<_And<__libcpp_is_trivially_relocatable<_Types>...>::value, variant, void>;
-  using __replaceable _LIBCPP_NODEBUG = conditional_t<_And<__is_replaceable<_Types>...>::value, variant, void>;
 
   template <bool _Dummy                                                                               = true,
             enable_if_t<__dependent_type<is_default_constructible<__first_type>, _Dummy>::value, int> = 0>
@@ -1284,11 +1274,11 @@ public:
     return __impl_.template __emplace<_Ip>(__il, std::forward<_Args>(__args)...);
   }
 
-  _LIBCPP_HIDE_FROM_ABI constexpr bool valueless_by_exception() const noexcept {
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr bool valueless_by_exception() const noexcept {
     return __impl_.valueless_by_exception();
   }
 
-  _LIBCPP_HIDE_FROM_ABI constexpr size_t index() const noexcept { return __impl_.index(); }
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr size_t index() const noexcept { return __impl_.index(); }
 
   template < bool _Dummy       = true,
              enable_if_t< __all<(__dependent_type<is_move_constructible<_Types>, _Dummy>::value &&
@@ -1299,7 +1289,7 @@ public:
     __impl_.__swap(__that.__impl_);
   }
 
-#    if _LIBCPP_STD_VER >= 26 && _LIBCPP_HAS_EXPLICIT_THIS_PARAMETER
+#    if _LIBCPP_STD_VER >= 26
   // Helper class to implement [variant.visit]/10
   //   Constraints: The call to visit does not use an explicit template-argument-list
   //   that begins with a type template-argument.
@@ -1331,7 +1321,7 @@ _LIBCPP_HIDE_FROM_ABI constexpr bool __holds_alternative(const variant<_Types...
 }
 
 template <class _Tp, class... _Types>
-_LIBCPP_HIDE_FROM_ABI constexpr bool holds_alternative(const variant<_Types...>& __v) noexcept {
+[[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr bool holds_alternative(const variant<_Types...>& __v) noexcept {
   return std::__holds_alternative<__find_exactly_one_t<_Tp, _Types...>::value>(__v);
 }
 
@@ -1345,21 +1335,23 @@ _LIBCPP_HIDE_FROM_ABI constexpr auto&& __generic_get(_Vp&& __v) {
 }
 
 template <size_t _Ip, class... _Types>
-_LIBCPP_HIDE_FROM_ABI constexpr variant_alternative_t<_Ip, variant<_Types...>>& get(variant<_Types...>& __v) {
+[[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr variant_alternative_t<_Ip, variant<_Types...>>&
+get(variant<_Types...>& __v) {
   static_assert(_Ip < sizeof...(_Types));
   static_assert(!is_void_v<variant_alternative_t<_Ip, variant<_Types...>>>);
   return std::__generic_get<_Ip>(__v);
 }
 
 template <size_t _Ip, class... _Types>
-_LIBCPP_HIDE_FROM_ABI constexpr variant_alternative_t<_Ip, variant<_Types...>>&& get(variant<_Types...>&& __v) {
+[[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr variant_alternative_t<_Ip, variant<_Types...>>&&
+get(variant<_Types...>&& __v) {
   static_assert(_Ip < sizeof...(_Types));
   static_assert(!is_void_v<variant_alternative_t<_Ip, variant<_Types...>>>);
   return std::__generic_get<_Ip>(std::move(__v));
 }
 
 template <size_t _Ip, class... _Types>
-_LIBCPP_HIDE_FROM_ABI constexpr const variant_alternative_t<_Ip, variant<_Types...>>&
+[[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr const variant_alternative_t<_Ip, variant<_Types...>>&
 get(const variant<_Types...>& __v) {
   static_assert(_Ip < sizeof...(_Types));
   static_assert(!is_void_v<variant_alternative_t<_Ip, variant<_Types...>>>);
@@ -1367,7 +1359,7 @@ get(const variant<_Types...>& __v) {
 }
 
 template <size_t _Ip, class... _Types>
-_LIBCPP_HIDE_FROM_ABI constexpr const variant_alternative_t<_Ip, variant<_Types...>>&&
+[[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr const variant_alternative_t<_Ip, variant<_Types...>>&&
 get(const variant<_Types...>&& __v) {
   static_assert(_Ip < sizeof...(_Types));
   static_assert(!is_void_v<variant_alternative_t<_Ip, variant<_Types...>>>);
@@ -1375,25 +1367,25 @@ get(const variant<_Types...>&& __v) {
 }
 
 template <class _Tp, class... _Types>
-_LIBCPP_HIDE_FROM_ABI constexpr _Tp& get(variant<_Types...>& __v) {
+[[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr _Tp& get(variant<_Types...>& __v) {
   static_assert(!is_void_v<_Tp>);
   return std::get<__find_exactly_one_t<_Tp, _Types...>::value>(__v);
 }
 
 template <class _Tp, class... _Types>
-_LIBCPP_HIDE_FROM_ABI constexpr _Tp&& get(variant<_Types...>&& __v) {
+[[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr _Tp&& get(variant<_Types...>&& __v) {
   static_assert(!is_void_v<_Tp>);
   return std::get<__find_exactly_one_t<_Tp, _Types...>::value>(std::move(__v));
 }
 
 template <class _Tp, class... _Types>
-_LIBCPP_HIDE_FROM_ABI constexpr const _Tp& get(const variant<_Types...>& __v) {
+[[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr const _Tp& get(const variant<_Types...>& __v) {
   static_assert(!is_void_v<_Tp>);
   return std::get<__find_exactly_one_t<_Tp, _Types...>::value>(__v);
 }
 
 template <class _Tp, class... _Types>
-_LIBCPP_HIDE_FROM_ABI constexpr const _Tp&& get(const variant<_Types...>&& __v) {
+[[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr const _Tp&& get(const variant<_Types...>&& __v) {
   static_assert(!is_void_v<_Tp>);
   return std::get<__find_exactly_one_t<_Tp, _Types...>::value>(std::move(__v));
 }
@@ -1405,7 +1397,7 @@ _LIBCPP_HIDE_FROM_ABI constexpr auto* __generic_get_if(_Vp* __v) noexcept {
 }
 
 template <size_t _Ip, class... _Types>
-_LIBCPP_HIDE_FROM_ABI constexpr add_pointer_t<variant_alternative_t<_Ip, variant<_Types...>>>
+[[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr add_pointer_t<variant_alternative_t<_Ip, variant<_Types...>>>
 get_if(variant<_Types...>* __v) noexcept {
   static_assert(_Ip < sizeof...(_Types));
   static_assert(!is_void_v<variant_alternative_t<_Ip, variant<_Types...>>>);
@@ -1413,7 +1405,7 @@ get_if(variant<_Types...>* __v) noexcept {
 }
 
 template <size_t _Ip, class... _Types>
-_LIBCPP_HIDE_FROM_ABI constexpr add_pointer_t<const variant_alternative_t<_Ip, variant<_Types...>>>
+[[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr add_pointer_t<const variant_alternative_t<_Ip, variant<_Types...>>>
 get_if(const variant<_Types...>* __v) noexcept {
   static_assert(_Ip < sizeof...(_Types));
   static_assert(!is_void_v<variant_alternative_t<_Ip, variant<_Types...>>>);
@@ -1421,13 +1413,13 @@ get_if(const variant<_Types...>* __v) noexcept {
 }
 
 template <class _Tp, class... _Types>
-_LIBCPP_HIDE_FROM_ABI constexpr add_pointer_t<_Tp> get_if(variant<_Types...>* __v) noexcept {
+[[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr add_pointer_t<_Tp> get_if(variant<_Types...>* __v) noexcept {
   static_assert(!is_void_v<_Tp>);
   return std::get_if<__find_exactly_one_t<_Tp, _Types...>::value>(__v);
 }
 
 template <class _Tp, class... _Types>
-_LIBCPP_HIDE_FROM_ABI constexpr add_pointer_t<const _Tp> get_if(const variant<_Types...>* __v) noexcept {
+[[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr add_pointer_t<const _Tp> get_if(const variant<_Types...>* __v) noexcept {
   static_assert(!is_void_v<_Tp>);
   return std::get_if<__find_exactly_one_t<_Tp, _Types...>::value>(__v);
 }
@@ -1608,7 +1600,7 @@ struct hash< __enable_hash_helper<variant<_Types...>, remove_const_t<_Types>...>
   using result_type _LIBCPP_DEPRECATED_IN_CXX17   = size_t;
 #    endif
 
-  _LIBCPP_HIDE_FROM_ABI size_t operator()(const variant<_Types...>& __v) const {
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI size_t operator()(const variant<_Types...>& __v) const {
     using __variant_detail::__visitation::__variant;
     size_t __res =
         __v.valueless_by_exception()
diff --git a/lib/libcxx/include/version b/lib/libcxx/include/version
index d98049bd57..7d77677a01 100644
--- a/lib/libcxx/include/version
+++ b/lib/libcxx/include/version
@@ -37,7 +37,8 @@ __cpp_lib_atomic_float                                  201711L <atomic>
 __cpp_lib_atomic_is_always_lock_free                    201603L <atomic>
 __cpp_lib_atomic_lock_free_type_aliases                 201907L <atomic>
 __cpp_lib_atomic_min_max                                202403L <atomic>
-__cpp_lib_atomic_ref                                    201806L <atomic>
+__cpp_lib_atomic_ref                                    202411L <atomic>
+                                                        201806L // C++20
 __cpp_lib_atomic_shared_ptr                             201711L <atomic>
 __cpp_lib_atomic_value_initialization                   201911L <atomic> <memory>
 __cpp_lib_atomic_wait                                   201907L <atomic>
@@ -62,7 +63,7 @@ __cpp_lib_clamp                                         201603L <algorithm>
 __cpp_lib_common_reference                              202302L <type_traits>
 __cpp_lib_common_reference_wrapper                      202302L <functional>
 __cpp_lib_complex_udls                                  201309L <complex>
-__cpp_lib_concepts                                      202002L <concepts>
+__cpp_lib_concepts                                      202207L <concepts>
 __cpp_lib_constexpr_algorithms                          202306L <algorithm> <utility>
                                                         201806L // C++20
 __cpp_lib_constexpr_bitset                              202207L <bitset>
@@ -70,6 +71,8 @@ __cpp_lib_constexpr_charconv                            202207L <charconv>
 __cpp_lib_constexpr_cmath                               202202L <cmath> <cstdlib>
 __cpp_lib_constexpr_complex                             201711L <complex>
 __cpp_lib_constexpr_dynamic_alloc                       201907L <memory>
+__cpp_lib_constexpr_flat_map                            202502L <flat_map>
+__cpp_lib_constexpr_flat_set                            202502L <flat_set>
 __cpp_lib_constexpr_forward_list                        202502L <forward_list>
 __cpp_lib_constexpr_functional                          201907L <functional>
 __cpp_lib_constexpr_iterator                            201811L <iterator>
@@ -108,8 +111,8 @@ __cpp_lib_execution                                     201902L <execution>
                                                         201603L // C++17
 __cpp_lib_expected                                      202211L <expected>
 __cpp_lib_filesystem                                    201703L <filesystem>
-__cpp_lib_flat_map                                      202207L <flat_map>
-__cpp_lib_flat_set                                      202207L <flat_set>
+__cpp_lib_flat_map                                      202511L <flat_map>
+__cpp_lib_flat_set                                      202511L <flat_set>
 __cpp_lib_format                                        202110L <format>
 __cpp_lib_format_path                                   202403L <filesystem>
 __cpp_lib_format_ranges                                 202207L <format>
@@ -138,7 +141,8 @@ __cpp_lib_incomplete_container_elements                 201505L <forward_list> <
 __cpp_lib_inplace_vector                                202406L <inplace_vector>
 __cpp_lib_int_pow2                                      202002L <bit>
 __cpp_lib_integer_comparison_functions                  202002L <utility>
-__cpp_lib_integer_sequence                              201304L <utility>
+__cpp_lib_integer_sequence                              202511L <utility>
+                                                        201304L // C++14
 __cpp_lib_integral_constant_callable                    201304L <type_traits>
 __cpp_lib_interpolate                                   201902L <cmath> <numeric>
 __cpp_lib_invoke                                        201411L <functional>
@@ -184,7 +188,8 @@ __cpp_lib_nonmember_container_access                    201411L <array> <deque>
 __cpp_lib_not_fn                                        202306L <functional>
                                                         201603L // C++17
 __cpp_lib_null_iterators                                201304L <iterator>
-__cpp_lib_optional                                      202110L <optional>
+__cpp_lib_optional                                      202506L <optional>
+                                                        202110L // C++23
                                                         202106L // C++20
                                                         201606L // C++17
 __cpp_lib_optional_range_support                        202406L <optional>
@@ -205,6 +210,7 @@ __cpp_lib_ranges_chunk_by                               202202L <ranges>
 __cpp_lib_ranges_concat                                 202403L <ranges>
 __cpp_lib_ranges_contains                               202207L <algorithm>
 __cpp_lib_ranges_find_last                              202207L <algorithm>
+__cpp_lib_ranges_indices                                202506L <ranges>
 __cpp_lib_ranges_iota                                   202202L <numeric>
 __cpp_lib_ranges_join_with                              202202L <ranges>
 __cpp_lib_ranges_repeat                                 202207L <ranges>
@@ -245,6 +251,7 @@ __cpp_lib_starts_ends_with                              201711L <string> <string
 __cpp_lib_stdatomic_h                                   202011L <stdatomic.h>
 __cpp_lib_string_contains                               202011L <string> <string_view>
 __cpp_lib_string_resize_and_overwrite                   202110L <string>
+__cpp_lib_string_subview                                202506L <string> <string_view>
 __cpp_lib_string_udls                                   201304L <string>
 __cpp_lib_string_view                                   202403L <string> <string_view>
                                                         201803L // C++20
@@ -333,13 +340,11 @@ __cpp_lib_void_t                                        201411L <type_traits>
 # define __cpp_lib_clamp                                201603L
 # define __cpp_lib_enable_shared_from_this              201603L
 // # define __cpp_lib_execution                            201603L
-# if _LIBCPP_HAS_FILESYSTEM && _LIBCPP_AVAILABILITY_HAS_FILESYSTEM_LIBRARY
+# if _LIBCPP_HAS_FILESYSTEM
 #   define __cpp_lib_filesystem                         201703L
 # endif
 # define __cpp_lib_gcd_lcm                              201606L
-# if defined(__GCC_DESTRUCTIVE_SIZE) && defined(__GCC_CONSTRUCTIVE_SIZE)
-#   define __cpp_lib_hardware_interference_size         201703L
-# endif
+# define __cpp_lib_hardware_interference_size           201703L
 # define __cpp_lib_has_unique_object_representations    201606L
 # define __cpp_lib_hypot                                201603L
 # define __cpp_lib_incomplete_container_elements        201505L
@@ -391,10 +396,8 @@ __cpp_lib_void_t                                        201411L <type_traits>
 # define __cpp_lib_atomic_ref                           201806L
 // # define __cpp_lib_atomic_shared_ptr                    201711L
 # define __cpp_lib_atomic_value_initialization          201911L
-# if _LIBCPP_AVAILABILITY_HAS_SYNC
-#   define __cpp_lib_atomic_wait                        201907L
-# endif
-# if _LIBCPP_HAS_THREADS && _LIBCPP_AVAILABILITY_HAS_SYNC
+# define __cpp_lib_atomic_wait                          201907L
+# if _LIBCPP_HAS_THREADS
 #   define __cpp_lib_barrier                            201907L
 # endif
 # define __cpp_lib_bind_front                           201907L
@@ -406,7 +409,7 @@ __cpp_lib_void_t                                        201411L <type_traits>
 # endif
 # define __cpp_lib_common_reference                     202302L
 # define __cpp_lib_common_reference_wrapper             202302L
-# define __cpp_lib_concepts                             202002L
+# define __cpp_lib_concepts                             202207L
 # define __cpp_lib_constexpr_algorithms                 201806L
 # define __cpp_lib_constexpr_complex                    201711L
 # define __cpp_lib_constexpr_dynamic_alloc              201907L
@@ -439,10 +442,10 @@ __cpp_lib_void_t                                        201411L <type_traits>
 // # define __cpp_lib_is_layout_compatible                 201907L
 # define __cpp_lib_is_nothrow_convertible               201806L
 // # define __cpp_lib_is_pointer_interconvertible          201907L
-# if _LIBCPP_HAS_THREADS && _LIBCPP_AVAILABILITY_HAS_SYNC
+# if _LIBCPP_HAS_THREADS
 #   define __cpp_lib_jthread                            201911L
 # endif
-# if _LIBCPP_HAS_THREADS && _LIBCPP_AVAILABILITY_HAS_SYNC
+# if _LIBCPP_HAS_THREADS
 #   define __cpp_lib_latch                              201907L
 # endif
 # define __cpp_lib_list_remove_return_type              201806L
@@ -455,7 +458,7 @@ __cpp_lib_void_t                                        201411L <type_traits>
 # endif
 # define __cpp_lib_ranges                               202110L
 # define __cpp_lib_remove_cvref                         201711L
-# if _LIBCPP_HAS_THREADS && _LIBCPP_AVAILABILITY_HAS_SYNC
+# if _LIBCPP_HAS_THREADS
 #   define __cpp_lib_semaphore                          201907L
 # endif
 # undef  __cpp_lib_shared_ptr_arrays
@@ -494,8 +497,8 @@ __cpp_lib_void_t                                        201411L <type_traits>
 # define __cpp_lib_constexpr_typeinfo                   202106L
 # define __cpp_lib_containers_ranges                    202202L
 # define __cpp_lib_expected                             202211L
-# define __cpp_lib_flat_map                             202207L
-# define __cpp_lib_flat_set                             202207L
+# define __cpp_lib_flat_map                             202511L
+# define __cpp_lib_flat_set                             202511L
 # define __cpp_lib_format_ranges                        202207L
 // # define __cpp_lib_formatters                           202302L
 # define __cpp_lib_forward_like                         202207L
@@ -528,7 +531,7 @@ __cpp_lib_void_t                                        201411L <type_traits>
 // # define __cpp_lib_ranges_slide                         202202L
 # define __cpp_lib_ranges_starts_ends_with              202106L
 # define __cpp_lib_ranges_to_container                  202202L
-// # define __cpp_lib_ranges_zip                           202110L
+# define __cpp_lib_ranges_zip                           202110L
 // # define __cpp_lib_reference_from_temporary             202202L
 // # define __cpp_lib_spanstream                           202106L
 // # define __cpp_lib_stacktrace                           202011L
@@ -544,18 +547,22 @@ __cpp_lib_void_t                                        201411L <type_traits>
 # define __cpp_lib_aligned_accessor                     202411L
 // # define __cpp_lib_associative_heterogeneous_insertion  202306L
 // # define __cpp_lib_atomic_min_max                       202403L
+# undef  __cpp_lib_atomic_ref
+# define __cpp_lib_atomic_ref                           202411L
 # undef  __cpp_lib_bind_front
 # define __cpp_lib_bind_front                           202306L
 # define __cpp_lib_bitset                               202306L
 # undef  __cpp_lib_constexpr_algorithms
 # define __cpp_lib_constexpr_algorithms                 202306L
+# define __cpp_lib_constexpr_flat_map                   202502L
+# define __cpp_lib_constexpr_flat_set                   202502L
 # define __cpp_lib_constexpr_forward_list               202502L
 # define __cpp_lib_constexpr_list                       202502L
 # if !defined(_LIBCPP_ABI_VCRUNTIME)
 #   define __cpp_lib_constexpr_new                      202406L
 # endif
 # define __cpp_lib_constexpr_queue                      202502L
-// # define __cpp_lib_constrained_equality                 202411L
+# define __cpp_lib_constrained_equality                 202411L
 // # define __cpp_lib_copyable_function                    202306L
 // # define __cpp_lib_debugging                            202311L
 // # define __cpp_lib_default_template_type_for_algorithm_values 202403L
@@ -575,21 +582,30 @@ __cpp_lib_void_t                                        201411L <type_traits>
 // # define __cpp_lib_generate_random                      202403L
 // # define __cpp_lib_hazard_pointer                       202306L
 // # define __cpp_lib_inplace_vector                       202406L
+# undef  __cpp_lib_integer_sequence
+# define __cpp_lib_integer_sequence                     202511L
 # define __cpp_lib_is_sufficiently_aligned              202411L
 # if __has_builtin(__builtin_is_virtual_base_of)
 #   define __cpp_lib_is_virtual_base_of                 202406L
 # endif
-// # define __cpp_lib_is_within_lifetime                   202306L
+# if __has_builtin(__builtin_is_within_lifetime)
+#   define __cpp_lib_is_within_lifetime                 202306L
+# endif
 // # define __cpp_lib_linalg                               202311L
 # undef  __cpp_lib_mdspan
 # define __cpp_lib_mdspan                               202406L
 # undef  __cpp_lib_not_fn
 # define __cpp_lib_not_fn                               202306L
-// # define __cpp_lib_optional_range_support               202406L
+# undef  __cpp_lib_optional
+# define __cpp_lib_optional                             202506L
+# if _LIBCPP_HAS_EXPERIMENTAL_OPTIONAL_ITERATOR
+#   define __cpp_lib_optional_range_support             202406L
+# endif
 # undef  __cpp_lib_out_ptr
 # define __cpp_lib_out_ptr                              202311L
 // # define __cpp_lib_philox_engine                        202406L
 // # define __cpp_lib_ranges_concat                        202403L
+# define __cpp_lib_ranges_indices                       202506L
 # define __cpp_lib_ratio                                202306L
 // # define __cpp_lib_rcu                                  202306L
 # define __cpp_lib_reference_wrapper                    202403L
@@ -599,6 +615,7 @@ __cpp_lib_void_t                                        201411L <type_traits>
 # define __cpp_lib_span_at                              202311L
 # define __cpp_lib_span_initializer_list                202311L
 # define __cpp_lib_sstream_from_string_view             202306L
+# define __cpp_lib_string_subview                       202506L
 # undef  __cpp_lib_string_view
 # define __cpp_lib_string_view                          202403L
 // # define __cpp_lib_submdspan                            202306L
diff --git a/lib/libcxx/include/wctype.h b/lib/libcxx/include/wctype.h
index 9a5b163ba4..d2607406d8 100644
--- a/lib/libcxx/include/wctype.h
+++ b/lib/libcxx/include/wctype.h
@@ -45,13 +45,14 @@ wctrans_t wctrans(const char* property);
 */
 
 #if defined(__cplusplus) && __cplusplus < 201103L && defined(_LIBCPP_USE_FROZEN_CXX03_HEADERS)
-#  include <__cxx03/wctype.h>
+#  include <__cxx03/__config>
 #else
 #  include <__config>
+#endif
 
-#  if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
-#    pragma GCC system_header
-#  endif
+#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
+#  pragma GCC system_header
+#endif
 
 // TODO:
 // In the future, we should unconditionally include_next <wctype.h> here and instead
@@ -62,33 +63,32 @@ wctrans_t wctrans(const char* property);
 // nothing (with using_if_exists), and if we include another header that defines one
 // of these declarations (e.g. <wchar.h>), the second `using ::wint_t` with using_if_exists
 // will fail because it does not refer to the same declaration.
-#  if __has_include_next(<wctype.h>)
-#    include_next <wctype.h>
-#    define _LIBCPP_INCLUDED_C_LIBRARY_WCTYPE_H
-#  endif
+#if __has_include_next(<wctype.h>)
+#  include_next <wctype.h>
+#  define _LIBCPP_INCLUDED_C_LIBRARY_WCTYPE_H
+#endif
 
-#  ifdef __cplusplus
+#ifdef __cplusplus
 
-#    undef iswalnum
-#    undef iswalpha
-#    undef iswblank
-#    undef iswcntrl
-#    undef iswdigit
-#    undef iswgraph
-#    undef iswlower
-#    undef iswprint
-#    undef iswpunct
-#    undef iswspace
-#    undef iswupper
-#    undef iswxdigit
-#    undef iswctype
-#    undef wctype
-#    undef towlower
-#    undef towupper
-#    undef towctrans
-#    undef wctrans
+#  undef iswalnum
+#  undef iswalpha
+#  undef iswblank
+#  undef iswcntrl
+#  undef iswdigit
+#  undef iswgraph
+#  undef iswlower
+#  undef iswprint
+#  undef iswpunct
+#  undef iswspace
+#  undef iswupper
+#  undef iswxdigit
+#  undef iswctype
+#  undef wctype
+#  undef towlower
+#  undef towupper
+#  undef towctrans
+#  undef wctrans
 
-#  endif // __cplusplus
-#endif   // defined(__cplusplus) && __cplusplus < 201103L && defined(_LIBCPP_USE_FROZEN_CXX03_HEADERS)
+#endif // __cplusplus
 
 #endif // _LIBCPP_WCTYPE_H
diff --git a/lib/libcxx/src/any.cpp b/lib/libcxx/src/any.cpp
index f3fc715d51..47058f55d8 100644
--- a/lib/libcxx/src/any.cpp
+++ b/lib/libcxx/src/any.cpp
@@ -14,6 +14,8 @@ const char* bad_any_cast::what() const noexcept { return "bad any cast"; }
 
 #include <__config>
 
+#if _LIBCPP_AVAILABILITY_MINIMUM_HEADER_VERSION < 7
+
 //  Preserve std::experimental::any_bad_cast for ABI compatibility
 //  Even though it no longer exists in a header file
 _LIBCPP_BEGIN_NAMESPACE_LFTS
@@ -25,4 +27,6 @@ public:
 
 const char* bad_any_cast::what() const noexcept { return "bad any cast"; }
 
+#endif
+
 _LIBCPP_END_NAMESPACE_LFTS
diff --git a/lib/libcxx/src/atomic.cpp b/lib/libcxx/src/atomic.cpp
index 903084da05..3948f8b037 100644
--- a/lib/libcxx/src/atomic.cpp
+++ b/lib/libcxx/src/atomic.cpp
@@ -9,8 +9,13 @@
 #include <__thread/timed_backoff_policy.h>
 #include <atomic>
 #include <climits>
+#include <cstddef>
+#include <cstdint>
+#include <cstring>
 #include <functional>
+#include <new>
 #include <thread>
+#include <type_traits>
 
 #include "include/apple_availability.h"
 
@@ -41,6 +46,11 @@
 // OpenBSD has no indirect syscalls
 #  define _LIBCPP_FUTEX(...) futex(__VA_ARGS__)
 
+#elif defined(_WIN32)
+
+#  include <memory>
+#  include <windows.h>
+
 #else // <- Add other operating systems here
 
 // Baseline needs no new headers
@@ -49,17 +59,34 @@
 
 #endif
 
+_LIBCPP_PUSH_MACROS
+#include <__undef_macros>
+
 _LIBCPP_BEGIN_NAMESPACE_STD
 
+struct NoTimeout {};
+
 #ifdef __linux__
 
-static void
-__libcpp_platform_wait_on_address(__cxx_atomic_contention_t const volatile* __ptr, __cxx_contention_t __val) {
-  static constexpr timespec __timeout = {2, 0};
-  _LIBCPP_FUTEX(__ptr, FUTEX_WAIT_PRIVATE, __val, &__timeout, 0, 0);
+template <std::size_t _Size, class MaybeTimeout>
+static void __platform_wait_on_address(void const* __ptr, void const* __val, MaybeTimeout maybe_timeout_ns) {
+  static_assert(_Size == 4, "Can only wait on 4 bytes value");
+  alignas(__cxx_contention_t) char buffer[_Size];
+  std::memcpy(&buffer, const_cast<const void*>(__val), _Size);
+  static constexpr timespec __default_timeout = {2, 0};
+  timespec __timeout;
+  if constexpr (is_same_v<MaybeTimeout, NoTimeout>) {
+    __timeout = __default_timeout;
+  } else {
+    __timeout.tv_sec  = maybe_timeout_ns / 1'000'000'000;
+    __timeout.tv_nsec = maybe_timeout_ns % 1'000'000'000;
+  }
+  _LIBCPP_FUTEX(__ptr, FUTEX_WAIT_PRIVATE, *reinterpret_cast<__cxx_contention_t const*>(&buffer), &__timeout, 0, 0);
 }
 
-static void __libcpp_platform_wake_by_address(__cxx_atomic_contention_t const volatile* __ptr, bool __notify_one) {
+template <std::size_t _Size>
+static void __platform_wake_by_address(void const* __ptr, bool __notify_one) {
+  static_assert(_Size == 4, "Can only wake up on 4 bytes value");
   _LIBCPP_FUTEX(__ptr, FUTEX_WAKE_PRIVATE, __notify_one ? 1 : INT_MAX, 0, 0, 0);
 }
 
@@ -70,19 +97,41 @@ extern "C" int __ulock_wait(
 extern "C" int __ulock_wake(uint32_t operation, void* addr, uint64_t wake_value);
 
 // https://github.com/apple/darwin-xnu/blob/2ff845c2e033bd0ff64b5b6aa6063a1f8f65aa32/bsd/sys/ulock.h#L82
+#  define UL_COMPARE_AND_WAIT 1
 #  define UL_COMPARE_AND_WAIT64 5
 #  define ULF_WAKE_ALL 0x00000100
 
-static void
-__libcpp_platform_wait_on_address(__cxx_atomic_contention_t const volatile* __ptr, __cxx_contention_t __val) {
-  static_assert(sizeof(__cxx_atomic_contention_t) == 8, "Waiting on 8 bytes value");
-  __ulock_wait(UL_COMPARE_AND_WAIT64, const_cast<__cxx_atomic_contention_t*>(__ptr), __val, 0);
+template <std::size_t _Size, class MaybeTimeout>
+static void __platform_wait_on_address(void const* __ptr, void const* __val, MaybeTimeout maybe_timeout_ns) {
+  static_assert(_Size == 8 || _Size == 4, "Can only wait on 8 bytes or 4 bytes value");
+  auto __timeout_us = [&] {
+    if constexpr (is_same_v<MaybeTimeout, NoTimeout>) {
+      return uint32_t(0);
+    } else {
+      return std::max(static_cast<uint32_t>(maybe_timeout_ns / 1000), uint32_t(1));
+    }
+  }();
+  if constexpr (_Size == 4) {
+    alignas(uint32_t) char buffer[_Size];
+    std::memcpy(&buffer, const_cast<const void*>(__val), _Size);
+    __ulock_wait(
+        UL_COMPARE_AND_WAIT, const_cast<void*>(__ptr), *reinterpret_cast<uint32_t const*>(&buffer), __timeout_us);
+  } else {
+    alignas(uint64_t) char buffer[_Size];
+    std::memcpy(&buffer, const_cast<const void*>(__val), _Size);
+    __ulock_wait(
+        UL_COMPARE_AND_WAIT64, const_cast<void*>(__ptr), *reinterpret_cast<uint64_t const*>(&buffer), __timeout_us);
+  }
 }
 
-static void __libcpp_platform_wake_by_address(__cxx_atomic_contention_t const volatile* __ptr, bool __notify_one) {
-  static_assert(sizeof(__cxx_atomic_contention_t) == 8, "Waking up on 8 bytes value");
-  __ulock_wake(
-      UL_COMPARE_AND_WAIT64 | (__notify_one ? 0 : ULF_WAKE_ALL), const_cast<__cxx_atomic_contention_t*>(__ptr), 0);
+template <std::size_t _Size>
+static void __platform_wake_by_address(void const* __ptr, bool __notify_one) {
+  static_assert(_Size == 8 || _Size == 4, "Can only wake up on 8 bytes or 4 bytes value");
+
+  if constexpr (_Size == 4)
+    __ulock_wake(UL_COMPARE_AND_WAIT | (__notify_one ? 0 : ULF_WAKE_ALL), const_cast<void*>(__ptr), 0);
+  else
+    __ulock_wake(UL_COMPARE_AND_WAIT64 | (__notify_one ? 0 : ULF_WAKE_ALL), const_cast<void*>(__ptr), 0);
 }
 
 #elif defined(__FreeBSD__) && __SIZEOF_LONG__ == 8
@@ -92,119 +141,355 @@ static void __libcpp_platform_wake_by_address(__cxx_atomic_contention_t const vo
  * limit its use to architectures where long and int64_t are synonyms.
  */
 
-static void
-__libcpp_platform_wait_on_address(__cxx_atomic_contention_t const volatile* __ptr, __cxx_contention_t __val) {
-  _umtx_op(const_cast<__cxx_atomic_contention_t*>(__ptr), UMTX_OP_WAIT, __val, nullptr, nullptr);
+template <std::size_t _Size, class MaybeTimeout>
+static void __platform_wait_on_address(void const* __ptr, void const* __val, MaybeTimeout maybe_timeout_ns) {
+  static_assert(_Size == 8, "Can only wait on 8 bytes value");
+  // At the moment, FreeBSD is stuck on the stable ABI, which only supports platform wait with __cxx_contention_t.
+  // It is safe to reinterpret_cast the value because only a __cxx_contention_t is ever passed in under this ABI.
+  // If FreeBSD decides in the future to experiment with the unstable ABI to support more types, this cast will no
+  // longer be safe.
+  __cxx_contention_t value = *reinterpret_cast<const __cxx_contention_t*>(__val);
+  if constexpr (is_same_v<MaybeTimeout, NoTimeout>) {
+    _umtx_op(const_cast<void*>(__ptr), UMTX_OP_WAIT, value, nullptr, nullptr);
+  } else {
+    timespec timeout{};
+    timeout.tv_sec  = maybe_timeout_ns / 1'000'000'000;
+    timeout.tv_nsec = maybe_timeout_ns % 1'000'000'000;
+
+    _umtx_op(const_cast<void*>(__ptr),
+             UMTX_OP_WAIT,
+             value,
+             reinterpret_cast<void*>(static_cast<uintptr_t>(sizeof(timeout))),
+             &timeout);
+  }
 }
 
-static void __libcpp_platform_wake_by_address(__cxx_atomic_contention_t const volatile* __ptr, bool __notify_one) {
-  _umtx_op(const_cast<__cxx_atomic_contention_t*>(__ptr), UMTX_OP_WAKE, __notify_one ? 1 : INT_MAX, nullptr, nullptr);
+template <std::size_t _Size>
+static void __platform_wake_by_address(void const* __ptr, bool __notify_one) {
+  static_assert(_Size == 8, "Can only wake up on 8 bytes value");
+  _umtx_op(const_cast<void*>(__ptr), UMTX_OP_WAKE, __notify_one ? 1 : INT_MAX, nullptr, nullptr);
+}
+
+#elif defined(_WIN32)
+
+static void* win32_get_synch_api_function(const char* function_name) {
+  // Attempt to load the API set. Note that as per the Microsoft STL implementation, we assume this API is already
+  // loaded and accessible. While this isn't explicitly guaranteed by publicly available Win32 API documentation, it is
+  // true in practice, and may be guaranteed by internal documentation not released publicly. In any case the fact that
+  // the Microsoft STL made this assumption is a reasonable basis to say that we can too. The alternative would be
+  // to use LoadLibrary, but then leak the module handle. We can't call FreeLibrary, as this would have to be triggered
+  // by a global static destructor, which would hang off DllMain, and calling FreeLibrary from DllMain is explicitly
+  // mentioned as not being allowed:
+  // https://learn.microsoft.com/en-us/windows/win32/dlls/dllmain
+  // Given the range of bad options here, we have chosen to mirror what Microsoft did, as it seems fair to assume that
+  // Microsoft will guarantee compatibility for us, as we are exposed to the same conditions as all existing Windows
+  // apps using the Microsoft STL VS2015/2017/2019/2022 runtimes, where Windows 7 support has not been excluded at
+  // compile time.
+  static auto module_handle = GetModuleHandleW(L"api-ms-win-core-synch-l1-2-0.dll");
+  if (module_handle == nullptr) {
+    return nullptr;
+  }
+
+  // Attempt to locate the function in the API and return the result to the caller. Note that the NULL return from this
+  // method is documented as being interchangeable with nullptr.
+  // https://devblogs.microsoft.com/oldnewthing/20180307-00/?p=98175
+  return reinterpret_cast<void*>(GetProcAddress(module_handle, function_name));
+}
+
+template <std::size_t _Size, class MaybeTimeout>
+static void __platform_wait_on_address(void const* __ptr, void const* __val, MaybeTimeout maybe_timeout_ns) {
+  static_assert(_Size == 8, "Can only wait on 8 bytes value");
+  // WaitOnAddress was added in Windows 8 (build 9200)
+  static auto wait_on_address =
+      reinterpret_cast<BOOL(WINAPI*)(void*, PVOID, SIZE_T, DWORD)>(win32_get_synch_api_function("WaitOnAddress"));
+  if (wait_on_address != nullptr) {
+    auto timeout_ms = [&]() -> DWORD {
+      if constexpr (is_same_v<MaybeTimeout, NoTimeout>) {
+        return INFINITE;
+      } else {
+        uint64_t ms = maybe_timeout_ns / 1'000'000;
+        if (ms == 0 && maybe_timeout_ns > 100'000)
+          // Round up to 1ms if requested between 100us - 1ms
+          return 1;
+
+        return static_cast<DWORD>(std::min(static_cast<uint64_t>(INFINITE), ms));
+      }
+    }();
+    wait_on_address(const_cast<void*>(__ptr), const_cast<void*>(__val), _Size, timeout_ms);
+  } else {
+    std::chrono::nanoseconds timeout = std::chrono::nanoseconds(0);
+    if constexpr (!is_same_v<MaybeTimeout, NoTimeout>) {
+      timeout = std::chrono::nanoseconds(maybe_timeout_ns);
+    }
+    __libcpp_thread_poll_with_backoff(
+        [=]() -> bool { return std::memcmp(const_cast<const void*>(__ptr), __val, _Size) != 0; },
+        __libcpp_timed_backoff_policy(),
+        timeout);
+  }
+}
+
+template <std::size_t _Size>
+static void __platform_wake_by_address(void const* __ptr, bool __notify_one) {
+  static_assert(_Size == 8, "Can only wake up on 8 bytes value");
+  if (__notify_one) {
+    // WakeByAddressSingle was added in Windows 8 (build 9200)
+    static auto wake_by_address_single =
+        reinterpret_cast<void(WINAPI*)(PVOID)>(win32_get_synch_api_function("WakeByAddressSingle"));
+    if (wake_by_address_single != nullptr) {
+      wake_by_address_single(const_cast<void*>(__ptr));
+    } else {
+      // The fallback implementation of waking does nothing, as the fallback wait implementation just does polling, so
+      // there's nothing to do here.
+    }
+  } else {
+    // WakeByAddressAll was added in Windows 8 (build 9200)
+    static auto wake_by_address_all =
+        reinterpret_cast<void(WINAPI*)(PVOID)>(win32_get_synch_api_function("WakeByAddressAll"));
+    if (wake_by_address_all != nullptr) {
+      wake_by_address_all(const_cast<void*>(__ptr));
+    } else {
+      // The fallback implementation of waking does nothing, as the fallback wait implementation just does polling, so
+      // there's nothing to do here.
+    }
+  }
 }
 
 #else // <- Add other operating systems here
 
 // Baseline is just a timed backoff
 
-static void
-__libcpp_platform_wait_on_address(__cxx_atomic_contention_t const volatile* __ptr, __cxx_contention_t __val) {
+template <std::size_t _Size, class MaybeTimeout>
+static void __platform_wait_on_address(void const* __ptr, void const* __val, MaybeTimeout maybe_timeout_ns) {
+  std::chrono::nanoseconds timeout = std::chrono::nanoseconds(0);
+  if constexpr (!is_same_v<MaybeTimeout, NoTimeout>) {
+    timeout = std::chrono::nanoseconds(maybe_timeout_ns);
+  }
   __libcpp_thread_poll_with_backoff(
-      [=]() -> bool { return !__cxx_nonatomic_compare_equal(__cxx_atomic_load(__ptr, memory_order_relaxed), __val); },
-      __libcpp_timed_backoff_policy());
+      [=]() -> bool { return std::memcmp(const_cast<const void*>(__ptr), __val, _Size) != 0; },
+      __libcpp_timed_backoff_policy(),
+      timeout);
 }
 
-static void __libcpp_platform_wake_by_address(__cxx_atomic_contention_t const volatile*, bool) {}
+template <std::size_t _Size>
+static void __platform_wake_by_address(void const*, bool) {}
 
 #endif // __linux__
 
-static constexpr size_t __libcpp_contention_table_size = (1 << 8); /* < there's no magic in this number */
-
-struct alignas(64) /*  aim to avoid false sharing */ __libcpp_contention_table_entry {
-  __cxx_atomic_contention_t __contention_state;
-  __cxx_atomic_contention_t __platform_state;
-  inline constexpr __libcpp_contention_table_entry() : __contention_state(0), __platform_state(0) {}
-};
-
-static __libcpp_contention_table_entry __libcpp_contention_table[__libcpp_contention_table_size];
-
-static hash<void const volatile*> __libcpp_contention_hasher;
-
-static __libcpp_contention_table_entry* __libcpp_contention_state(void const volatile* p) {
-  return &__libcpp_contention_table[__libcpp_contention_hasher(p) & (__libcpp_contention_table_size - 1)];
-}
+// =============================
+// Local hidden helper functions
+// =============================
 
 /* Given an atomic to track contention and an atomic to actually wait on, which may be
    the same atomic, we try to detect contention to avoid spuriously calling the platform. */
 
-static void __libcpp_contention_notify(__cxx_atomic_contention_t volatile* __contention_state,
-                                       __cxx_atomic_contention_t const volatile* __platform_state,
-                                       bool __notify_one) {
-  if (0 != __cxx_atomic_load(__contention_state, memory_order_seq_cst))
+template <std::size_t _Size>
+static void
+__contention_notify(__cxx_atomic_contention_t* __waiter_count, void const* __address_to_notify, bool __notify_one) {
+  if (0 != __cxx_atomic_load(__waiter_count, memory_order_seq_cst))
     // We only call 'wake' if we consumed a contention bit here.
-    __libcpp_platform_wake_by_address(__platform_state, __notify_one);
+    __platform_wake_by_address<_Size>(__address_to_notify, __notify_one);
 }
-static __cxx_contention_t
-__libcpp_contention_monitor_for_wait(__cxx_atomic_contention_t volatile* /*__contention_state*/,
-                                     __cxx_atomic_contention_t const volatile* __platform_state) {
-  // We will monitor this value.
-  return __cxx_atomic_load(__platform_state, memory_order_acquire);
-}
-static void __libcpp_contention_wait(__cxx_atomic_contention_t volatile* __contention_state,
-                                     __cxx_atomic_contention_t const volatile* __platform_state,
-                                     __cxx_contention_t __old_value) {
-  __cxx_atomic_fetch_add(__contention_state, __cxx_contention_t(1), memory_order_relaxed);
-  // https://github.com/llvm/llvm-project/issues/109290
+
+template <std::size_t _Size, class MaybeTimeout>
+static void __contention_wait(__cxx_atomic_contention_t* __waiter_count,
+                              void const* __address_to_wait,
+                              void const* __old_value,
+                              MaybeTimeout maybe_timeout_ns) {
+  __cxx_atomic_fetch_add(__waiter_count, __cxx_contention_t(1), memory_order_relaxed);
+  // https://llvm.org/PR109290
   // There are no platform guarantees of a memory barrier in the platform wait implementation
   __cxx_atomic_thread_fence(memory_order_seq_cst);
   // We sleep as long as the monitored value hasn't changed.
-  __libcpp_platform_wait_on_address(__platform_state, __old_value);
-  __cxx_atomic_fetch_sub(__contention_state, __cxx_contention_t(1), memory_order_release);
+  __platform_wait_on_address<_Size>(__address_to_wait, __old_value, maybe_timeout_ns);
+  __cxx_atomic_fetch_sub(__waiter_count, __cxx_contention_t(1), memory_order_release);
+}
+
+static constexpr size_t __contention_table_size = (1 << 8); /* < there's no magic in this number */
+
+static constexpr hash<void const*> __contention_hasher;
+
+// Waiter count table for all atomics with the correct size that use themselves as the wait/notify address.
+
+struct alignas(
+    std::hardware_constructive_interference_size) /*  aim to avoid false sharing */ __contention_state_native {
+  __cxx_atomic_contention_t __waiter_count;
+  constexpr __contention_state_native() : __waiter_count(0) {}
+};
+
+static __contention_state_native __contention_table_native[__contention_table_size];
+
+static __cxx_atomic_contention_t* __get_native_waiter_count(void const* p) {
+  return &__contention_table_native[__contention_hasher(p) & (__contention_table_size - 1)].__waiter_count;
+}
+
+// Global contention table for all atomics with the wrong size that use the global table's atomic as wait/notify
+// address.
+
+struct alignas(
+    std::hardware_constructive_interference_size) /*  aim to avoid false sharing */ __contention_state_global {
+  __cxx_atomic_contention_t __waiter_count;
+  __cxx_atomic_contention_t __platform_state;
+  constexpr __contention_state_global() : __waiter_count(0), __platform_state(0) {}
+};
+
+static __contention_state_global __contention_table_global[__contention_table_size];
+
+static __contention_state_global* __get_global_contention_state(void const* p) {
+  return &__contention_table_global[__contention_hasher(p) & (__contention_table_size - 1)];
 }
 
 /* When the incoming atomic is the wrong size for the platform wait size, need to
    launder the value sequence through an atomic from our table. */
 
-static void __libcpp_atomic_notify(void const volatile* __location) {
-  auto const __entry = __libcpp_contention_state(__location);
+static void __atomic_notify_global_table(void const* __location) {
+  auto const __entry = __get_global_contention_state(__location);
   // The value sequence laundering happens on the next line below.
   __cxx_atomic_fetch_add(&__entry->__platform_state, __cxx_contention_t(1), memory_order_seq_cst);
-  __libcpp_contention_notify(
-      &__entry->__contention_state,
-      &__entry->__platform_state,
-      false /* when laundering, we can't handle notify_one */);
+  __contention_notify<sizeof(__cxx_atomic_contention_t)>(
+      &__entry->__waiter_count, &__entry->__platform_state, false /* when laundering, we can't handle notify_one */);
 }
+
+// =============================
+// New dylib exported symbols
+// =============================
+
+// global
+_LIBCPP_EXPORTED_FROM_ABI __cxx_contention_t __atomic_monitor_global(void const* __location) noexcept {
+  auto const __entry = __get_global_contention_state(__location);
+  return __cxx_atomic_load(&__entry->__platform_state, memory_order_acquire);
+}
+
+_LIBCPP_EXPORTED_FROM_ABI void
+__atomic_wait_global_table(void const* __location, __cxx_contention_t __old_value) noexcept {
+  auto const __entry = __get_global_contention_state(__location);
+  __contention_wait<sizeof(__cxx_atomic_contention_t)>(
+      &__entry->__waiter_count, &__entry->__platform_state, &__old_value, NoTimeout{});
+}
+
+_LIBCPP_EXPORTED_FROM_ABI void __atomic_wait_global_table_with_timeout(
+    void const* __location, __cxx_contention_t __old_value, uint64_t __timeout_ns) _NOEXCEPT {
+  auto const __entry = __get_global_contention_state(__location);
+  __contention_wait<sizeof(__cxx_atomic_contention_t)>(
+      &__entry->__waiter_count, &__entry->__platform_state, &__old_value, __timeout_ns);
+}
+
+_LIBCPP_EXPORTED_FROM_ABI void __atomic_notify_one_global_table(void const* __location) noexcept {
+  __atomic_notify_global_table(__location);
+}
+
+_LIBCPP_EXPORTED_FROM_ABI void __atomic_notify_all_global_table(void const* __location) noexcept {
+  __atomic_notify_global_table(__location);
+}
+
+// native
+
+template <std::size_t _Size>
+_LIBCPP_EXPORTED_FROM_ABI void __atomic_wait_native(void const* __address, void const* __old_value) noexcept {
+  __contention_wait<_Size>(__get_native_waiter_count(__address), __address, __old_value, NoTimeout{});
+}
+
+template <std::size_t _Size>
+_LIBCPP_EXPORTED_FROM_ABI void
+__atomic_wait_native_with_timeout(void const* __address, void const* __old_value, uint64_t __timeout_ns) noexcept {
+  __contention_wait<_Size>(__get_native_waiter_count(__address), __address, __old_value, __timeout_ns);
+}
+
+template <std::size_t _Size>
+_LIBCPP_EXPORTED_FROM_ABI void __atomic_notify_one_native(void const* __location) noexcept {
+  __contention_notify<_Size>(__get_native_waiter_count(__location), __location, true);
+}
+
+template <std::size_t _Size>
+_LIBCPP_EXPORTED_FROM_ABI void __atomic_notify_all_native(void const* __location) noexcept {
+  __contention_notify<_Size>(__get_native_waiter_count(__location), __location, false);
+}
+
+// ==================================================
+// Instantiation of the templates with supported size
+// ==================================================
+
+#if defined(_LIBCPP_ABI_ATOMIC_WAIT_NATIVE_BY_SIZE)
+
+#  define _INSTANTIATE(_SIZE)                                                                                          \
+    template _LIBCPP_EXPORTED_FROM_ABI void __atomic_wait_native<_SIZE>(void const*, void const*) noexcept;            \
+    template _LIBCPP_EXPORTED_FROM_ABI void __atomic_wait_native_with_timeout<_SIZE>(                                  \
+        void const*, void const*, uint64_t) noexcept;                                                                  \
+    template _LIBCPP_EXPORTED_FROM_ABI void __atomic_notify_one_native<_SIZE>(void const*) noexcept;                   \
+    template _LIBCPP_EXPORTED_FROM_ABI void __atomic_notify_all_native<_SIZE>(void const*) noexcept;
+
+_LIBCPP_NATIVE_PLATFORM_WAIT_SIZES(_INSTANTIATE)
+
+#  undef _INSTANTIATE
+
+#else // _LIBCPP_ABI_ATOMIC_WAIT_NATIVE_BY_SIZE
+
+template _LIBCPP_EXPORTED_FROM_ABI void
+__atomic_wait_native<sizeof(__cxx_contention_t)>(void const* __address, void const* __old_value) noexcept;
+
+template _LIBCPP_EXPORTED_FROM_ABI void __atomic_wait_native_with_timeout<sizeof(__cxx_contention_t)>(
+    void const* __address, void const* __old_value, uint64_t) noexcept;
+
+template _LIBCPP_EXPORTED_FROM_ABI void
+__atomic_notify_one_native<sizeof(__cxx_contention_t)>(void const* __location) noexcept;
+
+template _LIBCPP_EXPORTED_FROM_ABI void
+__atomic_notify_all_native<sizeof(__cxx_contention_t)>(void const* __location) noexcept;
+
+#endif // _LIBCPP_ABI_ATOMIC_WAIT_NATIVE_BY_SIZE
+
+// =============================================================
+// Old dylib exported symbols, for backwards compatibility
+// =============================================================
+_LIBCPP_DIAGNOSTIC_PUSH
+_LIBCPP_CLANG_DIAGNOSTIC_IGNORED("-Wmissing-prototypes")
+
 _LIBCPP_EXPORTED_FROM_ABI void __cxx_atomic_notify_one(void const volatile* __location) noexcept {
-  __libcpp_atomic_notify(__location);
+  __atomic_notify_global_table(const_cast<void const*>(__location));
 }
+
 _LIBCPP_EXPORTED_FROM_ABI void __cxx_atomic_notify_all(void const volatile* __location) noexcept {
-  __libcpp_atomic_notify(__location);
+  __atomic_notify_global_table(const_cast<void const*>(__location));
 }
+
 _LIBCPP_EXPORTED_FROM_ABI __cxx_contention_t __libcpp_atomic_monitor(void const volatile* __location) noexcept {
-  auto const __entry = __libcpp_contention_state(__location);
-  return __libcpp_contention_monitor_for_wait(&__entry->__contention_state, &__entry->__platform_state);
+  auto const __entry = __get_global_contention_state(const_cast<void const*>(__location));
+  return __cxx_atomic_load(&__entry->__platform_state, memory_order_acquire);
 }
+
 _LIBCPP_EXPORTED_FROM_ABI void
 __libcpp_atomic_wait(void const volatile* __location, __cxx_contention_t __old_value) noexcept {
-  auto const __entry = __libcpp_contention_state(__location);
-  __libcpp_contention_wait(&__entry->__contention_state, &__entry->__platform_state, __old_value);
+  auto const __entry = __get_global_contention_state(const_cast<void const*>(__location));
+  __contention_wait<sizeof(__cxx_atomic_contention_t)>(
+      &__entry->__waiter_count, &__entry->__platform_state, &__old_value, NoTimeout{});
 }
 
-/* When the incoming atomic happens to be the platform wait size, we still need to use the
-   table for the contention detection, but we can use the atomic directly for the wait. */
-
 _LIBCPP_EXPORTED_FROM_ABI void __cxx_atomic_notify_one(__cxx_atomic_contention_t const volatile* __location) noexcept {
-  __libcpp_contention_notify(&__libcpp_contention_state(__location)->__contention_state, __location, true);
-}
-_LIBCPP_EXPORTED_FROM_ABI void __cxx_atomic_notify_all(__cxx_atomic_contention_t const volatile* __location) noexcept {
-  __libcpp_contention_notify(&__libcpp_contention_state(__location)->__contention_state, __location, false);
-}
-// This function is never used, but still exported for ABI compatibility.
-_LIBCPP_EXPORTED_FROM_ABI __cxx_contention_t
-__libcpp_atomic_monitor(__cxx_atomic_contention_t const volatile* __location) noexcept {
-  return __libcpp_contention_monitor_for_wait(&__libcpp_contention_state(__location)->__contention_state, __location);
-}
-_LIBCPP_EXPORTED_FROM_ABI void
-__libcpp_atomic_wait(__cxx_atomic_contention_t const volatile* __location, __cxx_contention_t __old_value) noexcept {
-  __libcpp_contention_wait(&__libcpp_contention_state(__location)->__contention_state, __location, __old_value);
+  auto __location_cast = const_cast<const void*>(static_cast<const volatile void*>(__location));
+  __contention_notify<sizeof(__cxx_atomic_contention_t)>(
+      __get_native_waiter_count(__location_cast), __location_cast, true);
 }
 
+_LIBCPP_EXPORTED_FROM_ABI void __cxx_atomic_notify_all(__cxx_atomic_contention_t const volatile* __location) noexcept {
+  auto __location_cast = const_cast<const void*>(static_cast<const volatile void*>(__location));
+  __contention_notify<sizeof(__cxx_atomic_contention_t)>(
+      __get_native_waiter_count(__location_cast), __location_cast, false);
+}
+
+_LIBCPP_EXPORTED_FROM_ABI void
+__libcpp_atomic_wait(__cxx_atomic_contention_t const volatile* __location, __cxx_contention_t __old_value) noexcept {
+  auto __location_cast = const_cast<const void*>(static_cast<const volatile void*>(__location));
+  __contention_wait<sizeof(__cxx_atomic_contention_t)>(
+      __get_native_waiter_count(__location_cast), __location_cast, &__old_value, NoTimeout{});
+}
+
+// This function is unused even in the old ABI, but it is still exported for ABI compatibility.
+_LIBCPP_EXPORTED_FROM_ABI __cxx_contention_t
+__libcpp_atomic_monitor(__cxx_atomic_contention_t const volatile* __location) noexcept {
+  return __cxx_atomic_load(__location, memory_order_acquire);
+}
+
+_LIBCPP_DIAGNOSTIC_POP
+
 _LIBCPP_END_NAMESPACE_STD
+
+_LIBCPP_POP_MACROS
diff --git a/lib/libcxx/src/barrier.cpp b/lib/libcxx/src/barrier.cpp
index 868f1bfbaf..72c29b49ad 100644
--- a/lib/libcxx/src/barrier.cpp
+++ b/lib/libcxx/src/barrier.cpp
@@ -60,11 +60,12 @@ public:
 _LIBCPP_EXPORTED_FROM_ABI __barrier_algorithm_base* __construct_barrier_algorithm_base(ptrdiff_t& __expected) {
   return new __barrier_algorithm_base(__expected);
 }
-_LIBCPP_EXPORTED_FROM_ABI bool
-__arrive_barrier_algorithm_base(__barrier_algorithm_base* __barrier, __barrier_phase_t __old_phase) noexcept {
+_LIBCPP_EXPORTED_FROM_ABI bool __arrive_barrier_algorithm_base(
+    _LIBCPP_NOESCAPE __barrier_algorithm_base* __barrier, __barrier_phase_t __old_phase) noexcept {
   return __barrier->__arrive(__old_phase);
 }
-_LIBCPP_EXPORTED_FROM_ABI void __destroy_barrier_algorithm_base(__barrier_algorithm_base* __barrier) noexcept {
+_LIBCPP_EXPORTED_FROM_ABI void
+__destroy_barrier_algorithm_base(_LIBCPP_NOESCAPE __barrier_algorithm_base* __barrier) noexcept {
   delete __barrier;
 }
 
diff --git a/lib/libcxx/src/charconv.cpp b/lib/libcxx/src/charconv.cpp
index 5e8cb7d977..148068b07e 100644
--- a/lib/libcxx/src/charconv.cpp
+++ b/lib/libcxx/src/charconv.cpp
@@ -14,17 +14,20 @@
 
 _LIBCPP_BEGIN_NAMESPACE_STD
 
-#ifndef _LIBCPP_ABI_DO_NOT_EXPORT_TO_CHARS_BASE_10
+#if _LIBCPP_AVAILABILITY_MINIMUM_HEADER_VERSION < 15
 
 namespace __itoa {
 
+_LIBCPP_DIAGNOSTIC_PUSH
+_LIBCPP_CLANG_DIAGNOSTIC_IGNORED("-Wmissing-prototypes")
+// These functions exist for ABI compatibility, so we don't ever want a declaration prior to the definition.
 _LIBCPP_EXPORTED_FROM_ABI char* __u32toa(uint32_t value, char* buffer) noexcept { return __base_10_u32(buffer, value); }
-
 _LIBCPP_EXPORTED_FROM_ABI char* __u64toa(uint64_t value, char* buffer) noexcept { return __base_10_u64(buffer, value); }
+_LIBCPP_DIAGNOSTIC_POP
 
 } // namespace __itoa
 
-#endif // _LIBCPP_ABI_DO_NOT_EXPORT_TO_CHARS_BASE_10
+#endif // _LIBCPP_AVAILABILITY_MINIMUM_HEADER_VERSION < 15
 
 // The original version of floating-point to_chars was written by Microsoft and
 // contributed with the following license.
diff --git a/lib/libcxx/src/condition_variable_destructor.cpp b/lib/libcxx/src/condition_variable_destructor.cpp
index f6ffe33685..fc4b4a601d 100644
--- a/lib/libcxx/src/condition_variable_destructor.cpp
+++ b/lib/libcxx/src/condition_variable_destructor.cpp
@@ -14,7 +14,7 @@
 #include <__config>
 #include <__thread/support.h>
 
-#if _LIBCPP_ABI_VERSION == 1 || !_LIBCPP_HAS_TRIVIAL_CONDVAR_DESTRUCTION
+#if _LIBCPP_AVAILABILITY_MINIMUM_HEADER_VERSION < 9 || !_LIBCPP_HAS_TRIVIAL_CONDVAR_DESTRUCTION
 #  define NEEDS_CONDVAR_DESTRUCTOR
 #endif
 
diff --git a/lib/libcxx/src/error_category.cpp b/lib/libcxx/src/error_category.cpp
index 8ae460fb5f..9c0ca6a04a 100644
--- a/lib/libcxx/src/error_category.cpp
+++ b/lib/libcxx/src/error_category.cpp
@@ -8,7 +8,9 @@
 
 #include <__config>
 
-#ifdef _LIBCPP_DEPRECATED_ABI_LEGACY_LIBRARY_DEFINITIONS_FOR_INLINE_FUNCTIONS
+// This has technically been removed in LLVM 3.4
+#if !defined(_LIBCPP_OBJECT_FORMAT_COFF) && !defined(_LIBCPP_OBJECT_FORMAT_XCOFF) &&                                   \
+    _LIBCPP_AVAILABILITY_MINIMUM_HEADER_VERSION < 4
 #  define _LIBCPP_ERROR_CATEGORY_DEFINE_LEGACY_INLINE_FUNCTIONS
 #endif
 
diff --git a/lib/libcxx/src/exception.cpp b/lib/libcxx/src/exception.cpp
index ac6324cd9f..9932141006 100644
--- a/lib/libcxx/src/exception.cpp
+++ b/lib/libcxx/src/exception.cpp
@@ -9,20 +9,12 @@
 #define _LIBCPP_ENABLE_CXX20_REMOVED_UNCAUGHT_EXCEPTION
 #define _LIBCPP_DISABLE_DEPRECATION_WARNINGS
 
-#include <exception>
-#include <new>
-#include <typeinfo>
-
-#if defined(LIBCXXRT) || defined(LIBCXX_BUILDING_LIBCXXABI)
-#  include <cxxabi.h>
-using namespace __cxxabiv1;
-#  define HAVE_DEPENDENT_EH_ABI 1
-#endif
+#include <__config>
 
 #if defined(_LIBCPP_ABI_MICROSOFT)
 #  include "support/runtime/exception_msvc.ipp"
 #  include "support/runtime/exception_pointer_msvc.ipp"
-#elif defined(_LIBCPPABI_VERSION)
+#elif defined(LIBCXX_BUILDING_LIBCXXABI)
 #  include "support/runtime/exception_libcxxabi.ipp"
 #  include "support/runtime/exception_pointer_cxxabi.ipp"
 #elif defined(LIBCXXRT)
diff --git a/lib/libcxx/src/experimental/time_zone.cpp b/lib/libcxx/src/experimental/time_zone.cpp
index a735800b60..2cbce14af4 100644
--- a/lib/libcxx/src/experimental/time_zone.cpp
+++ b/lib/libcxx/src/experimental/time_zone.cpp
@@ -720,7 +720,7 @@ __get_sys_info(sys_seconds __time,
 // Iff the "offsets" are the same '__current.__end' is replaced with
 // '__next.__end', which effectively merges the two objects in one object. The
 // function returns true if a merge occurred.
-[[nodiscard]] bool __merge_continuation(sys_info& __current, const sys_info& __next) {
+[[nodiscard]] static bool __merge_continuation(sys_info& __current, const sys_info& __next) {
   if (__current.end != __next.begin)
     return false;
 
diff --git a/lib/libcxx/src/experimental/tzdb.cpp b/lib/libcxx/src/experimental/tzdb.cpp
index 694faf5f23..e750810533 100644
--- a/lib/libcxx/src/experimental/tzdb.cpp
+++ b/lib/libcxx/src/experimental/tzdb.cpp
@@ -49,6 +49,8 @@ _LIBCPP_BEGIN_NAMESPACE_STD
 
 namespace chrono {
 
+_LIBCPP_DIAGNOSTIC_PUSH
+_LIBCPP_CLANG_DIAGNOSTIC_IGNORED("-Wmissing-prototypes")
 // This function is weak so it can be overriden in the tests. The
 // declaration is in the test header test/support/test_tzdb.h
 _LIBCPP_WEAK string_view __libcpp_tzdb_directory() {
@@ -59,6 +61,7 @@ _LIBCPP_WEAK string_view __libcpp_tzdb_directory() {
   abort();
 #endif
 }
+_LIBCPP_DIAGNOSTIC_POP
 
 //===----------------------------------------------------------------------===//
 //                           Details
@@ -767,7 +770,7 @@ void __init_tzdb(tzdb& __tzdb, __tz::__rules_storage_type& __rules) {
   // On Linux systems it seems /etc/timezone is deprecated and being phased out.
   // This file is used when /etc/localtime does not exist, or when it exists but
   // is not a symlink. For more information and links see
-  // https://github.com/llvm/llvm-project/issues/105634
+  // https://llvm.org/PR105634
 
   string __name = chrono::__current_zone_environment();
 
diff --git a/lib/libcxx/src/filesystem/error.h b/lib/libcxx/src/filesystem/error.h
index 52a18b2bec..db5d1ae9a7 100644
--- a/lib/libcxx/src/filesystem/error.h
+++ b/lib/libcxx/src/filesystem/error.h
@@ -128,17 +128,8 @@ struct ErrorHandler {
   T report(const error_code& ec, const char* msg, ...) const {
     va_list ap;
     va_start(ap, msg);
-#if _LIBCPP_HAS_EXCEPTIONS
-    try {
-#endif // _LIBCPP_HAS_EXCEPTIONS
-      report_impl(ec, msg, ap);
-#if _LIBCPP_HAS_EXCEPTIONS
-    } catch (...) {
-      va_end(ap);
-      throw;
-    }
-#endif // _LIBCPP_HAS_EXCEPTIONS
-    va_end(ap);
+    __scope_guard guard([&] { va_end(ap); });
+    report_impl(ec, msg, ap);
     return error_value<T>();
   }
 
@@ -148,17 +139,8 @@ struct ErrorHandler {
   T report(errc const& err, const char* msg, ...) const {
     va_list ap;
     va_start(ap, msg);
-#if _LIBCPP_HAS_EXCEPTIONS
-    try {
-#endif // _LIBCPP_HAS_EXCEPTIONS
-      report_impl(make_error_code(err), msg, ap);
-#if _LIBCPP_HAS_EXCEPTIONS
-    } catch (...) {
-      va_end(ap);
-      throw;
-    }
-#endif // _LIBCPP_HAS_EXCEPTIONS
-    va_end(ap);
+    __scope_guard guard([&] { va_end(ap); });
+    report_impl(make_error_code(err), msg, ap);
     return error_value<T>();
   }
 
diff --git a/lib/libcxx/src/filesystem/format_string.h b/lib/libcxx/src/filesystem/format_string.h
index ad6c57579a..8d17b027a6 100644
--- a/lib/libcxx/src/filesystem/format_string.h
+++ b/lib/libcxx/src/filesystem/format_string.h
@@ -11,6 +11,7 @@
 
 #include <__assert>
 #include <__config>
+#include <__utility/scope_guard.h>
 #include <array>
 #include <cstdarg>
 #include <cstddef>
@@ -34,20 +35,19 @@ inline _LIBCPP_ATTRIBUTE_FORMAT(__printf__, 1, 0) string vformat_string(const ch
 
   va_list apcopy;
   va_copy(apcopy, ap);
-  int ret = ::vsnprintf(buf.data(), buf.size(), msg, apcopy);
+  int size = ::vsnprintf(buf.data(), buf.size(), msg, apcopy);
   va_end(apcopy);
 
   string result;
-  if (static_cast<size_t>(ret) < buf.size()) {
-    result.assign(buf.data(), static_cast<size_t>(ret));
+  if (static_cast<size_t>(size) < buf.size()) {
+    result.assign(buf.data(), static_cast<size_t>(size));
   } else {
     // we did not provide a long enough buffer on our first attempt. The
     // return value is the number of bytes (excluding the null byte) that are
     // needed for formatting.
-    size_t size_with_null = static_cast<size_t>(ret) + 1;
-    result.__resize_default_init(size_with_null - 1);
-    ret = ::vsnprintf(&result[0], size_with_null, msg, ap);
-    _LIBCPP_ASSERT_INTERNAL(static_cast<size_t>(ret) == (size_with_null - 1), "TODO");
+    result.resize_and_overwrite(size, [&](char* res, size_t n) { return ::vsnprintf(res, n, msg, ap); });
+    _LIBCPP_ASSERT_INTERNAL(static_cast<size_t>(size) == result.size(),
+                            "vsnprintf did not result in the same number of characters as the first attempt?");
   }
   return result;
 }
@@ -56,17 +56,8 @@ inline _LIBCPP_ATTRIBUTE_FORMAT(__printf__, 1, 2) string format_string(const cha
   string ret;
   va_list ap;
   va_start(ap, msg);
-#if _LIBCPP_HAS_EXCEPTIONS
-  try {
-#endif // _LIBCPP_HAS_EXCEPTIONS
-    ret = detail::vformat_string(msg, ap);
-#if _LIBCPP_HAS_EXCEPTIONS
-  } catch (...) {
-    va_end(ap);
-    throw;
-  }
-#endif // _LIBCPP_HAS_EXCEPTIONS
-  va_end(ap);
+  __scope_guard guard([&] { va_end(ap); });
+  ret = detail::vformat_string(msg, ap);
   return ret;
 }
 
diff --git a/lib/libcxx/src/filesystem/int128_builtins.cpp b/lib/libcxx/src/filesystem/int128_builtins.cpp
index da6f39e7d7..e811b3e6f9 100644
--- a/lib/libcxx/src/filesystem/int128_builtins.cpp
+++ b/lib/libcxx/src/filesystem/int128_builtins.cpp
@@ -16,6 +16,8 @@
 #include <__config>
 #include <climits>
 
+_LIBCPP_CLANG_DIAGNOSTIC_IGNORED("-Wmissing-prototypes") // See the FIXME above
+
 #if _LIBCPP_HAS_INT128
 
 extern "C" __attribute__((no_sanitize("undefined"))) _LIBCPP_EXPORTED_FROM_ABI __int128_t
diff --git a/lib/libcxx/src/filesystem/operations.cpp b/lib/libcxx/src/filesystem/operations.cpp
index b71f94a89d..745db87ce3 100644
--- a/lib/libcxx/src/filesystem/operations.cpp
+++ b/lib/libcxx/src/filesystem/operations.cpp
@@ -41,17 +41,10 @@
 #include <time.h>
 
 // since Linux 4.5 and FreeBSD 13, but the Linux libc wrapper is only provided by glibc >= 2.27 and musl
-#if defined(__linux__)
-#  if defined(_LIBCPP_GLIBC_PREREQ)
-#    if _LIBCPP_GLIBC_PREREQ(2, 27)
-#      define _LIBCPP_FILESYSTEM_USE_COPY_FILE_RANGE
-#    endif
-#  elif _LIBCPP_HAS_MUSL_LIBC
-#    define _LIBCPP_FILESYSTEM_USE_COPY_FILE_RANGE
-#  endif
-#elif defined(__FreeBSD__)
+#if _LIBCPP_GLIBC_PREREQ(2, 27) || _LIBCPP_HAS_MUSL_LIBC || defined(__FreeBSD__)
 #  define _LIBCPP_FILESYSTEM_USE_COPY_FILE_RANGE
 #endif
+
 #if __has_include(<sys/sendfile.h>)
 #  include <sys/sendfile.h>
 #  define _LIBCPP_FILESYSTEM_USE_SENDFILE
diff --git a/lib/libcxx/src/filesystem/path.cpp b/lib/libcxx/src/filesystem/path.cpp
index 9f7dc54fdf..400b6e8988 100644
--- a/lib/libcxx/src/filesystem/path.cpp
+++ b/lib/libcxx/src/filesystem/path.cpp
@@ -292,7 +292,9 @@ path path::lexically_relative(const path& base) const {
   // return a path constructed with 'n' dot-dot elements, followed by the
   // elements of '*this' after the mismatch.
   path Result;
-  // FIXME: Reserve enough room in Result that it won't have to re-allocate.
+  constexpr size_t ElemSize      = 2; // ".."
+  constexpr size_t SeparatorSize = 1; // separator is always a single char
+  Result.__reserve(ElemCount * (ElemSize + SeparatorSize) + SeparatorSize + PP.Path.size());
   while (ElemCount--)
     Result /= PATHSTR("..");
   for (; PP; ++PP)
diff --git a/lib/libcxx/src/include/aligned_alloc.h b/lib/libcxx/src/include/aligned_alloc.h
new file mode 100644
index 0000000000..24ca26ce04
--- /dev/null
+++ b/lib/libcxx/src/include/aligned_alloc.h
@@ -0,0 +1,65 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef _LIBCPP_SRC_ALIGNED_ALLOC_H
+#define _LIBCPP_SRC_ALIGNED_ALLOC_H
+
+#include <__config>
+#include <cstdlib>
+
+#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
+#  pragma GCC system_header
+#endif
+
+_LIBCPP_BEGIN_NAMESPACE_STD
+
+#if _LIBCPP_HAS_LIBRARY_ALIGNED_ALLOCATION
+
+// Low-level helpers to call the aligned allocation and deallocation functions
+// on the target platform. This is used to implement libc++'s own memory
+// allocation routines -- if you need to allocate memory inside the library,
+// chances are that you want to use `__libcpp_allocate` instead.
+//
+// Returns the allocated memory, or `nullptr` on failure.
+inline _LIBCPP_HIDE_FROM_ABI void* __libcpp_aligned_alloc(std::size_t __alignment, std::size_t __size) {
+#  if defined(_LIBCPP_MSVCRT_LIKE)
+  return ::_aligned_malloc(__size, __alignment); // note: size comes first here, alignment second
+
+// Android only provides aligned_alloc when targeting API 28 or higher.
+#  elif !defined(__ANDROID__) || __ANDROID_API__ >= 28
+  // aligned_alloc() requires that __size is a multiple of __alignment,
+  // whereas C++ [new.delete.general] only states "if the value of an
+  // alignment argument passed to any of these functions is not a valid
+  // alignment value, the behavior is undefined".
+  // To handle calls such as ::operator new(1, std::align_val_t(128)), we
+  // round __size up to the next multiple of __alignment.
+  size_t __rounded_size = (__size + __alignment - 1) & ~(__alignment - 1); // assumes power-of-two __alignment
+  // Rounding up could have wrapped around to zero, so we pass the larger of
+  // __size and __rounded_size to keep the call from succeeding in that case.
+  return ::aligned_alloc(__alignment, __size > __rounded_size ? __size : __rounded_size);
+#  else
+  void* __result = nullptr;
+  (void)::posix_memalign(&__result, __alignment, __size);
+  // If posix_memalign fails, __result is unmodified so we still return `nullptr`.
+  return __result;
+#  endif
+}
+
+inline _LIBCPP_HIDE_FROM_ABI void __libcpp_aligned_free(void* __ptr) {
+#  if defined(_LIBCPP_MSVCRT_LIKE)
+  ::_aligned_free(__ptr); // counterpart of the ::_aligned_malloc call in __libcpp_aligned_alloc
+#  else
+  ::free(__ptr); // the aligned_alloc / posix_memalign paths of __libcpp_aligned_alloc pair with plain free
+#  endif
+}
+
+#endif // _LIBCPP_HAS_LIBRARY_ALIGNED_ALLOCATION
+
+_LIBCPP_END_NAMESPACE_STD
+
+#endif // _LIBCPP_SRC_ALIGNED_ALLOC_H
diff --git a/lib/libcxx/src/include/config_elast.h b/lib/libcxx/src/include/config_elast.h
index 7edff2d937..be665a97bf 100644
--- a/lib/libcxx/src/include/config_elast.h
+++ b/lib/libcxx/src/include/config_elast.h
@@ -23,7 +23,7 @@
 #  define _LIBCPP_ELAST ELAST
 #elif defined(__LLVM_LIBC__)
 // No _LIBCPP_ELAST needed for LLVM libc
-#elif defined(_NEWLIB_VERSION)
+#elif _LIBCPP_LIBC_NEWLIB
 #  define _LIBCPP_ELAST __ELASTERROR
 #elif defined(__NuttX__)
 // No _LIBCPP_ELAST needed on NuttX
diff --git a/lib/libcxx/src/include/from_chars_floating_point.h b/lib/libcxx/src/include/from_chars_floating_point.h
index 19eeeb28fb..a493103234 100644
--- a/lib/libcxx/src/include/from_chars_floating_point.h
+++ b/lib/libcxx/src/include/from_chars_floating_point.h
@@ -9,11 +9,6 @@
 #ifndef _LIBCPP_SRC_INCLUDE_FROM_CHARS_FLOATING_POINT_H
 #define _LIBCPP_SRC_INCLUDE_FROM_CHARS_FLOATING_POINT_H
 
-// These headers are in the shared LLVM-libc header library.
-#include "shared/fp_bits.h"
-#include "shared/str_to_float.h"
-#include "shared/str_to_integer.h"
-
 #include <__assert>
 #include <__config>
 #include <cctype>
@@ -21,6 +16,15 @@
 #include <concepts>
 #include <limits>
 
+// Make sure we use libc++'s assertion machinery within the shared code we use
+// from LLVM libc.
+#define LIBC_ASSERT(cond) _LIBCPP_ASSERT((cond), _LIBCPP_TOSTRING(cond))
+
+// These headers are in the shared LLVM-libc header library.
+#include "shared/fp_bits.h"
+#include "shared/str_to_float.h"
+#include "shared/str_to_integer.h"
+
 // Included for the _Floating_type_traits class
 #include "to_chars_floating_point.h"
 
@@ -193,7 +197,7 @@ struct __exponent_result {
 // __offset, 0, false. This allows using the results unconditionally, the
 // __present is important for the scientific notation, where the value is
 // mandatory.
-__exponent_result __parse_exponent(const char* __input, size_t __n, size_t __offset, char __marker) {
+static __exponent_result __parse_exponent(const char* __input, size_t __n, size_t __offset, char __marker) {
   if (__offset + 1 < __n &&                          // an exponent always needs at least one digit.
       std::tolower(__input[__offset]) == __marker && //
       !std::isspace(__input[__offset + 1])           // leading whitespace is not allowed.
@@ -213,7 +217,7 @@ __exponent_result __parse_exponent(const char* __input, size_t __n, size_t __off
 }
 
 // Here we do this operation as int64 to avoid overflow.
-int32_t __merge_exponents(int64_t __fractional, int64_t __exponent, int __max_biased_exponent) {
+static int32_t __merge_exponents(int64_t __fractional, int64_t __exponent, int __max_biased_exponent) {
   int64_t __sum = __fractional + __exponent;
 
   if (__sum > __max_biased_exponent)
diff --git a/lib/libcxx/src/include/overridable_function.h b/lib/libcxx/src/include/overridable_function.h
index 0b43f27148..288f3efa9c 100644
--- a/lib/libcxx/src/include/overridable_function.h
+++ b/lib/libcxx/src/include/overridable_function.h
@@ -29,12 +29,12 @@
 // This is a low-level utility which does not work on all platforms, since it needs
 // to make assumptions about the object file format in use. Furthermore, it requires
 // the "base definition" of the function (the one we want to check whether it has been
-// overridden) to be defined using the _LIBCPP_OVERRIDABLE_FUNCTION macro.
+// overridden) to be defined using the OVERRIDABLE_FUNCTION macro.
 //
 // This currently works with Mach-O files (used on Darwin) and with ELF files (used on Linux
 // and others). On platforms where we know how to implement this detection, the macro
 // _LIBCPP_CAN_DETECT_OVERRIDDEN_FUNCTION is defined to 1, and it is defined to 0 on
-// other platforms. The _LIBCPP_OVERRIDABLE_FUNCTION macro is defined to perform a normal
+// other platforms. The OVERRIDABLE_FUNCTION macro is defined to perform a normal
 // function definition on unsupported platforms so that it can be used to define functions
 // regardless of whether detection is actually supported.
 //
@@ -44,7 +44,7 @@
 // Let's say we want to check whether a weak function `f` has been overridden by the user.
 // The general mechanism works by placing `f`'s definition (in the libc++ built library)
 // inside a special section, which we do using the `__section__` attribute via the
-// _LIBCPP_OVERRIDABLE_FUNCTION macro.
+// OVERRIDABLE_FUNCTION macro.
 //
 // Then, when comes the time to check whether the function has been overridden, we take
 // the address of the function and we check whether it falls inside the special function
@@ -66,11 +66,10 @@
 #if defined(_LIBCPP_OBJECT_FORMAT_MACHO)
 
 #  define _LIBCPP_CAN_DETECT_OVERRIDDEN_FUNCTION 1
-#  define _LIBCPP_OVERRIDABLE_FUNCTION(type, name, arglist)                                                            \
-    __attribute__((__section__("__TEXT,__lcxx_override,regular,pure_instructions"))) _LIBCPP_WEAK type name arglist
+#  define OVERRIDABLE_FUNCTION                                                                                         \
+    __attribute__((__section__("__TEXT,__lcxx_override,regular,pure_instructions"))) _LIBCPP_WEAK
 
-_LIBCPP_BEGIN_NAMESPACE_STD
-template <typename T, T* _Func>
+_LIBCPP_BEGIN_NAMESPACE_STD template <typename T, T* _Func>
 _LIBCPP_HIDE_FROM_ABI inline bool __is_function_overridden() noexcept {
   // Declare two dummy bytes and give them these special `__asm` values. These values are
   // defined by the linker, which means that referring to `&__lcxx_override_start` will
@@ -100,8 +99,7 @@ _LIBCPP_END_NAMESPACE_STD
 #elif defined(_LIBCPP_OBJECT_FORMAT_ELF) && !defined(__NVPTX__)
 
 #  define _LIBCPP_CAN_DETECT_OVERRIDDEN_FUNCTION 1
-#  define _LIBCPP_OVERRIDABLE_FUNCTION(type, name, arglist)                                                            \
-    __attribute__((__section__("__lcxx_override"))) _LIBCPP_WEAK type name arglist
+#  define OVERRIDABLE_FUNCTION __attribute__((__section__("__lcxx_override"))) _LIBCPP_WEAK
 
 // This is very similar to what we do for Mach-O above. The ELF linker will implicitly define
 // variables with those names corresponding to the start and the end of the section.
@@ -129,7 +127,7 @@ _LIBCPP_END_NAMESPACE_STD
 #else
 
 #  define _LIBCPP_CAN_DETECT_OVERRIDDEN_FUNCTION 0
-#  define _LIBCPP_OVERRIDABLE_FUNCTION(type, name, arglist) _LIBCPP_WEAK type name arglist
+#  define OVERRIDABLE_FUNCTION _LIBCPP_WEAK
 
 #endif
 
diff --git a/lib/libcxx/src/iostream.cpp b/lib/libcxx/src/iostream.cpp
index 416725235c..b216c6ad35 100644
--- a/lib/libcxx/src/iostream.cpp
+++ b/lib/libcxx/src/iostream.cpp
@@ -16,24 +16,40 @@
 
 _LIBCPP_BEGIN_NAMESPACE_STD
 
+// This file implements the various stream objects provided inside <iostream>. We're doing some ODR violations in here,
+// so this is quite fragile. Specifically, the size of the stream objects (i.e. cout, cin etc.) needs to stay the same.
+// For that reason, we have `stream` and `stream_data` separated into two objects. The public `stream` objects only
+// contain the actual stream, while the private `stream_data` objects contain the `basic_streambuf` we're using as well
+// as the mbstate_t. `stream_data` objects are only accessible within the library, so they aren't ABI sensitive and we
+// can change them as we want.
+
+template <class StreamT>
+union stream {
+  constexpr stream() {} // intentionally empty: `value` is constructed later by init_stream()
+  stream(const stream&)            = delete;
+  stream& operator=(const stream&) = delete;
+  constexpr ~stream() {} // intentionally empty: does not run the destructor of `value`
+
+  StreamT value;
+};
+
 template <class StreamT, class BufferT>
 union stream_data {
   constexpr stream_data() {}
   constexpr ~stream_data() {}
   struct {
-    // The stream has to be the first element, since that's referenced by the stream declarations in <iostream>
-    StreamT stream;
     BufferT buffer;
     mbstate_t mb;
   };
-
-  void init(FILE* stdstream) {
-    mb = {};
-    std::construct_at(&buffer, stdstream, &mb);
-    std::construct_at(&stream, &buffer);
-  }
 };
 
+template <class StreamT, class BufferT>
+void init_stream(FILE* stdstream, stream<StreamT>& stream, stream_data<StreamT, BufferT>& data) {
+  data.mb = {}; // reset the conversion state before its address is handed to the buffer
+  std::construct_at(&data.buffer, stdstream, &data.mb); // the buffer wraps the C stream and is given &data.mb
+  std::construct_at(&stream.value, &data.buffer); // built last: the stream is constructed from &data.buffer
+}
+
 #define CHAR_MANGLING_char "D"
 #define CHAR_MANGLING_wchar_t "_W"
 #define CHAR_MANGLING(CharT) CHAR_MANGLING_##CharT
@@ -46,25 +62,28 @@ union stream_data {
 
 #ifdef _LIBCPP_ABI_MICROSOFT
 #  define STREAM(StreamT, BufferT, CharT, var)                                                                         \
-    STRING_DATA_CONSTINIT stream_data<StreamT<CharT>, BufferT<CharT>> var __asm__(                                     \
+    STRING_DATA_CONSTINIT stream_data<StreamT<CharT>, BufferT<CharT>> var##_data;                                      \
+    _LIBCPP_EXPORTED_FROM_ABI STRING_DATA_CONSTINIT stream<StreamT<CharT>> var __asm__(                                \
         "?" #var "@" ABI_NAMESPACE_STR "@std@@3V?$" #StreamT                                                           \
         "@" CHAR_MANGLING(CharT) "U?$char_traits@" CHAR_MANGLING(CharT) "@" ABI_NAMESPACE_STR "@std@@@12@A")
 #else
-#  define STREAM(StreamT, BufferT, CharT, var) STRING_DATA_CONSTINIT stream_data<StreamT<CharT>, BufferT<CharT>> var
+#  define STREAM(StreamT, BufferT, CharT, var)                                                                         \
+    STRING_DATA_CONSTINIT stream_data<StreamT<CharT>, BufferT<CharT>> var##_data;                                      \
+    _LIBCPP_EXPORTED_FROM_ABI STRING_DATA_CONSTINIT stream<StreamT<CharT>> var
 #endif
 
 // These definitions and the declarations in <iostream> technically cause ODR violations, since they have different
 // types (stream_data and {i,o}stream respectively). This means that <iostream> should never be included in this TU.
 
-_LIBCPP_EXPORTED_FROM_ABI STREAM(basic_istream, __stdinbuf, char, cin);
-_LIBCPP_EXPORTED_FROM_ABI STREAM(basic_ostream, __stdoutbuf, char, cout);
-_LIBCPP_EXPORTED_FROM_ABI STREAM(basic_ostream, __stdoutbuf, char, cerr);
-_LIBCPP_EXPORTED_FROM_ABI STREAM(basic_ostream, __stdoutbuf, char, clog);
+STREAM(basic_istream, __stdinbuf, char, cin);
+STREAM(basic_ostream, __stdoutbuf, char, cout);
+STREAM(basic_ostream, __stdoutbuf, char, cerr);
+STREAM(basic_ostream, __stdoutbuf, char, clog);
 #if _LIBCPP_HAS_WIDE_CHARACTERS
-_LIBCPP_EXPORTED_FROM_ABI STREAM(basic_istream, __stdinbuf, wchar_t, wcin);
-_LIBCPP_EXPORTED_FROM_ABI STREAM(basic_ostream, __stdoutbuf, wchar_t, wcout);
-_LIBCPP_EXPORTED_FROM_ABI STREAM(basic_ostream, __stdoutbuf, wchar_t, wcerr);
-_LIBCPP_EXPORTED_FROM_ABI STREAM(basic_ostream, __stdoutbuf, wchar_t, wclog);
+STREAM(basic_istream, __stdinbuf, wchar_t, wcin);
+STREAM(basic_ostream, __stdoutbuf, wchar_t, wcout);
+STREAM(basic_ostream, __stdoutbuf, wchar_t, wcerr);
+STREAM(basic_ostream, __stdoutbuf, wchar_t, wclog);
 #endif // _LIBCPP_HAS_WIDE_CHARACTERS
 
 // Pretend we're inside a system header so the compiler doesn't flag the use of the init_priority
@@ -98,34 +117,34 @@ public:
 DoIOSInit::DoIOSInit() {
   force_locale_initialization();
 
-  cin.init(stdin);
-  cout.init(stdout);
-  cerr.init(stderr);
-  clog.init(stderr);
+  init_stream(stdin, cin, cin_data);
+  init_stream(stdout, cout, cout_data);
+  init_stream(stderr, cerr, cerr_data);
+  init_stream(stderr, clog, clog_data);
 
-  cin.stream.tie(&cout.stream);
-  std::unitbuf(cerr.stream);
-  cerr.stream.tie(&cout.stream);
+  cin.value.tie(&cout.value);
+  std::unitbuf(cerr.value);
+  cerr.value.tie(&cout.value);
 
 #if _LIBCPP_HAS_WIDE_CHARACTERS
-  wcin.init(stdin);
-  wcout.init(stdout);
-  wcerr.init(stderr);
-  wclog.init(stderr);
+  init_stream(stdin, wcin, wcin_data);
+  init_stream(stdout, wcout, wcout_data);
+  init_stream(stderr, wcerr, wcerr_data);
+  init_stream(stderr, wclog, wclog_data);
 
-  wcin.stream.tie(&wcout.stream);
-  std::unitbuf(wcerr.stream);
-  wcerr.stream.tie(&wcout.stream);
+  wcin.value.tie(&wcout.value);
+  std::unitbuf(wcerr.value);
+  wcerr.value.tie(&wcout.value);
 #endif
 }
 
 DoIOSInit::~DoIOSInit() {
-  cout.stream.flush();
-  clog.stream.flush();
+  cout.value.flush();
+  clog.value.flush();
 
 #if _LIBCPP_HAS_WIDE_CHARACTERS
-  wcout.stream.flush();
-  wclog.stream.flush();
+  wcout.value.flush();
+  wclog.value.flush();
 #endif
 }
 
diff --git a/lib/libcxx/src/locale.cpp b/lib/libcxx/src/locale.cpp
index da735865c3..6be0537735 100644
--- a/lib/libcxx/src/locale.cpp
+++ b/lib/libcxx/src/locale.cpp
@@ -60,9 +60,8 @@ struct __libcpp_unique_locale {
 
   __locale::__locale_t __loc_;
 
-private:
-  __libcpp_unique_locale(__libcpp_unique_locale const&);
-  __libcpp_unique_locale& operator=(__libcpp_unique_locale const&);
+  __libcpp_unique_locale(__libcpp_unique_locale const&) = delete;
+  __libcpp_unique_locale& operator=(__libcpp_unique_locale const&) = delete;
 };
 
 #ifdef __cloc_defined
@@ -88,16 +87,6 @@ T& make(Args... args) {
   return *obj;
 }
 
-template <typename T, size_t N>
-inline constexpr size_t countof(const T (&)[N]) {
-  return N;
-}
-
-template <typename T>
-inline constexpr size_t countof(const T* const begin, const T* const end) {
-  return static_cast<size_t>(end - begin);
-}
-
 string build_name(const string& other, const string& one, locale::category c) {
   if (other == "*" || one == "*")
     return "*";
@@ -215,63 +204,58 @@ locale::__imp::__imp(size_t refs) : facet(refs), facets_(N), name_("C") {
 }
 
 locale::__imp::__imp(const string& name, size_t refs) : facet(refs), facets_(N), name_(name) {
-#if _LIBCPP_HAS_EXCEPTIONS
-  try {
-#endif // _LIBCPP_HAS_EXCEPTIONS
-    facets_ = locale::classic().__locale_->facets_;
-    for (unsigned i = 0; i < facets_.size(); ++i)
-      if (facets_[i])
-        facets_[i]->__add_shared();
-    install(new collate_byname<char>(name_));
-#if _LIBCPP_HAS_WIDE_CHARACTERS
-    install(new collate_byname<wchar_t>(name_));
-#endif
-    install(new ctype_byname<char>(name_));
-#if _LIBCPP_HAS_WIDE_CHARACTERS
-    install(new ctype_byname<wchar_t>(name_));
-#endif
-    install(new codecvt_byname<char, char, mbstate_t>(name_));
-#if _LIBCPP_HAS_WIDE_CHARACTERS
-    install(new codecvt_byname<wchar_t, char, mbstate_t>(name_));
-#endif
-    _LIBCPP_SUPPRESS_DEPRECATED_PUSH
-    install(new codecvt_byname<char16_t, char, mbstate_t>(name_));
-    install(new codecvt_byname<char32_t, char, mbstate_t>(name_));
-    _LIBCPP_SUPPRESS_DEPRECATED_POP
-#if _LIBCPP_HAS_CHAR8_T
-    install(new codecvt_byname<char16_t, char8_t, mbstate_t>(name_));
-    install(new codecvt_byname<char32_t, char8_t, mbstate_t>(name_));
-#endif
-    install(new numpunct_byname<char>(name_));
-#if _LIBCPP_HAS_WIDE_CHARACTERS
-    install(new numpunct_byname<wchar_t>(name_));
-#endif
-    install(new moneypunct_byname<char, false>(name_));
-    install(new moneypunct_byname<char, true>(name_));
-#if _LIBCPP_HAS_WIDE_CHARACTERS
-    install(new moneypunct_byname<wchar_t, false>(name_));
-    install(new moneypunct_byname<wchar_t, true>(name_));
-#endif
-    install(new time_get_byname<char>(name_));
-#if _LIBCPP_HAS_WIDE_CHARACTERS
-    install(new time_get_byname<wchar_t>(name_));
-#endif
-    install(new time_put_byname<char>(name_));
-#if _LIBCPP_HAS_WIDE_CHARACTERS
-    install(new time_put_byname<wchar_t>(name_));
-#endif
-    install(new messages_byname<char>(name_));
-#if _LIBCPP_HAS_WIDE_CHARACTERS
-    install(new messages_byname<wchar_t>(name_));
-#endif
-#if _LIBCPP_HAS_EXCEPTIONS
-  } catch (...) {
+  __exception_guard guard([&] {
     for (unsigned i = 0; i < facets_.size(); ++i)
       if (facets_[i])
         facets_[i]->__release_shared();
-    throw;
-  }
-#endif // _LIBCPP_HAS_EXCEPTIONS
+  });
+  facets_ = locale::classic().__locale_->facets_;
+  for (unsigned i = 0; i < facets_.size(); ++i)
+    if (facets_[i])
+      facets_[i]->__add_shared();
+  install(new collate_byname<char>(name_));
+#if _LIBCPP_HAS_WIDE_CHARACTERS
+  install(new collate_byname<wchar_t>(name_));
+#endif
+  install(new ctype_byname<char>(name_));
+#if _LIBCPP_HAS_WIDE_CHARACTERS
+  install(new ctype_byname<wchar_t>(name_));
+#endif
+  install(new codecvt_byname<char, char, mbstate_t>(name_));
+#if _LIBCPP_HAS_WIDE_CHARACTERS
+  install(new codecvt_byname<wchar_t, char, mbstate_t>(name_));
+#endif
+  _LIBCPP_SUPPRESS_DEPRECATED_PUSH
+  install(new codecvt_byname<char16_t, char, mbstate_t>(name_));
+  install(new codecvt_byname<char32_t, char, mbstate_t>(name_));
+  _LIBCPP_SUPPRESS_DEPRECATED_POP
+#if _LIBCPP_HAS_CHAR8_T
+  install(new codecvt_byname<char16_t, char8_t, mbstate_t>(name_));
+  install(new codecvt_byname<char32_t, char8_t, mbstate_t>(name_));
+#endif
+  install(new numpunct_byname<char>(name_));
+#if _LIBCPP_HAS_WIDE_CHARACTERS
+  install(new numpunct_byname<wchar_t>(name_));
+#endif
+  install(new moneypunct_byname<char, false>(name_));
+  install(new moneypunct_byname<char, true>(name_));
+#if _LIBCPP_HAS_WIDE_CHARACTERS
+  install(new moneypunct_byname<wchar_t, false>(name_));
+  install(new moneypunct_byname<wchar_t, true>(name_));
+#endif
+  install(new time_get_byname<char>(name_));
+#if _LIBCPP_HAS_WIDE_CHARACTERS
+  install(new time_get_byname<wchar_t>(name_));
+#endif
+  install(new time_put_byname<char>(name_));
+#if _LIBCPP_HAS_WIDE_CHARACTERS
+  install(new time_put_byname<wchar_t>(name_));
+#endif
+  install(new messages_byname<char>(name_));
+#if _LIBCPP_HAS_WIDE_CHARACTERS
+  install(new messages_byname<wchar_t>(name_));
+#endif
+  guard.__complete();
 }
 
 locale::__imp::__imp(const __imp& other) : facets_(max<size_t>(N, other.facets_.size())), name_(other.name_) {
@@ -287,71 +271,66 @@ locale::__imp::__imp(const __imp& other, const string& name, locale::category c)
   for (unsigned i = 0; i < facets_.size(); ++i)
     if (facets_[i])
       facets_[i]->__add_shared();
-#if _LIBCPP_HAS_EXCEPTIONS
-  try {
-#endif // _LIBCPP_HAS_EXCEPTIONS
-    if (c & locale::collate) {
-      install(new collate_byname<char>(name));
-#if _LIBCPP_HAS_WIDE_CHARACTERS
-      install(new collate_byname<wchar_t>(name));
-#endif
-    }
-    if (c & locale::ctype) {
-      install(new ctype_byname<char>(name));
-#if _LIBCPP_HAS_WIDE_CHARACTERS
-      install(new ctype_byname<wchar_t>(name));
-#endif
-      install(new codecvt_byname<char, char, mbstate_t>(name));
-#if _LIBCPP_HAS_WIDE_CHARACTERS
-      install(new codecvt_byname<wchar_t, char, mbstate_t>(name));
-#endif
-      _LIBCPP_SUPPRESS_DEPRECATED_PUSH
-      install(new codecvt_byname<char16_t, char, mbstate_t>(name));
-      install(new codecvt_byname<char32_t, char, mbstate_t>(name));
-      _LIBCPP_SUPPRESS_DEPRECATED_POP
-#if _LIBCPP_HAS_CHAR8_T
-      install(new codecvt_byname<char16_t, char8_t, mbstate_t>(name));
-      install(new codecvt_byname<char32_t, char8_t, mbstate_t>(name));
-#endif
-    }
-    if (c & locale::monetary) {
-      install(new moneypunct_byname<char, false>(name));
-      install(new moneypunct_byname<char, true>(name));
-#if _LIBCPP_HAS_WIDE_CHARACTERS
-      install(new moneypunct_byname<wchar_t, false>(name));
-      install(new moneypunct_byname<wchar_t, true>(name));
-#endif
-    }
-    if (c & locale::numeric) {
-      install(new numpunct_byname<char>(name));
-#if _LIBCPP_HAS_WIDE_CHARACTERS
-      install(new numpunct_byname<wchar_t>(name));
-#endif
-    }
-    if (c & locale::time) {
-      install(new time_get_byname<char>(name));
-#if _LIBCPP_HAS_WIDE_CHARACTERS
-      install(new time_get_byname<wchar_t>(name));
-#endif
-      install(new time_put_byname<char>(name));
-#if _LIBCPP_HAS_WIDE_CHARACTERS
-      install(new time_put_byname<wchar_t>(name));
-#endif
-    }
-    if (c & locale::messages) {
-      install(new messages_byname<char>(name));
-#if _LIBCPP_HAS_WIDE_CHARACTERS
-      install(new messages_byname<wchar_t>(name));
-#endif
-    }
-#if _LIBCPP_HAS_EXCEPTIONS
-  } catch (...) {
+  __exception_guard guard([&] {
     for (unsigned i = 0; i < facets_.size(); ++i)
       if (facets_[i])
         facets_[i]->__release_shared();
-    throw;
+  });
+  if (c & locale::collate) {
+    install(new collate_byname<char>(name));
+#if _LIBCPP_HAS_WIDE_CHARACTERS
+    install(new collate_byname<wchar_t>(name));
+#endif
   }
-#endif // _LIBCPP_HAS_EXCEPTIONS
+  if (c & locale::ctype) {
+    install(new ctype_byname<char>(name));
+#if _LIBCPP_HAS_WIDE_CHARACTERS
+    install(new ctype_byname<wchar_t>(name));
+#endif
+    install(new codecvt_byname<char, char, mbstate_t>(name));
+#if _LIBCPP_HAS_WIDE_CHARACTERS
+    install(new codecvt_byname<wchar_t, char, mbstate_t>(name));
+#endif
+    _LIBCPP_SUPPRESS_DEPRECATED_PUSH
+    install(new codecvt_byname<char16_t, char, mbstate_t>(name));
+    install(new codecvt_byname<char32_t, char, mbstate_t>(name));
+    _LIBCPP_SUPPRESS_DEPRECATED_POP
+#if _LIBCPP_HAS_CHAR8_T
+    install(new codecvt_byname<char16_t, char8_t, mbstate_t>(name));
+    install(new codecvt_byname<char32_t, char8_t, mbstate_t>(name));
+#endif
+  }
+  if (c & locale::monetary) {
+    install(new moneypunct_byname<char, false>(name));
+    install(new moneypunct_byname<char, true>(name));
+#if _LIBCPP_HAS_WIDE_CHARACTERS
+    install(new moneypunct_byname<wchar_t, false>(name));
+    install(new moneypunct_byname<wchar_t, true>(name));
+#endif
+  }
+  if (c & locale::numeric) {
+    install(new numpunct_byname<char>(name));
+#if _LIBCPP_HAS_WIDE_CHARACTERS
+    install(new numpunct_byname<wchar_t>(name));
+#endif
+  }
+  if (c & locale::time) {
+    install(new time_get_byname<char>(name));
+#if _LIBCPP_HAS_WIDE_CHARACTERS
+    install(new time_get_byname<wchar_t>(name));
+#endif
+    install(new time_put_byname<char>(name));
+#if _LIBCPP_HAS_WIDE_CHARACTERS
+    install(new time_put_byname<wchar_t>(name));
+#endif
+  }
+  if (c & locale::messages) {
+    install(new messages_byname<char>(name));
+#if _LIBCPP_HAS_WIDE_CHARACTERS
+    install(new messages_byname<wchar_t>(name));
+#endif
+  }
+  guard.__complete();
 }
 
 template <class F>
@@ -366,87 +345,83 @@ locale::__imp::__imp(const __imp& other, const __imp& one, locale::category c)
   for (unsigned i = 0; i < facets_.size(); ++i)
     if (facets_[i])
       facets_[i]->__add_shared();
-#if _LIBCPP_HAS_EXCEPTIONS
-  try {
-#endif // _LIBCPP_HAS_EXCEPTIONS
-    if (c & locale::collate) {
-      install_from<std::collate<char> >(one);
-#if _LIBCPP_HAS_WIDE_CHARACTERS
-      install_from<std::collate<wchar_t> >(one);
-#endif
-    }
-    if (c & locale::ctype) {
-      install_from<std::ctype<char> >(one);
-#if _LIBCPP_HAS_WIDE_CHARACTERS
-      install_from<std::ctype<wchar_t> >(one);
-#endif
-      install_from<std::codecvt<char, char, mbstate_t> >(one);
-      _LIBCPP_SUPPRESS_DEPRECATED_PUSH
-      install_from<std::codecvt<char16_t, char, mbstate_t> >(one);
-      install_from<std::codecvt<char32_t, char, mbstate_t> >(one);
-      _LIBCPP_SUPPRESS_DEPRECATED_POP
-#if _LIBCPP_HAS_CHAR8_T
-      install_from<std::codecvt<char16_t, char8_t, mbstate_t> >(one);
-      install_from<std::codecvt<char32_t, char8_t, mbstate_t> >(one);
-#endif
-#if _LIBCPP_HAS_WIDE_CHARACTERS
-      install_from<std::codecvt<wchar_t, char, mbstate_t> >(one);
-#endif
-    }
-    if (c & locale::monetary) {
-      install_from<moneypunct<char, false> >(one);
-      install_from<moneypunct<char, true> >(one);
-#if _LIBCPP_HAS_WIDE_CHARACTERS
-      install_from<moneypunct<wchar_t, false> >(one);
-      install_from<moneypunct<wchar_t, true> >(one);
-#endif
-      install_from<money_get<char> >(one);
-#if _LIBCPP_HAS_WIDE_CHARACTERS
-      install_from<money_get<wchar_t> >(one);
-#endif
-      install_from<money_put<char> >(one);
-#if _LIBCPP_HAS_WIDE_CHARACTERS
-      install_from<money_put<wchar_t> >(one);
-#endif
-    }
-    if (c & locale::numeric) {
-      install_from<numpunct<char> >(one);
-#if _LIBCPP_HAS_WIDE_CHARACTERS
-      install_from<numpunct<wchar_t> >(one);
-#endif
-      install_from<num_get<char> >(one);
-#if _LIBCPP_HAS_WIDE_CHARACTERS
-      install_from<num_get<wchar_t> >(one);
-#endif
-      install_from<num_put<char> >(one);
-#if _LIBCPP_HAS_WIDE_CHARACTERS
-      install_from<num_put<wchar_t> >(one);
-#endif
-    }
-    if (c & locale::time) {
-      install_from<time_get<char> >(one);
-#if _LIBCPP_HAS_WIDE_CHARACTERS
-      install_from<time_get<wchar_t> >(one);
-#endif
-      install_from<time_put<char> >(one);
-#if _LIBCPP_HAS_WIDE_CHARACTERS
-      install_from<time_put<wchar_t> >(one);
-#endif
-    }
-    if (c & locale::messages) {
-      install_from<std::messages<char> >(one);
-#if _LIBCPP_HAS_WIDE_CHARACTERS
-      install_from<std::messages<wchar_t> >(one);
-#endif
-    }
-#if _LIBCPP_HAS_EXCEPTIONS
-  } catch (...) {
+  __exception_guard guard([&] {
     for (unsigned i = 0; i < facets_.size(); ++i)
       if (facets_[i])
         facets_[i]->__release_shared();
-    throw;
+  });
+
+  if (c & locale::collate) {
+    install_from<std::collate<char> >(one);
+#if _LIBCPP_HAS_WIDE_CHARACTERS
+    install_from<std::collate<wchar_t> >(one);
+#endif
   }
-#endif // _LIBCPP_HAS_EXCEPTIONS
+  if (c & locale::ctype) {
+    install_from<std::ctype<char> >(one);
+#if _LIBCPP_HAS_WIDE_CHARACTERS
+    install_from<std::ctype<wchar_t> >(one);
+#endif
+    install_from<std::codecvt<char, char, mbstate_t> >(one);
+    _LIBCPP_SUPPRESS_DEPRECATED_PUSH
+    install_from<std::codecvt<char16_t, char, mbstate_t> >(one);
+    install_from<std::codecvt<char32_t, char, mbstate_t> >(one);
+    _LIBCPP_SUPPRESS_DEPRECATED_POP
+#if _LIBCPP_HAS_CHAR8_T
+    install_from<std::codecvt<char16_t, char8_t, mbstate_t> >(one);
+    install_from<std::codecvt<char32_t, char8_t, mbstate_t> >(one);
+#endif
+#if _LIBCPP_HAS_WIDE_CHARACTERS
+    install_from<std::codecvt<wchar_t, char, mbstate_t> >(one);
+#endif
+  }
+  if (c & locale::monetary) {
+    install_from<moneypunct<char, false> >(one);
+    install_from<moneypunct<char, true> >(one);
+#if _LIBCPP_HAS_WIDE_CHARACTERS
+    install_from<moneypunct<wchar_t, false> >(one);
+    install_from<moneypunct<wchar_t, true> >(one);
+#endif
+    install_from<money_get<char> >(one);
+#if _LIBCPP_HAS_WIDE_CHARACTERS
+    install_from<money_get<wchar_t> >(one);
+#endif
+    install_from<money_put<char> >(one);
+#if _LIBCPP_HAS_WIDE_CHARACTERS
+    install_from<money_put<wchar_t> >(one);
+#endif
+  }
+  if (c & locale::numeric) {
+    install_from<numpunct<char> >(one);
+#if _LIBCPP_HAS_WIDE_CHARACTERS
+    install_from<numpunct<wchar_t> >(one);
+#endif
+    install_from<num_get<char> >(one);
+#if _LIBCPP_HAS_WIDE_CHARACTERS
+    install_from<num_get<wchar_t> >(one);
+#endif
+    install_from<num_put<char> >(one);
+#if _LIBCPP_HAS_WIDE_CHARACTERS
+    install_from<num_put<wchar_t> >(one);
+#endif
+  }
+  if (c & locale::time) {
+    install_from<time_get<char> >(one);
+#if _LIBCPP_HAS_WIDE_CHARACTERS
+    install_from<time_get<wchar_t> >(one);
+#endif
+    install_from<time_put<char> >(one);
+#if _LIBCPP_HAS_WIDE_CHARACTERS
+    install_from<time_put<wchar_t> >(one);
+#endif
+  }
+  if (c & locale::messages) {
+    install_from<std::messages<char> >(one);
+#if _LIBCPP_HAS_WIDE_CHARACTERS
+    install_from<std::messages<wchar_t> >(one);
+#endif
+  }
+  guard.__complete();
 }
 
 locale::__imp::__imp(const __imp& other, facet* f, long id)
@@ -933,7 +908,7 @@ const ctype<char>::mask* ctype<char>::classic_table() noexcept {
   return __pctype_func();
 #  elif defined(__EMSCRIPTEN__)
   return *__ctype_b_loc();
-#  elif defined(_NEWLIB_VERSION)
+#  elif _LIBCPP_LIBC_NEWLIB
   // Newlib has a 257-entry table in ctype_.c, where (char)0 starts at [1].
   return _ctype_ + 1;
 #  elif defined(_AIX)
@@ -3961,13 +3936,7 @@ wstring numpunct<wchar_t>::do_falsename() const { return L"false"; }
 
 // numpunct_byname<char>
 
-numpunct_byname<char>::numpunct_byname(const char* nm, size_t refs) : numpunct<char>(refs) { __init(nm); }
-
-numpunct_byname<char>::numpunct_byname(const string& nm, size_t refs) : numpunct<char>(refs) { __init(nm.c_str()); }
-
-numpunct_byname<char>::~numpunct_byname() {}
-
-void numpunct_byname<char>::__init(const char* nm) {
+numpunct_byname<char>::numpunct_byname(const char* nm, size_t refs) : numpunct<char>(refs) {
   typedef numpunct<char> base;
   if (strcmp(nm, "C") != 0) {
     __libcpp_unique_locale loc(nm);
@@ -3988,18 +3957,14 @@ void numpunct_byname<char>::__init(const char* nm) {
   }
 }
 
+numpunct_byname<char>::numpunct_byname(const string& nm, size_t refs) : numpunct_byname<char>(nm.c_str(), refs) {}
+
+numpunct_byname<char>::~numpunct_byname() {}
+
 // numpunct_byname<wchar_t>
 
 #if _LIBCPP_HAS_WIDE_CHARACTERS
-numpunct_byname<wchar_t>::numpunct_byname(const char* nm, size_t refs) : numpunct<wchar_t>(refs) { __init(nm); }
-
-numpunct_byname<wchar_t>::numpunct_byname(const string& nm, size_t refs) : numpunct<wchar_t>(refs) {
-  __init(nm.c_str());
-}
-
-numpunct_byname<wchar_t>::~numpunct_byname() {}
-
-void numpunct_byname<wchar_t>::__init(const char* nm) {
+numpunct_byname<wchar_t>::numpunct_byname(const char* nm, size_t refs) : numpunct<wchar_t>(refs) {
   if (strcmp(nm, "C") != 0) {
     __libcpp_unique_locale loc(nm);
     if (!loc)
@@ -4016,6 +3981,10 @@ void numpunct_byname<wchar_t>::__init(const char* nm) {
     // localization for truename and falsename is not available
   }
 }
+
+numpunct_byname<wchar_t>::numpunct_byname(const string& nm, size_t refs) : numpunct_byname<wchar_t>(nm.c_str(), refs) {}
+
+numpunct_byname<wchar_t>::~numpunct_byname() {}
 #endif // _LIBCPP_HAS_WIDE_CHARACTERS
 
 // num_get helpers
@@ -4383,7 +4352,7 @@ string __time_get_storage<char>::__analyze(char fmt, const ctype<char>& ct) {
   char f[3] = {0};
   f[0]      = '%';
   f[1]      = fmt;
-  size_t n  = __locale::__strftime(buf, countof(buf), f, &t, __loc_);
+  size_t n  = __locale::__strftime(buf, std::size(buf), f, &t, __loc_);
   char* bb  = buf;
   char* be  = buf + n;
   string result;
@@ -4514,12 +4483,12 @@ wstring __time_get_storage<wchar_t>::__analyze(char fmt, const ctype<wchar_t>& c
   char f[3] = {0};
   f[0]      = '%';
   f[1]      = fmt;
-  __locale::__strftime(buf, countof(buf), f, &t, __loc_);
+  __locale::__strftime(buf, std::size(buf), f, &t, __loc_);
   wchar_t wbuf[100];
   wchar_t* wbb   = wbuf;
   mbstate_t mb   = {0};
   const char* bb = buf;
-  size_t j       = __locale::__mbsrtowcs(wbb, &bb, countof(wbuf), &mb, __loc_);
+  size_t j       = __locale::__mbsrtowcs(wbb, &bb, std::size(wbuf), &mb, __loc_);
   if (j == size_t(-1))
     std::__throw_runtime_error("locale not supported");
   wchar_t* wbe = wbb + j;
@@ -4640,25 +4609,25 @@ void __time_get_storage<char>::init(const ctype<char>& ct) {
   // __weeks_
   for (int i = 0; i < 7; ++i) {
     t.tm_wday = i;
-    __locale::__strftime(buf, countof(buf), "%A", &t, __loc_);
+    __locale::__strftime(buf, std::size(buf), "%A", &t, __loc_);
     __weeks_[i] = buf;
-    __locale::__strftime(buf, countof(buf), "%a", &t, __loc_);
+    __locale::__strftime(buf, std::size(buf), "%a", &t, __loc_);
     __weeks_[i + 7] = buf;
   }
   // __months_
   for (int i = 0; i < 12; ++i) {
     t.tm_mon = i;
-    __locale::__strftime(buf, countof(buf), "%B", &t, __loc_);
+    __locale::__strftime(buf, std::size(buf), "%B", &t, __loc_);
     __months_[i] = buf;
-    __locale::__strftime(buf, countof(buf), "%b", &t, __loc_);
+    __locale::__strftime(buf, std::size(buf), "%b", &t, __loc_);
     __months_[i + 12] = buf;
   }
   // __am_pm_
   t.tm_hour = 1;
-  __locale::__strftime(buf, countof(buf), "%p", &t, __loc_);
+  __locale::__strftime(buf, std::size(buf), "%p", &t, __loc_);
   __am_pm_[0] = buf;
   t.tm_hour   = 13;
-  __locale::__strftime(buf, countof(buf), "%p", &t, __loc_);
+  __locale::__strftime(buf, std::size(buf), "%p", &t, __loc_);
   __am_pm_[1] = buf;
   __c_        = __analyze('c', ct);
   __r_        = __analyze('r', ct);
@@ -4677,18 +4646,18 @@ void __time_get_storage<wchar_t>::init(const ctype<wchar_t>& ct) {
   // __weeks_
   for (int i = 0; i < 7; ++i) {
     t.tm_wday = i;
-    __locale::__strftime(buf, countof(buf), "%A", &t, __loc_);
+    __locale::__strftime(buf, std::size(buf), "%A", &t, __loc_);
     mb             = mbstate_t();
     const char* bb = buf;
-    size_t j       = __locale::__mbsrtowcs(wbuf, &bb, countof(wbuf), &mb, __loc_);
+    size_t j       = __locale::__mbsrtowcs(wbuf, &bb, std::size(wbuf), &mb, __loc_);
     if (j == size_t(-1) || j == 0)
       std::__throw_runtime_error("locale not supported");
     wbe = wbuf + j;
     __weeks_[i].assign(wbuf, wbe);
-    __locale::__strftime(buf, countof(buf), "%a", &t, __loc_);
+    __locale::__strftime(buf, std::size(buf), "%a", &t, __loc_);
     mb = mbstate_t();
     bb = buf;
-    j  = __locale::__mbsrtowcs(wbuf, &bb, countof(wbuf), &mb, __loc_);
+    j  = __locale::__mbsrtowcs(wbuf, &bb, std::size(wbuf), &mb, __loc_);
     if (j == size_t(-1) || j == 0)
       std::__throw_runtime_error("locale not supported");
     wbe = wbuf + j;
@@ -4697,18 +4666,18 @@ void __time_get_storage<wchar_t>::init(const ctype<wchar_t>& ct) {
   // __months_
   for (int i = 0; i < 12; ++i) {
     t.tm_mon = i;
-    __locale::__strftime(buf, countof(buf), "%B", &t, __loc_);
+    __locale::__strftime(buf, std::size(buf), "%B", &t, __loc_);
     mb             = mbstate_t();
     const char* bb = buf;
-    size_t j       = __locale::__mbsrtowcs(wbuf, &bb, countof(wbuf), &mb, __loc_);
+    size_t j       = __locale::__mbsrtowcs(wbuf, &bb, std::size(wbuf), &mb, __loc_);
     if (j == size_t(-1) || j == 0)
       std::__throw_runtime_error("locale not supported");
     wbe = wbuf + j;
     __months_[i].assign(wbuf, wbe);
-    __locale::__strftime(buf, countof(buf), "%b", &t, __loc_);
+    __locale::__strftime(buf, std::size(buf), "%b", &t, __loc_);
     mb = mbstate_t();
     bb = buf;
-    j  = __locale::__mbsrtowcs(wbuf, &bb, countof(wbuf), &mb, __loc_);
+    j  = __locale::__mbsrtowcs(wbuf, &bb, std::size(wbuf), &mb, __loc_);
     if (j == size_t(-1) || j == 0)
       std::__throw_runtime_error("locale not supported");
     wbe = wbuf + j;
@@ -4716,19 +4685,19 @@ void __time_get_storage<wchar_t>::init(const ctype<wchar_t>& ct) {
   }
   // __am_pm_
   t.tm_hour = 1;
-  __locale::__strftime(buf, countof(buf), "%p", &t, __loc_);
+  __locale::__strftime(buf, std::size(buf), "%p", &t, __loc_);
   mb             = mbstate_t();
   const char* bb = buf;
-  size_t j       = __locale::__mbsrtowcs(wbuf, &bb, countof(wbuf), &mb, __loc_);
+  size_t j       = __locale::__mbsrtowcs(wbuf, &bb, std::size(wbuf), &mb, __loc_);
   if (j == size_t(-1))
     std::__throw_runtime_error("locale not supported");
   wbe = wbuf + j;
   __am_pm_[0].assign(wbuf, wbe);
   t.tm_hour = 13;
-  __locale::__strftime(buf, countof(buf), "%p", &t, __loc_);
+  __locale::__strftime(buf, std::size(buf), "%p", &t, __loc_);
   mb = mbstate_t();
   bb = buf;
-  j  = __locale::__mbsrtowcs(wbuf, &bb, countof(wbuf), &mb, __loc_);
+  j  = __locale::__mbsrtowcs(wbuf, &bb, std::size(wbuf), &mb, __loc_);
   if (j == size_t(-1))
     std::__throw_runtime_error("locale not supported");
   wbe = wbuf + j;
@@ -4957,7 +4926,7 @@ void __time_put::__do_put(char* __nb, char*& __ne, const tm* __tm, char __fmt, c
   char fmt[] = {'%', __fmt, __mod, 0};
   if (__mod != 0)
     swap(fmt[1], fmt[2]);
-  size_t n = __locale::__strftime(__nb, countof(__nb, __ne), fmt, __tm, __loc_);
+  size_t n = __locale::__strftime(__nb, std::distance(__nb, __ne), fmt, __tm, __loc_);
   __ne     = __nb + n;
 }
 
@@ -4968,7 +4937,7 @@ void __time_put::__do_put(wchar_t* __wb, wchar_t*& __we, const tm* __tm, char __
   __do_put(__nar, __ne, __tm, __fmt, __mod);
   mbstate_t mb     = {0};
   const char* __nb = __nar;
-  size_t j         = __locale::__mbsrtowcs(__wb, &__nb, countof(__wb, __we), &mb, __loc_);
+  size_t j         = __locale::__mbsrtowcs(__wb, &__nb, std::distance(__wb, __we), &mb, __loc_);
   if (j == size_t(-1))
     std::__throw_runtime_error("locale not supported");
   __we = __wb + j;
@@ -5443,7 +5412,7 @@ void moneypunct_byname<wchar_t, false>::init(const char* nm) {
   wchar_t wbuf[100];
   mbstate_t mb   = {0};
   const char* bb = lc->currency_symbol;
-  size_t j       = __locale::__mbsrtowcs(wbuf, &bb, countof(wbuf), &mb, loc.get());
+  size_t j       = __locale::__mbsrtowcs(wbuf, &bb, std::size(wbuf), &mb, loc.get());
   if (j == size_t(-1))
     std::__throw_runtime_error("locale not supported");
   wchar_t* wbe = wbuf + j;
@@ -5457,7 +5426,7 @@ void moneypunct_byname<wchar_t, false>::init(const char* nm) {
   else {
     mb = mbstate_t();
     bb = lc->positive_sign;
-    j  = __locale::__mbsrtowcs(wbuf, &bb, countof(wbuf), &mb, loc.get());
+    j  = __locale::__mbsrtowcs(wbuf, &bb, std::size(wbuf), &mb, loc.get());
     if (j == size_t(-1))
       std::__throw_runtime_error("locale not supported");
     wbe = wbuf + j;
@@ -5468,7 +5437,7 @@ void moneypunct_byname<wchar_t, false>::init(const char* nm) {
   else {
     mb = mbstate_t();
     bb = lc->negative_sign;
-    j  = __locale::__mbsrtowcs(wbuf, &bb, countof(wbuf), &mb, loc.get());
+    j  = __locale::__mbsrtowcs(wbuf, &bb, std::size(wbuf), &mb, loc.get());
     if (j == size_t(-1))
       std::__throw_runtime_error("locale not supported");
     wbe = wbuf + j;
@@ -5498,7 +5467,7 @@ void moneypunct_byname<wchar_t, true>::init(const char* nm) {
   wchar_t wbuf[100];
   mbstate_t mb   = {0};
   const char* bb = lc->int_curr_symbol;
-  size_t j       = __locale::__mbsrtowcs(wbuf, &bb, countof(wbuf), &mb, loc.get());
+  size_t j       = __locale::__mbsrtowcs(wbuf, &bb, std::size(wbuf), &mb, loc.get());
   if (j == size_t(-1))
     std::__throw_runtime_error("locale not supported");
   wchar_t* wbe = wbuf + j;
@@ -5516,7 +5485,7 @@ void moneypunct_byname<wchar_t, true>::init(const char* nm) {
   else {
     mb = mbstate_t();
     bb = lc->positive_sign;
-    j  = __locale::__mbsrtowcs(wbuf, &bb, countof(wbuf), &mb, loc.get());
+    j  = __locale::__mbsrtowcs(wbuf, &bb, std::size(wbuf), &mb, loc.get());
     if (j == size_t(-1))
       std::__throw_runtime_error("locale not supported");
     wbe = wbuf + j;
@@ -5531,7 +5500,7 @@ void moneypunct_byname<wchar_t, true>::init(const char* nm) {
   else {
     mb = mbstate_t();
     bb = lc->negative_sign;
-    j  = __locale::__mbsrtowcs(wbuf, &bb, countof(wbuf), &mb, loc.get());
+    j  = __locale::__mbsrtowcs(wbuf, &bb, std::size(wbuf), &mb, loc.get());
     if (j == size_t(-1))
       std::__throw_runtime_error("locale not supported");
     wbe = wbuf + j;
@@ -5571,6 +5540,54 @@ string __num_get<_CharT>::__stage2_int_prep(ios_base& __iob, _CharT* __atoms, _C
   return __np.grouping();
 }
 
+template <class _CharT>
+int __num_get<_CharT>::__stage2_int_loop(
+    _CharT __ct,
+    int __base,
+    char* __a,
+    char*& __a_end,
+    unsigned& __dc,
+    _CharT __thousands_sep,
+    const string& __grouping,
+    unsigned* __g,
+    unsigned*& __g_end,
+    _CharT* __atoms) {
+  if (__a_end == __a && (__ct == __atoms[24] || __ct == __atoms[25])) {
+    *__a_end++ = __ct == __atoms[24] ? '+' : '-';
+    __dc       = 0;
+    return 0;
+  }
+  if (__grouping.size() != 0 && __ct == __thousands_sep) {
+    if (__g_end - __g < __num_get_buf_sz) {
+      *__g_end++ = __dc;
+      __dc       = 0;
+    }
+    return 0;
+  }
+  ptrdiff_t __f = __atoms_offset(__atoms, __ct);
+  if (__f >= 24)
+    return -1;
+  switch (__base) {
+  case 8:
+  case 10:
+    if (__f >= __base)
+      return -1;
+    break;
+  case 16:
+    if (__f < 22)
+      break;
+    if (__a_end != __a && __a_end - __a <= 2 && __a_end[-1] == '0') {
+      __dc       = 0;
+      *__a_end++ = __src[__f];
+      return 0;
+    }
+    return -1;
+  }
+  *__a_end++ = __src[__f];
+  ++__dc;
+  return 0;
+}
+
 template class _LIBCPP_CLASS_TEMPLATE_INSTANTIATION_VIS collate<char>;
 _LIBCPP_IF_WIDE_CHARACTERS(template class _LIBCPP_CLASS_TEMPLATE_INSTANTIATION_VIS collate<wchar_t>;)
 
diff --git a/lib/libcxx/src/memory.cpp b/lib/libcxx/src/memory.cpp
index 9be40cb9c1..61aa89d58a 100644
--- a/lib/libcxx/src/memory.cpp
+++ b/lib/libcxx/src/memory.cpp
@@ -7,7 +7,8 @@
 //===----------------------------------------------------------------------===//
 
 #include <__config>
-#ifdef _LIBCPP_DEPRECATED_ABI_LEGACY_LIBRARY_DEFINITIONS_FOR_INLINE_FUNCTIONS
+#if !defined(_LIBCPP_OBJECT_FORMAT_COFF) && !defined(_LIBCPP_OBJECT_FORMAT_XCOFF) &&                                   \
+    _LIBCPP_AVAILABILITY_MINIMUM_HEADER_VERSION < 5
 #  define _LIBCPP_SHARED_PTR_DEFINE_LEGACY_INLINE_FUNCTIONS
 #endif
 
@@ -132,19 +133,16 @@ __sp_mut& __get_sp_mut(const void* p) {
 
 #endif // _LIBCPP_HAS_THREADS
 
-void* align(size_t alignment, size_t size, void*& ptr, size_t& space) {
-  void* r = nullptr;
-  if (size <= space) {
-    char* p1 = static_cast<char*>(ptr);
-    char* p2 = reinterpret_cast<char*>(reinterpret_cast<uintptr_t>(p1 + (alignment - 1)) & -alignment);
-    size_t d = static_cast<size_t>(p2 - p1);
-    if (d <= space - size) {
-      r   = p2;
-      ptr = r;
-      space -= d;
-    }
-  }
-  return r;
+#if _LIBCPP_AVAILABILITY_MINIMUM_HEADER_VERSION < 21
+
+_LIBCPP_DIAGNOSTIC_PUSH
+_LIBCPP_CLANG_DIAGNOSTIC_IGNORED("-Wmissing-prototypes")
+// This function only exists for ABI compatibility and we therefore don't provide a declaration in the headers
+_LIBCPP_EXPORTED_FROM_ABI void* align(size_t alignment, size_t size, void*& ptr, size_t& space) {
+  return __align_inline::align(alignment, size, ptr, space);
 }
+_LIBCPP_DIAGNOSTIC_POP
+
+#endif
 
 _LIBCPP_END_NAMESPACE_STD
diff --git a/lib/libcxx/src/mutex_destructor.cpp b/lib/libcxx/src/mutex_destructor.cpp
index 9f991721f0..4c63ea0da7 100644
--- a/lib/libcxx/src/mutex_destructor.cpp
+++ b/lib/libcxx/src/mutex_destructor.cpp
@@ -19,7 +19,7 @@
 #include <__config>
 #include <__thread/support.h>
 
-#if _LIBCPP_ABI_VERSION == 1 || !_LIBCPP_HAS_TRIVIAL_MUTEX_DESTRUCTION
+#if _LIBCPP_AVAILABILITY_MINIMUM_HEADER_VERSION < 9 || !_LIBCPP_HAS_TRIVIAL_MUTEX_DESTRUCTION
 #  define NEEDS_MUTEX_DESTRUCTOR
 #endif
 
diff --git a/lib/libcxx/src/new.cpp b/lib/libcxx/src/new.cpp
index ce6b63775c..505d239930 100644
--- a/lib/libcxx/src/new.cpp
+++ b/lib/libcxx/src/new.cpp
@@ -6,9 +6,9 @@
 //
 //===----------------------------------------------------------------------===//
 
+#include "include/aligned_alloc.h"
 #include "include/overridable_function.h"
 #include <__assert>
-#include <__memory/aligned_alloc.h>
 #include <cstddef>
 #include <cstdlib>
 #include <new>
@@ -43,7 +43,7 @@ static void* operator_new_impl(std::size_t size) {
   return p;
 }
 
-_LIBCPP_OVERRIDABLE_FUNCTION(void*, operator new, (std::size_t size)) _THROW_BAD_ALLOC {
+OVERRIDABLE_FUNCTION void* operator new(std::size_t size) _THROW_BAD_ALLOC {
   void* p = operator_new_impl(size);
   if (p == nullptr)
     __throw_bad_alloc_shim();
@@ -74,7 +74,7 @@ _LIBCPP_WEAK void* operator new(size_t size, const std::nothrow_t&) noexcept {
 #  endif
 }
 
-_LIBCPP_OVERRIDABLE_FUNCTION(void*, operator new[], (size_t size)) _THROW_BAD_ALLOC { return ::operator new(size); }
+OVERRIDABLE_FUNCTION void* operator new[](size_t size) _THROW_BAD_ALLOC { return ::operator new(size); }
 
 _LIBCPP_WEAK void* operator new[](size_t size, const std::nothrow_t&) noexcept {
 #  if !_LIBCPP_HAS_EXCEPTIONS
@@ -134,7 +134,7 @@ static void* operator_new_aligned_impl(std::size_t size, std::align_val_t alignm
   return p;
 }
 
-_LIBCPP_OVERRIDABLE_FUNCTION(void*, operator new, (std::size_t size, std::align_val_t alignment)) _THROW_BAD_ALLOC {
+OVERRIDABLE_FUNCTION void* operator new(std::size_t size, std::align_val_t alignment) _THROW_BAD_ALLOC {
   void* p = operator_new_aligned_impl(size, alignment);
   if (p == nullptr)
     __throw_bad_alloc_shim();
@@ -165,7 +165,7 @@ _LIBCPP_WEAK void* operator new(size_t size, std::align_val_t alignment, const s
 #    endif
 }
 
-_LIBCPP_OVERRIDABLE_FUNCTION(void*, operator new[], (size_t size, std::align_val_t alignment)) _THROW_BAD_ALLOC {
+OVERRIDABLE_FUNCTION void* operator new[](size_t size, std::align_val_t alignment) _THROW_BAD_ALLOC {
   return ::operator new(size, alignment);
 }
 
diff --git a/lib/libcxx/src/optional.cpp b/lib/libcxx/src/optional.cpp
index faabe66cfc..3b92580565 100644
--- a/lib/libcxx/src/optional.cpp
+++ b/lib/libcxx/src/optional.cpp
@@ -19,6 +19,8 @@ const char* bad_optional_access::what() const noexcept { return "bad_optional_ac
 
 #include <__config>
 
+#if _LIBCPP_AVAILABILITY_MINIMUM_HEADER_VERSION < 7
+
 //  Preserve std::experimental::bad_optional_access for ABI compatibility
 //  Even though it no longer exists in a header file
 _LIBCPP_BEGIN_NAMESPACE_EXPERIMENTAL
@@ -34,3 +36,5 @@ public:
 bad_optional_access::~bad_optional_access() noexcept = default;
 
 _LIBCPP_END_NAMESPACE_EXPERIMENTAL
+
+#endif // _LIBCPP_AVAILABILITY_MINIMUM_HEADER_VERSION < 7
diff --git a/lib/libcxx/src/print.cpp b/lib/libcxx/src/print.cpp
index 3f2baa6dcc..82cf2afd05 100644
--- a/lib/libcxx/src/print.cpp
+++ b/lib/libcxx/src/print.cpp
@@ -22,6 +22,14 @@
 #  include <windows.h>
 #elif __has_include(<unistd.h>)
 #  include <unistd.h>
+#  if defined(_NEWLIB_VERSION)
+#    if defined(_POSIX_C_SOURCE) && __has_include(<stdio.h>)
+#      include <stdio.h>
+#      define HAS_FILENO_AND_ISATTY
+#    endif
+#  else
+#    define HAS_FILENO_AND_ISATTY
+#  endif
 #endif
 
 _LIBCPP_BEGIN_NAMESPACE_STD
@@ -56,7 +64,7 @@ __write_to_windows_console([[maybe_unused]] FILE* __stream, [[maybe_unused]] wst
 }
 #  endif // _LIBCPP_HAS_WIDE_CHARACTERS
 
-#elif __has_include(<unistd.h>) // !_LIBCPP_WIN32API
+#elif defined(HAS_FILENO_AND_ISATTY) // !_LIBCPP_WIN32API
 
 _LIBCPP_EXPORTED_FROM_ABI bool __is_posix_terminal(FILE* __stream) { return isatty(fileno(__stream)); }
 #endif
diff --git a/lib/libcxx/src/random.cpp b/lib/libcxx/src/random.cpp
index 5c6644811b..79815aadc7 100644
--- a/lib/libcxx/src/random.cpp
+++ b/lib/libcxx/src/random.cpp
@@ -31,8 +31,6 @@
 #    include <linux/random.h>
 #    include <sys/ioctl.h>
 #  endif
-#elif defined(_LIBCPP_USING_NACL_RANDOM)
-#  include <nacl/nacl_random.h>
 #elif defined(_LIBCPP_USING_FUCHSIA_CPRNG)
 #  include <zircon/syscalls.h>
 #endif
@@ -93,30 +91,6 @@ unsigned random_device::operator()() {
   return r;
 }
 
-#elif defined(_LIBCPP_USING_NACL_RANDOM)
-
-random_device::random_device(const string& __token) {
-  if (__token != "/dev/urandom")
-    std::__throw_system_error(ENOENT, ("random device not supported " + __token).c_str());
-  int error = nacl_secure_random_init();
-  if (error)
-    std::__throw_system_error(error, ("random device failed to open " + __token).c_str());
-}
-
-random_device::~random_device() {}
-
-unsigned random_device::operator()() {
-  unsigned r;
-  size_t n = sizeof(r);
-  size_t bytes_written;
-  int error = nacl_secure_random(&r, n, &bytes_written);
-  if (error != 0)
-    std::__throw_system_error(error, "random_device failed getting bytes");
-  else if (bytes_written != n)
-    std::__throw_runtime_error("random_device failed to obtain enough bytes");
-  return r;
-}
-
 #elif defined(_LIBCPP_USING_WIN32_RANDOM)
 
 random_device::random_device(const string& __token) {
diff --git a/lib/libcxx/src/string.cpp b/lib/libcxx/src/string.cpp
index 55d19a6880..178ef710f0 100644
--- a/lib/libcxx/src/string.cpp
+++ b/lib/libcxx/src/string.cpp
@@ -20,7 +20,7 @@
 
 _LIBCPP_BEGIN_NAMESPACE_STD
 
-#ifndef _LIBCPP_ABI_DO_NOT_EXPORT_BASIC_STRING_COMMON
+#if _LIBCPP_AVAILABILITY_MINIMUM_HEADER_VERSION < 14
 
 template <bool>
 struct __basic_string_common;
@@ -35,45 +35,30 @@ struct __basic_string_common<true> {
 void __basic_string_common<true>::__throw_length_error() const { std::__throw_length_error("basic_string"); }
 void __basic_string_common<true>::__throw_out_of_range() const { std::__throw_out_of_range("basic_string"); }
 
-#endif // _LIBCPP_ABI_DO_NOT_EXPORT_BASIC_STRING_COMMON
+#endif // _LIBCPP_AVAILABILITY_MINIMUM_HEADER_VERSION < 14
 
 // Define legacy ABI functions
 // ---------------------------
 
-#ifndef _LIBCPP_ABI_STRING_OPTIMIZED_EXTERNAL_INSTANTIATION
+#if _LIBCPP_AVAILABILITY_MINIMUM_HEADER_VERSION < 21
 
+// This initializes the string with [__s, __s + __sz), but capacity() == __reserve. Assumes that __reserve >= __sz.
 template <class _CharT, class _Traits, class _Allocator>
 void basic_string<_CharT, _Traits, _Allocator>::__init(const value_type* __s, size_type __sz, size_type __reserve) {
-  if (__libcpp_is_constant_evaluated())
-    __rep_ = __rep();
-  if (__reserve > max_size())
-    __throw_length_error();
-  pointer __p;
-  if (__fits_in_sso(__reserve)) {
-    __set_short_size(__sz);
-    __p = __get_short_pointer();
-  } else {
-    auto __allocation = std::__allocate_at_least(__alloc_, __recommend(__reserve) + 1);
-    __p               = __allocation.ptr;
-    __begin_lifetime(__p, __allocation.count);
-    __set_long_pointer(__p);
-    __set_long_cap(__allocation.count);
-    __set_long_size(__sz);
-  }
+  pointer __p = __init_internal_buffer(__reserve);
+  __annotate_delete();
+  __set_size(__sz);
   traits_type::copy(std::__to_address(__p), __s, __sz);
   traits_type::assign(__p[__sz], value_type());
   __annotate_new(__sz);
 }
 
-#  define STRING_LEGACY_API(CharT)                                                                                     \
-    template _LIBCPP_EXPORTED_FROM_ABI void basic_string<CharT>::__init(const value_type*, size_type, size_type)
-
-STRING_LEGACY_API(char);
+template _LIBCPP_EXPORTED_FROM_ABI void basic_string<char>::__init(const value_type*, size_type, size_type);
 #  if _LIBCPP_HAS_WIDE_CHARACTERS
-STRING_LEGACY_API(wchar_t);
+template _LIBCPP_EXPORTED_FROM_ABI void basic_string<wchar_t>::__init(const value_type*, size_type, size_type);
 #  endif
 
-#endif // _LIBCPP_ABI_STRING_OPTIMIZED_EXTERNAL_INSTANTIATION
+#endif // _LIBCPP_AVAILABILITY_MINIMUM_HEADER_VERSION < 21
 
 #define _LIBCPP_EXTERN_TEMPLATE_DEFINE(...) template _LIBCPP_EXPORTED_FROM_ABI __VA_ARGS__;
 #ifdef _LIBCPP_ABI_STRING_OPTIMIZED_EXTERNAL_INSTANTIATION
diff --git a/lib/libcxx/src/support/runtime/exception_fallback.ipp b/lib/libcxx/src/support/runtime/exception_fallback.ipp
index ba283aee22..dca904e902 100644
--- a/lib/libcxx/src/support/runtime/exception_fallback.ipp
+++ b/lib/libcxx/src/support/runtime/exception_fallback.ipp
@@ -8,6 +8,8 @@
 //===----------------------------------------------------------------------===//
 
 #include <__verbose_abort>
+#include <exception>
+#include "include/atomic_support.h"
 
 namespace std {
 
diff --git a/lib/libcxx/src/support/runtime/exception_glibcxx.ipp b/lib/libcxx/src/support/runtime/exception_glibcxx.ipp
index aa67cab6bc..5eb8d87f6d 100644
--- a/lib/libcxx/src/support/runtime/exception_glibcxx.ipp
+++ b/lib/libcxx/src/support/runtime/exception_glibcxx.ipp
@@ -11,6 +11,9 @@
 #  error header can only be used when targeting libstdc++ or libsupc++
 #endif
 
+#include <exception>
+#include <new>
+
 namespace std {
 
 bad_alloc::bad_alloc() noexcept {}
diff --git a/lib/libcxx/src/support/runtime/exception_libcxxabi.ipp b/lib/libcxx/src/support/runtime/exception_libcxxabi.ipp
index df6bd6574b..c42bb237d9 100644
--- a/lib/libcxx/src/support/runtime/exception_libcxxabi.ipp
+++ b/lib/libcxx/src/support/runtime/exception_libcxxabi.ipp
@@ -7,6 +7,10 @@
 //
 //===----------------------------------------------------------------------===//
 
+#include <exception>
+
+#include <cxxabi.h>
+
 #ifndef _LIBCPPABI_VERSION
 #  error this header can only be used with libc++abi
 #endif
@@ -17,9 +21,9 @@ bool uncaught_exception() noexcept { return uncaught_exceptions() > 0; }
 
 int uncaught_exceptions() noexcept {
 #if _LIBCPPABI_VERSION > 1001
-  return __cxa_uncaught_exceptions();
+  return abi::__cxa_uncaught_exceptions();
 #else
-  return __cxa_uncaught_exception() ? 1 : 0;
+  return abi::__cxa_uncaught_exception() ? 1 : 0;
 #endif
 }
 
diff --git a/lib/libcxx/src/support/runtime/exception_libcxxrt.ipp b/lib/libcxx/src/support/runtime/exception_libcxxrt.ipp
index f17fecc71e..6afdc00656 100644
--- a/lib/libcxx/src/support/runtime/exception_libcxxrt.ipp
+++ b/lib/libcxx/src/support/runtime/exception_libcxxrt.ipp
@@ -11,6 +11,8 @@
 #  error this header may only be used when targeting libcxxrt
 #endif
 
+#include <exception>
+
 namespace std {
 
 bad_exception::~bad_exception() noexcept {}
diff --git a/lib/libcxx/src/support/runtime/exception_msvc.ipp b/lib/libcxx/src/support/runtime/exception_msvc.ipp
index 2ae004bb02..7114d90892 100644
--- a/lib/libcxx/src/support/runtime/exception_msvc.ipp
+++ b/lib/libcxx/src/support/runtime/exception_msvc.ipp
@@ -12,6 +12,8 @@
 #endif
 
 #include <__verbose_abort>
+#include <exception>
+#include <new>
 
 extern "C" {
 typedef void(__cdecl* terminate_handler)();
diff --git a/lib/libcxx/src/support/runtime/exception_pointer_cxxabi.ipp b/lib/libcxx/src/support/runtime/exception_pointer_cxxabi.ipp
index 8f5c2060bb..75cb7c9d82 100644
--- a/lib/libcxx/src/support/runtime/exception_pointer_cxxabi.ipp
+++ b/lib/libcxx/src/support/runtime/exception_pointer_cxxabi.ipp
@@ -7,22 +7,21 @@
 //
 //===----------------------------------------------------------------------===//
 
-#ifndef HAVE_DEPENDENT_EH_ABI
-#  error this header may only be used with libc++abi or libcxxrt
-#endif
+#include <cxxabi.h>
+#include <exception>
 
 namespace std {
 
-exception_ptr::~exception_ptr() noexcept { __cxa_decrement_exception_refcount(__ptr_); }
+exception_ptr::~exception_ptr() noexcept { abi::__cxa_decrement_exception_refcount(__ptr_); }
 
 exception_ptr::exception_ptr(const exception_ptr& other) noexcept : __ptr_(other.__ptr_) {
-  __cxa_increment_exception_refcount(__ptr_);
+  abi::__cxa_increment_exception_refcount(__ptr_);
 }
 
 exception_ptr& exception_ptr::operator=(const exception_ptr& other) noexcept {
   if (__ptr_ != other.__ptr_) {
-    __cxa_increment_exception_refcount(other.__ptr_);
-    __cxa_decrement_exception_refcount(__ptr_);
+    abi::__cxa_increment_exception_refcount(other.__ptr_);
+    abi::__cxa_decrement_exception_refcount(__ptr_);
     __ptr_ = other.__ptr_;
   }
   return *this;
@@ -31,7 +30,7 @@ exception_ptr& exception_ptr::operator=(const exception_ptr& other) noexcept {
 exception_ptr exception_ptr::__from_native_exception_pointer(void* __e) noexcept {
   exception_ptr ptr;
   ptr.__ptr_ = __e;
-  __cxa_increment_exception_refcount(ptr.__ptr_);
+  abi::__cxa_increment_exception_refcount(ptr.__ptr_);
 
   return ptr;
 }
@@ -51,12 +50,12 @@ exception_ptr current_exception() noexcept {
   // this whole function would be just:
   //    return exception_ptr(__cxa_current_primary_exception());
   exception_ptr ptr;
-  ptr.__ptr_ = __cxa_current_primary_exception();
+  ptr.__ptr_ = abi::__cxa_current_primary_exception();
   return ptr;
 }
 
 void rethrow_exception(exception_ptr p) {
-  __cxa_rethrow_primary_exception(p.__ptr_);
+  abi::__cxa_rethrow_primary_exception(p.__ptr_);
   // if p.__ptr_ is NULL, above returns so we terminate
   terminate();
 }
diff --git a/lib/libcxx/src/support/runtime/exception_pointer_glibcxx.ipp b/lib/libcxx/src/support/runtime/exception_pointer_glibcxx.ipp
index 174b44ce0e..4b08db6f1a 100644
--- a/lib/libcxx/src/support/runtime/exception_pointer_glibcxx.ipp
+++ b/lib/libcxx/src/support/runtime/exception_pointer_glibcxx.ipp
@@ -16,6 +16,8 @@
 // stable ABI), and its rethrow_exception(std::__exception_ptr::exception_ptr)
 // function.
 
+#include <exception>
+
 namespace std {
 
 namespace __exception_ptr {
diff --git a/lib/libcxx/src/support/runtime/exception_pointer_msvc.ipp b/lib/libcxx/src/support/runtime/exception_pointer_msvc.ipp
index 2be5136176..4141e03123 100644
--- a/lib/libcxx/src/support/runtime/exception_pointer_msvc.ipp
+++ b/lib/libcxx/src/support/runtime/exception_pointer_msvc.ipp
@@ -7,6 +7,7 @@
 //
 //===----------------------------------------------------------------------===//
 
+#include <exception>
 #include <stdio.h>
 #include <stdlib.h>
 
diff --git a/lib/libcxx/src/support/runtime/exception_pointer_unimplemented.ipp b/lib/libcxx/src/support/runtime/exception_pointer_unimplemented.ipp
index 05a71ce34e..5e55f0f6de 100644
--- a/lib/libcxx/src/support/runtime/exception_pointer_unimplemented.ipp
+++ b/lib/libcxx/src/support/runtime/exception_pointer_unimplemented.ipp
@@ -8,6 +8,7 @@
 //===----------------------------------------------------------------------===//
 
 #include <__verbose_abort>
+#include <exception>
 
 namespace std {
 
diff --git a/lib/libcxx/src/support/win32/locale_win32.cpp b/lib/libcxx/src/support/win32/locale_win32.cpp
index 24402e818d..26722e6e47 100644
--- a/lib/libcxx/src/support/win32/locale_win32.cpp
+++ b/lib/libcxx/src/support/win32/locale_win32.cpp
@@ -144,7 +144,7 @@ int __snprintf(char* ret, size_t n, __locale_t loc, const char* format, ...) {
 // Like sprintf, but when return value >= 0 it returns
 // a pointer to a malloc'd string in *sptr.
 // If return >= 0, use free to delete *sptr.
-int __libcpp_vasprintf(char** sptr, const char* __restrict format, va_list ap) {
+static int __libcpp_vasprintf(char** sptr, const char* __restrict format, va_list ap) {
   *sptr = nullptr;
   // Query the count required.
   va_list ap_copy;
diff --git a/lib/libcxx/src/system_error.cpp b/lib/libcxx/src/system_error.cpp
index 164fb72621..6397a94932 100644
--- a/lib/libcxx/src/system_error.cpp
+++ b/lib/libcxx/src/system_error.cpp
@@ -95,6 +95,8 @@ std::optional<errc> __win_err_to_errc(int err) {
     return errc::no_lock_available;
   case ERROR_NEGATIVE_SEEK:
     return errc::invalid_argument;
+  case ERROR_NETNAME_DELETED:
+    return errc::no_such_file_or_directory;
   case ERROR_NOACCESS:
     return errc::permission_denied;
   case ERROR_NOT_ENOUGH_MEMORY:
diff --git a/lib/libcxx/src/thread.cpp b/lib/libcxx/src/thread.cpp
index 028d36e3bf..e494574ec2 100644
--- a/lib/libcxx/src/thread.cpp
+++ b/lib/libcxx/src/thread.cpp
@@ -74,9 +74,7 @@ unsigned thread::hardware_concurrency() noexcept {
     return 0;
   return static_cast<unsigned>(result);
 #elif defined(_LIBCPP_WIN32API)
-  SYSTEM_INFO info;
-  GetSystemInfo(&info);
-  return info.dwNumberOfProcessors;
+  return static_cast<unsigned>(GetActiveProcessorCount(ALL_PROCESSOR_GROUPS));
 #else // defined(CTL_HW) && defined(HW_NCPU)
   // TODO: grovel through /proc or check cpuid on x86 and similar
   // instructions on other architectures.
diff --git a/lib/libcxx/src/valarray.cpp b/lib/libcxx/src/valarray.cpp
index 6ef1f1cafc..3d3a9ac30e 100644
--- a/lib/libcxx/src/valarray.cpp
+++ b/lib/libcxx/src/valarray.cpp
@@ -10,8 +10,7 @@
 
 _LIBCPP_BEGIN_NAMESPACE_STD
 
-// These two symbols are part of the v1 ABI but not part of the >=v2 ABI.
-#if _LIBCPP_ABI_VERSION == 1
+#if _LIBCPP_AVAILABILITY_MINIMUM_HEADER_VERSION < 9
 template _LIBCPP_EXPORTED_FROM_ABI valarray<size_t>::valarray(size_t);
 template _LIBCPP_EXPORTED_FROM_ABI valarray<size_t>::~valarray();
 #endif
diff --git a/lib/libcxx/src/vector.cpp b/lib/libcxx/src/vector.cpp
index 3f3a906d64..77a028a480 100644
--- a/lib/libcxx/src/vector.cpp
+++ b/lib/libcxx/src/vector.cpp
@@ -10,7 +10,7 @@
 
 _LIBCPP_BEGIN_NAMESPACE_STD
 
-#ifndef _LIBCPP_ABI_DO_NOT_EXPORT_VECTOR_BASE_COMMON
+#if _LIBCPP_AVAILABILITY_MINIMUM_HEADER_VERSION < 15
 
 template <bool>
 struct __vector_base_common;
@@ -25,6 +25,6 @@ void __vector_base_common<true>::__throw_length_error() const { std::__throw_len
 
 void __vector_base_common<true>::__throw_out_of_range() const { std::__throw_out_of_range("vector"); }
 
-#endif // _LIBCPP_ABI_DO_NOT_EXPORT_VECTOR_BASE_COMMON
+#endif // _LIBCPP_AVAILABILITY_MINIMUM_HEADER_VERSION < 15
 
 _LIBCPP_END_NAMESPACE_STD
diff --git a/lib/libcxxabi/include/__cxxabi_config.h b/lib/libcxxabi/include/__cxxabi_config.h
index 759445dac9..e4fd845b1f 100644
--- a/lib/libcxxabi/include/__cxxabi_config.h
+++ b/lib/libcxxabi/include/__cxxabi_config.h
@@ -14,10 +14,6 @@
 #define _LIBCXXABI_ARM_EHABI
 #endif
 
-#if !defined(__has_attribute)
-#define __has_attribute(_attribute_) 0
-#endif
-
 #if defined(__clang__)
 #  define _LIBCXXABI_COMPILER_CLANG
 #  ifndef __apple_build_version__
@@ -25,10 +21,6 @@
 #  endif
 #elif defined(__GNUC__)
 #  define _LIBCXXABI_COMPILER_GCC
-#elif defined(_MSC_VER)
-#  define _LIBCXXABI_COMPILER_MSVC
-#elif defined(__IBMCPP__)
-#  define _LIBCXXABI_COMPILER_IBM
 #endif
 
 #if defined(_WIN32)
@@ -66,17 +58,7 @@
  #endif
 #endif
 
-#if defined(_LIBCXXABI_COMPILER_MSVC)
-#define _LIBCXXABI_WEAK
-#else
 #define _LIBCXXABI_WEAK __attribute__((__weak__))
-#endif
-
-#if defined(__clang__)
-#define _LIBCXXABI_COMPILER_CLANG
-#elif defined(__GNUC__)
-#define _LIBCXXABI_COMPILER_GCC
-#endif
 
 #if __has_attribute(__no_sanitize__) && defined(_LIBCXXABI_COMPILER_CLANG)
 #define _LIBCXXABI_NO_CFI __attribute__((__no_sanitize__("cfi")))
@@ -89,11 +71,7 @@
 #  define _LIBCXXABI_GUARD_ABI_ARM
 #endif
 
-#if defined(_LIBCXXABI_COMPILER_CLANG)
-#  if !__has_feature(cxx_exceptions)
-#    define _LIBCXXABI_NO_EXCEPTIONS
-#  endif
-#elif defined(_LIBCXXABI_COMPILER_GCC) && !defined(__EXCEPTIONS)
+#if !defined(__cpp_exceptions) || __cpp_exceptions < 199711L
 #  define _LIBCXXABI_NO_EXCEPTIONS
 #endif
 
@@ -103,6 +81,47 @@
 #define _LIBCXXABI_DTOR_FUNC
 #endif
 
+#if __has_include(<ptrauth.h>)
+#  include <ptrauth.h>
+#endif
+
+#if __has_feature(ptrauth_calls)
+
+// ptrauth_string_discriminator("__cxa_exception::actionRecord") == 0xFC91
+#  define __ptrauth_cxxabi_action_record __ptrauth(ptrauth_key_process_dependent_data, 1, 0xFC91)
+
+// ptrauth_string_discriminator("__cxa_exception::languageSpecificData") == 0xE8EE
+#  define __ptrauth_cxxabi_lsd __ptrauth(ptrauth_key_process_dependent_data, 1, 0xE8EE)
+
+// ptrauth_string_discriminator("__cxa_exception::catchTemp") == 0xFA58
+#  define __ptrauth_cxxabi_catch_temp_disc 0xFA58
+#  define __ptrauth_cxxabi_catch_temp_key ptrauth_key_process_dependent_data
+#  define __ptrauth_cxxabi_catch_temp __ptrauth(__ptrauth_cxxabi_catch_temp_key, 1, __ptrauth_cxxabi_catch_temp_disc)
+
+// ptrauth_string_discriminator("__cxa_exception::adjustedPtr") == 0x99E4
+#  define __ptrauth_cxxabi_adjusted_ptr __ptrauth(ptrauth_key_process_dependent_data, 1, 0x99E4)
+
+// ptrauth_string_discriminator("__cxa_exception::unexpectedHandler") == 0x99A9
+#  define __ptrauth_cxxabi_unexpected_handler __ptrauth(ptrauth_key_function_pointer, 1, 0x99A9)
+
+// ptrauth_string_discriminator("__cxa_exception::terminateHandler") == 0x0886
+#  define __ptrauth_cxxabi_terminate_handler __ptrauth(ptrauth_key_function_pointer, 1, 0x886)
+
+// ptrauth_string_discriminator("__cxa_exception::exceptionDestructor") == 0xC088
+#  define __ptrauth_cxxabi_exception_destructor __ptrauth(ptrauth_key_function_pointer, 1, 0xC088)
+
+#else
+
+#  define __ptrauth_cxxabi_action_record
+#  define __ptrauth_cxxabi_lsd
+#  define __ptrauth_cxxabi_catch_temp
+#  define __ptrauth_cxxabi_adjusted_ptr
+#  define __ptrauth_cxxabi_unexpected_handler
+#  define __ptrauth_cxxabi_terminate_handler
+#  define __ptrauth_cxxabi_exception_destructor
+
+#endif
+
 #if __cplusplus < 201103L
 #  define _LIBCXXABI_NOEXCEPT throw()
 #else
diff --git a/lib/libcxxabi/src/cxa_exception.cpp b/lib/libcxxabi/src/cxa_exception.cpp
index 92901a83bf..5d7edae697 100644
--- a/lib/libcxxabi/src/cxa_exception.cpp
+++ b/lib/libcxxabi/src/cxa_exception.cpp
@@ -192,7 +192,9 @@ void *__cxa_allocate_exception(size_t thrown_size) throw() {
         std::terminate();
     __cxa_exception *exception_header =
         static_cast<__cxa_exception *>((void *)(raw_buffer + header_offset));
-    ::memset(exception_header, 0, actual_size);
+    // We warn on memset to a non-trivially copyable type. We might want to
+    // change that diagnostic to not fire on a trivially obvious zero fill.
+    ::memset(static_cast<void*>(exception_header), 0, actual_size);
     return thrown_object_from_cxa_exception(exception_header);
 }
 
@@ -226,7 +228,7 @@ __cxa_exception* __cxa_init_primary_exception(void* object, std::type_info* tinf
 }
 
 //  This function shall allocate a __cxa_dependent_exception and
-//  return a pointer to it. (Really to the object, not past its' end).
+//  return a pointer to it. (Really to the object, not past its end).
 //  Otherwise, it will work like __cxa_allocate_exception.
 void * __cxa_allocate_dependent_exception () {
     size_t actual_size = sizeof(__cxa_dependent_exception);
diff --git a/lib/libcxxabi/src/cxa_exception.h b/lib/libcxxabi/src/cxa_exception.h
index aba08f2992..fa4c4dc55b 100644
--- a/lib/libcxxabi/src/cxa_exception.h
+++ b/lib/libcxxabi/src/cxa_exception.h
@@ -47,10 +47,10 @@ struct _LIBCXXABI_HIDDEN __cxa_exception {
     // In Wasm, a destructor returns its argument
     void *(_LIBCXXABI_DTOR_FUNC *exceptionDestructor)(void *);
 #else
-    void (_LIBCXXABI_DTOR_FUNC *exceptionDestructor)(void *);
+    void (_LIBCXXABI_DTOR_FUNC *__ptrauth_cxxabi_exception_destructor exceptionDestructor)(void *);
 #endif
-    std::unexpected_handler unexpectedHandler;
-    std::terminate_handler  terminateHandler;
+    std::unexpected_handler __ptrauth_cxxabi_unexpected_handler unexpectedHandler;
+    std::terminate_handler __ptrauth_cxxabi_terminate_handler terminateHandler;
 
     __cxa_exception *nextException;
 
@@ -61,10 +61,10 @@ struct _LIBCXXABI_HIDDEN __cxa_exception {
     int propagationCount;
 #else
     int handlerSwitchValue;
-    const unsigned char *actionRecord;
-    const unsigned char *languageSpecificData;
-    void *catchTemp;
-    void *adjustedPtr;
+    const unsigned char *__ptrauth_cxxabi_action_record actionRecord;
+    const unsigned char *__ptrauth_cxxabi_lsd languageSpecificData;
+    void *__ptrauth_cxxabi_catch_temp catchTemp;
+    void *__ptrauth_cxxabi_adjusted_ptr adjustedPtr;
 #endif
 
 #if !defined(__LP64__) && !defined(_WIN64) && !defined(_LIBCXXABI_ARM_EHABI)
@@ -79,6 +79,8 @@ struct _LIBCXXABI_HIDDEN __cxa_exception {
 // http://sourcery.mentor.com/archives/cxx-abi-dev/msg01924.html
 // The layout of this structure MUST match the layout of __cxa_exception, with
 // primaryException instead of referenceCount.
+// The pointer authentication schemas specified here must also match those of
+// the corresponding members in __cxa_exception.
 struct _LIBCXXABI_HIDDEN __cxa_dependent_exception {
 #if defined(__LP64__) || defined(_WIN64) || defined(_LIBCXXABI_ARM_EHABI)
     void* reserve; // padding.
@@ -86,9 +88,9 @@ struct _LIBCXXABI_HIDDEN __cxa_dependent_exception {
 #endif
 
     std::type_info *exceptionType;
-    void (_LIBCXXABI_DTOR_FUNC *exceptionDestructor)(void *);
-    std::unexpected_handler unexpectedHandler;
-    std::terminate_handler terminateHandler;
+    void (_LIBCXXABI_DTOR_FUNC *__ptrauth_cxxabi_exception_destructor exceptionDestructor)(void *);
+    std::unexpected_handler __ptrauth_cxxabi_unexpected_handler unexpectedHandler;
+    std::terminate_handler __ptrauth_cxxabi_terminate_handler terminateHandler;
 
     __cxa_exception *nextException;
 
@@ -99,10 +101,10 @@ struct _LIBCXXABI_HIDDEN __cxa_dependent_exception {
     int propagationCount;
 #else
     int handlerSwitchValue;
-    const unsigned char *actionRecord;
-    const unsigned char *languageSpecificData;
-    void * catchTemp;
-    void *adjustedPtr;
+    const unsigned char *__ptrauth_cxxabi_action_record actionRecord;
+    const unsigned char *__ptrauth_cxxabi_lsd languageSpecificData;
+    void *__ptrauth_cxxabi_catch_temp catchTemp;
+    void *__ptrauth_cxxabi_adjusted_ptr adjustedPtr;
 #endif
 
 #if !defined(__LP64__) && !defined(_WIN64) && !defined(_LIBCXXABI_ARM_EHABI)
diff --git a/lib/libcxxabi/src/cxa_personality.cpp b/lib/libcxxabi/src/cxa_personality.cpp
index 5f6e75c5be..77b2eb53af 100644
--- a/lib/libcxxabi/src/cxa_personality.cpp
+++ b/lib/libcxxabi/src/cxa_personality.cpp
@@ -20,7 +20,52 @@
 #include "cxa_exception.h"
 #include "cxa_handlers.h"
 #include "private_typeinfo.h"
-#include "unwind.h"
+
+#if __has_feature(ptrauth_calls)
+
+// CXXABI depends on definitions in libunwind as pointer auth couples the
+// definitions
+#  include "libunwind.h"
+
+// The actual value of the discriminators listed below is not important.
+// The derivation of the constants is only being included for the purpose
+// of maintaining a record of how they were originally produced.
+
+// ptrauth_string_discriminator("scan_results::languageSpecificData") == 0xE50D
+#  define __ptrauth_scan_results_lsd __ptrauth(ptrauth_key_process_dependent_code, 1, 0xE50D)
+
+// ptrauth_string_discriminator("scan_results::actionRecord") == 0x9823
+#  define __ptrauth_scan_results_action_record __ptrauth(ptrauth_key_process_dependent_code, 1, 0x9823)
+
+// The landing pad schema is split into key and discriminator because the manual re-sign needs each component
+#  define __ptrauth_scan_results_landingpad_key ptrauth_key_process_dependent_code
+// ptrauth_string_discriminator("scan_results::landingPad") == 0xD27C
+#  define __ptrauth_scan_results_landingpad_disc 0xD27C
+#  define __ptrauth_scan_results_landingpad                                                                            \
+    __ptrauth(__ptrauth_scan_results_landingpad_key, 1, __ptrauth_scan_results_landingpad_disc)
+
+// `__ptrauth_restricted_intptr` is a feature of apple clang that predates
+// support for direct application of `__ptrauth` to integer types. This
+// guard is necessary to support compilation with those compilers.
+#  if __has_extension(ptrauth_restricted_intptr_qualifier)
+#    define __ptrauth_scan_results_landingpad_intptr                                                                   \
+      __ptrauth_restricted_intptr(__ptrauth_scan_results_landingpad_key, 1, __ptrauth_scan_results_landingpad_disc)
+#  else
+#    define __ptrauth_scan_results_landingpad_intptr                                                                   \
+      __ptrauth(__ptrauth_scan_results_landingpad_key, 1, __ptrauth_scan_results_landingpad_disc)
+#  endif
+
+#else
+#  define __ptrauth_scan_results_lsd
+#  define __ptrauth_scan_results_action_record
+#  define __ptrauth_scan_results_landingpad
+#  define __ptrauth_scan_results_landingpad_intptr
+#endif
+
+// The functions defined in this file are magic functions called only by the compiler.
+#ifdef __clang__
+#  pragma clang diagnostic ignored "-Wmissing-prototypes"
+#endif
 
 // TODO: This is a temporary workaround for libc++abi to recognize that it's being
 // built against LLVM's libunwind. LLVM's libunwind started reporting _LIBUNWIND_VERSION
@@ -527,12 +572,17 @@ get_thrown_object_ptr(_Unwind_Exception* unwind_exception)
 namespace
 {
 
+typedef const uint8_t *__ptrauth_scan_results_lsd lsd_ptr_t;
+typedef const uint8_t *__ptrauth_scan_results_action_record action_ptr_t;
+typedef uintptr_t __ptrauth_scan_results_landingpad_intptr landing_pad_t;
+typedef void *__ptrauth_scan_results_landingpad landing_pad_ptr_t;
+
 struct scan_results
 {
     int64_t        ttypeIndex;   // > 0 catch handler, < 0 exception spec handler, == 0 a cleanup
-    const uint8_t* actionRecord;         // Currently unused.  Retained to ease future maintenance.
-    const uint8_t* languageSpecificData;  // Needed only for __cxa_call_unexpected
-    uintptr_t      landingPad;   // null -> nothing found, else something found
+    action_ptr_t   actionRecord; // Currently unused.  Retained to ease future maintenance.
+    lsd_ptr_t      languageSpecificData; // Needed only for __cxa_call_unexpected
+    landing_pad_t  landingPad;   // null -> nothing found, else something found
     void*          adjustedPtr;  // Used in cxa_exception.cpp
     _Unwind_Reason_Code reason;  // One of _URC_FATAL_PHASE1_ERROR,
                                  //        _URC_FATAL_PHASE2_ERROR,
@@ -557,7 +607,23 @@ set_registers(_Unwind_Exception* unwind_exception, _Unwind_Context* context,
                 reinterpret_cast<uintptr_t>(unwind_exception));
   _Unwind_SetGR(context, __builtin_eh_return_data_regno(1),
                 static_cast<uintptr_t>(results.ttypeIndex));
+#if __has_feature(ptrauth_calls)
+  auto stackPointer = _Unwind_GetGR(context, UNW_REG_SP);
+  // We manually re-sign the IP as the __ptrauth qualifiers cannot
+  // express the required relationship with the destination address
+  const auto existingDiscriminator =
+      ptrauth_blend_discriminator(&results.landingPad,
+                                  __ptrauth_scan_results_landingpad_disc);
+  unw_word_t newIP /* opaque __ptrauth(ptrauth_key_return_address, stackPointer, 0) */ =
+      (unw_word_t)ptrauth_auth_and_resign(*(void* const*)&results.landingPad,
+                                          __ptrauth_scan_results_landingpad_key,
+                                          existingDiscriminator,
+                                          ptrauth_key_return_address,
+                                          stackPointer);
+  _Unwind_SetIP(context, newIP);
+#else
   _Unwind_SetIP(context, results.landingPad);
+#endif
 }
 
 /*
@@ -691,12 +757,12 @@ static void scan_eh_tab(scan_results &results, _Unwind_Action actions,
         // The call sites are ordered in increasing value of start
         uintptr_t start = readEncodedPointer(&callSitePtr, callSiteEncoding);
         uintptr_t length = readEncodedPointer(&callSitePtr, callSiteEncoding);
-        uintptr_t landingPad = readEncodedPointer(&callSitePtr, callSiteEncoding);
+        landing_pad_t landingPad = readEncodedPointer(&callSitePtr, callSiteEncoding);
         uintptr_t actionEntry = readULEB128(&callSitePtr);
         if ((start <= ipOffset) && (ipOffset < (start + length)))
 #else  // __USING_SJLJ_EXCEPTIONS__ || __WASM_EXCEPTIONS__
         // ip is 1-based index into this table
-        uintptr_t landingPad = readULEB128(&callSitePtr);
+        landing_pad_t landingPad = readULEB128(&callSitePtr);
         uintptr_t actionEntry = readULEB128(&callSitePtr);
         if (--ip == 0)
 #endif // __USING_SJLJ_EXCEPTIONS__ || __WASM_EXCEPTIONS__
@@ -903,6 +969,57 @@ _UA_CLEANUP_PHASE
 */
 
 #if !defined(_LIBCXXABI_ARM_EHABI)
+
+// We use these helper functions to work around the behavior of casting between
+// integers (even those that are authenticated) and authenticated pointers.
+// Because the schemas being used are address discriminated we cannot use a
+// trivial value union to coerce the types so instead we perform the re-signing
+// manually.
+using __cxa_catch_temp_type = decltype(__cxa_exception::catchTemp);
+static inline void set_landing_pad(scan_results& results,
+                                   const __cxa_catch_temp_type& source) {
+#if __has_feature(ptrauth_calls)
+  const uintptr_t sourceDiscriminator =
+      ptrauth_blend_discriminator(&source, __ptrauth_cxxabi_catch_temp_disc);
+  const uintptr_t targetDiscriminator =
+      ptrauth_blend_discriminator(&results.landingPad,
+                                  __ptrauth_scan_results_landingpad_disc);
+  uintptr_t reauthenticatedLandingPad =
+      (uintptr_t)ptrauth_auth_and_resign(*reinterpret_cast<void* const*>(&source),
+                                         __ptrauth_cxxabi_catch_temp_key,
+                                         sourceDiscriminator,
+                                         __ptrauth_scan_results_landingpad_key,
+                                         targetDiscriminator);
+  memmove(reinterpret_cast<void *>(&results.landingPad),
+          reinterpret_cast<void *>(&reauthenticatedLandingPad),
+          sizeof(reauthenticatedLandingPad));
+#else
+  results.landingPad = reinterpret_cast<landing_pad_t>(source);
+#endif
+}
+
+static inline void get_landing_pad(__cxa_catch_temp_type &dest,
+                                   const scan_results &results) {
+#if __has_feature(ptrauth_calls)
+  const uintptr_t sourceDiscriminator =
+      ptrauth_blend_discriminator(&results.landingPad,
+                                  __ptrauth_scan_results_landingpad_disc);
+  const uintptr_t targetDiscriminator =
+      ptrauth_blend_discriminator(&dest, __ptrauth_cxxabi_catch_temp_disc);
+  uintptr_t reauthenticatedPointer =
+      (uintptr_t)ptrauth_auth_and_resign(*reinterpret_cast<void* const*>(&results.landingPad),
+                                         __ptrauth_scan_results_landingpad_key,
+                                         sourceDiscriminator,
+                                         __ptrauth_cxxabi_catch_temp_key,
+                                         targetDiscriminator);
+  memmove(reinterpret_cast<void *>(&dest),
+          reinterpret_cast<void *>(&reauthenticatedPointer),
+          sizeof(reauthenticatedPointer));
+#else
+  dest = reinterpret_cast<__cxa_catch_temp_type>(results.landingPad);
+#endif
+}
+
 #ifdef __WASM_EXCEPTIONS__
 _Unwind_Reason_Code __gxx_personality_wasm0
 #elif defined(__SEH__) && !defined(__USING_SJLJ_EXCEPTIONS__)
@@ -935,8 +1052,7 @@ __gxx_personality_v0
         results.ttypeIndex = exception_header->handlerSwitchValue;
         results.actionRecord = exception_header->actionRecord;
         results.languageSpecificData = exception_header->languageSpecificData;
-        results.landingPad =
-            reinterpret_cast<uintptr_t>(exception_header->catchTemp);
+        set_landing_pad(results, exception_header->catchTemp);
         results.adjustedPtr = exception_header->adjustedPtr;
 
         // Jump to the handler.
@@ -970,7 +1086,7 @@ __gxx_personality_v0
             exc->handlerSwitchValue = static_cast<int>(results.ttypeIndex);
             exc->actionRecord = results.actionRecord;
             exc->languageSpecificData = results.languageSpecificData;
-            exc->catchTemp = reinterpret_cast<void*>(results.landingPad);
+            get_landing_pad(exc->catchTemp, results);
             exc->adjustedPtr = results.adjustedPtr;
 #ifdef __WASM_EXCEPTIONS__
             // Wasm only uses a single phase (_UA_SEARCH_PHASE), so save the
@@ -1009,9 +1125,6 @@ __gxx_personality_seh0(PEXCEPTION_RECORD ms_exc, void *this_frame,
 
 #else
 
-extern "C" _Unwind_Reason_Code __gnu_unwind_frame(_Unwind_Exception*,
-                                                  _Unwind_Context*);
-
 // Helper function to unwind one frame.
 // ARM EHABI 7.3 and 7.4: If the personality function returns _URC_CONTINUE_UNWIND, the
 // personality routine should update the virtual register set (VRS) according to the
diff --git a/lib/libcxxabi/src/cxa_thread_atexit.cpp b/lib/libcxxabi/src/cxa_thread_atexit.cpp
index 8546cfe48c..402a52c741 100644
--- a/lib/libcxxabi/src/cxa_thread_atexit.cpp
+++ b/lib/libcxxabi/src/cxa_thread_atexit.cpp
@@ -106,6 +106,7 @@ namespace {
 
 #endif // HAVE___CXA_THREAD_ATEXIT_IMPL
 
+#if defined(__linux__) || defined(__Fuchsia__)
 extern "C" {
 
   _LIBCXXABI_FUNC_VIS int __cxa_thread_atexit(Dtor dtor, void* obj, void* dso_symbol) throw() {
@@ -140,6 +141,6 @@ extern "C" {
     }
 #endif // HAVE___CXA_THREAD_ATEXIT_IMPL
   }
-
 } // extern "C"
+#endif // defined(__linux__) || defined(__Fuchsia__)
 } // namespace __cxxabiv1
diff --git a/lib/libcxxabi/src/demangle/DemangleConfig.h b/lib/libcxxabi/src/demangle/DemangleConfig.h
index 7904e9d1eb..79dbeb89cc 100644
--- a/lib/libcxxabi/src/demangle/DemangleConfig.h
+++ b/lib/libcxxabi/src/demangle/DemangleConfig.h
@@ -115,4 +115,8 @@
 #define DEMANGLE_NAMESPACE_BEGIN namespace { namespace itanium_demangle {
 #define DEMANGLE_NAMESPACE_END } }
 
+// The DEMANGLE_ABI macro resolves to nothing when building libc++abi. Only
+// the llvm copy defines DEMANGLE_ABI as a visibility attribute.
+#define DEMANGLE_ABI
+
 #endif // LIBCXXABI_DEMANGLE_DEMANGLE_CONFIG_H
diff --git a/lib/libcxxabi/src/demangle/ItaniumDemangle.h b/lib/libcxxabi/src/demangle/ItaniumDemangle.h
index b306b20134..b999438ff2 100644
--- a/lib/libcxxabi/src/demangle/ItaniumDemangle.h
+++ b/lib/libcxxabi/src/demangle/ItaniumDemangle.h
@@ -1366,7 +1366,7 @@ public:
   template <typename Fn> void match(Fn F) const { F(Name, Params, Requires); }
 
   void printLeft(OutputBuffer &OB) const override {
-    ScopedOverride<unsigned> LT(OB.GtIsGt, 0);
+    ScopedOverride<bool> LT(OB.TemplateTracker.InsideTemplate, true);
     OB += "template<";
     Params.printWithComma(OB);
     OB += "> typename ";
@@ -1550,7 +1550,7 @@ public:
   NodeArray getParams() { return Params; }
 
   void printLeft(OutputBuffer &OB) const override {
-    ScopedOverride<unsigned> LT(OB.GtIsGt, 0);
+    ScopedOverride<bool> LT(OB.TemplateTracker.InsideTemplate, true);
     OB += "<";
     Params.printWithComma(OB);
     OB += ">";
@@ -1824,7 +1824,7 @@ public:
 
   void printDeclarator(OutputBuffer &OB) const {
     if (!TemplateParams.empty()) {
-      ScopedOverride<unsigned> LT(OB.GtIsGt, 0);
+      ScopedOverride<bool> LT(OB.TemplateTracker.InsideTemplate, true);
       OB += "<";
       TemplateParams.printWithComma(OB);
       OB += ">";
@@ -1885,7 +1885,9 @@ public:
   }
 
   void printLeft(OutputBuffer &OB) const override {
-    bool ParenAll = OB.isGtInsideTemplateArgs() &&
+    // If we're printing a '>' inside of a template argument, and we haven't
+    // yet parenthesized the expression, do so now.
+    bool ParenAll = !OB.isInParensInTemplateArgs() &&
                     (InfixOperator == ">" || InfixOperator == ">>");
     if (ParenAll)
       OB.printOpen();
@@ -2061,7 +2063,7 @@ public:
   void printLeft(OutputBuffer &OB) const override {
     OB += CastKind;
     {
-      ScopedOverride<unsigned> LT(OB.GtIsGt, 0);
+      ScopedOverride<bool> LT(OB.TemplateTracker.InsideTemplate, true);
       OB += "<";
       OB.printLeft(*To);
       OB += ">";
@@ -3049,7 +3051,8 @@ template <typename Derived, typename Alloc> struct AbstractManglingParser {
   Node *parse(bool ParseParams = true);
 };
 
-const char* parse_discriminator(const char* first, const char* last);
+DEMANGLE_ABI const char *parse_discriminator(const char *first,
+                                             const char *last);
 
 // <name> ::= <nested-name> // N
 //        ::= <local-name> # See Scope Encoding below  // Z
diff --git a/lib/libcxxabi/src/demangle/Utility.h b/lib/libcxxabi/src/demangle/Utility.h
index 8829f3fa13..df5b54dca4 100644
--- a/lib/libcxxabi/src/demangle/Utility.h
+++ b/lib/libcxxabi/src/demangle/Utility.h
@@ -81,7 +81,7 @@ public:
   OutputBuffer(const OutputBuffer &) = delete;
   OutputBuffer &operator=(const OutputBuffer &) = delete;
 
-  virtual ~OutputBuffer() {}
+  virtual ~OutputBuffer() = default;
 
   operator std::string_view() const {
     return std::string_view(Buffer, CurrentPosition);
@@ -104,18 +104,32 @@ public:
   unsigned CurrentPackIndex = std::numeric_limits<unsigned>::max();
   unsigned CurrentPackMax = std::numeric_limits<unsigned>::max();
 
-  /// When zero, we're printing template args and '>' needs to be parenthesized.
-  /// Use a counter so we can simply increment inside parentheses.
-  unsigned GtIsGt = 1;
+  struct {
+    /// The depth of '(' and ')' inside the currently printed template
+    /// arguments.
+    unsigned ParenDepth = 0;
 
-  bool isGtInsideTemplateArgs() const { return GtIsGt == 0; }
+    /// True if we're currently printing a template argument.
+    bool InsideTemplate = false;
+  } TemplateTracker;
+
+  /// Returns true if we're currently between a '(' and ')' when printing
+  /// template args.
+  bool isInParensInTemplateArgs() const {
+    return TemplateTracker.ParenDepth > 0;
+  }
+
+  /// Returns true if we're printing template args.
+  bool isInsideTemplateArgs() const { return TemplateTracker.InsideTemplate; }
 
   void printOpen(char Open = '(') {
-    GtIsGt++;
+    if (isInsideTemplateArgs())
+      TemplateTracker.ParenDepth++;
     *this += Open;
   }
   void printClose(char Close = ')') {
-    GtIsGt--;
+    if (isInsideTemplateArgs())
+      TemplateTracker.ParenDepth--;
     *this += Close;
   }
 
diff --git a/lib/libcxxabi/src/fallback_malloc.cpp b/lib/libcxxabi/src/fallback_malloc.cpp
index 75788fe9be..6a261e6f00 100644
--- a/lib/libcxxabi/src/fallback_malloc.cpp
+++ b/lib/libcxxabi/src/fallback_malloc.cpp
@@ -16,7 +16,7 @@
 #endif
 #endif
 
-#include <__memory/aligned_alloc.h>
+#include "include/aligned_alloc.h" // from libc++
 #include <__assert>
 #include <stdlib.h> // for malloc, calloc, free
 #include <string.h> // for memset
diff --git a/lib/libcxxabi/src/private_typeinfo.cpp b/lib/libcxxabi/src/private_typeinfo.cpp
index 01a1d2603b..d185f2618a 100644
--- a/lib/libcxxabi/src/private_typeinfo.cpp
+++ b/lib/libcxxabi/src/private_typeinfo.cpp
@@ -831,6 +831,10 @@ bool __pointer_to_member_type_info::can_catch_nested(
 #pragma clang diagnostic ignored "-Wmissing-field-initializers"
 #endif
 
+#pragma GCC diagnostic push
+// __dynamic_cast is called by the compiler, so there is no prototype
+#pragma GCC diagnostic ignored "-Wmissing-prototypes"
+
 // __dynamic_cast
 
 // static_ptr: pointer to an object of type static_type; nonnull, and since the
@@ -953,6 +957,8 @@ __dynamic_cast(const void *static_ptr, const __class_type_info *static_type,
     return const_cast<void*>(dst_ptr);
 }
 
+#pragma GCC diagnostic pop
+
 #ifdef __clang__
 #pragma clang diagnostic pop
 #endif
diff --git a/lib/libcxxabi/src/stdlib_new_delete.cpp b/lib/libcxxabi/src/stdlib_new_delete.cpp
index b5ed59958d..783357c435 100644
--- a/lib/libcxxabi/src/stdlib_new_delete.cpp
+++ b/lib/libcxxabi/src/stdlib_new_delete.cpp
@@ -8,8 +8,8 @@
 
 #include "__cxxabi_config.h"
 #include "abort_message.h"
+#include "include/aligned_alloc.h"        // from libc++
 #include "include/overridable_function.h" // from libc++
-#include <__memory/aligned_alloc.h>
 #include <cstddef>
 #include <cstdlib>
 #include <new>
@@ -63,7 +63,7 @@ static void* operator_new_impl(std::size_t size) {
   return p;
 }
 
-_LIBCPP_OVERRIDABLE_FUNCTION(void*, operator new, (std::size_t size)) _THROW_BAD_ALLOC {
+OVERRIDABLE_FUNCTION void* operator new(std::size_t size) _THROW_BAD_ALLOC {
   void* p = operator_new_impl(size);
   if (p == nullptr)
     __throw_bad_alloc_shim();
@@ -94,7 +94,7 @@ _LIBCPP_WEAK void* operator new(size_t size, const std::nothrow_t&) noexcept {
 #endif
 }
 
-_LIBCPP_OVERRIDABLE_FUNCTION(void*, operator new[], (size_t size)) _THROW_BAD_ALLOC { return ::operator new(size); }
+OVERRIDABLE_FUNCTION void* operator new[](size_t size) _THROW_BAD_ALLOC { return ::operator new(size); }
 
 _LIBCPP_WEAK void* operator new[](size_t size, const std::nothrow_t&) noexcept {
 #if !_LIBCPP_HAS_EXCEPTIONS
@@ -154,7 +154,7 @@ static void* operator_new_aligned_impl(std::size_t size, std::align_val_t alignm
   return p;
 }
 
-_LIBCPP_OVERRIDABLE_FUNCTION(void*, operator new, (std::size_t size, std::align_val_t alignment)) _THROW_BAD_ALLOC {
+OVERRIDABLE_FUNCTION void* operator new(std::size_t size, std::align_val_t alignment) _THROW_BAD_ALLOC {
   void* p = operator_new_aligned_impl(size, alignment);
   if (p == nullptr)
     __throw_bad_alloc_shim();
@@ -185,7 +185,7 @@ _LIBCPP_WEAK void* operator new(size_t size, std::align_val_t alignment, const s
 #  endif
 }
 
-_LIBCPP_OVERRIDABLE_FUNCTION(void*, operator new[], (size_t size, std::align_val_t alignment)) _THROW_BAD_ALLOC {
+OVERRIDABLE_FUNCTION void* operator new[](size_t size, std::align_val_t alignment) _THROW_BAD_ALLOC {
   return ::operator new(size, alignment);
 }
 
diff --git a/lib/libtsan/LICENSE.TXT b/lib/libtsan/LICENSE.TXT
new file mode 100644
index 0000000000..5a79a1b9d5
--- /dev/null
+++ b/lib/libtsan/LICENSE.TXT
@@ -0,0 +1,311 @@
+==============================================================================
+The LLVM Project is under the Apache License v2.0 with LLVM Exceptions:
+==============================================================================
+
+                                 Apache License
+                           Version 2.0, January 2004
+                        http://www.apache.org/licenses/
+
+    TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
+
+    1. Definitions.
+
+      "License" shall mean the terms and conditions for use, reproduction,
+      and distribution as defined by Sections 1 through 9 of this document.
+
+      "Licensor" shall mean the copyright owner or entity authorized by
+      the copyright owner that is granting the License.
+
+      "Legal Entity" shall mean the union of the acting entity and all
+      other entities that control, are controlled by, or are under common
+      control with that entity. For the purposes of this definition,
+      "control" means (i) the power, direct or indirect, to cause the
+      direction or management of such entity, whether by contract or
+      otherwise, or (ii) ownership of fifty percent (50%) or more of the
+      outstanding shares, or (iii) beneficial ownership of such entity.
+
+      "You" (or "Your") shall mean an individual or Legal Entity
+      exercising permissions granted by this License.
+
+      "Source" form shall mean the preferred form for making modifications,
+      including but not limited to software source code, documentation
+      source, and configuration files.
+
+      "Object" form shall mean any form resulting from mechanical
+      transformation or translation of a Source form, including but
+      not limited to compiled object code, generated documentation,
+      and conversions to other media types.
+
+      "Work" shall mean the work of authorship, whether in Source or
+      Object form, made available under the License, as indicated by a
+      copyright notice that is included in or attached to the work
+      (an example is provided in the Appendix below).
+
+      "Derivative Works" shall mean any work, whether in Source or Object
+      form, that is based on (or derived from) the Work and for which the
+      editorial revisions, annotations, elaborations, or other modifications
+      represent, as a whole, an original work of authorship. For the purposes
+      of this License, Derivative Works shall not include works that remain
+      separable from, or merely link (or bind by name) to the interfaces of,
+      the Work and Derivative Works thereof.
+
+      "Contribution" shall mean any work of authorship, including
+      the original version of the Work and any modifications or additions
+      to that Work or Derivative Works thereof, that is intentionally
+      submitted to Licensor for inclusion in the Work by the copyright owner
+      or by an individual or Legal Entity authorized to submit on behalf of
+      the copyright owner. For the purposes of this definition, "submitted"
+      means any form of electronic, verbal, or written communication sent
+      to the Licensor or its representatives, including but not limited to
+      communication on electronic mailing lists, source code control systems,
+      and issue tracking systems that are managed by, or on behalf of, the
+      Licensor for the purpose of discussing and improving the Work, but
+      excluding communication that is conspicuously marked or otherwise
+      designated in writing by the copyright owner as "Not a Contribution."
+
+      "Contributor" shall mean Licensor and any individual or Legal Entity
+      on behalf of whom a Contribution has been received by Licensor and
+      subsequently incorporated within the Work.
+
+    2. Grant of Copyright License. Subject to the terms and conditions of
+      this License, each Contributor hereby grants to You a perpetual,
+      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+      copyright license to reproduce, prepare Derivative Works of,
+      publicly display, publicly perform, sublicense, and distribute the
+      Work and such Derivative Works in Source or Object form.
+
+    3. Grant of Patent License. Subject to the terms and conditions of
+      this License, each Contributor hereby grants to You a perpetual,
+      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+      (except as stated in this section) patent license to make, have made,
+      use, offer to sell, sell, import, and otherwise transfer the Work,
+      where such license applies only to those patent claims licensable
+      by such Contributor that are necessarily infringed by their
+      Contribution(s) alone or by combination of their Contribution(s)
+      with the Work to which such Contribution(s) was submitted. If You
+      institute patent litigation against any entity (including a
+      cross-claim or counterclaim in a lawsuit) alleging that the Work
+      or a Contribution incorporated within the Work constitutes direct
+      or contributory patent infringement, then any patent licenses
+      granted to You under this License for that Work shall terminate
+      as of the date such litigation is filed.
+
+    4. Redistribution. You may reproduce and distribute copies of the
+      Work or Derivative Works thereof in any medium, with or without
+      modifications, and in Source or Object form, provided that You
+      meet the following conditions:
+
+      (a) You must give any other recipients of the Work or
+          Derivative Works a copy of this License; and
+
+      (b) You must cause any modified files to carry prominent notices
+          stating that You changed the files; and
+
+      (c) You must retain, in the Source form of any Derivative Works
+          that You distribute, all copyright, patent, trademark, and
+          attribution notices from the Source form of the Work,
+          excluding those notices that do not pertain to any part of
+          the Derivative Works; and
+
+      (d) If the Work includes a "NOTICE" text file as part of its
+          distribution, then any Derivative Works that You distribute must
+          include a readable copy of the attribution notices contained
+          within such NOTICE file, excluding those notices that do not
+          pertain to any part of the Derivative Works, in at least one
+          of the following places: within a NOTICE text file distributed
+          as part of the Derivative Works; within the Source form or
+          documentation, if provided along with the Derivative Works; or,
+          within a display generated by the Derivative Works, if and
+          wherever such third-party notices normally appear. The contents
+          of the NOTICE file are for informational purposes only and
+          do not modify the License. You may add Your own attribution
+          notices within Derivative Works that You distribute, alongside
+          or as an addendum to the NOTICE text from the Work, provided
+          that such additional attribution notices cannot be construed
+          as modifying the License.
+
+      You may add Your own copyright statement to Your modifications and
+      may provide additional or different license terms and conditions
+      for use, reproduction, or distribution of Your modifications, or
+      for any such Derivative Works as a whole, provided Your use,
+      reproduction, and distribution of the Work otherwise complies with
+      the conditions stated in this License.
+
+    5. Submission of Contributions. Unless You explicitly state otherwise,
+      any Contribution intentionally submitted for inclusion in the Work
+      by You to the Licensor shall be under the terms and conditions of
+      this License, without any additional terms or conditions.
+      Notwithstanding the above, nothing herein shall supersede or modify
+      the terms of any separate license agreement you may have executed
+      with Licensor regarding such Contributions.
+
+    6. Trademarks. This License does not grant permission to use the trade
+      names, trademarks, service marks, or product names of the Licensor,
+      except as required for reasonable and customary use in describing the
+      origin of the Work and reproducing the content of the NOTICE file.
+
+    7. Disclaimer of Warranty. Unless required by applicable law or
+      agreed to in writing, Licensor provides the Work (and each
+      Contributor provides its Contributions) on an "AS IS" BASIS,
+      WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+      implied, including, without limitation, any warranties or conditions
+      of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
+      PARTICULAR PURPOSE. You are solely responsible for determining the
+      appropriateness of using or redistributing the Work and assume any
+      risks associated with Your exercise of permissions under this License.
+
+    8. Limitation of Liability. In no event and under no legal theory,
+      whether in tort (including negligence), contract, or otherwise,
+      unless required by applicable law (such as deliberate and grossly
+      negligent acts) or agreed to in writing, shall any Contributor be
+      liable to You for damages, including any direct, indirect, special,
+      incidental, or consequential damages of any character arising as a
+      result of this License or out of the use or inability to use the
+      Work (including but not limited to damages for loss of goodwill,
+      work stoppage, computer failure or malfunction, or any and all
+      other commercial damages or losses), even if such Contributor
+      has been advised of the possibility of such damages.
+
+    9. Accepting Warranty or Additional Liability. While redistributing
+      the Work or Derivative Works thereof, You may choose to offer,
+      and charge a fee for, acceptance of support, warranty, indemnity,
+      or other liability obligations and/or rights consistent with this
+      License. However, in accepting such obligations, You may act only
+      on Your own behalf and on Your sole responsibility, not on behalf
+      of any other Contributor, and only if You agree to indemnify,
+      defend, and hold each Contributor harmless for any liability
+      incurred by, or claims asserted against, such Contributor by reason
+      of your accepting any such warranty or additional liability.
+
+    END OF TERMS AND CONDITIONS
+
+    APPENDIX: How to apply the Apache License to your work.
+
+      To apply the Apache License to your work, attach the following
+      boilerplate notice, with the fields enclosed by brackets "[]"
+      replaced with your own identifying information. (Don't include
+      the brackets!)  The text should be enclosed in the appropriate
+      comment syntax for the file format. We also recommend that a
+      file or class name and description of purpose be included on the
+      same "printed page" as the copyright notice for easier
+      identification within third-party archives.
+
+    Copyright [yyyy] [name of copyright owner]
+
+    Licensed under the Apache License, Version 2.0 (the "License");
+    you may not use this file except in compliance with the License.
+    You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+    Unless required by applicable law or agreed to in writing, software
+    distributed under the License is distributed on an "AS IS" BASIS,
+    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+    See the License for the specific language governing permissions and
+    limitations under the License.
+
+
+---- LLVM Exceptions to the Apache 2.0 License ----
+
+As an exception, if, as a result of your compiling your source code, portions
+of this Software are embedded into an Object form of such source code, you
+may redistribute such embedded portions in such Object form without complying
+with the conditions of Sections 4(a), 4(b) and 4(d) of the License.
+
+In addition, if you combine or link compiled forms of this Software with
+software that is licensed under the GPLv2 ("Combined Software") and if a
+court of competent jurisdiction determines that the patent provision (Section
+3), the indemnity provision (Section 9) or other Section of the License
+conflicts with the conditions of the GPLv2, you may retroactively and
+prospectively choose to deem waived or otherwise exclude such Section(s) of
+the License, but only in their entirety and only with respect to the Combined
+Software.
+
+==============================================================================
+Software from third parties included in the LLVM Project:
+==============================================================================
+The LLVM Project contains third party software which is under different license
+terms. All such code will be identified clearly using at least one of two
+mechanisms:
+1) It will be in a separate directory tree with its own `LICENSE.txt` or
+   `LICENSE` file at the top containing the specific license and restrictions
+   which apply to that software, or
+2) It will contain specific license and restriction terms at the top of every
+   file.
+
+==============================================================================
+Legacy LLVM License (https://llvm.org/docs/DeveloperPolicy.html#legacy):
+==============================================================================
+
+The compiler_rt library is dual licensed under both the University of Illinois
+"BSD-Like" license and the MIT license.  As a user of this code you may choose
+to use it under either license.  As a contributor, you agree to allow your code
+to be used under both.
+
+Full text of the relevant licenses is included below.
+
+==============================================================================
+
+University of Illinois/NCSA
+Open Source License
+
+Copyright (c) 2009-2019 by the contributors listed in CREDITS.TXT
+
+All rights reserved.
+
+Developed by:
+
+    LLVM Team
+
+    University of Illinois at Urbana-Champaign
+
+    http://llvm.org
+
+Permission is hereby granted, free of charge, to any person obtaining a copy of
+this software and associated documentation files (the "Software"), to deal with
+the Software without restriction, including without limitation the rights to
+use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
+of the Software, and to permit persons to whom the Software is furnished to do
+so, subject to the following conditions:
+
+    * Redistributions of source code must retain the above copyright notice,
+      this list of conditions and the following disclaimers.
+
+    * Redistributions in binary form must reproduce the above copyright notice,
+      this list of conditions and the following disclaimers in the
+      documentation and/or other materials provided with the distribution.
+
+    * Neither the names of the LLVM Team, University of Illinois at
+      Urbana-Champaign, nor the names of its contributors may be used to
+      endorse or promote products derived from this Software without specific
+      prior written permission.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
+FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
+CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS WITH THE
+SOFTWARE.
+
+==============================================================================
+
+Copyright (c) 2009-2015 by the contributors listed in CREDITS.TXT
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+THE SOFTWARE.
diff --git a/lib/libtsan/builtins/assembly.h b/lib/libtsan/builtins/assembly.h
index 89372f18c8..2eddbf468c 100644
--- a/lib/libtsan/builtins/assembly.h
+++ b/lib/libtsan/builtins/assembly.h
@@ -14,7 +14,7 @@
 #ifndef COMPILERRT_ASSEMBLY_H
 #define COMPILERRT_ASSEMBLY_H
 
-#if defined(__linux__) && defined(__CET__)
+#ifdef __CET__
 #if __has_include(<cet.h>)
 #include <cet.h>
 #endif
@@ -71,19 +71,35 @@
 
 #endif
 
+#if defined(__aarch64__) && defined(__ELF__) &&                                \
+    defined(COMPILER_RT_EXECUTE_ONLY_CODE)
+// The assembler always creates an implicit '.text' section with default flags
+// (SHF_ALLOC | SHF_EXECINSTR), which is incompatible with the execute-only
+// '.text' section we want to create here because of the missing
+// SHF_AARCH64_PURECODE section flag. To solve this, we use 'unique,0' to
+// differentiate the two sections. The output will therefore have two separate
+// sections named '.text', where code will be placed into the execute-only
+// '.text' section, and the implicitly-created one will be empty.
+#define TEXT_SECTION                                                           \
+  .section .text,"axy",@progbits,unique,0
+#else // Not an AArch64 ELF execute-only build: use the plain '.text' directive.
+#define TEXT_SECTION                                                           \
+  .text
+#endif // __aarch64__ && __ELF__ && COMPILER_RT_EXECUTE_ONLY_CODE
+
 #if defined(__arm__) || defined(__aarch64__) || defined(__arm64ec__)
 #define FUNC_ALIGN                                                             \
-  .text SEPARATOR                                                              \
   .balign 16 SEPARATOR
 #else
 #define FUNC_ALIGN
 #endif
 
-// BTI and PAC gnu property note
+// BTI, PAC, and GCS gnu property note
 #define NT_GNU_PROPERTY_TYPE_0 5
 #define GNU_PROPERTY_AARCH64_FEATURE_1_AND 0xc0000000
 #define GNU_PROPERTY_AARCH64_FEATURE_1_BTI 1
 #define GNU_PROPERTY_AARCH64_FEATURE_1_PAC 2
+#define GNU_PROPERTY_AARCH64_FEATURE_1_GCS 4
 
 #if defined(__ARM_FEATURE_BTI_DEFAULT)
 #define BTI_FLAG GNU_PROPERTY_AARCH64_FEATURE_1_BTI
@@ -97,6 +113,12 @@
 #define PAC_FLAG 0
 #endif
 
+#if defined(__ARM_FEATURE_GCS_DEFAULT)
+#define GCS_FLAG GNU_PROPERTY_AARCH64_FEATURE_1_GCS
+#else // __ARM_FEATURE_GCS_DEFAULT not defined: contribute no feature bit.
+#define GCS_FLAG 0
+#endif // __ARM_FEATURE_GCS_DEFAULT
+
 #define GNU_PROPERTY(type, value)                                              \
   .pushsection .note.gnu.property, "a" SEPARATOR                               \
   .p2align 3 SEPARATOR                                                         \
@@ -118,11 +140,12 @@
 #define BTI_J
 #endif
 
-#if (BTI_FLAG | PAC_FLAG) != 0
-#define GNU_PROPERTY_BTI_PAC                                                   \
-  GNU_PROPERTY(GNU_PROPERTY_AARCH64_FEATURE_1_AND, BTI_FLAG | PAC_FLAG)
+#if (BTI_FLAG | PAC_FLAG | GCS_FLAG) != 0
+#define GNU_PROPERTY_BTI_PAC_GCS                                               \
+  GNU_PROPERTY(GNU_PROPERTY_AARCH64_FEATURE_1_AND,                             \
+               BTI_FLAG | PAC_FLAG | GCS_FLAG)
 #else
-#define GNU_PROPERTY_BTI_PAC
+#define GNU_PROPERTY_BTI_PAC_GCS
 #endif
 
 #if defined(__clang__) || defined(__GCC_HAVE_DWARF2_CFI_ASM)
@@ -247,6 +270,7 @@
 #endif
 
 #define DEFINE_COMPILERRT_FUNCTION(name)                                       \
+  TEXT_SECTION SEPARATOR                                                       \
   DEFINE_CODE_STATE                                                            \
   FILE_LEVEL_DIRECTIVE SEPARATOR                                               \
   .globl FUNC_SYMBOL(SYMBOL_NAME(name)) SEPARATOR                              \
@@ -256,6 +280,7 @@
   FUNC_SYMBOL(SYMBOL_NAME(name)):
 
 #define DEFINE_COMPILERRT_THUMB_FUNCTION(name)                                 \
+  TEXT_SECTION SEPARATOR                                                       \
   DEFINE_CODE_STATE                                                            \
   FILE_LEVEL_DIRECTIVE SEPARATOR                                               \
   .globl FUNC_SYMBOL(SYMBOL_NAME(name)) SEPARATOR                              \
@@ -265,6 +290,7 @@
   FUNC_SYMBOL(SYMBOL_NAME(name)):
 
 #define DEFINE_COMPILERRT_PRIVATE_FUNCTION(name)                               \
+  TEXT_SECTION SEPARATOR                                                       \
   DEFINE_CODE_STATE                                                            \
   FILE_LEVEL_DIRECTIVE SEPARATOR                                               \
   .globl FUNC_SYMBOL(SYMBOL_NAME(name)) SEPARATOR                              \
@@ -274,6 +300,7 @@
   FUNC_SYMBOL(SYMBOL_NAME(name)):
 
 #define DEFINE_COMPILERRT_PRIVATE_FUNCTION_UNMANGLED(name)                     \
+  TEXT_SECTION SEPARATOR                                                       \
   DEFINE_CODE_STATE                                                            \
   .globl FUNC_SYMBOL(name) SEPARATOR                                           \
   SYMBOL_IS_FUNC(name) SEPARATOR                                               \
@@ -282,6 +309,7 @@
   FUNC_SYMBOL(name):
 
 #define DEFINE_COMPILERRT_OUTLINE_FUNCTION_UNMANGLED(name)                     \
+  TEXT_SECTION SEPARATOR                                                       \
   DEFINE_CODE_STATE                                                            \
   FUNC_ALIGN                                                                   \
   .globl FUNC_SYMBOL(name) SEPARATOR                                           \
@@ -296,7 +324,7 @@
   .globl FUNC_SYMBOL(SYMBOL_NAME(name)) SEPARATOR                              \
   SYMBOL_IS_FUNC(SYMBOL_NAME(name)) SEPARATOR                                  \
   DECLARE_SYMBOL_VISIBILITY(name) SEPARATOR                                    \
-  .set FUNC_SYMBOL(SYMBOL_NAME(name)), FUNC_SYMBOL(target) SEPARATOR
+  .set FUNC_SYMBOL(SYMBOL_NAME(name)), FUNC_SYMBOL(SYMBOL_NAME(target)) SEPARATOR
 
 #if defined(__ARM_EABI__)
 #define DEFINE_AEABI_FUNCTION_ALIAS(aeabi_name, name)                          \
@@ -329,4 +357,9 @@
 #endif
 #endif
 
+#if defined(__ASSEMBLER__) && (defined(__i386__) || defined(__amd64__)) &&     \
+    !defined(__arm64ec__)
+.att_syntax
+#endif
+
 #endif // COMPILERRT_ASSEMBLY_H
diff --git a/lib/libtsan/interception/interception_win.cpp b/lib/libtsan/interception/interception_win.cpp
index 246a22c56c..8568724251 100644
--- a/lib/libtsan/interception/interception_win.cpp
+++ b/lib/libtsan/interception/interception_win.cpp
@@ -646,6 +646,7 @@ static size_t GetInstructionSize(uptr address, size_t* rel_offset = nullptr) {
     case 0xC033:  // 33 C0 : xor eax, eax
     case 0xC933:  // 33 C9 : xor ecx, ecx
     case 0xD233:  // 33 D2 : xor edx, edx
+    case 0xFF33:  // 33 FF : xor edi, edi
     case 0x9066:  // 66 90 : xchg %ax,%ax (Two-byte NOP)
     case 0xDB84:  // 84 DB : test bl,bl
     case 0xC084:  // 84 C0 : test al,al
@@ -764,6 +765,7 @@ static size_t GetInstructionSize(uptr address, size_t* rel_offset = nullptr) {
 
   switch (0x00FFFFFF & *(u32 *)address) {
     case 0x10b70f:    // 0f b7 10 : movzx edx, WORD PTR [rax]
+    case 0x02b70f:    // 0f b7 02 : movzx eax, WORD PTR [rdx]
     case 0xc00b4d:    // 4d 0b c0 : or r8, r8
     case 0xc03345:    // 45 33 c0 : xor r8d, r8d
     case 0xc08548:    // 48 85 c0 : test rax, rax
@@ -799,6 +801,7 @@ static size_t GetInstructionSize(uptr address, size_t* rel_offset = nullptr) {
     case 0xc9854d:    // 4d 85 c9 : test r9, r9
     case 0xc98b4c:    // 4c 8b c9 : mov r9, rcx
     case 0xd12948:    // 48 29 d1 : sub rcx, rdx
+    case 0xc22b4c:    // 4c 2b c2 : sub r8, rdx
     case 0xca2b48:    // 48 2b ca : sub rcx, rdx
     case 0xca3b48:    // 48 3b ca : cmp rcx, rdx
     case 0xd12b48:    // 48 2b d1 : sub rdx, rcx
@@ -813,6 +816,7 @@ static size_t GetInstructionSize(uptr address, size_t* rel_offset = nullptr) {
     case 0xd9f748:    // 48 f7 d9 : neg rcx
     case 0xc03145:    // 45 31 c0 : xor r8d,r8d
     case 0xc93145:    // 45 31 c9 : xor r9d,r9d
+    case 0xd23345:    // 45 33 d2 : xor r10d, r10d
     case 0xdb3345:    // 45 33 db : xor r11d, r11d
     case 0xc08445:    // 45 84 c0 : test r8b,r8b
     case 0xd28445:    // 45 84 d2 : test r10b,r10b
diff --git a/lib/libtsan/sanitizer_common/sanitizer_allocator_primary32.h b/lib/libtsan/sanitizer_common/sanitizer_allocator_primary32.h
index 602b197c42..0faf9b3c15 100644
--- a/lib/libtsan/sanitizer_common/sanitizer_allocator_primary32.h
+++ b/lib/libtsan/sanitizer_common/sanitizer_allocator_primary32.h
@@ -288,6 +288,7 @@ class SizeClassAllocator32 {
   uptr ComputeRegionId(uptr mem) const {
     if (SANITIZER_SIGN_EXTENDED_ADDRESSES)
       mem &= (kSpaceSize - 1);
+    mem -= kSpaceBeg;
     const uptr res = mem >> kRegionSizeLog;
     CHECK_LT(res, kNumPossibleRegions);
     return res;
diff --git a/lib/libtsan/sanitizer_common/sanitizer_allocator_primary64.h b/lib/libtsan/sanitizer_common/sanitizer_allocator_primary64.h
index 51ac1b6ae4..b39eb1538c 100644
--- a/lib/libtsan/sanitizer_common/sanitizer_allocator_primary64.h
+++ b/lib/libtsan/sanitizer_common/sanitizer_allocator_primary64.h
@@ -113,6 +113,24 @@ class SizeClassAllocator64 {
   // ~(uptr)0.
   void Init(s32 release_to_os_interval_ms, uptr heap_start = 0) {
     uptr TotalSpaceSize = kSpaceSize + AdditionalSize();
+
+    uptr MaxAddr = GetMaxUserVirtualAddress();
+    // VReport does not call the sanitizer allocator.
+    VReport(3, "Max user virtual address: 0x%zx\n", MaxAddr);
+    VReport(3, "Total space size for primary allocator: 0x%zx\n",
+            TotalSpaceSize);
+    // TODO: revise the check if we ever configure sanitizers to deliberately
+    //       map beyond the 2**48 barrier (note that Linux pretends the VMA is
+    //       limited to 48-bit for backwards compatibility, but allows apps to
+    //       explicitly specify an address beyond that).
+    if (heap_start + TotalSpaceSize >= MaxAddr) {
+      // We can't easily adjust the requested heap size, because kSpaceSize is
+      // const (for optimization) and used throughout the code.
+      VReport(0, "Error: heap size %zx exceeds max user virtual address %zx\n",
+              TotalSpaceSize, MaxAddr);
+      VReport(
+          0, "Try using a kernel that allows a larger virtual address space\n");
+    }
     PremappedHeap = heap_start != 0;
     if (PremappedHeap) {
       CHECK(!kUsingConstantSpaceBeg);
diff --git a/lib/libtsan/sanitizer_common/sanitizer_common.h b/lib/libtsan/sanitizer_common/sanitizer_common.h
index 120c2861c1..515a7c9cdf 100644
--- a/lib/libtsan/sanitizer_common/sanitizer_common.h
+++ b/lib/libtsan/sanitizer_common/sanitizer_common.h
@@ -78,8 +78,8 @@ uptr GetMmapGranularity();
 uptr GetMaxVirtualAddress();
 uptr GetMaxUserVirtualAddress();
 // Threads
-tid_t GetTid();
-int TgKill(pid_t pid, tid_t tid, int sig);
+ThreadID GetTid();
+int TgKill(pid_t pid, ThreadID tid, int sig);
 uptr GetThreadSelf();
 void GetThreadStackTopAndBottom(bool at_initialization, uptr *stack_top,
                                 uptr *stack_bottom);
@@ -390,6 +390,9 @@ void ReportDeadlySignal(const SignalContext &sig, u32 tid,
 void SetAlternateSignalStack();
 void UnsetAlternateSignalStack();
 
+bool IsSignalHandlerFromSanitizer(int signum);
+bool SetSignalHandlerFromSanitizer(int signum, bool new_state);
+
 // Construct a one-line string:
 //   SUMMARY: SanitizerToolName: error_message
 // and pass it to __sanitizer_report_error_summary.
@@ -484,6 +487,13 @@ inline uptr Log2(uptr x) {
   return LeastSignificantSetBitIndex(x);
 }
 
+inline bool IntervalsAreSeparate(uptr start1, uptr end1, uptr start2,
+                                 uptr end2) {  // True iff the two ranges are disjoint.
+  CHECK_LE(start1, end1);  // Precondition: each range has start <= end.
+  CHECK_LE(start2, end2);
+  return (end1 < start2) || (end2 < start1);  // Strict '<': a shared bound overlaps.
+}
+
 // Don't use std::min, std::max or std::swap, to minimize dependency
 // on libstdc++.
 template <class T>
@@ -734,6 +744,7 @@ enum ModuleArch {
   kModuleArchARMV7S,
   kModuleArchARMV7K,
   kModuleArchARM64,
+  kModuleArchARM64E,
   kModuleArchLoongArch64,
   kModuleArchRISCV64,
   kModuleArchHexagon
@@ -807,6 +818,8 @@ inline const char *ModuleArchToString(ModuleArch arch) {
       return "armv7k";
     case kModuleArchARM64:
       return "arm64";
+    case kModuleArchARM64E:
+      return "arm64e";
     case kModuleArchLoongArch64:
       return "loongarch64";
     case kModuleArchRISCV64:
diff --git a/lib/libtsan/sanitizer_common/sanitizer_common_interceptors.inc b/lib/libtsan/sanitizer_common/sanitizer_common_interceptors.inc
index 2d6cf7fc32..b10ce7fa44 100644
--- a/lib/libtsan/sanitizer_common/sanitizer_common_interceptors.inc
+++ b/lib/libtsan/sanitizer_common/sanitizer_common_interceptors.inc
@@ -1285,8 +1285,34 @@ INTERCEPTOR(int, puts, char *s) {
 #endif
 
 #if SANITIZER_INTERCEPT_PRCTL
-INTERCEPTOR(int, prctl, int option, unsigned long arg2, unsigned long arg3,
-            unsigned long arg4, unsigned long arg5) {
+
+#  if defined(__aarch64__)
+// https://llvm.org/docs/PointerAuth.html
+// AArch64 is currently the only architecture with full PAC support.
+// Avoid adding PAC instructions to prevent crashes caused by
+// prctl(PR_PAC_RESET_KEYS, ...). Since PR_PAC_RESET_KEYS resets the
+// authentication key, using the old key afterward will lead to a crash.
+
+#    if defined(__ARM_FEATURE_BTI_DEFAULT)  // BTI on by default: request "bti" only.
+#      define BRANCH_PROTECTION_ATTRIBUTE \
+        __attribute__((target("branch-protection=bti")))
+#    else  // BTI not on by default: request no branch protection at all.
+#      define BRANCH_PROTECTION_ATTRIBUTE \
+        __attribute__((target("branch-protection=none")))
+#    endif  // __ARM_FEATURE_BTI_DEFAULT
+
+#    define PRCTL_INTERCEPTOR(ret_type, func, ...)                          \
+      DEFINE_REAL(ret_type, func, __VA_ARGS__)                              \
+      DECLARE_WRAPPER(ret_type, func, __VA_ARGS__)                          \
+      extern "C" INTERCEPTOR_ATTRIBUTE BRANCH_PROTECTION_ATTRIBUTE ret_type \
+      WRAP(func)(__VA_ARGS__)
+
+#  else  // !__aarch64__: PRCTL_INTERCEPTOR is just the regular INTERCEPTOR macro.
+#    define PRCTL_INTERCEPTOR INTERCEPTOR
+#  endif  // __aarch64__
+
+PRCTL_INTERCEPTOR(int, prctl, int option, unsigned long arg2,
+                  unsigned long arg3, unsigned long arg4, unsigned long arg5) {
   void *ctx;
   COMMON_INTERCEPTOR_ENTER(ctx, prctl, option, arg2, arg3, arg4, arg5);
   static const int PR_SET_NAME = 15;
@@ -1300,7 +1326,7 @@ INTERCEPTOR(int, prctl, int option, unsigned long arg2, unsigned long arg3,
   static const int PR_SET_SECCOMP = 22;
   static const int SECCOMP_MODE_FILTER = 2;
 #  endif
-  if (option == PR_SET_VMA && arg2 == 0UL) {
+  if (option == PR_SET_VMA && arg2 == 0UL && arg5 != 0UL) {
     char *name = (char *)arg5;
     COMMON_INTERCEPTOR_READ_RANGE(ctx, name, internal_strlen(name) + 1);
   }
@@ -1326,7 +1352,7 @@ INTERCEPTOR(int, prctl, int option, unsigned long arg2, unsigned long arg3,
   }
   return res;
 }
-#define INIT_PRCTL COMMON_INTERCEPT_FUNCTION(prctl)
+#  define INIT_PRCTL COMMON_INTERCEPT_FUNCTION(prctl)
 #else
 #define INIT_PRCTL
 #endif  // SANITIZER_INTERCEPT_PRCTL
diff --git a/lib/libtsan/sanitizer_common/sanitizer_common_interceptors_ioctl.inc b/lib/libtsan/sanitizer_common/sanitizer_common_interceptors_ioctl.inc
index 08c2be47f5..673f284b6a 100644
--- a/lib/libtsan/sanitizer_common/sanitizer_common_interceptors_ioctl.inc
+++ b/lib/libtsan/sanitizer_common/sanitizer_common_interceptors_ioctl.inc
@@ -344,12 +344,16 @@ static void ioctl_table_fill() {
   _(SOUND_PCM_WRITE_CHANNELS, WRITE, sizeof(int));
   _(SOUND_PCM_WRITE_FILTER, WRITE, sizeof(int));
   _(TCFLSH, NONE, 0);
+#    if SANITIZER_TERMIOS_IOCTL_CONSTANTS
   _(TCGETS, WRITE, struct_termios_sz);
+#    endif
   _(TCSBRK, NONE, 0);
   _(TCSBRKP, NONE, 0);
+#    if SANITIZER_TERMIOS_IOCTL_CONSTANTS
   _(TCSETS, READ, struct_termios_sz);
   _(TCSETSF, READ, struct_termios_sz);
   _(TCSETSW, READ, struct_termios_sz);
+#    endif
   _(TCXONC, NONE, 0);
   _(TIOCGLCKTRMIOS, WRITE, struct_termios_sz);
   _(TIOCGSOFTCAR, WRITE, sizeof(int));
diff --git a/lib/libtsan/sanitizer_common/sanitizer_common_interceptors_vfork_aarch64.inc.S b/lib/libtsan/sanitizer_common/sanitizer_common_interceptors_vfork_aarch64.inc.S
index cdfa6f1d7f..c5c2180e0d 100644
--- a/lib/libtsan/sanitizer_common/sanitizer_common_interceptors_vfork_aarch64.inc.S
+++ b/lib/libtsan/sanitizer_common/sanitizer_common_interceptors_vfork_aarch64.inc.S
@@ -5,6 +5,7 @@
 
 ASM_HIDDEN(COMMON_INTERCEPTOR_SPILL_AREA)
 
+TEXT_SECTION
 .comm _ZN14__interception10real_vforkE,8,8
 .globl ASM_WRAPPER_NAME(vfork)
 ASM_TYPE_FUNCTION(ASM_WRAPPER_NAME(vfork))
@@ -43,6 +44,6 @@ ASM_SIZE(vfork)
 ASM_INTERCEPTOR_TRAMPOLINE(vfork)
 ASM_TRAMPOLINE_ALIAS(vfork, vfork)
 
-GNU_PROPERTY_BTI_PAC
+GNU_PROPERTY_BTI_PAC_GCS
 
 #endif
diff --git a/lib/libtsan/sanitizer_common/sanitizer_common_interceptors_vfork_i386.inc.S b/lib/libtsan/sanitizer_common/sanitizer_common_interceptors_vfork_i386.inc.S
index c633014e2d..5ef090c003 100644
--- a/lib/libtsan/sanitizer_common/sanitizer_common_interceptors_vfork_i386.inc.S
+++ b/lib/libtsan/sanitizer_common/sanitizer_common_interceptors_vfork_i386.inc.S
@@ -2,6 +2,8 @@
 
 #include "sanitizer_common/sanitizer_asm.h"
 
+.att_syntax
+
 .comm _ZN14__interception10real_vforkE,4,4
 .globl ASM_WRAPPER_NAME(vfork)
 ASM_TYPE_FUNCTION(ASM_WRAPPER_NAME(vfork))
diff --git a/lib/libtsan/sanitizer_common/sanitizer_common_interceptors_vfork_x86_64.inc.S b/lib/libtsan/sanitizer_common/sanitizer_common_interceptors_vfork_x86_64.inc.S
index 5500f817ae..9c85407fe0 100644
--- a/lib/libtsan/sanitizer_common/sanitizer_common_interceptors_vfork_x86_64.inc.S
+++ b/lib/libtsan/sanitizer_common/sanitizer_common_interceptors_vfork_x86_64.inc.S
@@ -2,6 +2,8 @@
 
 #include "sanitizer_common/sanitizer_asm.h"
 
+.att_syntax
+
 .comm _ZN14__interception10real_vforkE,8,8
 .globl ASM_WRAPPER_NAME(vfork)
 ASM_TYPE_FUNCTION(ASM_WRAPPER_NAME(vfork))
diff --git a/lib/libtsan/sanitizer_common/sanitizer_common_syscalls.inc b/lib/libtsan/sanitizer_common/sanitizer_common_syscalls.inc
index 521fc116f2..ee3ac723e3 100644
--- a/lib/libtsan/sanitizer_common/sanitizer_common_syscalls.inc
+++ b/lib/libtsan/sanitizer_common/sanitizer_common_syscalls.inc
@@ -143,6 +143,12 @@ struct sanitizer_kernel_sockaddr {
   char sa_data[14];
 };
 
+struct sanitizer_kernel_open_how {
+  u64 flags;
+  u64 mode;
+  u64 resolve;
+};
+
 // Real sigset size is always passed as a syscall argument.
 // Declare it "void" to catch sizeof(kernel_sigset_t).
 typedef void kernel_sigset_t;
@@ -2843,6 +2849,18 @@ PRE_SYSCALL(openat)(long dfd, const void *filename, long flags, long mode) {
 POST_SYSCALL(openat)
 (long res, long dfd, const void *filename, long flags, long mode) {}
 
+PRE_SYSCALL(openat2)(long dfd, const void* filename,
+                     const sanitizer_kernel_open_how* how, uptr howlen) {
+  if (filename)
+    PRE_READ(filename, __sanitizer::internal_strlen((const char*)filename) + 1);
+
+  if (how)
+    PRE_READ(how, howlen);
+}
+
+POST_SYSCALL(openat2)(long res, long dfd, const void* filename,
+                      const sanitizer_kernel_open_how* how, uptr howlen) {}
+
 PRE_SYSCALL(newfstatat)
 (long dfd, const void *filename, void *statbuf, long flag) {
   if (filename)
diff --git a/lib/libtsan/sanitizer_common/sanitizer_coverage_interface.inc b/lib/libtsan/sanitizer_common/sanitizer_coverage_interface.inc
deleted file mode 100644
index 9d36a40270..0000000000
--- a/lib/libtsan/sanitizer_common/sanitizer_coverage_interface.inc
+++ /dev/null
@@ -1,43 +0,0 @@
-//===-- sanitizer_coverage_interface.inc ----------------------------------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-// Sanitizer Coverage interface list.
-//===----------------------------------------------------------------------===//
-INTERFACE_FUNCTION(__sanitizer_cov_dump)
-INTERFACE_FUNCTION(__sanitizer_cov_reset)
-INTERFACE_FUNCTION(__sanitizer_dump_coverage)
-INTERFACE_FUNCTION(__sanitizer_dump_trace_pc_guard_coverage)
-INTERFACE_WEAK_FUNCTION(__sancov_default_options)
-INTERFACE_WEAK_FUNCTION(__sanitizer_cov_trace_cmp)
-INTERFACE_WEAK_FUNCTION(__sanitizer_cov_trace_cmp1)
-INTERFACE_WEAK_FUNCTION(__sanitizer_cov_trace_cmp2)
-INTERFACE_WEAK_FUNCTION(__sanitizer_cov_trace_cmp4)
-INTERFACE_WEAK_FUNCTION(__sanitizer_cov_trace_cmp8)
-INTERFACE_WEAK_FUNCTION(__sanitizer_cov_trace_const_cmp1)
-INTERFACE_WEAK_FUNCTION(__sanitizer_cov_trace_const_cmp2)
-INTERFACE_WEAK_FUNCTION(__sanitizer_cov_trace_const_cmp4)
-INTERFACE_WEAK_FUNCTION(__sanitizer_cov_trace_const_cmp8)
-INTERFACE_WEAK_FUNCTION(__sanitizer_cov_trace_div4)
-INTERFACE_WEAK_FUNCTION(__sanitizer_cov_trace_div8)
-INTERFACE_WEAK_FUNCTION(__sanitizer_cov_trace_gep)
-INTERFACE_WEAK_FUNCTION(__sanitizer_cov_trace_pc_guard)
-INTERFACE_WEAK_FUNCTION(__sanitizer_cov_trace_pc_guard_init)
-INTERFACE_WEAK_FUNCTION(__sanitizer_cov_trace_pc_indir)
-INTERFACE_WEAK_FUNCTION(__sanitizer_cov_load1)
-INTERFACE_WEAK_FUNCTION(__sanitizer_cov_load2)
-INTERFACE_WEAK_FUNCTION(__sanitizer_cov_load4)
-INTERFACE_WEAK_FUNCTION(__sanitizer_cov_load8)
-INTERFACE_WEAK_FUNCTION(__sanitizer_cov_load16)
-INTERFACE_WEAK_FUNCTION(__sanitizer_cov_store1)
-INTERFACE_WEAK_FUNCTION(__sanitizer_cov_store2)
-INTERFACE_WEAK_FUNCTION(__sanitizer_cov_store4)
-INTERFACE_WEAK_FUNCTION(__sanitizer_cov_store8)
-INTERFACE_WEAK_FUNCTION(__sanitizer_cov_store16)
-INTERFACE_WEAK_FUNCTION(__sanitizer_cov_trace_switch)
-INTERFACE_WEAK_FUNCTION(__sanitizer_cov_8bit_counters_init)
-INTERFACE_WEAK_FUNCTION(__sanitizer_cov_bool_flag_init)
-INTERFACE_WEAK_FUNCTION(__sanitizer_cov_pcs_init)
diff --git a/lib/libtsan/sanitizer_common/sanitizer_file.cpp b/lib/libtsan/sanitizer_common/sanitizer_file.cpp
index 9236a458cd..e8f219b941 100644
--- a/lib/libtsan/sanitizer_common/sanitizer_file.cpp
+++ b/lib/libtsan/sanitizer_common/sanitizer_file.cpp
@@ -36,9 +36,17 @@ void RawWrite(const char *buffer) {
 
 void ReportFile::ReopenIfNecessary() {
   mu->CheckLocked();
-  if (fd == kStdoutFd || fd == kStderrFd) return;
-
   uptr pid = internal_getpid();
+  if (fallbackToStderrActive && fd_pid != pid) {
+    // If fallbackToStderrActive is set then we fell back to stderr. If this is
+    // a new process, mark fd as invalid so we attempt to open again.
+    CHECK_EQ(fd, kStderrFd);
+    fd = kInvalidFd;
+    fallbackToStderrActive = false;
+  }
+  if (fd == kStdoutFd || fd == kStderrFd)
+    return;
+
   // If in tracer, use the parent's file.
   if (pid == stoptheworld_tracer_pid)
     pid = stoptheworld_tracer_ppid;
@@ -48,8 +56,7 @@ void ReportFile::ReopenIfNecessary() {
     // process, close it now.
     if (fd_pid == pid)
       return;
-    else
-      CloseFile(fd);
+    CloseFile(fd);
   }
 
   const char *exe_name = GetProcessName();
@@ -65,18 +72,24 @@ void ReportFile::ReopenIfNecessary() {
   error_t err;
   fd = OpenFile(full_path, WrOnly, &err);
   if (fd == kInvalidFd) {
-    const char *ErrorMsgPrefix = "ERROR: Can't open file: ";
+    bool fallback = common_flags()->log_fallback_to_stderr;
+    const char *ErrorMsgPrefix =
+        fallback ? "WARNING: Can't open file, falling back to stderr: "
+                 : "ERROR: Can't open file: ";
     WriteToFile(kStderrFd, ErrorMsgPrefix, internal_strlen(ErrorMsgPrefix));
     WriteToFile(kStderrFd, full_path, internal_strlen(full_path));
     char errmsg[100];
     internal_snprintf(errmsg, sizeof(errmsg), " (reason: %d)\n", err);
     WriteToFile(kStderrFd, errmsg, internal_strlen(errmsg));
-    Die();
+    if (!fallback)
+      Die();
+    fallbackToStderrActive = true;
+    fd = kStderrFd;
   }
   fd_pid = pid;
 }
 
-static void RecursiveCreateParentDirs(char *path) {
+static void RecursiveCreateParentDirs(char *path, fd_t &fd) {
   if (path[0] == '\0')
     return;
   for (int i = 1; path[i] != '\0'; ++i) {
@@ -85,12 +98,19 @@ static void RecursiveCreateParentDirs(char *path) {
       continue;
     path[i] = '\0';
     if (!DirExists(path) && !CreateDir(path)) {
-      const char *ErrorMsgPrefix = "ERROR: Can't create directory: ";
+      bool fallback = common_flags()->log_fallback_to_stderr;
+      const char *ErrorMsgPrefix =
+          fallback ? "WARNING: Can't create directory, falling back to stderr: "
+                   : "ERROR: Can't create directory: ";
       WriteToFile(kStderrFd, ErrorMsgPrefix, internal_strlen(ErrorMsgPrefix));
       WriteToFile(kStderrFd, path, internal_strlen(path));
       const char *ErrorMsgSuffix = "\n";
       WriteToFile(kStderrFd, ErrorMsgSuffix, internal_strlen(ErrorMsgSuffix));
-      Die();
+      if (!fallback)
+        Die();
+      path[i] = save;
+      fd = kStderrFd;
+      return;
     }
     path[i] = save;
   }
@@ -108,6 +128,9 @@ static void ParseAndSetPath(const char *pattern, char *dest,
   CHECK(dest);
   CHECK_GE(dest_size, 1);
   dest[0] = '\0';
+  // Return empty string if empty string was passed
+  if (internal_strlen(pattern) == 0)
+    return;
   uptr next_substr_start_idx = 0;
   for (uptr i = 0; i < internal_strlen(pattern) - 1; i++) {
     if (pattern[i] != '%')
@@ -161,12 +184,17 @@ void ReportFile::SetReportPath(const char *path) {
   if (path) {
     uptr len = internal_strlen(path);
     if (len > sizeof(path_prefix) - 100) {
-      const char *message = "ERROR: Path is too long: ";
+      bool fallback = common_flags()->log_fallback_to_stderr;
+      const char *message =
+          fallback ? "WARNING: Path is too long, falling back to stderr: "
+                   : "ERROR: Path is too long: ";
       WriteToFile(kStderrFd, message, internal_strlen(message));
       WriteToFile(kStderrFd, path, 8);
       message = "...\n";
       WriteToFile(kStderrFd, message, internal_strlen(message));
-      Die();
+      if (!fallback)
+        Die();
+      path = "stderr";
     }
   }
 
@@ -180,7 +208,7 @@ void ReportFile::SetReportPath(const char *path) {
     fd = kStdoutFd;
   } else {
     ParseAndSetPath(path, path_prefix, kMaxPathLength);
-    RecursiveCreateParentDirs(path_prefix);
+    RecursiveCreateParentDirs(path_prefix, fd);
   }
 }
 
diff --git a/lib/libtsan/sanitizer_common/sanitizer_file.h b/lib/libtsan/sanitizer_common/sanitizer_file.h
index bef2c842d9..b3a5fed922 100644
--- a/lib/libtsan/sanitizer_common/sanitizer_file.h
+++ b/lib/libtsan/sanitizer_common/sanitizer_file.h
@@ -43,6 +43,9 @@ struct ReportFile {
   // PID of the process that opened fd. If a fork() occurs,
   // the PID of child will be different from fd_pid.
   uptr fd_pid;
+  // Set to true if the last attempt to open the logfile failed, perhaps due to
+  // permission errors, and reporting fell back to stderr.
+  bool fallbackToStderrActive = false;
 
  private:
   void ReopenIfNecessary();
diff --git a/lib/libtsan/sanitizer_common/sanitizer_flags.inc b/lib/libtsan/sanitizer_common/sanitizer_flags.inc
index c1e3530618..5f449907f6 100644
--- a/lib/libtsan/sanitizer_common/sanitizer_flags.inc
+++ b/lib/libtsan/sanitizer_common/sanitizer_flags.inc
@@ -65,6 +65,8 @@ COMMON_FLAG(
     bool, log_to_syslog, (bool)SANITIZER_ANDROID || (bool)SANITIZER_APPLE,
     "Write all sanitizer output to syslog in addition to other means of "
     "logging.")
+COMMON_FLAG(bool, log_fallback_to_stderr, false,
+            "When set, fallback to stderr if we are unable to open log path.")
 COMMON_FLAG(
     int, verbosity, 0,
     "Verbosity level (0 - silent, 1 - a bit of output, 2+ - more output).")
@@ -111,6 +113,11 @@ COMMON_FLAG(HandleSignalMode, handle_sigfpe, kHandleSignalYes,
 COMMON_FLAG(bool, allow_user_segv_handler, true,
             "Deprecated. True has no effect, use handle_sigbus=1. If false, "
             "handle_*=1 will be upgraded to handle_*=2.")
+COMMON_FLAG(bool, cloak_sanitizer_signal_handlers, false,
+            "If set, signal/sigaction will pretend that sanitizers did not "
+            "preinstall any signal handlers. If the user subsequently installs "
+            "a signal handler, this will disable cloaking for the respective "
+            "signal.")
 COMMON_FLAG(bool, use_sigaltstack, true,
             "If set, uses alternate stack for signal handling.")
 COMMON_FLAG(bool, detect_deadlocks, true,
diff --git a/lib/libtsan/sanitizer_common/sanitizer_fuchsia.cpp b/lib/libtsan/sanitizer_common/sanitizer_fuchsia.cpp
index 1ca50eb186..3c61b60802 100644
--- a/lib/libtsan/sanitizer_common/sanitizer_fuchsia.cpp
+++ b/lib/libtsan/sanitizer_common/sanitizer_fuchsia.cpp
@@ -14,6 +14,7 @@
 #include "sanitizer_fuchsia.h"
 #if SANITIZER_FUCHSIA
 
+#  include <limits.h>
 #  include <pthread.h>
 #  include <stdlib.h>
 #  include <unistd.h>
@@ -68,7 +69,7 @@ int internal_dlinfo(void *handle, int request, void *p) { UNIMPLEMENTED(); }
 
 uptr GetThreadSelf() { return reinterpret_cast<uptr>(thrd_current()); }
 
-tid_t GetTid() { return GetThreadSelf(); }
+ThreadID GetTid() { return GetThreadSelf(); }
 
 void Abort() { abort(); }
 
@@ -117,11 +118,37 @@ uptr GetMmapGranularity() { return _zx_system_get_page_size(); }
 
 sanitizer_shadow_bounds_t ShadowBounds;
 
+// Any sanitizer that utilizes shadow should explicitly call this whenever it's
+// appropriate for that sanitizer to reference shadow bounds. For ASan, this is
+// done in `InitializeShadowMemory` and for HWASan, this is done in
+// `InitShadow`.
 void InitShadowBounds() { ShadowBounds = __sanitizer_shadow_bounds(); }
 
+// TODO(leonardchan): It's not immediately clear from a user perspective if
+// `GetMaxUserVirtualAddress` should be called exactly once on runtime startup
+// or can be called multiple times. Currently it looks like most instances of
+// `GetMaxUserVirtualAddress` are meant to be called once, but if someone
+// decides to call this multiple times in the future, we should have a separate
+// function that's ok to call multiple times. Ideally we would just invoke this
+// syscall once. Also for Fuchsia, this syscall technically gets invoked twice
+// since `__sanitizer_shadow_bounds` also invokes this syscall under the hood.
 uptr GetMaxUserVirtualAddress() {
-  InitShadowBounds();
-  return ShadowBounds.memory_limit - 1;
+  zx_info_vmar_t info;
+  zx_status_t status = _zx_object_get_info(_zx_vmar_root_self(), ZX_INFO_VMAR,
+                                           &info, sizeof(info), NULL, NULL);
+  CHECK_EQ(status, ZX_OK);
+
+  // Find the top of the accessible address space.
+  uintptr_t top = info.base + info.len;
+
+  // Round it up to a power-of-two size.  There may be some pages at
+  // the top that can't actually be mapped, but for purposes of the
+  // shadow, we'll pretend they could be.
+  int bit = (sizeof(uintptr_t) * CHAR_BIT) - __builtin_clzl(top);
+  if (top != (uintptr_t)1 << bit)
+    top = (uintptr_t)1 << (bit + 1);
+
+  return top - 1;
 }
 
 uptr GetMaxVirtualAddress() { return GetMaxUserVirtualAddress(); }
diff --git a/lib/libtsan/sanitizer_common/sanitizer_haiku.cpp b/lib/libtsan/sanitizer_common/sanitizer_haiku.cpp
index 7cf2437d5b..7c11441756 100644
--- a/lib/libtsan/sanitizer_common/sanitizer_haiku.cpp
+++ b/lib/libtsan/sanitizer_common/sanitizer_haiku.cpp
@@ -231,12 +231,12 @@ uptr internal_execve(const char *filename, char *const argv[],
 }
 
 #  if 0
-tid_t GetTid() {
+ThreadID GetTid() {
   DEFINE__REAL(int, _lwp_self);
   return _REAL(_lwp_self);
 }
 
-int TgKill(pid_t pid, tid_t tid, int sig) {
+int TgKill(pid_t pid, ThreadID tid, int sig) {
   DEFINE__REAL(int, _lwp_kill, int a, int b);
   (void)pid;
   return _REAL(_lwp_kill, tid, sig);
diff --git a/lib/libtsan/sanitizer_common/sanitizer_internal_defs.h b/lib/libtsan/sanitizer_common/sanitizer_internal_defs.h
index fff60c96f6..c719e2a8ef 100644
--- a/lib/libtsan/sanitizer_common/sanitizer_internal_defs.h
+++ b/lib/libtsan/sanitizer_common/sanitizer_internal_defs.h
@@ -209,7 +209,7 @@ typedef long ssize;
 typedef sptr ssize;
 #endif
 
-typedef u64 tid_t;
+typedef u64 ThreadID;
 
 // ----------- ATTENTION -------------
 // This header should NOT include any other headers to avoid portability issues.
diff --git a/lib/libtsan/sanitizer_common/sanitizer_libc.cpp b/lib/libtsan/sanitizer_common/sanitizer_libc.cpp
index 9318066afe..ece768ec8d 100644
--- a/lib/libtsan/sanitizer_common/sanitizer_libc.cpp
+++ b/lib/libtsan/sanitizer_common/sanitizer_libc.cpp
@@ -190,6 +190,14 @@ uptr internal_strlcat(char *dst, const char *src, uptr maxlen) {
   return dstlen + srclen;
 }
 
+char* internal_strcat(char* dst, const char* src) {
+  uptr len = internal_strlen(dst);
+  uptr i;
+  for (i = 0; src[i]; i++) dst[len + i] = src[i];
+  dst[len + i] = 0;
+  return dst;
+}
+
 char *internal_strncat(char *dst, const char *src, uptr n) {
   uptr len = internal_strlen(dst);
   uptr i;
diff --git a/lib/libtsan/sanitizer_common/sanitizer_libc.h b/lib/libtsan/sanitizer_common/sanitizer_libc.h
index 1906569e2a..2f7ec9249e 100644
--- a/lib/libtsan/sanitizer_common/sanitizer_libc.h
+++ b/lib/libtsan/sanitizer_common/sanitizer_libc.h
@@ -59,6 +59,7 @@ char *internal_strdup(const char *s);
 uptr internal_strlen(const char *s);
 uptr internal_strlcat(char *dst, const char *src, uptr maxlen);
 char *internal_strncat(char *dst, const char *src, uptr n);
+char* internal_strcat(char* dst, const char* src);
 int internal_strncmp(const char *s1, const char *s2, uptr n);
 uptr internal_strlcpy(char *dst, const char *src, uptr maxlen);
 char *internal_strncpy(char *dst, const char *src, uptr n);
diff --git a/lib/libtsan/sanitizer_common/sanitizer_linux.cpp b/lib/libtsan/sanitizer_common/sanitizer_linux.cpp
index acb59dfd6b..58608ef72b 100644
--- a/lib/libtsan/sanitizer_common/sanitizer_linux.cpp
+++ b/lib/libtsan/sanitizer_common/sanitizer_linux.cpp
@@ -635,7 +635,7 @@ bool DirExists(const char *path) {
 }
 
 #  if !SANITIZER_NETBSD
-tid_t GetTid() {
+ThreadID GetTid() {
 #    if SANITIZER_FREEBSD
   long Tid;
   thr_self(&Tid);
@@ -649,7 +649,7 @@ tid_t GetTid() {
 #    endif
 }
 
-int TgKill(pid_t pid, tid_t tid, int sig) {
+int TgKill(pid_t pid, ThreadID tid, int sig) {
 #    if SANITIZER_LINUX
   return internal_syscall(SYSCALL(tgkill), pid, tid, sig);
 #    elif SANITIZER_FREEBSD
@@ -1091,7 +1091,7 @@ ThreadLister::ThreadLister(pid_t pid) : buffer_(4096) {
 }
 
 ThreadLister::Result ThreadLister::ListThreads(
-    InternalMmapVector<tid_t> *threads) {
+    InternalMmapVector<ThreadID> *threads) {
   int descriptor = internal_open(task_path_.data(), O_RDONLY | O_DIRECTORY);
   if (internal_iserror(descriptor)) {
     Report("Can't open %s for reading.\n", task_path_.data());
@@ -1146,7 +1146,7 @@ ThreadLister::Result ThreadLister::ListThreads(
   }
 }
 
-const char *ThreadLister::LoadStatus(tid_t tid) {
+const char *ThreadLister::LoadStatus(ThreadID tid) {
   status_path_.clear();
   status_path_.AppendF("%s/%llu/status", task_path_.data(), tid);
   auto cleanup = at_scope_exit([&] {
@@ -1159,7 +1159,7 @@ const char *ThreadLister::LoadStatus(tid_t tid) {
   return buffer_.data();
 }
 
-bool ThreadLister::IsAlive(tid_t tid) {
+bool ThreadLister::IsAlive(ThreadID tid) {
   // /proc/%d/task/%d/status uses same call to detect alive threads as
   // proc_task_readdir. See task_state implementation in Linux.
   static const char kPrefix[] = "\nPPid:";
@@ -1289,7 +1289,7 @@ uptr GetPageSize() {
 
 uptr ReadBinaryName(/*out*/ char *buf, uptr buf_len) {
 #  if SANITIZER_HAIKU
-  int cookie = 0;
+  int32 cookie = 0;
   image_info info;
   const char *argv0 = "<UNKNOWN>";
   while (get_next_image_info(B_CURRENT_TEAM, &cookie, &info) == B_OK) {
@@ -1989,7 +1989,10 @@ SignalContext::WriteFlag SignalContext::GetWriteFlag() const {
 #    elif SANITIZER_NETBSD
   uptr err = ucontext->uc_mcontext.__gregs[_REG_ERR];
 #    elif SANITIZER_HAIKU
-  uptr err = ucontext->uc_mcontext.r13;
+  uptr err = 0;  // FIXME: ucontext->uc_mcontext.r13;
+                 // The err register was added on the main branch and not
+                 // available with the current release. To be reverted later.
+                 // https://github.com/haiku/haiku/commit/11adda21aa4e6b24f71a496868a44d7607bc3764
 #    elif SANITIZER_SOLARIS && defined(__i386__)
   const int Err = 13;
   uptr err = ucontext->uc_mcontext.gregs[Err];
@@ -2619,6 +2622,11 @@ static void GetPcSpBp(void *context, uptr *pc, uptr *sp, uptr *bp) {
   *pc = ucontext->uc_mcontext.mc_eip;
   *bp = ucontext->uc_mcontext.mc_ebp;
   *sp = ucontext->uc_mcontext.mc_esp;
+#    elif SANITIZER_HAIKU
+  ucontext_t *ucontext = (ucontext_t *)context;
+  *pc = ucontext->uc_mcontext.eip;
+  *bp = ucontext->uc_mcontext.ebp;
+  *sp = ucontext->uc_mcontext.esp;
 #    else
   ucontext_t *ucontext = (ucontext_t *)context;
 #      if SANITIZER_SOLARIS
diff --git a/lib/libtsan/sanitizer_common/sanitizer_linux.h b/lib/libtsan/sanitizer_common/sanitizer_linux.h
index 05b7d2e28a..e621799c4b 100644
--- a/lib/libtsan/sanitizer_common/sanitizer_linux.h
+++ b/lib/libtsan/sanitizer_common/sanitizer_linux.h
@@ -108,11 +108,11 @@ class ThreadLister {
     Incomplete,
     Ok,
   };
-  Result ListThreads(InternalMmapVector<tid_t> *threads);
-  const char *LoadStatus(tid_t tid);
+  Result ListThreads(InternalMmapVector<ThreadID> *threads);
+  const char *LoadStatus(ThreadID tid);
 
  private:
-  bool IsAlive(tid_t tid);
+  bool IsAlive(ThreadID tid);
 
   InternalScopedString task_path_;
   InternalScopedString status_path_;
diff --git a/lib/libtsan/sanitizer_common/sanitizer_linux_libcdep.cpp b/lib/libtsan/sanitizer_common/sanitizer_linux_libcdep.cpp
index 1263f307ac..fb99bc0886 100644
--- a/lib/libtsan/sanitizer_common/sanitizer_linux_libcdep.cpp
+++ b/lib/libtsan/sanitizer_common/sanitizer_linux_libcdep.cpp
@@ -29,6 +29,7 @@
 #  include "sanitizer_solaris.h"
 
 #  if SANITIZER_HAIKU
+#    define _GNU_SOURCE
 #    define _DEFAULT_SOURCE
 #  endif
 
diff --git a/lib/libtsan/sanitizer_common/sanitizer_mac.cpp b/lib/libtsan/sanitizer_common/sanitizer_mac.cpp
index bb71af5ad8..940175791f 100644
--- a/lib/libtsan/sanitizer_common/sanitizer_mac.cpp
+++ b/lib/libtsan/sanitizer_common/sanitizer_mac.cpp
@@ -22,6 +22,11 @@
 #  endif
 #  include <stdio.h>
 
+// Start searching for available memory region past PAGEZERO, which is
+// 4KB on 32-bit and 4GB on 64-bit.
+#  define GAP_SEARCH_START_ADDRESS \
+    ((SANITIZER_WORDSIZE == 32) ? 0x000000001000 : 0x000100000000)
+
 #  include "sanitizer_common.h"
 #  include "sanitizer_file.h"
 #  include "sanitizer_flags.h"
@@ -58,9 +63,11 @@ extern char ***_NSGetArgv(void);
 #  include <dlfcn.h>  // for dladdr()
 #  include <errno.h>
 #  include <fcntl.h>
+#  include <inttypes.h>
 #  include <libkern/OSAtomic.h>
 #  include <mach-o/dyld.h>
 #  include <mach/mach.h>
+#  include <mach/mach_error.h>
 #  include <mach/mach_time.h>
 #  include <mach/vm_statistics.h>
 #  include <malloc/malloc.h>
@@ -96,8 +103,16 @@ extern "C" {
     natural_t *nesting_depth,
     vm_region_recurse_info_t info,
     mach_msg_type_number_t *infoCnt);
+
+  extern const void* _dyld_get_shared_cache_range(size_t* length);
 }
 
+#  if !SANITIZER_GO
+// Weak symbol no-op when TSan is not linked
+SANITIZER_WEAK_ATTRIBUTE extern void __tsan_set_in_internal_write_call(
+    bool value) {}
+#  endif
+
 namespace __sanitizer {
 
 #include "sanitizer_syscall_generic.inc"
@@ -168,7 +183,15 @@ uptr internal_read(fd_t fd, void *buf, uptr count) {
 }
 
 uptr internal_write(fd_t fd, const void *buf, uptr count) {
+#  if SANITIZER_GO
   return write(fd, buf, count);
+#  else
+  // We need to disable interceptors when writing in TSan
+  __tsan_set_in_internal_write_call(true);
+  uptr res = write(fd, buf, count);
+  __tsan_set_in_internal_write_call(false);
+  return res;
+#  endif
 }
 
 uptr internal_stat(const char *path, void *buf) {
@@ -258,53 +281,43 @@ int internal_sysctlbyname(const char *sname, void *oldp, uptr *oldlenp,
                       (size_t)newlen);
 }
 
-static fd_t internal_spawn_impl(const char *argv[], const char *envp[],
-                                pid_t *pid) {
-  fd_t primary_fd = kInvalidFd;
-  fd_t secondary_fd = kInvalidFd;
+bool internal_spawn(const char* argv[], const char* envp[], pid_t* pid,
+                    fd_t fd_stdin, fd_t fd_stdout) {
+  // NOTE: Caller ensures that fd_stdin and fd_stdout are not 0, 1, or 2, since
+  // this can break communication.
+  //
+  // NOTE: Caller is responsible for closing fd_stdin after the process has
+  // died.
 
+  int res;
   auto fd_closer = at_scope_exit([&] {
-    internal_close(primary_fd);
-    internal_close(secondary_fd);
+    // NOTE: We intentionally do not close fd_stdin since this can
+    // cause us to receive a fatal SIGPIPE if the process dies.
+    internal_close(fd_stdout);
   });
 
-  // We need a new pseudoterminal to avoid buffering problems. The 'atos' tool
-  // in particular detects when it's talking to a pipe and forgets to flush the
-  // output stream after sending a response.
-  primary_fd = posix_openpt(O_RDWR);
-  if (primary_fd == kInvalidFd)
-    return kInvalidFd;
-
-  int res = grantpt(primary_fd) || unlockpt(primary_fd);
-  if (res != 0) return kInvalidFd;
-
-  // Use TIOCPTYGNAME instead of ptsname() to avoid threading problems.
-  char secondary_pty_name[128];
-  res = ioctl(primary_fd, TIOCPTYGNAME, secondary_pty_name);
-  if (res == -1) return kInvalidFd;
-
-  secondary_fd = internal_open(secondary_pty_name, O_RDWR);
-  if (secondary_fd == kInvalidFd)
-    return kInvalidFd;
-
   // File descriptor actions
   posix_spawn_file_actions_t acts;
   res = posix_spawn_file_actions_init(&acts);
-  if (res != 0) return kInvalidFd;
+  if (res != 0)
+    return false;
 
   auto acts_cleanup = at_scope_exit([&] {
     posix_spawn_file_actions_destroy(&acts);
   });
 
-  res = posix_spawn_file_actions_adddup2(&acts, secondary_fd, STDIN_FILENO) ||
-        posix_spawn_file_actions_adddup2(&acts, secondary_fd, STDOUT_FILENO) ||
-        posix_spawn_file_actions_addclose(&acts, secondary_fd);
-  if (res != 0) return kInvalidFd;
+  res = posix_spawn_file_actions_adddup2(&acts, fd_stdin, STDIN_FILENO) ||
+        posix_spawn_file_actions_adddup2(&acts, fd_stdout, STDOUT_FILENO) ||
+        posix_spawn_file_actions_addclose(&acts, fd_stdin) ||
+        posix_spawn_file_actions_addclose(&acts, fd_stdout);
+  if (res != 0)
+    return false;
 
   // Spawn attributes
   posix_spawnattr_t attrs;
   res = posix_spawnattr_init(&attrs);
-  if (res != 0) return kInvalidFd;
+  if (res != 0)
+    return false;
 
   auto attrs_cleanup  = at_scope_exit([&] {
     posix_spawnattr_destroy(&attrs);
@@ -313,50 +326,17 @@ static fd_t internal_spawn_impl(const char *argv[], const char *envp[],
   // In the spawned process, close all file descriptors that are not explicitly
   // described by the file actions object. This is Darwin-specific extension.
   res = posix_spawnattr_setflags(&attrs, POSIX_SPAWN_CLOEXEC_DEFAULT);
-  if (res != 0) return kInvalidFd;
+  if (res != 0)
+    return false;
 
   // posix_spawn
   char **argv_casted = const_cast<char **>(argv);
   char **envp_casted = const_cast<char **>(envp);
   res = posix_spawn(pid, argv[0], &acts, &attrs, argv_casted, envp_casted);
-  if (res != 0) return kInvalidFd;
+  if (res != 0)
+    return false;
 
-  // Disable echo in the new terminal, disable CR.
-  struct termios termflags;
-  tcgetattr(primary_fd, &termflags);
-  termflags.c_oflag &= ~ONLCR;
-  termflags.c_lflag &= ~ECHO;
-  tcsetattr(primary_fd, TCSANOW, &termflags);
-
-  // On success, do not close primary_fd on scope exit.
-  fd_t fd = primary_fd;
-  primary_fd = kInvalidFd;
-
-  return fd;
-}
-
-fd_t internal_spawn(const char *argv[], const char *envp[], pid_t *pid) {
-  // The client program may close its stdin and/or stdout and/or stderr thus
-  // allowing open/posix_openpt to reuse file descriptors 0, 1 or 2. In this
-  // case the communication is broken if either the parent or the child tries to
-  // close or duplicate these descriptors. We temporarily reserve these
-  // descriptors here to prevent this.
-  fd_t low_fds[3];
-  size_t count = 0;
-
-  for (; count < 3; count++) {
-    low_fds[count] = posix_openpt(O_RDWR);
-    if (low_fds[count] >= STDERR_FILENO)
-      break;
-  }
-
-  fd_t fd = internal_spawn_impl(argv, envp, pid);
-
-  for (; count > 0; count--) {
-    internal_close(low_fds[count]);
-  }
-
-  return fd;
+  return true;
 }
 
 uptr internal_rename(const char *oldpath, const char *newpath) {
@@ -394,8 +374,8 @@ bool DirExists(const char *path) {
   return S_ISDIR(st.st_mode);
 }
 
-tid_t GetTid() {
-  tid_t tid;
+ThreadID GetTid() {
+  ThreadID tid;
   pthread_threadid_np(nullptr, &tid);
   return tid;
 }
@@ -769,11 +749,17 @@ void internal_join_thread(void *th) { pthread_join((pthread_t)th, 0); }
 static Mutex syslog_lock;
 #  endif
 
+#  if SANITIZER_DRIVERKIT
+#    define SANITIZER_OS_LOG os_log
+#  else
+#    define SANITIZER_OS_LOG os_log_error
+#  endif
+
 void WriteOneLineToSyslog(const char *s) {
 #if !SANITIZER_GO
   syslog_lock.CheckLocked();
   if (GetMacosAlignedVersion() >= MacosVersion(10, 12)) {
-    os_log_error(OS_LOG_DEFAULT, "%{public}s", s);
+    SANITIZER_OS_LOG(OS_LOG_DEFAULT, "%{public}s", s);
   } else {
 #pragma clang diagnostic push
 // as_log is deprecated.
@@ -837,22 +823,22 @@ void LogMessageOnPrintf(const char *str) {
 
 void LogFullErrorReport(const char *buffer) {
 #  if !SANITIZER_GO
-  // Log with os_log_error. This will make it into the crash log.
+  // When logging with os_log_error this will make it into the crash log.
   if (internal_strncmp(SanitizerToolName, "AddressSanitizer",
                        sizeof("AddressSanitizer") - 1) == 0)
-    os_log_error(OS_LOG_DEFAULT, "Address Sanitizer reported a failure.");
+    SANITIZER_OS_LOG(OS_LOG_DEFAULT, "Address Sanitizer reported a failure.");
   else if (internal_strncmp(SanitizerToolName, "UndefinedBehaviorSanitizer",
                             sizeof("UndefinedBehaviorSanitizer") - 1) == 0)
-    os_log_error(OS_LOG_DEFAULT,
-                 "Undefined Behavior Sanitizer reported a failure.");
+    SANITIZER_OS_LOG(OS_LOG_DEFAULT,
+                     "Undefined Behavior Sanitizer reported a failure.");
   else if (internal_strncmp(SanitizerToolName, "ThreadSanitizer",
                             sizeof("ThreadSanitizer") - 1) == 0)
-    os_log_error(OS_LOG_DEFAULT, "Thread Sanitizer reported a failure.");
+    SANITIZER_OS_LOG(OS_LOG_DEFAULT, "Thread Sanitizer reported a failure.");
   else
-    os_log_error(OS_LOG_DEFAULT, "Sanitizer tool reported a failure.");
+    SANITIZER_OS_LOG(OS_LOG_DEFAULT, "Sanitizer tool reported a failure.");
 
   if (common_flags()->log_to_syslog)
-    os_log_error(OS_LOG_DEFAULT, "Consult syslog for more information.");
+    SANITIZER_OS_LOG(OS_LOG_DEFAULT, "Consult syslog for more information.");
 
   // Log to syslog.
   // The logging on OS X may call pthread_create so we need the threading
@@ -933,7 +919,17 @@ static void DisableMmapExcGuardExceptions() {
       RTLD_DEFAULT, "task_set_exc_guard_behavior");
   if (set_behavior == nullptr) return;
   const task_exc_guard_behavior_t task_exc_guard_none = 0;
-  set_behavior(mach_task_self(), task_exc_guard_none);
+  kern_return_t res = set_behavior(mach_task_self(), task_exc_guard_none);
+  if (res != KERN_SUCCESS) {
+    Report(
+        "WARN: task_set_exc_guard_behavior returned %d (%s), "
+        "mmap may fail unexpectedly.\n",
+        res, mach_error_string(res));
+    if (res == KERN_DENIED)
+      Report(
+          "HINT: Check that task_set_exc_guard_behavior is allowed by "
+          "sandbox.\n");
+  }
 }
 
 static void VerifyInterceptorsWorking();
@@ -1100,6 +1096,67 @@ static void StripEnv() {
 }
 #endif  // SANITIZER_GO
 
+// Prints out a consolidated memory map: contiguous regions
+// are merged together.
+static void PrintVmmap() {
+  const mach_vm_address_t max_vm_address = GetMaxVirtualAddress() + 1;
+  mach_vm_address_t address = GAP_SEARCH_START_ADDRESS;
+  kern_return_t kr = KERN_SUCCESS;
+
+  Report("Memory map:\n");
+  mach_vm_address_t last = 0;
+  mach_vm_address_t lastsz = 0;
+
+  while (1) {
+    mach_vm_size_t vmsize = 0;
+    natural_t depth = 0;
+    vm_region_submap_short_info_data_64_t vminfo;
+    mach_msg_type_number_t count = VM_REGION_SUBMAP_SHORT_INFO_COUNT_64;
+    kr = mach_vm_region_recurse(mach_task_self(), &address, &vmsize, &depth,
+                                (vm_region_info_t)&vminfo, &count);
+
+    if (kr == KERN_DENIED) {
+      Report(
+          "ERROR: mach_vm_region_recurse got KERN_DENIED when printing memory "
+          "map.\n");
+      Report(
+          "HINT: Check whether mach_vm_region_recurse is allowed by "
+          "sandbox.\n");
+    }
+
+    if (kr == KERN_SUCCESS && address < max_vm_address) {
+      if (last + lastsz == address) {
+        // This region is contiguous with the last; merge together.
+        lastsz += vmsize;
+      } else {
+        if (lastsz)
+          Printf("|| `[%p, %p]` || size=0x%016" PRIx64 " ||\n", (void*)last,
+                 (void*)(last + lastsz), lastsz);
+
+        last = address;
+        lastsz = vmsize;
+      }
+      address += vmsize;
+    } else {
+      // We've reached the end of the memory map. Print the last remaining
+      // region, if there is one.
+      if (lastsz)
+        Printf("|| `[%p, %p]` || size=0x%016" PRIx64 " ||\n", (void*)last,
+               (void*)(last + lastsz), lastsz);
+
+      break;
+    }
+  }
+}
+
+static void ReportShadowAllocFail(uptr shadow_size_bytes, uptr alignment) {
+  Report(
+      "FATAL: Failed to allocate shadow memory. Tried to allocate %p bytes "
+      "(alignment=%p).\n",
+      (void*)shadow_size_bytes, (void*)alignment);
+  PrintVmmap();
+}
+
 char **GetArgv() {
   return *_NSGetArgv();
 }
@@ -1207,10 +1264,11 @@ uptr MapDynamicShadow(uptr shadow_size_bytes, uptr shadow_scale,
     if (new_max_vm < max_occupied_addr) {
       Report("Unable to find a memory range for dynamic shadow.\n");
       Report(
-          "space_size = %p, largest_gap_found = %p, max_occupied_addr = %p, "
-          "new_max_vm = %p\n",
-          (void *)space_size, (void *)largest_gap_found,
-          (void *)max_occupied_addr, (void *)new_max_vm);
+          "\tspace_size = %p\n\tlargest_gap_found = %p\n\tmax_occupied_addr "
+          "= %p\n\tnew_max_vm = %p\n",
+          (void*)space_size, (void*)largest_gap_found, (void*)max_occupied_addr,
+          (void*)new_max_vm);
+      ReportShadowAllocFail(shadow_size_bytes, alignment);
       CHECK(0 && "cannot place shadow");
     }
     RestrictMemoryToMaxAddress(new_max_vm);
@@ -1221,6 +1279,7 @@ uptr MapDynamicShadow(uptr shadow_size_bytes, uptr shadow_scale,
                                             nullptr, nullptr);
     if (shadow_start == 0) {
       Report("Unable to find a memory range after restricting VM.\n");
+      ReportShadowAllocFail(shadow_size_bytes, alignment);
       CHECK(0 && "cannot place shadow after restricting vm");
     }
   }
@@ -1229,6 +1288,25 @@ uptr MapDynamicShadow(uptr shadow_size_bytes, uptr shadow_scale,
   return shadow_start;
 }
 
+// Returns a list of ranges which must be covered by shadow memory,
+// and cannot overlap with any fixed mappings made by a sanitizer.
+// This can ensure that the sanitizer runtime does not map over
+// platform-reserved regions.
+void GetAppReservedRanges(InternalMmapVector<ReservedRange>& ranges) {
+  ranges.clear();
+
+#  if SANITIZER_OSX
+  // On macOS, the first 512GB are platform-reserved (some of which
+  // may also be available to applications).
+  ranges.push_back({0x1000UL, 0x8000000000UL});
+#  endif
+
+  VReport(2, "App ranges:\n");
+  for (auto& [range_start, range_end] : ranges) {
+    VReport(2, "  [%p, %p]\n", range_start, range_end);
+  }
+}
+
 uptr MapDynamicShadowAndAliases(uptr shadow_size, uptr alias_size,
                                 uptr num_aliases, uptr ring_buffer_size) {
   CHECK(false && "HWASan aliasing is unimplemented on Mac");
@@ -1236,40 +1314,61 @@ uptr MapDynamicShadowAndAliases(uptr shadow_size, uptr alias_size,
 }
 
 uptr FindAvailableMemoryRange(uptr size, uptr alignment, uptr left_padding,
-                              uptr *largest_gap_found,
-                              uptr *max_occupied_addr) {
-  typedef vm_region_submap_short_info_data_64_t RegionInfo;
-  enum { kRegionInfoSize = VM_REGION_SUBMAP_SHORT_INFO_COUNT_64 };
-  // Start searching for available memory region past PAGEZERO, which is
-  // 4KB on 32-bit and 4GB on 64-bit.
-  mach_vm_address_t start_address =
-    (SANITIZER_WORDSIZE == 32) ? 0x000000001000 : 0x000100000000;
-
+                              uptr* largest_gap_found,
+                              uptr* max_occupied_addr) {
   const mach_vm_address_t max_vm_address = GetMaxVirtualAddress() + 1;
-  mach_vm_address_t address = start_address;
-  mach_vm_address_t free_begin = start_address;
+  mach_vm_address_t address = GAP_SEARCH_START_ADDRESS;
+  mach_vm_address_t free_begin = GAP_SEARCH_START_ADDRESS;
+
+  // Restrict the search to be after any reserved ranges
+  InternalMmapVector<ReservedRange> app_ranges;
+  GetAppReservedRanges(app_ranges);
+
+  for (auto& [range_start, range_end] : app_ranges) {
+    address = Max(address, (mach_vm_address_t)range_end);
+    free_begin = Max(free_begin, (mach_vm_address_t)range_end);
+  }
+
   kern_return_t kr = KERN_SUCCESS;
   if (largest_gap_found) *largest_gap_found = 0;
   if (max_occupied_addr) *max_occupied_addr = 0;
   while (kr == KERN_SUCCESS) {
     mach_vm_size_t vmsize = 0;
     natural_t depth = 0;
-    RegionInfo vminfo;
-    mach_msg_type_number_t count = kRegionInfoSize;
+    vm_region_submap_short_info_data_64_t vminfo;
+    mach_msg_type_number_t count = VM_REGION_SUBMAP_SHORT_INFO_COUNT_64;
     kr = mach_vm_region_recurse(mach_task_self(), &address, &vmsize, &depth,
                                 (vm_region_info_t)&vminfo, &count);
 
-    // There are cases where going beyond the processes' max vm does
-    // not return KERN_INVALID_ADDRESS so we check for going beyond that
-    // max address as well.
-    if (kr == KERN_INVALID_ADDRESS || address > max_vm_address) {
+    if (kr == KERN_SUCCESS) {
+      // There are cases where going beyond the process's max vm does
+      // not return KERN_INVALID_ADDRESS so we check for going beyond that
+      // max address as well.
+      if (address > max_vm_address) {
+        address = max_vm_address;
+        kr = -1;  // break after this iteration.
+      }
+
+      if (max_occupied_addr)
+        *max_occupied_addr = address + vmsize;
+    } else if (kr == KERN_INVALID_ADDRESS) {
       // No more regions beyond "address", consider the gap at the end of VM.
       address = max_vm_address;
-      vmsize = 0;
-      kr = -1;  // break after this iteration.
+
+      // We will break after this iteration anyway since kr != KERN_SUCCESS
+    } else if (kr == KERN_DENIED) {
+      Report("ERROR: Unable to find a memory range for dynamic shadow.\n");
+      Report("HINT: Ensure mach_vm_region_recurse is allowed under sandbox.\n");
+      Die();
     } else {
-      if (max_occupied_addr) *max_occupied_addr = address + vmsize;
+      Report(
+          "WARNING: mach_vm_region_recurse returned unexpected code %d (%s)\n",
+          kr, mach_error_string(kr));
+      DCHECK(false && "mach_vm_region_recurse returned unexpected code");
+      break;  // address is not valid unless KERN_SUCCESS, therefore we must not
+              // use it.
     }
+
     if (free_begin != address) {
       // We found a free region [free_begin..address-1].
       uptr gap_start = RoundUpTo((uptr)free_begin + left_padding, alignment);
@@ -1292,6 +1391,58 @@ uptr FindAvailableMemoryRange(uptr size, uptr alignment, uptr left_padding,
   return 0;
 }
 
+// This function (when used during initialization when there is
+// only a single thread), can be used to verify that a range
+// of memory hasn't already been mapped, and won't be mapped
+// later in the shared cache.
+//
+// If the syscall mach_vm_region_recurse fails (due to sandbox),
+// we assume that the memory is not mapped so that execution can continue.
+//
+// NOTE: range_end is inclusive
+//
+// WARNING: This function must NOT allocate memory, since it is
+// used in InitializeShadowMemory between where we search for
+// space for shadow and where we actually allocate it.
+bool MemoryRangeIsAvailable(uptr range_start, uptr range_end) {
+  mach_vm_size_t vmsize = 0;
+  natural_t depth = 0;
+  vm_region_submap_short_info_data_64_t vminfo;
+  mach_msg_type_number_t count = VM_REGION_SUBMAP_SHORT_INFO_COUNT_64;
+  mach_vm_address_t address = range_start;
+
+  // First, check if the range is already mapped.
+  kern_return_t kr =
+      mach_vm_region_recurse(mach_task_self(), &address, &vmsize, &depth,
+                             (vm_region_info_t)&vminfo, &count);
+
+  if (kr == KERN_DENIED) {
+    Report(
+        "WARN: mach_vm_region_recurse returned KERN_DENIED when checking "
+        "whether an address is mapped.\n");
+    Report("HINT: Is mach_vm_region_recurse allowed by sandbox?\n");
+  }
+
+  if (kr == KERN_SUCCESS && !IntervalsAreSeparate(address, address + vmsize - 1,
+                                                  range_start, range_end)) {
+    // Overlaps with already-mapped memory
+    return false;
+  }
+
+  size_t cacheLength;
+  uptr cacheStart = (uptr)_dyld_get_shared_cache_range(&cacheLength);
+
+  if (cacheStart &&
+      !IntervalsAreSeparate(cacheStart, cacheStart + cacheLength - 1,
+                            range_start, range_end)) {
+    // Overlaps with shared cache region
+    return false;
+  }
+
+  // We believe this address is available.
+  return true;
+}
+
 // FIXME implement on this platform.
 void GetMemoryProfile(fill_profile_f cb, uptr *stats) {}
 
diff --git a/lib/libtsan/sanitizer_common/sanitizer_mac.h b/lib/libtsan/sanitizer_common/sanitizer_mac.h
index b0e4ac7f40..7f9a2b77e7 100644
--- a/lib/libtsan/sanitizer_common/sanitizer_mac.h
+++ b/lib/libtsan/sanitizer_common/sanitizer_mac.h
@@ -58,8 +58,13 @@ struct DarwinKernelVersion : VersionBase<DarwinKernelVersion> {
   DarwinKernelVersion(u16 major, u16 minor) : VersionBase(major, minor) {}
 };
 
+struct ReservedRange {
+  uptr beg, end;
+};
+
 MacosVersion GetMacosAlignedVersion();
 DarwinKernelVersion GetDarwinKernelVersion();
+void GetAppReservedRanges(InternalMmapVector<ReservedRange>& ranges);
 
 char **GetEnviron();
 
diff --git a/lib/libtsan/sanitizer_common/sanitizer_netbsd.cpp b/lib/libtsan/sanitizer_common/sanitizer_netbsd.cpp
index 5e601bdcde..737e336dfb 100644
--- a/lib/libtsan/sanitizer_common/sanitizer_netbsd.cpp
+++ b/lib/libtsan/sanitizer_common/sanitizer_netbsd.cpp
@@ -229,12 +229,12 @@ uptr internal_execve(const char *filename, char *const argv[],
   return _sys_execve(filename, argv, envp);
 }
 
-tid_t GetTid() {
+ThreadID GetTid() {
   DEFINE__REAL(int, _lwp_self);
   return _REAL(_lwp_self);
 }
 
-int TgKill(pid_t pid, tid_t tid, int sig) {
+int TgKill(pid_t pid, ThreadID tid, int sig) {
   DEFINE__REAL(int, _lwp_kill, int a, int b);
   (void)pid;
   return _REAL(_lwp_kill, tid, sig);
diff --git a/lib/libtsan/sanitizer_common/sanitizer_platform.h b/lib/libtsan/sanitizer_common/sanitizer_platform.h
index 196c0a9884..acd2da2611 100644
--- a/lib/libtsan/sanitizer_common/sanitizer_platform.h
+++ b/lib/libtsan/sanitizer_common/sanitizer_platform.h
@@ -319,7 +319,11 @@
 #endif
 
 // The first address that can be returned by mmap.
-#define SANITIZER_MMAP_BEGIN 0
+#if SANITIZER_AIX && SANITIZER_WORDSIZE == 64
+#  define SANITIZER_MMAP_BEGIN 0x0a00'0000'0000'0000ULL
+#else
+#  define SANITIZER_MMAP_BEGIN 0
+#endif
 
 // The range of addresses which can be returned my mmap.
 // FIXME: this value should be different on different platforms.  Larger values
@@ -482,4 +486,26 @@
 #  define SANITIZER_START_BACKGROUND_THREAD_IN_ASAN_INTERNAL 0
 #endif
 
+#if SANITIZER_LINUX
+#  if SANITIZER_GLIBC
+// Workaround for
+// glibc/commit/3d3572f59059e2b19b8541ea648a6172136ec42e
+// Linux: Keep termios ioctl constants strictly internal
+#    if __GLIBC_PREREQ(2, 41)
+#      define SANITIZER_TERMIOS_IOCTL_CONSTANTS 0
+#    else
+#      define SANITIZER_TERMIOS_IOCTL_CONSTANTS 1
+#    endif
+#  else
+#    define SANITIZER_TERMIOS_IOCTL_CONSTANTS 1
+#  endif
+#endif
+
+#if SANITIZER_APPLE && SANITIZER_WORDSIZE == 64
+// MTE uses the lower half of the top byte.
+#  define STRIP_MTE_TAG(addr) ((addr) & ~((uptr)0x0f << 56))
+#else
+#  define STRIP_MTE_TAG(addr) (addr)
+#endif
+
 #endif  // SANITIZER_PLATFORM_H
diff --git a/lib/libtsan/sanitizer_common/sanitizer_platform_interceptors.h b/lib/libtsan/sanitizer_common/sanitizer_platform_interceptors.h
index 29987decdf..1b300bc753 100644
--- a/lib/libtsan/sanitizer_common/sanitizer_platform_interceptors.h
+++ b/lib/libtsan/sanitizer_common/sanitizer_platform_interceptors.h
@@ -167,7 +167,7 @@ SANITIZER_WEAK_IMPORT void *aligned_alloc(__sanitizer::usize __alignment,
 
 #define SANITIZER_INTERCEPT_STRLEN SI_NOT_FUCHSIA
 #define SANITIZER_INTERCEPT_STRNLEN (SI_NOT_MAC && SI_NOT_FUCHSIA)
-#define SANITIZER_INTERCEPT_STRCMP (SI_NOT_FUCHSIA && SI_NOT_AIX)
+#define SANITIZER_INTERCEPT_STRCMP SI_NOT_FUCHSIA
 #define SANITIZER_INTERCEPT_STRSTR SI_NOT_FUCHSIA
 #define SANITIZER_INTERCEPT_STRCASESTR (SI_POSIX && SI_NOT_AIX)
 #define SANITIZER_INTERCEPT_STRTOK SI_NOT_FUCHSIA
@@ -179,8 +179,8 @@ SANITIZER_WEAK_IMPORT void *aligned_alloc(__sanitizer::usize __alignment,
 #define SANITIZER_INTERCEPT_TEXTDOMAIN SI_LINUX_NOT_ANDROID || SI_SOLARIS
 #define SANITIZER_INTERCEPT_STRCASECMP SI_POSIX
 #define SANITIZER_INTERCEPT_MEMSET 1
-#define SANITIZER_INTERCEPT_MEMMOVE SI_NOT_AIX
-#define SANITIZER_INTERCEPT_MEMCPY SI_NOT_AIX
+#define SANITIZER_INTERCEPT_MEMMOVE 1
+#define SANITIZER_INTERCEPT_MEMCPY 1
 #define SANITIZER_INTERCEPT_MEMCMP SI_NOT_FUCHSIA
 #define SANITIZER_INTERCEPT_BCMP \
   SANITIZER_INTERCEPT_MEMCMP &&  \
@@ -551,7 +551,8 @@ SANITIZER_WEAK_IMPORT void *aligned_alloc(__sanitizer::usize __alignment,
 #define SANITIZER_INTERCEPT_MALLOC_USABLE_SIZE (!SI_MAC && !SI_NETBSD)
 #define SANITIZER_INTERCEPT_MCHECK_MPROBE SI_LINUX_NOT_ANDROID
 #define SANITIZER_INTERCEPT_WCSLEN 1
-#define SANITIZER_INTERCEPT_WCSCAT SI_POSIX
+#define SANITIZER_INTERCEPT_WCSNLEN 1
+#define SANITIZER_INTERCEPT_WCSCAT (SI_POSIX || SI_WINDOWS)
 #define SANITIZER_INTERCEPT_WCSDUP SI_POSIX
 #define SANITIZER_INTERCEPT_SIGNAL_AND_SIGACTION (!SI_WINDOWS && SI_NOT_FUCHSIA)
 #define SANITIZER_INTERCEPT_BSD_SIGNAL SI_ANDROID
diff --git a/lib/libtsan/sanitizer_common/sanitizer_platform_limits_posix.cpp b/lib/libtsan/sanitizer_common/sanitizer_platform_limits_posix.cpp
index 7a89bf1c74..47eb1dc326 100644
--- a/lib/libtsan/sanitizer_common/sanitizer_platform_limits_posix.cpp
+++ b/lib/libtsan/sanitizer_common/sanitizer_platform_limits_posix.cpp
@@ -76,6 +76,7 @@
 #include <sys/vt.h>
 #include <linux/cdrom.h>
 #include <linux/fd.h>
+#include <linux/filter.h>
 #if SANITIZER_ANDROID
 #include <linux/fs.h>
 #endif
@@ -151,16 +152,15 @@ typedef struct user_fpregs elf_fpregset_t;
 #if defined(__mips64)
 # include <sys/procfs.h>
 #endif
-#include <sys/user.h>
-#include <linux/if_eql.h>
-#include <linux/if_plip.h>
-#include <linux/lp.h>
-#include <linux/mroute.h>
-#include <linux/mroute6.h>
-#include <linux/scc.h>
-#include <linux/serial.h>
-#include <sys/msg.h>
-#include <sys/ipc.h>
+#      include <linux/if_eql.h>
+#      include <linux/if_plip.h>
+#      include <linux/lp.h>
+#      include <linux/mroute.h>
+#      include <linux/mroute6.h>
+#      include <linux/serial.h>
+#      include <sys/ipc.h>
+#      include <sys/msg.h>
+#      include <sys/user.h>
 #endif  // SANITIZER_ANDROID
 
 #include <link.h>
@@ -516,6 +516,7 @@ unsigned struct_ElfW_Phdr_sz = sizeof(Elf_Phdr);
   unsigned struct_seq_event_rec_sz = sizeof(struct seq_event_rec);
   unsigned struct_synth_info_sz = sizeof(struct synth_info);
   unsigned struct_vt_mode_sz = sizeof(struct vt_mode);
+  unsigned struct_sock_fprog_sz = sizeof(struct sock_fprog);
 #endif // SANITIZER_LINUX
 
 #if SANITIZER_GLIBC
@@ -532,8 +533,6 @@ unsigned struct_ElfW_Phdr_sz = sizeof(Elf_Phdr);
   unsigned struct_kbsentry_sz = sizeof(struct kbsentry);
   unsigned struct_mtconfiginfo_sz = sizeof(struct mtconfiginfo);
   unsigned struct_nr_parms_struct_sz = sizeof(struct nr_parms_struct);
-  unsigned struct_scc_modem_sz = sizeof(struct scc_modem);
-  unsigned struct_scc_stat_sz = sizeof(struct scc_stat);
   unsigned struct_serial_multiport_struct_sz
       = sizeof(struct serial_multiport_struct);
   unsigned struct_serial_struct_sz = sizeof(struct serial_struct);
@@ -543,7 +542,6 @@ unsigned struct_ElfW_Phdr_sz = sizeof(Elf_Phdr);
 
   unsigned struct_audio_buf_info_sz = sizeof(struct audio_buf_info);
   unsigned struct_ppp_stats_sz = sizeof(struct ppp_stats);
-  unsigned struct_sock_fprog_sz = sizeof(struct sock_fprog);
 #  endif  // SANITIZER_GLIBC
 
 #  if !SANITIZER_ANDROID && !SANITIZER_APPLE && !SANITIZER_HAIKU
@@ -779,16 +777,16 @@ unsigned struct_ElfW_Phdr_sz = sizeof(Elf_Phdr);
   unsigned IOCTL_SOUND_PCM_WRITE_FILTER = SOUND_PCM_WRITE_FILTER;
 #endif // SOUND_VERSION
   unsigned IOCTL_TCFLSH = TCFLSH;
-  unsigned IOCTL_TCGETA = TCGETA;
+#    if SANITIZER_TERMIOS_IOCTL_CONSTANTS
   unsigned IOCTL_TCGETS = TCGETS;
+#    endif
   unsigned IOCTL_TCSBRK = TCSBRK;
   unsigned IOCTL_TCSBRKP = TCSBRKP;
-  unsigned IOCTL_TCSETA = TCSETA;
-  unsigned IOCTL_TCSETAF = TCSETAF;
-  unsigned IOCTL_TCSETAW = TCSETAW;
+#    if SANITIZER_TERMIOS_IOCTL_CONSTANTS
   unsigned IOCTL_TCSETS = TCSETS;
   unsigned IOCTL_TCSETSF = TCSETSF;
   unsigned IOCTL_TCSETSW = TCSETSW;
+#    endif
   unsigned IOCTL_TCXONC = TCXONC;
   unsigned IOCTL_TIOCGLCKTRMIOS = TIOCGLCKTRMIOS;
   unsigned IOCTL_TIOCGSOFTCAR = TIOCGSOFTCAR;
diff --git a/lib/libtsan/sanitizer_common/sanitizer_platform_limits_posix.h b/lib/libtsan/sanitizer_common/sanitizer_platform_limits_posix.h
index a2b6c37d54..05ebee49f2 100644
--- a/lib/libtsan/sanitizer_common/sanitizer_platform_limits_posix.h
+++ b/lib/libtsan/sanitizer_common/sanitizer_platform_limits_posix.h
@@ -32,6 +32,8 @@
 #  elif SANITIZER_GLIBC || SANITIZER_ANDROID
 #    define SANITIZER_HAS_STAT64 1
 #    define SANITIZER_HAS_STATFS64 1
+#  elif SANITIZER_HAIKU
+#    include <stdint.h>
 #  endif
 
 #  if defined(__sparc__)
@@ -102,6 +104,8 @@ const unsigned struct_kernel_stat_sz = SANITIZER_ANDROID
                                            ? FIRST_32_SECOND_64(104, 128)
 #      if defined(_ABIN32) && _MIPS_SIM == _ABIN32
                                            : FIRST_32_SECOND_64(176, 216);
+#      elif SANITIZER_MUSL
+                                           : FIRST_32_SECOND_64(160, 208);
 #      else
                                            : FIRST_32_SECOND_64(160, 216);
 #      endif
@@ -476,6 +480,30 @@ struct __sanitizer_cmsghdr {
   int cmsg_level;
   int cmsg_type;
 };
+#  elif SANITIZER_MUSL
+struct __sanitizer_msghdr {
+  void *msg_name;
+  unsigned msg_namelen;
+  struct __sanitizer_iovec *msg_iov;
+  int msg_iovlen;
+#    if SANITIZER_WORDSIZE == 64
+  int __pad1;
+#    endif
+  void *msg_control;
+  unsigned msg_controllen;
+#    if SANITIZER_WORDSIZE == 64
+  int __pad2;
+#    endif
+  int msg_flags;
+};
+struct __sanitizer_cmsghdr {
+  unsigned cmsg_len;
+#    if SANITIZER_WORDSIZE == 64
+  int __pad1;
+#    endif
+  int cmsg_level;
+  int cmsg_type;
+};
 #  else
 // In POSIX, int msg_iovlen; socklen_t msg_controllen; socklen_t cmsg_len; but
 // many implementations don't conform to the standard.
@@ -603,7 +631,7 @@ typedef unsigned long __sanitizer_sigset_t;
 #  elif SANITIZER_APPLE
 typedef unsigned __sanitizer_sigset_t;
 #  elif SANITIZER_HAIKU
-typedef unsigned long __sanitizer_sigset_t;
+typedef uint64_t __sanitizer_sigset_t;
 #  elif SANITIZER_LINUX
 struct __sanitizer_sigset_t {
   // The size is determined by looking at sizeof of real sigset_t on linux.
@@ -1312,16 +1340,14 @@ extern unsigned IOCTL_SNDCTL_COPR_SENDMSG;
 extern unsigned IOCTL_SNDCTL_COPR_WCODE;
 extern unsigned IOCTL_SNDCTL_COPR_WDATA;
 extern unsigned IOCTL_TCFLSH;
-extern unsigned IOCTL_TCGETA;
-extern unsigned IOCTL_TCGETS;
 extern unsigned IOCTL_TCSBRK;
 extern unsigned IOCTL_TCSBRKP;
-extern unsigned IOCTL_TCSETA;
-extern unsigned IOCTL_TCSETAF;
-extern unsigned IOCTL_TCSETAW;
+#    if SANITIZER_TERMIOS_IOCTL_CONSTANTS
+extern unsigned IOCTL_TCGETS;
 extern unsigned IOCTL_TCSETS;
 extern unsigned IOCTL_TCSETSF;
 extern unsigned IOCTL_TCSETSW;
+#    endif
 extern unsigned IOCTL_TCXONC;
 extern unsigned IOCTL_TIOCGLCKTRMIOS;
 extern unsigned IOCTL_TIOCGSOFTCAR;
diff --git a/lib/libtsan/sanitizer_common/sanitizer_posix.cpp b/lib/libtsan/sanitizer_common/sanitizer_posix.cpp
index 69af6465a6..5b2c4e668c 100644
--- a/lib/libtsan/sanitizer_common/sanitizer_posix.cpp
+++ b/lib/libtsan/sanitizer_common/sanitizer_posix.cpp
@@ -225,17 +225,9 @@ void *MapWritableFileToMemory(void *addr, uptr size, fd_t fd, OFF_T offset) {
   return (void *)p;
 }
 
-static inline bool IntervalsAreSeparate(uptr start1, uptr end1,
-                                        uptr start2, uptr end2) {
-  CHECK(start1 <= end1);
-  CHECK(start2 <= end2);
-  return (end1 < start2) || (end2 < start1);
-}
-
+#  if !SANITIZER_APPLE
 // FIXME: this is thread-unsafe, but should not cause problems most of the time.
-// When the shadow is mapped only a single thread usually exists (plus maybe
-// several worker threads on Mac, which aren't expected to map big chunks of
-// memory).
+// When the shadow is mapped, only a single thread usually exists.
 bool MemoryRangeIsAvailable(uptr range_start, uptr range_end) {
   MemoryMappingLayout proc_maps(/*cache_enabled*/true);
   if (proc_maps.Error())
@@ -251,7 +243,6 @@ bool MemoryRangeIsAvailable(uptr range_start, uptr range_end) {
   return true;
 }
 
-#if !SANITIZER_APPLE
 void DumpProcessMap() {
   MemoryMappingLayout proc_maps(/*cache_enabled*/true);
   const sptr kBufSize = 4095;
@@ -265,7 +256,7 @@ void DumpProcessMap() {
   Report("End of process memory map.\n");
   UnmapOrDie(filename, kBufSize);
 }
-#endif
+#  endif
 
 const char *GetPwd() {
   return GetEnv("PWD");
diff --git a/lib/libtsan/sanitizer_common/sanitizer_posix.h b/lib/libtsan/sanitizer_common/sanitizer_posix.h
index b5491c540d..dc9c3b8822 100644
--- a/lib/libtsan/sanitizer_common/sanitizer_posix.h
+++ b/lib/libtsan/sanitizer_common/sanitizer_posix.h
@@ -67,7 +67,8 @@ uptr internal_ptrace(int request, int pid, void *addr, void *data);
 uptr internal_waitpid(int pid, int *status, int options);
 
 int internal_fork();
-fd_t internal_spawn(const char *argv[], const char *envp[], pid_t *pid);
+bool internal_spawn(const char* argv[], const char* envp[], pid_t* pid,
+                    fd_t fd_stdin, fd_t fd_stdout);
 
 int internal_sysctl(const int *name, unsigned int namelen, void *oldp,
                     uptr *oldlenp, const void *newp, uptr newlen);
diff --git a/lib/libtsan/sanitizer_common/sanitizer_posix_libcdep.cpp b/lib/libtsan/sanitizer_common/sanitizer_posix_libcdep.cpp
index b1eb2009cf..8e5e87938c 100644
--- a/lib/libtsan/sanitizer_common/sanitizer_posix_libcdep.cpp
+++ b/lib/libtsan/sanitizer_common/sanitizer_posix_libcdep.cpp
@@ -47,6 +47,8 @@ typedef void (*sa_sigaction_t)(int, siginfo_t *, void *);
 
 namespace __sanitizer {
 
+[[maybe_unused]] static atomic_uint8_t signal_handler_is_from_sanitizer[64];
+
 u32 GetUid() {
   return getuid();
 }
@@ -210,6 +212,20 @@ void UnsetAlternateSignalStack() {
   UnmapOrDie(oldstack.ss_sp, oldstack.ss_size);
 }
 
+bool IsSignalHandlerFromSanitizer(int signum) {
+  return atomic_load(&signal_handler_is_from_sanitizer[signum],
+                     memory_order_relaxed);
+}
+
+bool SetSignalHandlerFromSanitizer(int signum, bool new_state) {
+  if (signum < 0 || static_cast<unsigned>(signum) >=
+                        ARRAY_SIZE(signal_handler_is_from_sanitizer))
+    return false;
+
+  return atomic_exchange(&signal_handler_is_from_sanitizer[signum], new_state,
+                         memory_order_relaxed);
+}
+
 static void MaybeInstallSigaction(int signum,
                                   SignalHandlerType handler) {
   if (GetHandleSignalMode(signum) == kHandleSignalNo) return;
@@ -223,6 +239,9 @@ static void MaybeInstallSigaction(int signum,
   if (common_flags()->use_sigaltstack) sigact.sa_flags |= SA_ONSTACK;
   CHECK_EQ(0, internal_sigaction(signum, &sigact, nullptr));
   VReport(1, "Installed the sigaction for signal %d\n", signum);
+
+  if (common_flags()->cloak_sanitizer_signal_handlers)
+    SetSignalHandlerFromSanitizer(signum, true);
 }
 
 void InstallDeadlySignalHandlers(SignalHandlerType handler) {
diff --git a/lib/libtsan/sanitizer_common/sanitizer_procmaps_mac.cpp b/lib/libtsan/sanitizer_common/sanitizer_procmaps_mac.cpp
index a9533d6fc0..93d3929033 100644
--- a/lib/libtsan/sanitizer_common/sanitizer_procmaps_mac.cpp
+++ b/lib/libtsan/sanitizer_common/sanitizer_procmaps_mac.cpp
@@ -20,18 +20,21 @@
 #include <mach/mach.h>
 
 // These are not available in older macOS SDKs.
-#ifndef CPU_SUBTYPE_X86_64_H
-#define CPU_SUBTYPE_X86_64_H  ((cpu_subtype_t)8)   /* Haswell */
-#endif
-#ifndef CPU_SUBTYPE_ARM_V7S
-#define CPU_SUBTYPE_ARM_V7S   ((cpu_subtype_t)11)  /* Swift */
-#endif
-#ifndef CPU_SUBTYPE_ARM_V7K
-#define CPU_SUBTYPE_ARM_V7K   ((cpu_subtype_t)12)
-#endif
-#ifndef CPU_TYPE_ARM64
-#define CPU_TYPE_ARM64        (CPU_TYPE_ARM | CPU_ARCH_ABI64)
-#endif
+#  ifndef CPU_SUBTYPE_X86_64_H
+#    define CPU_SUBTYPE_X86_64_H ((cpu_subtype_t)8) /* Haswell */
+#  endif
+#  ifndef CPU_SUBTYPE_ARM_V7S
+#    define CPU_SUBTYPE_ARM_V7S ((cpu_subtype_t)11) /* Swift */
+#  endif
+#  ifndef CPU_SUBTYPE_ARM_V7K
+#    define CPU_SUBTYPE_ARM_V7K ((cpu_subtype_t)12)
+#  endif
+#  ifndef CPU_TYPE_ARM64
+#    define CPU_TYPE_ARM64 (CPU_TYPE_ARM | CPU_ARCH_ABI64)
+#  endif
+#  ifndef CPU_SUBTYPE_ARM64E
+#    define CPU_SUBTYPE_ARM64E ((cpu_subtype_t)2)
+#  endif
 
 namespace __sanitizer {
 
@@ -42,7 +45,6 @@ struct MemoryMappedSegmentData {
   const char *current_load_cmd_addr;
   u32 lc_type;
   uptr base_virt_addr;
-  uptr addr_mask;
 };
 
 template <typename Section>
@@ -51,12 +53,62 @@ static void NextSectionLoad(LoadedModule *module, MemoryMappedSegmentData *data,
   const Section *sc = (const Section *)data->current_load_cmd_addr;
   data->current_load_cmd_addr += sizeof(Section);
 
-  uptr sec_start = (sc->addr & data->addr_mask) + data->base_virt_addr;
+  uptr sec_start = sc->addr + data->base_virt_addr;
   uptr sec_end = sec_start + sc->size;
   module->addAddressRange(sec_start, sec_end, /*executable=*/false, isWritable,
                           sc->sectname);
 }
 
+static bool VerifyMemoryMapping(MemoryMappingLayout* mapping) {
+  InternalMmapVector<LoadedModule> modules;
+  modules.reserve(128);  // matches DumpProcessMap
+  mapping->DumpListOfModules(&modules);
+
+  InternalMmapVector<LoadedModule::AddressRange> segments;
+  for (uptr i = 0; i < modules.size(); ++i) {
+    for (auto& range : modules[i].ranges()) {
+      if (range.beg == range.end)
+        continue;
+      segments.push_back(range);
+    }
+  }
+
+  // Verify that none of the segments overlap:
+  // 1. Sort the segments by the start address
+  // 2. Check that every segment starts after the previous one ends.
+  Sort(segments.data(), segments.size(),
+       [](LoadedModule::AddressRange& a, LoadedModule::AddressRange& b) {
+         return a.beg < b.beg;
+       });
+
+  // To avoid spam, we only print the report message once-per-process.
+  static bool invalid_module_map_reported = false;
+  bool well_formed = true;
+
+  for (size_t i = 1; i < segments.size(); i++) {
+    uptr cur_start = segments[i].beg;
+    uptr prev_end = segments[i - 1].end;
+    if (cur_start < prev_end) {
+      well_formed = false;
+      VReport(2, "Overlapping mappings: %s start = %p, %s end = %p\n",
+              segments[i].name, (void*)cur_start, segments[i - 1].name,
+              (void*)prev_end);
+      if (!invalid_module_map_reported) {
+        Report(
+            "WARN: Invalid dyld module map detected. This is most likely a bug "
+            "in the sanitizer.\n");
+        Report("WARN: Backtraces may be unreliable.\n");
+        invalid_module_map_reported = true;
+      }
+    }
+  }
+
+  for (auto& m : modules) m.clear();
+
+  mapping->Reset();
+  return well_formed;
+}
+
 void MemoryMappedSegment::AddAddressRanges(LoadedModule *module) {
   // Don't iterate over sections when the caller hasn't set up the
   // data pointer, when there are no sections, or when the segment
@@ -82,6 +134,7 @@ void MemoryMappedSegment::AddAddressRanges(LoadedModule *module) {
 
 MemoryMappingLayout::MemoryMappingLayout(bool cache_enabled) {
   Reset();
+  VerifyMemoryMapping(this);
 }
 
 MemoryMappingLayout::~MemoryMappingLayout() {
@@ -123,7 +176,7 @@ void MemoryMappingLayout::Reset() {
 // The dyld load address should be unchanged throughout process execution,
 // and it is expensive to compute once many libraries have been loaded,
 // so cache it here and do not reset.
-static mach_header *dyld_hdr = 0;
+static const mach_header* dyld_hdr = 0;
 static const char kDyldPath[] = "/usr/lib/dyld";
 static const int kDyldImageIdx = -1;
 
@@ -187,17 +240,22 @@ typedef struct dyld_shared_cache_dylib_text_info
 
 extern bool _dyld_get_shared_cache_uuid(uuid_t uuid);
 extern const void *_dyld_get_shared_cache_range(size_t *length);
+extern intptr_t _dyld_get_image_slide(const struct mach_header* mh);
 extern int dyld_shared_cache_iterate_text(
     const uuid_t cacheUuid,
     void (^callback)(const dyld_shared_cache_dylib_text_info *info));
+SANITIZER_WEAK_IMPORT const struct mach_header* _dyld_get_dyld_header(void);
 }  // extern "C"
 
-static mach_header *GetDyldImageHeaderViaSharedCache() {
+static const mach_header* GetDyldImageHeaderViaSharedCache() {
   uuid_t uuid;
   bool hasCache = _dyld_get_shared_cache_uuid(uuid);
   if (!hasCache)
     return nullptr;
 
+  if (&_dyld_get_dyld_header != nullptr)
+    return _dyld_get_dyld_header();
+
   size_t cacheLength;
   __block uptr cacheStart = (uptr)_dyld_get_shared_cache_range(&cacheLength);
   CHECK(cacheStart && cacheLength);
@@ -255,23 +313,21 @@ static bool NextSegmentLoad(MemoryMappedSegment *segment,
   layout_data->current_load_cmd_count--;
   if (((const load_command *)lc)->cmd == kLCSegment) {
     const SegmentCommand* sc = (const SegmentCommand *)lc;
-    uptr base_virt_addr, addr_mask;
-    if (layout_data->current_image == kDyldImageIdx) {
-      base_virt_addr = (uptr)get_dyld_hdr();
-      // vmaddr is masked with 0xfffff because on macOS versions < 10.12,
-      // it contains an absolute address rather than an offset for dyld.
-      // To make matters even more complicated, this absolute address
-      // isn't actually the absolute segment address, but the offset portion
-      // of the address is accurate when combined with the dyld base address,
-      // and the mask will give just this offset.
-      addr_mask = 0xfffff;
-    } else {
-      base_virt_addr =
-          (uptr)_dyld_get_image_vmaddr_slide(layout_data->current_image);
-      addr_mask = ~0;
+    if (internal_strcmp(sc->segname, "__LINKEDIT") == 0) {
+      // The LINKEDIT sections are for internal linker use, and may alias
+      // with the LINKEDIT section for other modules. (If we included them,
+      // our memory map would contain overlapping sections.)
+      return false;
     }
 
-    segment->start = (sc->vmaddr & addr_mask) + base_virt_addr;
+    uptr base_virt_addr;
+    if (layout_data->current_image == kDyldImageIdx)
+      base_virt_addr = (uptr)_dyld_get_image_slide(get_dyld_hdr());
+    else
+      base_virt_addr =
+          (uptr)_dyld_get_image_vmaddr_slide(layout_data->current_image);
+
+    segment->start = sc->vmaddr + base_virt_addr;
     segment->end = segment->start + sc->vmsize;
     // Most callers don't need section information, so only fill this struct
     // when required.
@@ -281,9 +337,9 @@ static bool NextSegmentLoad(MemoryMappedSegment *segment,
           (const char *)lc + sizeof(SegmentCommand);
       seg_data->lc_type = kLCSegment;
       seg_data->base_virt_addr = base_virt_addr;
-      seg_data->addr_mask = addr_mask;
       internal_strncpy(seg_data->name, sc->segname,
                        ARRAY_SIZE(seg_data->name));
+      seg_data->name[ARRAY_SIZE(seg_data->name) - 1] = 0;
     }
 
     // Return the initial protection.
@@ -297,6 +353,7 @@ static bool NextSegmentLoad(MemoryMappedSegment *segment,
                             ? kDyldPath
                             : _dyld_get_image_name(layout_data->current_image);
       internal_strncpy(segment->filename, src, segment->filename_size);
+      segment->filename[segment->filename_size - 1] = 0;
     }
     segment->arch = layout_data->current_arch;
     internal_memcpy(segment->uuid, layout_data->current_uuid, kModuleUUIDSize);
@@ -311,18 +368,26 @@ ModuleArch ModuleArchFromCpuType(cpu_type_t cputype, cpu_subtype_t cpusubtype) {
     case CPU_TYPE_I386:
       return kModuleArchI386;
     case CPU_TYPE_X86_64:
-      if (cpusubtype == CPU_SUBTYPE_X86_64_ALL) return kModuleArchX86_64;
-      if (cpusubtype == CPU_SUBTYPE_X86_64_H) return kModuleArchX86_64H;
+      if (cpusubtype == CPU_SUBTYPE_X86_64_ALL)
+        return kModuleArchX86_64;
+      if (cpusubtype == CPU_SUBTYPE_X86_64_H)
+        return kModuleArchX86_64H;
       CHECK(0 && "Invalid subtype of x86_64");
       return kModuleArchUnknown;
     case CPU_TYPE_ARM:
-      if (cpusubtype == CPU_SUBTYPE_ARM_V6) return kModuleArchARMV6;
-      if (cpusubtype == CPU_SUBTYPE_ARM_V7) return kModuleArchARMV7;
-      if (cpusubtype == CPU_SUBTYPE_ARM_V7S) return kModuleArchARMV7S;
-      if (cpusubtype == CPU_SUBTYPE_ARM_V7K) return kModuleArchARMV7K;
+      if (cpusubtype == CPU_SUBTYPE_ARM_V6)
+        return kModuleArchARMV6;
+      if (cpusubtype == CPU_SUBTYPE_ARM_V7)
+        return kModuleArchARMV7;
+      if (cpusubtype == CPU_SUBTYPE_ARM_V7S)
+        return kModuleArchARMV7S;
+      if (cpusubtype == CPU_SUBTYPE_ARM_V7K)
+        return kModuleArchARMV7K;
       CHECK(0 && "Invalid subtype of ARM");
       return kModuleArchUnknown;
     case CPU_TYPE_ARM64:
+      if (cpusubtype == CPU_SUBTYPE_ARM64E)
+        return kModuleArchARM64E;
       return kModuleArchARM64;
     default:
       CHECK(0 && "Invalid CPU type");
diff --git a/lib/libtsan/sanitizer_common/sanitizer_redefine_builtins.h b/lib/libtsan/sanitizer_common/sanitizer_redefine_builtins.h
index bda0f04687..7d88911176 100644
--- a/lib/libtsan/sanitizer_common/sanitizer_redefine_builtins.h
+++ b/lib/libtsan/sanitizer_common/sanitizer_redefine_builtins.h
@@ -15,7 +15,7 @@
 #    define SANITIZER_REDEFINE_BUILTINS_H
 
 // The asm hack only works with GCC and Clang.
-#    if !defined(_WIN32) && !defined(_AIX)
+#    if !defined(_WIN32) && !defined(_AIX) && !defined(__APPLE__)
 
 asm(R"(
     .set memcpy, __sanitizer_internal_memcpy
diff --git a/lib/libtsan/sanitizer_common/sanitizer_signal_interceptors.inc b/lib/libtsan/sanitizer_common/sanitizer_signal_interceptors.inc
index 94e4e2954a..8511e4d55f 100644
--- a/lib/libtsan/sanitizer_common/sanitizer_signal_interceptors.inc
+++ b/lib/libtsan/sanitizer_common/sanitizer_signal_interceptors.inc
@@ -45,6 +45,8 @@ using namespace __sanitizer;
 INTERCEPTOR(uptr, bsd_signal, int signum, uptr handler) {
   SIGNAL_INTERCEPTOR_ENTER();
   if (GetHandleSignalMode(signum) == kHandleSignalExclusive) return 0;
+
+  // TODO: support cloak_sanitizer_signal_handlers
   SIGNAL_INTERCEPTOR_SIGNAL_IMPL(bsd_signal, signum, handler);
 }
 #define INIT_BSD_SIGNAL COMMON_INTERCEPT_FUNCTION(bsd_signal)
@@ -56,19 +58,55 @@ INTERCEPTOR(uptr, bsd_signal, int signum, uptr handler) {
 INTERCEPTOR(uptr, signal, int signum, uptr handler) {
   SIGNAL_INTERCEPTOR_ENTER();
   if (GetHandleSignalMode(signum) == kHandleSignalExclusive)
+    // The user can neither view nor change the signal handler, regardless of
+    // the cloak_sanitizer_signal_handlers setting. This differs from
+    // sigaction().
     return (uptr) nullptr;
-  SIGNAL_INTERCEPTOR_SIGNAL_IMPL(signal, signum, handler);
+
+  uptr ret = +[](auto signal, int signum, uptr handler) {
+    SIGNAL_INTERCEPTOR_SIGNAL_IMPL(signal, signum, handler);
+  }(signal, signum, handler);
+
+  if (ret != sig_err && SetSignalHandlerFromSanitizer(signum, false))
+    // If the user sets a signal handler, it becomes uncloaked, even if they
+    // reuse a sanitizer's signal handler.
+    ret = sig_dfl;
+
+  return ret;
 }
 #define INIT_SIGNAL COMMON_INTERCEPT_FUNCTION(signal)
 
 INTERCEPTOR(int, sigaction_symname, int signum,
             const __sanitizer_sigaction *act, __sanitizer_sigaction *oldact) {
   SIGNAL_INTERCEPTOR_ENTER();
+
   if (GetHandleSignalMode(signum) == kHandleSignalExclusive) {
     if (!oldact) return 0;
     act = nullptr;
+    // If cloak_sanitizer_signal_handlers=true, the user can neither view nor
+    // change the signal handler.
+    // If false, the user can view but not change the signal handler. This
+    // differs from signal().
   }
-  SIGNAL_INTERCEPTOR_SIGACTION_IMPL(signum, act, oldact);
+
+  int ret = +[](int signum, const __sanitizer_sigaction* act,
+                __sanitizer_sigaction* oldact) {
+    SIGNAL_INTERCEPTOR_SIGACTION_IMPL(signum, act, oldact);
+  }(signum, act, oldact);
+
+  if (act) {
+    if (ret == 0 && SetSignalHandlerFromSanitizer(signum, false)) {
+      // If the user sets a signal handler, it becomes uncloaked, even if they
+      // reuse a sanitizer's signal handler.
+
+      if (oldact)
+        oldact->handler = reinterpret_cast<__sanitizer_sighandler_ptr>(sig_dfl);
+    }
+  } else if (ret == 0 && oldact && IsSignalHandlerFromSanitizer(signum)) {
+    oldact->handler = reinterpret_cast<__sanitizer_sighandler_ptr>(sig_dfl);
+  }
+
+  return ret;
 }
 #define INIT_SIGACTION COMMON_INTERCEPT_FUNCTION(sigaction_symname)
 
diff --git a/lib/libtsan/sanitizer_common/sanitizer_stoptheworld.h b/lib/libtsan/sanitizer_common/sanitizer_stoptheworld.h
index 7891c1081f..b4ed23abb9 100644
--- a/lib/libtsan/sanitizer_common/sanitizer_stoptheworld.h
+++ b/lib/libtsan/sanitizer_common/sanitizer_stoptheworld.h
@@ -38,7 +38,7 @@ class SuspendedThreadsList {
   }
 
   virtual uptr ThreadCount() const { UNIMPLEMENTED(); }
-  virtual tid_t GetThreadID(uptr index) const { UNIMPLEMENTED(); }
+  virtual ThreadID GetThreadID(uptr index) const { UNIMPLEMENTED(); }
 
  protected:
   ~SuspendedThreadsList() {}
diff --git a/lib/libtsan/sanitizer_common/sanitizer_stoptheworld_linux_libcdep.cpp b/lib/libtsan/sanitizer_common/sanitizer_stoptheworld_linux_libcdep.cpp
index 24929b8c4b..2bf547f4a7 100644
--- a/lib/libtsan/sanitizer_common/sanitizer_stoptheworld_linux_libcdep.cpp
+++ b/lib/libtsan/sanitizer_common/sanitizer_stoptheworld_linux_libcdep.cpp
@@ -94,17 +94,17 @@ class SuspendedThreadsListLinux final : public SuspendedThreadsList {
  public:
   SuspendedThreadsListLinux() { thread_ids_.reserve(1024); }
 
-  tid_t GetThreadID(uptr index) const override;
+  ThreadID GetThreadID(uptr index) const override;
   uptr ThreadCount() const override;
-  bool ContainsTid(tid_t thread_id) const;
-  void Append(tid_t tid);
+  bool ContainsTid(ThreadID thread_id) const;
+  void Append(ThreadID tid);
 
   PtraceRegistersStatus GetRegistersAndSP(uptr index,
                                           InternalMmapVector<uptr> *buffer,
                                           uptr *sp) const override;
 
  private:
-  InternalMmapVector<tid_t> thread_ids_;
+  InternalMmapVector<ThreadID> thread_ids_;
 };
 
 // Structure for passing arguments into the tracer thread.
@@ -137,10 +137,10 @@ class ThreadSuspender {
  private:
   SuspendedThreadsListLinux suspended_threads_list_;
   pid_t pid_;
-  bool SuspendThread(tid_t thread_id);
+  bool SuspendThread(ThreadID thread_id);
 };
 
-bool ThreadSuspender::SuspendThread(tid_t tid) {
+bool ThreadSuspender::SuspendThread(ThreadID tid) {
   int pterrno;
   if (internal_iserror(internal_ptrace(PTRACE_ATTACH, tid, nullptr, nullptr),
                        &pterrno)) {
@@ -210,7 +210,7 @@ void ThreadSuspender::KillAllThreads() {
 bool ThreadSuspender::SuspendAllThreads() {
   ThreadLister thread_lister(pid_);
   bool retry = true;
-  InternalMmapVector<tid_t> threads;
+  InternalMmapVector<ThreadID> threads;
   threads.reserve(128);
   for (int i = 0; i < 30 && retry; ++i) {
     retry = false;
@@ -226,7 +226,7 @@ bool ThreadSuspender::SuspendAllThreads() {
       case ThreadLister::Ok:
         break;
     }
-    for (tid_t tid : threads) {
+    for (ThreadID tid : threads) {
       // Are we already attached to this thread?
       // Currently this check takes linear time, however the number of threads
       // is usually small.
@@ -403,7 +403,77 @@ struct ScopedSetTracerPID {
   }
 };
 
+// This detects whether ptrace is blocked (e.g., by seccomp), by forking and
+// then attempting ptrace.
+// This separate check is necessary because StopTheWorld() creates a thread
+// with a shared virtual address space and shared TLS, and therefore
+// cannot use waitpid() due to the shared errno.
+static void TestPTrace() {
+#  if SANITIZER_SPARC
+  // internal_fork() on SPARC actually calls __fork(). We can't safely fork,
+  // because it's possible seccomp has been configured to disallow fork() but
+  // allow clone().
+  VReport(1, "WARNING: skipping TestPTrace() because this is SPARC\n");
+  VReport(1,
+          "If seccomp blocks ptrace, LeakSanitizer may hang without further "
+          "notice\n");
+  VReport(
+      1,
+      "If seccomp does not block ptrace, you can safely ignore this warning\n");
+#  else
+  // Heuristic: only check the first time this is called. This is not always
+  // correct (e.g., user manually triggers leak detection, then updates
+  // seccomp, then leak detection is triggered again).
+  static bool checked = false;
+  if (checked)
+    return;
+  checked = true;
+
+  // Hopefully internal_fork() is not too expensive, thanks to copy-on-write.
+  // Besides, this is only called the first time.
+  // Note that internal_fork() on non-SPARC Linux actually calls
+  // SYSCALL(clone); thus, it is reasonable to use it because if seccomp kills
+  // TestPTrace(), it would have killed StopTheWorld() anyway.
+  int pid = internal_fork();
+
+  if (pid < 0) {
+    int rverrno;
+    if (internal_iserror(pid, &rverrno))
+      VReport(0, "WARNING: TestPTrace() failed to fork (errno %d)\n", rverrno);
+
+    // We don't abort the sanitizer - it's still worth letting the sanitizer
+    // try.
+    return;
+  }
+
+  if (pid == 0) {
+    // Child subprocess
+
+    // TODO: consider checking return value of internal_ptrace, to handle
+    //       SCMP_ACT_ERRNO. However, be careful not to consume too many
+    //       resources performing a proper ptrace.
+    internal_ptrace(PTRACE_ATTACH, 0, nullptr, nullptr);
+    internal__exit(0);
+  } else {
+    int wstatus;
+    internal_waitpid(pid, &wstatus, 0);
+
+    // Handle SCMP_ACT_KILL
+    if (WIFSIGNALED(wstatus)) {
+      VReport(0,
+              "WARNING: ptrace appears to be blocked (is seccomp enabled?). "
+              "LeakSanitizer may hang.\n");
+      VReport(0, "Child exited with signal %d.\n", WTERMSIG(wstatus));
+      // We don't abort the sanitizer - it's still worth letting the sanitizer
+      // try.
+    }
+  }
+#  endif
+}
+
 void StopTheWorld(StopTheWorldCallback callback, void *argument) {
+  TestPTrace();
+
   StopTheWorldScope in_stoptheworld;
   // Prepare the arguments for TracerThread.
   struct TracerThreadArgument tracer_thread_argument;
@@ -457,7 +527,8 @@ void StopTheWorld(StopTheWorldCallback callback, void *argument) {
     internal_prctl(PR_SET_PTRACER, tracer_pid, 0, 0, 0);
     // Allow the tracer thread to start.
     tracer_thread_argument.mutex.Unlock();
-    // NOTE: errno is shared between this thread and the tracer thread.
+    // NOTE: errno is shared between this thread and the tracer thread
+    //       (clone was called without CLONE_SETTLS / newtls).
     // internal_waitpid() may call syscall() which can access/spoil errno,
     // so we can't call it now. Instead we for the tracer thread to finish using
     // the spin loop below. Man page for sched_yield() says "In the Linux
@@ -546,7 +617,7 @@ static constexpr uptr kExtraRegs[] = {0};
 #error "Unsupported architecture"
 #endif // SANITIZER_ANDROID && defined(__arm__)
 
-tid_t SuspendedThreadsListLinux::GetThreadID(uptr index) const {
+ThreadID SuspendedThreadsListLinux::GetThreadID(uptr index) const {
   CHECK_LT(index, thread_ids_.size());
   return thread_ids_[index];
 }
@@ -555,14 +626,14 @@ uptr SuspendedThreadsListLinux::ThreadCount() const {
   return thread_ids_.size();
 }
 
-bool SuspendedThreadsListLinux::ContainsTid(tid_t thread_id) const {
+bool SuspendedThreadsListLinux::ContainsTid(ThreadID thread_id) const {
   for (uptr i = 0; i < thread_ids_.size(); i++) {
     if (thread_ids_[i] == thread_id) return true;
   }
   return false;
 }
 
-void SuspendedThreadsListLinux::Append(tid_t tid) {
+void SuspendedThreadsListLinux::Append(ThreadID tid) {
   thread_ids_.push_back(tid);
 }
 
diff --git a/lib/libtsan/sanitizer_common/sanitizer_stoptheworld_mac.cpp b/lib/libtsan/sanitizer_common/sanitizer_stoptheworld_mac.cpp
index 8136164676..d6ef37ac84 100644
--- a/lib/libtsan/sanitizer_common/sanitizer_stoptheworld_mac.cpp
+++ b/lib/libtsan/sanitizer_common/sanitizer_stoptheworld_mac.cpp
@@ -23,7 +23,7 @@
 
 namespace __sanitizer {
 typedef struct {
-  tid_t tid;
+  ThreadID tid;
   thread_t thread;
 } SuspendedThreadInfo;
 
@@ -31,7 +31,7 @@ class SuspendedThreadsListMac final : public SuspendedThreadsList {
  public:
   SuspendedThreadsListMac() = default;
 
-  tid_t GetThreadID(uptr index) const override;
+  ThreadID GetThreadID(uptr index) const override;
   thread_t GetThread(uptr index) const;
   uptr ThreadCount() const override;
   bool ContainsThread(thread_t thread) const;
@@ -111,7 +111,7 @@ typedef x86_thread_state32_t regs_struct;
 #error "Unsupported architecture"
 #endif
 
-tid_t SuspendedThreadsListMac::GetThreadID(uptr index) const {
+ThreadID SuspendedThreadsListMac::GetThreadID(uptr index) const {
   CHECK_LT(index, threads_.size());
   return threads_[index].tid;
 }
diff --git a/lib/libtsan/sanitizer_common/sanitizer_stoptheworld_netbsd_libcdep.cpp b/lib/libtsan/sanitizer_common/sanitizer_stoptheworld_netbsd_libcdep.cpp
index 58a0cfdbf9..33d603fec8 100644
--- a/lib/libtsan/sanitizer_common/sanitizer_stoptheworld_netbsd_libcdep.cpp
+++ b/lib/libtsan/sanitizer_common/sanitizer_stoptheworld_netbsd_libcdep.cpp
@@ -52,17 +52,17 @@ class SuspendedThreadsListNetBSD final : public SuspendedThreadsList {
  public:
   SuspendedThreadsListNetBSD() { thread_ids_.reserve(1024); }
 
-  tid_t GetThreadID(uptr index) const;
+  ThreadID GetThreadID(uptr index) const;
   uptr ThreadCount() const;
-  bool ContainsTid(tid_t thread_id) const;
-  void Append(tid_t tid);
+  bool ContainsTid(ThreadID thread_id) const;
+  void Append(ThreadID tid);
 
   PtraceRegistersStatus GetRegistersAndSP(uptr index,
                                           InternalMmapVector<uptr> *buffer,
                                           uptr *sp) const;
 
  private:
-  InternalMmapVector<tid_t> thread_ids_;
+  InternalMmapVector<ThreadID> thread_ids_;
 };
 
 struct TracerThreadArgument {
@@ -313,7 +313,7 @@ void StopTheWorld(StopTheWorldCallback callback, void *argument) {
   }
 }
 
-tid_t SuspendedThreadsListNetBSD::GetThreadID(uptr index) const {
+ThreadID SuspendedThreadsListNetBSD::GetThreadID(uptr index) const {
   CHECK_LT(index, thread_ids_.size());
   return thread_ids_[index];
 }
@@ -322,7 +322,7 @@ uptr SuspendedThreadsListNetBSD::ThreadCount() const {
   return thread_ids_.size();
 }
 
-bool SuspendedThreadsListNetBSD::ContainsTid(tid_t thread_id) const {
+bool SuspendedThreadsListNetBSD::ContainsTid(ThreadID thread_id) const {
   for (uptr i = 0; i < thread_ids_.size(); i++) {
     if (thread_ids_[i] == thread_id)
       return true;
@@ -330,7 +330,7 @@ bool SuspendedThreadsListNetBSD::ContainsTid(tid_t thread_id) const {
   return false;
 }
 
-void SuspendedThreadsListNetBSD::Append(tid_t tid) {
+void SuspendedThreadsListNetBSD::Append(ThreadID tid) {
   thread_ids_.push_back(tid);
 }
 
diff --git a/lib/libtsan/sanitizer_common/sanitizer_stoptheworld_win.cpp b/lib/libtsan/sanitizer_common/sanitizer_stoptheworld_win.cpp
index fa15f8a9f0..43df59544d 100644
--- a/lib/libtsan/sanitizer_common/sanitizer_stoptheworld_win.cpp
+++ b/lib/libtsan/sanitizer_common/sanitizer_stoptheworld_win.cpp
@@ -38,7 +38,7 @@ struct SuspendedThreadsListWindows final : public SuspendedThreadsList {
                                           InternalMmapVector<uptr> *buffer,
                                           uptr *sp) const override;
 
-  tid_t GetThreadID(uptr index) const override;
+  ThreadID GetThreadID(uptr index) const override;
   uptr ThreadCount() const override;
 };
 
@@ -68,7 +68,7 @@ PtraceRegistersStatus SuspendedThreadsListWindows::GetRegistersAndSP(
   return REGISTERS_AVAILABLE;
 }
 
-tid_t SuspendedThreadsListWindows::GetThreadID(uptr index) const {
+ThreadID SuspendedThreadsListWindows::GetThreadID(uptr index) const {
   CHECK_LT(index, threadIds.size());
   return threadIds[index];
 }
diff --git a/lib/libtsan/sanitizer_common/sanitizer_symbolizer_internal.h b/lib/libtsan/sanitizer_common/sanitizer_symbolizer_internal.h
index 2345aee985..6442a2980b 100644
--- a/lib/libtsan/sanitizer_common/sanitizer_symbolizer_internal.h
+++ b/lib/libtsan/sanitizer_common/sanitizer_symbolizer_internal.h
@@ -83,7 +83,7 @@ class SymbolizerProcess {
   const char *SendCommand(const char *command);
 
  protected:
-  ~SymbolizerProcess() {}
+  ~SymbolizerProcess();
 
   /// The maximum number of arguments required to invoke a tool process.
   static const unsigned kArgVMax = 16;
@@ -114,6 +114,10 @@ class SymbolizerProcess {
   fd_t input_fd_;
   fd_t output_fd_;
 
+  // We hold on to the child's stdin fd (the read end of the pipe)
+  // so that when we write to it, we don't get a SIGPIPE
+  fd_t child_stdin_fd_;
+
   InternalMmapVector<char> buffer_;
 
   static const uptr kMaxTimesRestarted = 5;
diff --git a/lib/libtsan/sanitizer_common/sanitizer_symbolizer_libcdep.cpp b/lib/libtsan/sanitizer_common/sanitizer_symbolizer_libcdep.cpp
index 565701c85d..cc31d3d805 100644
--- a/lib/libtsan/sanitizer_common/sanitizer_symbolizer_libcdep.cpp
+++ b/lib/libtsan/sanitizer_common/sanitizer_symbolizer_libcdep.cpp
@@ -476,10 +476,11 @@ const char *LLVMSymbolizer::FormatAndSendCommand(const char *command_prefix,
   return symbolizer_process_->SendCommand(buffer_);
 }
 
-SymbolizerProcess::SymbolizerProcess(const char *path, bool use_posix_spawn)
+SymbolizerProcess::SymbolizerProcess(const char* path, bool use_posix_spawn)
     : path_(path),
       input_fd_(kInvalidFd),
       output_fd_(kInvalidFd),
+      child_stdin_fd_(kInvalidFd),
       times_restarted_(0),
       failed_to_start_(false),
       reported_invalid_path_(false),
@@ -488,6 +489,11 @@ SymbolizerProcess::SymbolizerProcess(const char *path, bool use_posix_spawn)
   CHECK_NE(path_[0], '\0');
 }
 
+SymbolizerProcess::~SymbolizerProcess() {
+  if (child_stdin_fd_ != kInvalidFd)
+    CloseFile(child_stdin_fd_);
+}
+
 static bool IsSameModule(const char *path) {
   if (const char *ProcessName = GetProcessName()) {
     if (const char *SymbolizerName = StripModuleName(path)) {
@@ -533,6 +539,10 @@ bool SymbolizerProcess::Restart() {
     CloseFile(input_fd_);
   if (output_fd_ != kInvalidFd)
     CloseFile(output_fd_);
+  if (child_stdin_fd_ != kInvalidFd) {
+    CloseFile(child_stdin_fd_);
+    child_stdin_fd_ = kInvalidFd;  // Don't free in destructor
+  }
   return StartSymbolizerSubprocess();
 }
 
diff --git a/lib/libtsan/sanitizer_common/sanitizer_symbolizer_mac.cpp b/lib/libtsan/sanitizer_common/sanitizer_symbolizer_mac.cpp
index 88536fc4e6..d3259984b1 100644
--- a/lib/libtsan/sanitizer_common/sanitizer_symbolizer_mac.cpp
+++ b/lib/libtsan/sanitizer_common/sanitizer_symbolizer_mac.cpp
@@ -78,13 +78,25 @@ class AtosSymbolizerProcess final : public SymbolizerProcess {
   }
 
   bool ReachedEndOfOutput(const char *buffer, uptr length) const override {
-    return (length >= 1 && buffer[length - 1] == '\n');
+    if (common_flags()->symbolize_inline_frames) {
+      // When running with -i, atos sends two newlines at the end of each
+      // address it symbolizes. This indicates the end of the set of frames
+      // for a particular address.
+      return length >= 2 && buffer[length - 1] == '\n' &&
+             buffer[length - 2] == '\n';
+    } else {
+      // When running without -i, atos only sends a single newline at
+      // the end of each address it symbolizes.
+      return length >= 1 && buffer[length - 1] == '\n';
+    }
   }
 
   void GetArgV(const char *path_to_binary,
                const char *(&argv)[kArgVMax]) const override {
     int i = 0;
     argv[i++] = path_to_binary;
+    if (common_flags()->symbolize_inline_frames)
+      argv[i++] = "-i";
     argv[i++] = "-p";
     argv[i++] = &pid_str_[0];
     if (GetMacosAlignedVersion() == MacosVersion(10, 9)) {
@@ -102,12 +114,16 @@ class AtosSymbolizerProcess final : public SymbolizerProcess {
 
 #undef K_ATOS_ENV_VAR
 
-static bool ParseCommandOutput(const char *str, uptr addr, char **out_name,
-                               char **out_module, char **out_file, uptr *line,
-                               uptr *start_address) {
+// Parses a single frame (one line) from str, and returns the pointer to the
+// next character to parse (i.e. after the newline) if successful. If
+// it fails, returns NULL.
+static const char* ParseCommandOutput(const char* str, uptr addr,
+                                      char** out_name, char** out_module,
+                                      char** out_file, uptr* line,
+                                      uptr* start_address) {
   // Trim ending newlines.
   char *trim;
-  ExtractTokenUpToDelimiter(str, "\n", &trim);
+  str = ExtractTokenUpToDelimiter(str, "\n", &trim);
 
   // The line from `atos` is in one of these formats:
   //   myfunction (in library.dylib) (sourcefile.c:17)
@@ -124,7 +140,7 @@ static bool ParseCommandOutput(const char *str, uptr addr, char **out_name,
   if (rest[0] == '\0') {
     InternalFree(symbol_name);
     InternalFree(trim);
-    return false;
+    return NULL;
   }
 
   if (internal_strncmp(symbol_name, "0x", 2) != 0)
@@ -149,7 +165,7 @@ static bool ParseCommandOutput(const char *str, uptr addr, char **out_name,
   }
 
   InternalFree(trim);
-  return true;
+  return str;
 }
 
 AtosSymbolizer::AtosSymbolizer(const char *path, LowLevelAllocator *allocator)
@@ -161,31 +177,72 @@ bool AtosSymbolizer::SymbolizePC(uptr addr, SymbolizedStack *stack) {
   char command[32];
   internal_snprintf(command, sizeof(command), "0x%zx\n", addr);
   const char *buf = process_->SendCommand(command);
-  if (!buf) return false;
-  uptr line;
-  uptr start_address = AddressInfo::kUnknown;
-  if (!ParseCommandOutput(buf, addr, &stack->info.function, &stack->info.module,
-                          &stack->info.file, &line, &start_address)) {
-    Report("WARNING: atos failed to symbolize address \"0x%zx\"\n", addr);
+  if (!buf)
     return false;
-  }
-  stack->info.line = (int)line;
 
-  if (start_address == AddressInfo::kUnknown) {
-    // Fallback to dladdr() to get function start address if atos doesn't report
-    // it.
-    Dl_info info;
-    int result = dladdr((const void *)addr, &info);
-    if (result)
-      start_address = reinterpret_cast<uptr>(info.dli_saddr);
+  SymbolizedStack* last = stack;
+  bool top_frame = true;
+
+  // Parse one line of input (i.e. one frame).
+  //
+  // When symbolize_inline_frames=true, an empty line
+  // (i.e. \n at the beginning of a line) indicates that the last
+  // frame has been sent.
+  //
+  // When symbolize_inline_frames=false, the symbolizer will send only
+  // one frame (without an empty line), so the loop runs exactly once
+  // and hits an early `break`.
+  while (*buf != '\n') {
+    uptr line;
+    uptr start_address = AddressInfo::kUnknown;
+
+    SymbolizedStack* cur;
+    if (top_frame) {
+      cur = stack;
+    } else {
+      cur = SymbolizedStack::New(stack->info.address);
+      cur->info.FillModuleInfo(stack->info.module, stack->info.module_offset,
+                               stack->info.module_arch);
+      last->next = cur;
+      last = cur;
+    }
+
+    // Parse one line of input (i.e. one frame)
+    // If this succeeds, buf will be updated to point to the first character
+    // after the newline.
+    buf = ParseCommandOutput(buf, addr, &cur->info.function, &cur->info.module,
+                             &cur->info.file, &line, &start_address);
+
+    // Upon failure, ParseCommandOutput returns NULL.
+    if (!buf) {
+      Report("WARNING: atos failed to symbolize address \"0x%zx\"\n", addr);
+      return false;
+    }
+    cur->info.line = (int)line;
+
+    if (top_frame && start_address == AddressInfo::kUnknown) {
+      // Fallback to dladdr() to get function start address if atos doesn't
+      // report it.
+      Dl_info info;
+      int result = dladdr((const void*)addr, &info);
+      if (result)
+        start_address = reinterpret_cast<uptr>(info.dli_saddr);
+    }
+
+    // Only assign to `function_offset` if we were able to get the function's
+    // start address and we got a sensible `start_address` (dladdr doesn't
+    // always ensure that `addr >= sym_addr`).
+    if (start_address != AddressInfo::kUnknown && addr >= start_address) {
+      cur->info.function_offset = addr - start_address;
+    }
+
+    // atos only sends one line when inline frames are off
+    if (!common_flags()->symbolize_inline_frames)
+      break;
+
+    top_frame = false;
   }
 
-  // Only assign to `function_offset` if we were able to get the function's
-  // start address and we got a sensible `start_address` (dladdr doesn't always
-  // ensure that `addr >= sym_addr`).
-  if (start_address != AddressInfo::kUnknown && addr >= start_address) {
-    stack->info.function_offset = addr - start_address;
-  }
   return true;
 }
 
diff --git a/lib/libtsan/sanitizer_common/sanitizer_symbolizer_posix_libcdep.cpp b/lib/libtsan/sanitizer_common/sanitizer_symbolizer_posix_libcdep.cpp
index f8d821e125..ab6aee7c9f 100644
--- a/lib/libtsan/sanitizer_common/sanitizer_symbolizer_posix_libcdep.cpp
+++ b/lib/libtsan/sanitizer_common/sanitizer_symbolizer_posix_libcdep.cpp
@@ -156,30 +156,34 @@ bool SymbolizerProcess::StartSymbolizerSubprocess() {
     Printf("\n");
   }
 
+  fd_t infd[2] = {}, outfd[2] = {};
+  if (!CreateTwoHighNumberedPipes(infd, outfd)) {
+    Report(
+        "WARNING: Can't create a socket pair to start "
+        "external symbolizer (errno: %d)\n",
+        errno);
+    return false;
+  }
+
   if (use_posix_spawn_) {
 #  if SANITIZER_APPLE
-    fd_t fd = internal_spawn(argv, const_cast<const char **>(GetEnvP()), &pid);
-    if (fd == kInvalidFd) {
+    bool success = internal_spawn(argv, const_cast<const char**>(GetEnvP()),
+                                  &pid, outfd[0], infd[1]);
+    if (!success) {
       Report("WARNING: failed to spawn external symbolizer (errno: %d)\n",
              errno);
+      internal_close(infd[0]);
+      internal_close(outfd[1]);
       return false;
     }
 
-    input_fd_ = fd;
-    output_fd_ = fd;
+    // We intentionally hold on to the read-end so that we don't get a SIGPIPE
+    child_stdin_fd_ = outfd[0];
+
 #  else   // SANITIZER_APPLE
     UNIMPLEMENTED();
 #  endif  // SANITIZER_APPLE
   } else {
-    fd_t infd[2] = {}, outfd[2] = {};
-    if (!CreateTwoHighNumberedPipes(infd, outfd)) {
-      Report(
-          "WARNING: Can't create a socket pair to start "
-          "external symbolizer (errno: %d)\n",
-          errno);
-      return false;
-    }
-
     pid = StartSubprocess(path_, argv, GetEnvP(), /* stdin */ outfd[0],
                           /* stdout */ infd[1]);
     if (pid < 0) {
@@ -187,11 +191,11 @@ bool SymbolizerProcess::StartSymbolizerSubprocess() {
       internal_close(outfd[1]);
       return false;
     }
-
-    input_fd_ = infd[0];
-    output_fd_ = outfd[1];
   }
 
+  input_fd_ = infd[0];
+  output_fd_ = outfd[1];
+
   CHECK_GT(pid, 0);
 
   // Check that symbolizer subprocess started successfully.
@@ -505,6 +509,13 @@ static void ChooseSymbolizerTools(IntrusiveList<SymbolizerTool> *list,
   }
 
 #  if SANITIZER_APPLE
+  if (list->empty()) {
+    Report(
+        "WARN: No external symbolizers found. Symbols may be missing or "
+        "unreliable.\n");
+    Report(
+        "HINT: Is PATH set? Does sandbox allow file-read of /usr/bin/atos?\n");
+  }
   VReport(2, "Using dladdr symbolizer.\n");
   list->push_back(new (*allocator) DlAddrSymbolizer());
 #  endif  // SANITIZER_APPLE
diff --git a/lib/libtsan/sanitizer_common/sanitizer_thread_registry.cpp b/lib/libtsan/sanitizer_common/sanitizer_thread_registry.cpp
index cdc24f4a88..d726d28243 100644
--- a/lib/libtsan/sanitizer_common/sanitizer_thread_registry.cpp
+++ b/lib/libtsan/sanitizer_common/sanitizer_thread_registry.cpp
@@ -80,7 +80,7 @@ void ThreadContextBase::SetFinished() {
   OnFinished();
 }
 
-void ThreadContextBase::SetStarted(tid_t _os_id, ThreadType _thread_type,
+void ThreadContextBase::SetStarted(ThreadID _os_id, ThreadType _thread_type,
                                    void *arg) {
   status = ThreadStatusRunning;
   os_id = _os_id;
@@ -228,7 +228,8 @@ static bool FindThreadContextByOsIdCallback(ThreadContextBase *tctx,
           tctx->status != ThreadStatusDead);
 }
 
-ThreadContextBase *ThreadRegistry::FindThreadContextByOsIDLocked(tid_t os_id) {
+ThreadContextBase *ThreadRegistry::FindThreadContextByOsIDLocked(
+    ThreadID os_id) {
   return FindThreadContextLocked(FindThreadContextByOsIdCallback,
                                  (void *)os_id);
 }
@@ -322,8 +323,8 @@ ThreadStatus ThreadRegistry::FinishThread(u32 tid) {
   return prev_status;
 }
 
-void ThreadRegistry::StartThread(u32 tid, tid_t os_id, ThreadType thread_type,
-                                 void *arg) {
+void ThreadRegistry::StartThread(u32 tid, ThreadID os_id,
+                                 ThreadType thread_type, void *arg) {
   ThreadRegistryLock l(this);
   running_threads_++;
   ThreadContextBase *tctx = threads_[tid];
diff --git a/lib/libtsan/sanitizer_common/sanitizer_thread_registry.h b/lib/libtsan/sanitizer_common/sanitizer_thread_registry.h
index e06abb3932..8adc420c8c 100644
--- a/lib/libtsan/sanitizer_common/sanitizer_thread_registry.h
+++ b/lib/libtsan/sanitizer_common/sanitizer_thread_registry.h
@@ -43,7 +43,7 @@ class ThreadContextBase {
   const u32 tid;  // Thread ID. Main thread should have tid = 0.
   u64 unique_id;  // Unique thread ID.
   u32 reuse_count;  // Number of times this tid was reused.
-  tid_t os_id;     // PID (used for reporting).
+  ThreadID os_id;   // PID (used for reporting).
   uptr user_id;   // Some opaque user thread id (e.g. pthread_t).
   char name[64];  // As annotated by user.
 
@@ -62,7 +62,7 @@ class ThreadContextBase {
   void SetDead();
   void SetJoined(void *arg);
   void SetFinished();
-  void SetStarted(tid_t _os_id, ThreadType _thread_type, void *arg);
+  void SetStarted(ThreadID _os_id, ThreadType _thread_type, void *arg);
   void SetCreated(uptr _user_id, u64 _unique_id, bool _detached,
                   u32 _parent_tid, u32 _stack_tid, void *arg);
   void Reset();
@@ -126,7 +126,7 @@ class SANITIZER_MUTEX ThreadRegistry {
   // is found.
   ThreadContextBase *FindThreadContextLocked(FindThreadCallback cb,
                                              void *arg);
-  ThreadContextBase *FindThreadContextByOsIDLocked(tid_t os_id);
+  ThreadContextBase *FindThreadContextByOsIDLocked(ThreadID os_id);
 
   void SetThreadName(u32 tid, const char *name);
   void SetThreadNameByUserId(uptr user_id, const char *name);
@@ -134,7 +134,7 @@ class SANITIZER_MUTEX ThreadRegistry {
   void JoinThread(u32 tid, void *arg);
   // Finishes thread and returns previous status.
   ThreadStatus FinishThread(u32 tid);
-  void StartThread(u32 tid, tid_t os_id, ThreadType thread_type, void *arg);
+  void StartThread(u32 tid, ThreadID os_id, ThreadType thread_type, void *arg);
   u32 ConsumeThreadUserId(uptr user_id);
   void SetThreadUserId(u32 tid, uptr user_id);
 
diff --git a/lib/libtsan/sanitizer_common/sanitizer_win.cpp b/lib/libtsan/sanitizer_common/sanitizer_win.cpp
index 48ebe78c40..ed4f60deef 100644
--- a/lib/libtsan/sanitizer_common/sanitizer_win.cpp
+++ b/lib/libtsan/sanitizer_common/sanitizer_win.cpp
@@ -108,9 +108,7 @@ int internal_dlinfo(void *handle, int request, void *p) {
 
 // In contrast to POSIX, on Windows GetCurrentThreadId()
 // returns a system-unique identifier.
-tid_t GetTid() {
-  return GetCurrentThreadId();
-}
+ThreadID GetTid() { return GetCurrentThreadId(); }
 
 uptr GetThreadSelf() {
   return GetTid();
diff --git a/lib/libtsan/tsan_debugging.cpp b/lib/libtsan/tsan_debugging.cpp
index 41fa293dba..b3422af756 100644
--- a/lib/libtsan/tsan_debugging.cpp
+++ b/lib/libtsan/tsan_debugging.cpp
@@ -165,7 +165,7 @@ int __tsan_get_report_mutex(void *report, uptr idx, uptr *mutex_id, void **addr,
 }
 
 SANITIZER_INTERFACE_ATTRIBUTE
-int __tsan_get_report_thread(void *report, uptr idx, int *tid, tid_t *os_id,
+int __tsan_get_report_thread(void *report, uptr idx, int *tid, ThreadID *os_id,
                              int *running, const char **name, int *parent_tid,
                              void **trace, uptr trace_size) {
   const ReportDesc *rep = (ReportDesc *)report;
@@ -242,7 +242,7 @@ const char *__tsan_locate_address(uptr addr, char *name, uptr name_size,
 
 SANITIZER_INTERFACE_ATTRIBUTE
 int __tsan_get_alloc_stack(uptr addr, uptr *trace, uptr size, int *thread_id,
-                           tid_t *os_id) {
+                           ThreadID *os_id) {
   MBlock *b = 0;
   Allocator *a = allocator();
   if (a->PointerIsMine((void *)addr)) {
diff --git a/lib/libtsan/tsan_flags.cpp b/lib/libtsan/tsan_flags.cpp
index 3fd58f4698..efaaef8b7a 100644
--- a/lib/libtsan/tsan_flags.cpp
+++ b/lib/libtsan/tsan_flags.cpp
@@ -20,6 +20,43 @@
 #include "tsan_rtl.h"
 #include "ubsan/ubsan_flags.h"
 
+#if SANITIZER_APPLE && !SANITIZER_GO
+namespace __sanitizer {
+
+// Parses a lock_during_write flag value; rejects anything unrecognized.
+template <>
+inline bool FlagHandler<LockDuringWriteSetting>::Parse(const char *value) {
+  if (internal_strcmp(value, "on") == 0) {
+    *t_ = kLockDuringAllWrites;
+  } else if (internal_strcmp(value, "disable_for_current_process") == 0) {
+    *t_ = kNoLockDuringWritesCurrentProcess;
+  } else if (internal_strcmp(value, "disable_for_all_processes") == 0) {
+    *t_ = kNoLockDuringWritesAllProcesses;
+  } else {
+    Printf("ERROR: Invalid value for lock_during_write option: '%s'\n", value);
+    return false;
+  }
+  return true;
+}
+
+// Formats the current setting as its flag-value string via FormatString.
+template <>
+inline bool FlagHandler<LockDuringWriteSetting>::Format(char *buffer,
+                                                        uptr size) {
+  switch (*t_) {
+    case kLockDuringAllWrites:
+      return FormatString(buffer, size, "on");
+    case kNoLockDuringWritesCurrentProcess:
+      return FormatString(buffer, size, "disable_for_current_process");
+    case kNoLockDuringWritesAllProcesses:
+      return FormatString(buffer, size, "disable_for_all_processes");
+  }
+  return false;  // Out-of-range value: never fall off a non-void function.
+}
+
+}  // namespace __sanitizer
+#endif  // SANITIZER_APPLE && !SANITIZER_GO
+
 namespace __tsan {
 
 // Can be overriden in frontend.
diff --git a/lib/libtsan/tsan_flags.h b/lib/libtsan/tsan_flags.h
index da27d5b992..e63d7c405a 100644
--- a/lib/libtsan/tsan_flags.h
+++ b/lib/libtsan/tsan_flags.h
@@ -16,6 +16,14 @@
 #include "sanitizer_common/sanitizer_flags.h"
 #include "sanitizer_common/sanitizer_deadlock_detector_interface.h"
 
+#if SANITIZER_APPLE && !SANITIZER_GO
+enum LockDuringWriteSetting {
+  kLockDuringAllWrites,               // flag value "on"
+  kNoLockDuringWritesCurrentProcess,  // "disable_for_current_process"
+  kNoLockDuringWritesAllProcesses,    // "disable_for_all_processes"
+};
+#endif
+
 namespace __tsan {
 
 struct Flags : DDFlags {
diff --git a/lib/libtsan/tsan_flags.inc b/lib/libtsan/tsan_flags.inc
index 731d776cc8..77ab910f08 100644
--- a/lib/libtsan/tsan_flags.inc
+++ b/lib/libtsan/tsan_flags.inc
@@ -80,3 +80,15 @@ TSAN_FLAG(bool, shared_ptr_interceptor, true,
 TSAN_FLAG(bool, print_full_thread_history, false,
           "If set, prints thread creation stacks for the threads involved in "
           "the report and their ancestors up to the main thread.")
+
+#if SANITIZER_APPLE && !SANITIZER_GO
+TSAN_FLAG(LockDuringWriteSetting, lock_during_write, kLockDuringAllWrites,
+          "Determines whether to obtain a lock while writing logs or error "
+          "reports. "
+          "\"on\" - [default] lock during all writes. "
+          "\"disable_for_current_process\" - don't lock during all writes in "
+          "the current process, but do lock for all writes in child "
+          "processes. "
+          "\"disable_for_all_processes\" - don't lock during all writes in "
+          "the current process and its child processes.")
+#endif
diff --git a/lib/libtsan/tsan_interceptors.h b/lib/libtsan/tsan_interceptors.h
index a357a870fd..f8cc8ff3b4 100644
--- a/lib/libtsan/tsan_interceptors.h
+++ b/lib/libtsan/tsan_interceptors.h
@@ -1,6 +1,9 @@
 #ifndef TSAN_INTERCEPTORS_H
 #define TSAN_INTERCEPTORS_H
 
+#if SANITIZER_APPLE && !SANITIZER_GO
+#  include "sanitizer_common/sanitizer_mac.h"
+#endif
 #include "sanitizer_common/sanitizer_stacktrace.h"
 #include "tsan_rtl.h"
 
@@ -43,7 +46,12 @@ inline bool in_symbolizer() {
 #endif
 
 inline bool MustIgnoreInterceptor(ThreadState *thr) {
-  return !thr->is_inited || thr->ignore_interceptors || thr->in_ignored_lib;
+  return !thr->is_inited || thr->ignore_interceptors || thr->in_ignored_lib
+#if SANITIZER_APPLE && !SANITIZER_GO
+         || (flags()->lock_during_write != kLockDuringAllWrites &&
+             thr->in_internal_write_call)
+#endif
+      ;
 }
 
 }  // namespace __tsan
diff --git a/lib/libtsan/tsan_interceptors_mac.cpp b/lib/libtsan/tsan_interceptors_mac.cpp
index 978664411f..c5e12b472a 100644
--- a/lib/libtsan/tsan_interceptors_mac.cpp
+++ b/lib/libtsan/tsan_interceptors_mac.cpp
@@ -281,6 +281,25 @@ TSAN_INTERCEPTOR(void, os_unfair_lock_lock, os_unfair_lock_t lock) {
   Acquire(thr, pc, (uptr)lock);
 }
 
+// os_unfair_lock_lock_with_flags was introduced in macOS 15
+#  if defined(__MAC_15_0) || defined(__IPHONE_18_0) || defined(__TVOS_18_0) || \
+      defined(__VISIONOS_2_0) || defined(__WATCHOS_11_0)
+#    pragma clang diagnostic push
+#    pragma clang diagnostic ignored "-Wunguarded-availability-new"
+// We're just intercepting this - if it doesn't exist on the platform, then the
+// process shouldn't have called it in the first place.
+TSAN_INTERCEPTOR(void, os_unfair_lock_lock_with_flags, os_unfair_lock_t lock,
+                 os_unfair_lock_flags_t flags) {
+  if (!cur_thread()->is_inited || cur_thread()->is_dead) {
+    return REAL(os_unfair_lock_lock_with_flags)(lock, flags);
+  }
+  SCOPED_TSAN_INTERCEPTOR(os_unfair_lock_lock_with_flags, lock, flags);
+  REAL(os_unfair_lock_lock_with_flags)(lock, flags);
+  Acquire(thr, pc, (uptr)lock);
+}
+#    pragma clang diagnostic pop
+#  endif
+
 TSAN_INTERCEPTOR(void, os_unfair_lock_lock_with_options, os_unfair_lock_t lock,
                  u32 options) {
   if (!cur_thread()->is_inited || cur_thread()->is_dead) {
diff --git a/lib/libtsan/tsan_interceptors_posix.cpp b/lib/libtsan/tsan_interceptors_posix.cpp
index 14b25a8995..714220a010 100644
--- a/lib/libtsan/tsan_interceptors_posix.cpp
+++ b/lib/libtsan/tsan_interceptors_posix.cpp
@@ -22,6 +22,7 @@
 #include "sanitizer_common/sanitizer_internal_defs.h"
 #include "sanitizer_common/sanitizer_libc.h"
 #include "sanitizer_common/sanitizer_linux.h"
+#include "sanitizer_common/sanitizer_placement_new.h"
 #include "sanitizer_common/sanitizer_platform_interceptors.h"
 #include "sanitizer_common/sanitizer_platform_limits_netbsd.h"
 #include "sanitizer_common/sanitizer_platform_limits_posix.h"
@@ -30,6 +31,9 @@
 #include "sanitizer_common/sanitizer_tls_get_addr.h"
 #include "sanitizer_common/sanitizer_vector.h"
 #include "tsan_fd.h"
+#if SANITIZER_APPLE && !SANITIZER_GO
+#  include "tsan_flags.h"
+#endif
 #include "tsan_interceptors.h"
 #include "tsan_interface.h"
 #include "tsan_mman.h"
@@ -78,17 +82,6 @@ struct ucontext_t {
 };
 #endif
 
-#if defined(__x86_64__) || defined(__mips__) || SANITIZER_PPC64V1 || \
-    defined(__s390x__)
-#define PTHREAD_ABI_BASE  "GLIBC_2.3.2"
-#elif defined(__aarch64__) || SANITIZER_PPC64V2
-#define PTHREAD_ABI_BASE  "GLIBC_2.17"
-#elif SANITIZER_LOONGARCH64
-#define PTHREAD_ABI_BASE  "GLIBC_2.36"
-#elif SANITIZER_RISCV64
-#  define PTHREAD_ABI_BASE "GLIBC_2.27"
-#endif
-
 extern "C" int pthread_attr_init(void *attr);
 extern "C" int pthread_attr_destroy(void *attr);
 DECLARE_REAL(int, pthread_attr_getdetachstate, void *, void *)
@@ -340,11 +333,6 @@ void ScopedInterceptor::DisableIgnoresImpl() {
 }
 
 #define TSAN_INTERCEPT(func) INTERCEPT_FUNCTION(func)
-#if SANITIZER_FREEBSD || SANITIZER_NETBSD
-#  define TSAN_INTERCEPT_VER(func, ver) INTERCEPT_FUNCTION(func)
-#else
-#  define TSAN_INTERCEPT_VER(func, ver) INTERCEPT_FUNCTION_VER(func, ver)
-#endif
 #if SANITIZER_FREEBSD
 #  define TSAN_MAYBE_INTERCEPT_FREEBSD_ALIAS(func) \
     INTERCEPT_FUNCTION(_pthread_##func)
@@ -1145,6 +1133,22 @@ TSAN_INTERCEPTOR(int, pthread_create,
 
 TSAN_INTERCEPTOR(int, pthread_join, void *th, void **ret) {
   SCOPED_INTERCEPTOR_RAW(pthread_join, th, ret);
+#if SANITIZER_ANDROID
+  {
+    // In Bionic, if the target thread has already exited when pthread_detach is
+    // called, pthread_detach will call pthread_join internally to clean it up.
+    // In that case, the thread has already been consumed by the pthread_detach
+    // interceptor.
+    Tid tid = ctx->thread_registry.FindThread(
+        [](ThreadContextBase* tctx, void* arg) {
+          return tctx->user_id == (uptr)arg;
+        },
+        th);
+    if (tid == kInvalidTid) {
+      return REAL(pthread_join)(th, ret);
+    }
+  }
+#endif
   Tid tid = ThreadConsumeTid(thr, pc, (uptr)th);
   ThreadIgnoreBegin(thr, pc);
   int res = BLOCK_REAL(pthread_join)(th, ret);
@@ -1664,6 +1668,14 @@ TSAN_INTERCEPTOR(int, pthread_barrier_wait, void *b) {
 
 TSAN_INTERCEPTOR(int, pthread_once, void *o, void (*f)()) {
   SCOPED_INTERCEPTOR_RAW(pthread_once, o, f);
+#if SANITIZER_APPLE && !SANITIZER_GO
+  if (flags()->lock_during_write != kLockDuringAllWrites &&
+      cur_thread_init()->in_internal_write_call) {
+    // This is needed to make it through process launch without hanging
+    f();
+    return 0;
+  }
+#endif
   if (o == 0 || f == 0)
     return errno_EINVAL;
   atomic_uint32_t *a;
@@ -2141,13 +2153,29 @@ static void ReportErrnoSpoiling(ThreadState *thr, uptr pc, int sig) {
   // StackTrace::GetNestInstructionPc(pc) is used because return address is
   // expected, OutputReport() will undo this.
   ObtainCurrentStack(thr, StackTrace::GetNextInstructionPc(pc), &stack);
-  ThreadRegistryLock l(&ctx->thread_registry);
-  ScopedReport rep(ReportTypeErrnoInSignal);
-  rep.SetSigNum(sig);
-  if (!IsFiredSuppression(ctx, ReportTypeErrnoInSignal, stack)) {
-    rep.AddStack(stack, true);
-    OutputReport(thr, rep);
+  // Use alloca, because malloc during signal handling deadlocks
+  ScopedReport *rep = (ScopedReport *)__builtin_alloca(sizeof(ScopedReport));
+  bool suppressed;
+  // Take a new scope as Apple platforms require the below locks released
+  // before symbolizing in order to avoid a deadlock
+  {
+    ThreadRegistryLock l(&ctx->thread_registry);
+    new (rep) ScopedReport(ReportTypeErrnoInSignal);
+    rep->SetSigNum(sig);
+    suppressed = IsFiredSuppression(ctx, ReportTypeErrnoInSignal, stack);
+    if (!suppressed)
+      rep->AddStack(stack, true);
+#if SANITIZER_APPLE
+  }  // Close this scope to release the locks before writing report
+#endif
+    if (!suppressed)
+      OutputReport(thr, *rep);
+
+    // Need to manually destroy this because we used placement new to allocate
+    rep->~ScopedReport();
+#if !SANITIZER_APPLE
   }
+#endif
 }
 
 static void CallUserSignalHandler(ThreadState *thr, bool sync, bool acquire,
@@ -2411,7 +2439,11 @@ TSAN_INTERCEPTOR(int, vfork, int fake) {
 }
 #endif
 
-#if SANITIZER_LINUX
+#if SANITIZER_LINUX && !SANITIZER_ANDROID
+// Bionic's pthread_create internally calls clone. When the CLONE_THREAD flag is
+// set, clone does not create a new process but a new thread. This is a
+// workaround for Android. Disabling the interception of clone solves the
+// problem in most scenarios.
 TSAN_INTERCEPTOR(int, clone, int (*fn)(void *), void *stack, int flags,
                  void *arg, int *parent_tid, void *tls, pid_t *child_tid) {
   SCOPED_INTERCEPTOR_RAW(clone, fn, stack, flags, arg, parent_tid, tls,
@@ -2888,12 +2920,12 @@ TSAN_INTERCEPTOR(void, _lwp_exit) {
 #endif
 
 #if SANITIZER_FREEBSD
-TSAN_INTERCEPTOR(void, thr_exit, tid_t *state) {
+TSAN_INTERCEPTOR(void, thr_exit, ThreadID *state) {
   SCOPED_TSAN_INTERCEPTOR(thr_exit, state);
   DestroyThreadState();
   REAL(thr_exit(state));
 }
-#define TSAN_MAYBE_INTERCEPT_THR_EXIT TSAN_INTERCEPT(thr_exit)
+#  define TSAN_MAYBE_INTERCEPT_THR_EXIT TSAN_INTERCEPT(thr_exit)
 #else
 #define TSAN_MAYBE_INTERCEPT_THR_EXIT
 #endif
@@ -3024,12 +3056,26 @@ void InitializeInterceptors() {
   TSAN_INTERCEPT(pthread_timedjoin_np);
   #endif
 
-  TSAN_INTERCEPT_VER(pthread_cond_init, PTHREAD_ABI_BASE);
-  TSAN_INTERCEPT_VER(pthread_cond_signal, PTHREAD_ABI_BASE);
-  TSAN_INTERCEPT_VER(pthread_cond_broadcast, PTHREAD_ABI_BASE);
-  TSAN_INTERCEPT_VER(pthread_cond_wait, PTHREAD_ABI_BASE);
-  TSAN_INTERCEPT_VER(pthread_cond_timedwait, PTHREAD_ABI_BASE);
-  TSAN_INTERCEPT_VER(pthread_cond_destroy, PTHREAD_ABI_BASE);
+  // In glibc versions older than 2.36, dlsym(RTLD_NEXT, "pthread_cond_init")
+  // may return an outdated symbol (max(2.2,base_version)) if the port was
+  // introduced before 2.3.2 (when the new pthread_cond_t was introduced).
+#if SANITIZER_GLIBC && !__GLIBC_PREREQ(2, 36) &&                      \
+    (defined(__x86_64__) || defined(__mips__) || SANITIZER_PPC64V1 || \
+     defined(__s390x__))
+  INTERCEPT_FUNCTION_VER(pthread_cond_init, "GLIBC_2.3.2");
+  INTERCEPT_FUNCTION_VER(pthread_cond_signal, "GLIBC_2.3.2");
+  INTERCEPT_FUNCTION_VER(pthread_cond_broadcast, "GLIBC_2.3.2");
+  INTERCEPT_FUNCTION_VER(pthread_cond_wait, "GLIBC_2.3.2");
+  INTERCEPT_FUNCTION_VER(pthread_cond_timedwait, "GLIBC_2.3.2");
+  INTERCEPT_FUNCTION_VER(pthread_cond_destroy, "GLIBC_2.3.2");
+#else
+  INTERCEPT_FUNCTION(pthread_cond_init);
+  INTERCEPT_FUNCTION(pthread_cond_signal);
+  INTERCEPT_FUNCTION(pthread_cond_broadcast);
+  INTERCEPT_FUNCTION(pthread_cond_wait);
+  INTERCEPT_FUNCTION(pthread_cond_timedwait);
+  INTERCEPT_FUNCTION(pthread_cond_destroy);
+#endif
 
   TSAN_MAYBE_PTHREAD_COND_CLOCKWAIT;
 
@@ -3120,7 +3166,7 @@ void InitializeInterceptors() {
 
   TSAN_INTERCEPT(fork);
   TSAN_INTERCEPT(vfork);
-#if SANITIZER_LINUX
+#if SANITIZER_LINUX && !SANITIZER_ANDROID
   TSAN_INTERCEPT(clone);
 #endif
 #if !SANITIZER_ANDROID
diff --git a/lib/libtsan/tsan_interface.h b/lib/libtsan/tsan_interface.h
index 6c19744990..db94cf48f9 100644
--- a/lib/libtsan/tsan_interface.h
+++ b/lib/libtsan/tsan_interface.h
@@ -16,7 +16,7 @@
 #define TSAN_INTERFACE_H
 
 #include <sanitizer_common/sanitizer_internal_defs.h>
-using __sanitizer::tid_t;
+using __sanitizer::ThreadID;
 using __sanitizer::uptr;
 
 // This header should NOT include any other headers.
@@ -175,7 +175,7 @@ int __tsan_get_report_mutex(void *report, uptr idx, uptr *mutex_id, void **addr,
 
 // Returns information about threads included in the report.
 SANITIZER_INTERFACE_ATTRIBUTE
-int __tsan_get_report_thread(void *report, uptr idx, int *tid, tid_t *os_id,
+int __tsan_get_report_thread(void *report, uptr idx, int *tid, ThreadID *os_id,
                              int *running, const char **name, int *parent_tid,
                              void **trace, uptr trace_size);
 
@@ -192,7 +192,7 @@ const char *__tsan_locate_address(uptr addr, char *name, uptr name_size,
 // Returns the allocation stack for a heap pointer.
 SANITIZER_INTERFACE_ATTRIBUTE
 int __tsan_get_alloc_stack(uptr addr, uptr *trace, uptr size, int *thread_id,
-                           tid_t *os_id);
+                           ThreadID *os_id);
 
 #endif  // SANITIZER_GO
 
diff --git a/lib/libtsan/tsan_interface_ann.cpp b/lib/libtsan/tsan_interface_ann.cpp
index befd6a3690..02ca82369a 100644
--- a/lib/libtsan/tsan_interface_ann.cpp
+++ b/lib/libtsan/tsan_interface_ann.cpp
@@ -437,16 +437,30 @@ void __tsan_mutex_post_divert(void *addr, unsigned flagz) {
 }
 
 static void ReportMutexHeldWrongContext(ThreadState *thr, uptr pc) {
-  ThreadRegistryLock l(&ctx->thread_registry);
-  ScopedReport rep(ReportTypeMutexHeldWrongContext);
-  for (uptr i = 0; i < thr->mset.Size(); ++i) {
-    MutexSet::Desc desc = thr->mset.Get(i);
-    rep.AddMutex(desc.addr, desc.stack_id);
+  // Use alloca, because malloc during signal handling deadlocks
+  ScopedReport *rep = (ScopedReport *)__builtin_alloca(sizeof(ScopedReport));
+  // Take a new scope as Apple platforms require the below locks released
+  // before symbolizing in order to avoid a deadlock
+  {
+    ThreadRegistryLock l(&ctx->thread_registry);
+    new (rep) ScopedReport(ReportTypeMutexHeldWrongContext);
+    for (uptr i = 0; i < thr->mset.Size(); ++i) {
+      MutexSet::Desc desc = thr->mset.Get(i);
+      rep->AddMutex(desc.addr, desc.stack_id);
+    }
+    VarSizeStackTrace trace;
+    ObtainCurrentStack(thr, pc, &trace);
+    rep->AddStack(trace, true);
+#if SANITIZER_APPLE
+  }  // Close this scope to release the locks
+#endif
+    OutputReport(thr, *rep);
+
+    // Need to manually destroy this because we used placement new to allocate
+    rep->~ScopedReport();
+#if !SANITIZER_APPLE
   }
-  VarSizeStackTrace trace;
-  ObtainCurrentStack(thr, pc, &trace);
-  rep.AddStack(trace, true);
-  OutputReport(thr, rep);
+#endif
 }
 
 INTERFACE_ATTRIBUTE
diff --git a/lib/libtsan/tsan_mman.cpp b/lib/libtsan/tsan_mman.cpp
index 0ea83fb3b5..caacb36758 100644
--- a/lib/libtsan/tsan_mman.cpp
+++ b/lib/libtsan/tsan_mman.cpp
@@ -182,10 +182,24 @@ static void SignalUnsafeCall(ThreadState *thr, uptr pc) {
   ObtainCurrentStack(thr, pc, &stack);
   if (IsFiredSuppression(ctx, ReportTypeSignalUnsafe, stack))
     return;
-  ThreadRegistryLock l(&ctx->thread_registry);
-  ScopedReport rep(ReportTypeSignalUnsafe);
-  rep.AddStack(stack, true);
-  OutputReport(thr, rep);
+  // Use alloca, because malloc during signal handling deadlocks
+  ScopedReport *rep = (ScopedReport *)__builtin_alloca(sizeof(ScopedReport));
+  // Take a new scope as Apple platforms require the below locks released
+  // before symbolizing in order to avoid a deadlock
+  {
+    ThreadRegistryLock l(&ctx->thread_registry);
+    new (rep) ScopedReport(ReportTypeSignalUnsafe);
+    rep->AddStack(stack, true);
+#if SANITIZER_APPLE
+  }  // Close this scope to release the locks
+#endif
+    OutputReport(thr, *rep);
+
+    // Need to manually destroy this because we used placement new to allocate
+    rep->~ScopedReport();
+#if !SANITIZER_APPLE
+  }
+#endif
 }
 
 
diff --git a/lib/libtsan/tsan_platform.h b/lib/libtsan/tsan_platform.h
index ada594bc11..7089be4d5d 100644
--- a/lib/libtsan/tsan_platform.h
+++ b/lib/libtsan/tsan_platform.h
@@ -681,6 +681,32 @@ struct MappingGoMips64_47 {
   static const uptr kShadowAdd = 0x200000000000ull;
 };
 
+/* Go on linux/riscv64 (39-bit VMA)
+0000 0001 0000 - 000f 0000 0000: executable and heap (60 GiB)
+000f 0000 0000 - 0010 0000 0000: -
+0010 0000 0000 - 0030 0000 0000: shadow - 128 GiB ( ~ 2 * app)
+0030 0000 0000 - 0038 0000 0000: metainfo - 32 GiB ( ~ 0.5 * app)
+0038 0000 0000 - 0040 0000 0000: -
+*/
+struct MappingGoRiscv64_39 {
+  static const uptr kMetaShadowBeg = 0x003000000000ull;
+  static const uptr kMetaShadowEnd = 0x003800000000ull;
+  static const uptr kShadowBeg = 0x001000000000ull;
+  static const uptr kShadowEnd = 0x003000000000ull;
+  static const uptr kLoAppMemBeg = 0x000000010000ull;
+  static const uptr kLoAppMemEnd = 0x000f00000000ull;
+  static const uptr kMidAppMemBeg = 0;
+  static const uptr kMidAppMemEnd = 0;
+  static const uptr kHiAppMemBeg = 0;
+  static const uptr kHiAppMemEnd = 0;
+  static const uptr kHeapMemBeg = 0;
+  static const uptr kHeapMemEnd = 0;
+  static const uptr kVdsoBeg = 0;
+  static const uptr kShadowMsk = 0;
+  static const uptr kShadowXor = 0;
+  static const uptr kShadowAdd = 0x001000000000ull;
+};
+
 /* Go on linux/riscv64 (48-bit VMA)
 0000 0001 0000 - 00e0 0000 0000: executable and heap (896 GiB)
 00e0 0000 0000 - 2000 0000 0000: -
@@ -689,13 +715,13 @@ struct MappingGoMips64_47 {
 3000 0000 0000 - 3100 0000 0000: metainfo - 1 TiB ( ~ 1 * app)
 3100 0000 0000 - 8000 0000 0000: -
 */
-struct MappingGoRiscv64 {
+struct MappingGoRiscv64_48 {
   static const uptr kMetaShadowBeg = 0x300000000000ull;
   static const uptr kMetaShadowEnd = 0x310000000000ull;
   static const uptr kShadowBeg = 0x200000000000ull;
   static const uptr kShadowEnd = 0x240000000000ull;
   static const uptr kLoAppMemBeg = 0x000000010000ull;
-  static const uptr kLoAppMemEnd = 0x000e00000000ull;
+  static const uptr kLoAppMemEnd = 0x00e000000000ull;
   static const uptr kMidAppMemBeg = 0;
   static const uptr kMidAppMemEnd = 0;
   static const uptr kHiAppMemBeg = 0;
@@ -756,7 +782,12 @@ ALWAYS_INLINE auto SelectMapping(Arg arg) {
 #  elif defined(__loongarch_lp64)
   return Func::template Apply<MappingGoLoongArch64_47>(arg);
 #  elif SANITIZER_RISCV64
-  return Func::template Apply<MappingGoRiscv64>(arg);
+  switch (vmaSize) {
+    case 39:
+      return Func::template Apply<MappingGoRiscv64_39>(arg);
+    case 48:
+      return Func::template Apply<MappingGoRiscv64_48>(arg);
+  }
 #  elif SANITIZER_WINDOWS
   return Func::template Apply<MappingGoWindows>(arg);
 #  else
@@ -827,7 +858,8 @@ void ForEachMapping() {
   Func::template Apply<MappingGoAarch64>();
   Func::template Apply<MappingGoLoongArch64_47>();
   Func::template Apply<MappingGoMips64_47>();
-  Func::template Apply<MappingGoRiscv64>();
+  Func::template Apply<MappingGoRiscv64_39>();
+  Func::template Apply<MappingGoRiscv64_48>();
   Func::template Apply<MappingGoS390x>();
 }
 
@@ -926,7 +958,9 @@ struct IsAppMemImpl {
 };
 
 ALWAYS_INLINE
-bool IsAppMem(uptr mem) { return SelectMapping<IsAppMemImpl>(mem); }
+bool IsAppMem(uptr mem) {
+  return SelectMapping<IsAppMemImpl>(STRIP_MTE_TAG(mem));
+}
 
 struct IsShadowMemImpl {
   template <typename Mapping>
@@ -965,7 +999,8 @@ struct MemToShadowImpl {
 
 ALWAYS_INLINE
 RawShadow *MemToShadow(uptr x) {
-  return reinterpret_cast<RawShadow *>(SelectMapping<MemToShadowImpl>(x));
+  return reinterpret_cast<RawShadow*>(
+      SelectMapping<MemToShadowImpl>(STRIP_MTE_TAG(x)));
 }
 
 struct MemToMetaImpl {
@@ -979,7 +1014,9 @@ struct MemToMetaImpl {
 };
 
 ALWAYS_INLINE
-u32 *MemToMeta(uptr x) { return SelectMapping<MemToMetaImpl>(x); }
+u32* MemToMeta(uptr x) {
+  return SelectMapping<MemToMetaImpl>(STRIP_MTE_TAG(x));
+}
 
 struct ShadowToMemImpl {
   template <typename Mapping>
diff --git a/lib/libtsan/tsan_platform_linux.cpp b/lib/libtsan/tsan_platform_linux.cpp
index 2c55645a15..c974f549ac 100644
--- a/lib/libtsan/tsan_platform_linux.cpp
+++ b/lib/libtsan/tsan_platform_linux.cpp
@@ -393,9 +393,9 @@ void InitializePlatformEarly() {
     Die();
   }
 #    else
-  if (vmaSize != 48) {
+  if (vmaSize != 39 && vmaSize != 48) {
     Printf("FATAL: ThreadSanitizer: unsupported VMA range\n");
-    Printf("FATAL: Found %zd - Supported 48\n", vmaSize);
+    Printf("FATAL: Found %zd - Supported 39 and 48\n", vmaSize);
     Die();
   }
 #    endif
@@ -415,7 +415,7 @@ void InitializePlatform() {
   // is not compiled with -pie.
 #if !SANITIZER_GO
   {
-#    if SANITIZER_LINUX && (defined(__aarch64__) || defined(__loongarch_lp64))
+#    if INIT_LONGJMP_XOR_KEY
     // Initialize the xor key used in {sig}{set,long}jump.
     InitializeLongjmpXorKey();
 #    endif
@@ -486,8 +486,20 @@ int ExtractRecvmsgFDs(void *msgp, int *fds, int nfd) {
 
 // Reverse operation of libc stack pointer mangling
 static uptr UnmangleLongJmpSp(uptr mangled_sp) {
-#if defined(__x86_64__)
-# if SANITIZER_LINUX
+#    if SANITIZER_ANDROID && INIT_LONGJMP_XOR_KEY
+  if (longjmp_xor_key == 0) {
+    // bionic libc initialization process: __libc_init_globals ->
+    // __libc_init_vdso (calls strcmp) -> __libc_init_setjmp_cookie. strcmp is
+    // intercepted by TSan, so during TSan initialization the setjmp_cookie
+    // remains uninitialized. On Android, longjmp_xor_key must be set on first
+    // use.
+    InitializeLongjmpXorKey();
+    CHECK_NE(longjmp_xor_key, 0);
+  }
+#    endif
+
+#    if defined(__x86_64__)
+#      if SANITIZER_LINUX
   // Reverse of:
   //   xor  %fs:0x30, %rsi
   //   rol  $0x11, %rsi
@@ -542,13 +554,23 @@ static uptr UnmangleLongJmpSp(uptr mangled_sp) {
 # else
 #  define LONG_JMP_SP_ENV_SLOT 2
 # endif
-#elif SANITIZER_LINUX
-# ifdef __aarch64__
-#  define LONG_JMP_SP_ENV_SLOT 13
-# elif defined(__loongarch__)
-#  define LONG_JMP_SP_ENV_SLOT 1
-# elif defined(__mips64)
-#  define LONG_JMP_SP_ENV_SLOT 1
+#    elif SANITIZER_ANDROID
+#      ifdef __aarch64__
+#        define LONG_JMP_SP_ENV_SLOT 3
+#      elif SANITIZER_RISCV64
+#        define LONG_JMP_SP_ENV_SLOT 3
+#      elif defined(__x86_64__)
+#        define LONG_JMP_SP_ENV_SLOT 6
+#      else
+#        error unsupported
+#      endif
+#    elif SANITIZER_LINUX
+#      ifdef __aarch64__
+#        define LONG_JMP_SP_ENV_SLOT 13
+#      elif defined(__loongarch__)
+#        define LONG_JMP_SP_ENV_SLOT 1
+#      elif defined(__mips64)
+#        define LONG_JMP_SP_ENV_SLOT 1
 #      elif SANITIZER_RISCV64
 #        define LONG_JMP_SP_ENV_SLOT 13
 #      elif defined(__s390x__)
@@ -556,7 +578,7 @@ static uptr UnmangleLongJmpSp(uptr mangled_sp) {
 #      else
 #        define LONG_JMP_SP_ENV_SLOT 6
 #      endif
-#endif
+#    endif
 
 uptr ExtractLongJmpSp(uptr *env) {
   uptr mangled_sp = env[LONG_JMP_SP_ENV_SLOT];
@@ -653,7 +675,13 @@ ThreadState *cur_thread() {
     }
     CHECK_EQ(0, internal_sigprocmask(SIG_SETMASK, &oldset, nullptr));
   }
-  return thr;
+
+  // Skia calls mallopt(M_THREAD_DISABLE_MEM_INIT, 1), which sets the least
+  // significant bit of TLS_SLOT_SANITIZER to 1. Scudo allocator uses this bit
+  // as a flag to disable memory initialization. This is a workaround to get the
+  // correct ThreadState pointer.
+  uptr addr = reinterpret_cast<uptr>(thr);
+  return reinterpret_cast<ThreadState*>(addr & ~1ULL);
 }
 
 void set_cur_thread(ThreadState *thr) {
diff --git a/lib/libtsan/tsan_platform_mac.cpp b/lib/libtsan/tsan_platform_mac.cpp
index eb344df168..da735fba66 100644
--- a/lib/libtsan/tsan_platform_mac.cpp
+++ b/lib/libtsan/tsan_platform_mac.cpp
@@ -226,9 +226,20 @@ static void ThreadTerminateCallback(uptr thread) {
 void InitializePlatformEarly() {
 #  if !SANITIZER_GO && SANITIZER_IOS
   uptr max_vm = GetMaxUserVirtualAddress() + 1;
-  if (max_vm != HiAppMemEnd()) {
-    Printf("ThreadSanitizer: unsupported vm address limit %p, expected %p.\n",
-           (void *)max_vm, (void *)HiAppMemEnd());
+  if (max_vm < HiAppMemEnd()) {
+    Report(
+        "ThreadSanitizer: Unsupported virtual memory layout:\n\tVM address "
+        "limit = %p\n\tExpected %p.\n",
+        (void*)max_vm, (void*)HiAppMemEnd());
+    Die();
+  }
+  // In some configurations, the max_vm is expanded, but much of this space is
+  // already mapped. TSAN will not work in this configuration.
+  if (!MemoryRangeIsAvailable(HiAppMemEnd() - 1, HiAppMemEnd() - 1)) {
+    Report(
+        "ThreadSanitizer: Unsupported virtual memory layout: Address %p is "
+        "already mapped.\n",
+        (void*)(HiAppMemEnd() - 1));
     Die();
   }
 #endif
@@ -248,7 +259,9 @@ void InitializePlatform() {
 
   ThreadEventCallbacks callbacks = {
       .create = ThreadCreateCallback,
+      .start = nullptr,
       .terminate = ThreadTerminateCallback,
+      .destroy = nullptr,
   };
   InstallPthreadIntrospectionHook(callbacks);
 #endif
diff --git a/lib/libtsan/tsan_report.h b/lib/libtsan/tsan_report.h
index bfe470797f..53bb21964d 100644
--- a/lib/libtsan/tsan_report.h
+++ b/lib/libtsan/tsan_report.h
@@ -12,6 +12,8 @@
 #ifndef TSAN_REPORT_H
 #define TSAN_REPORT_H
 
+#include "sanitizer_common/sanitizer_internal_defs.h"
+#include "sanitizer_common/sanitizer_stacktrace.h"
 #include "sanitizer_common/sanitizer_symbolizer.h"
 #include "sanitizer_common/sanitizer_thread_registry.h"
 #include "sanitizer_common/sanitizer_vector.h"
@@ -56,6 +58,7 @@ struct ReportMop {
   bool atomic;
   uptr external_tag;
   Vector<ReportMopMutex> mset;
+  StackTrace stack_trace;
   ReportStack *stack;
 
   ReportMop();
@@ -79,25 +82,34 @@ struct ReportLocation {
   int fd = 0;
   bool fd_closed = false;
   bool suppressable = false;
+  StackID stack_id = 0;
   ReportStack *stack = nullptr;
 };
 
 struct ReportThread {
   Tid id;
-  tid_t os_id;
+  ThreadID os_id;
   bool running;
   ThreadType thread_type;
   char *name;
   Tid parent_tid;
+  StackID stack_id;
   ReportStack *stack;
+  bool suppressable;
 };
 
 struct ReportMutex {
   int id;
   uptr addr;
+  StackID stack_id;
   ReportStack *stack;
 };
 
+struct AddedLocationAddr {
+  uptr addr;
+  usize locs_idx;
+};
+
 class ReportDesc {
  public:
   ReportType typ;
@@ -105,6 +117,7 @@ class ReportDesc {
   Vector<ReportStack*> stacks;
   Vector<ReportMop*> mops;
   Vector<ReportLocation*> locs;
+  Vector<AddedLocationAddr> added_location_addrs;
   Vector<ReportMutex*> mutexes;
   Vector<ReportThread*> threads;
   Vector<Tid> unique_tids;
diff --git a/lib/libtsan/tsan_rtl.cpp b/lib/libtsan/tsan_rtl.cpp
index 0d7247a56a..feee566f44 100644
--- a/lib/libtsan/tsan_rtl.cpp
+++ b/lib/libtsan/tsan_rtl.cpp
@@ -40,6 +40,13 @@ SANITIZER_WEAK_DEFAULT_IMPL
 void __tsan_test_only_on_fork() {}
 #endif
 
+#if SANITIZER_APPLE && !SANITIZER_GO
+// Override weak symbol from sanitizer_common
+extern void __tsan_set_in_internal_write_call(bool value) {
+  __tsan::cur_thread_init()->in_internal_write_call = value;
+}
+#endif
+
 namespace __tsan {
 
 #if !SANITIZER_GO
@@ -893,6 +900,13 @@ void ForkChildAfter(ThreadState* thr, uptr pc, bool start_thread) {
     ThreadIgnoreBegin(thr, pc);
     ThreadIgnoreSyncBegin(thr, pc);
   }
+
+#  if SANITIZER_APPLE && !SANITIZER_GO
+  // This flag can have inheritance disabled - we are the child so act
+  // accordingly
+  if (flags()->lock_during_write == kNoLockDuringWritesCurrentProcess)
+    flags()->lock_during_write = kLockDuringAllWrites;
+#  endif
 }
 #endif
 
diff --git a/lib/libtsan/tsan_rtl.h b/lib/libtsan/tsan_rtl.h
index dc32980e90..635654616b 100644
--- a/lib/libtsan/tsan_rtl.h
+++ b/lib/libtsan/tsan_rtl.h
@@ -236,6 +236,10 @@ struct alignas(SANITIZER_CACHE_LINE_SIZE) ThreadState {
 
   const ReportDesc *current_report;
 
+#if SANITIZER_APPLE && !SANITIZER_GO
+  bool in_internal_write_call;
+#endif
+
   explicit ThreadState(Tid tid);
 };
 
@@ -420,6 +424,7 @@ class ScopedReportBase {
   void AddSleep(StackID stack_id);
   void SetCount(int count);
   void SetSigNum(int sig);
+  void SymbolizeStackElems(void);
 
   const ReportDesc *GetReport() const;
 
@@ -498,7 +503,7 @@ void ForkChildAfter(ThreadState *thr, uptr pc, bool start_thread);
 
 void ReportRace(ThreadState *thr, RawShadow *shadow_mem, Shadow cur, Shadow old,
                 AccessType typ);
-bool OutputReport(ThreadState *thr, const ScopedReport &srep);
+bool OutputReport(ThreadState *thr, ScopedReport &srep);
 bool IsFiredSuppression(Context *ctx, ReportType type, StackTrace trace);
 bool IsExpectedReport(uptr addr, uptr size);
 
@@ -559,7 +564,7 @@ void ThreadIgnoreSyncBegin(ThreadState *thr, uptr pc);
 void ThreadIgnoreSyncEnd(ThreadState *thr);
 
 Tid ThreadCreate(ThreadState *thr, uptr pc, uptr uid, bool detached);
-void ThreadStart(ThreadState *thr, Tid tid, tid_t os_id,
+void ThreadStart(ThreadState *thr, Tid tid, ThreadID os_id,
                  ThreadType thread_type);
 void ThreadFinish(ThreadState *thr);
 Tid ThreadConsumeTid(ThreadState *thr, uptr pc, uptr uid);
diff --git a/lib/libtsan/tsan_rtl_aarch64.S b/lib/libtsan/tsan_rtl_aarch64.S
index 7d920bee4a..124bd59a91 100644
--- a/lib/libtsan/tsan_rtl_aarch64.S
+++ b/lib/libtsan/tsan_rtl_aarch64.S
@@ -4,10 +4,8 @@
 #include "sanitizer_common/sanitizer_asm.h"
 #include "builtins/assembly.h"
 
-#if !defined(__APPLE__)
-.section .text
-#else
-.section __TEXT,__text
+TEXT_SECTION
+#if defined(__APPLE__)
 .align 3
 #endif
 
@@ -222,6 +220,6 @@ ASM_SIZE(ASM_SYMBOL_INTERCEPTOR(__sigsetjmp))
 
 NO_EXEC_STACK_DIRECTIVE
 
-GNU_PROPERTY_BTI_PAC
+GNU_PROPERTY_BTI_PAC_GCS
 
 #endif
diff --git a/lib/libtsan/tsan_rtl_access.cpp b/lib/libtsan/tsan_rtl_access.cpp
index 487fa49063..b2e70475e0 100644
--- a/lib/libtsan/tsan_rtl_access.cpp
+++ b/lib/libtsan/tsan_rtl_access.cpp
@@ -419,6 +419,11 @@ NOINLINE void TraceRestartMemoryAccess(ThreadState* thr, uptr pc, uptr addr,
 
 ALWAYS_INLINE USED void MemoryAccess(ThreadState* thr, uptr pc, uptr addr,
                                      uptr size, AccessType typ) {
+#if SANITIZER_APPLE && !SANITIZER_GO
+  // Swift symbolizer can be intercepted and deadlock without this
+  if (thr->in_symbolizer)
+    return;
+#endif
   RawShadow* shadow_mem = MemToShadow(addr);
   UNUSED char memBuf[4][64];
   DPrintf2("#%d: Access: %d@%d %p/%zd typ=0x%x {%s, %s, %s, %s}\n", thr->tid,
@@ -684,7 +689,7 @@ void MemoryAccessRangeT(ThreadState* thr, uptr pc, uptr addr, uptr size) {
     DCHECK(IsAppMem(addr + size - 1));
   }
   if (!IsShadowMem(shadow_mem)) {
-    Printf("Bad shadow start addr: %p (%p)\n", shadow_mem, (void*)addr);
+    Printf("Bad shadow start addr: %p (%p)\n", (void*)shadow_mem, (void*)addr);
     DCHECK(IsShadowMem(shadow_mem));
   }
 
@@ -693,12 +698,12 @@ void MemoryAccessRangeT(ThreadState* thr, uptr pc, uptr addr, uptr size) {
   RawShadow* shadow_mem_end =
       shadow_mem + rounded_size / kShadowCell * kShadowCnt;
   if (!IsShadowMem(shadow_mem_end - 1)) {
-    Printf("Bad shadow end addr: %p (%p)\n", shadow_mem_end - 1,
+    Printf("Bad shadow end addr: %p (%p)\n", (void*)(shadow_mem_end - 1),
            (void*)(addr + size - 1));
     Printf(
         "Shadow start addr (ok): %p (%p); size: 0x%zx; rounded_size: 0x%zx; "
         "kShadowMultiplier: %zx\n",
-        shadow_mem, (void*)addr, size, rounded_size, kShadowMultiplier);
+        (void*)shadow_mem, (void*)addr, size, rounded_size, kShadowMultiplier);
     DCHECK(IsShadowMem(shadow_mem_end - 1));
   }
 #endif
diff --git a/lib/libtsan/tsan_rtl_amd64.S b/lib/libtsan/tsan_rtl_amd64.S
index f848be9dd4..8b9b706a82 100644
--- a/lib/libtsan/tsan_rtl_amd64.S
+++ b/lib/libtsan/tsan_rtl_amd64.S
@@ -3,6 +3,8 @@
 
 #include "sanitizer_common/sanitizer_asm.h"
 
+.att_syntax
+
 #if !defined(__APPLE__)
 .section .text
 #else
diff --git a/lib/libtsan/tsan_rtl_mutex.cpp b/lib/libtsan/tsan_rtl_mutex.cpp
index 2a8aa1915c..30f5e96493 100644
--- a/lib/libtsan/tsan_rtl_mutex.cpp
+++ b/lib/libtsan/tsan_rtl_mutex.cpp
@@ -11,14 +11,15 @@
 //===----------------------------------------------------------------------===//
 
 #include <sanitizer_common/sanitizer_deadlock_detector_interface.h>
+#include <sanitizer_common/sanitizer_placement_new.h>
 #include <sanitizer_common/sanitizer_stackdepot.h>
 
-#include "tsan_rtl.h"
 #include "tsan_flags.h"
-#include "tsan_sync.h"
-#include "tsan_report.h"
-#include "tsan_symbolize.h"
 #include "tsan_platform.h"
+#include "tsan_report.h"
+#include "tsan_rtl.h"
+#include "tsan_symbolize.h"
+#include "tsan_sync.h"
 
 namespace __tsan {
 
@@ -55,14 +56,28 @@ static void ReportMutexMisuse(ThreadState *thr, uptr pc, ReportType typ,
     return;
   if (!ShouldReport(thr, typ))
     return;
-  ThreadRegistryLock l(&ctx->thread_registry);
-  ScopedReport rep(typ);
-  rep.AddMutex(addr, creation_stack_id);
-  VarSizeStackTrace trace;
-  ObtainCurrentStack(thr, pc, &trace);
-  rep.AddStack(trace, true);
-  rep.AddLocation(addr, 1);
-  OutputReport(thr, rep);
+  // Use alloca, because malloc during signal handling deadlocks
+  ScopedReport *rep = (ScopedReport *)__builtin_alloca(sizeof(ScopedReport));
+  // Take a new scope as Apple platforms require the below locks released
+  // before symbolizing in order to avoid a deadlock
+  {
+    ThreadRegistryLock l(&ctx->thread_registry);
+    new (rep) ScopedReport(typ);
+    rep->AddMutex(addr, creation_stack_id);
+    VarSizeStackTrace trace;
+    ObtainCurrentStack(thr, pc, &trace);
+    rep->AddStack(trace, true);
+    rep->AddLocation(addr, 1);
+#if SANITIZER_APPLE
+  }  // Close this scope to release the locks
+#endif
+    OutputReport(thr, *rep);
+
+    // Must destroy this manually: it was constructed with placement new
+    rep->~ScopedReport();
+#if !SANITIZER_APPLE
+  }
+#endif
 }
 
 static void RecordMutexLock(ThreadState *thr, uptr pc, uptr addr,
@@ -528,51 +543,81 @@ void AfterSleep(ThreadState *thr, uptr pc) {
 void ReportDeadlock(ThreadState *thr, uptr pc, DDReport *r) {
   if (r == 0 || !ShouldReport(thr, ReportTypeDeadlock))
     return;
-  ThreadRegistryLock l(&ctx->thread_registry);
-  ScopedReport rep(ReportTypeDeadlock);
-  for (int i = 0; i < r->n; i++) {
-    rep.AddMutex(r->loop[i].mtx_ctx0, r->loop[i].stk[0]);
-    rep.AddUniqueTid((int)r->loop[i].thr_ctx);
-    rep.AddThread((int)r->loop[i].thr_ctx);
-  }
-  uptr dummy_pc = 0x42;
-  for (int i = 0; i < r->n; i++) {
-    for (int j = 0; j < (flags()->second_deadlock_stack ? 2 : 1); j++) {
-      u32 stk = r->loop[i].stk[j];
-      if (stk && stk != kInvalidStackID) {
-        rep.AddStack(StackDepotGet(stk), true);
-      } else {
-        // Sometimes we fail to extract the stack trace (FIXME: investigate),
-        // but we should still produce some stack trace in the report.
-        rep.AddStack(StackTrace(&dummy_pc, 1), true);
+  // Use alloca, because malloc during signal handling deadlocks
+  ScopedReport *rep = (ScopedReport *)__builtin_alloca(sizeof(ScopedReport));
+  // Take a new scope as Apple platforms require the below locks released
+  // before symbolizing in order to avoid a deadlock
+  {
+    ThreadRegistryLock l(&ctx->thread_registry);
+    new (rep) ScopedReport(ReportTypeDeadlock);
+    for (int i = 0; i < r->n; i++) {
+      rep->AddMutex(r->loop[i].mtx_ctx0, r->loop[i].stk[0]);
+      rep->AddUniqueTid((int)r->loop[i].thr_ctx);
+      rep->AddThread((int)r->loop[i].thr_ctx);
+    }
+    uptr dummy_pc = 0x42;
+    for (int i = 0; i < r->n; i++) {
+      for (int j = 0; j < (flags()->second_deadlock_stack ? 2 : 1); j++) {
+        u32 stk = r->loop[i].stk[j];
+        StackTrace stack;
+        if (stk && stk != kInvalidStackID) {
+          stack = StackDepotGet(stk);
+        } else {
+          // Sometimes we fail to extract the stack trace (FIXME: investigate),
+          // but we should still produce some stack trace in the report.
+          stack = StackTrace(&dummy_pc, 1);
+        }
+        rep->AddStack(stack, true);
       }
     }
+#if SANITIZER_APPLE
+  }  // Close this scope to release the locks
+#endif
+    OutputReport(thr, *rep);
+
+    // Must destroy this manually: it was constructed with placement new
+    rep->~ScopedReport();
+#if !SANITIZER_APPLE
   }
-  OutputReport(thr, rep);
+#endif
 }
 
 void ReportDestroyLocked(ThreadState *thr, uptr pc, uptr addr,
                          FastState last_lock, StackID creation_stack_id) {
-  // We need to lock the slot during RestoreStack because it protects
-  // the slot journal.
-  Lock slot_lock(&ctx->slots[static_cast<uptr>(last_lock.sid())].mtx);
-  ThreadRegistryLock l0(&ctx->thread_registry);
-  Lock slots_lock(&ctx->slot_mtx);
-  ScopedReport rep(ReportTypeMutexDestroyLocked);
-  rep.AddMutex(addr, creation_stack_id);
-  VarSizeStackTrace trace;
-  ObtainCurrentStack(thr, pc, &trace);
-  rep.AddStack(trace, true);
+  // Use alloca, because malloc during signal handling deadlocks
+  ScopedReport *rep = (ScopedReport *)__builtin_alloca(sizeof(ScopedReport));
+  // Take a new scope as Apple platforms require the below locks released
+  // before symbolizing in order to avoid a deadlock
+  {
+    // We need to lock the slot during RestoreStack because it protects
+    // the slot journal.
+    Lock slot_lock(&ctx->slots[static_cast<uptr>(last_lock.sid())].mtx);
+    ThreadRegistryLock l0(&ctx->thread_registry);
+    Lock slots_lock(&ctx->slot_mtx);
+    new (rep) ScopedReport(ReportTypeMutexDestroyLocked);
+    rep->AddMutex(addr, creation_stack_id);
+    VarSizeStackTrace trace;
+    ObtainCurrentStack(thr, pc, &trace);
+    rep->AddStack(trace, true);
 
-  Tid tid;
-  DynamicMutexSet mset;
-  uptr tag;
-  if (!RestoreStack(EventType::kLock, last_lock.sid(), last_lock.epoch(), addr,
-                    0, kAccessWrite, &tid, &trace, mset, &tag))
-    return;
-  rep.AddStack(trace, true);
-  rep.AddLocation(addr, 1);
-  OutputReport(thr, rep);
+    Tid tid;
+    DynamicMutexSet mset;
+    uptr tag;
+    if (!RestoreStack(EventType::kLock, last_lock.sid(), last_lock.epoch(),
+                      addr, 0, kAccessWrite, &tid, &trace, mset, &tag))
+      return rep->~ScopedReport();  // destroy the placement-new'ed report
+    rep->AddStack(trace, true);
+    rep->AddLocation(addr, 1);
+#if SANITIZER_APPLE
+  }  // Close this scope to release the locks
+#endif
+    OutputReport(thr, *rep);
+
+    // Must destroy this manually: it was constructed with placement new
+    rep->~ScopedReport();
+#if !SANITIZER_APPLE
+  }
+#endif
 }
 
 }  // namespace __tsan
diff --git a/lib/libtsan/tsan_rtl_report.cpp b/lib/libtsan/tsan_rtl_report.cpp
index 0820bf1ade..4e58305b58 100644
--- a/lib/libtsan/tsan_rtl_report.cpp
+++ b/lib/libtsan/tsan_rtl_report.cpp
@@ -11,10 +11,12 @@
 //===----------------------------------------------------------------------===//
 
 #include "sanitizer_common/sanitizer_common.h"
+#include "sanitizer_common/sanitizer_internal_defs.h"
 #include "sanitizer_common/sanitizer_libc.h"
 #include "sanitizer_common/sanitizer_placement_new.h"
 #include "sanitizer_common/sanitizer_stackdepot.h"
 #include "sanitizer_common/sanitizer_stacktrace.h"
+#include "tsan_defs.h"
 #include "tsan_fd.h"
 #include "tsan_flags.h"
 #include "tsan_mman.h"
@@ -109,7 +111,13 @@ static ReportStack *SymbolizeStack(StackTrace trace) {
     // instruction.
     if ((pc & kExternalPCBit) == 0)
       pc1 = StackTrace::GetPreviousInstructionPc(pc);
-    SymbolizedStack *ent = SymbolizeCode(pc1);
+    SymbolizedStack* ent = SymbolizeCode(pc1, si == trace.size - 1);
+#if SANITIZER_GO
+    if (ent == nullptr) {
+      // Go might have 0 frames for this PC (wrapper frames aren't reported).
+      continue;
+    }
+#endif
     CHECK_NE(ent, 0);
     SymbolizedStack *last = ent;
     while (last->next) {
@@ -187,10 +195,8 @@ void ScopedReportBase::AddMemoryAccess(uptr addr, uptr external_tag, Shadow s,
   mop->size = size;
   mop->write = !(typ & kAccessRead);
   mop->atomic = typ & kAccessAtomic;
-  mop->stack = SymbolizeStack(stack);
   mop->external_tag = external_tag;
-  if (mop->stack)
-    mop->stack->suppressable = true;
+  mop->stack_trace = stack;
   for (uptr i = 0; i < mset->Size(); i++) {
     MutexSet::Desc d = mset->Get(i);
     int id = this->AddMutex(d.addr, d.stack_id);
@@ -199,6 +205,56 @@ void ScopedReportBase::AddMemoryAccess(uptr addr, uptr external_tag, Shadow s,
   }
 }
 
+void ScopedReportBase::SymbolizeStackElems() {
+  // symbolize memory ops
+  for (usize i = 0, size = rep_->mops.Size(); i < size; i++) {
+    ReportMop *mop = rep_->mops[i];
+    mop->stack = SymbolizeStack(mop->stack_trace);
+    if (mop->stack)
+      mop->stack->suppressable = true;
+  }
+
+  // symbolize locations
+  for (usize i = 0, size = rep_->locs.Size(); i < size; i++) {
+    // added locations have a NULL placeholder - don't dereference them
+    if (ReportLocation *loc = rep_->locs[i])
+      loc->stack = SymbolizeStackId(loc->stack_id);
+  }
+
+  // symbolize any added locations
+  for (usize i = 0, size = rep_->added_location_addrs.Size(); i < size; i++) {
+    AddedLocationAddr *added_loc = &rep_->added_location_addrs[i];
+    if (ReportLocation *loc = SymbolizeData(added_loc->addr)) {
+      loc->suppressable = true;
+      rep_->locs[added_loc->locs_idx] = loc;
+    }
+  }
+
+  // Filter out any added location placeholders that could not be symbolized
+  usize j = 0;
+  for (usize i = 0, size = rep_->locs.Size(); i < size; i++) {
+    if (rep_->locs[i] != nullptr) {
+      rep_->locs[j] = rep_->locs[i];
+      j++;
+    }
+  }
+  rep_->locs.Resize(j);
+
+  // symbolize threads
+  for (usize i = 0, size = rep_->threads.Size(); i < size; i++) {
+    ReportThread *rt = rep_->threads[i];
+    rt->stack = SymbolizeStackId(rt->stack_id);
+    if (rt->stack)
+      rt->stack->suppressable = rt->suppressable;
+  }
+
+  // symbolize mutexes
+  for (usize i = 0, size = rep_->mutexes.Size(); i < size; i++) {
+    ReportMutex *rm = rep_->mutexes[i];
+    rm->stack = SymbolizeStackId(rm->stack_id);
+  }
+}
+
 void ScopedReportBase::AddUniqueTid(Tid unique_tid) {
   rep_->unique_tids.PushBack(unique_tid);
 }
@@ -216,10 +272,8 @@ void ScopedReportBase::AddThread(const ThreadContext *tctx, bool suppressable) {
   rt->name = internal_strdup(tctx->name);
   rt->parent_tid = tctx->parent_tid;
   rt->thread_type = tctx->thread_type;
-  rt->stack = 0;
-  rt->stack = SymbolizeStackId(tctx->creation_stack_id);
-  if (rt->stack)
-    rt->stack->suppressable = suppressable;
+  rt->stack_id = tctx->creation_stack_id;
+  rt->suppressable = suppressable;
 }
 
 #if !SANITIZER_GO
@@ -270,7 +324,7 @@ int ScopedReportBase::AddMutex(uptr addr, StackID creation_stack_id) {
   rep_->mutexes.PushBack(rm);
   rm->id = rep_->mutexes.Size() - 1;
   rm->addr = addr;
-  rm->stack = SymbolizeStackId(creation_stack_id);
+  rm->stack_id = creation_stack_id;
   return rm->id;
 }
 
@@ -288,7 +342,7 @@ void ScopedReportBase::AddLocation(uptr addr, uptr size) {
     loc->fd_closed = closed;
     loc->fd = fd;
     loc->tid = creat_tid;
-    loc->stack = SymbolizeStackId(creat_stack);
+    loc->stack_id = creat_stack;
     rep_->locs.PushBack(loc);
     AddThread(creat_tid);
     return;
@@ -310,7 +364,7 @@ void ScopedReportBase::AddLocation(uptr addr, uptr size) {
     loc->heap_chunk_size = b->siz;
     loc->external_tag = b->tag;
     loc->tid = b->tid;
-    loc->stack = SymbolizeStackId(b->stk);
+    loc->stack_id = b->stk;
     rep_->locs.PushBack(loc);
     AddThread(b->tid);
     return;
@@ -324,11 +378,8 @@ void ScopedReportBase::AddLocation(uptr addr, uptr size) {
     AddThread(tctx);
   }
 #endif
-  if (ReportLocation *loc = SymbolizeData(addr)) {
-    loc->suppressable = true;
-    rep_->locs.PushBack(loc);
-    return;
-  }
+  rep_->added_location_addrs.PushBack({addr, rep_->locs.Size()});
+  rep_->locs.PushBack(nullptr);
 }
 
 #if !SANITIZER_GO
@@ -628,11 +679,12 @@ static bool HandleRacyStacks(ThreadState *thr, VarSizeStackTrace traces[2]) {
   return false;
 }
 
-bool OutputReport(ThreadState *thr, const ScopedReport &srep) {
+bool OutputReport(ThreadState *thr, ScopedReport &srep) {
   // These should have been checked in ShouldReport.
   // It's too late to check them here, we have already taken locks.
   CHECK(flags()->report_bugs);
   CHECK(!thr->suppress_reports);
+  srep.SymbolizeStackElems();
   atomic_store_relaxed(&ctx->last_symbolize_time_ns, NanoTime());
   const ReportDesc *rep = srep.GetReport();
   CHECK_EQ(thr->current_report, nullptr);
@@ -761,65 +813,80 @@ void ReportRace(ThreadState *thr, RawShadow *shadow_mem, Shadow cur, Shadow old,
   DynamicMutexSet mset1;
   MutexSet *mset[kMop] = {&thr->mset, mset1};
 
-  // We need to lock the slot during RestoreStack because it protects
-  // the slot journal.
-  Lock slot_lock(&ctx->slots[static_cast<uptr>(s[1].sid())].mtx);
-  ThreadRegistryLock l0(&ctx->thread_registry);
-  Lock slots_lock(&ctx->slot_mtx);
-  if (SpuriousRace(old))
-    return;
-  if (!RestoreStack(EventType::kAccessExt, s[1].sid(), s[1].epoch(), addr1,
-                    size1, typ1, &tids[1], &traces[1], mset[1], &tags[1])) {
-    StoreShadow(&ctx->last_spurious_race, old.raw());
-    return;
-  }
-
-  if (IsFiredSuppression(ctx, rep_typ, traces[1]))
-    return;
-
-  if (HandleRacyStacks(thr, traces))
-    return;
-
-  // If any of the accesses has a tag, treat this as an "external" race.
-  uptr tag = kExternalTagNone;
-  for (uptr i = 0; i < kMop; i++) {
-    if (tags[i] != kExternalTagNone) {
-      rep_typ = ReportTypeExternalRace;
-      tag = tags[i];
-      break;
+  // Use alloca, because malloc during signal handling deadlocks
+  ScopedReport *rep = (ScopedReport *)__builtin_alloca(sizeof(ScopedReport));
+  // Take a new scope as Apple platforms require the below locks released
+  // before symbolizing in order to avoid a deadlock
+  {
+    // We need to lock the slot during RestoreStack because it protects
+    // the slot journal.
+    Lock slot_lock(&ctx->slots[static_cast<uptr>(s[1].sid())].mtx);
+    ThreadRegistryLock l0(&ctx->thread_registry);
+    Lock slots_lock(&ctx->slot_mtx);
+    if (SpuriousRace(old))
+      return;
+    if (!RestoreStack(EventType::kAccessExt, s[1].sid(), s[1].epoch(), addr1,
+                      size1, typ1, &tids[1], &traces[1], mset[1], &tags[1])) {
+      StoreShadow(&ctx->last_spurious_race, old.raw());
+      return;
     }
-  }
 
-  ScopedReport rep(rep_typ, tag);
-  for (uptr i = 0; i < kMop; i++)
-    rep.AddMemoryAccess(addr, tags[i], s[i], tids[i], traces[i], mset[i]);
+    if (IsFiredSuppression(ctx, rep_typ, traces[1]))
+      return;
 
-  for (uptr i = 0; i < kMop; i++) {
-    ThreadContext *tctx = static_cast<ThreadContext *>(
-        ctx->thread_registry.GetThreadLocked(tids[i]));
-    rep.AddThread(tctx);
-  }
+    if (HandleRacyStacks(thr, traces))
+      return;
 
-  rep.AddLocation(addr_min, addr_max - addr_min);
-
-  if (flags()->print_full_thread_history) {
-    const ReportDesc *rep_desc = rep.GetReport();
-    for (uptr i = 0; i < rep_desc->threads.Size(); i++) {
-      Tid parent_tid = rep_desc->threads[i]->parent_tid;
-      if (parent_tid == kMainTid || parent_tid == kInvalidTid)
-        continue;
-      ThreadContext *parent_tctx = static_cast<ThreadContext *>(
-          ctx->thread_registry.GetThreadLocked(parent_tid));
-      rep.AddThread(parent_tctx);
+    // If any of the accesses has a tag, treat this as an "external" race.
+    uptr tag = kExternalTagNone;
+    for (uptr i = 0; i < kMop; i++) {
+      if (tags[i] != kExternalTagNone) {
+        rep_typ = ReportTypeExternalRace;
+        tag = tags[i];
+        break;
+      }
+    }
+
+    new (rep) ScopedReport(rep_typ, tag);
+    for (uptr i = 0; i < kMop; i++)
+      rep->AddMemoryAccess(addr, tags[i], s[i], tids[i], traces[i], mset[i]);
+
+    for (uptr i = 0; i < kMop; i++) {
+      ThreadContext *tctx = static_cast<ThreadContext *>(
+          ctx->thread_registry.GetThreadLocked(tids[i]));
+      rep->AddThread(tctx);
+    }
+
+    rep->AddLocation(addr_min, addr_max - addr_min);
+
+    if (flags()->print_full_thread_history) {
+      const ReportDesc *rep_desc = rep->GetReport();
+      for (uptr i = 0; i < rep_desc->threads.Size(); i++) {
+        Tid parent_tid = rep_desc->threads[i]->parent_tid;
+        if (parent_tid == kMainTid || parent_tid == kInvalidTid)
+          continue;
+        ThreadContext *parent_tctx = static_cast<ThreadContext *>(
+            ctx->thread_registry.GetThreadLocked(parent_tid));
+        rep->AddThread(parent_tctx);
+      }
     }
-  }
 
 #if !SANITIZER_GO
-  if (!((typ0 | typ1) & kAccessFree) &&
-      s[1].epoch() <= thr->last_sleep_clock.Get(s[1].sid()))
-    rep.AddSleep(thr->last_sleep_stack_id);
+    if (!((typ0 | typ1) & kAccessFree) &&
+        s[1].epoch() <= thr->last_sleep_clock.Get(s[1].sid()))
+      rep->AddSleep(thr->last_sleep_stack_id);
+#endif
+
+#if SANITIZER_APPLE
+  }  // Close this scope to release the locks
+#endif
+    OutputReport(thr, *rep);
+
+    // Must destroy this manually: it was constructed with placement new
+    rep->~ScopedReport();
+#if !SANITIZER_APPLE
+  }
 #endif
-  OutputReport(thr, rep);
 }
 
 void PrintCurrentStack(ThreadState *thr, uptr pc) {
diff --git a/lib/libtsan/tsan_rtl_thread.cpp b/lib/libtsan/tsan_rtl_thread.cpp
index 8d29e25a6d..978d853b0b 100644
--- a/lib/libtsan/tsan_rtl_thread.cpp
+++ b/lib/libtsan/tsan_rtl_thread.cpp
@@ -88,15 +88,33 @@ void ThreadFinalize(ThreadState *thr) {
 #if !SANITIZER_GO
   if (!ShouldReport(thr, ReportTypeThreadLeak))
     return;
-  ThreadRegistryLock l(&ctx->thread_registry);
   Vector<ThreadLeak> leaks;
-  ctx->thread_registry.RunCallbackForEachThreadLocked(CollectThreadLeaks,
-                                                      &leaks);
+  {
+    ThreadRegistryLock l(&ctx->thread_registry);
+    ctx->thread_registry.RunCallbackForEachThreadLocked(CollectThreadLeaks,
+                                                        &leaks);
+  }
+
   for (uptr i = 0; i < leaks.Size(); i++) {
-    ScopedReport rep(ReportTypeThreadLeak);
-    rep.AddThread(leaks[i].tctx, true);
-    rep.SetCount(leaks[i].count);
-    OutputReport(thr, rep);
+    // Use alloca, because malloc during signal handling deadlocks
+    ScopedReport *rep = (ScopedReport *)__builtin_alloca(sizeof(ScopedReport));
+    // Take a new scope as Apple platforms require the below locks released
+    // before symbolizing in order to avoid a deadlock
+    {
+      ThreadRegistryLock l(&ctx->thread_registry);
+      new (rep) ScopedReport(ReportTypeThreadLeak);
+      rep->AddThread(leaks[i].tctx, true);
+      rep->SetCount(leaks[i].count);
+#  if SANITIZER_APPLE
+    }  // Close this scope to release the locks
+#  endif
+      OutputReport(thr, *rep);
+
+      // Must destroy this manually: it was constructed with placement new
+      rep->~ScopedReport();
+#  if !SANITIZER_APPLE
+    }
+#  endif
   }
 #endif
 }
@@ -149,7 +167,7 @@ struct OnStartedArgs {
   uptr tls_size;
 };
 
-void ThreadStart(ThreadState *thr, Tid tid, tid_t os_id,
+void ThreadStart(ThreadState *thr, Tid tid, ThreadID os_id,
                  ThreadType thread_type) {
   ctx->thread_registry.StartThread(tid, os_id, thread_type, thr);
   if (!thr->ignore_sync) {
@@ -188,10 +206,14 @@ void ThreadStart(ThreadState *thr, Tid tid, tid_t os_id,
   }
 #endif
 
-#if !SANITIZER_GO
+#if !SANITIZER_GO && !SANITIZER_ANDROID
   // Don't imitate stack/TLS writes for the main thread,
   // because its initialization is synchronized with all
   // subsequent threads anyway.
+  // Because thr is created by MmapOrDie, the thr object
+  // is not in TLS; the pointer to the thr object is in
+  // the TLS_SLOT_SANITIZER slot. So skip this check on
+  // the Android platform.
   if (tid != kMainTid) {
     if (stk_addr && stk_size) {
       const uptr pc = StackTrace::GetNextInstructionPc(
diff --git a/lib/libtsan/tsan_symbolize.cpp b/lib/libtsan/tsan_symbolize.cpp
index 2e2744d2ea..b382b63247 100644
--- a/lib/libtsan/tsan_symbolize.cpp
+++ b/lib/libtsan/tsan_symbolize.cpp
@@ -79,7 +79,7 @@ static void AddFrame(void *ctx, const char *function_name, const char *file,
   info->column = column;
 }
 
-SymbolizedStack *SymbolizeCode(uptr addr) {
+SymbolizedStack* SymbolizeCode(uptr addr, bool leaf) {
   // Check if PC comes from non-native land.
   if (addr & kExternalPCBit) {
     SymbolizedStackBuilder ssb = {nullptr, nullptr, addr};
diff --git a/lib/libtsan/tsan_symbolize.h b/lib/libtsan/tsan_symbolize.h
index 7adaa04dc2..2fe34f00a3 100644
--- a/lib/libtsan/tsan_symbolize.h
+++ b/lib/libtsan/tsan_symbolize.h
@@ -19,7 +19,7 @@ namespace __tsan {
 
 void EnterSymbolizer();
 void ExitSymbolizer();
-SymbolizedStack *SymbolizeCode(uptr addr);
+SymbolizedStack* SymbolizeCode(uptr addr, bool leaf);
 ReportLocation *SymbolizeData(uptr addr);
 void SymbolizeFlush();
 
diff --git a/lib/libtsan/tsan_trace.h b/lib/libtsan/tsan_trace.h
index 01bb7b34f4..1e791ff765 100644
--- a/lib/libtsan/tsan_trace.h
+++ b/lib/libtsan/tsan_trace.h
@@ -190,7 +190,7 @@ struct Trace {
   Mutex mtx;
   IList<TraceHeader, &TraceHeader::trace_parts, TracePart> parts;
   // First node non-queued into ctx->trace_part_recycle.
-  TracePart* local_head;
+  TracePart* local_head = nullptr;
   // Final position in the last part for finished threads.
   Event* final_pos = nullptr;
   // Number of trace parts allocated on behalf of this trace specifically.
diff --git a/lib/libunwind/include/__libunwind_config.h b/lib/libunwind/include/__libunwind_config.h
index bb7fe4c83a..980d11ef5d 100644
--- a/lib/libunwind/include/__libunwind_config.h
+++ b/lib/libunwind/include/__libunwind_config.h
@@ -73,11 +73,11 @@
 #  define _LIBUNWIND_HIGHEST_DWARF_REGISTER _LIBUNWIND_HIGHEST_DWARF_REGISTER_PPC
 # elif defined(__aarch64__)
 #  define _LIBUNWIND_TARGET_AARCH64 1
-#  define _LIBUNWIND_CONTEXT_SIZE 66
+#define _LIBUNWIND_CONTEXT_SIZE 67
 #  if defined(__SEH__)
 #    define _LIBUNWIND_CURSOR_SIZE 164
 #  else
-#    define _LIBUNWIND_CURSOR_SIZE 78
+#define _LIBUNWIND_CURSOR_SIZE 79
 #  endif
 #  define _LIBUNWIND_HIGHEST_DWARF_REGISTER _LIBUNWIND_HIGHEST_DWARF_REGISTER_ARM64
 # elif defined(__arm__)
@@ -212,4 +212,13 @@
 # define _LIBUNWIND_HIGHEST_DWARF_REGISTER 287
 #endif // _LIBUNWIND_IS_NATIVE_ONLY
 
+#if defined(__has_feature)
+#  if __has_feature(ptrauth_calls) && __has_feature(ptrauth_returns)
+#    define _LIBUNWIND_TARGET_AARCH64_AUTHENTICATED_UNWINDING 1
+#  elif __has_feature(ptrauth_calls) != __has_feature(ptrauth_returns)
+#    error "Either both or none of ptrauth_calls and ptrauth_returns "\
+           "is allowed to be enabled"
+#  endif
+#endif
+
 #endif // ____LIBUNWIND_CONFIG_H__
diff --git a/lib/libunwind/include/libunwind.h b/lib/libunwind/include/libunwind.h
index b2dae8feed..56ca711027 100644
--- a/lib/libunwind/include/libunwind.h
+++ b/lib/libunwind/include/libunwind.h
@@ -43,6 +43,109 @@
   #define LIBUNWIND_AVAIL
 #endif
 
+#if defined(_LIBUNWIND_TARGET_AARCH64_AUTHENTICATED_UNWINDING)
+
+  #include <ptrauth.h>
+
+  // `__ptrauth_restricted_intptr` is a feature of apple clang that predates
+  // support for direct application of `__ptrauth` to integer types. This
+  // guard is necessary to support compilation with those compilers.
+  #if __has_extension(ptrauth_restricted_intptr_qualifier)
+    #define __unwind_ptrauth_restricted_intptr(...) \
+      __ptrauth_restricted_intptr(__VA_ARGS__)
+  #else
+    #define __unwind_ptrauth_restricted_intptr(...) \
+      __ptrauth(__VA_ARGS__)
+  #endif
+
+  // ptrauth_string_discriminator("unw_proc_info_t::handler") == 0x7405
+  #define __ptrauth_unwind_upi_handler_disc 0x7405
+
+  #define __ptrauth_unwind_upi_handler \
+    __ptrauth(ptrauth_key_function_pointer, 1, __ptrauth_unwind_upi_handler_disc)
+
+  #define __ptrauth_unwind_upi_handler_intptr \
+    __unwind_ptrauth_restricted_intptr(ptrauth_key_function_pointer, 1,\
+                                       __ptrauth_unwind_upi_handler_disc)
+
+  // ptrauth_string_discriminator("unw_proc_info_t::start_ip") == 0xCA2C
+  #define __ptrauth_unwind_upi_startip \
+    __unwind_ptrauth_restricted_intptr(ptrauth_key_process_independent_code, 1, 0xCA2C)
+
+  // ptrauth_string_discriminator("unw_proc_info_t::end_ip") == 0xE183
+  #define __ptrauth_unwind_upi_endip \
+    __unwind_ptrauth_restricted_intptr(ptrauth_key_process_independent_code, 1, 0xE183)
+
+  // ptrauth_string_discriminator("unw_proc_info_t::lsda") == 0x83DE
+  #define __ptrauth_unwind_upi_lsda \
+    __unwind_ptrauth_restricted_intptr(ptrauth_key_process_dependent_data, 1, 0x83DE)
+
+  // ptrauth_string_discriminator("unw_proc_info_t::flags") == 0x79A1
+  #define __ptrauth_unwind_upi_flags \
+    __unwind_ptrauth_restricted_intptr(ptrauth_key_process_dependent_data, 1, 0x79A1)
+
+  // ptrauth_string_discriminator("unw_proc_info_t::unwind_info") == 0xC20C
+  #define __ptrauth_unwind_upi_info \
+    __unwind_ptrauth_restricted_intptr(ptrauth_key_process_dependent_data, 1, 0xC20C)
+
+  // ptrauth_string_discriminator("unw_proc_info_t::extra") == 0x03DF
+  #define __ptrauth_unwind_upi_extra \
+    __unwind_ptrauth_restricted_intptr(ptrauth_key_process_dependent_data, 1, 0x03DF)
+
+  // ptrauth_string_discriminator("Registers_arm64::link_reg_t") == 0x8301
+  #define __ptrauth_unwind_registers_arm64_link_reg \
+    __unwind_ptrauth_restricted_intptr(ptrauth_key_process_dependent_code, 1, 0x8301)
+
+  // ptrauth_string_discriminator("UnwindInfoSections::dso_base") == 0x4FF5
+  #define __ptrauth_unwind_uis_dso_base \
+    __unwind_ptrauth_restricted_intptr(ptrauth_key_process_dependent_data, 1, 0x4FF5)
+
+  // ptrauth_string_discriminator("UnwindInfoSections::dwarf_section") == 0x4974
+  #define __ptrauth_unwind_uis_dwarf_section \
+    __unwind_ptrauth_restricted_intptr(ptrauth_key_process_dependent_data, 1, 0x4974)
+
+  // ptrauth_string_discriminator("UnwindInfoSections::dwarf_section_length") == 0x2A9A
+  #define __ptrauth_unwind_uis_dwarf_section_length \
+    __unwind_ptrauth_restricted_intptr(ptrauth_key_process_dependent_data, 1, 0x2A9A)
+
+  // ptrauth_string_discriminator("UnwindInfoSections::compact_unwind_section") == 0xA27B
+  #define __ptrauth_unwind_uis_compact_unwind_section \
+    __unwind_ptrauth_restricted_intptr(ptrauth_key_process_dependent_data, 1, 0xA27B)
+
+  // ptrauth_string_discriminator("UnwindInfoSections::compact_unwind_section_length") == 0x5D0A
+  #define __ptrauth_unwind_uis_compact_unwind_section_length \
+    __unwind_ptrauth_restricted_intptr(ptrauth_key_process_dependent_data, 1, 0x5D0A)
+
+  // ptrauth_string_discriminator("CIE_Info::personality") == 0x6A40
+  #define __ptrauth_unwind_cie_info_personality_disc 0x6A40
+  #define __ptrauth_unwind_cie_info_personality \
+    __unwind_ptrauth_restricted_intptr(ptrauth_key_function_pointer, 1, \
+                                       __ptrauth_unwind_cie_info_personality_disc)
+
+  // ptrauth_string_discriminator("personality") == 0x7EAD
+  #define __ptrauth_unwind_pauthtest_personality_disc 0x7EAD
+
+#else
+
+  #define __unwind_ptrauth_restricted_intptr(...)
+  #define __ptrauth_unwind_upi_handler
+  #define __ptrauth_unwind_upi_handler_intptr
+  #define __ptrauth_unwind_upi_startip
+  #define __ptrauth_unwind_upi_endip
+  #define __ptrauth_unwind_upi_lsda
+  #define __ptrauth_unwind_upi_flags
+  #define __ptrauth_unwind_upi_info
+  #define __ptrauth_unwind_upi_extra
+  #define __ptrauth_unwind_registers_arm64_link_reg
+  #define __ptrauth_unwind_uis_dso_base
+  #define __ptrauth_unwind_uis_dwarf_section
+  #define __ptrauth_unwind_uis_dwarf_section_length
+  #define __ptrauth_unwind_uis_compact_unwind_section
+  #define __ptrauth_unwind_uis_compact_unwind_section_length
+  #define __ptrauth_unwind_cie_info_personality
+
+#endif
+
 #if defined(_WIN32) && defined(__SEH__)
   #define LIBUNWIND_CURSOR_ALIGNMENT_ATTR __attribute__((__aligned__(16)))
 #else
@@ -88,17 +191,18 @@ typedef double unw_fpreg_t;
 #endif
 
 struct unw_proc_info_t {
-  unw_word_t  start_ip;         /* start address of function */
-  unw_word_t  end_ip;           /* address after end of function */
-  unw_word_t  lsda;             /* address of language specific data area, */
-                                /*  or zero if not used */
-  unw_word_t  handler;          /* personality routine, or zero if not used */
-  unw_word_t  gp;               /* not used */
-  unw_word_t  flags;            /* not used */
-  uint32_t    format;           /* compact unwind encoding, or zero if none */
-  uint32_t    unwind_info_size; /* size of DWARF unwind info, or zero if none */
-  unw_word_t  unwind_info;      /* address of DWARF unwind info, or zero */
-  unw_word_t  extra;            /* mach_header of mach-o image containing func */
+  unw_word_t __ptrauth_unwind_upi_startip start_ip; /* start address of function */
+  unw_word_t __ptrauth_unwind_upi_endip end_ip;     /* address after end of function */
+  unw_word_t __ptrauth_unwind_upi_lsda lsda;        /* address of language specific data area, */
+                                                    /* or zero if not used */
+
+  unw_word_t __ptrauth_unwind_upi_handler_intptr handler; /* personality routine, or zero if not used */
+  unw_word_t  gp;                                   /* not used */
+  unw_word_t __ptrauth_unwind_upi_flags flags;      /* not used */
+  uint32_t   format;                                /* compact unwind encoding, or zero if none */
+  uint32_t   unwind_info_size;                      /* size of DWARF unwind info, or zero if none */
+  unw_word_t __ptrauth_unwind_upi_info unwind_info; /* address of DWARF unwind info, or zero */
+  unw_word_t __ptrauth_unwind_upi_extra extra;      /* mach_header of mach-o image containing func */
 };
 typedef struct unw_proc_info_t unw_proc_info_t;
 
@@ -130,6 +234,7 @@ extern int unw_is_fpreg(unw_cursor_t *, unw_regnum_t) LIBUNWIND_AVAIL;
 extern int unw_is_signal_frame(unw_cursor_t *) LIBUNWIND_AVAIL;
 extern int unw_get_proc_name(unw_cursor_t *, char *, size_t, unw_word_t *) LIBUNWIND_AVAIL;
 //extern int       unw_get_save_loc(unw_cursor_t*, int, unw_save_loc_t*);
+extern const char *unw_strerror(int) LIBUNWIND_AVAIL;
 
 extern unw_addr_space_t unw_local_addr_space;
 
@@ -532,6 +637,7 @@ enum {
   UNW_AARCH64_X31 = 31,
   UNW_AARCH64_SP = 31,
   UNW_AARCH64_PC = 32,
+  UNW_AARCH64_VG = 46,
 
   // reserved block
   UNW_AARCH64_RA_SIGN_STATE = 34,
diff --git a/lib/libunwind/include/unwind_arm_ehabi.h b/lib/libunwind/include/unwind_arm_ehabi.h
index 6277a1457f..68e02e4760 100644
--- a/lib/libunwind/include/unwind_arm_ehabi.h
+++ b/lib/libunwind/include/unwind_arm_ehabi.h
@@ -125,8 +125,11 @@ _Unwind_VRS_Pop(_Unwind_Context *context, _Unwind_VRS_RegClass regclass,
                 uint32_t discriminator,
                 _Unwind_VRS_DataRepresentation representation);
 
+extern _Unwind_Reason_Code __gnu_unwind_frame(_Unwind_Exception *,
+                                              _Unwind_Context *);
+
 #if defined(_LIBUNWIND_UNWIND_LEVEL1_EXTERNAL_LINKAGE)
-#define _LIBUNWIND_EXPORT_UNWIND_LEVEL1 extern
+#define _LIBUNWIND_EXPORT_UNWIND_LEVEL1 extern __inline__
 #else
 #define _LIBUNWIND_EXPORT_UNWIND_LEVEL1 static __inline__
 #endif
diff --git a/lib/libunwind/src/AddressSpace.hpp b/lib/libunwind/src/AddressSpace.hpp
index 5551c7d4be..52477b16b3 100644
--- a/lib/libunwind/src/AddressSpace.hpp
+++ b/lib/libunwind/src/AddressSpace.hpp
@@ -129,22 +129,27 @@ struct UnwindInfoSections {
     defined(_LIBUNWIND_SUPPORT_COMPACT_UNWIND) ||                              \
     defined(_LIBUNWIND_USE_DL_ITERATE_PHDR)
   // No dso_base for SEH.
-  uintptr_t       dso_base;
+  uintptr_t __ptrauth_unwind_uis_dso_base
+                  dso_base = 0;
 #endif
 #if defined(_LIBUNWIND_USE_DL_ITERATE_PHDR)
   size_t          text_segment_length;
 #endif
 #if defined(_LIBUNWIND_SUPPORT_DWARF_UNWIND)
-  uintptr_t       dwarf_section;
-  size_t          dwarf_section_length;
+  uintptr_t __ptrauth_unwind_uis_dwarf_section
+                  dwarf_section = 0;
+  size_t __ptrauth_unwind_uis_dwarf_section_length
+                  dwarf_section_length = 0;
 #endif
 #if defined(_LIBUNWIND_SUPPORT_DWARF_INDEX)
   uintptr_t       dwarf_index_section;
   size_t          dwarf_index_section_length;
 #endif
 #if defined(_LIBUNWIND_SUPPORT_COMPACT_UNWIND)
-  uintptr_t       compact_unwind_section;
-  size_t          compact_unwind_section_length;
+  uintptr_t __ptrauth_unwind_uis_compact_unwind_section
+                  compact_unwind_section = 0;
+  size_t __ptrauth_unwind_uis_compact_unwind_section_length
+                  compact_unwind_section_length = 0;
 #endif
 #if defined(_LIBUNWIND_ARM_EHABI)
   uintptr_t       arm_section;
@@ -196,11 +201,16 @@ public:
   static int64_t  getSLEB128(pint_t &addr, pint_t end);
 
   pint_t getEncodedP(pint_t &addr, pint_t end, uint8_t encoding,
-                     pint_t datarelBase = 0);
-  bool findFunctionName(pint_t addr, char *buf, size_t bufLen,
-                        unw_word_t *offset);
-  bool findUnwindSections(pint_t targetAddr, UnwindInfoSections &info);
-  bool findOtherFDE(pint_t targetAddr, pint_t &fde);
+                     pint_t datarelBase = 0, pint_t *resultAddr = nullptr);
+  template <typename R>
+  bool findFunctionName(typename R::link_hardened_reg_arg_t addr, char *buf,
+                        size_t bufLen, unw_word_t *offset);
+  template <typename R>
+  bool findUnwindSections(typename R::link_hardened_reg_arg_t targetAddr,
+                          UnwindInfoSections &info);
+  template <typename R>
+  bool findOtherFDE(typename R::link_hardened_reg_arg_t targetAddr,
+                    pint_t &fde);
 
   static LocalAddressSpace sThisAddressSpace;
 };
@@ -269,7 +279,7 @@ inline int64_t LocalAddressSpace::getSLEB128(pint_t &addr, pint_t end) {
 
 inline LocalAddressSpace::pint_t
 LocalAddressSpace::getEncodedP(pint_t &addr, pint_t end, uint8_t encoding,
-                               pint_t datarelBase) {
+                               pint_t datarelBase, pint_t *resultAddr) {
   pint_t startAddr = addr;
   const uint8_t *p = (uint8_t *)addr;
   pint_t result;
@@ -353,8 +363,14 @@ LocalAddressSpace::getEncodedP(pint_t &addr, pint_t end, uint8_t encoding,
     break;
   }
 
-  if (encoding & DW_EH_PE_indirect)
+  if (encoding & DW_EH_PE_indirect) {
+    if (resultAddr)
+      *resultAddr = result;
     result = getP(result);
+  } else {
+    if (resultAddr)
+      *resultAddr = startAddr;
+  }
 
   return result;
 }
@@ -486,9 +502,9 @@ static int findUnwindSectionsByPhdr(struct dl_phdr_info *pinfo,
 
 #endif  // defined(_LIBUNWIND_USE_DL_ITERATE_PHDR)
 
-
-inline bool LocalAddressSpace::findUnwindSections(pint_t targetAddr,
-                                                  UnwindInfoSections &info) {
+template <typename R>
+inline bool LocalAddressSpace::findUnwindSections(
+    typename R::link_hardened_reg_arg_t targetAddr, UnwindInfoSections &info) {
 #ifdef __APPLE__
   dyld_unwind_sections dyldInfo;
   if (_dyld_find_unwind_sections((void *)targetAddr, &dyldInfo)) {
@@ -658,16 +674,21 @@ inline bool LocalAddressSpace::findUnwindSections(pint_t targetAddr,
   return false;
 }
 
-inline bool LocalAddressSpace::findOtherFDE(pint_t targetAddr, pint_t &fde) {
+template <typename R>
+inline bool
+LocalAddressSpace::findOtherFDE(typename R::link_hardened_reg_arg_t targetAddr,
+                                pint_t &fde) {
   // TO DO: if OS has way to dynamically register FDEs, check that.
   (void)targetAddr;
   (void)fde;
   return false;
 }
 
-inline bool LocalAddressSpace::findFunctionName(pint_t addr, char *buf,
-                                                size_t bufLen,
-                                                unw_word_t *offset) {
+template <typename R>
+inline bool
+LocalAddressSpace::findFunctionName(typename R::link_hardened_reg_arg_t addr,
+                                    char *buf, size_t bufLen,
+                                    unw_word_t *offset) {
 #if _LIBUNWIND_USE_DLADDR
   Dl_info dyldInfo;
   if (dladdr((void *)addr, &dyldInfo)) {
diff --git a/lib/libunwind/src/CompactUnwinder.hpp b/lib/libunwind/src/CompactUnwinder.hpp
index a7a8a153d8..cd2e0e3431 100644
--- a/lib/libunwind/src/CompactUnwinder.hpp
+++ b/lib/libunwind/src/CompactUnwinder.hpp
@@ -601,11 +601,17 @@ int CompactUnwinder_arm64<A>::stepWithCompactEncodingFrameless(
     savedRegisterLoc -= 8;
   }
 
+  // We load the link register prior to setting the new SP as the authentication
+  // schema for LR entangles the SP of the old frame into the diversifier.
+  Registers_arm64::reg_t linkRegister = registers.getRegister(UNW_AARCH64_LR);
+
   // subtract stack size off of sp
   registers.setSP(savedRegisterLoc);
 
-  // set pc to be value in lr
-  registers.setIP(registers.getRegister(UNW_AARCH64_LR));
+  // Set pc to be value in lr. This needs to be performed after the new SP has
+  // been set, as the PC authentication schema entangles the SP of the new
+  // frame.
+  registers.setIP(linkRegister);
 
   return UNW_STEP_SUCCESS;
 }
@@ -614,7 +620,7 @@ template <typename A>
 int CompactUnwinder_arm64<A>::stepWithCompactEncodingFrame(
     compact_unwind_encoding_t encoding, uint64_t, A &addressSpace,
     Registers_arm64 &registers) {
-  uint64_t savedRegisterLoc = registers.getFP() - 8;
+  Registers_arm64::reg_t savedRegisterLoc = registers.getFP() - 8;
 
   if (encoding & UNWIND_ARM64_FRAME_X19_X20_PAIR) {
     registers.setRegister(UNW_AARCH64_X19, addressSpace.get64(savedRegisterLoc));
@@ -680,11 +686,16 @@ int CompactUnwinder_arm64<A>::stepWithCompactEncodingFrame(
     savedRegisterLoc -= 8;
   }
 
-  uint64_t fp = registers.getFP();
+  Registers_arm64::reg_t fp = registers.getFP();
+
   // fp points to old fp
   registers.setFP(addressSpace.get64(fp));
-  // old sp is fp less saved fp and lr
+
+  // Old sp is fp less saved fp and lr. We need to set this prior to setting
+  // the lr as the pointer authentication schema for the lr incorporates the
+  // sp as part of the diversifier.
   registers.setSP(fp + 16);
+
   // pop return address into pc
   registers.setIP(addressSpace.get64(fp + 8));
 
diff --git a/lib/libunwind/src/DwarfInstructions.hpp b/lib/libunwind/src/DwarfInstructions.hpp
index e7be0d6d5d..165c4a99e9 100644
--- a/lib/libunwind/src/DwarfInstructions.hpp
+++ b/lib/libunwind/src/DwarfInstructions.hpp
@@ -22,7 +22,6 @@
 #include "dwarf2.h"
 #include "libunwind_ext.h"
 
-
 namespace libunwind {
 
 
@@ -34,8 +33,10 @@ public:
   typedef typename A::pint_t pint_t;
   typedef typename A::sint_t sint_t;
 
-  static int stepWithDwarf(A &addressSpace, pint_t pc, pint_t fdeStart,
-                           R &registers, bool &isSignalFrame, bool stage2);
+  static int stepWithDwarf(A &addressSpace,
+                           typename R::link_hardened_reg_arg_t pc,
+                           pint_t fdeStart, R &registers, bool &isSignalFrame,
+                           bool stage2);
 
 private:
 
@@ -64,9 +65,10 @@ private:
 
   static pint_t getCFA(A &addressSpace, const PrologInfo &prolog,
                        const R &registers) {
-    if (prolog.cfaRegister != 0)
-      return (pint_t)((sint_t)registers.getRegister((int)prolog.cfaRegister) +
-             prolog.cfaRegisterOffset);
+    if (prolog.cfaRegister != 0) {
+      uintptr_t cfaRegister = registers.getRegister((int)prolog.cfaRegister);
+      return (pint_t)(cfaRegister + prolog.cfaRegisterOffset);
+    }
     if (prolog.cfaExpression != 0)
       return evaluateExpression((pint_t)prolog.cfaExpression, addressSpace,
                                 registers, 0);
@@ -207,16 +209,16 @@ bool DwarfInstructions<A, R>::isReturnAddressSignedWithPC(A &addressSpace,
 #endif
 
 template <typename A, typename R>
-int DwarfInstructions<A, R>::stepWithDwarf(A &addressSpace, pint_t pc,
-                                           pint_t fdeStart, R &registers,
-                                           bool &isSignalFrame, bool stage2) {
+int DwarfInstructions<A, R>::stepWithDwarf(
+    A &addressSpace, typename R::link_hardened_reg_arg_t pc, pint_t fdeStart,
+    R &registers, bool &isSignalFrame, bool stage2) {
   FDE_Info fdeInfo;
   CIE_Info cieInfo;
   if (CFI_Parser<A>::decodeFDE(addressSpace, fdeStart, &fdeInfo,
                                &cieInfo) == NULL) {
     PrologInfo prolog;
-    if (CFI_Parser<A>::parseFDEInstructions(addressSpace, fdeInfo, cieInfo, pc,
-                                            R::getArch(), &prolog)) {
+    if (CFI_Parser<A>::template parseFDEInstructions<R>(
+            addressSpace, fdeInfo, cieInfo, pc, R::getArch(), &prolog)) {
       // get pointer to cfa (architecture specific)
       pint_t cfa = getCFA(addressSpace, prolog, registers);
 
@@ -264,7 +266,7 @@ int DwarfInstructions<A, R>::stepWithDwarf(A &addressSpace, pint_t pc,
       // by a CFI directive later on.
       newRegisters.setSP(cfa);
 
-      pint_t returnAddress = 0;
+      typename R::reg_t returnAddress = 0;
       constexpr int lastReg = R::lastDwarfRegNum();
       static_assert(static_cast<int>(CFI_Parser<A>::kMaxRegisterNumber) >=
                         lastReg,
@@ -300,7 +302,16 @@ int DwarfInstructions<A, R>::stepWithDwarf(A &addressSpace, pint_t pc,
 
       isSignalFrame = cieInfo.isSignalFrame;
 
-#if defined(_LIBUNWIND_TARGET_AARCH64)
+#if defined(_LIBUNWIND_TARGET_AARCH64) &&                                      \
+    !defined(_LIBUNWIND_TARGET_AARCH64_AUTHENTICATED_UNWINDING)
+      // There are two ways of return address signing: pac-ret (enabled via
+      // -mbranch-protection=pac-ret) and ptrauth-returns (enabled as part of
+      // Apple's arm64e or experimental pauthtest ABI on Linux). The code
+      // below handles signed RA for pac-ret, while ptrauth-returns uses
+      // different logic.
+      // TODO: unify logic for both cases, see
+      // https://github.com/llvm/llvm-project/issues/160110
+      //
       // If the target is aarch64 then the return address may have been signed
       // using the v8.3 pointer authentication extensions. The original
       // return address needs to be authenticated before the return address is
diff --git a/lib/libunwind/src/DwarfParser.hpp b/lib/libunwind/src/DwarfParser.hpp
index 7e85025dd0..22de49023c 100644
--- a/lib/libunwind/src/DwarfParser.hpp
+++ b/lib/libunwind/src/DwarfParser.hpp
@@ -23,6 +23,10 @@
 
 #include "config.h"
 
+#if defined(_LIBUNWIND_TARGET_AARCH64_AUTHENTICATED_UNWINDING)
+#include <ptrauth.h>
+#endif
+
 namespace libunwind {
 
 /// CFI_Parser does basic parsing of a CFI (Call Frame Information) records.
@@ -33,6 +37,7 @@ template <typename A>
 class CFI_Parser {
 public:
   typedef typename A::pint_t pint_t;
+  typedef pint_t __ptrauth_unwind_cie_info_personality personality_t;
 
   /// Information encoded in a CIE (Common Information Entry)
   struct CIE_Info {
@@ -43,7 +48,7 @@ public:
     uint8_t   lsdaEncoding;
     uint8_t   personalityEncoding;
     uint8_t   personalityOffsetInCIE;
-    pint_t    personality;
+    personality_t personality;
     uint32_t  codeAlignFactor;
     int       dataAlignFactor;
     bool      isSignalFrame;
@@ -155,14 +160,17 @@ public:
     }
   };
 
-  static bool findFDE(A &addressSpace, pint_t pc, pint_t ehSectionStart,
-                      size_t sectionLength, pint_t fdeHint, FDE_Info *fdeInfo,
-                      CIE_Info *cieInfo);
+  template <typename R>
+  static bool findFDE(A &addressSpace, typename R::link_hardened_reg_arg_t pc,
+                      pint_t ehSectionStart, size_t sectionLength,
+                      pint_t fdeHint, FDE_Info *fdeInfo, CIE_Info *cieInfo);
   static const char *decodeFDE(A &addressSpace, pint_t fdeStart,
                                FDE_Info *fdeInfo, CIE_Info *cieInfo,
                                bool useCIEInfo = false);
+  template <typename R>
   static bool parseFDEInstructions(A &addressSpace, const FDE_Info &fdeInfo,
-                                   const CIE_Info &cieInfo, pint_t upToPC,
+                                   const CIE_Info &cieInfo,
+                                   typename R::link_hardened_reg_arg_t upToPC,
                                    int arch, PrologInfo *results);
 
   static const char *parseCIE(A &addressSpace, pint_t cie, CIE_Info *cieInfo);
@@ -234,9 +242,12 @@ const char *CFI_Parser<A>::decodeFDE(A &addressSpace, pint_t fdeStart,
 
 /// Scan an eh_frame section to find an FDE for a pc
 template <typename A>
-bool CFI_Parser<A>::findFDE(A &addressSpace, pint_t pc, pint_t ehSectionStart,
-                            size_t sectionLength, pint_t fdeHint,
-                            FDE_Info *fdeInfo, CIE_Info *cieInfo) {
+template <typename R>
+bool CFI_Parser<A>::findFDE(A &addressSpace,
+                            typename R::link_hardened_reg_arg_t pc,
+                            pint_t ehSectionStart, size_t sectionLength,
+                            pint_t fdeHint, FDE_Info *fdeInfo,
+                            CIE_Info *cieInfo) {
   //fprintf(stderr, "findFDE(0x%llX)\n", (long long)pc);
   pint_t p = (fdeHint != 0) ? fdeHint : ehSectionStart;
   const pint_t ehSectionEnd = (sectionLength == SIZE_MAX)
@@ -273,7 +284,7 @@ bool CFI_Parser<A>::findFDE(A &addressSpace, pint_t pc, pint_t ehSectionStart,
           pint_t pcRange = addressSpace.getEncodedP(
               p, nextCFI, cieInfo->pointerEncoding & 0x0F);
           // Test if pc is within the function this FDE covers.
-          if ((pcStart < pc) && (pc <= pcStart + pcRange)) {
+          if ((pcStart <= pc) && (pc < pcStart + pcRange)) {
             // parse rest of info
             fdeInfo->lsda = 0;
             // check for augmentation length
@@ -369,6 +380,7 @@ const char *CFI_Parser<A>::parseCIE(A &addressSpace, pint_t cie,
   cieInfo->returnAddressRegister = (uint8_t)raReg;
   // parse augmentation data based on augmentation string
   const char *result = NULL;
+  pint_t resultAddr = 0;
   if (addressSpace.get8(strStart) == 'z') {
     // parse augmentation data length
     addressSpace.getULEB128(p, cieContentEnd);
@@ -377,13 +389,41 @@ const char *CFI_Parser<A>::parseCIE(A &addressSpace, pint_t cie,
       case 'z':
         cieInfo->fdesHaveAugmentationData = true;
         break;
-      case 'P':
+      case 'P': {
         cieInfo->personalityEncoding = addressSpace.get8(p);
         ++p;
         cieInfo->personalityOffsetInCIE = (uint8_t)(p - cie);
-        cieInfo->personality = addressSpace
-            .getEncodedP(p, cieContentEnd, cieInfo->personalityEncoding);
+        pint_t personality = addressSpace.getEncodedP(
+            p, cieContentEnd, cieInfo->personalityEncoding,
+            /*datarelBase=*/0, &resultAddr);
+#if defined(_LIBUNWIND_TARGET_AARCH64_AUTHENTICATED_UNWINDING)
+        if (personality) {
+          // The GOT entry for the personality function was signed with
+          // address diversity. Manually re-sign with the CIE_Info::personality
+          // schema. If we could guarantee the encoding of the personality we
+          // could avoid this by simply giving resultAddr the correct ptrauth
+          // schema and performing an assignment.
+#if defined(__arm64e__)
+          const auto oldDiscriminator = resultAddr;
+#else
+          const auto oldDiscriminator = ptrauth_blend_discriminator(
+              (void *)resultAddr, __ptrauth_unwind_pauthtest_personality_disc);
+#endif
+          const auto discriminator = ptrauth_blend_discriminator(
+              &cieInfo->personality,
+              __ptrauth_unwind_cie_info_personality_disc);
+          void *signedPtr = ptrauth_auth_and_resign(
+              (void *)personality, ptrauth_key_function_pointer,
+              oldDiscriminator, ptrauth_key_function_pointer, discriminator);
+          personality = (pint_t)signedPtr;
+        }
+#endif
+        // We use memmove to set the CIE personality as we have already
+        // re-signed the pointer to the correct schema.
+        memmove((void *)&cieInfo->personality, (void *)&personality,
+                sizeof(personality));
         break;
+      }
       case 'L':
         cieInfo->lsdaEncoding = addressSpace.get8(p);
         ++p;
@@ -417,10 +457,10 @@ const char *CFI_Parser<A>::parseCIE(A &addressSpace, pint_t cie,
 
 /// "run" the DWARF instructions and create the abstract PrologInfo for an FDE
 template <typename A>
-bool CFI_Parser<A>::parseFDEInstructions(A &addressSpace,
-                                         const FDE_Info &fdeInfo,
-                                         const CIE_Info &cieInfo, pint_t upToPC,
-                                         int arch, PrologInfo *results) {
+template <typename R>
+bool CFI_Parser<A>::parseFDEInstructions(
+    A &addressSpace, const FDE_Info &fdeInfo, const CIE_Info &cieInfo,
+    typename R::link_hardened_reg_arg_t upToPC, int arch, PrologInfo *results) {
   // Alloca is used for the allocation of the rememberStack entries. It removes
   // the dependency on new/malloc but the below for loop can not be refactored
   // into functions. Entry could be saved during the processing of a CIE and
@@ -808,12 +848,10 @@ bool CFI_Parser<A>::parseFDEInstructions(A &addressSpace,
             results->savedRegisters[UNW_AARCH64_RA_SIGN_STATE].value ^ 0x3;
         results->setRegisterValue(UNW_AARCH64_RA_SIGN_STATE, value,
                                   initialState);
-        // When calculating the value of the PC, it is assumed that the CFI
-        // instruction is placed before the signing instruction, however it is
-        // placed after. Because of this, we need to take into account the CFI
-        // instruction is one instruction call later than expected, and reduce
-        // the PC value by 4 bytes to compensate.
-        results->ptrAuthDiversifier = fdeInfo.pcStart + codeOffset - 0x4;
+        // When using FEAT_PAuth_LR, the PC value needs to be captured so that
+        // during unwinding, the correct PC value is used for re-authentication.
+        // It is assumed that the CFI is placed before the signing instruction.
+        results->ptrAuthDiversifier = fdeInfo.pcStart + codeOffset;
         _LIBUNWIND_TRACE_DWARF(
             "DW_CFA_AARCH64_negate_ra_state_with_pc(pc=0x%" PRIx64 ")\n",
             static_cast<uint64_t>(results->ptrAuthDiversifier));
diff --git a/lib/libunwind/src/EHHeaderParser.hpp b/lib/libunwind/src/EHHeaderParser.hpp
index 0662a1321e..b5d927027f 100644
--- a/lib/libunwind/src/EHHeaderParser.hpp
+++ b/lib/libunwind/src/EHHeaderParser.hpp
@@ -37,8 +37,9 @@ public:
 
   static bool decodeEHHdr(A &addressSpace, pint_t ehHdrStart, pint_t ehHdrEnd,
                           EHHeaderInfo &ehHdrInfo);
-  static bool findFDE(A &addressSpace, pint_t pc, pint_t ehHdrStart,
-                      uint32_t sectionLength,
+  template <typename R>
+  static bool findFDE(A &addressSpace, typename R::link_hardened_reg_arg_t pc,
+                      pint_t ehHdrStart, uint32_t sectionLength,
                       typename CFI_Parser<A>::FDE_Info *fdeInfo,
                       typename CFI_Parser<A>::CIE_Info *cieInfo);
 
@@ -112,8 +113,10 @@ bool EHHeaderParser<A>::decodeTableEntry(
 }
 
 template <typename A>
-bool EHHeaderParser<A>::findFDE(A &addressSpace, pint_t pc, pint_t ehHdrStart,
-                                uint32_t sectionLength,
+template <typename R>
+bool EHHeaderParser<A>::findFDE(A &addressSpace,
+                                typename R::link_hardened_reg_arg_t pc,
+                                pint_t ehHdrStart, uint32_t sectionLength,
                                 typename CFI_Parser<A>::FDE_Info *fdeInfo,
                                 typename CFI_Parser<A>::CIE_Info *cieInfo) {
   pint_t ehHdrEnd = ehHdrStart + sectionLength;
diff --git a/lib/libunwind/src/Registers.hpp b/lib/libunwind/src/Registers.hpp
index 2c3bfb7e84..88c2d3b4e8 100644
--- a/lib/libunwind/src/Registers.hpp
+++ b/lib/libunwind/src/Registers.hpp
@@ -17,8 +17,13 @@
 
 #include "config.h"
 #include "libunwind.h"
+#include "libunwind_ext.h"
 #include "shadow_stack_unwind.h"
 
+#if defined(_LIBUNWIND_HAVE_GETAUXVAL) || defined(_LIBUNWIND_HAVE_ELF_AUX_INFO)
+#include <sys/auxv.h>
+#endif
+
 namespace libunwind {
 
 // For emulating 128-bit registers
@@ -60,6 +65,10 @@ public:
   Registers_x86();
   Registers_x86(const void *registers);
 
+  typedef uint32_t reg_t;
+  typedef uint32_t link_reg_t;
+  typedef const link_reg_t &link_hardened_reg_arg_t;
+
   bool        validRegister(int num) const;
   uint32_t    getRegister(int num) const;
   void        setRegister(int num, uint32_t value);
@@ -278,6 +287,10 @@ public:
   Registers_x86_64();
   Registers_x86_64(const void *registers);
 
+  typedef uint64_t reg_t;
+  typedef uint64_t link_reg_t;
+  typedef const link_reg_t &link_hardened_reg_arg_t;
+
   bool        validRegister(int num) const;
   uint64_t    getRegister(int num) const;
   void        setRegister(int num, uint64_t value);
@@ -597,6 +610,10 @@ public:
   Registers_ppc();
   Registers_ppc(const void *registers);
 
+  typedef uint32_t reg_t;
+  typedef uint32_t link_reg_t;
+  typedef const link_reg_t &link_hardened_reg_arg_t;
+
   bool        validRegister(int num) const;
   uint32_t    getRegister(int num) const;
   void        setRegister(int num, uint32_t value);
@@ -1169,6 +1186,10 @@ public:
   Registers_ppc64();
   Registers_ppc64(const void *registers);
 
+  typedef uint64_t reg_t;
+  typedef uint64_t link_reg_t;
+  typedef const link_reg_t &link_hardened_reg_arg_t;
+
   bool        validRegister(int num) const;
   uint64_t    getRegister(int num) const;
   void        setRegister(int num, uint64_t value);
@@ -1814,7 +1835,9 @@ inline const char *Registers_ppc64::getRegisterName(int regNum) {
 /// Registers_arm64  holds the register state of a thread in a 64-bit arm
 /// process.
 class _LIBUNWIND_HIDDEN Registers_arm64;
-extern "C" void __libunwind_Registers_arm64_jumpto(Registers_arm64 *);
+extern "C" int64_t __libunwind_Registers_arm64_za_disable();
+extern "C" void __libunwind_Registers_arm64_jumpto(Registers_arm64 *,
+                                                   unsigned walkedFrames);
 
 #if defined(_LIBUNWIND_USE_GCS)
 extern "C" void *__libunwind_shstk_get_jump_target() {
@@ -1824,8 +1847,21 @@ extern "C" void *__libunwind_shstk_get_jump_target() {
 
 class _LIBUNWIND_HIDDEN Registers_arm64 {
 public:
-  Registers_arm64();
+  Registers_arm64() = default;
   Registers_arm64(const void *registers);
+  Registers_arm64(const Registers_arm64 &);
+  Registers_arm64 &operator=(const Registers_arm64 &);
+
+  typedef uint64_t reg_t;
+  typedef uint64_t __ptrauth_unwind_registers_arm64_link_reg link_reg_t;
+
+  // Use `link_hardened_reg_arg_t` to pass values of `link_reg_t` type as
+  // function arguments. We need to use a const l-value reference to keep
+  // signature of `__ptrauth`-qualified values of `link_reg_t` type on AArch64
+  // PAuth-enabled ABI intact. Passing the raw pointer by value would cause
+  // authentication on the caller side and make the pointer prone to
+  // substitution if spilled to the stack in the callee.
+  typedef const link_reg_t &link_hardened_reg_arg_t;
 
   bool        validRegister(int num) const;
   uint64_t    getRegister(int num) const;
@@ -1837,7 +1873,14 @@ public:
   v128        getVectorRegister(int num) const;
   void        setVectorRegister(int num, v128 value);
   static const char *getRegisterName(int num);
-  void        jumpto() { __libunwind_Registers_arm64_jumpto(this); }
+  void        jumpto(unsigned walkedFrames = 0) {
+    zaDisable();
+    __libunwind_Registers_arm64_jumpto(this, walkedFrames);
+  }
+#ifdef _LIBUNWIND_TRACE_RET_INJECT
+  _LIBUNWIND_TRACE_NO_INLINE
+  void        returnto(unsigned walkedFrames) { jumpto(walkedFrames); }
+#endif
   static constexpr int lastDwarfRegNum() {
     return _LIBUNWIND_HIGHEST_DWARF_REGISTER_ARM64;
   }
@@ -1845,27 +1888,104 @@ public:
 
   uint64_t  getSP() const         { return _registers.__sp; }
   void      setSP(uint64_t value) { _registers.__sp = value; }
-  uint64_t  getIP() const         { return _registers.__pc; }
-  void      setIP(uint64_t value) { _registers.__pc = value; }
-  uint64_t  getFP() const         { return _registers.__fp; }
-  void      setFP(uint64_t value) { _registers.__fp = value; }
+  uint64_t  getIP() const {
+    uint64_t value = _registers.__pc;
+#if defined(_LIBUNWIND_TARGET_AARCH64_AUTHENTICATED_UNWINDING)
+    // Note the PC in the register state is signed with its own address, but
+    // everyone else expects it to be signed by the SP, so convert on return.
+    value = (uint64_t)ptrauth_auth_and_resign((void *)_registers.__pc,
+                                              ptrauth_key_return_address,
+                                              &_registers.__pc,
+                                              ptrauth_key_return_address,
+                                              getSP());
+#endif
+    return value;
+  }
+  void      setIP(uint64_t value) {
+#if defined(_LIBUNWIND_TARGET_AARCH64_AUTHENTICATED_UNWINDING)
+    // Note the value which was set should have been signed with the SP.
+    // We then re-sign it with the address of the slot it is stored in, so
+    // that both SP and LR can't be spoofed at the same time.
+    value = (uint64_t)ptrauth_auth_and_resign((void *)value,
+                                              ptrauth_key_return_address,
+                                              getSP(),
+                                              ptrauth_key_return_address,
+                                              &_registers.__pc);
+#endif
+    _registers.__pc = value;
+  }
+  uint64_t getFP() const { return _registers.__fp; }
+  void setFP(uint64_t value) { _registers.__fp = value; }
+
+#if defined(_LIBUNWIND_TARGET_AARCH64_AUTHENTICATED_UNWINDING)
+  void
+  loadAndAuthenticateLinkRegister(reg_t inplaceAuthedLinkRegister,
+                                  link_reg_t *referenceAuthedLinkRegister) {
+    // If we are in an arm64/arm64e frame, then the PC should have been signed
+    // with the SP
+    *referenceAuthedLinkRegister =
+      (uint64_t)ptrauth_auth_data((void *)inplaceAuthedLinkRegister,
+                                  ptrauth_key_return_address,
+                                  _registers.__sp);
+  }
+#endif
 
 private:
+  uint64_t lazyGetVG() const;
+
+  void zaDisable() const {
+    if (!_misc_registers.__has_sme)
+      return;
+    if (__libunwind_Registers_arm64_za_disable() != 0)
+      _LIBUNWIND_ABORT("SME ZA disable failed");
+  }
+
+#if defined(_LIBUNWIND_HAVE_GETAUXVAL)
+  static bool checkHasSME() {
+    constexpr int hwcap2_sme = (1 << 23);
+    unsigned long hwcap2 = getauxval(AT_HWCAP2);
+    return (hwcap2 & hwcap2_sme) != 0;
+  }
+#elif defined(_LIBUNWIND_HAVE_ELF_AUX_INFO)
+  static bool checkHasSME() {
+    constexpr int hwcap2_sme = (1 << 23);
+    unsigned long hwcap2 = 0;
+    elf_aux_info(AT_HWCAP2, &hwcap2, sizeof(hwcap2));
+    return (hwcap2 & hwcap2_sme) != 0;
+  }
+#else
+  static bool checkHasSME() {
+    // TODO: Support other platforms.
+    return false;
+  }
+#endif
+
   struct GPRs {
-    uint64_t __x[29]; // x0-x28
-    uint64_t __fp;    // Frame pointer x29
-    uint64_t __lr;    // Link register x30
-    uint64_t __sp;    // Stack pointer x31
-    uint64_t __pc;    // Program counter
-    uint64_t __ra_sign_state; // RA sign state register
+    uint64_t __x[29] = {};        // x0-x28
+    uint64_t __fp = 0;            // Frame pointer x29
+    uint64_t __lr = 0;            // Link register x30
+    uint64_t __sp = 0;            // Stack pointer x31
+    uint64_t __pc = 0;            // Program counter
+    uint64_t __ra_sign_state = 0; // RA sign state register
   };
 
-  GPRs    _registers;
-  double  _vectorHalfRegisters[32];
+  struct Misc {
+    mutable uint32_t __vg = 0; // Vector Granule
+    bool __has_sme = checkHasSME();
+  };
+
+  GPRs _registers = {};
   // Currently only the lower double in 128-bit vectore registers
   // is perserved during unwinding.  We could define new register
   // numbers (> 96) which mean whole vector registers, then this
   // struct would need to change to contain whole vector registers.
+  double _vectorHalfRegisters[32] = {};
+
+  // Miscellaneous/virtual registers. These are stored below the GPRs and FPRs
+  // as they do not correspond to physical registers, so do not need to be
+  // saved/restored in UnwindRegistersRestore.S and UnwindRegistersSave.S, and
+  // we don't want to modify the existing offsets for GPRs and FPRs.
+  Misc _misc_registers;
 };
 
 inline Registers_arm64::Registers_arm64(const void *registers) {
@@ -1877,11 +1997,31 @@ inline Registers_arm64::Registers_arm64(const void *registers) {
   memcpy(_vectorHalfRegisters,
          static_cast<const uint8_t *>(registers) + sizeof(GPRs),
          sizeof(_vectorHalfRegisters));
+  _misc_registers.__vg = 0;
+
+#if defined(_LIBUNWIND_TARGET_AARCH64_AUTHENTICATED_UNWINDING)
+  // We have to do some pointer authentication fixups after this copy,
+  // and as part of that we need to load the source pc without
+  // authenticating so that we maintain the signature for the resigning
+  // performed by setIP.
+  uint64_t pcRegister = 0;
+  memmove(&pcRegister, ((uint8_t *)&_registers) + offsetof(GPRs, __pc),
+          sizeof(pcRegister));
+  setIP(pcRegister);
+#endif
 }
 
-inline Registers_arm64::Registers_arm64() {
-  memset(&_registers, 0, sizeof(_registers));
-  memset(&_vectorHalfRegisters, 0, sizeof(_vectorHalfRegisters));
+inline Registers_arm64::Registers_arm64(const Registers_arm64 &other) {
+  *this = other;
+}
+
+inline Registers_arm64 &
+Registers_arm64::operator=(const Registers_arm64 &other) {
+  memmove(static_cast<void *>(this), &other, sizeof(*this));
+  // We perform this step to ensure that we correctly authenticate and re-sign
+  // the pc after the bitwise copy.
+  setIP(other.getIP());
+  return *this;
 }
 
 inline bool Registers_arm64::validRegister(int regNum) const {
@@ -1895,22 +2035,40 @@ inline bool Registers_arm64::validRegister(int regNum) const {
     return false;
   if (regNum == UNW_AARCH64_RA_SIGN_STATE)
     return true;
+  if (regNum == UNW_AARCH64_VG)
+    return true;
   if ((regNum > 32) && (regNum < 64))
     return false;
   return true;
 }
 
+inline uint64_t Registers_arm64::lazyGetVG() const {
+  // Zero doubles as the "not cached yet" marker for the vector granule.
+  if (_misc_registers.__vg != 0)
+    return _misc_registers.__vg;
+#if defined(__aarch64__)
+  register uint64_t granule asm("x0");
+  asm(".inst 0x04e0e3e0" /* CNTD x0 */ : "=r"(granule));
+  _misc_registers.__vg = granule;
+#else
+  _LIBUNWIND_ABORT("arm64 VG undefined");
+#endif
+  return _misc_registers.__vg;
+}
+
 inline uint64_t Registers_arm64::getRegister(int regNum) const {
   if (regNum == UNW_REG_IP || regNum == UNW_AARCH64_PC)
-    return _registers.__pc;
+    return getIP();
   if (regNum == UNW_REG_SP || regNum == UNW_AARCH64_SP)
     return _registers.__sp;
   if (regNum == UNW_AARCH64_RA_SIGN_STATE)
     return _registers.__ra_sign_state;
   if (regNum == UNW_AARCH64_FP)
-    return _registers.__fp;
+    return getFP();
   if (regNum == UNW_AARCH64_LR)
     return _registers.__lr;
+  if (regNum == UNW_AARCH64_VG)
+    return lazyGetVG();
   if ((regNum >= 0) && (regNum < 29))
     return _registers.__x[regNum];
   _LIBUNWIND_ABORT("unsupported arm64 register");
@@ -1918,15 +2076,17 @@ inline uint64_t Registers_arm64::getRegister(int regNum) const {
 
 inline void Registers_arm64::setRegister(int regNum, uint64_t value) {
   if (regNum == UNW_REG_IP || regNum == UNW_AARCH64_PC)
-    _registers.__pc = value;
+    setIP(value);
   else if (regNum == UNW_REG_SP || regNum == UNW_AARCH64_SP)
     _registers.__sp = value;
   else if (regNum == UNW_AARCH64_RA_SIGN_STATE)
     _registers.__ra_sign_state = value;
   else if (regNum == UNW_AARCH64_FP)
-    _registers.__fp = value;
+    setFP(value);
   else if (regNum == UNW_AARCH64_LR)
     _registers.__lr = value;
+  else if (regNum == UNW_AARCH64_VG)
+    _misc_registers.__vg = value;
   else if ((regNum >= 0) && (regNum < 29))
     _registers.__x[regNum] = value;
   else
@@ -2116,6 +2276,10 @@ public:
   Registers_arm();
   Registers_arm(const void *registers);
 
+  typedef uint32_t reg_t;
+  typedef uint32_t link_reg_t;
+  typedef const link_reg_t &link_hardened_reg_arg_t;
+
   bool        validRegister(int num) const;
   uint32_t    getRegister(int num) const;
   void        setRegister(int num, uint32_t value);
@@ -2621,6 +2785,10 @@ public:
   Registers_or1k();
   Registers_or1k(const void *registers);
 
+  typedef uint32_t reg_t;
+  typedef uint32_t link_reg_t;
+  typedef const link_reg_t &link_hardened_reg_arg_t;
+
   bool        validRegister(int num) const;
   uint32_t    getRegister(int num) const;
   void        setRegister(int num, uint32_t value);
@@ -2820,6 +2988,10 @@ public:
   Registers_mips_o32();
   Registers_mips_o32(const void *registers);
 
+  typedef uint32_t reg_t;
+  typedef uint32_t link_reg_t;
+  typedef const link_reg_t &link_hardened_reg_arg_t;
+
   bool        validRegister(int num) const;
   uint32_t    getRegister(int num) const;
   void        setRegister(int num, uint32_t value);
@@ -3155,6 +3327,10 @@ public:
   Registers_mips_newabi();
   Registers_mips_newabi(const void *registers);
 
+  typedef uint64_t reg_t;
+  typedef uint64_t link_reg_t;
+  typedef const link_reg_t &link_hardened_reg_arg_t;
+
   bool        validRegister(int num) const;
   uint64_t    getRegister(int num) const;
   void        setRegister(int num, uint64_t value);
@@ -3458,6 +3634,10 @@ public:
   Registers_sparc();
   Registers_sparc(const void *registers);
 
+  typedef uint32_t reg_t;
+  typedef uint32_t link_reg_t;
+  typedef const link_reg_t &link_hardened_reg_arg_t;
+
   bool        validRegister(int num) const;
   uint32_t    getRegister(int num) const;
   void        setRegister(int num, uint32_t value);
@@ -3644,6 +3824,10 @@ public:
   Registers_sparc64() = default;
   Registers_sparc64(const void *registers);
 
+  typedef uint64_t reg_t;
+  typedef uint64_t link_reg_t;
+  typedef const link_reg_t &link_hardened_reg_arg_t;
+
   bool validRegister(int num) const;
   uint64_t getRegister(int num) const;
   void setRegister(int num, uint64_t value);
@@ -3829,6 +4013,10 @@ public:
   Registers_hexagon();
   Registers_hexagon(const void *registers);
 
+  typedef uint32_t reg_t;
+  typedef uint32_t link_reg_t;
+  typedef const link_reg_t &link_hardened_reg_arg_t;
+
   bool        validRegister(int num) const;
   uint32_t    getRegister(int num) const;
   void        setRegister(int num, uint32_t value);
@@ -4044,6 +4232,10 @@ public:
   Registers_riscv();
   Registers_riscv(const void *registers);
 
+  typedef ::libunwind::reg_t reg_t;
+  typedef ::libunwind::reg_t link_reg_t;
+  typedef const link_reg_t &link_hardened_reg_arg_t;
+
   bool        validRegister(int num) const;
   reg_t       getRegister(int num) const;
   void        setRegister(int num, reg_t value);
@@ -4341,6 +4533,10 @@ public:
   Registers_ve();
   Registers_ve(const void *registers);
 
+  typedef uint64_t reg_t;
+  typedef uint64_t link_reg_t;
+  typedef const link_reg_t &link_hardened_reg_arg_t;
+
   bool        validRegister(int num) const;
   uint64_t    getRegister(int num) const;
   void        setRegister(int num, uint64_t value);
@@ -4784,6 +4980,10 @@ public:
   Registers_s390x();
   Registers_s390x(const void *registers);
 
+  typedef uint64_t reg_t;
+  typedef uint64_t link_reg_t;
+  typedef const link_reg_t &link_hardened_reg_arg_t;
+
   bool        validRegister(int num) const;
   uint64_t    getRegister(int num) const;
   void        setRegister(int num, uint64_t value);
@@ -5072,6 +5272,10 @@ public:
   Registers_loongarch();
   Registers_loongarch(const void *registers);
 
+  typedef uint64_t reg_t;
+  typedef uint64_t link_reg_t;
+  typedef const link_reg_t &link_hardened_reg_arg_t;
+
   bool validRegister(int num) const;
   uint64_t getRegister(int num) const;
   void setRegister(int num, uint64_t value);
diff --git a/lib/libunwind/src/Unwind-seh.cpp b/lib/libunwind/src/Unwind-seh.cpp
index 110c5987c3..0b1930b44d 100644
--- a/lib/libunwind/src/Unwind-seh.cpp
+++ b/lib/libunwind/src/Unwind-seh.cpp
@@ -163,7 +163,7 @@ _GCC_specific_handler(PEXCEPTION_RECORD ms_exc, PVOID frame, PCONTEXT ms_ctx,
     // If we were called by __libunwind_seh_personality(), indicate that
     // a handler was found; otherwise, initiate phase 2 by unwinding.
     if (ours && ms_exc->NumberParameters > 1)
-      return 4 /* ExceptionExecuteHandler in mingw */;
+      return static_cast<EXCEPTION_DISPOSITION>(4);
     // This should never happen in phase 2.
     if (IS_UNWINDING(ms_exc->ExceptionFlags))
       _LIBUNWIND_ABORT("Personality indicated exception handler in phase 2!");
@@ -182,7 +182,7 @@ _GCC_specific_handler(PEXCEPTION_RECORD ms_exc, PVOID frame, PCONTEXT ms_ctx,
     // a handler was found; otherwise, it's time to initiate a collided
     // unwind to the target.
     if (ours && !IS_UNWINDING(ms_exc->ExceptionFlags) && ms_exc->NumberParameters > 1)
-      return 4 /* ExceptionExecuteHandler in mingw */;
+      return static_cast<EXCEPTION_DISPOSITION>(4);
     // This should never happen in phase 1.
     if (!IS_UNWINDING(ms_exc->ExceptionFlags))
       _LIBUNWIND_ABORT("Personality installed context during phase 1!");
@@ -259,13 +259,12 @@ __libunwind_seh_personality(int version, _Unwind_Action state,
                              (void *)disp_ctx->LanguageHandler, (void *)&ms_exc,
                              (void *)disp_ctx->EstablisherFrame,
                              (void *)disp_ctx->ContextRecord, (void *)disp_ctx);
-  EXCEPTION_DISPOSITION ms_act = disp_ctx->LanguageHandler(&ms_exc,
-                                                           (PVOID)disp_ctx->EstablisherFrame,
-                                                           disp_ctx->ContextRecord,
-                                                           disp_ctx);
+  int ms_act = static_cast<int>(
+      disp_ctx->LanguageHandler(&ms_exc, (PVOID)disp_ctx->EstablisherFrame,
+                                disp_ctx->ContextRecord, disp_ctx));
   _LIBUNWIND_TRACE_UNWINDING("__libunwind_seh_personality() LanguageHandler "
                              "returned %d",
-                             (int)ms_act);
+                             ms_act);
   switch (ms_act) {
   case ExceptionContinueExecution: return _URC_END_OF_STACK;
   case ExceptionContinueSearch: return _URC_CONTINUE_UNWIND;
diff --git a/lib/libunwind/src/Unwind-wasm.c b/lib/libunwind/src/Unwind-wasm.c
index b8b7bc2779..b0d6cd2d00 100644
--- a/lib/libunwind/src/Unwind-wasm.c
+++ b/lib/libunwind/src/Unwind-wasm.c
@@ -37,13 +37,13 @@ struct _Unwind_LandingPadContext {
 // function
 thread_local struct _Unwind_LandingPadContext __wasm_lpad_context;
 
-/// Calls to this function is in landing pads in compiler-generated user code.
+/// Calls to this function are in landing pads in compiler-generated user code.
 /// In other EH schemes, stack unwinding is done by libunwind library, which
-/// calls the personality function for each each frame it lands. On the other
-/// hand, WebAssembly stack unwinding process is performed by a VM, and the
-/// personality function cannot be called from there. So the compiler inserts
-/// a call to this function in landing pads in the user code, which in turn
-/// calls the personality function.
+/// calls the personality function for each frame it lands on. On the other
+/// hand, the WebAssembly stack unwinding process is performed by a VM, and the
+/// personality function cannot be called from there. So the compiler inserts a
+/// call to this function in landing pads in the user code, which in turn calls
+/// the personality function.
 _Unwind_Reason_Code _Unwind_CallPersonality(void *exception_ptr) {
   struct _Unwind_Exception *exception_object =
       (struct _Unwind_Exception *)exception_ptr;
@@ -92,7 +92,7 @@ _LIBUNWIND_EXPORT void _Unwind_SetGR(struct _Unwind_Context *context, int index,
 
 /// Called by personality handler to get instruction pointer.
 _LIBUNWIND_EXPORT uintptr_t _Unwind_GetIP(struct _Unwind_Context *context) {
-  // The result will be used as an 1-based index after decrementing 1, so we
+  // The result will be used as a 1-based index after decrementing 1, so we
   // increment 2 here
   uintptr_t result =
       ((struct _Unwind_LandingPadContext *)context)->lpad_index + 2;
diff --git a/lib/libunwind/src/UnwindCursor.hpp b/lib/libunwind/src/UnwindCursor.hpp
index 55db035e62..5838dbcaa9 100644
--- a/lib/libunwind/src/UnwindCursor.hpp
+++ b/lib/libunwind/src/UnwindCursor.hpp
@@ -41,7 +41,8 @@
 #define _LIBUNWIND_CHECK_LINUX_SIGRETURN 1
 #endif
 
-#if defined(_LIBUNWIND_TARGET_HAIKU) && defined(_LIBUNWIND_TARGET_X86_64)
+#if defined(_LIBUNWIND_TARGET_HAIKU) &&                                        \
+    (defined(_LIBUNWIND_TARGET_I386) || defined(_LIBUNWIND_TARGET_X86_64))
 #include <OS.h>
 #include <signal.h>
 #define _LIBUNWIND_CHECK_HAIKU_SIGRETURN 1
@@ -120,7 +121,9 @@ class _LIBUNWIND_HIDDEN DwarfFDECache {
   typedef typename A::pint_t pint_t;
 public:
   static constexpr pint_t kSearchAll = static_cast<pint_t>(-1);
-  static pint_t findFDE(pint_t mh, pint_t pc);
+  template <typename R>
+  static pint_t findFDE(pint_t mh, typename R::link_hardened_reg_arg_t pc);
+
   static void add(pint_t mh, pint_t ip_start, pint_t ip_end, pint_t fde);
   static void removeAllIn(pint_t mh);
   static void iterateCacheEntries(void (*func)(unw_word_t ip_start,
@@ -173,7 +176,9 @@ bool DwarfFDECache<A>::_registeredForDyldUnloads = false;
 #endif
 
 template <typename A>
-typename A::pint_t DwarfFDECache<A>::findFDE(pint_t mh, pint_t pc) {
+template <typename R>
+typename DwarfFDECache<A>::pint_t
+DwarfFDECache<A>::findFDE(pint_t mh, typename R::link_hardened_reg_arg_t pc) {
   pint_t result = 0;
   _LIBUNWIND_LOG_IF_FALSE(_lock.lock_shared());
   for (entry *p = _buffer; p < _bufferUsed; ++p) {
@@ -471,7 +476,9 @@ public:
   virtual void getInfo(unw_proc_info_t *) {
     _LIBUNWIND_ABORT("getInfo not implemented");
   }
-  virtual void jumpto() { _LIBUNWIND_ABORT("jumpto not implemented"); }
+  _LIBUNWIND_TRACE_NO_INLINE virtual void jumpto() {
+    _LIBUNWIND_ABORT("jumpto not implemented");
+  }
   virtual bool isSignalFrame() {
     _LIBUNWIND_ABORT("isSignalFrame not implemented");
   }
@@ -488,6 +495,12 @@ public:
   virtual void saveVFPAsX() { _LIBUNWIND_ABORT("saveVFPAsX not implemented"); }
 #endif
 
+#ifdef _LIBUNWIND_TRACE_RET_INJECT
+  virtual void setWalkedFrames(unsigned) {
+    _LIBUNWIND_ABORT("setWalkedFrames not implemented");
+  }
+#endif
+
 #ifdef _AIX
   virtual uintptr_t getDataRelBase() {
     _LIBUNWIND_ABORT("getDataRelBase not implemented");
@@ -964,7 +977,8 @@ public:
   virtual void        setFloatReg(int, unw_fpreg_t);
   virtual int         step(bool stage2 = false);
   virtual void        getInfo(unw_proc_info_t *);
-  virtual void        jumpto();
+  _LIBUNWIND_TRACE_NO_INLINE
+    virtual void      jumpto();
   virtual bool        isSignalFrame();
   virtual bool        getFunctionName(char *buf, size_t len, unw_word_t *off);
   virtual void        setInfoBasedOnIPRegister(bool isReturnAddress = false);
@@ -973,6 +987,10 @@ public:
   virtual void        saveVFPAsX();
 #endif
 
+#ifdef _LIBUNWIND_TRACE_RET_INJECT
+  virtual void setWalkedFrames(unsigned);
+#endif
+
 #ifdef _AIX
   virtual uintptr_t getDataRelBase();
 #endif
@@ -1045,19 +1063,28 @@ private:
 #if defined(_LIBUNWIND_SUPPORT_DWARF_UNWIND)
   bool getInfoFromFdeCie(const typename CFI_Parser<A>::FDE_Info &fdeInfo,
                          const typename CFI_Parser<A>::CIE_Info &cieInfo,
-                         pint_t pc, uintptr_t dso_base);
-  bool getInfoFromDwarfSection(pint_t pc, const UnwindInfoSections &sects,
-                                            uint32_t fdeSectionOffsetHint=0);
+                         typename R::link_hardened_reg_arg_t pc,
+                         uintptr_t dso_base);
+  bool getInfoFromDwarfSection(typename R::link_hardened_reg_arg_t pc,
+                               const UnwindInfoSections &sects,
+                               uint32_t fdeSectionOffsetHint = 0);
   int stepWithDwarfFDE(bool stage2) {
+#if defined(_LIBUNWIND_TARGET_AARCH64_AUTHENTICATED_UNWINDING)
+    typename R::reg_t rawPC = this->getReg(UNW_REG_IP);
+    typename R::link_reg_t pc;
+    _registers.loadAndAuthenticateLinkRegister(rawPC, &pc);
+#else
+    typename R::link_reg_t pc = this->getReg(UNW_REG_IP);
+#endif
     return DwarfInstructions<A, R>::stepWithDwarf(
-        _addressSpace, (pint_t)this->getReg(UNW_REG_IP),
-        (pint_t)_info.unwind_info, _registers, _isSignalFrame, stage2);
+        _addressSpace, pc, (pint_t)_info.unwind_info, _registers,
+        _isSignalFrame, stage2);
   }
 #endif
 
 #if defined(_LIBUNWIND_SUPPORT_COMPACT_UNWIND)
-  bool getInfoFromCompactEncodingSection(pint_t pc,
-                                            const UnwindInfoSections &sects);
+  bool getInfoFromCompactEncodingSection(typename R::link_hardened_reg_arg_t pc,
+                                         const UnwindInfoSections &sects);
   int stepWithCompactEncoding(bool stage2 = false) {
 #if defined(_LIBUNWIND_SUPPORT_DWARF_UNWIND)
     if ( compactSaysUseDwarf() )
@@ -1344,9 +1371,12 @@ private:
   bool             _unwindInfoMissing;
   bool             _isSignalFrame;
 #if defined(_LIBUNWIND_CHECK_LINUX_SIGRETURN) ||                               \
-    defined(_LIBUNWIND_TARGET_HAIKU)
+    defined(_LIBUNWIND_CHECK_HAIKU_SIGRETURN)
   bool             _isSigReturn = false;
 #endif
+#ifdef _LIBUNWIND_TRACE_RET_INJECT
+  uint32_t _walkedFrames = 0; // set via setWalkedFrames(); read by jumpto()
+#endif
 };
 
 
@@ -1358,13 +1388,13 @@ UnwindCursor<A, R>::UnwindCursor(unw_context_t *context, A &as)
                 "UnwindCursor<> does not fit in unw_cursor_t");
   static_assert((alignof(UnwindCursor<A, R>) <= alignof(unw_cursor_t)),
                 "UnwindCursor<> requires more alignment than unw_cursor_t");
-  memset(&_info, 0, sizeof(_info));
+  memset(static_cast<void *>(&_info), 0, sizeof(_info));
 }
 
 template <typename A, typename R>
 UnwindCursor<A, R>::UnwindCursor(A &as, void *)
     : _addressSpace(as), _unwindInfoMissing(false), _isSignalFrame(false) {
-  memset(&_info, 0, sizeof(_info));
+  memset(static_cast<void *>(&_info), 0, sizeof(_info));
   // FIXME
   // fill in _registers from thread arg
 }
@@ -1401,7 +1431,46 @@ void UnwindCursor<A, R>::setFloatReg(int regNum, unw_fpreg_t value) {
 }
 
 template <typename A, typename R> void UnwindCursor<A, R>::jumpto() {
+#ifdef _LIBUNWIND_TRACE_RET_INJECT
+  /*
+
+  The value of `_walkedFrames` is computed in `unwind_phase2` and represents the
+  number of frames walked, starting from `unwind_phase2`, to the landing pad.
+
+  ```
+    // uc is initialized by __unw_getcontext in the parent frame.
+    // The first stack frame walked is unwind_phase2.
+    unsigned framesWalked = 1;
+  ```
+
+  To that, we need to add the number of function calls in libunwind between
+  `unwind_phase2` & `__libunwind_Registers_arm64_jumpto` which performs the long
+  jump, to rebalance the execution flow.
+
+  ```
+      frame #0: libunwind.1.dylib`__libunwind_Registers_arm64_jumpto at UnwindRegistersRestore.S:646
+      frame #1: libunwind.1.dylib`libunwind::Registers_arm64::returnto at Registers.hpp:2291:3
+      frame #2: libunwind.1.dylib`libunwind::UnwindCursor<libunwind::LocalAddressSpace, libunwind::Registers_arm64>::jumpto at UnwindCursor.hpp:1474:14
+      frame #3: libunwind.1.dylib`__unw_resume at libunwind.cpp:375:7
+      frame #4: libunwind.1.dylib`__unw_resume_with_frames_walked at libunwind.cpp:363:10
+      frame #5: libunwind.1.dylib`unwind_phase2 at UnwindLevel1.c:328:9
+      frame #6: libunwind.1.dylib`_Unwind_RaiseException at UnwindLevel1.c:480:10
+      frame #7: libc++abi.dylib`__cxa_throw at cxa_exception.cpp:295:5
+      ...
+  ```
+
+  If we look at the backtrace from `__libunwind_Registers_arm64_jumpto`, we see
+  there are 5 frames on the stack to reach `unwind_phase2`. However, only 4 of
+  them will never return, since `__libunwind_Registers_arm64_jumpto` returns
+  back to the landing pad, so we need to subtract 1 when computing
+  `_EXTRA_LIBUNWIND_FRAMES_WALKED`.
+  */
+
+  static constexpr size_t _EXTRA_LIBUNWIND_FRAMES_WALKED = 5 - 1;
+  _registers.returnto(_walkedFrames + _EXTRA_LIBUNWIND_FRAMES_WALKED);
+#else
   _registers.jumpto();
+#endif
 }
 
 #ifdef __arm__
@@ -1410,6 +1479,13 @@ template <typename A, typename R> void UnwindCursor<A, R>::saveVFPAsX() {
 }
 #endif
 
+#ifdef _LIBUNWIND_TRACE_RET_INJECT
+template <typename A, typename R>
+void UnwindCursor<A, R>::setWalkedFrames(unsigned walkedFrames) {
+  _walkedFrames = walkedFrames;
+}
+#endif
+
 #ifdef _AIX
 template <typename A, typename R>
 uintptr_t UnwindCursor<A, R>::getDataRelBase() {
@@ -1658,11 +1734,11 @@ bool UnwindCursor<A, R>::getInfoFromEHABISection(
 template <typename A, typename R>
 bool UnwindCursor<A, R>::getInfoFromFdeCie(
     const typename CFI_Parser<A>::FDE_Info &fdeInfo,
-    const typename CFI_Parser<A>::CIE_Info &cieInfo, pint_t pc,
-    uintptr_t dso_base) {
+    const typename CFI_Parser<A>::CIE_Info &cieInfo,
+    typename R::link_hardened_reg_arg_t pc, uintptr_t dso_base) {
   typename CFI_Parser<A>::PrologInfo prolog;
-  if (CFI_Parser<A>::parseFDEInstructions(_addressSpace, fdeInfo, cieInfo, pc,
-                                          R::getArch(), &prolog)) {
+  if (CFI_Parser<A>::template parseFDEInstructions<R>(
+          _addressSpace, fdeInfo, cieInfo, pc, R::getArch(), &prolog)) {
     // Save off parsed FDE info
     _info.start_ip          = fdeInfo.pcStart;
     _info.end_ip            = fdeInfo.pcEnd;
@@ -1682,43 +1758,42 @@ bool UnwindCursor<A, R>::getInfoFromFdeCie(
 }
 
 template <typename A, typename R>
-bool UnwindCursor<A, R>::getInfoFromDwarfSection(pint_t pc,
-                                                const UnwindInfoSections &sects,
-                                                uint32_t fdeSectionOffsetHint) {
+bool UnwindCursor<A, R>::getInfoFromDwarfSection(
+    typename R::link_hardened_reg_arg_t pc, const UnwindInfoSections &sects,
+    uint32_t fdeSectionOffsetHint) {
   typename CFI_Parser<A>::FDE_Info fdeInfo;
   typename CFI_Parser<A>::CIE_Info cieInfo;
   bool foundFDE = false;
   bool foundInCache = false;
   // If compact encoding table gave offset into dwarf section, go directly there
   if (fdeSectionOffsetHint != 0) {
-    foundFDE = CFI_Parser<A>::findFDE(_addressSpace, pc, sects.dwarf_section,
-                                    sects.dwarf_section_length,
-                                    sects.dwarf_section + fdeSectionOffsetHint,
-                                    &fdeInfo, &cieInfo);
+    foundFDE = CFI_Parser<A>::template findFDE<R>(
+        _addressSpace, pc, sects.dwarf_section, sects.dwarf_section_length,
+        sects.dwarf_section + fdeSectionOffsetHint, &fdeInfo, &cieInfo);
   }
 #if defined(_LIBUNWIND_SUPPORT_DWARF_INDEX)
   if (!foundFDE && (sects.dwarf_index_section != 0)) {
-    foundFDE = EHHeaderParser<A>::findFDE(
+    foundFDE = EHHeaderParser<A>::template findFDE<R>(
         _addressSpace, pc, sects.dwarf_index_section,
         (uint32_t)sects.dwarf_index_section_length, &fdeInfo, &cieInfo);
   }
 #endif
   if (!foundFDE) {
     // otherwise, search cache of previously found FDEs.
-    pint_t cachedFDE = DwarfFDECache<A>::findFDE(sects.dso_base, pc);
+    pint_t cachedFDE =
+        DwarfFDECache<A>::template findFDE<R>(sects.dso_base, pc);
     if (cachedFDE != 0) {
-      foundFDE =
-          CFI_Parser<A>::findFDE(_addressSpace, pc, sects.dwarf_section,
-                                 sects.dwarf_section_length,
-                                 cachedFDE, &fdeInfo, &cieInfo);
+      foundFDE = CFI_Parser<A>::template findFDE<R>(
+          _addressSpace, pc, sects.dwarf_section, sects.dwarf_section_length,
+          cachedFDE, &fdeInfo, &cieInfo);
       foundInCache = foundFDE;
     }
   }
   if (!foundFDE) {
     // Still not found, do full scan of __eh_frame section.
-    foundFDE = CFI_Parser<A>::findFDE(_addressSpace, pc, sects.dwarf_section,
-                                      sects.dwarf_section_length, 0,
-                                      &fdeInfo, &cieInfo);
+    foundFDE = CFI_Parser<A>::template findFDE<R>(
+        _addressSpace, pc, sects.dwarf_section, sects.dwarf_section_length, 0,
+        &fdeInfo, &cieInfo);
   }
   if (foundFDE) {
     if (getInfoFromFdeCie(fdeInfo, cieInfo, pc, sects.dso_base)) {
@@ -1742,8 +1817,8 @@ bool UnwindCursor<A, R>::getInfoFromDwarfSection(pint_t pc,
 
 #if defined(_LIBUNWIND_SUPPORT_COMPACT_UNWIND)
 template <typename A, typename R>
-bool UnwindCursor<A, R>::getInfoFromCompactEncodingSection(pint_t pc,
-                                              const UnwindInfoSections &sects) {
+bool UnwindCursor<A, R>::getInfoFromCompactEncodingSection(
+    typename R::link_hardened_reg_arg_t pc, const UnwindInfoSections &sects) {
   const bool log = false;
   if (log)
     fprintf(stderr, "getInfoFromCompactEncodingSection(pc=0x%llX, mh=0x%llX)\n",
@@ -1974,6 +2049,16 @@ bool UnwindCursor<A, R>::getInfoFromCompactEncodingSection(pint_t pc,
         personalityIndex * sizeof(uint32_t));
     pint_t personalityPointer = sects.dso_base + (pint_t)personalityDelta;
     personality = _addressSpace.getP(personalityPointer);
+#if defined(_LIBUNWIND_TARGET_AARCH64_AUTHENTICATED_UNWINDING)
+    // The GOT for the personality function was signed address authenticated.
+    // Resign it as a regular function pointer.
+    const auto discriminator = ptrauth_blend_discriminator(
+        &_info.handler, __ptrauth_unwind_upi_handler_disc);
+    void *signedPtr = ptrauth_auth_and_resign(
+        (void *)personality, ptrauth_key_function_pointer, personalityPointer,
+        ptrauth_key_function_pointer, discriminator);
+    personality = (__typeof(personality))signedPtr;
+#endif
     if (log)
       fprintf(stderr, "getInfoFromCompactEncodingSection(pc=0x%llX), "
                       "personalityDelta=0x%08X, personality=0x%08llX\n",
@@ -1987,7 +2072,11 @@ bool UnwindCursor<A, R>::getInfoFromCompactEncodingSection(pint_t pc,
   _info.start_ip = funcStart;
   _info.end_ip = funcEnd;
   _info.lsda = lsda;
-  _info.handler = personality;
+  // We use memmove to copy the personality function as we have already manually
+  // re-signed the pointer, and assigning directly will attempt to incorrectly
+  // sign the already signed value.
+  memmove(reinterpret_cast<void *>(&_info.handler),
+          reinterpret_cast<void *>(&personality), sizeof(personality));
   _info.gp = 0;
   _info.flags = 0;
   _info.format = encoding;
@@ -2640,11 +2729,19 @@ void UnwindCursor<A, R>::setInfoBasedOnIPRegister(bool isReturnAddress) {
   _isSigReturn = false;
 #endif
 
-  pint_t pc = static_cast<pint_t>(this->getReg(UNW_REG_IP));
+  typename R::reg_t rawPC = this->getReg(UNW_REG_IP);
+
 #if defined(_LIBUNWIND_ARM_EHABI)
   // Remove the thumb bit so the IP represents the actual instruction address.
   // This matches the behaviour of _Unwind_GetIP on arm.
-  pc &= (pint_t)~0x1;
+  rawPC &= (pint_t)~0x1;
+#endif
+
+  typename R::link_reg_t pc;
+#if defined(_LIBUNWIND_TARGET_AARCH64_AUTHENTICATED_UNWINDING)
+  _registers.loadAndAuthenticateLinkRegister(rawPC, &pc);
+#else
+  pc = rawPC;
 #endif
 
   // Exit early if at the top of the stack.
@@ -2679,7 +2776,7 @@ void UnwindCursor<A, R>::setInfoBasedOnIPRegister(bool isReturnAddress) {
 
   // Ask address space object to find unwind sections for this pc.
   UnwindInfoSections sects;
-  if (_addressSpace.findUnwindSections(pc, sects)) {
+  if (_addressSpace.template findUnwindSections<R>(pc, sects)) {
 #if defined(_LIBUNWIND_SUPPORT_COMPACT_UNWIND)
     // If there is a compact unwind encoding table, look there first.
     if (sects.compact_unwind_section != 0) {
@@ -2735,8 +2832,8 @@ void UnwindCursor<A, R>::setInfoBasedOnIPRegister(bool isReturnAddress) {
 #if defined(_LIBUNWIND_SUPPORT_DWARF_UNWIND)
   // There is no static unwind info for this pc. Look to see if an FDE was
   // dynamically registered for it.
-  pint_t cachedFDE = DwarfFDECache<A>::findFDE(DwarfFDECache<A>::kSearchAll,
-                                               pc);
+  pint_t cachedFDE =
+      DwarfFDECache<A>::template findFDE<R>(DwarfFDECache<A>::kSearchAll, pc);
   if (cachedFDE != 0) {
     typename CFI_Parser<A>::FDE_Info fdeInfo;
     typename CFI_Parser<A>::CIE_Info cieInfo;
@@ -2748,7 +2845,7 @@ void UnwindCursor<A, R>::setInfoBasedOnIPRegister(bool isReturnAddress) {
   // Lastly, ask AddressSpace object about platform specific ways to locate
   // other FDEs.
   pint_t fde;
-  if (_addressSpace.findOtherFDE(pc, fde)) {
+  if (_addressSpace.template findOtherFDE<R>(pc, fde)) {
     typename CFI_Parser<A>::FDE_Info fdeInfo;
     typename CFI_Parser<A>::CIE_Info cieInfo;
     if (!CFI_Parser<A>::decodeFDE(_addressSpace, fde, &fdeInfo, &cieInfo)) {
@@ -2772,6 +2869,21 @@ void UnwindCursor<A, R>::setInfoBasedOnIPRegister(bool isReturnAddress) {
 
 #if defined(_LIBUNWIND_CHECK_LINUX_SIGRETURN) &&                               \
     defined(_LIBUNWIND_TARGET_AARCH64)
+
+/*
+ * The Linux sigreturn restorer stub always has the form below. The big-endian
+ * constants are byte-swapped since the words are read as host-order uint32_t:
+ *  d2801168        movz    x8, #0x8b
+ *  d4000001        svc     #0x0
+ */
+#if defined(__AARCH64EB__)
+#define MOVZ_X8_8B 0x681180d2
+#define SVC_0 0x010000d4
+#else
+#define MOVZ_X8_8B 0xd2801168
+#define SVC_0 0xd4000001
+#endif
+
 template <typename A, typename R>
 bool UnwindCursor<A, R>::setInfoForSigReturn(Registers_arm64 &) {
   // Look for the sigreturn trampoline. The trampoline's body is two
@@ -2796,7 +2908,7 @@ bool UnwindCursor<A, R>::setInfoForSigReturn(Registers_arm64 &) {
     return false;
   auto *instructions = reinterpret_cast<const uint32_t *>(pc);
   // Look for instructions: mov x8, #0x8b; svc #0x0
-  if (instructions[0] != 0xd2801168 || instructions[1] != 0xd4000001)
+  if (instructions[0] != MOVZ_X8_8B || instructions[1] != SVC_0)
     return false;
 
   _info = {};
@@ -3188,16 +3300,22 @@ template <typename A, typename R> int UnwindCursor<A, R>::step(bool stage2) {
 template <typename A, typename R>
 void UnwindCursor<A, R>::getInfo(unw_proc_info_t *info) {
   if (_unwindInfoMissing)
-    memset(info, 0, sizeof(*info));
+    memset(static_cast<void *>(info), 0, sizeof(*info));
   else
     *info = _info;
 }
 
 template <typename A, typename R>
 bool UnwindCursor<A, R>::getFunctionName(char *buf, size_t bufLen,
-                                                           unw_word_t *offset) {
-  return _addressSpace.findFunctionName((pint_t)this->getReg(UNW_REG_IP),
-                                         buf, bufLen, offset);
+                                         unw_word_t *offset) {
+#if defined(_LIBUNWIND_TARGET_AARCH64_AUTHENTICATED_UNWINDING)
+  typename R::reg_t rawPC = this->getReg(UNW_REG_IP);
+  typename R::link_reg_t pc;
+  _registers.loadAndAuthenticateLinkRegister(rawPC, &pc);
+#else
+  typename R::link_reg_t pc = this->getReg(UNW_REG_IP);
+#endif
+  return _addressSpace.template findFunctionName<R>(pc, buf, bufLen, offset);
 }
 
 #if defined(_LIBUNWIND_CHECK_LINUX_SIGRETURN)
diff --git a/lib/libunwind/src/UnwindLevel1.c b/lib/libunwind/src/UnwindLevel1.c
index f3b451ad9b..7368b3cb80 100644
--- a/lib/libunwind/src/UnwindLevel1.c
+++ b/lib/libunwind/src/UnwindLevel1.c
@@ -48,16 +48,15 @@
 // avoided when invoking the `jumpto()` function. To do this, we use inline
 // assemblies to "goto" the `jumpto()` for these architectures.
 #if !defined(_LIBUNWIND_USE_CET) && !defined(_LIBUNWIND_USE_GCS)
-#define __unw_phase2_resume(cursor, fn)                                        \
+#define __unw_phase2_resume(cursor, payload)                                   \
   do {                                                                         \
-    (void)fn;                                                                  \
-    __unw_resume((cursor));                                                    \
+    __unw_resume_with_frames_walked((cursor), (payload));                      \
   } while (0)
 #elif defined(_LIBUNWIND_TARGET_I386)
 #define __shstk_step_size (4)
-#define __unw_phase2_resume(cursor, fn)                                        \
+#define __unw_phase2_resume(cursor, payload)                                   \
   do {                                                                         \
-    _LIBUNWIND_POP_SHSTK_SSP((fn));                                            \
+    _LIBUNWIND_POP_SHSTK_SSP((payload));                                       \
     void *shstkRegContext = __libunwind_shstk_get_registers((cursor));         \
     void *shstkJumpAddress = __libunwind_shstk_get_jump_target();              \
     __asm__ volatile("push %%edi\n\t"                                          \
@@ -67,9 +66,9 @@
   } while (0)
 #elif defined(_LIBUNWIND_TARGET_X86_64)
 #define __shstk_step_size (8)
-#define __unw_phase2_resume(cursor, fn)                                        \
+#define __unw_phase2_resume(cursor, payload)                                   \
   do {                                                                         \
-    _LIBUNWIND_POP_SHSTK_SSP((fn));                                            \
+    _LIBUNWIND_POP_SHSTK_SSP((payload));                                       \
     void *shstkRegContext = __libunwind_shstk_get_registers((cursor));         \
     void *shstkJumpAddress = __libunwind_shstk_get_jump_target();              \
     __asm__ volatile("jmpq *%%rdx\n\t" ::"D"(shstkRegContext),                 \
@@ -77,19 +76,37 @@
   } while (0)
 #elif defined(_LIBUNWIND_TARGET_AARCH64)
 #define __shstk_step_size (8)
-#define __unw_phase2_resume(cursor, fn)                                        \
+#define __unw_phase2_resume(cursor, payload)                                   \
   do {                                                                         \
-    _LIBUNWIND_POP_SHSTK_SSP((fn));                                            \
+    _LIBUNWIND_POP_SHSTK_SSP((payload));                                       \
     void *shstkRegContext = __libunwind_shstk_get_registers((cursor));         \
     void *shstkJumpAddress = __libunwind_shstk_get_jump_target();              \
     __asm__ volatile("mov x0, %0\n\t"                                          \
+                     "mov x1, #0\n\t"                                         \
                      "br %1\n\t"                                               \
                      :                                                         \
                      : "r"(shstkRegContext), "r"(shstkJumpAddress)             \
-                     : "x0");                                                  \
+                     : "x0", "x1");                                            \
   } while (0)
 #endif
 
+// We need this helper function as the semantics of casting between integers and
+// function pointers mean that we end up with a function pointer without the
+// correct signature. Instead we assign to an integer with a matching schema,
+// and then memmove the result into a variable of the correct type. This memmove
+// is possible as `_Unwind_Personality_Fn` is a standard function pointer, and
+// as such is not address diversified.
+static _Unwind_Personality_Fn get_handler_function(unw_proc_info_t *frameInfo) {
+  uintptr_t __unwind_ptrauth_restricted_intptr(ptrauth_key_function_pointer,
+                                               0,
+                                               ptrauth_function_pointer_type_discriminator(_Unwind_Personality_Fn))
+    reauthenticatedIntegerHandler = frameInfo->handler;
+  _Unwind_Personality_Fn handler;
+  memmove(&handler, (void *)&reauthenticatedIntegerHandler,
+          sizeof(_Unwind_Personality_Fn));
+  return handler;
+}
+
 static _Unwind_Reason_Code
 unwind_phase1(unw_context_t *uc, unw_cursor_t *cursor, _Unwind_Exception *exception_object) {
   __unw_init_local(cursor, uc);
@@ -147,8 +164,7 @@ unwind_phase1(unw_context_t *uc, unw_cursor_t *cursor, _Unwind_Exception *except
     // If there is a personality routine, ask it if it will want to stop at
     // this frame.
     if (frameInfo.handler != 0) {
-      _Unwind_Personality_Fn p =
-          (_Unwind_Personality_Fn)(uintptr_t)(frameInfo.handler);
+      _Unwind_Personality_Fn p = get_handler_function(&frameInfo);
       _LIBUNWIND_TRACE_UNWINDING(
           "unwind_phase1(ex_obj=%p): calling personality function %p",
           (void *)exception_object, (void *)(uintptr_t)p);
@@ -184,11 +200,12 @@ unwind_phase1(unw_context_t *uc, unw_cursor_t *cursor, _Unwind_Exception *except
   }
   return _URC_NO_REASON;
 }
-extern int __unw_step_stage2(unw_cursor_t *);
 
 #if defined(_LIBUNWIND_USE_GCS)
 // Enable the GCS target feature to permit gcspop instructions to be used.
 __attribute__((target("+gcs")))
+#else
+_LIBUNWIND_TRACE_NO_INLINE
 #endif
 static _Unwind_Reason_Code
 unwind_phase2(unw_context_t *uc, unw_cursor_t *cursor,
@@ -276,8 +293,7 @@ unwind_phase2(unw_context_t *uc, unw_cursor_t *cursor,
     ++framesWalked;
     // If there is a personality routine, tell it we are unwinding.
     if (frameInfo.handler != 0) {
-      _Unwind_Personality_Fn p =
-          (_Unwind_Personality_Fn)(uintptr_t)(frameInfo.handler);
+      _Unwind_Personality_Fn p = get_handler_function(&frameInfo);
       _Unwind_Action action = _UA_CLEANUP_PHASE;
       if (sp == exception_object->private_2) {
         // Tell personality this was the frame it marked in phase 1.
@@ -334,6 +350,8 @@ unwind_phase2(unw_context_t *uc, unw_cursor_t *cursor,
 #if defined(_LIBUNWIND_USE_GCS)
 // Enable the GCS target feature to permit gcspop instructions to be used.
 __attribute__((target("+gcs")))
+#else
+_LIBUNWIND_TRACE_NO_INLINE
 #endif
 static _Unwind_Reason_Code
 unwind_phase2_forced(unw_context_t *uc, unw_cursor_t *cursor,
@@ -394,8 +412,7 @@ unwind_phase2_forced(unw_context_t *uc, unw_cursor_t *cursor,
     ++framesWalked;
     // If there is a personality routine, tell it we are unwinding.
     if (frameInfo.handler != 0) {
-      _Unwind_Personality_Fn p =
-          (_Unwind_Personality_Fn)(intptr_t)(frameInfo.handler);
+      _Unwind_Personality_Fn p = get_handler_function(&frameInfo);
       _LIBUNWIND_TRACE_UNWINDING(
           "unwind_phase2_forced(ex_obj=%p): calling personality function %p",
           (void *)exception_object, (void *)(uintptr_t)p);
@@ -597,6 +614,18 @@ _LIBUNWIND_EXPORT uintptr_t _Unwind_GetIP(struct _Unwind_Context *context) {
   unw_cursor_t *cursor = (unw_cursor_t *)context;
   unw_word_t result;
   __unw_get_reg(cursor, UNW_REG_IP, &result);
+
+#if defined(_LIBUNWIND_TARGET_AARCH64_AUTHENTICATED_UNWINDING)
+  // If we are in an arm64e frame, then the PC should have been signed with the
+  // sp
+  {
+    unw_word_t sp;
+    __unw_get_reg(cursor, UNW_REG_SP, &sp);
+    result = (unw_word_t)ptrauth_auth_data((void *)result,
+                                           ptrauth_key_return_address, sp);
+  }
+#endif
+
   _LIBUNWIND_TRACE_API("_Unwind_GetIP(context=%p) => 0x%" PRIxPTR,
                        (void *)context, result);
   return (uintptr_t)result;
diff --git a/lib/libunwind/src/UnwindRegistersRestore.S b/lib/libunwind/src/UnwindRegistersRestore.S
index 1bcd205be2..76a8034403 100644
--- a/lib/libunwind/src/UnwindRegistersRestore.S
+++ b/lib/libunwind/src/UnwindRegistersRestore.S
@@ -6,6 +6,8 @@
 //
 //===----------------------------------------------------------------------===//
 
+#if !defined(__wasm__)
+
 #include "assembly.h"
 
 #define FROM_0_TO_15 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15
@@ -16,13 +18,17 @@
 
 #if defined(_AIX)
   .toc
+#elif defined(__aarch64__) && defined(__ELF__) && defined(_LIBUNWIND_EXECUTE_ONLY_CODE)
+  .section .text,"axy",@progbits,unique,0
 #else
   .text
 #endif
 
-#if !defined(__USING_SJLJ_EXCEPTIONS__) && !defined(__wasm__)
+#if !defined(__USING_SJLJ_EXCEPTIONS__)
 
 #if defined(__i386__)
+.att_syntax
+
 DEFINE_LIBUNWIND_FUNCTION(__libunwind_Registers_x86_jumpto)
 #
 # extern "C" void __libunwind_Registers_x86_jumpto(Registers_x86 *);
@@ -67,6 +73,7 @@ DEFINE_LIBUNWIND_FUNCTION(__libunwind_Registers_x86_jumpto)
   # skip gs
 
 #elif defined(__x86_64__) && !defined(__arm64ec__)
+.att_syntax
 
 DEFINE_LIBUNWIND_FUNCTION(__libunwind_Registers_x86_64_jumpto)
 #
@@ -629,18 +636,35 @@ Lnovec:
 
 #elif defined(__aarch64__)
 
+#ifndef __has_feature
+#define __has_feature(__feature) 0
+#endif
+
 #if defined(__ARM_FEATURE_GCS_DEFAULT)
 .arch_extension gcs
 #endif
 
 //
-// extern "C" void __libunwind_Registers_arm64_jumpto(Registers_arm64 *);
+// extern "C" void __libunwind_Registers_arm64_jumpto(Registers_arm64 *, unsigned);
 //
 // On entry:
 //  thread_state pointer is in x0
+//  walked_frames counter is in x1
 //
   .p2align 2
 DEFINE_LIBUNWIND_FUNCTION(__libunwind_Registers_arm64_jumpto)
+
+  #if defined(_LIBUNWIND_TRACE_RET_INJECT)
+    cbz     w1, 1f
+  0:
+    subs    w1, w1, #1
+    adr     x16, #8
+    ret     x16
+
+    b.ne    0b
+  1:
+  #endif
+
   // skip restore of x0,x1 for now
   ldp    x2, x3,  [x0, #0x010]
   ldp    x4, x5,  [x0, #0x020]
@@ -657,7 +681,7 @@ DEFINE_LIBUNWIND_FUNCTION(__libunwind_Registers_arm64_jumpto)
   ldp    x24,x25, [x0, #0x0C0]
   ldp    x26,x27, [x0, #0x0D0]
   ldp    x28,x29, [x0, #0x0E0]
-  ldr    x30,     [x0, #0x100]  // restore pc into lr
+
 #if defined(__ARM_FP) && __ARM_FP != 0
   ldp    d0, d1,  [x0, #0x110]
   ldp    d2, d3,  [x0, #0x120]
@@ -681,7 +705,18 @@ DEFINE_LIBUNWIND_FUNCTION(__libunwind_Registers_arm64_jumpto)
   // context struct, because it is allocated on the stack, and an exception
   // could clobber the de-allocated portion of the stack after sp has been
   // restored.
-  ldr    x16,     [x0, #0x0F8]
+
+  ldr    x16,     [x0, #0x0F8]  // load sp into scratch
+  ldr    lr,      [x0, #0x100]  // restore pc into lr
+
+#if __has_feature(ptrauth_calls)
+  // The LR is signed with its address inside the register state.  Time
+  // to re-sign it as a regular ROP-protected signed pointer.
+  add    x1, x0, #0x100
+  autib  lr, x1
+  pacib  lr, x16  // signed the scratch register for sp
+#endif
+
   ldp    x0, x1,  [x0, #0x000]  // restore x0,x1
   mov    sp,x16                 // restore sp
 #if defined(__ARM_FEATURE_GCS_DEFAULT)
@@ -694,7 +729,12 @@ DEFINE_LIBUNWIND_FUNCTION(__libunwind_Registers_arm64_jumpto)
   gcspushm x30
 Lnogcs:
 #endif
+
+#if __has_feature(ptrauth_calls)
+  retab
+#else
   ret    x30                    // jump to pc
+#endif
 
 #elif defined(__arm__) && !defined(__APPLE__)
 
@@ -1253,7 +1293,8 @@ DEFINE_LIBUNWIND_FUNCTION(_ZN9libunwind19Registers_loongarch6jumptoEv)
 
 #endif
 
-#endif /* !defined(__USING_SJLJ_EXCEPTIONS__) && !defined(__wasm__) */
+#endif /* !defined(__USING_SJLJ_EXCEPTIONS__) */
 
 NO_EXEC_STACK_DIRECTIVE
 
+#endif /* !defined(__wasm__) */
diff --git a/lib/libunwind/src/UnwindRegistersSave.S b/lib/libunwind/src/UnwindRegistersSave.S
index 5139a551ad..f988fd461d 100644
--- a/lib/libunwind/src/UnwindRegistersSave.S
+++ b/lib/libunwind/src/UnwindRegistersSave.S
@@ -6,6 +6,8 @@
 //
 //===----------------------------------------------------------------------===//
 
+#if !defined(__wasm__)
+
 #include "assembly.h"
 
 #define FROM_0_TO_15 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15
@@ -16,13 +18,16 @@
 
 #if defined(_AIX)
     .toc
+#elif defined(__aarch64__) && defined(__ELF__) && defined(_LIBUNWIND_EXECUTE_ONLY_CODE)
+    .section .text,"axy",@progbits,unique,0
 #else
     .text
 #endif
 
-#if !defined(__USING_SJLJ_EXCEPTIONS__) && !defined(__wasm__)
+#if !defined(__USING_SJLJ_EXCEPTIONS__)
 
 #if defined(__i386__)
+.att_syntax
 
 #
 # extern int __unw_getcontext(unw_context_t* thread_state)
@@ -107,6 +112,7 @@ DEFINE_LIBUNWIND_FUNCTION("#__unw_getcontext")
   .text
 
 #elif defined(__x86_64__)
+.att_syntax
 
 #
 # extern int __unw_getcontext(unw_context_t* thread_state)
@@ -759,6 +765,10 @@ LnoR2Fix:
 
 #elif defined(__aarch64__)
 
+#ifndef __has_feature
+#define __has_feature(__feature) 0
+#endif
+
 //
 // extern int __unw_getcontext(unw_context_t* thread_state)
 //
@@ -767,6 +777,11 @@ LnoR2Fix:
 //
   .p2align 2
 DEFINE_LIBUNWIND_FUNCTION(__unw_getcontext)
+
+#if __has_feature(ptrauth_calls)
+  pacibsp
+#endif
+
   stp    x0, x1,  [x0, #0x000]
   stp    x2, x3,  [x0, #0x010]
   stp    x4, x5,  [x0, #0x020]
@@ -807,7 +822,74 @@ DEFINE_LIBUNWIND_FUNCTION(__unw_getcontext)
   str    d31,     [x0, #0x208]
 #endif
   mov    x0, #0                   // return UNW_ESUCCESS
+
+#if __has_feature(ptrauth_calls)
+  retab
+#else
   ret
+#endif
+
+//
+// extern "C" int64_t __libunwind_Registers_arm64_za_disable()
+//
+//   This function implements the requirements of the __arm_za_disable ABI
+//   routine, except that it will not abort; it will return a non-zero value
+//   to signify the routine failed.
+//
+//   Note: This function uses SME instructions. It must only be called if SME
+//   has been confirmed to be available.
+//
+// On return:
+//
+//   A status is placed in x0. A zero value indicates success; any non-zero
+//   value indicates failure.
+//
+  .p2align 2
+DEFINE_LIBUNWIND_FUNCTION(__libunwind_Registers_arm64_za_disable)
+  .variant_pcs __libunwind_Registers_arm64_za_disable
+#if __has_feature(ptrauth_calls)
+  pacibsp
+#endif
+  // If TPIDR2_EL0 is null, the subroutine just disables ZA.
+  .inst 0xd53bd0b0 // mrs x16, TPIDR2_EL0
+  cbz x16, 1f
+
+  // If any of the reserved bytes in the first 16 bytes of the TPIDR2 block are
+  // nonzero, return a non-zero value (libunwind will then abort).
+  ldrh w0, [x16, #10]
+  cbnz w0, 2f
+  ldr w0, [x16, #12]
+  cbnz w0, 2f
+
+  // If num_za_save_slices is zero, the subroutine just disables ZA.
+  ldrh w0, [x16, #8]
+  cbz x0, 1f
+
+  // If za_save_buffer is NULL, the subroutine just disables ZA.
+  ldr x16, [x16]
+  cbz x16, 1f
+
+  // Store ZA to za_save_buffer.
+  mov x15, xzr
+0:
+  .inst 0xe1206200 // str za[w15,0], [x16]
+  .inst 0x04305830 // addsvl x16, x16, #1
+  add x15, x15, #1
+  cmp x0, x15
+  b.ne 0b
+1:
+  // * Set TPIDR2_EL0 to null.
+  .inst 0xd51bd0bf // msr TPIDR2_EL0, xzr
+  // * Set PSTATE.ZA to 0.
+  .inst 0xd503447f // smstop za
+  // * Return zero (success)
+  mov x0, xzr
+2:
+#if __has_feature(ptrauth_calls)
+  retab
+#else
+  ret
+#endif
 
 #elif defined(__arm__) && !defined(__APPLE__)
 
@@ -1232,6 +1314,8 @@ DEFINE_LIBUNWIND_FUNCTION(__unw_getcontext)
   WEAK_ALIAS(__unw_getcontext, unw_getcontext)
 #endif
 
-#endif /* !defined(__USING_SJLJ_EXCEPTIONS__) && !defined(__wasm__) */
+#endif /* !defined(__USING_SJLJ_EXCEPTIONS__) */
 
 NO_EXEC_STACK_DIRECTIVE
+
+#endif /* !defined(__wasm__) */
diff --git a/lib/libunwind/src/assembly.h b/lib/libunwind/src/assembly.h
index f8e83e138e..84c9d526f1 100644
--- a/lib/libunwind/src/assembly.h
+++ b/lib/libunwind/src/assembly.h
@@ -15,7 +15,7 @@
 #ifndef UNWIND_ASSEMBLY_H
 #define UNWIND_ASSEMBLY_H
 
-#if defined(__linux__) && defined(__CET__)
+#if defined(__CET__)
 #include <cet.h>
 #define _LIBUNWIND_CET_ENDBR _CET_ENDBR
 #else
@@ -132,6 +132,10 @@
 
 #if defined(__APPLE__)
 
+#if defined(__aarch64__) || defined(__arm64__) || defined(__arm64e__)
+#define _LIBUNWIND_TRACE_RET_INJECT 1
+#endif
+
 #define SYMBOL_IS_FUNC(name)
 #define HIDDEN_SYMBOL(name) .private_extern name
 #if defined(_LIBUNWIND_HIDE_SYMBOLS)
diff --git a/lib/libunwind/src/config.h b/lib/libunwind/src/config.h
index deb5a4d4d7..f017403fa2 100644
--- a/lib/libunwind/src/config.h
+++ b/lib/libunwind/src/config.h
@@ -28,6 +28,9 @@
     #define _LIBUNWIND_SUPPORT_COMPACT_UNWIND 1
     #define _LIBUNWIND_SUPPORT_DWARF_UNWIND 1
   #endif
+  #if defined(__aarch64__) || defined(__arm64__) || defined(__arm64e__)
+    #define _LIBUNWIND_TRACE_RET_INJECT 1
+  #endif
 #elif defined(_WIN32)
   #ifdef __SEH__
     #define _LIBUNWIND_SUPPORT_SEH_UNWIND 1
@@ -61,6 +64,12 @@
   #endif
 #endif
 
+#ifdef _LIBUNWIND_TRACE_RET_INJECT
+#define _LIBUNWIND_TRACE_NO_INLINE __attribute__((noinline, disable_tail_calls))
+#else
+#define _LIBUNWIND_TRACE_NO_INLINE
+#endif
+
 #if defined(_LIBUNWIND_HIDE_SYMBOLS)
   // The CMake file passes -fvisibility=hidden to control ELF/Mach-O visibility.
   #define _LIBUNWIND_EXPORT
diff --git a/lib/libunwind/src/gcc_personality_v0.c b/lib/libunwind/src/gcc_personality_v0.c
index 18f5a9cf89..63003e3b31 100644
--- a/lib/libunwind/src/gcc_personality_v0.c
+++ b/lib/libunwind/src/gcc_personality_v0.c
@@ -31,6 +31,58 @@ EXCEPTION_DISPOSITION _GCC_specific_handler(PEXCEPTION_RECORD, void *, PCONTEXT,
                                             _Unwind_Personality_Fn);
 #endif
 
+#ifndef __has_feature
+#define __has_feature(__feature) 0
+#endif
+
+#if __has_feature(ptrauth_calls)
+#include <ptrauth.h>
+
+// `__ptrauth_restricted_intptr` is a feature of Apple Clang that predates
+// support for direct application of `__ptrauth` to integer types. This
+// guard is necessary to support compilation with those compilers.
+#if __has_feature(ptrauth_restricted_intptr_qualifier)
+#define __ptrauth_gcc_personality_intptr(key, addressDiscriminated,            \
+                                         discriminator)                        \
+  __ptrauth_restricted_intptr(key, addressDiscriminated, discriminator)
+#else
+#define __ptrauth_gcc_personality_intptr(key, addressDiscriminated,            \
+                                         discriminator)                        \
+  __ptrauth(key, addressDiscriminated, discriminator)
+#endif
+#else
+#define __ptrauth_gcc_personality_intptr(...)
+#endif
+
+#define __ptrauth_gcc_personality_func_key ptrauth_key_function_pointer
+
+// ptrauth_string_discriminator("__gcc_personality_v0'funcStart") == 0xDFEB
+#define __ptrauth_gcc_personality_func_start                                   \
+  __ptrauth_gcc_personality_intptr(__ptrauth_gcc_personality_func_key, 1,      \
+                                   0xDFEB)
+
+// ptrauth_string_discriminator("__gcc_personality_v0'start") == 0x52DC
+#define __ptrauth_gcc_personality_start                                        \
+  __ptrauth_gcc_personality_intptr(__ptrauth_gcc_personality_func_key, 1,      \
+                                   0x52DC)
+
+// ptrauth_string_discriminator("__gcc_personality_v0'length") == 0xFFF7
+#define __ptrauth_gcc_personality_length                                       \
+  __ptrauth_gcc_personality_intptr(__ptrauth_gcc_personality_func_key, 1,      \
+                                   0xFFF7)
+
+// ptrauth_string_discriminator("__gcc_personality_v0'landingPadOffset") ==
+// 0x6498
+#define __ptrauth_gcc_personality_lpoffset                                     \
+  __ptrauth_gcc_personality_intptr(__ptrauth_gcc_personality_func_key, 1,      \
+                                   0x6498)
+
+// ptrauth_string_discriminator("__gcc_personality_v0'landingPad") == 0xA134
+#define __ptrauth_gcc_personality_lpad_disc 0xA134
+#define __ptrauth_gcc_personality_lpad                                         \
+  __ptrauth_gcc_personality_intptr(__ptrauth_gcc_personality_func_key, 1,      \
+                                   __ptrauth_gcc_personality_lpad_disc)
+
 // Pointer encodings documented at:
 //   http://refspecs.freestandards.org/LSB_1.3.0/gLSB/gLSB/ehframehdr.html
 
@@ -206,7 +258,8 @@ COMPILER_RT_ABI _Unwind_Reason_Code __gcc_personality_v0(
     return continueUnwind(exceptionObject, context);
 
   uintptr_t pc = (uintptr_t)_Unwind_GetIP(context) - 1;
-  uintptr_t funcStart = (uintptr_t)_Unwind_GetRegionStart(context);
+  uintptr_t __ptrauth_gcc_personality_func_start funcStart =
+      (uintptr_t)_Unwind_GetRegionStart(context);
   uintptr_t pcOffset = pc - funcStart;
 
   // Parse LSDA header.
@@ -225,11 +278,14 @@ COMPILER_RT_ABI _Unwind_Reason_Code __gcc_personality_v0(
   const uint8_t *callSiteTableEnd = callSiteTableStart + callSiteTableLength;
   const uint8_t *p = callSiteTableStart;
   while (p < callSiteTableEnd) {
-    uintptr_t start = readEncodedPointer(&p, callSiteEncoding);
-    size_t length = readEncodedPointer(&p, callSiteEncoding);
-    size_t landingPad = readEncodedPointer(&p, callSiteEncoding);
+    uintptr_t __ptrauth_gcc_personality_start start =
+        readEncodedPointer(&p, callSiteEncoding);
+    size_t __ptrauth_gcc_personality_length length =
+        readEncodedPointer(&p, callSiteEncoding);
+    size_t __ptrauth_gcc_personality_lpoffset landingPadOffset =
+        readEncodedPointer(&p, callSiteEncoding);
     readULEB128(&p); // action value not used for C code
-    if (landingPad == 0)
+    if (landingPadOffset == 0)
       continue; // no landing pad for this entry
     if ((start <= pcOffset) && (pcOffset < (start + length))) {
       // Found landing pad for the PC.
@@ -239,7 +295,24 @@ COMPILER_RT_ABI _Unwind_Reason_Code __gcc_personality_v0(
       _Unwind_SetGR(context, __builtin_eh_return_data_regno(0),
                     (uintptr_t)exceptionObject);
       _Unwind_SetGR(context, __builtin_eh_return_data_regno(1), 0);
-      _Unwind_SetIP(context, (funcStart + landingPad));
+      size_t __ptrauth_gcc_personality_lpad landingPad =
+          funcStart + landingPadOffset;
+#if __has_feature(ptrauth_calls)
+      uintptr_t stackPointer = _Unwind_GetGR(context, -2);
+      const uintptr_t existingDiscriminator = ptrauth_blend_discriminator(
+          &landingPad, __ptrauth_gcc_personality_lpad_disc);
+      // newIP is authenticated as if it were qualified with a pseudo qualifier
+      // along the lines of:
+      //   __ptrauth(ptrauth_key_return_address, <stackPointer>, 0)
+      // where the stack pointer is used in place of the strict storage
+      // address.
+      uintptr_t newIP = (uintptr_t)ptrauth_auth_and_resign(
+          *(void **)&landingPad, __ptrauth_gcc_personality_func_key,
+          existingDiscriminator, ptrauth_key_return_address, stackPointer);
+      _Unwind_SetIP(context, newIP);
+#else
+      _Unwind_SetIP(context, landingPad);
+#endif
       return _URC_INSTALL_CONTEXT;
     }
   }
diff --git a/lib/libunwind/src/libunwind.cpp b/lib/libunwind/src/libunwind.cpp
index cf39ec5f7d..7ffffc2a30 100644
--- a/lib/libunwind/src/libunwind.cpp
+++ b/lib/libunwind/src/libunwind.cpp
@@ -118,14 +118,55 @@ _LIBUNWIND_HIDDEN int __unw_set_reg(unw_cursor_t *cursor, unw_regnum_t regNum,
   typedef LocalAddressSpace::pint_t pint_t;
   AbstractUnwindCursor *co = (AbstractUnwindCursor *)cursor;
   if (co->validReg(regNum)) {
-    co->setReg(regNum, (pint_t)value);
     // special case altering IP to re-find info (being called by personality
     // function)
     if (regNum == UNW_REG_IP) {
       unw_proc_info_t info;
       // First, get the FDE for the old location and then update it.
       co->getInfo(&info);
-      co->setInfoBasedOnIPRegister(false);
+
+      pint_t sp = (pint_t)co->getReg(UNW_REG_SP);
+
+#if defined(_LIBUNWIND_TARGET_AARCH64_AUTHENTICATED_UNWINDING)
+      {
+        // It is only valid to set the IP within the current function. This is
+        // important for ptrauth, otherwise the IP cannot be correctly signed.
+        // The current signature of `value` is via the schema:
+        //   __ptrauth(ptrauth_key_return_address, <<sp>>, 0)
+        // For this to be generally usable we manually re-sign it to the
+        // directly supported schema:
+        //   __ptrauth(ptrauth_key_return_address, 1, 0)
+        unw_word_t
+              __unwind_ptrauth_restricted_intptr(ptrauth_key_return_address, 1,
+                                                 0) authenticated_value;
+        unw_word_t opaque_value = (uint64_t)ptrauth_auth_and_resign(
+            (void *)value, ptrauth_key_return_address, sp,
+            ptrauth_key_return_address, &authenticated_value);
+        memmove(reinterpret_cast<void *>(&authenticated_value),
+                reinterpret_cast<void *>(&opaque_value),
+                sizeof(authenticated_value));
+        if (authenticated_value < info.start_ip ||
+            authenticated_value > info.end_ip)
+          _LIBUNWIND_ABORT("PC vs frame info mismatch");
+
+        // PC should have been signed with the sp, so we verify that
+        // round-tripping does not fail. `ptrauth_auth_and_resign` is
+        // guaranteed to trap on authentication failure even without the
+        // FPAC feature.
+        pint_t pc = (pint_t)co->getReg(UNW_REG_IP);
+        if (ptrauth_auth_and_resign((void *)pc, ptrauth_key_return_address, sp,
+                                    ptrauth_key_return_address,
+                                    sp) != (void *)pc) {
+          _LIBUNWIND_LOG(
+              "Bad unwind with PAuth-enabled ABI (0x%zX, 0x%zX)->0x%zX\n", pc,
+              sp,
+              (pint_t)ptrauth_auth_data((void *)pc, ptrauth_key_return_address,
+                                        sp));
+          _LIBUNWIND_ABORT("Bad unwind with PAuth-enabled ABI");
+        }
+      }
+#endif
+
       // If the original call expects stack adjustment, perform this now.
       // Normal frame unwinding would have included the offset already in the
       // CFA computation.
@@ -133,7 +174,11 @@ _LIBUNWIND_HIDDEN int __unw_set_reg(unw_cursor_t *cursor, unw_regnum_t regNum,
       // this should actually be - info.gp. LLVM doesn't currently support
       // any such platforms and Clang doesn't export a macro for them.
       if (info.gp)
-        co->setReg(UNW_REG_SP, co->getReg(UNW_REG_SP) + info.gp);
+        co->setReg(UNW_REG_SP, sp + info.gp);
+      co->setReg(UNW_REG_IP, value);
+      co->setInfoBasedOnIPRegister(false);
+    } else {
+      co->setReg(regNum, (pint_t)value);
     }
     return UNW_ESUCCESS;
   }
@@ -205,7 +250,27 @@ _LIBUNWIND_HIDDEN int __unw_get_proc_info(unw_cursor_t *cursor,
 }
 _LIBUNWIND_WEAK_ALIAS(__unw_get_proc_info, unw_get_proc_info)
 
-/// Resume execution at cursor position (aka longjump).
+/// Record `walkedFrames` on the cursor (under _LIBUNWIND_TRACE_RET_INJECT) so
+/// the resume path can inject a matching number of `ret` instructions, then
+/// resume execution at the cursor position (aka longjump).
+_LIBUNWIND_HIDDEN int __unw_resume_with_frames_walked(unw_cursor_t *cursor,
+                                                      unsigned walkedFrames) {
+  _LIBUNWIND_TRACE_API("__unw_resume_with_frames_walked(cursor=%p, "
+                       "walkedFrames=%u)", static_cast<void *>(cursor), walkedFrames);
+#if __has_feature(address_sanitizer) || defined(__SANITIZE_ADDRESS__)
+  // Inform the ASan runtime that now might be a good time to clean stuff up.
+  __asan_handle_no_return();
+#endif
+#ifdef _LIBUNWIND_TRACE_RET_INJECT
+  AbstractUnwindCursor *co = (AbstractUnwindCursor *)cursor;
+  co->setWalkedFrames(walkedFrames);
+#endif
+  return __unw_resume(cursor);
+}
+_LIBUNWIND_WEAK_ALIAS(__unw_resume_with_frames_walked,
+                      unw_resume_with_frames_walked)
+
+/// Legacy function. Resume execution at cursor position (aka longjump).
 _LIBUNWIND_HIDDEN int __unw_resume(unw_cursor_t *cursor) {
   _LIBUNWIND_TRACE_API("__unw_resume(cursor=%p)", static_cast<void *>(cursor));
 #if __has_feature(address_sanitizer) || defined(__SANITIZE_ADDRESS__)
@@ -347,6 +412,41 @@ void __unw_remove_dynamic_eh_frame_section(unw_word_t eh_frame_start) {
 }
 
 #endif // defined(_LIBUNWIND_SUPPORT_DWARF_UNWIND)
+
+/// Maps the UNW_* error code to a textual representation
+_LIBUNWIND_HIDDEN const char *__unw_strerror(int error_code) {
+  switch (error_code) {
+  case UNW_ESUCCESS:
+    return "no error";
+  case UNW_EUNSPEC:
+    return "unspecified (general) error";
+  case UNW_ENOMEM:
+    return "out of memory";
+  case UNW_EBADREG:
+    return "bad register number";
+  case UNW_EREADONLYREG:
+    return "attempt to write read-only register";
+  case UNW_ESTOPUNWIND:
+    return "stop unwinding";
+  case UNW_EINVALIDIP:
+    return "invalid IP";
+  case UNW_EBADFRAME:
+    return "bad frame";
+  case UNW_EINVAL:
+    return "unsupported operation or bad value";
+  case UNW_EBADVERSION:
+    return "unwind info has unsupported version";
+  case UNW_ENOINFO:
+    return "no unwind info found";
+#if defined(_LIBUNWIND_TARGET_AARCH64) && !defined(_LIBUNWIND_IS_NATIVE_ONLY)
+  case UNW_ECROSSRASIGNING:
+    return "cross unwind with return address signing";
+#endif
+  }
+  return "invalid error code";
+}
+_LIBUNWIND_WEAK_ALIAS(__unw_strerror, unw_strerror)
+
 #endif // !defined(__USING_SJLJ_EXCEPTIONS__) && !defined(__wasm__)
 
 #ifdef __APPLE__
diff --git a/lib/libunwind/src/libunwind_ext.h b/lib/libunwind/src/libunwind_ext.h
index 28db43a4f6..b3762c24d7 100644
--- a/lib/libunwind/src/libunwind_ext.h
+++ b/lib/libunwind/src/libunwind_ext.h
@@ -26,11 +26,16 @@ extern "C" {
 extern int __unw_getcontext(unw_context_t *);
 extern int __unw_init_local(unw_cursor_t *, unw_context_t *);
 extern int __unw_step(unw_cursor_t *);
+extern int __unw_step_stage2(unw_cursor_t *);
 extern int __unw_get_reg(unw_cursor_t *, unw_regnum_t, unw_word_t *);
 extern int __unw_get_fpreg(unw_cursor_t *, unw_regnum_t, unw_fpreg_t *);
 extern int __unw_set_reg(unw_cursor_t *, unw_regnum_t, unw_word_t);
 extern int __unw_set_fpreg(unw_cursor_t *, unw_regnum_t, unw_fpreg_t);
-extern int __unw_resume(unw_cursor_t *);
+_LIBUNWIND_TRACE_NO_INLINE
+  extern int __unw_resume_with_frames_walked(unw_cursor_t *, unsigned);
+// `__unw_resume` is a legacy function. Use `__unw_resume_with_frames_walked` instead.
+_LIBUNWIND_TRACE_NO_INLINE
+  extern int __unw_resume(unw_cursor_t *);
 
 #ifdef __arm__
 /* Save VFP registers in FSTMX format (instead of FSTMD). */
@@ -42,6 +47,7 @@ extern int __unw_get_proc_info(unw_cursor_t *, unw_proc_info_t *);
 extern int __unw_is_fpreg(unw_cursor_t *, unw_regnum_t);
 extern int __unw_is_signal_frame(unw_cursor_t *);
 extern int __unw_get_proc_name(unw_cursor_t *, char *, size_t, unw_word_t *);
+extern const char *__unw_strerror(int);
 
 #if defined(_AIX)
 extern uintptr_t __unw_get_data_rel_base(unw_cursor_t *);
diff --git a/lib/libunwind/src/shadow_stack_unwind.h b/lib/libunwind/src/shadow_stack_unwind.h
index 1f229d8317..b00ca2c932 100644
--- a/lib/libunwind/src/shadow_stack_unwind.h
+++ b/lib/libunwind/src/shadow_stack_unwind.h
@@ -12,8 +12,8 @@
 
 #include "libunwind.h"
 
-// Currently, CET is implemented on Linux x86 platforms.
-#if defined(_LIBUNWIND_TARGET_LINUX) && defined(__CET__) && defined(__SHSTK__)
+// Currently, CET is implemented on some ELF x86 platforms.
+#if defined(__CET__) && defined(__SHSTK__)
 #define _LIBUNWIND_USE_CET 1
 #endif
 
diff --git a/lib/std/Io/test.zig b/lib/std/Io/test.zig
index 20426acb9c..46376a1825 100644
--- a/lib/std/Io/test.zig
+++ b/lib/std/Io/test.zig
@@ -137,7 +137,6 @@ test "File.setLength" {
 test "legacy setLength" {
     // https://github.com/ziglang/zig/issues/20747 (open fd does not have write permission)
     if (builtin.os.tag == .wasi and builtin.link_libc) return error.SkipZigTest;
-    if (builtin.cpu.arch.isMIPS64() and (builtin.abi == .gnuabin32 or builtin.abi == .muslabin32)) return error.SkipZigTest; // https://github.com/ziglang/zig/issues/23806
 
     const io = testing.io;
 
diff --git a/lib/std/Target.zig b/lib/std/Target.zig
index 2b06ba33f4..509528b76b 100644
--- a/lib/std/Target.zig
+++ b/lib/std/Target.zig
@@ -845,6 +845,12 @@ pub const Abi = enum {
                 => .eabi,
                 else => .none,
             },
+            .fuchsia => switch (arch) {
+                .arm,
+                .thumb,
+                => .eabihf,
+                else => .none,
+            },
             .haiku => switch (arch) {
                 .arm,
                 .powerpc,
@@ -928,7 +934,6 @@ pub const Abi = enum {
             .wasi, .emscripten => .musl,
 
             .contiki,
-            .fuchsia,
             .hermit,
             .illumos,
             .managarm,
@@ -1014,6 +1019,7 @@ pub const Abi = enum {
             .gnueabi,
             .musleabi,
             .gnusf,
+            .muslsf,
             .ohoseabi,
             => .soft,
             else => .hard,
@@ -1219,7 +1225,7 @@ pub const Cpu = struct {
         pub const Set = struct {
             ints: [usize_count]usize,
 
-            pub const needed_bit_count = 317;
+            pub const needed_bit_count = 347;
             pub const byte_count = (needed_bit_count + 7) / 8;
             pub const usize_count = (byte_count + (@sizeOf(usize) - 1)) / @sizeOf(usize);
             pub const Index = std.math.Log2Int(std.meta.Int(.unsigned, usize_count * @bitSizeOf(usize)));
@@ -2055,6 +2061,7 @@ pub const Cpu = struct {
                 .hppa => &hppa.cpu.pa_7300lc,
                 .kvx => &kvx.cpu.coolidge_v2,
                 .lanai => &lanai.cpu.v11, // clang does not have a generic lanai model.
+                .loongarch32 => &loongarch.cpu.la32v1_0,
                 .loongarch64 => &loongarch.cpu.la64v1_0,
                 .m68k => &m68k.cpu.M68000,
                 .mips => &mips.cpu.mips32r2,
@@ -2442,8 +2449,10 @@ pub const DynamicLinker = struct {
     pub fn standard(cpu: Cpu, os: Os, abi: Abi) DynamicLinker {
         return switch (os.tag) {
             .fuchsia => switch (cpu.arch) {
+                .arm,
                 .aarch64,
                 .riscv64,
+                .thumb,
                 .x86_64,
                 => init("ld.so.1"), // Fuchsia is unusual in that `DT_INTERP` is just a basename.
                 else => none,
diff --git a/lib/std/Target/aarch64.zig b/lib/std/Target/aarch64.zig
index c923cb8f0a..59dc81cfd2 100644
--- a/lib/std/Target/aarch64.zig
+++ b/lib/std/Target/aarch64.zig
@@ -9,6 +9,7 @@ pub const Feature = enum {
     addr_lsl_slow_14,
     aes,
     aggressive_fma,
+    aggressive_interleaving,
     alternate_sextload_cvt_f32_pattern,
     altnzcv,
     alu_lsl_fast,
@@ -22,6 +23,7 @@ pub const Feature = enum {
     bf16,
     brbe,
     bti,
+    btie,
     call_saved_x10,
     call_saved_x11,
     call_saved_x12,
@@ -36,6 +38,7 @@ pub const Feature = enum {
     ccpp,
     chk,
     clrbhb,
+    cmh,
     cmp_bcc_fusion,
     cmpbr,
     complxnum,
@@ -48,7 +51,9 @@ pub const Feature = enum {
     disable_fast_inc_vl,
     disable_latency_sched_heuristic,
     disable_ldp,
+    disable_maximize_scalable_bandwidth,
     disable_stp,
+    disable_unpredicated_ld_st_lower,
     dit,
     dotprod,
     ecv,
@@ -58,6 +63,9 @@ pub const Feature = enum {
     ete,
     execute_only,
     exynos_cheap_as_move,
+    f16f32dot,
+    f16f32mm,
+    f16mm,
     f32mm,
     f64mm,
     f8f16mm,
@@ -86,7 +94,9 @@ pub const Feature = enum {
     fuse_arith_logic,
     fuse_crypto_eor,
     fuse_csel,
+    fuse_cset,
     fuse_literals,
+    gcie,
     gcs,
     harden_sls_blr,
     harden_sls_nocomdat,
@@ -99,22 +109,27 @@ pub const Feature = enum {
     ldp_aligned_only,
     lor,
     ls64,
+    lscp,
     lse,
     lse128,
     lse2,
     lsfe,
     lsui,
     lut,
+    max_interleave_factor_4,
     mec,
     mops,
+    mops_go,
     mpam,
+    mpamv2,
     mte,
+    mtetc,
     neon,
     nmi,
     no_bti_at_return_twice,
     no_neg_immediates,
     no_sve_fp_ld1r,
-    no_zcz_fp,
+    no_zcz_fpr64,
     nv,
     occmo,
     olympus,
@@ -125,6 +140,7 @@ pub const Feature = enum {
     pauth_lr,
     pcdphint,
     perfmon,
+    poe2,
     pops,
     predictable_select_expensive,
     predres,
@@ -174,6 +190,7 @@ pub const Feature = enum {
     sme2,
     sme2p1,
     sme2p2,
+    sme2p3,
     sme_b16b16,
     sme_f16f16,
     sme_f64f64,
@@ -206,19 +223,22 @@ pub const Feature = enum {
     sve2_sm4,
     sve2p1,
     sve2p2,
+    sve2p3,
     sve_aes,
     sve_aes2,
     sve_b16b16,
+    sve_b16mm,
     sve_bfscale,
     sve_bitperm,
     sve_f16f32mm,
     sve_sha3,
     sve_sm4,
     tagged_globals,
+    tev,
     the,
     tlb_rmi,
+    tlbid,
     tlbiw,
-    tme,
     tpidr_el1,
     tpidr_el2,
     tpidr_el3,
@@ -230,6 +250,7 @@ pub const Feature = enum {
     use_fixed_over_scalable_if_equal_cost,
     use_postra_scheduler,
     use_reciprocal_square_root,
+    use_wzr_to_vec_move,
     v8_1a,
     v8_2a,
     v8_3a,
@@ -247,17 +268,20 @@ pub const Feature = enum {
     v9_4a,
     v9_5a,
     v9_6a,
+    v9_7a,
     v9a,
     vh,
     wfxt,
     xs,
+    zcm_fpr128,
     zcm_fpr32,
     zcm_fpr64,
     zcm_gpr32,
     zcm_gpr64,
-    zcz,
     zcz_fp_workaround,
-    zcz_gp,
+    zcz_fpr128,
+    zcz_gpr32,
+    zcz_gpr64,
 };
 
 pub const featureSet = CpuFeature.FeatureSetFns(Feature).featureSet;
@@ -274,9 +298,12 @@ pub const all_features = blk: {
         .llvm_name = "a320",
         .description = "Cortex-A320 ARM processors",
         .dependencies = featureSet(&[_]Feature{
+            .aggressive_interleaving,
             .fuse_adrp_add,
             .fuse_aes,
+            .use_fixed_over_scalable_if_equal_cost,
             .use_postra_scheduler,
+            .use_wzr_to_vec_move,
         }),
     };
     result[@intFromEnum(Feature.addr_lsl_slow_14)] = .{
@@ -296,6 +323,11 @@ pub const all_features = blk: {
         .description = "Enable Aggressive FMA for floating-point.",
         .dependencies = featureSet(&[_]Feature{}),
     };
+    result[@intFromEnum(Feature.aggressive_interleaving)] = .{
+        .llvm_name = "aggressive-interleaving",
+        .description = "Make use of aggressive interleaving during vectorization",
+        .dependencies = featureSet(&[_]Feature{}),
+    };
     result[@intFromEnum(Feature.alternate_sextload_cvt_f32_pattern)] = .{
         .llvm_name = "alternate-sextload-cvt-f32-pattern",
         .description = "Use alternative pattern for sextload convert to f32",
@@ -367,6 +399,11 @@ pub const all_features = blk: {
         .description = "Enable Branch Target Identification",
         .dependencies = featureSet(&[_]Feature{}),
     };
+    result[@intFromEnum(Feature.btie)] = .{
+        .llvm_name = "btie",
+        .description = "Enable Enhanced Branch Target Identification extension",
+        .dependencies = featureSet(&[_]Feature{}),
+    };
     result[@intFromEnum(Feature.call_saved_x10)] = .{
         .llvm_name = "call-saved-x10",
         .description = "Make X10 callee saved.",
@@ -439,6 +476,11 @@ pub const all_features = blk: {
         .description = "Enable Clear BHB instruction",
         .dependencies = featureSet(&[_]Feature{}),
     };
+    result[@intFromEnum(Feature.cmh)] = .{
+        .llvm_name = "cmh",
+        .description = "Enable Armv9.7-A Contention Management Hints",
+        .dependencies = featureSet(&[_]Feature{}),
+    };
     result[@intFromEnum(Feature.cmp_bcc_fusion)] = .{
         .llvm_name = "cmp-bcc-fusion",
         .description = "CPU fuses cmp+bcc operations",
@@ -506,11 +548,21 @@ pub const all_features = blk: {
         .description = "Do not emit ldp",
         .dependencies = featureSet(&[_]Feature{}),
     };
+    result[@intFromEnum(Feature.disable_maximize_scalable_bandwidth)] = .{
+        .llvm_name = "disable-maximize-scalable-bandwidth",
+        .description = "Determine the maximum scalable vector length for a loop by the largest scalar type rather than the smallest",
+        .dependencies = featureSet(&[_]Feature{}),
+    };
     result[@intFromEnum(Feature.disable_stp)] = .{
         .llvm_name = "disable-stp",
         .description = "Do not emit stp",
         .dependencies = featureSet(&[_]Feature{}),
     };
+    result[@intFromEnum(Feature.disable_unpredicated_ld_st_lower)] = .{
+        .llvm_name = "disable-unpredicated-ld-st-lower",
+        .description = "Disable lowering unpredicated loads/stores as LDR/STR",
+        .dependencies = featureSet(&[_]Feature{}),
+    };
     result[@intFromEnum(Feature.dit)] = .{
         .llvm_name = "dit",
         .description = "Enable Armv8.4-A Data Independent Timing instructions",
@@ -560,6 +612,30 @@ pub const all_features = blk: {
         .description = "Use Exynos specific handling of cheap instructions",
         .dependencies = featureSet(&[_]Feature{}),
     };
+    result[@intFromEnum(Feature.f16f32dot)] = .{
+        .llvm_name = "f16f32dot",
+        .description = "Enable Armv9.7-A Advanced SIMD half-precision dot product accumulate to single-precision",
+        .dependencies = featureSet(&[_]Feature{
+            .fullfp16,
+            .neon,
+        }),
+    };
+    result[@intFromEnum(Feature.f16f32mm)] = .{
+        .llvm_name = "f16f32mm",
+        .description = "Enable Armv9.7-A Advanced SIMD half-precision matrix multiply-accumulate to single-precision",
+        .dependencies = featureSet(&[_]Feature{
+            .fullfp16,
+            .neon,
+        }),
+    };
+    result[@intFromEnum(Feature.f16mm)] = .{
+        .llvm_name = "f16mm",
+        .description = "Enable Armv9.7-A non-widening half-precision matrix multiply-accumulate",
+        .dependencies = featureSet(&[_]Feature{
+            .fullfp16,
+            .neon,
+        }),
+    };
     result[@intFromEnum(Feature.f32mm)] = .{
         .llvm_name = "f32mm",
         .description = "Enable Matrix Multiply FP32 Extension",
@@ -729,7 +805,12 @@ pub const all_features = blk: {
     };
     result[@intFromEnum(Feature.fuse_csel)] = .{
         .llvm_name = "fuse-csel",
-        .description = "CPU fuses conditional select operations",
+        .description = "CPU can fuse CMP and CSEL operations",
+        .dependencies = featureSet(&[_]Feature{}),
+    };
+    result[@intFromEnum(Feature.fuse_cset)] = .{
+        .llvm_name = "fuse-cset",
+        .description = "CPU can fuse CMP and CSET operations",
         .dependencies = featureSet(&[_]Feature{}),
     };
     result[@intFromEnum(Feature.fuse_literals)] = .{
@@ -737,6 +818,11 @@ pub const all_features = blk: {
         .description = "CPU fuses literal generation operations",
         .dependencies = featureSet(&[_]Feature{}),
     };
+    result[@intFromEnum(Feature.gcie)] = .{
+        .llvm_name = "gcie",
+        .description = "Enable GICv5 (Generic Interrupt Controller) CPU Interface Extension",
+        .dependencies = featureSet(&[_]Feature{}),
+    };
     result[@intFromEnum(Feature.gcs)] = .{
         .llvm_name = "gcs",
         .description = "Enable Armv9.4-A Guarded Call Stack Extension",
@@ -805,6 +891,11 @@ pub const all_features = blk: {
         .description = "Enable Armv8.7-A LD64B/ST64B Accelerator Extension",
         .dependencies = featureSet(&[_]Feature{}),
     };
+    result[@intFromEnum(Feature.lscp)] = .{
+        .llvm_name = "lscp",
+        .description = "Enable Armv9.7-A Load-acquire and store-release pair extension",
+        .dependencies = featureSet(&[_]Feature{}),
+    };
     result[@intFromEnum(Feature.lse)] = .{
         .llvm_name = "lse",
         .description = "Enable Armv8.1-A Large System Extension (LSE) atomic instructions",
@@ -841,6 +932,11 @@ pub const all_features = blk: {
             .neon,
         }),
     };
+    result[@intFromEnum(Feature.max_interleave_factor_4)] = .{
+        .llvm_name = "max-interleave-factor-4",
+        .description = "Set the MaxInterleaveFactor to 4 (from the default 2)",
+        .dependencies = featureSet(&[_]Feature{}),
+    };
     result[@intFromEnum(Feature.mec)] = .{
         .llvm_name = "mec",
         .description = "Enable Memory Encryption Contexts Extension",
@@ -853,16 +949,36 @@ pub const all_features = blk: {
         .description = "Enable Armv8.8-A memcpy and memset acceleration instructions",
         .dependencies = featureSet(&[_]Feature{}),
     };
+    result[@intFromEnum(Feature.mops_go)] = .{
+        .llvm_name = "mops-go",
+        .description = "Enable memset acceleration granule only",
+        .dependencies = featureSet(&[_]Feature{
+            .mops,
+            .mte,
+        }),
+    };
     result[@intFromEnum(Feature.mpam)] = .{
         .llvm_name = "mpam",
         .description = "Enable Armv8.4-A Memory system Partitioning and Monitoring extension",
         .dependencies = featureSet(&[_]Feature{}),
     };
+    result[@intFromEnum(Feature.mpamv2)] = .{
+        .llvm_name = "mpamv2",
+        .description = "Enable Armv9.7-A MPAMv2 Lookaside Buffer Invalidate instructions",
+        .dependencies = featureSet(&[_]Feature{}),
+    };
     result[@intFromEnum(Feature.mte)] = .{
         .llvm_name = "mte",
         .description = "Enable Memory Tagging Extension",
         .dependencies = featureSet(&[_]Feature{}),
     };
+    result[@intFromEnum(Feature.mtetc)] = .{
+        .llvm_name = "mtetc",
+        .description = "Enable Virtual Memory Tagging Extension",
+        .dependencies = featureSet(&[_]Feature{
+            .mte,
+        }),
+    };
     result[@intFromEnum(Feature.neon)] = .{
         .llvm_name = "neon",
         .description = "Enable Advanced SIMD instructions",
@@ -890,9 +1006,9 @@ pub const all_features = blk: {
         .description = "Avoid using LD1RX instructions for FP",
         .dependencies = featureSet(&[_]Feature{}),
     };
-    result[@intFromEnum(Feature.no_zcz_fp)] = .{
-        .llvm_name = "no-zcz-fp",
-        .description = "Has no zero-cycle zeroing instructions for FP registers",
+    result[@intFromEnum(Feature.no_zcz_fpr64)] = .{
+        .llvm_name = "no-zcz-fpr64",
+        .description = "Has no zero-cycle zeroing instructions for FPR64 registers",
         .dependencies = featureSet(&[_]Feature{}),
     };
     result[@intFromEnum(Feature.nv)] = .{
@@ -914,6 +1030,7 @@ pub const all_features = blk: {
             .enable_select_opt,
             .fuse_adrp_add,
             .fuse_aes,
+            .max_interleave_factor_4,
             .predictable_select_expensive,
             .use_fixed_over_scalable_if_equal_cost,
             .use_postra_scheduler,
@@ -956,6 +1073,11 @@ pub const all_features = blk: {
         .description = "Enable Armv8.0-A PMUv3 Performance Monitors extension",
         .dependencies = featureSet(&[_]Feature{}),
     };
+    result[@intFromEnum(Feature.poe2)] = .{
+        .llvm_name = "poe2",
+        .description = "Enable Stage 1 Permission Overlays Extension 2 instructions",
+        .dependencies = featureSet(&[_]Feature{}),
+    };
     result[@intFromEnum(Feature.pops)] = .{
         .llvm_name = "pops",
         .description = "Enable Armv9.6-A Point Of Physical Storage (PoPS) DC instructions",
@@ -1224,6 +1346,13 @@ pub const all_features = blk: {
             .sme2p1,
         }),
     };
+    result[@intFromEnum(Feature.sme2p3)] = .{
+        .llvm_name = "sme2p3",
+        .description = "Enable Armv9.7-A Scalable Matrix Extension 2.3 instructions",
+        .dependencies = featureSet(&[_]Feature{
+            .sme2p2,
+        }),
+    };
     result[@intFromEnum(Feature.sme_b16b16)] = .{
         .llvm_name = "sme-b16b16",
         .description = "Enable SME2.1 ZA-targeting non-widening BFloat16 instructions",
@@ -1447,6 +1576,13 @@ pub const all_features = blk: {
             .sve2p1,
         }),
     };
+    result[@intFromEnum(Feature.sve2p3)] = .{
+        .llvm_name = "sve2p3",
+        .description = "Enable Armv9.7-A Scalable Vector Extension 2.3 instructions",
+        .dependencies = featureSet(&[_]Feature{
+            .sve2p2,
+        }),
+    };
     result[@intFromEnum(Feature.sve_aes)] = .{
         .llvm_name = "sve-aes",
         .description = "Enable SVE AES and quadword SVE polynomial multiply instructions",
@@ -1464,6 +1600,13 @@ pub const all_features = blk: {
         .description = "Enable SVE2 non-widening and SME2 Z-targeting non-widening BFloat16 instructions",
         .dependencies = featureSet(&[_]Feature{}),
     };
+    result[@intFromEnum(Feature.sve_b16mm)] = .{
+        .llvm_name = "sve-b16mm",
+        .description = "Enable Armv9.7-A SVE non-widening BFloat16 matrix multiply-accumulate",
+        .dependencies = featureSet(&[_]Feature{
+            .sve,
+        }),
+    };
     result[@intFromEnum(Feature.sve_bfscale)] = .{
         .llvm_name = "sve-bfscale",
         .description = "Enable Armv9.6-A SVE BFloat16 scaling instructions",
@@ -1500,6 +1643,11 @@ pub const all_features = blk: {
         .description = "Use an instruction sequence for taking the address of a global that allows a memory tag in the upper address bits",
         .dependencies = featureSet(&[_]Feature{}),
     };
+    result[@intFromEnum(Feature.tev)] = .{
+        .llvm_name = "tev",
+        .description = "Enable TIndex Exception-like Vector instructions",
+        .dependencies = featureSet(&[_]Feature{}),
+    };
     result[@intFromEnum(Feature.the)] = .{
         .llvm_name = "the",
         .description = "Enable Armv8.9-A Translation Hardening Extension",
@@ -1510,16 +1658,16 @@ pub const all_features = blk: {
         .description = "Enable Armv8.4-A TLB Range and Maintenance instructions",
         .dependencies = featureSet(&[_]Feature{}),
     };
+    result[@intFromEnum(Feature.tlbid)] = .{
+        .llvm_name = "tlbid",
+        .description = "Enable Armv9.7-A TLBI Domains extension",
+        .dependencies = featureSet(&[_]Feature{}),
+    };
     result[@intFromEnum(Feature.tlbiw)] = .{
         .llvm_name = "tlbiw",
         .description = "Enable Armv9.5-A TLBI VMALL for Dirty State",
         .dependencies = featureSet(&[_]Feature{}),
     };
-    result[@intFromEnum(Feature.tme)] = .{
-        .llvm_name = "tme",
-        .description = "Enable Transactional Memory Extension",
-        .dependencies = featureSet(&[_]Feature{}),
-    };
     result[@intFromEnum(Feature.tpidr_el1)] = .{
         .llvm_name = "tpidr-el1",
         .description = "Permit use of TPIDR_EL1 for the TLS base",
@@ -1575,6 +1723,11 @@ pub const all_features = blk: {
         .description = "Use the reciprocal square root approximation",
         .dependencies = featureSet(&[_]Feature{}),
     };
+    result[@intFromEnum(Feature.use_wzr_to_vec_move)] = .{
+        .llvm_name = "use-wzr-to-vec-move",
+        .description = "Move from WZR to insert 0 into vector registers",
+        .dependencies = featureSet(&[_]Feature{}),
+    };
     result[@intFromEnum(Feature.v8_1a)] = .{
         .llvm_name = "v8.1a",
         .description = "Support ARM v8.1a architecture",
@@ -1783,6 +1936,16 @@ pub const all_features = blk: {
             .v9_5a,
         }),
     };
+    result[@intFromEnum(Feature.v9_7a)] = .{
+        .llvm_name = "v9.7a",
+        .description = "Support ARM v9.7a architecture",
+        .dependencies = featureSet(&[_]Feature{
+            .f16f32dot,
+            .fprcvt,
+            .sve2p3,
+            .v9_6a,
+        }),
+    };
     result[@intFromEnum(Feature.v9a)] = .{
         .llvm_name = "v9a",
         .description = "Support ARM v9a architecture",
@@ -1808,6 +1971,11 @@ pub const all_features = blk: {
         .description = "Enable Armv8.7-A limited-TLB-maintenance instruction",
         .dependencies = featureSet(&[_]Feature{}),
     };
+    result[@intFromEnum(Feature.zcm_fpr128)] = .{
+        .llvm_name = "zcm-fpr128",
+        .description = "Has zero-cycle register moves for FPR128 registers",
+        .dependencies = featureSet(&[_]Feature{}),
+    };
     result[@intFromEnum(Feature.zcm_fpr32)] = .{
         .llvm_name = "zcm-fpr32",
         .description = "Has zero-cycle register moves for FPR32 registers",
@@ -1828,21 +1996,24 @@ pub const all_features = blk: {
         .description = "Has zero-cycle register moves for GPR64 registers",
         .dependencies = featureSet(&[_]Feature{}),
     };
-    result[@intFromEnum(Feature.zcz)] = .{
-        .llvm_name = "zcz",
-        .description = "Has zero-cycle zeroing instructions",
-        .dependencies = featureSet(&[_]Feature{
-            .zcz_gp,
-        }),
-    };
     result[@intFromEnum(Feature.zcz_fp_workaround)] = .{
         .llvm_name = "zcz-fp-workaround",
         .description = "The zero-cycle floating-point zeroing instruction has a bug",
         .dependencies = featureSet(&[_]Feature{}),
     };
-    result[@intFromEnum(Feature.zcz_gp)] = .{
-        .llvm_name = "zcz-gp",
-        .description = "Has zero-cycle zeroing instructions for generic registers",
+    result[@intFromEnum(Feature.zcz_fpr128)] = .{
+        .llvm_name = "zcz-fpr128",
+        .description = "Has zero-cycle zeroing instructions for FPR128 registers",
+        .dependencies = featureSet(&[_]Feature{}),
+    };
+    result[@intFromEnum(Feature.zcz_gpr32)] = .{
+        .llvm_name = "zcz-gpr32",
+        .description = "Has zero-cycle zeroing instructions for GPR32 registers",
+        .dependencies = featureSet(&[_]Feature{}),
+    };
+    result[@intFromEnum(Feature.zcz_gpr64)] = .{
+        .llvm_name = "zcz-gpr64",
+        .description = "Has zero-cycle zeroing instructions for GPR64 registers",
         .dependencies = featureSet(&[_]Feature{}),
     };
     const ti = @typeInfo(Feature);
@@ -1862,6 +2033,8 @@ pub const cpu = struct {
             .aggressive_fma,
             .arith_bcc_fusion,
             .complxnum,
+            .disable_unpredicated_ld_st_lower,
+            .max_interleave_factor_4,
             .perfmon,
             .predictable_select_expensive,
             .sha2,
@@ -1886,6 +2059,7 @@ pub const cpu = struct {
             .fuse_aes,
             .fuse_literals,
             .ldp_aligned_only,
+            .max_interleave_factor_4,
             .perfmon,
             .rand,
             .sha3,
@@ -1911,6 +2085,7 @@ pub const cpu = struct {
             .fuse_aes,
             .fuse_literals,
             .ldp_aligned_only,
+            .max_interleave_factor_4,
             .mte,
             .perfmon,
             .rand,
@@ -1939,6 +2114,7 @@ pub const cpu = struct {
             .fuse_aes,
             .fuse_literals,
             .ldp_aligned_only,
+            .max_interleave_factor_4,
             .mte,
             .perfmon,
             .predictable_select_expensive,
@@ -1951,6 +2127,38 @@ pub const cpu = struct {
             .v8_7a,
         }),
     };
+    pub const ampere1c: CpuModel = .{
+        .name = "ampere1c",
+        .llvm_name = "ampere1c",
+        .features = featureSet(&[_]Feature{
+            .aggressive_fma,
+            .alu_lsl_fast,
+            .arith_bcc_fusion,
+            .cmp_bcc_fusion,
+            .cssc,
+            .enable_select_opt,
+            .faminmax,
+            .fp16fml,
+            .fp8fma,
+            .fuse_address,
+            .fuse_adrp_add,
+            .fuse_aes,
+            .fuse_literals,
+            .lut,
+            .max_interleave_factor_4,
+            .mte,
+            .perfmon,
+            .predictable_select_expensive,
+            .rand,
+            .store_pair_suppress,
+            .sve_aes,
+            .sve_b16b16,
+            .sve_sha3,
+            .sve_sm4,
+            .use_postra_scheduler,
+            .v9_2a,
+        }),
+    };
     pub const apple_a10: CpuModel = .{
         .name = "apple_a10",
         .llvm_name = "apple-a10",
@@ -1964,6 +2172,7 @@ pub const cpu = struct {
             .fuse_aes,
             .fuse_crypto_eor,
             .lor,
+            .no_zcz_fpr64,
             .pan,
             .perfmon,
             .rdm,
@@ -1971,9 +2180,11 @@ pub const cpu = struct {
             .store_pair_suppress,
             .v8a,
             .vh,
-            .zcm_fpr64,
+            .zcm_fpr128,
             .zcm_gpr64,
-            .zcz,
+            .zcz_fpr128,
+            .zcz_gpr32,
+            .zcz_gpr64,
         }),
     };
     pub const apple_a11: CpuModel = .{
@@ -1988,13 +2199,16 @@ pub const cpu = struct {
             .fullfp16,
             .fuse_aes,
             .fuse_crypto_eor,
+            .no_zcz_fpr64,
             .perfmon,
             .sha2,
             .store_pair_suppress,
             .v8_2a,
-            .zcm_fpr64,
+            .zcm_fpr128,
             .zcm_gpr64,
-            .zcz,
+            .zcz_fpr128,
+            .zcz_gpr32,
+            .zcz_gpr64,
         }),
     };
     pub const apple_a12: CpuModel = .{
@@ -2009,13 +2223,16 @@ pub const cpu = struct {
             .fullfp16,
             .fuse_aes,
             .fuse_crypto_eor,
+            .no_zcz_fpr64,
             .perfmon,
             .sha2,
             .store_pair_suppress,
             .v8_3a,
-            .zcm_fpr64,
+            .zcm_fpr128,
             .zcm_gpr64,
-            .zcz,
+            .zcz_fpr128,
+            .zcz_gpr32,
+            .zcz_gpr64,
         }),
     };
     pub const apple_a13: CpuModel = .{
@@ -2030,13 +2247,16 @@ pub const cpu = struct {
             .fp16fml,
             .fuse_aes,
             .fuse_crypto_eor,
+            .no_zcz_fpr64,
             .perfmon,
             .sha3,
             .store_pair_suppress,
             .v8_4a,
-            .zcm_fpr64,
+            .zcm_fpr128,
             .zcm_gpr64,
-            .zcz,
+            .zcz_fpr128,
+            .zcz_gpr32,
+            .zcz_gpr64,
         }),
     };
     pub const apple_a14: CpuModel = .{
@@ -2059,6 +2279,8 @@ pub const cpu = struct {
             .fuse_crypto_eor,
             .fuse_csel,
             .fuse_literals,
+            .max_interleave_factor_4,
+            .no_zcz_fpr64,
             .perfmon,
             .predres,
             .sb,
@@ -2067,9 +2289,11 @@ pub const cpu = struct {
             .ssbs,
             .store_pair_suppress,
             .v8_4a,
-            .zcm_fpr64,
+            .zcm_fpr128,
             .zcm_gpr64,
-            .zcz,
+            .zcz_fpr128,
+            .zcz_gpr32,
+            .zcz_gpr64,
         }),
     };
     pub const apple_a15: CpuModel = .{
@@ -2090,13 +2314,17 @@ pub const cpu = struct {
             .fuse_crypto_eor,
             .fuse_csel,
             .fuse_literals,
+            .max_interleave_factor_4,
+            .no_zcz_fpr64,
             .perfmon,
             .sha3,
             .store_pair_suppress,
             .v8_6a,
-            .zcm_fpr64,
+            .zcm_fpr128,
             .zcm_gpr64,
-            .zcz,
+            .zcz_fpr128,
+            .zcz_gpr32,
+            .zcz_gpr64,
         }),
     };
     pub const apple_a16: CpuModel = .{
@@ -2118,13 +2346,17 @@ pub const cpu = struct {
             .fuse_csel,
             .fuse_literals,
             .hcx,
+            .max_interleave_factor_4,
+            .no_zcz_fpr64,
             .perfmon,
             .sha3,
             .store_pair_suppress,
             .v8_6a,
-            .zcm_fpr64,
+            .zcm_fpr128,
             .zcm_gpr64,
-            .zcz,
+            .zcz_fpr128,
+            .zcz_gpr32,
+            .zcz_gpr64,
         }),
     };
     pub const apple_a17: CpuModel = .{
@@ -2146,13 +2378,17 @@ pub const cpu = struct {
             .fuse_csel,
             .fuse_literals,
             .hcx,
+            .max_interleave_factor_4,
+            .no_zcz_fpr64,
             .perfmon,
             .sha3,
             .store_pair_suppress,
             .v8_6a,
-            .zcm_fpr64,
+            .zcm_fpr128,
             .zcm_gpr64,
-            .zcz,
+            .zcz_fpr128,
+            .zcz_gpr32,
+            .zcz_gpr64,
         }),
     };
     pub const apple_a18: CpuModel = .{
@@ -2173,15 +2409,58 @@ pub const cpu = struct {
             .fuse_crypto_eor,
             .fuse_csel,
             .fuse_literals,
+            .max_interleave_factor_4,
+            .no_zcz_fpr64,
             .perfmon,
             .sha3,
             .sme2,
             .sme_f64f64,
             .sme_i16i64,
             .v8_7a,
-            .zcm_fpr64,
+            .zcm_fpr128,
             .zcm_gpr64,
-            .zcz,
+            .zcz_fpr128,
+            .zcz_gpr32,
+            .zcz_gpr64,
+        }),
+    };
+    pub const apple_a19: CpuModel = .{
+        .name = "apple_a19",
+        .llvm_name = "apple-a19",
+        .features = featureSet(&[_]Feature{
+            .aes,
+            .alternate_sextload_cvt_f32_pattern,
+            .arith_bcc_fusion,
+            .arith_cbz_fusion,
+            .cssc,
+            .disable_latency_sched_heuristic,
+            .fp16fml,
+            .fpac,
+            .fuse_address,
+            .fuse_adrp_add,
+            .fuse_aes,
+            .fuse_arith_logic,
+            .fuse_crypto_eor,
+            .fuse_csel,
+            .fuse_literals,
+            .hbc,
+            .max_interleave_factor_4,
+            .mte,
+            .no_zcz_fpr64,
+            .perfmon,
+            .sha3,
+            .sme2p1,
+            .sme_b16b16,
+            .sme_f16f16,
+            .sme_f64f64,
+            .sme_i16i64,
+            .specres2,
+            .v8_7a,
+            .zcm_fpr128,
+            .zcm_gpr64,
+            .zcz_fpr128,
+            .zcz_gpr32,
+            .zcz_gpr64,
         }),
     };
     pub const apple_a7: CpuModel = .{
@@ -2195,14 +2474,17 @@ pub const cpu = struct {
             .disable_latency_sched_heuristic,
             .fuse_aes,
             .fuse_crypto_eor,
+            .no_zcz_fpr64,
             .perfmon,
             .sha2,
             .store_pair_suppress,
             .v8a,
-            .zcm_fpr64,
+            .zcm_fpr128,
             .zcm_gpr64,
-            .zcz,
             .zcz_fp_workaround,
+            .zcz_fpr128,
+            .zcz_gpr32,
+            .zcz_gpr64,
         }),
     };
     pub const apple_a8: CpuModel = .{
@@ -2216,14 +2498,17 @@ pub const cpu = struct {
             .disable_latency_sched_heuristic,
             .fuse_aes,
             .fuse_crypto_eor,
+            .no_zcz_fpr64,
             .perfmon,
             .sha2,
             .store_pair_suppress,
             .v8a,
-            .zcm_fpr64,
+            .zcm_fpr128,
             .zcm_gpr64,
-            .zcz,
             .zcz_fp_workaround,
+            .zcz_fpr128,
+            .zcz_gpr32,
+            .zcz_gpr64,
         }),
     };
     pub const apple_a9: CpuModel = .{
@@ -2237,14 +2522,17 @@ pub const cpu = struct {
             .disable_latency_sched_heuristic,
             .fuse_aes,
             .fuse_crypto_eor,
+            .no_zcz_fpr64,
             .perfmon,
             .sha2,
             .store_pair_suppress,
             .v8a,
-            .zcm_fpr64,
+            .zcm_fpr128,
             .zcm_gpr64,
-            .zcz,
             .zcz_fp_workaround,
+            .zcz_fpr128,
+            .zcz_gpr32,
+            .zcz_gpr64,
         }),
     };
     pub const apple_m1: CpuModel = .{
@@ -2267,6 +2555,8 @@ pub const cpu = struct {
             .fuse_crypto_eor,
             .fuse_csel,
             .fuse_literals,
+            .max_interleave_factor_4,
+            .no_zcz_fpr64,
             .perfmon,
             .predres,
             .sb,
@@ -2275,9 +2565,11 @@ pub const cpu = struct {
             .ssbs,
             .store_pair_suppress,
             .v8_4a,
-            .zcm_fpr64,
+            .zcm_fpr128,
             .zcm_gpr64,
-            .zcz,
+            .zcz_fpr128,
+            .zcz_gpr32,
+            .zcz_gpr64,
         }),
     };
     pub const apple_m2: CpuModel = .{
@@ -2298,13 +2590,17 @@ pub const cpu = struct {
             .fuse_crypto_eor,
             .fuse_csel,
             .fuse_literals,
+            .max_interleave_factor_4,
+            .no_zcz_fpr64,
             .perfmon,
             .sha3,
             .store_pair_suppress,
             .v8_6a,
-            .zcm_fpr64,
+            .zcm_fpr128,
             .zcm_gpr64,
-            .zcz,
+            .zcz_fpr128,
+            .zcz_gpr32,
+            .zcz_gpr64,
         }),
     };
     pub const apple_m3: CpuModel = .{
@@ -2326,13 +2622,17 @@ pub const cpu = struct {
             .fuse_csel,
             .fuse_literals,
             .hcx,
+            .max_interleave_factor_4,
+            .no_zcz_fpr64,
             .perfmon,
             .sha3,
             .store_pair_suppress,
             .v8_6a,
-            .zcm_fpr64,
+            .zcm_fpr128,
             .zcm_gpr64,
-            .zcz,
+            .zcz_fpr128,
+            .zcz_gpr32,
+            .zcz_gpr64,
         }),
     };
     pub const apple_m4: CpuModel = .{
@@ -2353,15 +2653,58 @@ pub const cpu = struct {
             .fuse_crypto_eor,
             .fuse_csel,
             .fuse_literals,
+            .max_interleave_factor_4,
+            .no_zcz_fpr64,
             .perfmon,
             .sha3,
             .sme2,
             .sme_f64f64,
             .sme_i16i64,
             .v8_7a,
-            .zcm_fpr64,
+            .zcm_fpr128,
             .zcm_gpr64,
-            .zcz,
+            .zcz_fpr128,
+            .zcz_gpr32,
+            .zcz_gpr64,
+        }),
+    };
+    pub const apple_m5: CpuModel = .{
+        .name = "apple_m5",
+        .llvm_name = "apple-m5",
+        .features = featureSet(&[_]Feature{
+            .aes,
+            .alternate_sextload_cvt_f32_pattern,
+            .arith_bcc_fusion,
+            .arith_cbz_fusion,
+            .cssc,
+            .disable_latency_sched_heuristic,
+            .fp16fml,
+            .fpac,
+            .fuse_address,
+            .fuse_adrp_add,
+            .fuse_aes,
+            .fuse_arith_logic,
+            .fuse_crypto_eor,
+            .fuse_csel,
+            .fuse_literals,
+            .hbc,
+            .max_interleave_factor_4,
+            .mte,
+            .no_zcz_fpr64,
+            .perfmon,
+            .sha3,
+            .sme2p1,
+            .sme_b16b16,
+            .sme_f16f16,
+            .sme_f64f64,
+            .sme_i16i64,
+            .specres2,
+            .v8_7a,
+            .zcm_fpr128,
+            .zcm_gpr64,
+            .zcz_fpr128,
+            .zcz_gpr32,
+            .zcz_gpr64,
         }),
     };
     pub const apple_s10: CpuModel = .{
@@ -2383,13 +2726,17 @@ pub const cpu = struct {
             .fuse_csel,
             .fuse_literals,
             .hcx,
+            .max_interleave_factor_4,
+            .no_zcz_fpr64,
             .perfmon,
             .sha3,
             .store_pair_suppress,
             .v8_6a,
-            .zcm_fpr64,
+            .zcm_fpr128,
             .zcm_gpr64,
-            .zcz,
+            .zcz_fpr128,
+            .zcz_gpr32,
+            .zcz_gpr64,
         }),
     };
     pub const apple_s4: CpuModel = .{
@@ -2404,13 +2751,16 @@ pub const cpu = struct {
             .fullfp16,
             .fuse_aes,
             .fuse_crypto_eor,
+            .no_zcz_fpr64,
             .perfmon,
             .sha2,
             .store_pair_suppress,
             .v8_3a,
-            .zcm_fpr64,
+            .zcm_fpr128,
             .zcm_gpr64,
-            .zcz,
+            .zcz_fpr128,
+            .zcz_gpr32,
+            .zcz_gpr64,
         }),
     };
     pub const apple_s5: CpuModel = .{
@@ -2425,13 +2775,16 @@ pub const cpu = struct {
             .fullfp16,
             .fuse_aes,
             .fuse_crypto_eor,
+            .no_zcz_fpr64,
             .perfmon,
             .sha2,
             .store_pair_suppress,
             .v8_3a,
-            .zcm_fpr64,
+            .zcm_fpr128,
             .zcm_gpr64,
-            .zcz,
+            .zcz_fpr128,
+            .zcz_gpr32,
+            .zcz_gpr64,
         }),
     };
     pub const apple_s6: CpuModel = .{
@@ -2446,13 +2799,16 @@ pub const cpu = struct {
             .fp16fml,
             .fuse_aes,
             .fuse_crypto_eor,
+            .no_zcz_fpr64,
             .perfmon,
             .sha3,
             .store_pair_suppress,
             .v8_4a,
-            .zcm_fpr64,
+            .zcm_fpr128,
             .zcm_gpr64,
-            .zcz,
+            .zcz_fpr128,
+            .zcz_gpr32,
+            .zcz_gpr64,
         }),
     };
     pub const apple_s7: CpuModel = .{
@@ -2467,13 +2823,16 @@ pub const cpu = struct {
             .fp16fml,
             .fuse_aes,
             .fuse_crypto_eor,
+            .no_zcz_fpr64,
             .perfmon,
             .sha3,
             .store_pair_suppress,
             .v8_4a,
-            .zcm_fpr64,
+            .zcm_fpr128,
             .zcm_gpr64,
-            .zcz,
+            .zcz_fpr128,
+            .zcz_gpr32,
+            .zcz_gpr64,
         }),
     };
     pub const apple_s8: CpuModel = .{
@@ -2488,13 +2847,16 @@ pub const cpu = struct {
             .fp16fml,
             .fuse_aes,
             .fuse_crypto_eor,
+            .no_zcz_fpr64,
             .perfmon,
             .sha3,
             .store_pair_suppress,
             .v8_4a,
-            .zcm_fpr64,
+            .zcm_fpr128,
             .zcm_gpr64,
-            .zcz,
+            .zcz_fpr128,
+            .zcz_gpr32,
+            .zcz_gpr64,
         }),
     };
     pub const apple_s9: CpuModel = .{
@@ -2516,13 +2878,126 @@ pub const cpu = struct {
             .fuse_csel,
             .fuse_literals,
             .hcx,
+            .max_interleave_factor_4,
+            .no_zcz_fpr64,
             .perfmon,
             .sha3,
             .store_pair_suppress,
             .v8_6a,
-            .zcm_fpr64,
+            .zcm_fpr128,
             .zcm_gpr64,
-            .zcz,
+            .zcz_fpr128,
+            .zcz_gpr32,
+            .zcz_gpr64,
+        }),
+    };
+    pub const c1_nano: CpuModel = .{
+        .name = "c1_nano",
+        .llvm_name = "c1-nano",
+        .features = featureSet(&[_]Feature{
+            .chk,
+            .clrbhb,
+            .ete,
+            .fp16fml,
+            .fpac,
+            .fuse_adrp_add,
+            .fuse_aes,
+            .mte,
+            .perfmon,
+            .rcpc3,
+            .sme2,
+            .specres2,
+            .sve_bitperm,
+            .use_fixed_over_scalable_if_equal_cost,
+            .use_postra_scheduler,
+            .use_wzr_to_vec_move,
+            .v9_3a,
+        }),
+    };
+    pub const c1_premium: CpuModel = .{
+        .name = "c1_premium",
+        .llvm_name = "c1-premium",
+        .features = featureSet(&[_]Feature{
+            .alu_lsl_fast,
+            .avoid_ldapur,
+            .chk,
+            .clrbhb,
+            .enable_select_opt,
+            .ete,
+            .fp16fml,
+            .fpac,
+            .fuse_adrp_add,
+            .fuse_aes,
+            .fuse_csel,
+            .fuse_cset,
+            .mte,
+            .perfmon,
+            .predictable_select_expensive,
+            .rcpc3,
+            .sme2,
+            .spe,
+            .specres2,
+            .sve_bitperm,
+            .use_fixed_over_scalable_if_equal_cost,
+            .use_postra_scheduler,
+            .v9_3a,
+        }),
+    };
+    pub const c1_pro: CpuModel = .{
+        .name = "c1_pro",
+        .llvm_name = "c1-pro",
+        .features = featureSet(&[_]Feature{
+            .alu_lsl_fast,
+            .chk,
+            .clrbhb,
+            .cmp_bcc_fusion,
+            .enable_select_opt,
+            .ete,
+            .fp16fml,
+            .fpac,
+            .fuse_adrp_add,
+            .fuse_aes,
+            .fuse_csel,
+            .fuse_cset,
+            .mte,
+            .perfmon,
+            .predictable_select_expensive,
+            .rcpc3,
+            .sme2,
+            .spe,
+            .specres2,
+            .sve_bitperm,
+            .use_postra_scheduler,
+            .v9_3a,
+        }),
+    };
+    pub const c1_ultra: CpuModel = .{
+        .name = "c1_ultra",
+        .llvm_name = "c1-ultra",
+        .features = featureSet(&[_]Feature{
+            .alu_lsl_fast,
+            .avoid_ldapur,
+            .chk,
+            .clrbhb,
+            .enable_select_opt,
+            .ete,
+            .fp16fml,
+            .fpac,
+            .fuse_adrp_add,
+            .fuse_aes,
+            .fuse_csel,
+            .fuse_cset,
+            .mte,
+            .perfmon,
+            .predictable_select_expensive,
+            .rcpc3,
+            .sme2,
+            .spe,
+            .specres2,
+            .sve_bitperm,
+            .use_fixed_over_scalable_if_equal_cost,
+            .use_postra_scheduler,
+            .v9_3a,
         }),
     };
     pub const carmel: CpuModel = .{
@@ -2541,12 +3016,15 @@ pub const cpu = struct {
         .features = featureSet(&[_]Feature{
             .alu_lsl_fast,
             .bf16,
+            .disable_maximize_scalable_bandwidth,
             .enable_select_opt,
             .ete,
             .fp16fml,
             .fpac,
             .fuse_adrp_add,
             .fuse_aes,
+            .fuse_csel,
+            .fuse_cset,
             .i8mm,
             .mte,
             .perfmon,
@@ -2607,6 +3085,7 @@ pub const cpu = struct {
             .sve_bitperm,
             .use_fixed_over_scalable_if_equal_cost,
             .use_postra_scheduler,
+            .use_wzr_to_vec_move,
             .v9a,
         }),
     };
@@ -2624,6 +3103,7 @@ pub const cpu = struct {
             .sve_bitperm,
             .use_fixed_over_scalable_if_equal_cost,
             .use_postra_scheduler,
+            .use_wzr_to_vec_move,
             .v9_2a,
         }),
     };
@@ -2640,6 +3120,7 @@ pub const cpu = struct {
             .perfmon,
             .sve_bitperm,
             .use_postra_scheduler,
+            .use_wzr_to_vec_move,
             .v9_2a,
         }),
     };
@@ -2655,6 +3136,7 @@ pub const cpu = struct {
             .perfmon,
             .sha2,
             .use_postra_scheduler,
+            .use_wzr_to_vec_move,
             .v8a,
         }),
     };
@@ -2672,6 +3154,7 @@ pub const cpu = struct {
             .rcpc,
             .sha2,
             .use_postra_scheduler,
+            .use_wzr_to_vec_move,
             .v8_2a,
         }),
     };
@@ -2687,6 +3170,7 @@ pub const cpu = struct {
             .fuse_adrp_add,
             .fuse_aes,
             .fuse_literals,
+            .max_interleave_factor_4,
             .perfmon,
             .predictable_select_expensive,
             .sha2,
@@ -2747,6 +3231,8 @@ pub const cpu = struct {
             .fpac,
             .fuse_adrp_add,
             .fuse_aes,
+            .fuse_csel,
+            .fuse_cset,
             .i8mm,
             .mte,
             .perfmon,
@@ -2769,6 +3255,8 @@ pub const cpu = struct {
             .fpac,
             .fuse_adrp_add,
             .fuse_aes,
+            .fuse_csel,
+            .fuse_cset,
             .i8mm,
             .mte,
             .perfmon,
@@ -2808,6 +3296,8 @@ pub const cpu = struct {
             .fpac,
             .fuse_adrp_add,
             .fuse_aes,
+            .fuse_csel,
+            .fuse_cset,
             .mte,
             .perfmon,
             .predictable_select_expensive,
@@ -2829,6 +3319,8 @@ pub const cpu = struct {
             .fpac,
             .fuse_adrp_add,
             .fuse_aes,
+            .fuse_csel,
+            .fuse_cset,
             .mte,
             .perfmon,
             .predictable_select_expensive,
@@ -2850,6 +3342,8 @@ pub const cpu = struct {
             .fpac,
             .fuse_adrp_add,
             .fuse_aes,
+            .fuse_csel,
+            .fuse_cset,
             .mte,
             .perfmon,
             .predictable_select_expensive,
@@ -2967,6 +3461,8 @@ pub const cpu = struct {
             .fullfp16,
             .fuse_adrp_add,
             .fuse_aes,
+            .fuse_csel,
+            .fuse_cset,
             .perfmon,
             .predictable_select_expensive,
             .rcpc,
@@ -2990,6 +3486,8 @@ pub const cpu = struct {
             .fullfp16,
             .fuse_adrp_add,
             .fuse_aes,
+            .fuse_csel,
+            .fuse_cset,
             .perfmon,
             .predictable_select_expensive,
             .rcpc,
@@ -3014,6 +3512,8 @@ pub const cpu = struct {
             .fullfp16,
             .fuse_adrp_add,
             .fuse_aes,
+            .fuse_csel,
+            .fuse_cset,
             .pauth,
             .perfmon,
             .predictable_select_expensive,
@@ -3157,6 +3657,8 @@ pub const cpu = struct {
             .fpac,
             .fuse_adrp_add,
             .fuse_aes,
+            .fuse_csel,
+            .fuse_cset,
             .mte,
             .perfmon,
             .predictable_select_expensive,
@@ -3179,6 +3681,8 @@ pub const cpu = struct {
             .fpac,
             .fuse_adrp_add,
             .fuse_aes,
+            .fuse_csel,
+            .fuse_cset,
             .mte,
             .perfmon,
             .predictable_select_expensive,
@@ -3200,14 +3704,17 @@ pub const cpu = struct {
             .disable_latency_sched_heuristic,
             .fuse_aes,
             .fuse_crypto_eor,
+            .no_zcz_fpr64,
             .perfmon,
             .sha2,
             .store_pair_suppress,
             .v8a,
-            .zcm_fpr64,
+            .zcm_fpr128,
             .zcm_gpr64,
-            .zcz,
             .zcz_fp_workaround,
+            .zcz_fpr128,
+            .zcz_gpr32,
+            .zcz_gpr64,
         }),
     };
     pub const emag: CpuModel = .{
@@ -3267,6 +3774,7 @@ pub const cpu = struct {
             .fuse_aes,
             .fuse_csel,
             .fuse_literals,
+            .max_interleave_factor_4,
             .perfmon,
             .predictable_select_expensive,
             .sha2,
@@ -3293,12 +3801,14 @@ pub const cpu = struct {
             .fuse_arith_logic,
             .fuse_csel,
             .fuse_literals,
+            .max_interleave_factor_4,
             .perfmon,
             .sha2,
             .store_pair_suppress,
             .use_postra_scheduler,
             .v8_2a,
-            .zcz,
+            .zcz_gpr32,
+            .zcz_gpr64,
         }),
     };
     pub const exynos_m5: CpuModel = .{
@@ -3319,12 +3829,14 @@ pub const cpu = struct {
             .fuse_arith_logic,
             .fuse_csel,
             .fuse_literals,
+            .max_interleave_factor_4,
             .perfmon,
             .sha2,
             .store_pair_suppress,
             .use_postra_scheduler,
             .v8_2a,
-            .zcz,
+            .zcz_gpr32,
+            .zcz_gpr64,
         }),
     };
     pub const falkor: CpuModel = .{
@@ -3334,6 +3846,7 @@ pub const cpu = struct {
             .aes,
             .alu_lsl_fast,
             .crc,
+            .max_interleave_factor_4,
             .perfmon,
             .predictable_select_expensive,
             .rdm,
@@ -3342,7 +3855,8 @@ pub const cpu = struct {
             .store_pair_suppress,
             .use_postra_scheduler,
             .v8a,
-            .zcz,
+            .zcz_gpr32,
+            .zcz_gpr64,
         }),
     };
     pub const fujitsu_monaka: CpuModel = .{
@@ -3382,6 +3896,8 @@ pub const cpu = struct {
             .fpac,
             .fuse_adrp_add,
             .fuse_aes,
+            .fuse_csel,
+            .fuse_cset,
             .mte,
             .perfmon,
             .predictable_select_expensive,
@@ -3422,11 +3938,13 @@ pub const cpu = struct {
             .fpac,
             .fuse_adrp_add,
             .fuse_aes,
+            .fuse_csel,
+            .fuse_cset,
             .i8mm,
+            .max_interleave_factor_4,
             .mte,
             .perfmon,
             .predictable_select_expensive,
-            .rand,
             .spe,
             .sve_aes,
             .sve_bitperm,
@@ -3444,13 +3962,15 @@ pub const cpu = struct {
             .aes,
             .alu_lsl_fast,
             .crc,
+            .max_interleave_factor_4,
             .perfmon,
             .predictable_select_expensive,
             .sha2,
             .store_pair_suppress,
             .use_postra_scheduler,
             .v8a,
-            .zcz,
+            .zcz_gpr32,
+            .zcz_gpr64,
         }),
     };
     pub const neoverse_512tvb: CpuModel = .{
@@ -3467,6 +3987,7 @@ pub const cpu = struct {
             .fuse_adrp_add,
             .fuse_aes,
             .i8mm,
+            .max_interleave_factor_4,
             .perfmon,
             .predictable_select_expensive,
             .rand,
@@ -3524,12 +4045,15 @@ pub const cpu = struct {
         .features = featureSet(&[_]Feature{
             .alu_lsl_fast,
             .bf16,
+            .disable_maximize_scalable_bandwidth,
             .enable_select_opt,
             .ete,
             .fp16fml,
             .fpac,
             .fuse_adrp_add,
             .fuse_aes,
+            .fuse_csel,
+            .fuse_cset,
             .i8mm,
             .mte,
             .perfmon,
@@ -3550,6 +4074,8 @@ pub const cpu = struct {
             .fpac,
             .fuse_adrp_add,
             .fuse_aes,
+            .fuse_csel,
+            .fuse_cset,
             .mte,
             .perfmon,
             .predictable_select_expensive,
@@ -3569,10 +4095,13 @@ pub const cpu = struct {
             .alu_lsl_fast,
             .bf16,
             .ccdp,
+            .disable_maximize_scalable_bandwidth,
             .enable_select_opt,
             .fp16fml,
             .fuse_adrp_add,
             .fuse_aes,
+            .fuse_csel,
+            .fuse_cset,
             .i8mm,
             .no_sve_fp_ld1r,
             .perfmon,
@@ -3602,7 +4131,10 @@ pub const cpu = struct {
             .fpac,
             .fuse_adrp_add,
             .fuse_aes,
+            .fuse_csel,
+            .fuse_cset,
             .i8mm,
+            .max_interleave_factor_4,
             .mte,
             .perfmon,
             .predictable_select_expensive,
@@ -3627,7 +4159,10 @@ pub const cpu = struct {
             .fpac,
             .fuse_adrp_add,
             .fuse_aes,
+            .fuse_csel,
+            .fuse_cset,
             .ls64,
+            .max_interleave_factor_4,
             .mte,
             .perfmon,
             .predictable_select_expensive,
@@ -3651,7 +4186,10 @@ pub const cpu = struct {
             .fpac,
             .fuse_adrp_add,
             .fuse_aes,
+            .fuse_csel,
+            .fuse_cset,
             .ls64,
+            .max_interleave_factor_4,
             .mte,
             .perfmon,
             .predictable_select_expensive,
@@ -3700,6 +4238,7 @@ pub const cpu = struct {
             .fuse_adrp_add,
             .fuse_aes,
             .fuse_crypto_eor,
+            .max_interleave_factor_4,
             .perfmon,
             .rand,
             .sha3,
@@ -3715,6 +4254,7 @@ pub const cpu = struct {
         .features = featureSet(&[_]Feature{
             .aes,
             .alu_lsl_fast,
+            .max_interleave_factor_4,
             .perfmon,
             .predictable_select_expensive,
             .sha2,
@@ -3722,7 +4262,8 @@ pub const cpu = struct {
             .store_pair_suppress,
             .use_postra_scheduler,
             .v8_4a,
-            .zcz,
+            .zcz_gpr32,
+            .zcz_gpr64,
         }),
     };
     pub const thunderx: CpuModel = .{
@@ -3746,6 +4287,7 @@ pub const cpu = struct {
             .aes,
             .aggressive_fma,
             .arith_bcc_fusion,
+            .max_interleave_factor_4,
             .predictable_select_expensive,
             .sha2,
             .store_pair_suppress,
@@ -3761,6 +4303,7 @@ pub const cpu = struct {
             .aggressive_fma,
             .arith_bcc_fusion,
             .balance_fp_ops,
+            .max_interleave_factor_4,
             .perfmon,
             .predictable_select_expensive,
             .sha2,
diff --git a/lib/std/Target/amdgcn.zig b/lib/std/Target/amdgcn.zig
index 99a1244f48..924fc44359 100644
--- a/lib/std/Target/amdgcn.zig
+++ b/lib/std/Target/amdgcn.zig
@@ -5,12 +5,17 @@ const CpuFeature = std.Target.Cpu.Feature;
 const CpuModel = std.Target.Cpu.Model;
 
 pub const Feature = enum {
+    @"1024_addressable_vgprs",
     @"16_bit_insts",
+    @"45_bit_num_records_buffer_resource",
     @"64_bit_literals",
     a16,
+    add_min_max_insts,
     add_no_carry_insts,
+    add_sub_u64_insts,
     addressablelocalmemorysize163840,
     addressablelocalmemorysize32768,
+    addressablelocalmemorysize327680,
     addressablelocalmemorysize65536,
     agent_scope_fine_grained_remote_memory_atomics,
     allocate1_5xvgprs,
@@ -18,6 +23,7 @@ pub const Feature = enum {
     architected_flat_scratch,
     architected_sgprs,
     ashr_pk_insts,
+    assembler_permissive_wavesize,
     atomic_buffer_global_pk_add_f16_insts,
     atomic_buffer_global_pk_add_f16_no_rtn_insts,
     atomic_buffer_pk_add_bf16_inst,
@@ -34,15 +40,22 @@ pub const Feature = enum {
     auto_waitcnt_before_barrier,
     back_off_barrier,
     bf16_cvt_insts,
+    bf16_pk_insts,
     bf16_trans_insts,
     bf8_cvt_scale_insts,
     bitop3_insts,
     block_vgpr_csr,
     bvh_dual_bvh_8_insts,
     ci_insts,
+    clusters,
+    cube_insts,
     cumode,
     cvt_fp8_vop1_bug,
+    cvt_norm_insts,
     cvt_pk_f16_f32_inst,
+    cvt_pknorm_vop2_insts,
+    cvt_pknorm_vop3_insts,
+    d16_write_vgpr32,
     default_component_broadcast,
     default_component_zero,
     dl_insts,
@@ -65,8 +78,7 @@ pub const Feature = enum {
     dpp_src1_sgpr,
     ds128,
     ds_src2_insts,
-    dynamic_vgpr,
-    dynamic_vgpr_block_size_32,
+    emulated_system_scope_atomics,
     extended_image_insts,
     f16bf16_to_fp6bf6_cvt_scale_insts,
     f32_to_f16bf16_cvt_sr_insts,
@@ -77,10 +89,12 @@ pub const Feature = enum {
     flat_buffer_global_fadd_f64_inst,
     flat_for_global,
     flat_global_insts,
+    flat_gvs_mode,
     flat_inst_offsets,
     flat_scratch,
     flat_scratch_insts,
     flat_segment_offset_bug,
+    fma_mix_bf16_insts,
     fma_mix_insts,
     fmacf64_inst,
     fmaf,
@@ -113,6 +127,7 @@ pub const Feature = enum {
     gfx940_insts,
     gfx950_insts,
     gfx9_insts,
+    globally_addressable_scratch,
     gws,
     half_rate_64_ops,
     ieee_minimum_maximum_insts,
@@ -128,20 +143,24 @@ pub const Feature = enum {
     lds_misaligned_bug,
     ldsbankcount16,
     ldsbankcount32,
+    lerp_inst,
     load_store_opt,
     lshl_add_u64_inst,
     mad_intra_fwd_bug,
     mad_mac_f32_insts,
     mad_mix_insts,
+    mad_u32_inst,
     mai_insts,
     max_hard_clause_length_32,
     max_hard_clause_length_63,
     max_private_element_size_16,
     max_private_element_size_4,
     max_private_element_size_8,
+    mcast_load_insts,
     memory_atomic_fadd_f32_denormal_support,
     mfma_inline_literal_bug,
     mimg_r128,
+    min3_max3_pkf16,
     minimum3_maximum3_f16,
     minimum3_maximum3_f32,
     minimum3_maximum3_pkf16,
@@ -160,6 +179,7 @@ pub const Feature = enum {
     partial_nsa_encoding,
     permlane16_swap,
     permlane32_swap,
+    pk_add_min_max_insts,
     pk_fmac_f16_inst,
     point_sample_accel,
     precise_memory,
@@ -168,6 +188,7 @@ pub const Feature = enum {
     promote_alloca,
     prt_strict_null,
     pseudo_scalar_trans,
+    qsad_insts,
     r128_a16,
     real_true16,
     relaxed_buffer_oob_mode,
@@ -176,6 +197,9 @@ pub const Feature = enum {
     restricted_soffset,
     s_memrealtime,
     s_memtime_inst,
+    s_wakeup_barrier_inst,
+    sad_insts,
+    safe_cu_prefetch,
     safe_smem_prefetch,
     salu_float,
     scalar_atomics,
@@ -190,6 +214,7 @@ pub const Feature = enum {
     sdwa_sdst,
     sea_islands,
     setprio_inc_wg_inst,
+    setreg_vgpr_msb_fixup,
     sgpr_init_bug,
     shader_cycles_hi_lo_registers,
     shader_cycles_register,
@@ -198,6 +223,8 @@ pub const Feature = enum {
     southern_islands,
     sramecc,
     sramecc_support,
+    tanh_insts,
+    tensor_cvt_lut_insts,
     tgsplit,
     transpose_load_f4f6_insts,
     trap_handler,
@@ -213,7 +240,9 @@ pub const Feature = enum {
     valu_trans_use_hazard,
     vcmpx_exec_war_hazard,
     vcmpx_permlane_hazard,
+    vgpr_align2,
     vgpr_index_mode,
+    vmem_pref_insts,
     vmem_to_lds_load_insts,
     vmem_to_scalar_write_hazard,
     vmem_write_vgpr_in_order,
@@ -223,6 +252,7 @@ pub const Feature = enum {
     vopd,
     vscnt,
     wait_xcnt,
+    waits_before_system_scope_stores,
     wavefrontsize16,
     wavefrontsize32,
     wavefrontsize64,
@@ -241,11 +271,21 @@ pub const all_features = blk: {
     const len = @typeInfo(Feature).@"enum".fields.len;
     std.debug.assert(len <= CpuFeature.Set.needed_bit_count);
     var result: [len]CpuFeature = undefined;
+    result[@intFromEnum(Feature.@"1024_addressable_vgprs")] = .{
+        .llvm_name = "1024-addressable-vgprs",
+        .description = "Has 1024 addressable VGPRs",
+        .dependencies = featureSet(&[_]Feature{}),
+    };
     result[@intFromEnum(Feature.@"16_bit_insts")] = .{
         .llvm_name = "16-bit-insts",
         .description = "Has i16/f16 instructions",
         .dependencies = featureSet(&[_]Feature{}),
     };
+    result[@intFromEnum(Feature.@"45_bit_num_records_buffer_resource")] = .{
+        .llvm_name = "45-bit-num-records-buffer-resource",
+        .description = "The buffer resource (V#) supports 45-bit num_records",
+        .dependencies = featureSet(&[_]Feature{}),
+    };
     result[@intFromEnum(Feature.@"64_bit_literals")] = .{
         .llvm_name = "64-bit-literals",
         .description = "Can use 64-bit literals with single DWORD instructions",
@@ -256,11 +296,21 @@ pub const all_features = blk: {
         .description = "Support A16 for 16-bit coordinates/gradients/lod/clamp/mip image operands",
         .dependencies = featureSet(&[_]Feature{}),
     };
+    result[@intFromEnum(Feature.add_min_max_insts)] = .{
+        .llvm_name = "add-min-max-insts",
+        .description = "Has v_add_{min|max}_{i|u}32 instructions",
+        .dependencies = featureSet(&[_]Feature{}),
+    };
     result[@intFromEnum(Feature.add_no_carry_insts)] = .{
         .llvm_name = "add-no-carry-insts",
         .description = "Have VALU add/sub instructions without carry out",
         .dependencies = featureSet(&[_]Feature{}),
     };
+    result[@intFromEnum(Feature.add_sub_u64_insts)] = .{
+        .llvm_name = "add-sub-u64-insts",
+        .description = "Has v_add_u64 and v_sub_u64 instructions",
+        .dependencies = featureSet(&[_]Feature{}),
+    };
     result[@intFromEnum(Feature.addressablelocalmemorysize163840)] = .{
         .llvm_name = "addressablelocalmemorysize163840",
         .description = "The size of local memory in bytes",
@@ -271,6 +321,11 @@ pub const all_features = blk: {
         .description = "The size of local memory in bytes",
         .dependencies = featureSet(&[_]Feature{}),
     };
+    result[@intFromEnum(Feature.addressablelocalmemorysize327680)] = .{
+        .llvm_name = "addressablelocalmemorysize327680",
+        .description = "The size of local memory in bytes",
+        .dependencies = featureSet(&[_]Feature{}),
+    };
     result[@intFromEnum(Feature.addressablelocalmemorysize65536)] = .{
         .llvm_name = "addressablelocalmemorysize65536",
         .description = "The size of local memory in bytes",
@@ -306,6 +361,11 @@ pub const all_features = blk: {
         .description = "Has Arithmetic Shift Pack instructions",
         .dependencies = featureSet(&[_]Feature{}),
     };
+    result[@intFromEnum(Feature.assembler_permissive_wavesize)] = .{
+        .llvm_name = "assembler-permissive-wavesize",
+        .description = "allow parsing wave32 and wave64 variants of instructions",
+        .dependencies = featureSet(&[_]Feature{}),
+    };
     result[@intFromEnum(Feature.atomic_buffer_global_pk_add_f16_insts)] = .{
         .llvm_name = "atomic-buffer-global-pk-add-f16-insts",
         .description = "Has buffer_atomic_pk_add_f16 and global_atomic_pk_add_f16 instructions that can return original value",
@@ -357,12 +417,16 @@ pub const all_features = blk: {
     result[@intFromEnum(Feature.atomic_fmin_fmax_flat_f32)] = .{
         .llvm_name = "atomic-fmin-fmax-flat-f32",
         .description = "Has flat memory instructions for atomicrmw fmin/fmax for float",
-        .dependencies = featureSet(&[_]Feature{}),
+        .dependencies = featureSet(&[_]Feature{
+            .flat_address_space,
+        }),
     };
     result[@intFromEnum(Feature.atomic_fmin_fmax_flat_f64)] = .{
         .llvm_name = "atomic-fmin-fmax-flat-f64",
         .description = "Has flat memory instructions for atomicrmw fmin/fmax for double",
-        .dependencies = featureSet(&[_]Feature{}),
+        .dependencies = featureSet(&[_]Feature{
+            .flat_address_space,
+        }),
     };
     result[@intFromEnum(Feature.atomic_fmin_fmax_global_f32)] = .{
         .llvm_name = "atomic-fmin-fmax-global-f32",
@@ -396,6 +460,11 @@ pub const all_features = blk: {
         .description = "Has bf16 conversion instructions",
         .dependencies = featureSet(&[_]Feature{}),
     };
+    result[@intFromEnum(Feature.bf16_pk_insts)] = .{
+        .llvm_name = "bf16-pk-insts",
+        .description = "Has bf16 packed instructions (fma, add, mul, max, min)",
+        .dependencies = featureSet(&[_]Feature{}),
+    };
     result[@intFromEnum(Feature.bf16_trans_insts)] = .{
         .llvm_name = "bf16-trans-insts",
         .description = "Has bf16 transcendental instructions",
@@ -426,6 +495,16 @@ pub const all_features = blk: {
         .description = "Additional instructions for CI+",
         .dependencies = featureSet(&[_]Feature{}),
     };
+    result[@intFromEnum(Feature.clusters)] = .{
+        .llvm_name = "clusters",
+        .description = "Has clusters of workgroups support",
+        .dependencies = featureSet(&[_]Feature{}),
+    };
+    result[@intFromEnum(Feature.cube_insts)] = .{
+        .llvm_name = "cube-insts",
+        .description = "Has v_cube* instructions",
+        .dependencies = featureSet(&[_]Feature{}),
+    };
     result[@intFromEnum(Feature.cumode)] = .{
         .llvm_name = "cumode",
         .description = "Enable CU wavefront execution mode",
@@ -438,11 +517,31 @@ pub const all_features = blk: {
             .fp8_conversion_insts,
         }),
     };
+    result[@intFromEnum(Feature.cvt_norm_insts)] = .{
+        .llvm_name = "cvt-norm-insts",
+        .description = "Has v_cvt_norm* instructions",
+        .dependencies = featureSet(&[_]Feature{}),
+    };
     result[@intFromEnum(Feature.cvt_pk_f16_f32_inst)] = .{
         .llvm_name = "cvt-pk-f16-f32-inst",
         .description = "Has cvt_pk_f16_f32 instruction",
         .dependencies = featureSet(&[_]Feature{}),
     };
+    result[@intFromEnum(Feature.cvt_pknorm_vop2_insts)] = .{
+        .llvm_name = "cvt-pknorm-vop2-insts",
+        .description = "Has v_cvt_pk_norm_*f32 instructions/Has v_cvt_pk_norm_*_f16 instructions",
+        .dependencies = featureSet(&[_]Feature{}),
+    };
+    result[@intFromEnum(Feature.cvt_pknorm_vop3_insts)] = .{
+        .llvm_name = "cvt-pknorm-vop3-insts",
+        .description = "Has v_cvt_pk_norm_*f32 instructions/Has v_cvt_pk_norm_*_f16 instructions",
+        .dependencies = featureSet(&[_]Feature{}),
+    };
+    result[@intFromEnum(Feature.d16_write_vgpr32)] = .{
+        .llvm_name = "d16-write-vgpr32",
+        .description = "D16 instructions potentially have 32-bit data dependencies",
+        .dependencies = featureSet(&[_]Feature{}),
+    };
     result[@intFromEnum(Feature.default_component_broadcast)] = .{
         .llvm_name = "default-component-broadcast",
         .description = "BUFFER/IMAGE store instructions set unspecified components to x component (GFX12)",
@@ -553,14 +652,9 @@ pub const all_features = blk: {
         .description = "Has ds_*_src2 instructions",
         .dependencies = featureSet(&[_]Feature{}),
     };
-    result[@intFromEnum(Feature.dynamic_vgpr)] = .{
-        .llvm_name = "dynamic-vgpr",
-        .description = "Enable dynamic VGPR mode",
-        .dependencies = featureSet(&[_]Feature{}),
-    };
-    result[@intFromEnum(Feature.dynamic_vgpr_block_size_32)] = .{
-        .llvm_name = "dynamic-vgpr-block-size-32",
-        .description = "Use a block size of 32 for dynamic VGPR allocation (default is 16)",
+    result[@intFromEnum(Feature.emulated_system_scope_atomics)] = .{
+        .llvm_name = "emulated-system-scope-atomics",
+        .description = "System scope atomics unsupported by the PCI-e are emulated in HW via CAS loop and functional.",
         .dependencies = featureSet(&[_]Feature{}),
     };
     result[@intFromEnum(Feature.extended_image_insts)] = .{
@@ -596,7 +690,9 @@ pub const all_features = blk: {
     result[@intFromEnum(Feature.flat_atomic_fadd_f32_inst)] = .{
         .llvm_name = "flat-atomic-fadd-f32-inst",
         .description = "Has flat_atomic_add_f32 instruction",
-        .dependencies = featureSet(&[_]Feature{}),
+        .dependencies = featureSet(&[_]Feature{
+            .flat_address_space,
+        }),
     };
     result[@intFromEnum(Feature.flat_buffer_global_fadd_f64_inst)] = .{
         .llvm_name = "flat-buffer-global-fadd-f64-inst",
@@ -611,7 +707,16 @@ pub const all_features = blk: {
     result[@intFromEnum(Feature.flat_global_insts)] = .{
         .llvm_name = "flat-global-insts",
         .description = "Have global_* flat memory instructions",
-        .dependencies = featureSet(&[_]Feature{}),
+        .dependencies = featureSet(&[_]Feature{
+            .flat_address_space,
+        }),
+    };
+    result[@intFromEnum(Feature.flat_gvs_mode)] = .{
+        .llvm_name = "flat-gvs-mode",
+        .description = "Have GVS addressing mode with flat_* instructions",
+        .dependencies = featureSet(&[_]Feature{
+            .flat_address_space,
+        }),
     };
     result[@intFromEnum(Feature.flat_inst_offsets)] = .{
         .llvm_name = "flat-inst-offsets",
@@ -626,13 +731,20 @@ pub const all_features = blk: {
     result[@intFromEnum(Feature.flat_scratch_insts)] = .{
         .llvm_name = "flat-scratch-insts",
         .description = "Have scratch_* flat memory instructions",
-        .dependencies = featureSet(&[_]Feature{}),
+        .dependencies = featureSet(&[_]Feature{
+            .flat_address_space,
+        }),
     };
     result[@intFromEnum(Feature.flat_segment_offset_bug)] = .{
         .llvm_name = "flat-segment-offset-bug",
         .description = "GFX10 bug where inst_offset is ignored when flat instructions access global memory",
         .dependencies = featureSet(&[_]Feature{}),
     };
+    result[@intFromEnum(Feature.fma_mix_bf16_insts)] = .{
+        .llvm_name = "fma-mix-bf16-insts",
+        .description = "Has v_fma_mix_f32_bf16, v_fma_mixlo_bf16, v_fma_mixhi_bf16 instructions",
+        .dependencies = featureSet(&[_]Feature{}),
+    };
     result[@intFromEnum(Feature.fma_mix_insts)] = .{
         .llvm_name = "fma-mix-insts",
         .description = "Has v_fma_mix_f32, v_fma_mixlo_f16, v_fma_mixhi_f16 instructions",
@@ -722,13 +834,16 @@ pub const all_features = blk: {
             .atomic_fmin_fmax_global_f32,
             .atomic_fmin_fmax_global_f64,
             .ci_insts,
+            .cube_insts,
+            .cvt_norm_insts,
+            .cvt_pknorm_vop2_insts,
+            .cvt_pknorm_vop3_insts,
             .default_component_zero,
             .dpp,
             .dpp8,
             .extended_image_insts,
             .fast_denormal_f32,
             .fast_fmaf,
-            .flat_address_space,
             .flat_global_insts,
             .flat_inst_offsets,
             .flat_scratch_insts,
@@ -743,14 +858,17 @@ pub const all_features = blk: {
             .image_insts,
             .int_clamp_insts,
             .inv_2pi_inline_imm,
+            .lerp_inst,
             .max_hard_clause_length_63,
             .mimg_r128,
             .movrel,
             .no_data_dep_hazard,
             .no_sdst_cmpx,
             .pk_fmac_f16_inst,
+            .qsad_insts,
             .s_memrealtime,
             .s_memtime_inst,
+            .sad_insts,
             .sdwa,
             .sdwa_omod,
             .sdwa_scalar,
@@ -797,13 +915,16 @@ pub const all_features = blk: {
             .atomic_fmin_fmax_flat_f32,
             .atomic_fmin_fmax_global_f32,
             .ci_insts,
+            .cube_insts,
+            .cvt_norm_insts,
+            .cvt_pknorm_vop2_insts,
+            .cvt_pknorm_vop3_insts,
             .default_component_zero,
             .dpp,
             .dpp8,
             .extended_image_insts,
             .fast_denormal_f32,
             .fast_fmaf,
-            .flat_address_space,
             .flat_global_insts,
             .flat_inst_offsets,
             .flat_scratch_insts,
@@ -821,12 +942,15 @@ pub const all_features = blk: {
             .gws,
             .int_clamp_insts,
             .inv_2pi_inline_imm,
+            .lerp_inst,
             .max_hard_clause_length_32,
             .mimg_r128,
             .movrel,
             .no_data_dep_hazard,
             .no_sdst_cmpx,
             .pk_fmac_f16_inst,
+            .qsad_insts,
+            .sad_insts,
             .true16,
             .unaligned_buffer_access,
             .unaligned_ds_access,
@@ -850,7 +974,6 @@ pub const all_features = blk: {
             .@"16_bit_insts",
             .a16,
             .add_no_carry_insts,
-            .addressablelocalmemorysize65536,
             .agent_scope_fine_grained_remote_memory_atomics,
             .aperture_regs,
             .atomic_fmin_fmax_flat_f32,
@@ -861,7 +984,6 @@ pub const all_features = blk: {
             .dpp8,
             .fast_denormal_f32,
             .fast_fmaf,
-            .flat_address_space,
             .flat_global_insts,
             .flat_inst_offsets,
             .flat_scratch_insts,
@@ -926,11 +1048,14 @@ pub const all_features = blk: {
             .add_no_carry_insts,
             .aperture_regs,
             .ci_insts,
+            .cube_insts,
+            .cvt_norm_insts,
+            .cvt_pknorm_vop2_insts,
+            .cvt_pknorm_vop3_insts,
             .default_component_zero,
             .dpp,
             .fast_denormal_f32,
             .fast_fmaf,
-            .flat_address_space,
             .flat_global_insts,
             .flat_inst_offsets,
             .flat_scratch_insts,
@@ -942,10 +1067,13 @@ pub const all_features = blk: {
             .gws,
             .int_clamp_insts,
             .inv_2pi_inline_imm,
+            .lerp_inst,
             .negative_scratch_offset_bug,
+            .qsad_insts,
             .r128_a16,
             .s_memrealtime,
             .s_memtime_inst,
+            .sad_insts,
             .scalar_atomics,
             .scalar_flat_scratch_insts,
             .scalar_stores,
@@ -997,6 +1125,11 @@ pub const all_features = blk: {
         .description = "Additional instructions for GFX9+",
         .dependencies = featureSet(&[_]Feature{}),
     };
+    result[@intFromEnum(Feature.globally_addressable_scratch)] = .{
+        .llvm_name = "globally-addressable-scratch",
+        .description = "FLAT instructions can access scratch memory for any thread in any wave",
+        .dependencies = featureSet(&[_]Feature{}),
+    };
     result[@intFromEnum(Feature.gws)] = .{
         .llvm_name = "gws",
         .description = "Has Global Wave Sync",
@@ -1072,6 +1205,11 @@ pub const all_features = blk: {
         .description = "The number of LDS banks per compute unit.",
         .dependencies = featureSet(&[_]Feature{}),
     };
+    result[@intFromEnum(Feature.lerp_inst)] = .{
+        .llvm_name = "lerp-inst",
+        .description = "Has v_lerp_u8 instruction",
+        .dependencies = featureSet(&[_]Feature{}),
+    };
     result[@intFromEnum(Feature.load_store_opt)] = .{
         .llvm_name = "load-store-opt",
         .description = "Enable SI load/store optimizer pass",
@@ -1097,6 +1235,11 @@ pub const all_features = blk: {
         .description = "Has v_mad_mix_f32, v_mad_mixlo_f16, v_mad_mixhi_f16 instructions",
         .dependencies = featureSet(&[_]Feature{}),
     };
+    result[@intFromEnum(Feature.mad_u32_inst)] = .{
+        .llvm_name = "mad-u32-inst",
+        .description = "Has v_mad_u32 instruction",
+        .dependencies = featureSet(&[_]Feature{}),
+    };
     result[@intFromEnum(Feature.mai_insts)] = .{
         .llvm_name = "mai-insts",
         .description = "Has mAI instructions",
@@ -1127,6 +1270,11 @@ pub const all_features = blk: {
         .description = "Maximum private access size may be 8",
         .dependencies = featureSet(&[_]Feature{}),
     };
+    result[@intFromEnum(Feature.mcast_load_insts)] = .{
+        .llvm_name = "mcast-load-insts",
+        .description = "Has multicast load instructions",
+        .dependencies = featureSet(&[_]Feature{}),
+    };
     result[@intFromEnum(Feature.memory_atomic_fadd_f32_denormal_support)] = .{
         .llvm_name = "memory-atomic-fadd-f32-denormal-support",
         .description = "global/flat/buffer atomic fadd for float supports denormal handling",
@@ -1142,6 +1290,11 @@ pub const all_features = blk: {
         .description = "Support 128-bit texture resources",
         .dependencies = featureSet(&[_]Feature{}),
     };
+    result[@intFromEnum(Feature.min3_max3_pkf16)] = .{
+        .llvm_name = "min3-max3-pkf16",
+        .description = "Has v_pk_min3_num_f16 and v_pk_max3_num_f16 instructions",
+        .dependencies = featureSet(&[_]Feature{}),
+    };
     result[@intFromEnum(Feature.minimum3_maximum3_f16)] = .{
         .llvm_name = "minimum3-maximum3-f16",
         .description = "Has v_minimum3_f16 and v_maximum3_f16 instructions",
@@ -1232,6 +1385,11 @@ pub const all_features = blk: {
         .description = "Has v_permlane32_swap_b32 instructions",
         .dependencies = featureSet(&[_]Feature{}),
     };
+    result[@intFromEnum(Feature.pk_add_min_max_insts)] = .{
+        .llvm_name = "pk-add-min-max-insts",
+        .description = "Has v_pk_add_{min|max}_{i|u}16 instructions",
+        .dependencies = featureSet(&[_]Feature{}),
+    };
     result[@intFromEnum(Feature.pk_fmac_f16_inst)] = .{
         .llvm_name = "pk-fmac-f16-inst",
         .description = "Has v_pk_fmac_f16 instruction",
@@ -1272,6 +1430,11 @@ pub const all_features = blk: {
         .description = "Has Pseudo Scalar Transcendental instructions",
         .dependencies = featureSet(&[_]Feature{}),
     };
+    result[@intFromEnum(Feature.qsad_insts)] = .{
+        .llvm_name = "qsad-insts",
+        .description = "Has v_qsad* instructions",
+        .dependencies = featureSet(&[_]Feature{}),
+    };
     result[@intFromEnum(Feature.r128_a16)] = .{
         .llvm_name = "r128-a16",
         .description = "Support gfx9-style A16 for 16-bit coordinates/gradients/lod/clamp/mip image operands, where a16 is aliased with r128",
@@ -1312,6 +1475,21 @@ pub const all_features = blk: {
         .description = "Has s_memtime instruction",
         .dependencies = featureSet(&[_]Feature{}),
     };
+    result[@intFromEnum(Feature.s_wakeup_barrier_inst)] = .{
+        .llvm_name = "s-wakeup-barrier-inst",
+        .description = "Has s_wakeup_barrier instruction.",
+        .dependencies = featureSet(&[_]Feature{}),
+    };
+    result[@intFromEnum(Feature.sad_insts)] = .{
+        .llvm_name = "sad-insts",
+        .description = "Has v_sad* instructions",
+        .dependencies = featureSet(&[_]Feature{}),
+    };
+    result[@intFromEnum(Feature.safe_cu_prefetch)] = .{
+        .llvm_name = "safe-cu-prefetch",
+        .description = "VMEM CU scope prefetches do not fail on illegal address",
+        .dependencies = featureSet(&[_]Feature{}),
+    };
     result[@intFromEnum(Feature.safe_smem_prefetch)] = .{
         .llvm_name = "safe-smem-prefetch",
         .description = "SMEM prefetches do not fail on illegal address",
@@ -1382,19 +1560,23 @@ pub const all_features = blk: {
             .atomic_fmin_fmax_global_f32,
             .atomic_fmin_fmax_global_f64,
             .ci_insts,
+            .cube_insts,
+            .cvt_pknorm_vop2_insts,
             .default_component_zero,
             .ds_src2_insts,
             .extended_image_insts,
-            .flat_address_space,
             .fp64,
             .gds,
             .gfx7_gfx8_gfx9_insts,
             .gws,
             .image_insts,
+            .lerp_inst,
             .mad_mac_f32_insts,
             .mimg_r128,
             .movrel,
+            .qsad_insts,
             .s_memtime_inst,
+            .sad_insts,
             .trig_reduced_range,
             .unaligned_buffer_access,
             .vmem_write_vgpr_in_order,
@@ -1406,6 +1588,11 @@ pub const all_features = blk: {
         .description = "Has s_setprio_inc_wg instruction.",
         .dependencies = featureSet(&[_]Feature{}),
     };
+    result[@intFromEnum(Feature.setreg_vgpr_msb_fixup)] = .{
+        .llvm_name = "setreg-vgpr-msb-fixup",
+        .description = "S_SETREG to MODE clobbers VGPR MSB bits, requires fixup",
+        .dependencies = featureSet(&[_]Feature{}),
+    };
     result[@intFromEnum(Feature.sgpr_init_bug)] = .{
         .llvm_name = "sgpr-init-bug",
         .description = "VI SGPR initialization bug requiring a fixed SGPR allocation size",
@@ -1438,6 +1625,8 @@ pub const all_features = blk: {
             .addressablelocalmemorysize32768,
             .atomic_fmin_fmax_global_f32,
             .atomic_fmin_fmax_global_f64,
+            .cube_insts,
+            .cvt_pknorm_vop2_insts,
             .default_component_zero,
             .ds_src2_insts,
             .extended_image_insts,
@@ -1446,10 +1635,12 @@ pub const all_features = blk: {
             .gws,
             .image_insts,
             .ldsbankcount32,
+            .lerp_inst,
             .mad_mac_f32_insts,
             .mimg_r128,
             .movrel,
             .s_memtime_inst,
+            .sad_insts,
             .trig_reduced_range,
             .vmem_write_vgpr_in_order,
             .wavefrontsize64,
@@ -1465,6 +1656,16 @@ pub const all_features = blk: {
         .description = "Hardware supports SRAMECC",
         .dependencies = featureSet(&[_]Feature{}),
     };
+    result[@intFromEnum(Feature.tanh_insts)] = .{
+        .llvm_name = "tanh-insts",
+        .description = "Has v_tanh_f32/f16 instructions",
+        .dependencies = featureSet(&[_]Feature{}),
+    };
+    result[@intFromEnum(Feature.tensor_cvt_lut_insts)] = .{
+        .llvm_name = "tensor-cvt-lut-insts",
+        .description = "Has v_perm_pk16* instructions",
+        .dependencies = featureSet(&[_]Feature{}),
+    };
     result[@intFromEnum(Feature.tgsplit)] = .{
         .llvm_name = "tgsplit",
         .description = "Enable threadgroup split execution",
@@ -1540,11 +1741,21 @@ pub const all_features = blk: {
         .description = "TODO: describe me",
         .dependencies = featureSet(&[_]Feature{}),
     };
+    result[@intFromEnum(Feature.vgpr_align2)] = .{
+        .llvm_name = "vgpr-align2",
+        .description = "VGPR and AGPR tuple operands require even alignment",
+        .dependencies = featureSet(&[_]Feature{}),
+    };
     result[@intFromEnum(Feature.vgpr_index_mode)] = .{
         .llvm_name = "vgpr-index-mode",
         .description = "Has VGPR mode register indexing",
         .dependencies = featureSet(&[_]Feature{}),
     };
+    result[@intFromEnum(Feature.vmem_pref_insts)] = .{
+        .llvm_name = "vmem-pref-insts",
+        .description = "Has flat_prefect_b8 and global_prefetch_b8 instructions",
+        .dependencies = featureSet(&[_]Feature{}),
+    };
     result[@intFromEnum(Feature.vmem_to_lds_load_insts)] = .{
         .llvm_name = "vmem-to-lds-load-insts",
         .description = "The platform has memory to lds instructions (global_load w/lds bit set, buffer_load w/lds bit set or global_load_lds. This does not include scratch_load_lds.",
@@ -1567,6 +1778,8 @@ pub const all_features = blk: {
             .@"16_bit_insts",
             .addressablelocalmemorysize65536,
             .ci_insts,
+            .cube_insts,
+            .cvt_pknorm_vop2_insts,
             .default_component_zero,
             .dpp,
             .ds_src2_insts,
@@ -1582,11 +1795,14 @@ pub const all_features = blk: {
             .image_insts,
             .int_clamp_insts,
             .inv_2pi_inline_imm,
+            .lerp_inst,
             .mad_mac_f32_insts,
             .mimg_r128,
             .movrel,
+            .qsad_insts,
             .s_memrealtime,
             .s_memtime_inst,
+            .sad_insts,
             .scalar_stores,
             .sdwa,
             .sdwa_mav,
@@ -1623,6 +1839,11 @@ pub const all_features = blk: {
         .description = "Has s_wait_xcnt instruction",
         .dependencies = featureSet(&[_]Feature{}),
     };
+    result[@intFromEnum(Feature.waits_before_system_scope_stores)] = .{
+        .llvm_name = "waits-before-system-scope-stores",
+        .description = "Target requires waits for loads and atomics before system scope stores",
+        .dependencies = featureSet(&[_]Feature{}),
+    };
     result[@intFromEnum(Feature.wavefrontsize16)] = .{
         .llvm_name = "wavefrontsize16",
         .description = "The number of threads per wavefront",
@@ -2044,6 +2265,8 @@ pub const cpu = struct {
             .architected_flat_scratch,
             .atomic_fadd_no_rtn_insts,
             .atomic_fadd_rtn_insts,
+            .back_off_barrier,
+            .d16_write_vgpr32,
             .dl_insts,
             .dot10_insts,
             .dot12_insts,
@@ -2077,6 +2300,8 @@ pub const cpu = struct {
             .architected_flat_scratch,
             .atomic_fadd_no_rtn_insts,
             .atomic_fadd_rtn_insts,
+            .back_off_barrier,
+            .d16_write_vgpr32,
             .dl_insts,
             .dot10_insts,
             .dot12_insts,
@@ -2108,6 +2333,8 @@ pub const cpu = struct {
             .architected_flat_scratch,
             .atomic_fadd_no_rtn_insts,
             .atomic_fadd_rtn_insts,
+            .back_off_barrier,
+            .d16_write_vgpr32,
             .dl_insts,
             .dot10_insts,
             .dot12_insts,
@@ -2140,6 +2367,8 @@ pub const cpu = struct {
             .architected_flat_scratch,
             .atomic_fadd_no_rtn_insts,
             .atomic_fadd_rtn_insts,
+            .back_off_barrier,
+            .d16_write_vgpr32,
             .dl_insts,
             .dot10_insts,
             .dot12_insts,
@@ -2171,6 +2400,8 @@ pub const cpu = struct {
             .architected_flat_scratch,
             .atomic_fadd_no_rtn_insts,
             .atomic_fadd_rtn_insts,
+            .back_off_barrier,
+            .d16_write_vgpr32,
             .dl_insts,
             .dot10_insts,
             .dot12_insts,
@@ -2188,6 +2419,7 @@ pub const cpu = struct {
             .packed_tid,
             .partial_nsa_encoding,
             .point_sample_accel,
+            .real_true16,
             .required_export_priority,
             .salu_float,
             .shader_cycles_register,
@@ -2202,6 +2434,8 @@ pub const cpu = struct {
             .architected_flat_scratch,
             .atomic_fadd_no_rtn_insts,
             .atomic_fadd_rtn_insts,
+            .back_off_barrier,
+            .d16_write_vgpr32,
             .dl_insts,
             .dot10_insts,
             .dot12_insts,
@@ -2219,6 +2453,7 @@ pub const cpu = struct {
             .packed_tid,
             .partial_nsa_encoding,
             .point_sample_accel,
+            .real_true16,
             .required_export_priority,
             .salu_float,
             .shader_cycles_register,
@@ -2232,6 +2467,8 @@ pub const cpu = struct {
             .architected_flat_scratch,
             .atomic_fadd_no_rtn_insts,
             .atomic_fadd_rtn_insts,
+            .back_off_barrier,
+            .d16_write_vgpr32,
             .dl_insts,
             .dot10_insts,
             .dot12_insts,
@@ -2249,6 +2486,7 @@ pub const cpu = struct {
             .packed_tid,
             .partial_nsa_encoding,
             .point_sample_accel,
+            .real_true16,
             .required_export_priority,
             .salu_float,
             .shader_cycles_register,
@@ -2262,6 +2500,8 @@ pub const cpu = struct {
             .architected_flat_scratch,
             .atomic_fadd_no_rtn_insts,
             .atomic_fadd_rtn_insts,
+            .back_off_barrier,
+            .d16_write_vgpr32,
             .dl_insts,
             .dot10_insts,
             .dot12_insts,
@@ -2278,6 +2518,7 @@ pub const cpu = struct {
             .nsa_encoding,
             .packed_tid,
             .partial_nsa_encoding,
+            .real_true16,
             .required_export_priority,
             .salu_float,
             .shader_cycles_register,
@@ -2291,6 +2532,8 @@ pub const cpu = struct {
             .architected_flat_scratch,
             .atomic_fadd_no_rtn_insts,
             .atomic_fadd_rtn_insts,
+            .back_off_barrier,
+            .d16_write_vgpr32,
             .dl_insts,
             .dot10_insts,
             .dot12_insts,
@@ -2309,6 +2552,7 @@ pub const cpu = struct {
             .packed_tid,
             .partial_nsa_encoding,
             .priv_enabled_trap2_nop_bug,
+            .real_true16,
             .required_export_priority,
             .requires_cov6,
             .shader_cycles_register,
@@ -2321,6 +2565,7 @@ pub const cpu = struct {
         .name = "gfx1200",
         .llvm_name = "gfx1200",
         .features = featureSet(&[_]Feature{
+            .addressablelocalmemorysize65536,
             .allocate1_5xvgprs,
             .architected_flat_scratch,
             .architected_sgprs,
@@ -2331,7 +2576,13 @@ pub const cpu = struct {
             .atomic_fadd_rtn_insts,
             .atomic_flat_pk_add_16_insts,
             .atomic_global_pk_add_bf16_inst,
+            .back_off_barrier,
             .bvh_dual_bvh_8_insts,
+            .cube_insts,
+            .cvt_norm_insts,
+            .cvt_pknorm_vop2_insts,
+            .cvt_pknorm_vop3_insts,
+            .d16_write_vgpr32,
             .dl_insts,
             .dot10_insts,
             .dot11_insts,
@@ -2346,22 +2597,27 @@ pub const cpu = struct {
             .gfx12,
             .image_insts,
             .ldsbankcount32,
+            .lerp_inst,
             .memory_atomic_fadd_f32_denormal_support,
             .nsa_encoding,
             .packed_tid,
             .partial_nsa_encoding,
             .pseudo_scalar_trans,
+            .qsad_insts,
             .restricted_soffset,
+            .sad_insts,
             .salu_float,
             .scalar_dwordx3_loads,
             .shader_cycles_hi_lo_registers,
             .vcmpx_permlane_hazard,
+            .waits_before_system_scope_stores,
         }),
     };
     pub const gfx1201: CpuModel = .{
         .name = "gfx1201",
         .llvm_name = "gfx1201",
         .features = featureSet(&[_]Feature{
+            .addressablelocalmemorysize65536,
             .allocate1_5xvgprs,
             .architected_flat_scratch,
             .architected_sgprs,
@@ -2372,7 +2628,13 @@ pub const cpu = struct {
             .atomic_fadd_rtn_insts,
             .atomic_flat_pk_add_16_insts,
             .atomic_global_pk_add_bf16_inst,
+            .back_off_barrier,
             .bvh_dual_bvh_8_insts,
+            .cube_insts,
+            .cvt_norm_insts,
+            .cvt_pknorm_vop2_insts,
+            .cvt_pknorm_vop3_insts,
+            .d16_write_vgpr32,
             .dl_insts,
             .dot10_insts,
             .dot11_insts,
@@ -2387,23 +2649,32 @@ pub const cpu = struct {
             .gfx12,
             .image_insts,
             .ldsbankcount32,
+            .lerp_inst,
             .memory_atomic_fadd_f32_denormal_support,
             .nsa_encoding,
             .packed_tid,
             .partial_nsa_encoding,
             .pseudo_scalar_trans,
+            .qsad_insts,
             .restricted_soffset,
+            .sad_insts,
             .salu_float,
             .scalar_dwordx3_loads,
             .shader_cycles_hi_lo_registers,
             .vcmpx_permlane_hazard,
+            .waits_before_system_scope_stores,
         }),
     };
     pub const gfx1250: CpuModel = .{
         .name = "gfx1250",
         .llvm_name = "gfx1250",
         .features = featureSet(&[_]Feature{
+            .@"1024_addressable_vgprs",
+            .@"45_bit_num_records_buffer_resource",
             .@"64_bit_literals",
+            .add_min_max_insts,
+            .add_sub_u64_insts,
+            .addressablelocalmemorysize327680,
             .architected_flat_scratch,
             .architected_sgprs,
             .ashr_pk_insts,
@@ -2417,49 +2688,164 @@ pub const cpu = struct {
             .atomic_fmin_fmax_global_f64,
             .atomic_global_pk_add_bf16_inst,
             .bf16_cvt_insts,
+            .bf16_pk_insts,
             .bf16_trans_insts,
             .bitop3_insts,
+            .clusters,
+            .cube_insts,
             .cumode,
+            .cvt_norm_insts,
             .cvt_pk_f16_f32_inst,
+            .cvt_pknorm_vop2_insts,
+            .cvt_pknorm_vop3_insts,
+            .d16_write_vgpr32,
             .dl_insts,
             .dot7_insts,
             .dot8_insts,
             .dpp_src1_sgpr,
+            .emulated_system_scope_atomics,
             .flat_atomic_fadd_f32_inst,
             .flat_buffer_global_fadd_f64_inst,
+            .flat_gvs_mode,
+            .fma_mix_bf16_insts,
             .fmacf64_inst,
             .fp8_conversion_insts,
             .fp8e5m3_insts,
             .gfx12,
             .gfx1250_insts,
+            .globally_addressable_scratch,
             .kernarg_preload,
             .lds_barrier_arrive_atomic,
             .ldsbankcount32,
+            .lerp_inst,
             .lshl_add_u64_inst,
+            .mad_u32_inst,
             .max_hard_clause_length_63,
+            .mcast_load_insts,
             .memory_atomic_fadd_f32_denormal_support,
+            .min3_max3_pkf16,
             .minimum3_maximum3_pkf16,
             .packed_fp32_ops,
             .packed_tid,
             .permlane16_swap,
+            .pk_add_min_max_insts,
             .prng_inst,
             .pseudo_scalar_trans,
+            .qsad_insts,
             .restricted_soffset,
+            .s_wakeup_barrier_inst,
+            .sad_insts,
+            .salu_float,
+            .scalar_dwordx3_loads,
+            .setprio_inc_wg_inst,
+            .setreg_vgpr_msb_fixup,
+            .shader_cycles_hi_lo_registers,
+            .sramecc_support,
+            .tanh_insts,
+            .tensor_cvt_lut_insts,
+            .transpose_load_f4f6_insts,
+            .vcmpx_permlane_hazard,
+            .vgpr_align2,
+            .vmem_pref_insts,
+            .wait_xcnt,
+            .wavefrontsize32,
+            .xnack,
+            .xnack_support,
+        }),
+    };
+    pub const gfx1251: CpuModel = .{
+        .name = "gfx1251",
+        .llvm_name = "gfx1251",
+        .features = featureSet(&[_]Feature{
+            .@"1024_addressable_vgprs",
+            .@"45_bit_num_records_buffer_resource",
+            .@"64_bit_literals",
+            .add_min_max_insts,
+            .add_sub_u64_insts,
+            .addressablelocalmemorysize327680,
+            .architected_flat_scratch,
+            .architected_sgprs,
+            .ashr_pk_insts,
+            .atomic_buffer_global_pk_add_f16_insts,
+            .atomic_buffer_pk_add_bf16_inst,
+            .atomic_ds_pk_add_16_insts,
+            .atomic_fadd_no_rtn_insts,
+            .atomic_fadd_rtn_insts,
+            .atomic_flat_pk_add_16_insts,
+            .atomic_fmin_fmax_flat_f64,
+            .atomic_fmin_fmax_global_f64,
+            .atomic_global_pk_add_bf16_inst,
+            .bf16_cvt_insts,
+            .bf16_pk_insts,
+            .bf16_trans_insts,
+            .bitop3_insts,
+            .clusters,
+            .cube_insts,
+            .cumode,
+            .cvt_norm_insts,
+            .cvt_pk_f16_f32_inst,
+            .cvt_pknorm_vop2_insts,
+            .cvt_pknorm_vop3_insts,
+            .d16_write_vgpr32,
+            .dl_insts,
+            .dot7_insts,
+            .dot8_insts,
+            .dpp_64bit,
+            .dpp_src1_sgpr,
+            .emulated_system_scope_atomics,
+            .flat_atomic_fadd_f32_inst,
+            .flat_buffer_global_fadd_f64_inst,
+            .flat_gvs_mode,
+            .fma_mix_bf16_insts,
+            .fmacf64_inst,
+            .fp8_conversion_insts,
+            .fp8e5m3_insts,
+            .gfx12,
+            .gfx1250_insts,
+            .globally_addressable_scratch,
+            .kernarg_preload,
+            .lds_barrier_arrive_atomic,
+            .ldsbankcount32,
+            .lerp_inst,
+            .lshl_add_u64_inst,
+            .mad_u32_inst,
+            .max_hard_clause_length_63,
+            .mcast_load_insts,
+            .memory_atomic_fadd_f32_denormal_support,
+            .min3_max3_pkf16,
+            .minimum3_maximum3_pkf16,
+            .packed_fp32_ops,
+            .packed_tid,
+            .permlane16_swap,
+            .pk_add_min_max_insts,
+            .prng_inst,
+            .pseudo_scalar_trans,
+            .qsad_insts,
+            .restricted_soffset,
+            .s_wakeup_barrier_inst,
+            .sad_insts,
             .salu_float,
             .scalar_dwordx3_loads,
             .setprio_inc_wg_inst,
             .shader_cycles_hi_lo_registers,
             .sramecc_support,
+            .tanh_insts,
+            .tensor_cvt_lut_insts,
             .transpose_load_f4f6_insts,
             .vcmpx_permlane_hazard,
+            .vgpr_align2,
+            .vmem_pref_insts,
             .wait_xcnt,
             .wavefrontsize32,
+            .xnack,
+            .xnack_support,
         }),
     };
     pub const gfx12_generic: CpuModel = .{
         .name = "gfx12_generic",
         .llvm_name = "gfx12-generic",
         .features = featureSet(&[_]Feature{
+            .addressablelocalmemorysize65536,
             .allocate1_5xvgprs,
             .architected_flat_scratch,
             .architected_sgprs,
@@ -2470,7 +2856,13 @@ pub const cpu = struct {
             .atomic_fadd_rtn_insts,
             .atomic_flat_pk_add_16_insts,
             .atomic_global_pk_add_bf16_inst,
+            .back_off_barrier,
             .bvh_dual_bvh_8_insts,
+            .cube_insts,
+            .cvt_norm_insts,
+            .cvt_pknorm_vop2_insts,
+            .cvt_pknorm_vop3_insts,
+            .d16_write_vgpr32,
             .dl_insts,
             .dot10_insts,
             .dot11_insts,
@@ -2485,17 +2877,21 @@ pub const cpu = struct {
             .gfx12,
             .image_insts,
             .ldsbankcount32,
+            .lerp_inst,
             .memory_atomic_fadd_f32_denormal_support,
             .nsa_encoding,
             .packed_tid,
             .partial_nsa_encoding,
             .pseudo_scalar_trans,
+            .qsad_insts,
             .requires_cov6,
             .restricted_soffset,
+            .sad_insts,
             .salu_float,
             .scalar_dwordx3_loads,
             .shader_cycles_hi_lo_registers,
             .vcmpx_permlane_hazard,
+            .waits_before_system_scope_stores,
         }),
     };
     pub const gfx600: CpuModel = .{
@@ -2779,6 +3175,7 @@ pub const cpu = struct {
             .packed_tid,
             .pk_fmac_f16_inst,
             .sramecc_support,
+            .vgpr_align2,
         }),
     };
     pub const gfx90c: CpuModel = .{
@@ -2842,6 +3239,7 @@ pub const cpu = struct {
             .packed_tid,
             .pk_fmac_f16_inst,
             .sramecc_support,
+            .vgpr_align2,
             .xf32_insts,
         }),
     };
@@ -2897,6 +3295,7 @@ pub const cpu = struct {
             .pk_fmac_f16_inst,
             .prng_inst,
             .sramecc_support,
+            .vgpr_align2,
         }),
     };
     pub const gfx9_4_generic: CpuModel = .{
@@ -2943,6 +3342,7 @@ pub const cpu = struct {
             .pk_fmac_f16_inst,
             .requires_cov6,
             .sramecc_support,
+            .vgpr_align2,
         }),
     };
     pub const gfx9_generic: CpuModel = .{
diff --git a/lib/std/Target/arm.zig b/lib/std/Target/arm.zig
index 7c2e1f88e3..4037341cc7 100644
--- a/lib/std/Target/arm.zig
+++ b/lib/std/Target/arm.zig
@@ -88,6 +88,7 @@ pub const Feature = enum {
     has_v9_4a,
     has_v9_5a,
     has_v9_6a,
+    has_v9_7a,
     has_v9a,
     hwdiv,
     hwdiv_arm,
@@ -107,7 +108,6 @@ pub const Feature = enum {
     mve2beat,
     mve4beat,
     mve_fp,
-    nacl_trap,
     neon,
     neon_fpmovs,
     neonfp,
@@ -187,6 +187,7 @@ pub const Feature = enum {
     v9_4a,
     v9_5a,
     v9_6a,
+    v9_7a,
     v9a,
     vfp2,
     vfp2sp,
@@ -748,6 +749,13 @@ pub const all_features = blk: {
             .has_v9_5a,
         }),
     };
+    result[@intFromEnum(Feature.has_v9_7a)] = .{
+        .llvm_name = "v9.7a",
+        .description = "Support ARM v9.7a instructions",
+        .dependencies = featureSet(&[_]Feature{
+            .has_v9_6a,
+        }),
+    };
     result[@intFromEnum(Feature.has_v9a)] = .{
         .llvm_name = "v9a",
         .description = "Support ARM v9a instructions",
@@ -859,11 +867,6 @@ pub const all_features = blk: {
             .mve,
         }),
     };
-    result[@intFromEnum(Feature.nacl_trap)] = .{
-        .llvm_name = "nacl-trap",
-        .description = "NaCl trap",
-        .dependencies = featureSet(&[_]Feature{}),
-    };
     result[@intFromEnum(Feature.neon)] = .{
         .llvm_name = "neon",
         .description = "Enable NEON instructions",
@@ -1579,6 +1582,22 @@ pub const all_features = blk: {
             .virtualization,
         }),
     };
+    result[@intFromEnum(Feature.v9_7a)] = .{
+        .llvm_name = "armv9.7-a",
+        .description = "ARMv97a architecture",
+        .dependencies = featureSet(&[_]Feature{
+            .aclass,
+            .crc,
+            .db,
+            .dsp,
+            .fp_armv8,
+            .has_v9_7a,
+            .mp,
+            .ras,
+            .trustzone,
+            .virtualization,
+        }),
+    };
     result[@intFromEnum(Feature.v9a)] = .{
         .llvm_name = "armv9-a",
         .description = "ARMv9a architecture",
@@ -2658,6 +2677,21 @@ pub const cpu = struct {
             .v8m_main,
         }),
     };
+    pub const star_mc3: CpuModel = .{
+        .name = "star_mc3",
+        .llvm_name = "star-mc3",
+        .features = featureSet(&[_]Feature{
+            .fp_armv8d16,
+            .loop_align,
+            .mve1beat,
+            .mve_fp,
+            .no_branch_predictor,
+            .pacbti,
+            .slowfpvmlx,
+            .use_misched,
+            .v8_1m_main,
+        }),
+    };
     pub const strongarm: CpuModel = .{
         .name = "strongarm",
         .llvm_name = "strongarm",
diff --git a/lib/std/Target/bpf.zig b/lib/std/Target/bpf.zig
index 3f72eb65e2..0c5ffad8e1 100644
--- a/lib/std/Target/bpf.zig
+++ b/lib/std/Target/bpf.zig
@@ -5,6 +5,7 @@ const CpuFeature = std.Target.Cpu.Feature;
 const CpuModel = std.Target.Cpu.Model;
 
 pub const Feature = enum {
+    allows_misaligned_mem_access,
     alu32,
     dummy,
     dwarfris,
@@ -19,6 +20,11 @@ pub const all_features = blk: {
     const len = @typeInfo(Feature).@"enum".fields.len;
     std.debug.assert(len <= CpuFeature.Set.needed_bit_count);
     var result: [len]CpuFeature = undefined;
+    result[@intFromEnum(Feature.allows_misaligned_mem_access)] = .{
+        .llvm_name = "allows-misaligned-mem-access",
+        .description = "Allows misaligned memory access",
+        .dependencies = featureSet(&[_]Feature{}),
+    };
     result[@intFromEnum(Feature.alu32)] = .{
         .llvm_name = "alu32",
         .description = "Enable ALU32 instructions",
diff --git a/lib/std/Target/hexagon.zig b/lib/std/Target/hexagon.zig
index a2185b2f5f..1ea2679708 100644
--- a/lib/std/Target/hexagon.zig
+++ b/lib/std/Target/hexagon.zig
@@ -25,6 +25,7 @@ pub const Feature = enum {
     hvxv73,
     hvxv75,
     hvxv79,
+    hvxv81,
     long_calls,
     mem_noshuf,
     memops,
@@ -36,7 +37,6 @@ pub const Feature = enum {
     reserved_r19,
     small_data,
     tinycore,
-    unsafe_fp,
     v5,
     v55,
     v60,
@@ -50,6 +50,7 @@ pub const Feature = enum {
     v73,
     v75,
     v79,
+    v81,
     zreg,
 };
 
@@ -189,6 +190,13 @@ pub const all_features = blk: {
             .hvxv75,
         }),
     };
+    result[@intFromEnum(Feature.hvxv81)] = .{
+        .llvm_name = "hvxv81",
+        .description = "Hexagon HVX instructions",
+        .dependencies = featureSet(&[_]Feature{
+            .hvxv79,
+        }),
+    };
     result[@intFromEnum(Feature.long_calls)] = .{
         .llvm_name = "long-calls",
         .description = "Use constant-extended calls",
@@ -248,11 +256,6 @@ pub const all_features = blk: {
         .description = "Hexagon Tiny Core",
         .dependencies = featureSet(&[_]Feature{}),
     };
-    result[@intFromEnum(Feature.unsafe_fp)] = .{
-        .llvm_name = "unsafe-fp",
-        .description = "Use unsafe FP math",
-        .dependencies = featureSet(&[_]Feature{}),
-    };
     result[@intFromEnum(Feature.v5)] = .{
         .llvm_name = "v5",
         .description = "Enable Hexagon V5 architecture",
@@ -318,6 +321,11 @@ pub const all_features = blk: {
         .description = "Enable Hexagon V79 architecture",
         .dependencies = featureSet(&[_]Feature{}),
     };
+    result[@intFromEnum(Feature.v81)] = .{
+        .llvm_name = "v81",
+        .description = "Enable Hexagon V81 architecture",
+        .dependencies = featureSet(&[_]Feature{}),
+    };
     result[@intFromEnum(Feature.zreg)] = .{
         .llvm_name = "zreg",
         .description = "Hexagon ZReg extension instructions",
@@ -662,4 +670,31 @@ pub const cpu = struct {
             .v79,
         }),
     };
+    pub const hexagonv81: CpuModel = .{
+        .name = "hexagonv81",
+        .llvm_name = "hexagonv81",
+        .features = featureSet(&[_]Feature{
+            .compound,
+            .duplex,
+            .mem_noshuf,
+            .memops,
+            .nvj,
+            .nvs,
+            .small_data,
+            .v5,
+            .v55,
+            .v60,
+            .v62,
+            .v65,
+            .v66,
+            .v67,
+            .v68,
+            .v69,
+            .v71,
+            .v73,
+            .v75,
+            .v79,
+            .v81,
+        }),
+    };
 };
diff --git a/lib/std/Target/loongarch.zig b/lib/std/Target/loongarch.zig
index 251e202daf..63dc829408 100644
--- a/lib/std/Target/loongarch.zig
+++ b/lib/std/Target/loongarch.zig
@@ -175,6 +175,24 @@ pub const cpu = struct {
             .ual,
         }),
     };
+    pub const la32rv1_0: CpuModel = .{
+        .name = "la32rv1_0",
+        .llvm_name = null,
+        .features = featureSet(&[_]Feature{
+            .@"32bit",
+            .ual,
+        }),
+    };
+    pub const la32v1_0: CpuModel = .{
+        .name = "la32v1_0",
+        .llvm_name = null,
+        .features = featureSet(&[_]Feature{
+            .@"32bit",
+            .@"32s",
+            .d,
+            .ual,
+        }),
+    };
     pub const la464: CpuModel = .{
         .name = "la464",
         .llvm_name = "la464",
diff --git a/lib/std/Target/mips.zig b/lib/std/Target/mips.zig
index 2a1bedd713..b8a268702b 100644
--- a/lib/std/Target/mips.zig
+++ b/lib/std/Target/mips.zig
@@ -56,6 +56,7 @@ pub const Feature = enum {
     soft_float,
     strict_align,
     sym32,
+    use_compact_branches,
     use_indirect_jump_hazard,
     use_tcc_in_div,
     vfpu,
@@ -391,6 +392,11 @@ pub const all_features = blk: {
         .description = "Symbols are 32 bit on Mips64",
         .dependencies = featureSet(&[_]Feature{}),
     };
+    result[@intFromEnum(Feature.use_compact_branches)] = .{
+        .llvm_name = "use-compact-branches",
+        .description = "Use compact branch instructions for MIPS32R6/MIPS64R6",
+        .dependencies = featureSet(&[_]Feature{}),
+    };
     result[@intFromEnum(Feature.use_indirect_jump_hazard)] = .{
         .llvm_name = "use-indirect-jump-hazard",
         .description = "Use indirect jump guards to prevent certain speculation based attacks",
diff --git a/lib/std/Target/nvptx.zig b/lib/std/Target/nvptx.zig
index 42f8e529bf..8573c18c89 100644
--- a/lib/std/Target/nvptx.zig
+++ b/lib/std/Target/nvptx.zig
@@ -35,6 +35,7 @@ pub const Feature = enum {
     ptx86,
     ptx87,
     ptx88,
+    ptx90,
     sm_100,
     sm_100a,
     sm_100f,
@@ -44,6 +45,9 @@ pub const Feature = enum {
     sm_103,
     sm_103a,
     sm_103f,
+    sm_110,
+    sm_110a,
+    sm_110f,
     sm_120,
     sm_120a,
     sm_120f,
@@ -68,6 +72,7 @@ pub const Feature = enum {
     sm_80,
     sm_86,
     sm_87,
+    sm_88,
     sm_89,
     sm_90,
     sm_90a,
@@ -232,6 +237,11 @@ pub const all_features = blk: {
         .description = "Use PTX version 88",
         .dependencies = featureSet(&[_]Feature{}),
     };
+    result[@intFromEnum(Feature.ptx90)] = .{
+        .llvm_name = "ptx90",
+        .description = "Use PTX version 90",
+        .dependencies = featureSet(&[_]Feature{}),
+    };
     result[@intFromEnum(Feature.sm_100)] = .{
         .llvm_name = "sm_100",
         .description = "Target SM 100",
@@ -277,6 +287,21 @@ pub const all_features = blk: {
         .description = "Target SM 103f",
         .dependencies = featureSet(&[_]Feature{}),
     };
+    result[@intFromEnum(Feature.sm_110)] = .{
+        .llvm_name = "sm_110",
+        .description = "Target SM 110",
+        .dependencies = featureSet(&[_]Feature{}),
+    };
+    result[@intFromEnum(Feature.sm_110a)] = .{
+        .llvm_name = "sm_110a",
+        .description = "Target SM 110a",
+        .dependencies = featureSet(&[_]Feature{}),
+    };
+    result[@intFromEnum(Feature.sm_110f)] = .{
+        .llvm_name = "sm_110f",
+        .description = "Target SM 110f",
+        .dependencies = featureSet(&[_]Feature{}),
+    };
     result[@intFromEnum(Feature.sm_120)] = .{
         .llvm_name = "sm_120",
         .description = "Target SM 120",
@@ -397,6 +422,11 @@ pub const all_features = blk: {
         .description = "Target SM 87",
         .dependencies = featureSet(&[_]Feature{}),
     };
+    result[@intFromEnum(Feature.sm_88)] = .{
+        .llvm_name = "sm_88",
+        .description = "Target SM 88",
+        .dependencies = featureSet(&[_]Feature{}),
+    };
     result[@intFromEnum(Feature.sm_89)] = .{
         .llvm_name = "sm_89",
         .description = "Target SM 89",
@@ -425,7 +455,6 @@ pub const cpu = struct {
         .name = "sm_100",
         .llvm_name = "sm_100",
         .features = featureSet(&[_]Feature{
-            .ptx86,
             .sm_100,
         }),
     };
@@ -433,7 +462,6 @@ pub const cpu = struct {
         .name = "sm_100a",
         .llvm_name = "sm_100a",
         .features = featureSet(&[_]Feature{
-            .ptx86,
             .sm_100a,
         }),
     };
@@ -441,7 +469,6 @@ pub const cpu = struct {
         .name = "sm_100f",
         .llvm_name = "sm_100f",
         .features = featureSet(&[_]Feature{
-            .ptx88,
             .sm_100f,
         }),
     };
@@ -449,7 +476,6 @@ pub const cpu = struct {
         .name = "sm_101",
         .llvm_name = "sm_101",
         .features = featureSet(&[_]Feature{
-            .ptx86,
             .sm_101,
         }),
     };
@@ -457,7 +483,6 @@ pub const cpu = struct {
         .name = "sm_101a",
         .llvm_name = "sm_101a",
         .features = featureSet(&[_]Feature{
-            .ptx86,
             .sm_101a,
         }),
     };
@@ -465,7 +490,6 @@ pub const cpu = struct {
         .name = "sm_101f",
         .llvm_name = "sm_101f",
         .features = featureSet(&[_]Feature{
-            .ptx88,
             .sm_101f,
         }),
     };
@@ -473,7 +497,6 @@ pub const cpu = struct {
         .name = "sm_103",
         .llvm_name = "sm_103",
         .features = featureSet(&[_]Feature{
-            .ptx88,
             .sm_103,
         }),
     };
@@ -481,7 +504,6 @@ pub const cpu = struct {
         .name = "sm_103a",
         .llvm_name = "sm_103a",
         .features = featureSet(&[_]Feature{
-            .ptx88,
             .sm_103a,
         }),
     };
@@ -489,15 +511,34 @@ pub const cpu = struct {
         .name = "sm_103f",
         .llvm_name = "sm_103f",
         .features = featureSet(&[_]Feature{
-            .ptx88,
             .sm_103f,
         }),
     };
+    pub const sm_110: CpuModel = .{
+        .name = "sm_110",
+        .llvm_name = "sm_110",
+        .features = featureSet(&[_]Feature{
+            .sm_110,
+        }),
+    };
+    pub const sm_110a: CpuModel = .{
+        .name = "sm_110a",
+        .llvm_name = "sm_110a",
+        .features = featureSet(&[_]Feature{
+            .sm_110a,
+        }),
+    };
+    pub const sm_110f: CpuModel = .{
+        .name = "sm_110f",
+        .llvm_name = "sm_110f",
+        .features = featureSet(&[_]Feature{
+            .sm_110f,
+        }),
+    };
     pub const sm_120: CpuModel = .{
         .name = "sm_120",
         .llvm_name = "sm_120",
         .features = featureSet(&[_]Feature{
-            .ptx87,
             .sm_120,
         }),
     };
@@ -505,7 +546,6 @@ pub const cpu = struct {
         .name = "sm_120a",
         .llvm_name = "sm_120a",
         .features = featureSet(&[_]Feature{
-            .ptx87,
             .sm_120a,
         }),
     };
@@ -513,7 +553,6 @@ pub const cpu = struct {
         .name = "sm_120f",
         .llvm_name = "sm_120f",
         .features = featureSet(&[_]Feature{
-            .ptx88,
             .sm_120f,
         }),
     };
@@ -521,7 +560,6 @@ pub const cpu = struct {
         .name = "sm_121",
         .llvm_name = "sm_121",
         .features = featureSet(&[_]Feature{
-            .ptx88,
             .sm_121,
         }),
     };
@@ -529,7 +567,6 @@ pub const cpu = struct {
         .name = "sm_121a",
         .llvm_name = "sm_121a",
         .features = featureSet(&[_]Feature{
-            .ptx88,
             .sm_121a,
         }),
     };
@@ -537,7 +574,6 @@ pub const cpu = struct {
         .name = "sm_121f",
         .llvm_name = "sm_121f",
         .features = featureSet(&[_]Feature{
-            .ptx88,
             .sm_121f,
         }),
     };
@@ -545,7 +581,6 @@ pub const cpu = struct {
         .name = "sm_20",
         .llvm_name = "sm_20",
         .features = featureSet(&[_]Feature{
-            .ptx32,
             .sm_20,
         }),
     };
@@ -553,7 +588,6 @@ pub const cpu = struct {
         .name = "sm_21",
         .llvm_name = "sm_21",
         .features = featureSet(&[_]Feature{
-            .ptx32,
             .sm_21,
         }),
     };
@@ -568,7 +602,6 @@ pub const cpu = struct {
         .name = "sm_32",
         .llvm_name = "sm_32",
         .features = featureSet(&[_]Feature{
-            .ptx40,
             .sm_32,
         }),
     };
@@ -576,7 +609,6 @@ pub const cpu = struct {
         .name = "sm_35",
         .llvm_name = "sm_35",
         .features = featureSet(&[_]Feature{
-            .ptx32,
             .sm_35,
         }),
     };
@@ -584,7 +616,6 @@ pub const cpu = struct {
         .name = "sm_37",
         .llvm_name = "sm_37",
         .features = featureSet(&[_]Feature{
-            .ptx41,
             .sm_37,
         }),
     };
@@ -592,7 +623,6 @@ pub const cpu = struct {
         .name = "sm_50",
         .llvm_name = "sm_50",
         .features = featureSet(&[_]Feature{
-            .ptx40,
             .sm_50,
         }),
     };
@@ -600,7 +630,6 @@ pub const cpu = struct {
         .name = "sm_52",
         .llvm_name = "sm_52",
         .features = featureSet(&[_]Feature{
-            .ptx41,
             .sm_52,
         }),
     };
@@ -608,7 +637,6 @@ pub const cpu = struct {
         .name = "sm_53",
         .llvm_name = "sm_53",
         .features = featureSet(&[_]Feature{
-            .ptx42,
             .sm_53,
         }),
     };
@@ -616,7 +644,6 @@ pub const cpu = struct {
         .name = "sm_60",
         .llvm_name = "sm_60",
         .features = featureSet(&[_]Feature{
-            .ptx50,
             .sm_60,
         }),
     };
@@ -624,7 +651,6 @@ pub const cpu = struct {
         .name = "sm_61",
         .llvm_name = "sm_61",
         .features = featureSet(&[_]Feature{
-            .ptx50,
             .sm_61,
         }),
     };
@@ -632,7 +658,6 @@ pub const cpu = struct {
         .name = "sm_62",
         .llvm_name = "sm_62",
         .features = featureSet(&[_]Feature{
-            .ptx50,
             .sm_62,
         }),
     };
@@ -640,7 +665,6 @@ pub const cpu = struct {
         .name = "sm_70",
         .llvm_name = "sm_70",
         .features = featureSet(&[_]Feature{
-            .ptx60,
             .sm_70,
         }),
     };
@@ -648,7 +672,6 @@ pub const cpu = struct {
         .name = "sm_72",
         .llvm_name = "sm_72",
         .features = featureSet(&[_]Feature{
-            .ptx61,
             .sm_72,
         }),
     };
@@ -656,7 +679,6 @@ pub const cpu = struct {
         .name = "sm_75",
         .llvm_name = "sm_75",
         .features = featureSet(&[_]Feature{
-            .ptx63,
             .sm_75,
         }),
     };
@@ -664,7 +686,6 @@ pub const cpu = struct {
         .name = "sm_80",
         .llvm_name = "sm_80",
         .features = featureSet(&[_]Feature{
-            .ptx70,
             .sm_80,
         }),
     };
@@ -672,7 +693,6 @@ pub const cpu = struct {
         .name = "sm_86",
         .llvm_name = "sm_86",
         .features = featureSet(&[_]Feature{
-            .ptx71,
             .sm_86,
         }),
     };
@@ -680,15 +700,20 @@ pub const cpu = struct {
         .name = "sm_87",
         .llvm_name = "sm_87",
         .features = featureSet(&[_]Feature{
-            .ptx74,
             .sm_87,
         }),
     };
+    pub const sm_88: CpuModel = .{
+        .name = "sm_88",
+        .llvm_name = "sm_88",
+        .features = featureSet(&[_]Feature{
+            .sm_88,
+        }),
+    };
     pub const sm_89: CpuModel = .{
         .name = "sm_89",
         .llvm_name = "sm_89",
         .features = featureSet(&[_]Feature{
-            .ptx78,
             .sm_89,
         }),
     };
@@ -696,7 +721,6 @@ pub const cpu = struct {
         .name = "sm_90",
         .llvm_name = "sm_90",
         .features = featureSet(&[_]Feature{
-            .ptx78,
             .sm_90,
         }),
     };
@@ -704,7 +728,6 @@ pub const cpu = struct {
         .name = "sm_90a",
         .llvm_name = "sm_90a",
         .features = featureSet(&[_]Feature{
-            .ptx80,
             .sm_90a,
         }),
     };
diff --git a/lib/std/Target/powerpc.zig b/lib/std/Target/powerpc.zig
index 5348359f96..8e36d87f97 100644
--- a/lib/std/Target/powerpc.zig
+++ b/lib/std/Target/powerpc.zig
@@ -6,6 +6,7 @@ const CpuModel = std.Target.Cpu.Model;
 
 pub const Feature = enum {
     @"64bit",
+    @"64bit_support",
     @"64bitregs",
     allow_unaligned_fp_access,
     altivec,
@@ -97,7 +98,14 @@ pub const all_features = blk: {
     var result: [len]CpuFeature = undefined;
     result[@intFromEnum(Feature.@"64bit")] = .{
         .llvm_name = "64bit",
-        .description = "Enable 64-bit instructions",
+        .description = "Enable 64-bit mode",
+        .dependencies = featureSet(&[_]Feature{
+            .@"64bit_support",
+        }),
+    };
+    result[@intFromEnum(Feature.@"64bit_support")] = .{
+        .llvm_name = "64bit-support",
+        .description = "Supports 64-bit instructions",
         .dependencies = featureSet(&[_]Feature{}),
     };
     result[@intFromEnum(Feature.@"64bitregs")] = .{
@@ -705,7 +713,7 @@ pub const cpu = struct {
         .name = "970",
         .llvm_name = "970",
         .features = featureSet(&[_]Feature{
-            .@"64bit",
+            .@"64bit_support",
             .altivec,
             .fres,
             .frsqrte,
@@ -718,7 +726,7 @@ pub const cpu = struct {
         .name = "a2",
         .llvm_name = "a2",
         .features = featureSet(&[_]Feature{
-            .@"64bit",
+            .@"64bit_support",
             .booke,
             .cmpb,
             .fcpsgn,
@@ -761,7 +769,7 @@ pub const cpu = struct {
         .name = "e5500",
         .llvm_name = "e5500",
         .features = featureSet(&[_]Feature{
-            .@"64bit",
+            .@"64bit_support",
             .booke,
             .isel,
             .mfocrf,
@@ -772,7 +780,7 @@ pub const cpu = struct {
         .name = "future",
         .llvm_name = "future",
         .features = featureSet(&[_]Feature{
-            .@"64bit",
+            .@"64bit_support",
             .allow_unaligned_fp_access,
             .bpermd,
             .cmpb,
@@ -846,7 +854,7 @@ pub const cpu = struct {
         .name = "g5",
         .llvm_name = "g5",
         .features = featureSet(&[_]Feature{
-            .@"64bit",
+            .@"64bit_support",
             .altivec,
             .fres,
             .frsqrte,
@@ -873,7 +881,7 @@ pub const cpu = struct {
         .name = "ppc64",
         .llvm_name = "ppc64",
         .features = featureSet(&[_]Feature{
-            .@"64bit",
+            .@"64bit_support",
             .altivec,
             .fres,
             .frsqrte,
@@ -886,7 +894,7 @@ pub const cpu = struct {
         .name = "ppc64le",
         .llvm_name = "ppc64le",
         .features = featureSet(&[_]Feature{
-            .@"64bit",
+            .@"64bit_support",
             .allow_unaligned_fp_access,
             .bpermd,
             .cmpb,
@@ -926,7 +934,7 @@ pub const cpu = struct {
         .name = "pwr10",
         .llvm_name = "pwr10",
         .features = featureSet(&[_]Feature{
-            .@"64bit",
+            .@"64bit_support",
             .allow_unaligned_fp_access,
             .bpermd,
             .cmpb,
@@ -973,7 +981,7 @@ pub const cpu = struct {
         .name = "pwr11",
         .llvm_name = "pwr11",
         .features = featureSet(&[_]Feature{
-            .@"64bit",
+            .@"64bit_support",
             .allow_unaligned_fp_access,
             .bpermd,
             .cmpb,
@@ -1020,7 +1028,7 @@ pub const cpu = struct {
         .name = "pwr3",
         .llvm_name = "pwr3",
         .features = featureSet(&[_]Feature{
-            .@"64bit",
+            .@"64bit_support",
             .altivec,
             .fres,
             .frsqrte,
@@ -1032,7 +1040,7 @@ pub const cpu = struct {
         .name = "pwr4",
         .llvm_name = "pwr4",
         .features = featureSet(&[_]Feature{
-            .@"64bit",
+            .@"64bit_support",
             .altivec,
             .fres,
             .frsqrte,
@@ -1045,7 +1053,7 @@ pub const cpu = struct {
         .name = "pwr5",
         .llvm_name = "pwr5",
         .features = featureSet(&[_]Feature{
-            .@"64bit",
+            .@"64bit_support",
             .altivec,
             .fre,
             .fres,
@@ -1060,7 +1068,7 @@ pub const cpu = struct {
         .name = "pwr5x",
         .llvm_name = "pwr5x",
         .features = featureSet(&[_]Feature{
-            .@"64bit",
+            .@"64bit_support",
             .altivec,
             .fprnd,
             .fre,
@@ -1076,7 +1084,7 @@ pub const cpu = struct {
         .name = "pwr6",
         .llvm_name = "pwr6",
         .features = featureSet(&[_]Feature{
-            .@"64bit",
+            .@"64bit_support",
             .altivec,
             .cmpb,
             .fcpsgn,
@@ -1096,7 +1104,7 @@ pub const cpu = struct {
         .name = "pwr6x",
         .llvm_name = "pwr6x",
         .features = featureSet(&[_]Feature{
-            .@"64bit",
+            .@"64bit_support",
             .altivec,
             .cmpb,
             .fcpsgn,
@@ -1116,7 +1124,7 @@ pub const cpu = struct {
         .name = "pwr7",
         .llvm_name = "pwr7",
         .features = featureSet(&[_]Feature{
-            .@"64bit",
+            .@"64bit_support",
             .allow_unaligned_fp_access,
             .bpermd,
             .cmpb,
@@ -1145,7 +1153,7 @@ pub const cpu = struct {
         .name = "pwr8",
         .llvm_name = "pwr8",
         .features = featureSet(&[_]Feature{
-            .@"64bit",
+            .@"64bit_support",
             .allow_unaligned_fp_access,
             .bpermd,
             .cmpb,
@@ -1185,7 +1193,7 @@ pub const cpu = struct {
         .name = "pwr9",
         .llvm_name = "pwr9",
         .features = featureSet(&[_]Feature{
-            .@"64bit",
+            .@"64bit_support",
             .allow_unaligned_fp_access,
             .bpermd,
             .cmpb,
diff --git a/lib/std/Target/riscv.zig b/lib/std/Target/riscv.zig
index dac5206285..b745ac4bbe 100644
--- a/lib/std/Target/riscv.zig
+++ b/lib/std/Target/riscv.zig
@@ -8,49 +8,40 @@ pub const Feature = enum {
     @"32bit",
     @"64bit",
     a,
+    add_load_fusion,
+    addi_load_fusion,
     andes45,
     auipc_addi_fusion,
+    auipc_load_fusion,
     b,
+    bfext_fusion,
     c,
     conditional_cmv_fusion,
     d,
     disable_latency_sched_heuristic,
+    disable_misched_load_clustering,
+    disable_misched_store_clustering,
+    disable_postmisched_load_clustering,
+    disable_postmisched_store_clustering,
     dlen_factor_2,
     e,
+    enable_vsetvli_sched_heuristic,
     exact_asm,
     experimental,
     experimental_p,
     experimental_rvm23u32,
-    experimental_smctr,
-    experimental_ssctr,
+    experimental_smpmpmt,
     experimental_svukte,
-    experimental_xqccmp,
-    experimental_xqcia,
-    experimental_xqciac,
-    experimental_xqcibi,
-    experimental_xqcibm,
-    experimental_xqcicli,
-    experimental_xqcicm,
-    experimental_xqcics,
-    experimental_xqcicsr,
-    experimental_xqciint,
-    experimental_xqciio,
-    experimental_xqcilb,
-    experimental_xqcili,
-    experimental_xqcilia,
-    experimental_xqcilo,
-    experimental_xqcilsm,
-    experimental_xqcisim,
-    experimental_xqcisls,
-    experimental_xqcisync,
     experimental_xrivosvisni,
     experimental_xrivosvizip,
     experimental_xsfmclic,
     experimental_xsfsclic,
-    experimental_zalasr,
+    experimental_zibi,
     experimental_zicfilp,
     experimental_zicfiss,
     experimental_zvbc32e,
+    experimental_zvfbfa,
+    experimental_zvfofp8min,
     experimental_zvkgs,
     experimental_zvqdotq,
     f,
@@ -60,6 +51,7 @@ pub const Feature = enum {
     ld_add_fusion,
     log_vrgather,
     lui_addi_fusion,
+    lui_load_fusion,
     m,
     mips_p8700,
     no_default_unroll,
@@ -73,6 +65,7 @@ pub const Feature = enum {
     optimized_nf7_segment_load_store,
     optimized_nf8_segment_load_store,
     optimized_zero_stride_load,
+    permissive_zalrsc,
     predictable_select_expensive,
     prefer_vsetvli_over_read_vlenb,
     prefer_w_inst,
@@ -127,15 +120,21 @@ pub const Feature = enum {
     shgatpa,
     shifted_zextw_fusion,
     shlcofideleg,
-    short_forward_branch_opt,
+    short_forward_branch_ialu,
+    short_forward_branch_iload,
+    short_forward_branch_iminmax,
+    short_forward_branch_imul,
     shtvala,
     shvsatpa,
     shvstvala,
     shvstvecd,
+    shxadd_load_fusion,
+    single_element_vec_fp64,
     smaia,
     smcdeleg,
     smcntrpmf,
     smcsrind,
+    smctr,
     smdbltrp,
     smepmp,
     smmpm,
@@ -148,6 +147,7 @@ pub const Feature = enum {
     sscofpmf,
     sscounterenw,
     sscsrind,
+    ssctr,
     ssdbltrp,
     ssnpm,
     sspm,
@@ -179,6 +179,7 @@ pub const Feature = enum {
     xandesvbfhcvt,
     xandesvdot,
     xandesvpackfph,
+    xandesvsinth,
     xandesvsintload,
     xcvalu,
     xcvbi,
@@ -189,7 +190,28 @@ pub const Feature = enum {
     xcvsimd,
     xmipscbop,
     xmipscmov,
+    xmipsexectl,
     xmipslsp,
+    xqccmp,
+    xqci,
+    xqcia,
+    xqciac,
+    xqcibi,
+    xqcibm,
+    xqcicli,
+    xqcicm,
+    xqcics,
+    xqcicsr,
+    xqciint,
+    xqciio,
+    xqcilb,
+    xqcili,
+    xqcilia,
+    xqcilo,
+    xqcilsm,
+    xqcisim,
+    xqcisls,
+    xqcisync,
     xsfcease,
     xsfmm128t,
     xsfmm16t,
@@ -202,12 +224,18 @@ pub const Feature = enum {
     xsfmm64t,
     xsfmmbase,
     xsfvcp,
+    xsfvfbfexp16e,
+    xsfvfexp16e,
+    xsfvfexp32e,
+    xsfvfexpa,
+    xsfvfexpa64e,
     xsfvfnrclipxfqf,
     xsfvfwmaccqqq,
     xsfvqmaccdod,
     xsfvqmaccqoq,
     xsifivecdiscarddlone,
     xsifivecflushdlone,
+    xsmtvdot,
     xtheadba,
     xtheadbb,
     xtheadbs,
@@ -226,6 +254,7 @@ pub const Feature = enum {
     zaamo,
     zabha,
     zacas,
+    zalasr,
     zalrsc,
     zama16b,
     zawrs,
@@ -272,6 +301,7 @@ pub const Feature = enum {
     zihintpause,
     zihpm,
     zilsd,
+    zilsd_4byte_align,
     zimop,
     zk,
     zkn,
@@ -352,6 +382,16 @@ pub const all_features = blk: {
             .zalrsc,
         }),
     };
+    result[@intFromEnum(Feature.add_load_fusion)] = .{
+        .llvm_name = "add-load-fusion",
+        .description = "Enable ADD(.UW) + load macrofusion",
+        .dependencies = featureSet(&[_]Feature{}),
+    };
+    result[@intFromEnum(Feature.addi_load_fusion)] = .{
+        .llvm_name = "addi-load-fusion",
+        .description = "Enable ADDI + load macrofusion",
+        .dependencies = featureSet(&[_]Feature{}),
+    };
     result[@intFromEnum(Feature.andes45)] = .{
         .llvm_name = "andes45",
         .description = "Andes 45-Series processors",
@@ -362,6 +402,11 @@ pub const all_features = blk: {
         .description = "Enable AUIPC+ADDI macrofusion",
         .dependencies = featureSet(&[_]Feature{}),
     };
+    result[@intFromEnum(Feature.auipc_load_fusion)] = .{
+        .llvm_name = "auipc-load-fusion",
+        .description = "Enable AUIPC + load macrofusion",
+        .dependencies = featureSet(&[_]Feature{}),
+    };
     result[@intFromEnum(Feature.b)] = .{
         .llvm_name = "b",
         .description = "'B' (the collection of the Zba, Zbb, Zbs extensions)",
@@ -371,6 +416,11 @@ pub const all_features = blk: {
             .zbs,
         }),
     };
+    result[@intFromEnum(Feature.bfext_fusion)] = .{
+        .llvm_name = "bfext-fusion",
+        .description = "Enable SLLI+SRLI (bitfield extract) macrofusion",
+        .dependencies = featureSet(&[_]Feature{}),
+    };
     result[@intFromEnum(Feature.c)] = .{
         .llvm_name = "c",
         .description = "'C' (Compressed Instructions)",
@@ -395,6 +445,26 @@ pub const all_features = blk: {
         .description = "Disable latency scheduling heuristic",
         .dependencies = featureSet(&[_]Feature{}),
     };
+    result[@intFromEnum(Feature.disable_misched_load_clustering)] = .{
+        .llvm_name = "disable-misched-load-clustering",
+        .description = "Disable load clustering in the machine scheduler",
+        .dependencies = featureSet(&[_]Feature{}),
+    };
+    result[@intFromEnum(Feature.disable_misched_store_clustering)] = .{
+        .llvm_name = "disable-misched-store-clustering",
+        .description = "Disable store clustering in the machine scheduler",
+        .dependencies = featureSet(&[_]Feature{}),
+    };
+    result[@intFromEnum(Feature.disable_postmisched_load_clustering)] = .{
+        .llvm_name = "disable-postmisched-load-clustering",
+        .description = "Disable PostRA load clustering in the machine scheduler",
+        .dependencies = featureSet(&[_]Feature{}),
+    };
+    result[@intFromEnum(Feature.disable_postmisched_store_clustering)] = .{
+        .llvm_name = "disable-postmisched-store-clustering",
+        .description = "Disable PostRA store clustering in the machine scheduler",
+        .dependencies = featureSet(&[_]Feature{}),
+    };
     result[@intFromEnum(Feature.dlen_factor_2)] = .{
         .llvm_name = "dlen-factor-2",
         .description = "Vector unit DLEN(data path width) is half of VLEN",
@@ -405,6 +475,11 @@ pub const all_features = blk: {
         .description = "'E' (Embedded Instruction Set with 16 GPRs)",
         .dependencies = featureSet(&[_]Feature{}),
     };
+    result[@intFromEnum(Feature.enable_vsetvli_sched_heuristic)] = .{
+        .llvm_name = "enable-vsetvli-sched-heuristic",
+        .description = "Enable vsetvli-based scheduling heuristic",
+        .dependencies = featureSet(&[_]Feature{}),
+    };
     result[@intFromEnum(Feature.exact_asm)] = .{
         .llvm_name = "exact-asm",
         .description = "Enable Exact Assembly (Disables Compression and Relaxation)",
@@ -437,144 +512,16 @@ pub const all_features = blk: {
             .zimop,
         }),
     };
-    result[@intFromEnum(Feature.experimental_smctr)] = .{
-        .llvm_name = "experimental-smctr",
-        .description = "'Smctr' (Control Transfer Records Machine Level)",
-        .dependencies = featureSet(&[_]Feature{
-            .sscsrind,
-        }),
-    };
-    result[@intFromEnum(Feature.experimental_ssctr)] = .{
-        .llvm_name = "experimental-ssctr",
-        .description = "'Ssctr' (Control Transfer Records Supervisor Level)",
-        .dependencies = featureSet(&[_]Feature{
-            .sscsrind,
-        }),
+    result[@intFromEnum(Feature.experimental_smpmpmt)] = .{
+        .llvm_name = "experimental-smpmpmt",
+        .description = "'Smpmpmt' (PMP-based Memory Types Extension)",
+        .dependencies = featureSet(&[_]Feature{}),
     };
     result[@intFromEnum(Feature.experimental_svukte)] = .{
         .llvm_name = "experimental-svukte",
         .description = "'Svukte' (Address-Independent Latency of User-Mode Faults to Supervisor Addresses)",
         .dependencies = featureSet(&[_]Feature{}),
     };
-    result[@intFromEnum(Feature.experimental_xqccmp)] = .{
-        .llvm_name = "experimental-xqccmp",
-        .description = "'Xqccmp' (Qualcomm 16-bit Push/Pop and Double Moves)",
-        .dependencies = featureSet(&[_]Feature{
-            .zca,
-        }),
-    };
-    result[@intFromEnum(Feature.experimental_xqcia)] = .{
-        .llvm_name = "experimental-xqcia",
-        .description = "'Xqcia' (Qualcomm uC Arithmetic Extension)",
-        .dependencies = featureSet(&[_]Feature{}),
-    };
-    result[@intFromEnum(Feature.experimental_xqciac)] = .{
-        .llvm_name = "experimental-xqciac",
-        .description = "'Xqciac' (Qualcomm uC Load-Store Address Calculation Extension)",
-        .dependencies = featureSet(&[_]Feature{
-            .zca,
-        }),
-    };
-    result[@intFromEnum(Feature.experimental_xqcibi)] = .{
-        .llvm_name = "experimental-xqcibi",
-        .description = "'Xqcibi' (Qualcomm uC Branch Immediate Extension)",
-        .dependencies = featureSet(&[_]Feature{
-            .zca,
-        }),
-    };
-    result[@intFromEnum(Feature.experimental_xqcibm)] = .{
-        .llvm_name = "experimental-xqcibm",
-        .description = "'Xqcibm' (Qualcomm uC Bit Manipulation Extension)",
-        .dependencies = featureSet(&[_]Feature{
-            .zca,
-        }),
-    };
-    result[@intFromEnum(Feature.experimental_xqcicli)] = .{
-        .llvm_name = "experimental-xqcicli",
-        .description = "'Xqcicli' (Qualcomm uC Conditional Load Immediate Extension)",
-        .dependencies = featureSet(&[_]Feature{}),
-    };
-    result[@intFromEnum(Feature.experimental_xqcicm)] = .{
-        .llvm_name = "experimental-xqcicm",
-        .description = "'Xqcicm' (Qualcomm uC Conditional Move Extension)",
-        .dependencies = featureSet(&[_]Feature{
-            .zca,
-        }),
-    };
-    result[@intFromEnum(Feature.experimental_xqcics)] = .{
-        .llvm_name = "experimental-xqcics",
-        .description = "'Xqcics' (Qualcomm uC Conditional Select Extension)",
-        .dependencies = featureSet(&[_]Feature{}),
-    };
-    result[@intFromEnum(Feature.experimental_xqcicsr)] = .{
-        .llvm_name = "experimental-xqcicsr",
-        .description = "'Xqcicsr' (Qualcomm uC CSR Extension)",
-        .dependencies = featureSet(&[_]Feature{}),
-    };
-    result[@intFromEnum(Feature.experimental_xqciint)] = .{
-        .llvm_name = "experimental-xqciint",
-        .description = "'Xqciint' (Qualcomm uC Interrupts Extension)",
-        .dependencies = featureSet(&[_]Feature{
-            .zca,
-        }),
-    };
-    result[@intFromEnum(Feature.experimental_xqciio)] = .{
-        .llvm_name = "experimental-xqciio",
-        .description = "'Xqciio' (Qualcomm uC External Input Output Extension)",
-        .dependencies = featureSet(&[_]Feature{}),
-    };
-    result[@intFromEnum(Feature.experimental_xqcilb)] = .{
-        .llvm_name = "experimental-xqcilb",
-        .description = "'Xqcilb' (Qualcomm uC Long Branch Extension)",
-        .dependencies = featureSet(&[_]Feature{
-            .zca,
-        }),
-    };
-    result[@intFromEnum(Feature.experimental_xqcili)] = .{
-        .llvm_name = "experimental-xqcili",
-        .description = "'Xqcili' (Qualcomm uC Load Large Immediate Extension)",
-        .dependencies = featureSet(&[_]Feature{
-            .zca,
-        }),
-    };
-    result[@intFromEnum(Feature.experimental_xqcilia)] = .{
-        .llvm_name = "experimental-xqcilia",
-        .description = "'Xqcilia' (Qualcomm uC Large Immediate Arithmetic Extension)",
-        .dependencies = featureSet(&[_]Feature{
-            .zca,
-        }),
-    };
-    result[@intFromEnum(Feature.experimental_xqcilo)] = .{
-        .llvm_name = "experimental-xqcilo",
-        .description = "'Xqcilo' (Qualcomm uC Large Offset Load Store Extension)",
-        .dependencies = featureSet(&[_]Feature{
-            .zca,
-        }),
-    };
-    result[@intFromEnum(Feature.experimental_xqcilsm)] = .{
-        .llvm_name = "experimental-xqcilsm",
-        .description = "'Xqcilsm' (Qualcomm uC Load Store Multiple Extension)",
-        .dependencies = featureSet(&[_]Feature{}),
-    };
-    result[@intFromEnum(Feature.experimental_xqcisim)] = .{
-        .llvm_name = "experimental-xqcisim",
-        .description = "'Xqcisim' (Qualcomm uC Simulation Hint Extension)",
-        .dependencies = featureSet(&[_]Feature{
-            .zca,
-        }),
-    };
-    result[@intFromEnum(Feature.experimental_xqcisls)] = .{
-        .llvm_name = "experimental-xqcisls",
-        .description = "'Xqcisls' (Qualcomm uC Scaled Load Store Extension)",
-        .dependencies = featureSet(&[_]Feature{}),
-    };
-    result[@intFromEnum(Feature.experimental_xqcisync)] = .{
-        .llvm_name = "experimental-xqcisync",
-        .description = "'Xqcisync' (Qualcomm uC Sync Delay Extension)",
-        .dependencies = featureSet(&[_]Feature{
-            .zca,
-        }),
-    };
     result[@intFromEnum(Feature.experimental_xrivosvisni)] = .{
         .llvm_name = "experimental-xrivosvisni",
         .description = "'XRivosVisni' (Rivos Vector Integer Small New)",
@@ -595,9 +542,9 @@ pub const all_features = blk: {
         .description = "'XSfsclic' (SiFive CLIC Supervisor-mode CSRs)",
         .dependencies = featureSet(&[_]Feature{}),
     };
-    result[@intFromEnum(Feature.experimental_zalasr)] = .{
-        .llvm_name = "experimental-zalasr",
-        .description = "'Zalasr' (Load-Acquire and Store-Release Instructions)",
+    result[@intFromEnum(Feature.experimental_zibi)] = .{
+        .llvm_name = "experimental-zibi",
+        .description = "'Zibi' (Branch with Immediate)",
         .dependencies = featureSet(&[_]Feature{}),
     };
     result[@intFromEnum(Feature.experimental_zicfilp)] = .{
@@ -622,6 +569,21 @@ pub const all_features = blk: {
             .zve32x,
         }),
     };
+    result[@intFromEnum(Feature.experimental_zvfbfa)] = .{
+        .llvm_name = "experimental-zvfbfa",
+        .description = "'Zvfbfa' (Additional BF16 vector compute support)",
+        .dependencies = featureSet(&[_]Feature{
+            .zfbfmin,
+            .zve32f,
+        }),
+    };
+    result[@intFromEnum(Feature.experimental_zvfofp8min)] = .{
+        .llvm_name = "experimental-zvfofp8min",
+        .description = "'Zvfofp8min' (Vector OFP8 Converts)",
+        .dependencies = featureSet(&[_]Feature{
+            .zve32f,
+        }),
+    };
     result[@intFromEnum(Feature.experimental_zvkgs)] = .{
         .llvm_name = "experimental-zvkgs",
         .description = "'Zvkgs' (Vector-Scalar GCM instructions for Cryptography)",
@@ -673,6 +635,11 @@ pub const all_features = blk: {
         .description = "Enable LUI+ADDI macro fusion",
         .dependencies = featureSet(&[_]Feature{}),
     };
+    result[@intFromEnum(Feature.lui_load_fusion)] = .{
+        .llvm_name = "lui-load-fusion",
+        .description = "Enable LUI + load macrofusion",
+        .dependencies = featureSet(&[_]Feature{}),
+    };
     result[@intFromEnum(Feature.m)] = .{
         .llvm_name = "m",
         .description = "'M' (Integer Multiplication and Division)",
@@ -740,6 +707,11 @@ pub const all_features = blk: {
         .description = "Optimized (perform fewer memory operations)zero-stride vector load",
         .dependencies = featureSet(&[_]Feature{}),
     };
+    result[@intFromEnum(Feature.permissive_zalrsc)] = .{
+        .llvm_name = "permissive-zalrsc",
+        .description = "Implementation permits non-base instructions between LR/SC pairs",
+        .dependencies = featureSet(&[_]Feature{}),
+    };
     result[@intFromEnum(Feature.predictable_select_expensive)] = .{
         .llvm_name = "predictable-select-expensive",
         .description = "Prefer likely predicted branches over selects",
@@ -1262,11 +1234,32 @@ pub const all_features = blk: {
         .description = "'Shlcofideleg' (Delegating LCOFI Interrupts to VS-mode)",
         .dependencies = featureSet(&[_]Feature{}),
     };
-    result[@intFromEnum(Feature.short_forward_branch_opt)] = .{
-        .llvm_name = "short-forward-branch-opt",
-        .description = "Enable short forward branch optimization",
+    result[@intFromEnum(Feature.short_forward_branch_ialu)] = .{
+        .llvm_name = "short-forward-branch-ialu",
+        .description = "Enable short forward branch optimization for RVI base instructions",
         .dependencies = featureSet(&[_]Feature{}),
     };
+    result[@intFromEnum(Feature.short_forward_branch_iload)] = .{
+        .llvm_name = "short-forward-branch-iload",
+        .description = "Enable short forward branch optimization for load instructions",
+        .dependencies = featureSet(&[_]Feature{
+            .short_forward_branch_ialu,
+        }),
+    };
+    result[@intFromEnum(Feature.short_forward_branch_iminmax)] = .{
+        .llvm_name = "short-forward-branch-iminmax",
+        .description = "Enable short forward branch optimization for MIN,MAX instructions in Zbb",
+        .dependencies = featureSet(&[_]Feature{
+            .short_forward_branch_ialu,
+        }),
+    };
+    result[@intFromEnum(Feature.short_forward_branch_imul)] = .{
+        .llvm_name = "short-forward-branch-imul",
+        .description = "Enable short forward branch optimization for MUL instruction",
+        .dependencies = featureSet(&[_]Feature{
+            .short_forward_branch_ialu,
+        }),
+    };
     result[@intFromEnum(Feature.shtvala)] = .{
         .llvm_name = "shtvala",
         .description = "'Shtvala' (htval provides all needed values)",
@@ -1287,6 +1280,16 @@ pub const all_features = blk: {
         .description = "'Shvstvecd' (vstvec supports Direct mode)",
         .dependencies = featureSet(&[_]Feature{}),
     };
+    result[@intFromEnum(Feature.shxadd_load_fusion)] = .{
+        .llvm_name = "shxadd-load-fusion",
+        .description = "Enable SH(1|2|3)ADD(.UW) + load macrofusion",
+        .dependencies = featureSet(&[_]Feature{}),
+    };
+    result[@intFromEnum(Feature.single_element_vec_fp64)] = .{
+        .llvm_name = "single-element-vec-fp64",
+        .description = "Certain vector FP64 operations produce a single result element per cycle",
+        .dependencies = featureSet(&[_]Feature{}),
+    };
     result[@intFromEnum(Feature.smaia)] = .{
         .llvm_name = "smaia",
         .description = "'Smaia' (Advanced Interrupt Architecture Machine Level)",
@@ -1307,6 +1310,13 @@ pub const all_features = blk: {
         .description = "'Smcsrind' (Indirect CSR Access Machine Level)",
         .dependencies = featureSet(&[_]Feature{}),
     };
+    result[@intFromEnum(Feature.smctr)] = .{
+        .llvm_name = "smctr",
+        .description = "'Smctr' (Control Transfer Records Machine Level)",
+        .dependencies = featureSet(&[_]Feature{
+            .sscsrind,
+        }),
+    };
     result[@intFromEnum(Feature.smdbltrp)] = .{
         .llvm_name = "smdbltrp",
         .description = "'Smdbltrp' (Double Trap Machine Level)",
@@ -1369,6 +1379,13 @@ pub const all_features = blk: {
         .description = "'Sscsrind' (Indirect CSR Access Supervisor Level)",
         .dependencies = featureSet(&[_]Feature{}),
     };
+    result[@intFromEnum(Feature.ssctr)] = .{
+        .llvm_name = "ssctr",
+        .description = "'Ssctr' (Control Transfer Records Supervisor Level)",
+        .dependencies = featureSet(&[_]Feature{
+            .sscsrind,
+        }),
+    };
     result[@intFromEnum(Feature.ssdbltrp)] = .{
         .llvm_name = "ssdbltrp",
         .description = "'Ssdbltrp' (Double Trap Supervisor Level)",
@@ -1537,6 +1554,13 @@ pub const all_features = blk: {
             .f,
         }),
     };
+    result[@intFromEnum(Feature.xandesvsinth)] = .{
+        .llvm_name = "xandesvsinth",
+        .description = "'XAndesVSIntH' (Andes Vector Small INT Handling Extension)",
+        .dependencies = featureSet(&[_]Feature{
+            .zve32x,
+        }),
+    };
     result[@intFromEnum(Feature.xandesvsintload)] = .{
         .llvm_name = "xandesvsintload",
         .description = "'XAndesVSIntLoad' (Andes Vector INT4 Load Extension)",
@@ -1589,11 +1613,159 @@ pub const all_features = blk: {
         .description = "'XMIPSCMov' (MIPS conditional move instruction (mips.ccmov))",
         .dependencies = featureSet(&[_]Feature{}),
     };
+    result[@intFromEnum(Feature.xmipsexectl)] = .{
+        .llvm_name = "xmipsexectl",
+        .description = "'XMIPSEXECTL' (MIPS execution control)",
+        .dependencies = featureSet(&[_]Feature{}),
+    };
     result[@intFromEnum(Feature.xmipslsp)] = .{
         .llvm_name = "xmipslsp",
         .description = "'XMIPSLSP' (MIPS optimization for hardware load-store bonding)",
         .dependencies = featureSet(&[_]Feature{}),
     };
+    result[@intFromEnum(Feature.xqccmp)] = .{
+        .llvm_name = "xqccmp",
+        .description = "'Xqccmp' (Qualcomm 16-bit Push/Pop and Double Moves)",
+        .dependencies = featureSet(&[_]Feature{
+            .zca,
+        }),
+    };
+    result[@intFromEnum(Feature.xqci)] = .{
+        .llvm_name = "xqci",
+        .description = "'Xqci' (Qualcomm uC Extension)",
+        .dependencies = featureSet(&[_]Feature{
+            .xqcia,
+            .xqciac,
+            .xqcibi,
+            .xqcibm,
+            .xqcicli,
+            .xqcicm,
+            .xqcics,
+            .xqcicsr,
+            .xqciint,
+            .xqciio,
+            .xqcilb,
+            .xqcili,
+            .xqcilia,
+            .xqcilo,
+            .xqcilsm,
+            .xqcisim,
+            .xqcisls,
+            .xqcisync,
+        }),
+    };
+    result[@intFromEnum(Feature.xqcia)] = .{
+        .llvm_name = "xqcia",
+        .description = "'Xqcia' (Qualcomm uC Arithmetic Extension)",
+        .dependencies = featureSet(&[_]Feature{}),
+    };
+    result[@intFromEnum(Feature.xqciac)] = .{
+        .llvm_name = "xqciac",
+        .description = "'Xqciac' (Qualcomm uC Load-Store Address Calculation Extension)",
+        .dependencies = featureSet(&[_]Feature{
+            .zca,
+        }),
+    };
+    result[@intFromEnum(Feature.xqcibi)] = .{
+        .llvm_name = "xqcibi",
+        .description = "'Xqcibi' (Qualcomm uC Branch Immediate Extension)",
+        .dependencies = featureSet(&[_]Feature{
+            .zca,
+        }),
+    };
+    result[@intFromEnum(Feature.xqcibm)] = .{
+        .llvm_name = "xqcibm",
+        .description = "'Xqcibm' (Qualcomm uC Bit Manipulation Extension)",
+        .dependencies = featureSet(&[_]Feature{
+            .zca,
+        }),
+    };
+    result[@intFromEnum(Feature.xqcicli)] = .{
+        .llvm_name = "xqcicli",
+        .description = "'Xqcicli' (Qualcomm uC Conditional Load Immediate Extension)",
+        .dependencies = featureSet(&[_]Feature{}),
+    };
+    result[@intFromEnum(Feature.xqcicm)] = .{
+        .llvm_name = "xqcicm",
+        .description = "'Xqcicm' (Qualcomm uC Conditional Move Extension)",
+        .dependencies = featureSet(&[_]Feature{
+            .zca,
+        }),
+    };
+    result[@intFromEnum(Feature.xqcics)] = .{
+        .llvm_name = "xqcics",
+        .description = "'Xqcics' (Qualcomm uC Conditional Select Extension)",
+        .dependencies = featureSet(&[_]Feature{}),
+    };
+    result[@intFromEnum(Feature.xqcicsr)] = .{
+        .llvm_name = "xqcicsr",
+        .description = "'Xqcicsr' (Qualcomm uC CSR Extension)",
+        .dependencies = featureSet(&[_]Feature{}),
+    };
+    result[@intFromEnum(Feature.xqciint)] = .{
+        .llvm_name = "xqciint",
+        .description = "'Xqciint' (Qualcomm uC Interrupts Extension)",
+        .dependencies = featureSet(&[_]Feature{
+            .zca,
+        }),
+    };
+    result[@intFromEnum(Feature.xqciio)] = .{
+        .llvm_name = "xqciio",
+        .description = "'Xqciio' (Qualcomm uC External Input Output Extension)",
+        .dependencies = featureSet(&[_]Feature{}),
+    };
+    result[@intFromEnum(Feature.xqcilb)] = .{
+        .llvm_name = "xqcilb",
+        .description = "'Xqcilb' (Qualcomm uC Long Branch Extension)",
+        .dependencies = featureSet(&[_]Feature{
+            .zca,
+        }),
+    };
+    result[@intFromEnum(Feature.xqcili)] = .{
+        .llvm_name = "xqcili",
+        .description = "'Xqcili' (Qualcomm uC Load Large Immediate Extension)",
+        .dependencies = featureSet(&[_]Feature{
+            .zca,
+        }),
+    };
+    result[@intFromEnum(Feature.xqcilia)] = .{
+        .llvm_name = "xqcilia",
+        .description = "'Xqcilia' (Qualcomm uC Large Immediate Arithmetic Extension)",
+        .dependencies = featureSet(&[_]Feature{
+            .zca,
+        }),
+    };
+    result[@intFromEnum(Feature.xqcilo)] = .{
+        .llvm_name = "xqcilo",
+        .description = "'Xqcilo' (Qualcomm uC Large Offset Load Store Extension)",
+        .dependencies = featureSet(&[_]Feature{
+            .zca,
+        }),
+    };
+    result[@intFromEnum(Feature.xqcilsm)] = .{
+        .llvm_name = "xqcilsm",
+        .description = "'Xqcilsm' (Qualcomm uC Load Store Multiple Extension)",
+        .dependencies = featureSet(&[_]Feature{}),
+    };
+    result[@intFromEnum(Feature.xqcisim)] = .{
+        .llvm_name = "xqcisim",
+        .description = "'Xqcisim' (Qualcomm uC Simulation Hint Extension)",
+        .dependencies = featureSet(&[_]Feature{
+            .zca,
+        }),
+    };
+    result[@intFromEnum(Feature.xqcisls)] = .{
+        .llvm_name = "xqcisls",
+        .description = "'Xqcisls' (Qualcomm uC Scaled Load Store Extension)",
+        .dependencies = featureSet(&[_]Feature{}),
+    };
+    result[@intFromEnum(Feature.xqcisync)] = .{
+        .llvm_name = "xqcisync",
+        .description = "'Xqcisync' (Qualcomm uC Sync Delay Extension)",
+        .dependencies = featureSet(&[_]Feature{
+            .zca,
+        }),
+    };
     result[@intFromEnum(Feature.xsfcease)] = .{
         .llvm_name = "xsfcease",
         .description = "'XSfcease' (SiFive sf.cease Instruction)",
@@ -1684,6 +1856,40 @@ pub const all_features = blk: {
             .zve32x,
         }),
     };
+    result[@intFromEnum(Feature.xsfvfbfexp16e)] = .{
+        .llvm_name = "xsfvfbfexp16e",
+        .description = "'XSfvfbfexp16e' (SiFive Vector Floating-Point Exponential Function Instruction, BFloat16)",
+        .dependencies = featureSet(&[_]Feature{}),
+    };
+    result[@intFromEnum(Feature.xsfvfexp16e)] = .{
+        .llvm_name = "xsfvfexp16e",
+        .description = "'XSfvfexp16e' (SiFive Vector Floating-Point Exponential Function Instruction, Half Precision)",
+        .dependencies = featureSet(&[_]Feature{
+            .zvfh,
+        }),
+    };
+    result[@intFromEnum(Feature.xsfvfexp32e)] = .{
+        .llvm_name = "xsfvfexp32e",
+        .description = "'XSfvfexp32e' (SiFive Vector Floating-Point Exponential Function Instruction, Single Precision)",
+        .dependencies = featureSet(&[_]Feature{
+            .zve32f,
+        }),
+    };
+    result[@intFromEnum(Feature.xsfvfexpa)] = .{
+        .llvm_name = "xsfvfexpa",
+        .description = "'XSfvfexpa' (SiFive Vector Floating-Point Exponential Approximation Instruction)",
+        .dependencies = featureSet(&[_]Feature{
+            .zve32f,
+        }),
+    };
+    result[@intFromEnum(Feature.xsfvfexpa64e)] = .{
+        .llvm_name = "xsfvfexpa64e",
+        .description = "'XSfvfexpa64e' (SiFive Vector Floating-Point Exponential Approximation Instruction with Double-Precision)",
+        .dependencies = featureSet(&[_]Feature{
+            .xsfvfexpa,
+            .zve64d,
+        }),
+    };
     result[@intFromEnum(Feature.xsfvfnrclipxfqf)] = .{
         .llvm_name = "xsfvfnrclipxfqf",
         .description = "'XSfvfnrclipxfqf' (SiFive FP32-to-int8 Ranged Clip Instructions)",
@@ -1696,6 +1902,7 @@ pub const all_features = blk: {
         .description = "'XSfvfwmaccqqq' (SiFive Matrix Multiply Accumulate Instruction (4-by-4))",
         .dependencies = featureSet(&[_]Feature{
             .zvfbfmin,
+            .zvl128b,
         }),
     };
     result[@intFromEnum(Feature.xsfvqmaccdod)] = .{
@@ -1703,6 +1910,7 @@ pub const all_features = blk: {
         .description = "'XSfvqmaccdod' (SiFive Int8 Matrix Multiplication Instructions (2-by-8 and 8-by-2))",
         .dependencies = featureSet(&[_]Feature{
             .zve32x,
+            .zvl128b,
         }),
     };
     result[@intFromEnum(Feature.xsfvqmaccqoq)] = .{
@@ -1710,6 +1918,7 @@ pub const all_features = blk: {
         .description = "'XSfvqmaccqoq' (SiFive Int8 Matrix Multiplication Instructions (4-by-8 and 8-by-4))",
         .dependencies = featureSet(&[_]Feature{
             .zve32x,
+            .zvl256b,
         }),
     };
     result[@intFromEnum(Feature.xsifivecdiscarddlone)] = .{
@@ -1722,6 +1931,13 @@ pub const all_features = blk: {
         .description = "'XSiFivecflushdlone' (SiFive sf.cflush.d.l1 Instruction)",
         .dependencies = featureSet(&[_]Feature{}),
     };
+    result[@intFromEnum(Feature.xsmtvdot)] = .{
+        .llvm_name = "xsmtvdot",
+        .description = "'XSMTVDot' (SpacemiT Vector Dot Product Extension)",
+        .dependencies = featureSet(&[_]Feature{
+            .zve32f,
+        }),
+    };
     result[@intFromEnum(Feature.xtheadba)] = .{
         .llvm_name = "xtheadba",
         .description = "'XTHeadBa' (T-Head address calculation instructions)",
@@ -1820,6 +2036,11 @@ pub const all_features = blk: {
             .zaamo,
         }),
     };
+    result[@intFromEnum(Feature.zalasr)] = .{
+        .llvm_name = "zalasr",
+        .description = "'Zalasr' (Load-Acquire and Store-Release Instructions)",
+        .dependencies = featureSet(&[_]Feature{}),
+    };
     result[@intFromEnum(Feature.zalrsc)] = .{
         .llvm_name = "zalrsc",
         .description = "'Zalrsc' (Load-Reserved/Store-Conditional)",
@@ -2092,6 +2313,11 @@ pub const all_features = blk: {
         .description = "'Zilsd' (Load/Store Pair Instructions)",
         .dependencies = featureSet(&[_]Feature{}),
     };
+    result[@intFromEnum(Feature.zilsd_4byte_align)] = .{
+        .llvm_name = "zilsd-4byte-align",
+        .description = "Allow 4-byte alignment for Zilsd LD/SD instructions",
+        .dependencies = featureSet(&[_]Feature{}),
+    };
     result[@intFromEnum(Feature.zimop)] = .{
         .llvm_name = "zimop",
         .description = "'Zimop' (May-Be-Operations)",
@@ -2461,7 +2687,7 @@ pub const cpu = struct {
         .features = featureSet(&[_]Feature{
             .andes45,
             .no_default_unroll,
-            .short_forward_branch_opt,
+            .short_forward_branch_ialu,
             .use_postra_scheduler,
         }),
     };
@@ -2491,7 +2717,7 @@ pub const cpu = struct {
             .i,
             .m,
             .no_default_unroll,
-            .short_forward_branch_opt,
+            .short_forward_branch_ialu,
             .use_postra_scheduler,
             .xandesperf,
             .zifencei,
@@ -2523,7 +2749,7 @@ pub const cpu = struct {
             .i,
             .m,
             .no_default_unroll,
-            .short_forward_branch_opt,
+            .short_forward_branch_ialu,
             .use_postra_scheduler,
             .xandesperf,
             .zifencei,
@@ -2540,7 +2766,7 @@ pub const cpu = struct {
             .i,
             .m,
             .no_default_unroll,
-            .short_forward_branch_opt,
+            .short_forward_branch_ialu,
             .use_postra_scheduler,
             .v,
             .xandesperf,
@@ -2559,7 +2785,7 @@ pub const cpu = struct {
             .i,
             .m,
             .no_default_unroll,
-            .short_forward_branch_opt,
+            .short_forward_branch_ialu,
             .use_postra_scheduler,
             .xandesperf,
             .zifencei,
@@ -2577,7 +2803,7 @@ pub const cpu = struct {
             .i,
             .m,
             .no_default_unroll,
-            .short_forward_branch_opt,
+            .short_forward_branch_ialu,
             .use_postra_scheduler,
             .xandesperf,
             .zifencei,
@@ -2648,6 +2874,7 @@ pub const cpu = struct {
             .mips_p8700,
             .xmipscbop,
             .xmipscmov,
+            .xmipsexectl,
             .xmipslsp,
             .zba,
             .zbb,
@@ -2703,7 +2930,7 @@ pub const cpu = struct {
         .llvm_name = "sifive-7-series",
         .features = featureSet(&[_]Feature{
             .no_default_unroll,
-            .short_forward_branch_opt,
+            .short_forward_branch_ialu,
             .use_postra_scheduler,
         }),
     };
@@ -2782,7 +3009,7 @@ pub const cpu = struct {
             .i,
             .m,
             .no_default_unroll,
-            .short_forward_branch_opt,
+            .short_forward_branch_ialu,
             .use_postra_scheduler,
             .zifencei,
         }),
@@ -2815,7 +3042,6 @@ pub const cpu = struct {
             .ziccif,
             .zicclsm,
             .ziccrse,
-            .zicntr,
             .zifencei,
             .zihintntl,
             .zihintpause,
@@ -2855,7 +3081,6 @@ pub const cpu = struct {
             .ziccif,
             .zicclsm,
             .ziccrse,
-            .zicntr,
             .zifencei,
             .zihintntl,
             .zihintpause,
@@ -2918,7 +3143,6 @@ pub const cpu = struct {
             .ziccif,
             .zicclsm,
             .ziccrse,
-            .zicntr,
             .zifencei,
             .zihintntl,
             .zihintpause,
@@ -3035,7 +3259,7 @@ pub const cpu = struct {
             .i,
             .m,
             .no_default_unroll,
-            .short_forward_branch_opt,
+            .short_forward_branch_ialu,
             .use_postra_scheduler,
             .zifencei,
             .zihintpause,
@@ -3065,7 +3289,7 @@ pub const cpu = struct {
             .i,
             .m,
             .no_default_unroll,
-            .short_forward_branch_opt,
+            .short_forward_branch_ialu,
             .use_postra_scheduler,
             .zifencei,
         }),
@@ -3083,7 +3307,7 @@ pub const cpu = struct {
             .no_default_unroll,
             .optimized_nf2_segment_load_store,
             .optimized_zero_stride_load,
-            .short_forward_branch_opt,
+            .short_forward_branch_ialu,
             .use_postra_scheduler,
             .v,
             .vl_dependent_latency,
@@ -3111,7 +3335,8 @@ pub const cpu = struct {
             .no_default_unroll,
             .optimized_nf2_segment_load_store,
             .optimized_zero_stride_load,
-            .short_forward_branch_opt,
+            .short_forward_branch_ialu,
+            .single_element_vec_fp64,
             .use_postra_scheduler,
             .v,
             .vl_dependent_latency,
@@ -3173,6 +3398,7 @@ pub const cpu = struct {
             .unaligned_scalar_mem,
             .v,
             .vxrm_pipeline_flush,
+            .xsmtvdot,
             .za64rs,
             .zbc,
             .zbkc,
@@ -3341,6 +3567,13 @@ pub const cpu = struct {
             .log_vrgather,
             .m,
             .no_default_unroll,
+            .optimized_nf2_segment_load_store,
+            .optimized_nf3_segment_load_store,
+            .optimized_nf4_segment_load_store,
+            .optimized_nf5_segment_load_store,
+            .optimized_nf6_segment_load_store,
+            .optimized_nf7_segment_load_store,
+            .optimized_nf8_segment_load_store,
             .optimized_zero_stride_load,
             .sha,
             .smaia,
@@ -3400,12 +3633,17 @@ pub const cpu = struct {
         .features = featureSet(&[_]Feature{
             .@"64bit",
             .a,
+            .add_load_fusion,
             .auipc_addi_fusion,
+            .auipc_load_fusion,
             .c,
             .d,
+            .disable_misched_load_clustering,
+            .disable_postmisched_load_clustering,
+            .disable_postmisched_store_clustering,
             .i,
-            .ld_add_fusion,
             .lui_addi_fusion,
+            .lui_load_fusion,
             .m,
             .shifted_zextw_fusion,
             .ventana_veyron,
diff --git a/lib/std/Target/sparc.zig b/lib/std/Target/sparc.zig
index e4b7f73e48..d7a0f8f746 100644
--- a/lib/std/Target/sparc.zig
+++ b/lib/std/Target/sparc.zig
@@ -5,6 +5,7 @@ const CpuFeature = std.Target.Cpu.Feature;
 const CpuModel = std.Target.Cpu.Model;
 
 pub const Feature = enum {
+    @"64bit",
     crypto,
     deprecated_v8,
     detectroundchange,
@@ -23,6 +24,7 @@ pub const Feature = enum {
     leonpwrpsr,
     no_fmuls,
     no_fsmuld,
+    no_predictor,
     osa2011,
     popc,
     reserve_g1,
@@ -73,6 +75,13 @@ pub const all_features = blk: {
     const len = @typeInfo(Feature).@"enum".fields.len;
     std.debug.assert(len <= CpuFeature.Set.needed_bit_count);
     var result: [len]CpuFeature = undefined;
+    result[@intFromEnum(Feature.@"64bit")] = .{
+        .llvm_name = "64bit",
+        .description = "Enable 64-bit mode",
+        .dependencies = featureSet(&[_]Feature{
+            .v9,
+        }),
+    };
     result[@intFromEnum(Feature.crypto)] = .{
         .llvm_name = "crypto",
         .description = "Enable cryptographic extensions",
@@ -165,6 +174,11 @@ pub const all_features = blk: {
         .description = "Disable the fsmuld instruction.",
         .dependencies = featureSet(&[_]Feature{}),
     };
+    result[@intFromEnum(Feature.no_predictor)] = .{
+        .llvm_name = "no-predictor",
+        .description = "Processor has no branch predictor, branches stall execution",
+        .dependencies = featureSet(&[_]Feature{}),
+    };
     result[@intFromEnum(Feature.osa2011)] = .{
         .llvm_name = "osa2011",
         .description = "Enable Oracle SPARC Architecture 2011 extensions",
@@ -586,6 +600,7 @@ pub const cpu = struct {
         .llvm_name = "niagara",
         .features = featureSet(&[_]Feature{
             .deprecated_v8,
+            .no_predictor,
             .ua2005,
         }),
     };
@@ -594,6 +609,7 @@ pub const cpu = struct {
         .llvm_name = "niagara2",
         .features = featureSet(&[_]Feature{
             .deprecated_v8,
+            .no_predictor,
             .popc,
             .ua2005,
         }),
@@ -603,6 +619,7 @@ pub const cpu = struct {
         .llvm_name = "niagara3",
         .features = featureSet(&[_]Feature{
             .deprecated_v8,
+            .no_predictor,
             .popc,
             .ua2005,
             .ua2007,
diff --git a/lib/std/Target/wasm.zig b/lib/std/Target/wasm.zig
index 3862a91edd..d9b171408f 100644
--- a/lib/std/Target/wasm.zig
+++ b/lib/std/Target/wasm.zig
@@ -12,6 +12,7 @@ pub const Feature = enum {
     exception_handling,
     extended_const,
     fp16,
+    gc,
     multimemory,
     multivalue,
     mutable_globals,
@@ -71,6 +72,11 @@ pub const all_features = blk: {
         .description = "Enable FP16 instructions",
         .dependencies = featureSet(&[_]Feature{}),
     };
+    result[@intFromEnum(Feature.gc)] = .{
+        .llvm_name = "gc",
+        .description = "Enable wasm gc",
+        .dependencies = featureSet(&[_]Feature{}),
+    };
     result[@intFromEnum(Feature.multimemory)] = .{
         .llvm_name = "multimemory",
         .description = "Enable multiple memories",
@@ -148,6 +154,7 @@ pub const cpu = struct {
             .exception_handling,
             .extended_const,
             .fp16,
+            .gc,
             .multimemory,
             .multivalue,
             .mutable_globals,
diff --git a/lib/std/Target/x86.zig b/lib/std/Target/x86.zig
index df0110089d..eb5dcc0b20 100644
--- a/lib/std/Target/x86.zig
+++ b/lib/std/Target/x86.zig
@@ -22,7 +22,6 @@ pub const Feature = enum {
     amx_movrs,
     amx_tf32,
     amx_tile,
-    amx_transpose,
     avx,
     avx10_1,
     avx10_2,
@@ -67,7 +66,6 @@ pub const Feature = enum {
     egpr,
     enqcmd,
     ermsb,
-    evex512,
     f16c,
     false_deps_getmant,
     false_deps_lzcnt_tzcnt,
@@ -136,6 +134,7 @@ pub const Feature = enum {
     ppx,
     prefer_128_bit,
     prefer_256_bit,
+    prefer_legacy_setcc,
     prefer_mask_registers,
     prefer_movmsk_over_vtest,
     prefer_no_gather,
@@ -168,6 +167,7 @@ pub const Feature = enum {
     slow_lea,
     slow_pmaddwd,
     slow_pmulld,
+    slow_pmullq,
     slow_shld,
     slow_two_mem_ops,
     slow_unaligned_mem_16,
@@ -199,6 +199,7 @@ pub const Feature = enum {
     waitpkg,
     wbnoinvd,
     widekl,
+    x32,
     x87,
     xop,
     xsave,
@@ -324,13 +325,6 @@ pub const all_features = blk: {
         .description = "Support AMX-TILE instructions",
         .dependencies = featureSet(&[_]Feature{}),
     };
-    result[@intFromEnum(Feature.amx_transpose)] = .{
-        .llvm_name = "amx-transpose",
-        .description = "Support AMX amx-transpose instructions",
-        .dependencies = featureSet(&[_]Feature{
-            .amx_tile,
-        }),
-    };
     result[@intFromEnum(Feature.avx)] = .{
         .llvm_name = "avx",
         .description = "Enable AVX instructions",
@@ -339,8 +333,8 @@ pub const all_features = blk: {
         }),
     };
     result[@intFromEnum(Feature.avx10_1)] = .{
-        .llvm_name = "avx10.1-512",
-        .description = "Support AVX10.1 up to 512-bit instruction",
+        .llvm_name = "avx10.1",
+        .description = "Support AVX10.1 instruction",
         .dependencies = featureSet(&[_]Feature{
             .avx512bf16,
             .avx512bitalg,
@@ -356,8 +350,8 @@ pub const all_features = blk: {
         }),
     };
     result[@intFromEnum(Feature.avx10_2)] = .{
-        .llvm_name = "avx10.2-512",
-        .description = "Support AVX10.2 up to 512-bit instruction",
+        .llvm_name = "avx10.2",
+        .description = "Support AVX10.2 instruction",
         .dependencies = featureSet(&[_]Feature{
             .avx10_1,
         }),
@@ -416,7 +410,6 @@ pub const all_features = blk: {
         .description = "Enable AVX-512 instructions",
         .dependencies = featureSet(&[_]Feature{
             .avx2,
-            .evex512,
             .f16c,
             .fma,
         }),
@@ -616,11 +609,6 @@ pub const all_features = blk: {
         .description = "REP MOVS/STOS are fast",
         .dependencies = featureSet(&[_]Feature{}),
     };
-    result[@intFromEnum(Feature.evex512)] = .{
-        .llvm_name = "evex512",
-        .description = "Support ZMM and 64-bit mask instructions",
-        .dependencies = featureSet(&[_]Feature{}),
-    };
     result[@intFromEnum(Feature.f16c)] = .{
         .llvm_name = "f16c",
         .description = "Support 16-bit floating point conversion instructions",
@@ -974,6 +962,11 @@ pub const all_features = blk: {
         .description = "Prefer 256-bit AVX instructions",
         .dependencies = featureSet(&[_]Feature{}),
     };
+    result[@intFromEnum(Feature.prefer_legacy_setcc)] = .{
+        .llvm_name = "prefer-legacy-setcc",
+        .description = "Prefer to emit legacy SetCC.",
+        .dependencies = featureSet(&[_]Feature{}),
+    };
     result[@intFromEnum(Feature.prefer_mask_registers)] = .{
         .llvm_name = "prefer-mask-registers",
         .description = "Prefer AVX512 mask registers over PTEST/MOVMSK",
@@ -1145,6 +1138,11 @@ pub const all_features = blk: {
         .description = "PMULLD instruction is slow (compared to PMULLW/PMULHW and PMULUDQ)",
         .dependencies = featureSet(&[_]Feature{}),
     };
+    result[@intFromEnum(Feature.slow_pmullq)] = .{
+        .llvm_name = "slow-pmullq",
+        .description = "PMULLQ instruction is slow",
+        .dependencies = featureSet(&[_]Feature{}),
+    };
     result[@intFromEnum(Feature.slow_shld)] = .{
         .llvm_name = "slow-shld",
         .description = "SHLD instruction is slow",
@@ -1325,6 +1323,11 @@ pub const all_features = blk: {
             .kl,
         }),
     };
+    result[@intFromEnum(Feature.x32)] = .{
+        .llvm_name = "x32",
+        .description = "64-bit with ILP32 programming model (e.g. x32 ABI)",
+        .dependencies = featureSet(&[_]Feature{}),
+    };
     result[@intFromEnum(Feature.x87)] = .{
         .llvm_name = "x87",
         .description = "Enable X87 float instructions",
@@ -1393,7 +1396,6 @@ pub const cpu = struct {
             .cx16,
             .f16c,
             .false_deps_perm,
-            .false_deps_popcnt,
             .fast_15bytenop,
             .fast_gather,
             .fast_scalar_fsqrt,
@@ -1432,6 +1434,7 @@ pub const cpu = struct {
             .sha,
             .shstk,
             .slow_3ops_lea,
+            .slow_pmullq,
             .smap,
             .smep,
             .tuning_fast_imm_vector_shift,
@@ -1490,7 +1493,6 @@ pub const cpu = struct {
             .enqcmd,
             .f16c,
             .false_deps_perm,
-            .false_deps_popcnt,
             .fast_15bytenop,
             .fast_gather,
             .fast_scalar_fsqrt,
@@ -1529,6 +1531,7 @@ pub const cpu = struct {
             .sha,
             .shstk,
             .slow_3ops_lea,
+            .slow_pmullq,
             .smap,
             .smep,
             .tuning_fast_imm_vector_shift,
@@ -1566,7 +1569,6 @@ pub const cpu = struct {
             .enqcmd,
             .f16c,
             .false_deps_perm,
-            .false_deps_popcnt,
             .fast_15bytenop,
             .fast_gather,
             .fast_scalar_fsqrt,
@@ -1606,6 +1608,7 @@ pub const cpu = struct {
             .sha512,
             .shstk,
             .slow_3ops_lea,
+            .slow_pmullq,
             .sm3,
             .sm4,
             .smap,
@@ -2204,6 +2207,7 @@ pub const cpu = struct {
             .sahf,
             .sha,
             .slow_3ops_lea,
+            .slow_pmullq,
             .smap,
             .smep,
             .tuning_fast_imm_vector_shift,
@@ -2297,7 +2301,6 @@ pub const cpu = struct {
             .enqcmd,
             .f16c,
             .false_deps_perm,
-            .false_deps_popcnt,
             .fast_15bytenop,
             .fast_gather,
             .fast_scalar_fsqrt,
@@ -2338,6 +2341,7 @@ pub const cpu = struct {
             .sha512,
             .shstk,
             .slow_3ops_lea,
+            .slow_pmullq,
             .sm3,
             .sm4,
             .tuning_fast_imm_vector_shift,
@@ -2464,7 +2468,6 @@ pub const cpu = struct {
             .amx_int8,
             .amx_movrs,
             .amx_tf32,
-            .amx_transpose,
             .avx10_2,
             .avxifma,
             .avxneconvert,
@@ -2475,7 +2478,6 @@ pub const cpu = struct {
             .bmi2,
             .branch_hint,
             .ccmp,
-            .cf,
             .cldemote,
             .clflushopt,
             .clwb,
@@ -2533,12 +2535,12 @@ pub const cpu = struct {
             .sha,
             .sha512,
             .shstk,
+            .slow_pmullq,
             .sm3,
             .sm4,
             .tsxldtrk,
             .tuning_fast_imm_vector_shift,
             .uintr,
-            .usermsr,
             .vaes,
             .vpclmulqdq,
             .vzeroupper,
@@ -2622,6 +2624,7 @@ pub const cpu = struct {
             .serialize,
             .sha,
             .shstk,
+            .slow_pmullq,
             .smap,
             .smep,
             .tsxldtrk,
@@ -2935,6 +2938,7 @@ pub const cpu = struct {
             .serialize,
             .sha,
             .shstk,
+            .slow_pmullq,
             .tsxldtrk,
             .tuning_fast_imm_vector_shift,
             .uintr,
@@ -3024,6 +3028,7 @@ pub const cpu = struct {
             .serialize,
             .sha,
             .shstk,
+            .slow_pmullq,
             .tsxldtrk,
             .tuning_fast_imm_vector_shift,
             .uintr,
@@ -3181,6 +3186,7 @@ pub const cpu = struct {
             .rdseed,
             .sahf,
             .sha,
+            .slow_pmullq,
             .tuning_fast_imm_vector_shift,
             .vaes,
             .vpclmulqdq,
@@ -3245,6 +3251,7 @@ pub const cpu = struct {
             .rdseed,
             .sahf,
             .sha,
+            .slow_pmullq,
             .tuning_fast_imm_vector_shift,
             .vaes,
             .vpclmulqdq,
@@ -3475,7 +3482,6 @@ pub const cpu = struct {
             .enqcmd,
             .f16c,
             .false_deps_perm,
-            .false_deps_popcnt,
             .fast_15bytenop,
             .fast_gather,
             .fast_scalar_fsqrt,
@@ -3515,6 +3521,7 @@ pub const cpu = struct {
             .sha512,
             .shstk,
             .slow_3ops_lea,
+            .slow_pmullq,
             .sm3,
             .sm4,
             .tuning_fast_imm_vector_shift,
@@ -3546,7 +3553,6 @@ pub const cpu = struct {
             .cx16,
             .f16c,
             .false_deps_perm,
-            .false_deps_popcnt,
             .fast_15bytenop,
             .fast_gather,
             .fast_scalar_fsqrt,
@@ -3585,6 +3591,7 @@ pub const cpu = struct {
             .sha,
             .shstk,
             .slow_3ops_lea,
+            .slow_pmullq,
             .smap,
             .smep,
             .tuning_fast_imm_vector_shift,
@@ -3635,6 +3642,90 @@ pub const cpu = struct {
             .x87,
         }),
     };
+    pub const novalake: CpuModel = .{
+        .name = "novalake",
+        .llvm_name = "novalake",
+        .features = featureSet(&[_]Feature{
+            .@"64bit",
+            .adx,
+            .allow_light_256_bit,
+            .avx10_2,
+            .avxifma,
+            .avxneconvert,
+            .avxvnni,
+            .avxvnniint16,
+            .avxvnniint8,
+            .bmi,
+            .bmi2,
+            .ccmp,
+            .clflushopt,
+            .clwb,
+            .cmov,
+            .cmpccxadd,
+            .cx16,
+            .egpr,
+            .enqcmd,
+            .false_deps_perm,
+            .fast_15bytenop,
+            .fast_gather,
+            .fast_scalar_fsqrt,
+            .fast_shld_rotate,
+            .fast_variable_crosslane_shuffle,
+            .fast_variable_perlane_shuffle,
+            .fast_vector_fsqrt,
+            .fsgsbase,
+            .fxsr,
+            .gfni,
+            .hreset,
+            .idivq_to_divl,
+            .invpcid,
+            .lzcnt,
+            .macrofusion,
+            .mmx,
+            .movbe,
+            .movdir64b,
+            .movdiri,
+            .movrs,
+            .ndd,
+            .nf,
+            .no_bypass_delay_blend,
+            .no_bypass_delay_mov,
+            .no_bypass_delay_shuffle,
+            .nopl,
+            .pconfig,
+            .pku,
+            .popcnt,
+            .ppx,
+            .prefer_movmsk_over_vtest,
+            .prefetchi,
+            .prfchw,
+            .ptwrite,
+            .push2pop2,
+            .rdpid,
+            .rdrnd,
+            .rdseed,
+            .sahf,
+            .serialize,
+            .sha,
+            .sha512,
+            .shstk,
+            .slow_3ops_lea,
+            .slow_pmullq,
+            .sm3,
+            .sm4,
+            .tuning_fast_imm_vector_shift,
+            .uintr,
+            .vaes,
+            .vpclmulqdq,
+            .vzeroupper,
+            .waitpkg,
+            .x87,
+            .xsavec,
+            .xsaveopt,
+            .xsaves,
+            .zu,
+        }),
+    };
     pub const opteron: CpuModel = .{
         .name = "opteron",
         .llvm_name = "opteron",
@@ -3697,7 +3788,6 @@ pub const cpu = struct {
             .enqcmd,
             .f16c,
             .false_deps_perm,
-            .false_deps_popcnt,
             .fast_15bytenop,
             .fast_gather,
             .fast_scalar_fsqrt,
@@ -3726,7 +3816,6 @@ pub const cpu = struct {
             .pku,
             .popcnt,
             .prefer_movmsk_over_vtest,
-            .prefetchi,
             .prfchw,
             .ptwrite,
             .rdpid,
@@ -3738,6 +3827,7 @@ pub const cpu = struct {
             .sha512,
             .shstk,
             .slow_3ops_lea,
+            .slow_pmullq,
             .sm3,
             .sm4,
             .tuning_fast_imm_vector_shift,
@@ -3908,7 +3998,6 @@ pub const cpu = struct {
             .cx16,
             .f16c,
             .false_deps_perm,
-            .false_deps_popcnt,
             .fast_15bytenop,
             .fast_gather,
             .fast_scalar_fsqrt,
@@ -3947,6 +4036,7 @@ pub const cpu = struct {
             .sha,
             .shstk,
             .slow_3ops_lea,
+            .slow_pmullq,
             .smap,
             .smep,
             .tuning_fast_imm_vector_shift,
@@ -4013,6 +4103,7 @@ pub const cpu = struct {
             .rdseed,
             .sahf,
             .sha,
+            .slow_pmullq,
             .smap,
             .smep,
             .tuning_fast_imm_vector_shift,
@@ -4124,6 +4215,7 @@ pub const cpu = struct {
             .serialize,
             .sha,
             .shstk,
+            .slow_pmullq,
             .smap,
             .smep,
             .tsxldtrk,
@@ -4494,6 +4586,7 @@ pub const cpu = struct {
             .sahf,
             .sha,
             .shstk,
+            .slow_pmullq,
             .smap,
             .smep,
             .tuning_fast_imm_vector_shift,
@@ -4567,6 +4660,82 @@ pub const cpu = struct {
             .x87,
         }),
     };
+    pub const wildcatlake: CpuModel = .{
+        .name = "wildcatlake",
+        .llvm_name = "wildcatlake",
+        .features = featureSet(&[_]Feature{
+            .@"64bit",
+            .adx,
+            .allow_light_256_bit,
+            .avxifma,
+            .avxneconvert,
+            .avxvnni,
+            .avxvnniint16,
+            .avxvnniint8,
+            .bmi,
+            .bmi2,
+            .clflushopt,
+            .clwb,
+            .cmov,
+            .cmpccxadd,
+            .cx16,
+            .enqcmd,
+            .f16c,
+            .false_deps_perm,
+            .fast_15bytenop,
+            .fast_gather,
+            .fast_scalar_fsqrt,
+            .fast_shld_rotate,
+            .fast_variable_crosslane_shuffle,
+            .fast_variable_perlane_shuffle,
+            .fast_vector_fsqrt,
+            .fma,
+            .fsgsbase,
+            .fxsr,
+            .gfni,
+            .hreset,
+            .idivq_to_divl,
+            .invpcid,
+            .lzcnt,
+            .macrofusion,
+            .mmx,
+            .movbe,
+            .movdir64b,
+            .movdiri,
+            .no_bypass_delay_blend,
+            .no_bypass_delay_mov,
+            .no_bypass_delay_shuffle,
+            .nopl,
+            .pconfig,
+            .pku,
+            .popcnt,
+            .prefer_movmsk_over_vtest,
+            .prfchw,
+            .ptwrite,
+            .rdpid,
+            .rdrnd,
+            .rdseed,
+            .sahf,
+            .serialize,
+            .sha,
+            .sha512,
+            .shstk,
+            .slow_3ops_lea,
+            .slow_pmullq,
+            .sm3,
+            .sm4,
+            .tuning_fast_imm_vector_shift,
+            .uintr,
+            .vaes,
+            .vpclmulqdq,
+            .vzeroupper,
+            .waitpkg,
+            .x87,
+            .xsavec,
+            .xsaveopt,
+            .xsaves,
+        }),
+    };
     pub const winchip2: CpuModel = .{
         .name = "winchip2",
         .llvm_name = "winchip2",
diff --git a/lib/std/Target/xtensa.zig b/lib/std/Target/xtensa.zig
index 474a0227ba..9009a7640d 100644
--- a/lib/std/Target/xtensa.zig
+++ b/lib/std/Target/xtensa.zig
@@ -15,6 +15,7 @@ pub const Feature = enum {
     div32,
     exception,
     extendedl32r,
+    forced_atomics,
     fp,
     highpriinterrupts,
     highpriinterrupts_level3,
@@ -34,6 +35,7 @@ pub const Feature = enum {
     prid,
     regprotect,
     rvector,
+    s32c1i,
     sext,
     threadptr,
     timers1,
@@ -101,6 +103,11 @@ pub const all_features = blk: {
         .description = "Enable Xtensa Extended L32R option",
         .dependencies = featureSet(&[_]Feature{}),
     };
+    result[@intFromEnum(Feature.forced_atomics)] = .{
+        .llvm_name = "forced-atomics",
+        .description = "Assume that lock-free native-width atomics are available",
+        .dependencies = featureSet(&[_]Feature{}),
+    };
     result[@intFromEnum(Feature.fp)] = .{
         .llvm_name = "fp",
         .description = "Enable Xtensa Single FP instructions",
@@ -206,6 +213,11 @@ pub const all_features = blk: {
         .description = "Enable Xtensa Relocatable Vector option",
         .dependencies = featureSet(&[_]Feature{}),
     };
+    result[@intFromEnum(Feature.s32c1i)] = .{
+        .llvm_name = "s32c1i",
+        .description = "Enable Xtensa S32C1I option",
+        .dependencies = featureSet(&[_]Feature{}),
+    };
     result[@intFromEnum(Feature.sext)] = .{
         .llvm_name = "sext",
         .description = "Enable Xtensa Sign Extend option",
@@ -245,6 +257,59 @@ pub const all_features = blk: {
 };
 
 pub const cpu = struct {
+    pub const esp32: CpuModel = .{
+        .name = "esp32",
+        .llvm_name = "esp32",
+        .features = featureSet(&[_]Feature{
+            .bool,
+            .clamps,
+            .coprocessor,
+            .dcache,
+            .debug,
+            .density,
+            .dfpaccel,
+            .div32,
+            .exception,
+            .fp,
+            .highpriinterrupts_level7,
+            .interrupt,
+            .loop,
+            .mac16,
+            .minmax,
+            .miscsr,
+            .mul16,
+            .mul32,
+            .mul32high,
+            .nsa,
+            .prid,
+            .regprotect,
+            .rvector,
+            .s32c1i,
+            .sext,
+            .threadptr,
+            .timers3,
+            .windowed,
+        }),
+    };
+    pub const esp8266: CpuModel = .{
+        .name = "esp8266",
+        .llvm_name = "esp8266",
+        .features = featureSet(&[_]Feature{
+            .debug,
+            .density,
+            .exception,
+            .extendedl32r,
+            .highpriinterrupts_level3,
+            .interrupt,
+            .mul16,
+            .mul32,
+            .nsa,
+            .prid,
+            .regprotect,
+            .rvector,
+            .timers1,
+        }),
+    };
     pub const generic: CpuModel = .{
         .name = "generic",
         .llvm_name = "generic",
diff --git a/lib/std/hash/xxhash.zig b/lib/std/hash/xxhash.zig
index 270759802e..107b608006 100644
--- a/lib/std/hash/xxhash.zig
+++ b/lib/std/hash/xxhash.zig
@@ -780,8 +780,6 @@ fn testExpect(comptime H: type, seed: anytype, input: []const u8, expected: u64)
 }
 
 test "xxhash3" {
-    if (builtin.cpu.arch.isMIPS64()) return error.SkipZigTest; // https://github.com/ziglang/zig/issues/23807
-
     const H = XxHash3;
     // Non-Seeded Tests
     try testExpect(H, 0, "", 0x2d06800538d394c2);
@@ -813,8 +811,6 @@ test "xxhash3" {
 }
 
 test "xxhash3 smhasher" {
-    if (builtin.cpu.arch.isMIPS64()) return error.SkipZigTest; // https://github.com/ziglang/zig/issues/23807
-
     const Test = struct {
         fn do() !void {
             try expectEqual(verify.smhasher(XxHash3.hash), 0x9a636405);
@@ -826,8 +822,6 @@ test "xxhash3 smhasher" {
 }
 
 test "xxhash3 iterative api" {
-    if (builtin.cpu.arch.isMIPS64()) return error.SkipZigTest; // https://github.com/ziglang/zig/issues/23807
-
     const Test = struct {
         fn do() !void {
             try verify.iterativeApi(XxHash3);
diff --git a/lib/std/math/big/int_test.zig b/lib/std/math/big/int_test.zig
index 661d8a3a32..4779d145a4 100644
--- a/lib/std/math/big/int_test.zig
+++ b/lib/std/math/big/int_test.zig
@@ -484,7 +484,7 @@ fn toFloat(comptime Float: type) !void {
     );
 }
 test toFloat {
-    if (builtin.zig_backend == .stage2_llvm) return error.SkipZigTest; // https://github.com/ziglang/zig/issues/24191
+    if (builtin.cpu.arch == .x86) return error.SkipZigTest;
     try toFloat(f16);
     try toFloat(f32);
     try toFloat(f64);
@@ -2801,8 +2801,6 @@ test "bitNotWrap more than two limbs" {
     if (builtin.zig_backend == .stage2_wasm) return error.SkipZigTest; // TODO
     if (builtin.zig_backend == .stage2_c) return error.SkipZigTest; // TODO
     if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; // TODO
-    // LLVM: unexpected runtime library name: __umodei4
-    if (builtin.zig_backend == .stage2_llvm and comptime builtin.target.cpu.arch.isWasm()) return error.SkipZigTest; // TODO
 
     var a = try Managed.initSet(testing.allocator, maxInt(Limb));
     defer a.deinit();
diff --git a/lib/std/math/log10.zig b/lib/std/math/log10.zig
index 4c1bb412a0..8e5366a8e3 100644
--- a/lib/std/math/log10.zig
+++ b/lib/std/math/log10.zig
@@ -139,8 +139,6 @@ test log10_int {
     if (builtin.zig_backend == .stage2_c) return error.SkipZigTest; // TODO
     if (builtin.zig_backend == .stage2_sparc64) return error.SkipZigTest; // TODO
     if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; // TODO
-    if (builtin.zig_backend == .stage2_llvm and comptime builtin.target.cpu.arch.isWasm()) return error.SkipZigTest; // TODO
-    if (builtin.zig_backend == .stage2_llvm and comptime builtin.target.cpu.arch == .hexagon) return error.SkipZigTest;
 
     inline for (
         .{ u8, u16, u32, u64, u128, u256, u512 },
diff --git a/lib/std/math/modf.zig b/lib/std/math/modf.zig
index 99068d1a78..00ca26b01f 100644
--- a/lib/std/math/modf.zig
+++ b/lib/std/math/modf.zig
@@ -87,7 +87,6 @@ fn ModfTests(comptime T: type) type {
         test "vector" {
             if (builtin.os.tag.isDarwin() and builtin.cpu.arch == .aarch64) return error.SkipZigTest;
             if (builtin.cpu.arch == .s390x) return error.SkipZigTest;
-            if (comptime builtin.cpu.has(.loongarch, .lsx)) return error.SkipZigTest; // https://github.com/llvm/llvm-project/issues/159529
 
             const widths = [_]comptime_int{ 1, 2, 3, 4, 8, 16 };
 
diff --git a/lib/std/mem.zig b/lib/std/mem.zig
index 3d8f3a604e..04c47ae68c 100644
--- a/lib/std/mem.zig
+++ b/lib/std/mem.zig
@@ -4809,8 +4809,7 @@ pub fn doNotOptimizeAway(val: anytype) void {
             } else doNotOptimizeAway(&val);
         },
         .float => {
-            // https://github.com/llvm/llvm-project/issues/159200
-            if ((t.float.bits == 32 or t.float.bits == 64) and builtin.zig_backend != .stage2_c and !builtin.cpu.arch.isLoongArch()) {
+            if ((t.float.bits == 32 or t.float.bits == 64) and builtin.zig_backend != .stage2_c) {
                 asm volatile (""
                     :
                     : [_] "rm" (val),
diff --git a/lib/std/os/linux/test.zig b/lib/std/os/linux/test.zig
index 63e0d2f96a..043c04bf1f 100644
--- a/lib/std/os/linux/test.zig
+++ b/lib/std/os/linux/test.zig
@@ -10,8 +10,6 @@ const expectEqual = std.testing.expectEqual;
 const fs = std.fs;
 
 test "fallocate" {
-    if (builtin.cpu.arch.isMIPS64() and (builtin.abi == .gnuabin32 or builtin.abi == .muslabin32)) return error.SkipZigTest; // https://codeberg.org/ziglang/zig/issues/30220
-
     const io = std.testing.io;
 
     var tmp = std.testing.tmpDir(.{});
diff --git a/lib/std/os/windows.zig b/lib/std/os/windows.zig
index 5655c10b8e..a7a1852c4c 100644
--- a/lib/std/os/windows.zig
+++ b/lib/std/os/windows.zig
@@ -5665,7 +5665,136 @@ pub const PF = enum(DWORD) {
     ARM_V83_JSCVT_INSTRUCTIONS_AVAILABLE = 44,
 
     /// This Arm processor implements the Arm v8.3 LRCPC instructions (for example, LDAPR). Note that certain Arm v8.2 CPUs may optionally support the LRCPC instructions.
-    ARM_V83_LRCPC_INSTRUCTIONS_AVAILABLE,
+    ARM_V83_LRCPC_INSTRUCTIONS_AVAILABLE = 45,
+
+    /// This Arm processor implements the SVE (Scalable Vector Extension) instructions (FEAT_SVE).
+    ARM_SVE_INSTRUCTIONS_AVAILABLE = 46,
+
+    /// This Arm processor implements the SVE2 instructions (FEAT_SVE2).
+    ARM_SVE2_INSTRUCTIONS_AVAILABLE = 47,
+
+    /// This Arm processor implements the SVE2.1 instructions (FEAT_SVE2p1).
+    ARM_SVE2_1_INSTRUCTIONS_AVAILABLE = 48,
+
+    /// This Arm processor implements the SVE AES instructions (FEAT_SVE_AES).
+    ARM_SVE_AES_INSTRUCTIONS_AVAILABLE = 49,
+
+    /// This Arm processor implements the SVE 128-bit polynomial multiply long instructions (FEAT_SVE_PMULL128).
+    ARM_SVE_PMULL128_INSTRUCTIONS_AVAILABLE = 50,
+
+    /// This Arm processor implements the SVE bit permute instructions (FEAT_SVE_BitPerm).
+    ARM_SVE_BITPERM_INSTRUCTIONS_AVAILABLE = 51,
+
+    /// This Arm processor implements the SVE BF16 (BFloat16) instructions (FEAT_BF16).
+    ARM_SVE_BF16_INSTRUCTIONS_AVAILABLE = 52,
+
+    /// This Arm processor implements the SVE EBF16 (Extended BFloat16) instructions (FEAT_EBF16).
+    ARM_SVE_EBF16_INSTRUCTIONS_AVAILABLE = 53,
+
+    /// This Arm processor implements the SVE B16B16 instructions (FEAT_SVE_B16B16).
+    ARM_SVE_B16B16_INSTRUCTIONS_AVAILABLE = 54,
+
+    /// This Arm processor implements the SVE SHA-3 cryptographic instructions (FEAT_SVE_SHA3).
+    ARM_SVE_SHA3_INSTRUCTIONS_AVAILABLE = 55,
+
+    /// This Arm processor implements the SVE SM4 cryptographic instructions (FEAT_SVE_SM4).
+    ARM_SVE_SM4_INSTRUCTIONS_AVAILABLE = 56,
+
+    /// This Arm processor implements the SVE I8MM (Int8 matrix multiply) instructions (FEAT_I8MM).
+    ARM_SVE_I8MM_INSTRUCTIONS_AVAILABLE = 57,
+
+    /// This Arm processor implements the SVE F32MM (FP32 matrix multiply) instructions (FEAT_F32MM).
+    ARM_SVE_F32MM_INSTRUCTIONS_AVAILABLE = 58,
+
+    /// This Arm processor implements the SVE F64MM (FP64 matrix multiply) instructions (FEAT_F64MM).
+    ARM_SVE_F64MM_INSTRUCTIONS_AVAILABLE = 59,
+
+    /// This x64 processor implements the BMI2 instruction set.
+    BMI2_INSTRUCTIONS_AVAILABLE = 60,
+
+    /// This x64 processor implements the MOVDIR64B instruction.
+    MOVDIR64B_INSTRUCTION_AVAILABLE = 61,
+
+    /// This Arm processor implements the LSE2 atomic instructions (FEAT_LSE2).
+    ARM_LSE2_AVAILABLE = 62,
+
+    /// This Arm processor implements the SHA-3 cryptographic instructions (FEAT_SHA3).
+    ARM_SHA3_INSTRUCTIONS_AVAILABLE = 64,
+
+    /// This Arm processor implements the SHA-512 cryptographic instructions (FEAT_SHA512).
+    ARM_SHA512_INSTRUCTIONS_AVAILABLE = 65,
+
+    /// This Arm processor implements the I8MM (Int8 matrix multiply) NEON instructions (FEAT_I8MM).
+    ARM_V82_I8MM_INSTRUCTIONS_AVAILABLE = 66,
+
+    /// This Arm processor implements the FP16 (half-precision floating point) NEON instructions (FEAT_FP16).
+    ARM_V82_FP16_INSTRUCTIONS_AVAILABLE = 67,
+
+    /// This Arm processor implements the BF16 (BFloat16) NEON instructions (FEAT_BF16).
+    ARM_V86_BF16_INSTRUCTIONS_AVAILABLE = 68,
+
+    /// This Arm processor implements the EBF16 (Extended BFloat16) NEON instructions (FEAT_EBF16).
+    ARM_V86_EBF16_INSTRUCTIONS_AVAILABLE = 69,
+
+    /// This Arm processor implements the SME (Scalable Matrix Extension) instructions (FEAT_SME).
+    ARM_SME_INSTRUCTIONS_AVAILABLE = 70,
+
+    /// This Arm processor implements the SME2 instructions (FEAT_SME2).
+    ARM_SME2_INSTRUCTIONS_AVAILABLE = 71,
+
+    /// This Arm processor implements the SME2.1 instructions (FEAT_SME2p1).
+    ARM_SME2_1_INSTRUCTIONS_AVAILABLE = 72,
+
+    /// This Arm processor implements the SME2.2 instructions (FEAT_SME2p2).
+    ARM_SME2_2_INSTRUCTIONS_AVAILABLE = 73,
+
+    /// This Arm processor implements the SVE AES instructions when in Streaming SVE mode (FEAT_SSVE_AES).
+    ARM_SME_AES_INSTRUCTIONS_AVAILABLE = 74,
+
+    /// This Arm processor implements the SVE bit permute instructions when in Streaming SVE mode (FEAT_SSVE_BitPerm).
+    ARM_SME_SBITPERM_INSTRUCTIONS_AVAILABLE = 75,
+
+    /// This Arm processor implements the SVE FMMLA (widening, 4-way, FP8 to FP16) instruction when in Streaming SVE mode (FEAT_SSVE_F8F16MM).
+    ARM_SME_SF8MM4_INSTRUCTIONS_AVAILABLE = 76,
+
+    /// This Arm processor implements the SVE FMMLA (widening, 8-way, FP8 to FP32) instruction when in Streaming SVE mode (FEAT_SSVE_F8F32MM).
+    ARM_SME_SF8MM8_INSTRUCTIONS_AVAILABLE = 77,
+
+    /// This Arm processor implements the SVE2 FP8DOT2 instructions when in Streaming SVE mode (FEAT_SSVE_FP8DOT2).
+    ARM_SME_SF8DP2_INSTRUCTIONS_AVAILABLE = 78,
+
+    /// This Arm processor implements the SVE2 FP8DOT4 instructions when in Streaming SVE mode (FEAT_SSVE_FP8DOT4).
+    ARM_SME_SF8DP4_INSTRUCTIONS_AVAILABLE = 79,
+
+    /// This Arm processor implements the SVE2 FP8FMA instructions when in Streaming SVE mode (FEAT_SSVE_FP8FMA).
+    ARM_SME_SF8FMA_INSTRUCTIONS_AVAILABLE = 80,
+
+    /// This Arm processor implements the SME F8F32 instructions (FEAT_SME_F8F32).
+    ARM_SME_F8F32_INSTRUCTIONS_AVAILABLE = 81,
+
+    /// This Arm processor implements the SME F8F16 instructions (FEAT_SME_F8F16).
+    ARM_SME_F8F16_INSTRUCTIONS_AVAILABLE = 82,
+
+    /// This Arm processor implements the SME F16F16 instructions (FEAT_SME_F16F16).
+    ARM_SME_F16F16_INSTRUCTIONS_AVAILABLE = 83,
+
+    /// This Arm processor implements the SME B16B16 instructions (FEAT_SME_B16B16).
+    ARM_SME_B16B16_INSTRUCTIONS_AVAILABLE = 84,
+
+    /// This Arm processor implements the SME F64F64 instructions (FEAT_SME_F64F64).
+    ARM_SME_F64F64_INSTRUCTIONS_AVAILABLE = 85,
+
+    /// This Arm processor implements the SME I16I64 instructions (FEAT_SME_I16I64).
+    ARM_SME_I16I64_INSTRUCTIONS_AVAILABLE = 86,
+
+    /// This Arm processor implements the SME LUTv2 instructions (FEAT_SME_LUTv2).
+    ARM_SME_LUTv2_INSTRUCTIONS_AVAILABLE = 87,
+
+    /// This Arm processor implements SME FA64 (Full AArch64 instruction set when in Streaming SVE mode) (FEAT_SME_FA64).
+    ARM_SME_FA64_INSTRUCTIONS_AVAILABLE = 88,
+
+    /// This x64 processor implements the UMONITOR instruction.
+    UMONITOR_INSTRUCTION_AVAILABLE = 89,
 };
 
 pub const MAX_WOW64_SHARED_ENTRIES = 16;
diff --git a/lib/std/simd.zig b/lib/std/simd.zig
index ea29098f35..d2ff4032a9 100644
--- a/lib/std/simd.zig
+++ b/lib/std/simd.zig
@@ -157,13 +157,6 @@ pub fn join(a: anytype, b: anytype) @Vector(vectorLength(@TypeOf(a)) + vectorLen
 /// Returns a vector whose elements alternates between those of each input vector.
 /// For example, `interlace(.{[4]u32{11, 12, 13, 14}, [4]u32{21, 22, 23, 24}})` returns a vector containing `.{11, 21, 12, 22, 13, 23, 14, 24}`.
 pub fn interlace(vecs: anytype) @Vector(vectorLength(@TypeOf(vecs[0])) * vecs.len, std.meta.Child(@TypeOf(vecs[0]))) {
-    // interlace doesn't work on MIPS, for some reason.
-    // Notes from earlier debug attempt:
-    //  The indices are correct. The problem seems to be with the @shuffle builtin.
-    //  On MIPS, the test that interlaces small_base gives { 0, 2, 0, 0, 64, 255, 248, 200, 0, 0 }.
-    //  Calling this with two inputs seems to work fine, but I'll let the compile error trigger for all inputs, just to be safe.
-    if (builtin.cpu.arch.isMIPS()) @compileError("TODO: Find out why interlace() doesn't work on MIPS");
-
     const VecType = @TypeOf(vecs[0]);
     const vecs_arr = @as([vecs.len]VecType, vecs);
     const Child = std.meta.Child(@TypeOf(vecs_arr[0]));
@@ -247,13 +240,11 @@ test "vector patterns" {
     try std.testing.expectEqual([8]u32{ 10, 20, 30, 40, 55, 66, 77, 88 }, join(base, other_base));
     try std.testing.expectEqual([2]u32{ 20, 30 }, extract(base, 1, 2));
 
-    if (!builtin.cpu.arch.isMIPS()) {
-        try std.testing.expectEqual([8]u32{ 10, 55, 20, 66, 30, 77, 40, 88 }, interlace(.{ base, other_base }));
+    try std.testing.expectEqual([8]u32{ 10, 55, 20, 66, 30, 77, 40, 88 }, interlace(.{ base, other_base }));
 
-        const small_braid = interlace(small_bases);
-        try std.testing.expectEqual([10]u8{ 0, 2, 4, 6, 8, 1, 3, 5, 7, 9 }, small_braid);
-        try std.testing.expectEqual(small_bases, deinterlace(small_bases.len, small_braid));
-    }
+    const small_braid = interlace(small_bases);
+    try std.testing.expectEqual([10]u8{ 0, 2, 4, 6, 8, 1, 3, 5, 7, 9 }, small_braid);
+    try std.testing.expectEqual(small_bases, deinterlace(small_bases.len, small_braid));
 }
 
 /// Joins two vectors, shifts them leftwards (towards lower indices) and extracts the leftmost elements into a vector the length of a and b.
@@ -384,9 +375,6 @@ pub fn prefixScanWithFunc(
     /// For example, this should be 0 for addition or 1 for multiplication.
     comptime identity: std.meta.Child(@TypeOf(vec)),
 ) if (ErrorType == void) @TypeOf(vec) else ErrorType!@TypeOf(vec) {
-    // I haven't debugged this, but it might be a cousin of sorts to what's going on with interlace.
-    if (builtin.cpu.arch.isMIPS()) @compileError("TODO: Find out why prefixScan doesn't work on MIPS");
-
     const len = vectorLength(@TypeOf(vec));
 
     if (hop == 0) @compileError("hop can not be 0; you'd be going nowhere forever!");
@@ -456,11 +444,6 @@ pub fn prefixScan(comptime op: std.builtin.ReduceOp, comptime hop: isize, vec: a
 }
 
 test "vector prefix scan" {
-    if (builtin.cpu.arch == .aarch64_be and builtin.zig_backend == .stage2_llvm) return error.SkipZigTest; // https://github.com/ziglang/zig/issues/21893
-    if (builtin.zig_backend == .stage2_llvm and builtin.cpu.arch == .hexagon) return error.SkipZigTest;
-
-    if (builtin.cpu.arch.isMIPS()) return error.SkipZigTest;
-
     const int_base = @Vector(4, i32){ 11, 23, 9, -21 };
     const float_base = @Vector(4, f32){ 2, 0.5, -10, 6.54321 };
     const bool_base = @Vector(4, bool){ true, false, true, false };
diff --git a/lib/std/start.zig b/lib/std/start.zig
index 01b33ba5f1..52de24257f 100644
--- a/lib/std/start.zig
+++ b/lib/std/start.zig
@@ -20,8 +20,9 @@ comptime {
     _ = root;
 
     if (builtin.output_mode == .Lib and builtin.link_mode == .dynamic) {
-        if (native_os == .windows and !@hasDecl(root, "_DllMainCRTStartup")) {
-            @export(&_DllMainCRTStartup, .{ .name = "_DllMainCRTStartup" });
+        const dll_main_crt_startup = if (builtin.abi.isGnu()) "DllMainCRTStartup" else "_DllMainCRTStartup";
+        if (native_os == .windows and !builtin.link_libc and !@hasDecl(root, dll_main_crt_startup)) {
+            @export(&DllMainCRTStartup, .{ .name = dll_main_crt_startup });
         }
     } else if (builtin.output_mode == .Exe or @hasDecl(root, "main")) {
         if (builtin.link_libc and @hasDecl(root, "main")) {
@@ -71,7 +72,7 @@ comptime {
     }
 }
 
-fn _DllMainCRTStartup(
+fn DllMainCRTStartup(
     hinstDLL: std.os.windows.HINSTANCE,
     fdwReason: std.os.windows.DWORD,
     lpReserved: std.os.windows.LPVOID,
diff --git a/lib/std/zig/llvm/Builder.zig b/lib/std/zig/llvm/Builder.zig
index d4316d45a0..93c2e492f0 100644
--- a/lib/std/zig/llvm/Builder.zig
+++ b/lib/std/zig/llvm/Builder.zig
@@ -13947,8 +13947,8 @@ pub fn toBitcode(self: *Builder, allocator: Allocator, producer: Producer) bitco
                         const bit_count = extra.type.scalarBits(self);
                         const val: i64 = if (bit_count <= 64)
                             bigint.toInt(i64) catch unreachable
-                        else if (bigint.toInt(u64)) |val|
-                            @bitCast(val)
+                        else if (bigint.toInt(u63)) |val|
+                            @bitCast(@as(u64, val))
                         else |_| {
                             const limbs = try record.addManyAsSlice(
                                 self.gpa,
diff --git a/lib/std/zig/system.zig b/lib/std/zig/system.zig
index 61f78093d6..de407b31be 100644
--- a/lib/std/zig/system.zig
+++ b/lib/std/zig/system.zig
@@ -460,16 +460,6 @@ pub fn resolveTargetQuery(io: Io, query: Target.Query) DetectError!Target {
         if (result.cpu.arch.isArm() and result.abi.float() == .soft) {
             result.cpu.features.removeFeature(@intFromEnum(Target.arm.Feature.vfp2));
         }
-
-        // https://github.com/llvm/llvm-project/issues/135283
-        if (result.cpu.arch.isMIPS() and result.abi.float() == .soft) {
-            result.cpu.features.addFeature(@intFromEnum(Target.mips.Feature.soft_float));
-        }
-
-        // https://github.com/llvm/llvm-project/issues/168992
-        if (result.cpu.arch == .s390x) {
-            result.cpu.features.removeFeature(@intFromEnum(Target.s390x.Feature.vector));
-        }
     }
 
     // It's possible that we detect the native ABI, but fail to detect the OS version or were told
diff --git a/lib/std/zig/system/arm.zig b/lib/std/zig/system/arm.zig
index 95f8f8aebc..461edb2e19 100644
--- a/lib/std/zig/system/arm.zig
+++ b/lib/std/zig/system/arm.zig
@@ -21,7 +21,7 @@ pub const cpu_models = struct {
     };
 
     // implementer = 0x41
-    const ARM = [_]E{
+    const Arm = [_]E{
         E{ .part = 0x926, .m32 = &A32.arm926ej_s },
         E{ .part = 0xb02, .m32 = &A32.mpcore },
         E{ .part = 0xb36, .m32 = &A32.arm1136j_s },
@@ -88,8 +88,12 @@ pub const cpu_models = struct {
         E{ .part = 0xd87, .m64 = &A64.cortex_a725 },
         E{ .part = 0xd88, .m64 = &A64.cortex_a520ae },
         E{ .part = 0xd89, .m64 = &A64.cortex_a720ae },
+        E{ .part = 0xd8a, .m64 = &A64.c1_nano },
+        E{ .part = 0xd8b, .m64 = &A64.c1_pro },
+        E{ .part = 0xd8c, .m64 = &A64.c1_ultra },
         E{ .part = 0xd8e, .m64 = &A64.neoverse_n3 },
         E{ .part = 0xd8f, .m64 = &A64.cortex_a320 },
+        E{ .part = 0xd90, .m64 = &A64.c1_premium },
     };
     // implementer = 0x42
     const Broadcom = [_]E{
@@ -102,10 +106,17 @@ pub const cpu_models = struct {
         E{ .part = 0x0a3, .m64 = &A64.thunderxt83 },
         E{ .part = 0x0a1, .m64 = &A64.thunderxt88 },
         E{ .part = 0x0af, .m64 = &A64.thunderx2t99 },
+        E{ .part = 0x0b0, .m64 = &A64.cortex_a57 },
+        E{ .part = 0x0b1, .m64 = &A64.cortex_a57 },
+        E{ .part = 0x0b2, .m64 = &A64.cortex_a57 },
+        E{ .part = 0x0b3, .m64 = &A64.cortex_a57 },
+        E{ .part = 0x0b4, .m64 = &A64.cortex_a57 },
+        E{ .part = 0x0b5, .m64 = &A64.cortex_a57 },
     };
     // implementer = 0x46
     const Fujitsu = [_]E{
         E{ .part = 0x001, .m64 = &A64.a64fx },
+        E{ .part = 0x003, .m64 = &A64.fujitsu_monaka },
     };
     // implementer = 0x48
     const HiSilicon = [_]E{
@@ -137,8 +148,14 @@ pub const cpu_models = struct {
         E{ .part = 0xc00, .m64 = &A64.falkor },
         E{ .part = 0xc01, .m64 = &A64.saphira },
     };
+    // implementer = 0x53
+    const Samsung = [_]E{
+        E{ .part = 0x000, .m64 = &A64.exynos_m1 },
+    };
     // implementer = 0x61
     const Apple = [_]E{
+        E{ .part = 0x020, .m64 = &A64.apple_m1 },
+        E{ .part = 0x021, .m64 = &A64.apple_m1 },
         E{ .part = 0x022, .m64 = &A64.apple_m1 },
         E{ .part = 0x023, .m64 = &A64.apple_m1 },
         E{ .part = 0x024, .m64 = &A64.apple_m1 },
@@ -151,11 +168,43 @@ pub const cpu_models = struct {
         E{ .part = 0x035, .m64 = &A64.apple_m2 },
         E{ .part = 0x038, .m64 = &A64.apple_m2 },
         E{ .part = 0x039, .m64 = &A64.apple_m2 },
+        E{ .part = 0x042, .m64 = &A64.apple_m3 },
+        E{ .part = 0x043, .m64 = &A64.apple_m3 },
+        E{ .part = 0x044, .m64 = &A64.apple_m3 },
+        E{ .part = 0x045, .m64 = &A64.apple_m3 },
+        E{ .part = 0x048, .m64 = &A64.apple_m3 },
+        E{ .part = 0x049, .m64 = &A64.apple_m3 },
+        E{ .part = 0x052, .m64 = &A64.apple_m4 },
+        E{ .part = 0x053, .m64 = &A64.apple_m4 },
+        E{ .part = 0x054, .m64 = &A64.apple_m4 },
+        E{ .part = 0x055, .m64 = &A64.apple_m4 },
+        E{ .part = 0x058, .m64 = &A64.apple_m4 },
+        E{ .part = 0x059, .m64 = &A64.apple_m4 },
+    };
+    // implementer = 0x63
+    const ArmChina = [_]E{
+        E{ .part = 0x132, .m32 = &A32.star_mc1 },
+        E{ .part = 0xd25, .m32 = &A32.star_mc3 },
+    };
+    // implementer = 0x68
+    const Hxt = [_]E{
+        E{ .part = 0x000, .m64 = &A64.cortex_a57 },
+    };
+    // implementer = 0x6d
+    const Microsoft = [_]E{
+        E{ .part = 0xd49, .m64 = &A64.neoverse_n2 },
+    };
+    // implementer = 0xC0
+    const AmpereOne = [_]E{
+        E{ .part = 0xac3, .m64 = &A64.ampere1 },
+        E{ .part = 0xac4, .m64 = &A64.ampere1a },
+        E{ .part = 0xac5, .m64 = &A64.ampere1b },
+        E{ .part = 0xac7, .m64 = &A64.ampere1c },
     };
 
     pub fn isKnown(core: CoreInfo, is_64bit: bool) ?*const Target.Cpu.Model {
         const models = switch (core.implementer) {
-            0x41 => &ARM,
+            0x41 => &Arm,
             0x42 => &Broadcom,
             0x43 => &Cavium,
             0x46 => &Fujitsu,
@@ -163,7 +212,12 @@ pub const cpu_models = struct {
             0x4e => &Nvidia,
             0x50 => &Ampere,
             0x51 => &Qualcomm,
+            0x53 => &Samsung,
             0x61 => &Apple,
+            0x63 => &ArmChina,
+            0x68 => &Hxt,
+            0x6d => &Microsoft,
+            0xC0 => &AmpereOne,
             else => return null,
         };
 
diff --git a/lib/std/zig/system/windows.zig b/lib/std/zig/system/windows.zig
index b34309c09b..70cfaed4d3 100644
--- a/lib/std/zig/system/windows.zig
+++ b/lib/std/zig/system/windows.zig
@@ -215,10 +215,43 @@ fn genericCpuAndNativeFeatures(arch: Target.Cpu.Arch) Target.Cpu {
             // Override any features that are either present or absent
             setFeature(Feature, &cpu, .neon, IsProcessorFeaturePresent(PF.ARM_NEON_INSTRUCTIONS_AVAILABLE));
             setFeature(Feature, &cpu, .crc, IsProcessorFeaturePresent(PF.ARM_V8_CRC32_INSTRUCTIONS_AVAILABLE));
-            setFeature(Feature, &cpu, .crypto, IsProcessorFeaturePresent(PF.ARM_V8_CRYPTO_INSTRUCTIONS_AVAILABLE));
+            setFeature(Feature, &cpu, .aes, IsProcessorFeaturePresent(PF.ARM_V8_CRYPTO_INSTRUCTIONS_AVAILABLE));
+            setFeature(Feature, &cpu, .sha2, IsProcessorFeaturePresent(PF.ARM_V8_CRYPTO_INSTRUCTIONS_AVAILABLE));
             setFeature(Feature, &cpu, .lse, IsProcessorFeaturePresent(PF.ARM_V81_ATOMIC_INSTRUCTIONS_AVAILABLE));
             setFeature(Feature, &cpu, .dotprod, IsProcessorFeaturePresent(PF.ARM_V82_DP_INSTRUCTIONS_AVAILABLE));
             setFeature(Feature, &cpu, .jsconv, IsProcessorFeaturePresent(PF.ARM_V83_JSCVT_INSTRUCTIONS_AVAILABLE));
+            setFeature(Feature, &cpu, .rcpc, IsProcessorFeaturePresent(PF.ARM_V83_LRCPC_INSTRUCTIONS_AVAILABLE));
+            setFeature(Feature, &cpu, .sve, IsProcessorFeaturePresent(PF.ARM_SVE_INSTRUCTIONS_AVAILABLE));
+            setFeature(Feature, &cpu, .sve2, IsProcessorFeaturePresent(PF.ARM_SVE2_INSTRUCTIONS_AVAILABLE));
+            setFeature(Feature, &cpu, .sve2p1, IsProcessorFeaturePresent(PF.ARM_SVE2_1_INSTRUCTIONS_AVAILABLE));
+            setFeature(Feature, &cpu, .sve_aes, IsProcessorFeaturePresent(PF.ARM_SVE_AES_INSTRUCTIONS_AVAILABLE) or IsProcessorFeaturePresent(PF.ARM_SVE_PMULL128_INSTRUCTIONS_AVAILABLE));
+            setFeature(Feature, &cpu, .sve_bitperm, IsProcessorFeaturePresent(PF.ARM_SVE_BITPERM_INSTRUCTIONS_AVAILABLE));
+            setFeature(Feature, &cpu, .bf16, IsProcessorFeaturePresent(PF.ARM_V86_BF16_INSTRUCTIONS_AVAILABLE));
+            setFeature(Feature, &cpu, .sve_b16b16, IsProcessorFeaturePresent(PF.ARM_SVE_B16B16_INSTRUCTIONS_AVAILABLE));
+            setFeature(Feature, &cpu, .sve_sha3, IsProcessorFeaturePresent(PF.ARM_SVE_SHA3_INSTRUCTIONS_AVAILABLE));
+            setFeature(Feature, &cpu, .sve_sm4, IsProcessorFeaturePresent(PF.ARM_SVE_SM4_INSTRUCTIONS_AVAILABLE));
+            setFeature(Feature, &cpu, .i8mm, IsProcessorFeaturePresent(PF.ARM_V82_I8MM_INSTRUCTIONS_AVAILABLE));
+            setFeature(Feature, &cpu, .f32mm, IsProcessorFeaturePresent(PF.ARM_SVE_F32MM_INSTRUCTIONS_AVAILABLE));
+            setFeature(Feature, &cpu, .f64mm, IsProcessorFeaturePresent(PF.ARM_SVE_F64MM_INSTRUCTIONS_AVAILABLE));
+            setFeature(Feature, &cpu, .sme, IsProcessorFeaturePresent(PF.ARM_SME_INSTRUCTIONS_AVAILABLE));
+            setFeature(Feature, &cpu, .sme2, IsProcessorFeaturePresent(PF.ARM_SME2_INSTRUCTIONS_AVAILABLE));
+            setFeature(Feature, &cpu, .lse2, IsProcessorFeaturePresent(PF.ARM_LSE2_AVAILABLE));
+            setFeature(Feature, &cpu, .sha3, IsProcessorFeaturePresent(PF.ARM_SHA3_INSTRUCTIONS_AVAILABLE) and IsProcessorFeaturePresent(PF.ARM_SHA512_INSTRUCTIONS_AVAILABLE));
+            setFeature(Feature, &cpu, .fullfp16, IsProcessorFeaturePresent(PF.ARM_V82_FP16_INSTRUCTIONS_AVAILABLE));
+            setFeature(Feature, &cpu, .sme2p1, IsProcessorFeaturePresent(PF.ARM_SME2_1_INSTRUCTIONS_AVAILABLE));
+            setFeature(Feature, &cpu, .sme2p2, IsProcessorFeaturePresent(PF.ARM_SME2_2_INSTRUCTIONS_AVAILABLE));
+            setFeature(Feature, &cpu, .ssve_aes, IsProcessorFeaturePresent(PF.ARM_SME_AES_INSTRUCTIONS_AVAILABLE));
+            setFeature(Feature, &cpu, .ssve_bitperm, IsProcessorFeaturePresent(PF.ARM_SME_SBITPERM_INSTRUCTIONS_AVAILABLE));
+            setFeature(Feature, &cpu, .ssve_fp8dot2, IsProcessorFeaturePresent(PF.ARM_SME_SF8DP2_INSTRUCTIONS_AVAILABLE));
+            setFeature(Feature, &cpu, .ssve_fp8dot4, IsProcessorFeaturePresent(PF.ARM_SME_SF8DP4_INSTRUCTIONS_AVAILABLE));
+            setFeature(Feature, &cpu, .ssve_fp8fma, IsProcessorFeaturePresent(PF.ARM_SME_SF8FMA_INSTRUCTIONS_AVAILABLE));
+            setFeature(Feature, &cpu, .sme_f8f32, IsProcessorFeaturePresent(PF.ARM_SME_F8F32_INSTRUCTIONS_AVAILABLE));
+            setFeature(Feature, &cpu, .sme_f8f16, IsProcessorFeaturePresent(PF.ARM_SME_F8F16_INSTRUCTIONS_AVAILABLE));
+            setFeature(Feature, &cpu, .sme_b16b16, IsProcessorFeaturePresent(PF.ARM_SME_B16B16_INSTRUCTIONS_AVAILABLE));
+            setFeature(Feature, &cpu, .sme_f64f64, IsProcessorFeaturePresent(PF.ARM_SME_F64F64_INSTRUCTIONS_AVAILABLE));
+            setFeature(Feature, &cpu, .sme_i16i64, IsProcessorFeaturePresent(PF.ARM_SME_I16I64_INSTRUCTIONS_AVAILABLE));
+            setFeature(Feature, &cpu, .sme_lutv2, IsProcessorFeaturePresent(PF.ARM_SME_LUTv2_INSTRUCTIONS_AVAILABLE));
+            setFeature(Feature, &cpu, .sme_fa64, IsProcessorFeaturePresent(PF.ARM_SME_FA64_INSTRUCTIONS_AVAILABLE));
         },
         else => {},
     }
diff --git a/lib/std/zig/system/x86.zig b/lib/std/zig/system/x86.zig
index 60e72589c8..eb09923ade 100644
--- a/lib/std/zig/system/x86.zig
+++ b/lib/std/zig/system/x86.zig
@@ -232,7 +232,7 @@ fn detectIntelProcessor(cpu: *Target.Cpu, family: u32, model: u32, brand_id: u32
                     cpu.model = &Target.x86.cpu.lunarlake;
                     return;
                 },
-                0xcc => {
+                0xcc, 0xd5 => {
                     cpu.model = &Target.x86.cpu.pantherlake;
                     return;
                 },
@@ -307,6 +307,20 @@ fn detectIntelProcessor(cpu: *Target.Cpu, family: u32, model: u32, brand_id: u32
             cpu.model = &Target.x86.cpu.pentium4;
             return;
         },
+        18 => switch (model) {
+            0x01, 0x03 => {
+                cpu.model = &Target.x86.cpu.novalake;
+                return;
+            },
+            else => return, // Unknown CPU Model
+        },
+        19 => switch (model) {
+            0x01 => {
+                cpu.model = &Target.x86.cpu.diamondrapids;
+                return;
+            },
+            else => return, // Unknown CPU Model
+        },
         else => return, // Unknown CPU Model
     }
 }
@@ -412,6 +426,8 @@ fn detectNativeFeatures(cpu: *Target.Cpu, os_tag: Target.Os.Tag) void {
     // AMX requires additional context to be saved by the OS.
     const has_amx_save = xcr0.xtilecfg and xcr0.xtiledata;
 
+    const has_apx_save = xcr0.apx;
+
     setFeature(cpu, .avx, has_avx_save);
     setFeature(cpu, .fma, bit(leaf.ecx, 12) and has_avx_save);
     // Only enable XSAVE if OS has enabled support for saving YMM state.
@@ -470,7 +486,20 @@ fn detectNativeFeatures(cpu: *Target.Cpu, os_tag: Target.Os.Tag) void {
         }
     }
 
-    if (max_level >= 0x7) {
+    if (max_ext_level >= 0x80000021) {
+        leaf = cpuid(0x80000021, 0);
+
+        // AMD uses a different bit for prefetchi.
+        setFeature(cpu, .prefetchi, bit(leaf.eax, 20));
+    } else {
+        for ([_]Target.x86.Feature{
+            .prefetchi,
+        }) |feat| {
+            setFeature(cpu, feat, false);
+        }
+    }
+
+    const has_avx10 = if (max_level >= 0x7) has_avx10: {
         leaf = cpuid(0x7, 0);
 
         setFeature(cpu, .fsgsbase, bit(leaf.ebx, 0));
@@ -484,7 +513,6 @@ fn detectNativeFeatures(cpu: *Target.Cpu, os_tag: Target.Os.Tag) void {
         setFeature(cpu, .rtm, bit(leaf.ebx, 11));
         // AVX512 is only supported if the OS supports the context save for it.
         setFeature(cpu, .avx512f, bit(leaf.ebx, 16) and has_avx512_save);
-        setFeature(cpu, .evex512, bit(leaf.ebx, 16) and has_avx512_save);
         setFeature(cpu, .avx512dq, bit(leaf.ebx, 17) and has_avx512_save);
         setFeature(cpu, .rdseed, bit(leaf.ebx, 18));
         setFeature(cpu, .adx, bit(leaf.ebx, 19));
@@ -556,15 +584,20 @@ fn detectNativeFeatures(cpu: *Target.Cpu, os_tag: Target.Os.Tag) void {
             setFeature(cpu, .avxneconvert, bit(leaf.edx, 5) and has_avx_save);
             setFeature(cpu, .amx_complex, bit(leaf.edx, 8) and has_amx_save);
             setFeature(cpu, .avxvnniint16, bit(leaf.edx, 10) and has_avx_save);
-            setFeature(cpu, .prefetchi, bit(leaf.edx, 14));
+            // This needs to account for prefetchi already being detected above on AMD.
+            setFeature(cpu, .prefetchi, cpu.has(.x86, .prefetchi) or bit(leaf.edx, 14));
             setFeature(cpu, .usermsr, bit(leaf.edx, 15));
             // APX
-            setFeature(cpu, .egpr, bit(leaf.edx, 21));
-            setFeature(cpu, .push2pop2, bit(leaf.edx, 21));
-            setFeature(cpu, .ppx, bit(leaf.edx, 21));
-            setFeature(cpu, .ndd, bit(leaf.edx, 21));
-            setFeature(cpu, .ccmp, bit(leaf.edx, 21));
-            setFeature(cpu, .cf, bit(leaf.edx, 21));
+            setFeature(cpu, .egpr, bit(leaf.edx, 21) and has_apx_save);
+            setFeature(cpu, .push2pop2, bit(leaf.edx, 21) and has_apx_save);
+            setFeature(cpu, .ppx, bit(leaf.edx, 21) and has_apx_save);
+            setFeature(cpu, .ndd, bit(leaf.edx, 21) and has_apx_save);
+            setFeature(cpu, .ccmp, bit(leaf.edx, 21) and has_apx_save);
+            setFeature(cpu, .nf, bit(leaf.edx, 21) and has_apx_save);
+            setFeature(cpu, .cf, bit(leaf.edx, 21) and has_apx_save);
+            setFeature(cpu, .zu, bit(leaf.edx, 21) and has_apx_save);
+
+            break :has_avx10 bit(leaf.edx, 19);
         } else {
             for ([_]Target.x86.Feature{
                 .sha512,
@@ -582,19 +615,23 @@ fn detectNativeFeatures(cpu: *Target.Cpu, os_tag: Target.Os.Tag) void {
                 .avxneconvert,
                 .amx_complex,
                 .avxvnniint16,
-                .prefetchi,
+                // prefetchi already handled earlier.
                 .usermsr,
                 .egpr,
                 .push2pop2,
                 .ppx,
                 .ndd,
                 .ccmp,
+                .nf,
                 .cf,
+                .zu,
             }) |feat| {
                 setFeature(cpu, feat, false);
             }
         }
-    } else {
+
+        break :has_avx10 false;
+    } else has_avx10: {
         for ([_]Target.x86.Feature{
             .fsgsbase,
             .sgx,
@@ -605,7 +642,6 @@ fn detectNativeFeatures(cpu: *Target.Cpu, os_tag: Target.Os.Tag) void {
             .invpcid,
             .rtm,
             .avx512f,
-            .evex512,
             .avx512dq,
             .rdseed,
             .adx,
@@ -664,18 +700,22 @@ fn detectNativeFeatures(cpu: *Target.Cpu, os_tag: Target.Os.Tag) void {
             .avxneconvert,
             .amx_complex,
             .avxvnniint16,
-            .prefetchi,
+            // prefetchi already handled earlier.
             .usermsr,
             .egpr,
             .push2pop2,
             .ppx,
             .ndd,
             .ccmp,
+            .nf,
             .cf,
+            .zu,
         }) |feat| {
             setFeature(cpu, feat, false);
         }
-    }
+
+        break :has_avx10 false;
+    };
 
     if (max_level >= 0xD and has_avx_save) {
         leaf = cpuid(0xD, 0x1);
@@ -721,10 +761,14 @@ fn detectNativeFeatures(cpu: *Target.Cpu, os_tag: Target.Os.Tag) void {
     if (max_level >= 0x24) {
         leaf = cpuid(0x24, 0);
 
-        setFeature(cpu, .avx10_1, bit(leaf.ebx, 18));
+        const avx_ver = leaf.ebx & 0xff;
+
+        setFeature(cpu, .avx10_1, has_avx10 and avx_ver >= 1);
+        setFeature(cpu, .avx10_2, has_avx10 and avx_ver >= 2);
     } else {
         for ([_]Target.x86.Feature{
             .avx10_1,
+            .avx10_2,
         }) |feat| {
             setFeature(cpu, feat, false);
         }
diff --git a/src/Compilation.zig b/src/Compilation.zig
index 2d7e351f1e..6a58bcc9db 100644
--- a/src/Compilation.zig
+++ b/src/Compilation.zig
@@ -3013,6 +3013,13 @@ pub fn update(comp: *Compilation, main_progress_node: std.Progress.Node) UpdateE
         try comp.appendFileSystemInput(try .fromUnresolved(arena, comp.dirs, &.{c_object.src.src_path}));
     }
 
+    for (comp.link_inputs) |input| if (input.path()) |path| {
+        try comp.appendFileSystemInput(try .fromUnresolved(arena, comp.dirs, &.{
+            path.root_dir.path orelse ".",
+            path.sub_path,
+        }));
+    };
+
     // For compiling Win32 resources, we rely on the cache hash system to avoid duplicating work.
     // Add a Job for each Win32 resource file.
     try comp.win32_resource_work_queue.ensureUnusedCapacity(gpa, comp.win32_resource_table.count());
@@ -4763,12 +4770,12 @@ fn writeDepFile(
 
     try w.print("{f}:", .{bin_file});
 
-    {
+    if (fsi.len > 0) {
         var it = std.mem.splitScalar(u8, fsi, 0);
         while (it.next()) |input| try w.print(" \\\n {f}{s}", .{ prefixes[input[0] - 1], input[1..] });
     }
 
-    {
+    if (fsi.len > 0) {
         var it = std.mem.splitScalar(u8, fsi, 0);
         while (it.next()) |input| try w.print("\n\n{f}{s}:", .{ prefixes[input[0] - 1], input[1..] });
     }
@@ -6355,6 +6362,13 @@ fn addCommonCCArgs(
                     try argv.append(
                         try std.fmt.allocPrint(arena, "-D_WIN32_WINNT=0x{x:0>4}", .{minver}),
                     );
+
+                    // MinGW-w64's inline functions in headers (e.g. `fabs`), which are emitted with `linkonce_odr`
+                    // linkage, sometimes cause duplicate symbol errors due to us providing the same symbols with
+                    // `weak` linkage in compiler-rt or libzigc. So just disable them. Besides, they undermine the
+                    // goal of moving more libc code to Zig, and they're also just kind of unnecessary since LLVM is
+                    // perfectly capable of recognizing and optimizing libcalls.
+                    try argv.append("-D__CRT__NO_INLINE");
                 } else if (target.isFreeBSDLibC()) {
                     // https://docs.freebsd.org/en/books/porters-handbook/versions
                     const min_ver = target.os.version_range.semver.min;
@@ -6794,6 +6808,8 @@ pub fn addCCArgs(
                     // We communicate these to Clang through the dedicated options.
                     if (std.mem.startsWith(u8, llvm_name, "soft-float") or
                         std.mem.startsWith(u8, llvm_name, "hard-float") or
+                        (target.cpu.arch.isPowerPC() and std.mem.startsWith(u8, llvm_name, "64bit")) or
+                        (target.cpu.arch.isX86() and std.mem.startsWith(u8, llvm_name, "x32")) or
                         (target.cpu.arch == .s390x and std.mem.eql(u8, llvm_name, "backchain")))
                         continue;
 
diff --git a/src/Package/Module.zig b/src/Package/Module.zig
index d6ec7f8aaf..aee98bbe3d 100644
--- a/src/Package/Module.zig
+++ b/src/Package/Module.zig
@@ -347,6 +347,9 @@ pub fn create(arena: Allocator, options: CreateOptions) !*Package.Module {
                 // See https://github.com/ziglang/zig/issues/23539
                 if (target_util.isDynamicAMDGCNFeature(target, feature)) continue;
 
+                if (target.cpu.arch.isPowerPC() and @as(std.Target.powerpc.Feature, @enumFromInt(feature.index)) == .@"64bit") continue;
+                if (target.cpu.arch.isX86() and @as(std.Target.x86.Feature, @enumFromInt(feature.index)) == .x32) continue;
+
                 var is_enabled = target.cpu.features.isEnabled(feature.index);
                 if (target.cpu.arch == .s390x and @as(std.Target.s390x.Feature, @enumFromInt(feature.index)) == .backchain) {
                     is_enabled = !omit_frame_pointer;
diff --git a/src/clang_options.zon b/src/clang_options.zon
index 2984aadbb3..15b49f8210 100644
--- a/src/clang_options.zon
+++ b/src/clang_options.zon
@@ -273,6 +273,9 @@
 .{ .name = "utf-8", .psl = true },
 .{ .name = "validate-charset", .psl = true },
 .{ .name = "validate-charset-", .psl = true },
+.{ .name = "vlen", .psl = true },
+.{ .name = "vlen=256", .psl = true },
+.{ .name = "vlen=512", .psl = true },
 .{ .name = "vmb", .psl = true },
 .{ .name = "vmg", .psl = true },
 .{ .name = "vmm", .psl = true },
@@ -692,9 +695,9 @@
 .{ .name = "fnew-alignment", .syntax = .separate },
 .{ .name = "faligned-new" },
 .{ .name = "fno-aligned-new" },
-.{ .name = "fsched-interblock" },
 .{ .name = "fcuda-rdc" },
 .{ .name = "fno-cuda-rdc" },
+.{ .name = "fsched-interblock" },
 .{ .name = "ftree-vectorize" },
 .{ .name = "fno-tree-vectorize" },
 .{ .name = "ftree-slp-vectorize" },
@@ -723,6 +726,8 @@
 .{ .name = "mtune=help", .ze = .mcpu },
 .{ .name = "integrated-as" },
 .{ .name = "no-integrated-as" },
+.{ .name = "shared-libasan" },
+.{ .name = "static-libasan" },
 .{ .name = "fopenmp-is-device" },
 .{ .name = "fcuda-approx-transcendentals" },
 .{ .name = "fno-cuda-approx-transcendentals" },
@@ -760,14 +765,12 @@
 .{ .name = "Oy-", .psl = true },
 .{ .name = "Qgather-", .psl = true },
 .{ .name = "Qscatter-", .psl = true },
-.{ .name = "shared-libasan" },
 .{ .name = "Xmicrosoft-visualc-tools-root", .syntax = .separate },
 .{ .name = "Xmicrosoft-visualc-tools-version", .syntax = .separate },
 .{ .name = "Xmicrosoft-windows-sdk-root", .syntax = .separate },
 .{ .name = "Xmicrosoft-windows-sdk-version", .syntax = .separate },
 .{ .name = "Xmicrosoft-windows-sys-root", .syntax = .separate },
 .{ .name = "Qembed_debug", .psl = true },
-.{ .name = "static-libasan" },
 .{ .name = "fslp-vectorize-aggressive" },
 .{ .name = "fident" },
 .{ .name = "fno-ident" },
@@ -787,13 +790,13 @@
 .{ .name = "fno-sanitize-blacklist" },
 .{ .name = "fhonor-infinites" },
 .{ .name = "fno-honor-infinites" },
-.{ .name = "findirect-virtual-calls" },
 .{
     .name = "config",
     .syntax = .separate,
     .pd1 = false,
     .pd2 = true,
 },
+.{ .name = "findirect-virtual-calls" },
 .{ .name = "ansi", .pd2 = true },
 .{ .name = "arch", .syntax = .separate },
 .{ .name = "arch_errors_fatal" },
@@ -900,6 +903,7 @@
 .{ .name = "dead_strip" },
 .{ .name = "debug-forward-template-params" },
 .{ .name = "debug-info-macro" },
+.{ .name = "debug-info-macro-expansion-loc" },
 .{ .name = "default-function-attr", .syntax = .separate },
 .{
     .name = "defsym",
@@ -934,14 +938,19 @@
 .{ .name = "dwarf-debug-producer", .syntax = .separate },
 .{ .name = "dwarf-explicit-import" },
 .{ .name = "dwarf-ext-refs" },
+.{ .name = "all-resources-bound", .psl = true },
+.{ .name = "Zpc", .psl = true },
 .{ .name = "Vd", .psl = true },
 .{ .name = "Gis", .psl = true },
 .{ .name = "hlsl-no-stdinc", .psl = true },
+.{ .name = "rootsig-define", .syntax = .separate },
 .{
     .name = "force-rootsig-ver",
     .syntax = .separate,
     .psl = true,
 },
+.{ .name = "Zpr", .psl = true },
+.{ .name = "Qstrip-rootsignature", .psl = true },
 .{ .name = "dylib_file", .syntax = .separate },
 .{ .name = "dylinker" },
 .{ .name = "dynamic", .ze = .dynamic },
@@ -1089,6 +1098,7 @@
 .{ .name = "fcheck-array-temporaries" },
 .{ .name = "fcheck-new" },
 .{ .name = "fclangir" },
+.{ .name = "fcoarray" },
 .{ .name = "fcodegen-data-generate" },
 .{ .name = "fcodegen-data-use" },
 .{ .name = "fcolor-diagnostics", .ze = .color_diagnostics },
@@ -1148,8 +1158,11 @@
 .{ .name = "fdeclspec" },
 .{ .name = "fdefault-double-8" },
 .{ .name = "fdefault-inline" },
+.{ .name = "fdefault-integer-4" },
 .{ .name = "fdefault-integer-8" },
+.{ .name = "fdefault-real-4" },
 .{ .name = "fdefault-real-8" },
+.{ .name = "fdefer-ts" },
 .{ .name = "fdefine-target-os-macros" },
 .{ .name = "fdelayed-template-parsing" },
 .{ .name = "fdelete-null-pointer-checks" },
@@ -1205,9 +1218,11 @@
 .{ .name = "ferror-limit", .syntax = .separate },
 .{ .name = "fescaping-block-tail-calls" },
 .{ .name = "fexceptions" },
+.{ .name = "fexperimental-call-graph-section" },
 .{ .name = "fexperimental-isel" },
 .{ .name = "fexperimental-late-parse-attributes" },
 .{ .name = "fexperimental-library" },
+.{ .name = "fexperimental-loop-fusion" },
 .{ .name = "fexperimental-new-constant-interpreter" },
 .{ .name = "fexperimental-omit-vtable-rtti" },
 .{ .name = "fexperimental-relative-c++-abi-vtables" },
@@ -1221,6 +1236,7 @@
 .{ .name = "ff2c" },
 .{ .name = "ffake-address-space-map" },
 .{ .name = "ffast-math" },
+.{ .name = "ffast-real-mod" },
 .{ .name = "ffat-lto-objects" },
 .{ .name = "ffile-reproducible" },
 .{ .name = "fimplicit-modules-use-lock" },
@@ -1345,6 +1361,8 @@
 .{ .name = "fhip-fp32-correctly-rounded-divide-sqrt" },
 .{ .name = "fhip-kernel-arg-name" },
 .{ .name = "fhip-new-launch-api" },
+.{ .name = "fspv-enable-maximal-reconvergence" },
+.{ .name = "fspv-use-unknown-image-format" },
 .{ .name = "fhlsl-strict-availability" },
 .{ .name = "fhonor-infinities" },
 .{ .name = "fhonor-nans" },
@@ -1393,6 +1411,7 @@
 .{ .name = "flat_namespace" },
 .{ .name = "flax-vector-conversions" },
 .{ .name = "fexperimental-lifetime-safety" },
+.{ .name = "fexperimental-lifetime-safety-inference" },
 .{ .name = "flimit-debug-info" },
 .{ .name = "flogical-abbreviations" },
 .{ .name = "floop-interchange" },
@@ -1428,7 +1447,9 @@
 .{ .name = "fmodules-force-validate-user-headers" },
 .{ .name = "fmodules-hash-content" },
 .{ .name = "fmodules-local-submodule-visibility" },
+.{ .name = "fmodules-reduced-bmi" },
 .{ .name = "fmodules-search-all" },
+.{ .name = "fmodules-single-module-parse-mode" },
 .{ .name = "fmodules-skip-diagnostic-options" },
 .{ .name = "fmodules-skip-header-search-paths" },
 .{ .name = "fmodules-strict-context-hash" },
@@ -1448,8 +1469,10 @@
 .{ .name = "fms-volatile" },
 .{ .name = "fmudflap" },
 .{ .name = "fmudflapth" },
+.{ .name = "fnamed-loops" },
 .{ .name = "fnative-half-arguments-and-returns" },
 .{ .name = "fnative-half-type" },
+.{ .name = "fnative-int16-type" },
 .{ .name = "fnested-functions" },
 .{ .name = "fnew-infallible" },
 .{ .name = "fnext-runtime" },
@@ -1539,6 +1562,7 @@
 .{ .name = "fno-debug-types-section" },
 .{ .name = "fno-declspec" },
 .{ .name = "fno-default-inline" },
+.{ .name = "fno-defer-ts" },
 .{ .name = "fno-define-target-os-macros" },
 .{ .name = "fno-delayed-template-parsing" },
 .{ .name = "fno-delete-null-pointer-checks" },
@@ -1573,14 +1597,17 @@
 .{ .name = "fno-emulated-tls" },
 .{ .name = "fno-escaping-block-tail-calls" },
 .{ .name = "fno-exceptions" },
+.{ .name = "fno-experimental-call-graph-section" },
 .{ .name = "fno-experimental-isel" },
 .{ .name = "fno-experimental-late-parse-attributes" },
 .{ .name = "fno-experimental-library" },
+.{ .name = "fno-experimental-loop-fusion" },
 .{ .name = "fno-experimental-omit-vtable-rtti" },
 .{ .name = "fno-experimental-relative-c++-abi-vtables" },
 .{ .name = "fno-external-blas" },
 .{ .name = "fno-f2c" },
 .{ .name = "fno-fast-math" },
+.{ .name = "fno-fast-real-mod" },
 .{ .name = "fno-fat-lto-objects" },
 .{ .name = "fno-file-reproducible" },
 .{ .name = "fno-implicit-modules-use-lock" },
@@ -1646,6 +1673,7 @@
 .{ .name = "fno-knr-functions" },
 .{ .name = "fno-lax-vector-conversions" },
 .{ .name = "fno-experimental-lifetime-safety" },
+.{ .name = "fno-experimental-lifetime-safety-inference" },
 .{ .name = "fno-limit-debug-info" },
 .{ .name = "fno-logical-abbreviations" },
 .{ .name = "fno-loop-interchange" },
@@ -1671,6 +1699,7 @@
 .{ .name = "fno-modules-force-validate-user-headers" },
 .{ .name = "fno-modules-global-index" },
 .{ .name = "fno-modules-prune-non-affecting-module-map-files" },
+.{ .name = "fno-modules-reduced-bmi" },
 .{ .name = "fno-modules-search-all" },
 .{ .name = "fno-modules-share-filemanager" },
 .{ .name = "fno-modules-skip-diagnostic-options" },
@@ -1685,12 +1714,14 @@
 .{ .name = "fno-ms-extensions" },
 .{ .name = "fno-ms-tls-guards" },
 .{ .name = "fno-ms-volatile" },
+.{ .name = "fno-named-loops" },
 .{ .name = "fno-new-infallible" },
 .{ .name = "fno-non-call-exceptions" },
 .{ .name = "fno-objc-arc" },
 .{ .name = "fno-objc-arc-exceptions" },
 .{ .name = "fno-objc-avoid-heapify-local-blocks" },
 .{ .name = "fno-objc-convert-messages-to-runtime-calls" },
+.{ .name = "fno-objc-direct-precondition-thunk" },
 .{ .name = "fno-objc-encode-cxx-class-template-spec" },
 .{ .name = "fno-objc-exceptions" },
 .{ .name = "fno-objc-infer-related-result-type" },
@@ -1718,6 +1749,7 @@
 .{ .name = "fno-pack-derived" },
 .{ .name = "fno-pack-struct" },
 .{ .name = "fno-padding-on-unsigned-fixed-point" },
+.{ .name = "fno-partition-static-data-sections" },
 .{ .name = "fno-pascal-strings" },
 .{ .name = "fno-pch-codegen" },
 .{ .name = "fno-pch-debuginfo" },
@@ -1802,9 +1834,13 @@
 .{ .name = "fno-sanitize-address-poison-custom-array-cookie" },
 .{ .name = "fno-sanitize-address-use-after-scope" },
 .{ .name = "fno-sanitize-address-use-odr-indicator" },
+.{ .name = "fno-sanitize-alloc-token-extended" },
+.{ .name = "fno-sanitize-alloc-token-fast-abi" },
 .{ .name = "fno-sanitize-annotate-debug-info" },
 .{ .name = "fno-sanitize-cfi-canonical-jump-tables" },
 .{ .name = "fno-sanitize-cfi-cross-dso" },
+.{ .name = "fno-sanitize-debug-trap-reasons" },
+.{ .name = "fno-sanitize-handler-preserve-all-regs" },
 .{ .name = "fno-sanitize-hwaddress-experimental-aliasing" },
 .{ .name = "fno-sanitize-ignorelist" },
 .{ .name = "fno-sanitize-link-c++-runtime" },
@@ -1821,6 +1857,7 @@
 .{ .name = "fno-sanitize-thread-func-entry-exit" },
 .{ .name = "fno-sanitize-thread-memory-access" },
 .{ .name = "fno-sanitize-trap", .ze = .no_sanitize_trap },
+.{ .name = "fno-sanitize-type-outline-instrumentation" },
 .{ .name = "fno-sanitize-undefined-trap-on-error" },
 .{ .name = "fno-save-main-program" },
 .{ .name = "fno-save-optimization-record" },
@@ -1887,6 +1924,7 @@
 .{ .name = "fno-unique-source-file-names" },
 .{ .name = "fno-unroll-all-loops" },
 .{ .name = "fno-unroll-loops" },
+.{ .name = "fno-unsafe-cray-pointers" },
 .{ .name = "fno-unsafe-loop-optimizations" },
 .{ .name = "fno-unsafe-math-optimizations" },
 .{ .name = "fno-unsigned" },
@@ -1938,6 +1976,7 @@
 .{ .name = "fobjc-avoid-heapify-local-blocks" },
 .{ .name = "fobjc-call-cxx-cdtors" },
 .{ .name = "fobjc-convert-messages-to-runtime-calls" },
+.{ .name = "fobjc-direct-precondition-thunk" },
 .{ .name = "fobjc-disable-direct-methods-for-testing" },
 .{ .name = "fobjc-encode-cxx-class-template-spec" },
 .{ .name = "fobjc-exceptions" },
@@ -1987,6 +2026,7 @@
 .{ .name = "fpack-struct" },
 .{ .name = "fpadding-on-unsigned-fixed-point" },
 .{ .name = "fparse-all-comments" },
+.{ .name = "fpartition-static-data-sections" },
 .{ .name = "fpascal-strings" },
 .{ .name = "fpass-by-value-is-noalias" },
 .{ .name = "fpcc-struct-return" },
@@ -2081,6 +2121,8 @@
 .{ .name = "fsanitize-address-poison-custom-array-cookie" },
 .{ .name = "fsanitize-address-use-after-scope" },
 .{ .name = "fsanitize-address-use-odr-indicator" },
+.{ .name = "fsanitize-alloc-token-extended" },
+.{ .name = "fsanitize-alloc-token-fast-abi" },
 .{ .name = "fsanitize-annotate-debug-info" },
 .{ .name = "fsanitize-cfi-canonical-jump-tables" },
 .{ .name = "fsanitize-cfi-cross-dso" },
@@ -2102,6 +2144,8 @@
 .{ .name = "fsanitize-coverage-trace-pc" },
 .{ .name = "fsanitize-coverage-trace-pc-guard", .ze = .san_cov_trace_pc_guard },
 .{ .name = "fsanitize-coverage-trace-stores" },
+.{ .name = "fsanitize-debug-trap-reasons" },
+.{ .name = "fsanitize-handler-preserve-all-regs" },
 .{ .name = "fsanitize-hwaddress-experimental-aliasing" },
 .{ .name = "fsanitize-kcfi-arity" },
 .{ .name = "fsanitize-link-c++-runtime" },
@@ -2118,6 +2162,7 @@
 .{ .name = "fsanitize-thread-func-entry-exit" },
 .{ .name = "fsanitize-thread-memory-access" },
 .{ .name = "fsanitize-trap", .ze = .sanitize_trap },
+.{ .name = "fsanitize-type-outline-instrumentation" },
 .{ .name = "fsanitize-undefined-trap-on-error" },
 .{ .name = "fsave-main-program" },
 .{ .name = "fsave-optimization-record" },
@@ -2205,6 +2250,7 @@
 .{ .name = "funknown-anytype" },
 .{ .name = "funroll-all-loops" },
 .{ .name = "funroll-loops" },
+.{ .name = "funsafe-cray-pointers" },
 .{ .name = "funsafe-loop-optimizations" },
 .{ .name = "funsafe-math-optimizations" },
 .{ .name = "funsigned" },
@@ -2259,6 +2305,7 @@
 .{ .name = "g2" },
 .{ .name = "g3" },
 .{ .name = "g", .ze = .debug },
+.{ .name = "gcall-site-info" },
 .{ .name = "gcodeview" },
 .{ .name = "gcodeview-command-line" },
 .{ .name = "gcodeview-ghash" },
@@ -2271,6 +2318,7 @@
 .{ .name = "gdwarf-3", .ze = .debug },
 .{ .name = "gdwarf-4", .ze = .debug },
 .{ .name = "gdwarf-5", .ze = .debug },
+.{ .name = "gdwarf-6" },
 .{ .name = "gdwarf-aranges" },
 .{ .name = "gembed-source" },
 .{ .name = "gen-cdb-fragment-path", .syntax = .separate },
@@ -2289,6 +2337,7 @@
 .{ .name = "glldb" },
 .{ .name = "gmlt" },
 .{ .name = "gmodules" },
+.{ .name = "gno-call-site-info" },
 .{ .name = "gno-codeview-command-line" },
 .{ .name = "gno-codeview-ghash" },
 .{ .name = "gno-column-info" },
@@ -2303,6 +2352,7 @@
 .{ .name = "gno-simple-template-names" },
 .{ .name = "gno-split-dwarf" },
 .{ .name = "gno-strict-dwarf" },
+.{ .name = "gno-structor-decl-linkage-names" },
 .{ .name = "gno-template-alias" },
 .{ .name = "gomit-unreferenced-methods" },
 .{
@@ -2319,9 +2369,15 @@
 .{ .name = "gpulibc" },
 .{ .name = "grecord-command-line" },
 .{ .name = "gsce" },
+.{
+    .name = "gsframe",
+    .pd1 = false,
+    .pd2 = true,
+},
 .{ .name = "gsimple-template-names" },
 .{ .name = "gsplit-dwarf" },
 .{ .name = "gstrict-dwarf" },
+.{ .name = "gstructor-decl-linkage-names" },
 .{ .name = "gtemplate-alias" },
 .{ .name = "gtoggle" },
 .{ .name = "gused" },
@@ -2347,6 +2403,7 @@
     .pd1 = false,
     .pd2 = true,
 },
+.{ .name = "hlsl-all-resources-bound", .psl = true },
 .{ .name = "ibuiltininc" },
 .{ .name = "ignore-pch" },
 .{ .name = "image_base", .syntax = .separate },
@@ -2397,6 +2454,7 @@
 .{ .name = "maix-struct-return" },
 .{ .name = "malign-double" },
 .{ .name = "maltivec", .ze = .m },
+.{ .name = "mamdgpu-expand-waitcnt-profiling" },
 .{ .name = "mamdgpu-ieee" },
 .{ .name = "mamdgpu-precise-memory-op" },
 .{ .name = "mamx-avx512", .ze = .m },
@@ -2408,7 +2466,6 @@
 .{ .name = "mamx-movrs", .ze = .m },
 .{ .name = "mamx-tf32", .ze = .m },
 .{ .name = "mamx-tile", .ze = .m },
-.{ .name = "mamx-transpose", .ze = .m },
 .{ .name = "mannotate-tablejump" },
 .{ .name = "mapx-inline-asm-use-gpr32" },
 .{ .name = "mapxf" },
@@ -2418,11 +2475,7 @@
 .{ .name = "matomics", .ze = .m },
 .{ .name = "mavx", .ze = .m },
 .{ .name = "mavx10.1" },
-.{ .name = "mavx10.1-256" },
-.{ .name = "mavx10.1-512", .ze = .m },
 .{ .name = "mavx10.2" },
-.{ .name = "mavx10.2-256" },
-.{ .name = "mavx10.2-512", .ze = .m },
 .{ .name = "mavx2", .ze = .m },
 .{ .name = "mavx512bf16", .ze = .m },
 .{ .name = "mavx512bitalg", .ze = .m },
@@ -2485,7 +2538,6 @@
 .{ .name = "menable-no-nans" },
 .{ .name = "menqcmd", .ze = .m },
 .{ .name = "metal", .psl = true },
-.{ .name = "mevex512", .ze = .m },
 .{ .name = "mexception-handling", .ze = .m },
 .{ .name = "mexecute-only", .ze = .m },
 .{ .name = "mextended-const", .ze = .m },
@@ -2517,6 +2569,7 @@
 .{ .name = "mfsgsbase", .ze = .m },
 .{ .name = "mfsmuld" },
 .{ .name = "mfxsr", .ze = .m },
+.{ .name = "mgc" },
 .{ .name = "mgeneral-regs-only" },
 .{ .name = "mgfni", .ze = .m },
 .{ .name = "mginv", .ze = .m },
@@ -2614,6 +2667,7 @@
 .{ .name = "mno-adx", .ze = .m },
 .{ .name = "mno-aes", .ze = .m },
 .{ .name = "mno-altivec", .ze = .m },
+.{ .name = "mno-amdgpu-expand-waitcnt-profiling" },
 .{ .name = "mno-amdgpu-ieee" },
 .{ .name = "mno-amdgpu-precise-memory-op" },
 .{ .name = "mno-amx-avx512", .ze = .m },
@@ -2625,14 +2679,11 @@
 .{ .name = "mno-amx-movrs", .ze = .m },
 .{ .name = "mno-amx-tf32", .ze = .m },
 .{ .name = "mno-amx-tile", .ze = .m },
-.{ .name = "mno-amx-transpose", .ze = .m },
 .{ .name = "mno-annotate-tablejump" },
 .{ .name = "mno-apxf" },
 .{ .name = "mno-atomics", .ze = .m },
 .{ .name = "mno-avx", .ze = .m },
 .{ .name = "mno-avx10.1" },
-.{ .name = "mno-avx10.1-256" },
-.{ .name = "mno-avx10.1-512", .ze = .m },
 .{ .name = "mno-avx10.2" },
 .{ .name = "mno-avx2", .ze = .m },
 .{ .name = "mno-avx512bf16", .ze = .m },
@@ -2682,7 +2733,6 @@
 .{ .name = "mno-dspr2", .ze = .m },
 .{ .name = "mno-embedded-data" },
 .{ .name = "mno-enqcmd", .ze = .m },
-.{ .name = "mno-evex512", .ze = .m },
 .{ .name = "mno-exception-handling", .ze = .m },
 .{ .name = "mnoexecstack" },
 .{ .name = "mno-execute-only", .ze = .m },
@@ -2707,6 +2757,7 @@
 .{ .name = "mno-fsmuld", .ze = .m },
 .{ .name = "mno-fxsr", .ze = .m },
 .{ .name = "mno-gather" },
+.{ .name = "mno-gc" },
 .{ .name = "mno-gfni", .ze = .m },
 .{ .name = "mno-ginv", .ze = .m },
 .{ .name = "mno-global-merge" },
@@ -2798,6 +2849,7 @@
 .{ .name = "mno-relax-all" },
 .{ .name = "mno-relax-pic-calls" },
 .{ .name = "mno-relaxed-simd", .ze = .m },
+.{ .name = "mno-reserve-frame-pointer-reg" },
 .{ .name = "mno-restrict-it" },
 .{ .name = "mno-retpoline", .ze = .m },
 .{ .name = "mno-retpoline-external-thunk", .ze = .m },
@@ -2886,7 +2938,6 @@
 .{ .name = "module-file-deps" },
 .{ .name = "module-file-info" },
 .{ .name = "module-suffix", .syntax = .separate },
-.{ .name = "fmodules-reduced-bmi" },
 .{ .name = "momit-leaf-frame-pointer" },
 .{ .name = "moutline" },
 .{ .name = "moutline-atomics", .ze = .m },
@@ -2931,6 +2982,7 @@
 .{ .name = "mrelax-relocations=no" },
 .{ .name = "mrelaxed-simd", .ze = .m },
 .{ .name = "mrelocation-model", .syntax = .separate },
+.{ .name = "mreserve-frame-pointer-reg" },
 .{ .name = "mrestrict-it" },
 .{ .name = "mretpoline", .ze = .m },
 .{ .name = "mretpoline-external-thunk", .ze = .m },
@@ -3007,6 +3059,7 @@
 .{ .name = "mv73", .ze = .m },
 .{ .name = "mv75", .ze = .m },
 .{ .name = "mv79", .ze = .m },
+.{ .name = "mv81" },
 .{ .name = "mv8plus", .ze = .m },
 .{ .name = "mvaes", .ze = .m },
 .{ .name = "mvector-strict-align" },
@@ -3101,6 +3154,7 @@
 .{ .name = "no-pthread" },
 .{ .name = "no-round-trip-args" },
 .{ .name = "no-struct-path-tbaa" },
+.{ .name = "no-use-spirv-backend" },
 .{
     .name = "no-wasm-opt",
     .pd1 = false,
@@ -3311,6 +3365,7 @@
 .{ .name = "static-openmp" },
 .{ .name = "static-pie" },
 .{ .name = "stats-file-append" },
+.{ .name = "stats-file-timers" },
 .{ .name = "stdlib" },
 .{
     .name = "sycl-link",
@@ -3368,6 +3423,7 @@
 .{ .name = "Wrealloc-lhs-all", .pd2 = true },
 .{ .name = "Wfrontend-loop-interchange", .pd2 = true },
 .{ .name = "Wtarget-lifetime", .pd2 = true },
+.{ .name = "use-spirv-backend" },
 .{ .name = "v", .ze = .verbose },
 .{ .name = "vectorize-loops" },
 .{ .name = "vectorize-slp" },
@@ -3469,6 +3525,7 @@
 .{ .name = "fobjc-nonfragile-abi-version=", .syntax = .joined },
 .{ .name = "fprofile-instrument-use-path=", .syntax = .joined },
 .{ .name = "fsanitize-coverage-allowlist=", .syntax = .joined },
+.{ .name = "fsanitize-debug-trap-reasons=", .syntax = .joined },
 .{ .name = "fxray-instrumentation-bundle=", .syntax = .joined },
 .{ .name = "fsanitize-address-destructor=", .syntax = .joined },
 .{
@@ -3567,6 +3624,8 @@
 .{ .name = "analyzer-disable-checker=", .syntax = .joined },
 .{ .name = "fbuild-session-timestamp=", .syntax = .joined },
 .{ .name = "fdo-concurrent-to-openmp=", .syntax = .joined },
+.{ .name = "fdx-rootsignature-define=", .syntax = .joined },
+.{ .name = "fms-layout-compatibility=", .syntax = .joined },
 .{ .name = "fprofile-function-groups=", .syntax = .joined },
 .{ .name = "fprofile-instrument-path=", .syntax = .joined },
 .{ .name = "header-include-filtering=", .syntax = .joined },
@@ -3589,11 +3648,13 @@
 .{ .name = "dump-minimization-hints=", .syntax = .joined },
 .{ .name = "fapinotes-swift-version=", .syntax = .joined },
 .{ .name = "fcomment-block-commands=", .syntax = .comma_joined },
+.{ .name = "fintrinsic-modules-path=", .syntax = .joined },
 .{ .name = "flax-vector-conversions=", .syntax = .joined },
 .{ .name = "fmodules-embed-all-files", .syntax = .joined },
 .{ .name = "fmodules-prune-interval=", .syntax = .joined },
 .{ .name = "foverride-record-layout=", .syntax = .joined },
 .{ .name = "fprofile-instr-generate=", .syntax = .joined },
+.{ .name = "fprofile-instrument-use=", .syntax = .joined },
 .{ .name = "fprofile-remapping-file=", .syntax = .joined },
 .{ .name = "fsanitize-coverage-type=", .syntax = .joined },
 .{ .name = "fsanitize-hwaddress-abi=", .syntax = .joined },
@@ -3619,6 +3680,7 @@
 .{ .name = "fdebug-compilation-dir=", .syntax = .joined },
 .{ .name = "fdebug-default-version=", .syntax = .joined },
 .{ .name = "ffp-exception-behavior=", .syntax = .joined },
+.{ .name = "finitial-counter-value=", .syntax = .joined },
 .{ .name = "fmacro-backtrace-limit=", .syntax = .joined },
 .{ .name = "fmax-array-constructor=", .syntax = .joined },
 .{ .name = "fmcdc-max-test-vectors=", .syntax = .joined },
@@ -3640,6 +3702,7 @@
 .{ .name = "ffile-compilation-dir=", .syntax = .joined },
 .{ .name = "fgpu-inline-threshold=", .syntax = .joined },
 .{ .name = "finline-max-stacksize=", .syntax = .joined },
+.{ .name = "fmatrix-memory-layout=", .syntax = .joined },
 .{ .name = "fmax-subrecord-length=", .syntax = .joined },
 .{ .name = "fmodules-ignore-macro=", .syntax = .joined },
 .{
@@ -3732,6 +3795,7 @@
 .{ .name = "fms-omit-default-lib", .syntax = .joined },
 .{ .name = "fprofile-instrument=", .syntax = .joined },
 .{ .name = "fprofile-sample-use=", .syntax = .joined },
+.{ .name = "fsanitize-kcfi-hash=", .syntax = .joined },
 .{ .name = "fstrict-flex-arrays=", .syntax = .joined },
 .{
     .name = "hipstdpar-prim-path=",
@@ -3817,6 +3881,7 @@
     .pd1 = false,
     .pd2 = true,
 },
+.{ .name = "falloc-token-mode=", .syntax = .joined },
 .{ .name = "fbinutils-version=", .syntax = .joined },
 .{ .name = "fclang-abi-compat=", .syntax = .joined },
 .{ .name = "fcodegen-data-use=", .syntax = .joined },
@@ -3865,6 +3930,7 @@
     .pd2 = true,
 },
 .{ .name = "falign-functions=", .syntax = .joined },
+.{ .name = "falloc-token-max=", .syntax = .joined },
 .{ .name = "fconstexpr-depth=", .syntax = .joined },
 .{ .name = "fconstexpr-steps=", .syntax = .joined },
 .{ .name = "ffile-prefix-map=", .syntax = .joined },
@@ -3951,7 +4017,7 @@
 .{ .name = "mzos-hlq-csslib=", .syntax = .joined },
 .{
     .name = "no-offload-arch=",
-    .syntax = .joined,
+    .syntax = .comma_joined,
     .pd1 = false,
     .pd2 = true,
 },
@@ -4296,6 +4362,12 @@
 .{ .name = "inline-asm=", .syntax = .joined },
 .{ .name = "ivfsoverlay", .syntax = .joined_or_separate },
 .{ .name = "iwithprefix", .syntax = .joined_or_separate },
+.{
+    .name = "libclc-lib=",
+    .syntax = .joined,
+    .pd1 = false,
+    .pd2 = true,
+},
 .{ .name = "mfloat-abi=", .syntax = .joined },
 .{ .name = "plugin-arg-", .syntax = .joined_and_separate },
 .{
@@ -4457,7 +4529,6 @@
     .pd1 = false,
     .pd2 = true,
 },
-.{ .name = "fcoarray=", .syntax = .joined },
 .{ .name = "fconvert=", .syntax = .joined },
 .{ .name = "fc++-abi=", .syntax = .joined },
 .{ .name = "fextdirs=", .syntax = .joined },
@@ -4782,6 +4853,11 @@
     .syntax = .joined,
     .psl = true,
 },
+.{
+    .name = "Frs",
+    .syntax = .joined_or_separate,
+    .psl = true,
+},
 .{ .name = "gz=", .syntax = .joined },
 .{ .name = "A-", .syntax = .joined },
 .{ .name = "G=", .syntax = .joined },
diff --git a/src/codegen/c.zig b/src/codegen/c.zig
index ee9e90ce80..885ca0aa1c 100644
--- a/src/codegen/c.zig
+++ b/src/codegen/c.zig
@@ -3497,9 +3497,27 @@ fn airOverflow(f: *Function, inst: Air.Inst.Index, operation: []const u8, info:
     try w.writeAll(operation);
     try w.writeAll("o_");
     try f.dg.renderTypeForBuiltinFnName(w, scalar_ty);
-    try w.writeAll("(&");
+    try w.writeByte('(');
+
+    // '&dest', possibly preceded by a cast
+    switch (zcu.intern_pool.indexToKey(scalar_ty.toIntern())) {
+        .int_type => {}, // we already have a '[u]intX_t *'
+        .simple_type => {
+            // '&dest' will be something like a 'uintptr_t *', which might be a different C type to
+            // the equivalent sized integer (e.g. 'uint64_t *'), so we need a cast. We don't need a
+            // cast on the *operands* because they are passed by value (except for big integers,
+            // where this issue doesn't exist because no "simple" int type needs bigint repr).
+            try w.print("({s}int{d}_t *)", .{
+                if (scalar_ty.isUnsignedInt(zcu)) "u" else "",
+                scalar_ty.abiSize(zcu) * 8,
+            });
+        },
+        else => unreachable,
+    }
+    try w.writeByte('&');
     try f.writeCValueMember(w, local, .{ .field = 0 });
     try v.elem(f, w);
+
     try w.writeAll(", ");
     if (ref_arg) try w.writeByte('&');
     try f.writeCValue(w, lhs, .other);
diff --git a/src/codegen/llvm.zig b/src/codegen/llvm.zig
index 947efb70ca..4175293a2c 100644
--- a/src/codegen/llvm.zig
+++ b/src/codegen/llvm.zig
@@ -162,6 +162,7 @@ pub fn targetTriple(allocator: Allocator, target: *const std.Target) ![]const u8
             .{ .v9_4a, "v9.4a" },
             .{ .v9_5a, "v9.5a" },
             .{ .v9_6a, "v9.6a" },
+            .{ .v9_7a, "v9.7a" },
         }),
         .powerpc => subArchName(target, .powerpc, .{
             .{ .spe, "spe" },
@@ -255,11 +256,15 @@ pub fn targetTriple(allocator: Allocator, target: *const std.Target) ![]const u8
         .none,
         .windows,
         => {},
-        .semver => |ver| try llvm_triple.print("{d}.{d}.{d}", .{
-            ver.min.major,
-            ver.min.minor,
-            ver.min.patch,
-        }),
+        .semver => |ver| if (target.os.tag == .wasi and ver.min.major == 0) {
+            try llvm_triple.print("p{d}", .{ver.min.minor});
+        } else {
+            try llvm_triple.print("{d}.{d}.{d}", .{
+                ver.min.major,
+                ver.min.minor,
+                ver.min.patch,
+            });
+        },
         inline .linux, .hurd => |ver| try llvm_triple.print("{d}.{d}.{d}", .{
             ver.range.min.major,
             ver.range.min.minor,
@@ -383,16 +388,9 @@ pub fn dataLayout(target: *const std.Target) []const u8 {
         .powerpc => "E-m:e-p:32:32-Fn32-i64:64-n32",
         .powerpcle => "e-m:e-p:32:32-Fn32-i64:64-n32",
         .powerpc64 => switch (target.os.tag) {
-            .linux => if (target.abi.isMusl())
-                "E-m:e-Fn32-i64:64-i128:128-n32:64-S128-v256:256:256-v512:512:512"
-            else
-                "E-m:e-Fi64-i64:64-i128:128-n32:64-S128-v256:256:256-v512:512:512",
+            .linux => "E-m:e-Fn32-i64:64-i128:128-n32:64-S128-v256:256:256-v512:512:512",
             .ps3 => "E-m:e-p:32:32-Fi64-i64:64-i128:128-n32:64",
-            else => if (target.os.tag == .openbsd or
-                (target.os.tag == .freebsd and target.os.version_range.semver.isAtLeast(.{ .major = 13, .minor = 0, .patch = 0 }) orelse false))
-                "E-m:e-Fn32-i64:64-i128:128-n32:64"
-            else
-                "E-m:e-Fi64-i64:64-i128:128-n32:64",
+            else => "E-m:e-Fn32-i64:64-i128:128-n32:64",
         },
         .powerpc64le => if (target.os.tag == .linux)
             "e-m:e-Fn32-i64:64-i128:128-n32:64-S128-v256:256:256-v512:512:512"
@@ -400,7 +398,7 @@ pub fn dataLayout(target: *const std.Target) []const u8 {
             "e-m:e-Fn32-i64:64-i128:128-n32:64",
         .nvptx => "e-p:32:32-p6:32:32-p7:32:32-i64:64-i128:128-v16:16-v32:32-n16:32:64",
         .nvptx64 => "e-p6:32:32-i64:64-i128:128-v16:16-v32:32-n16:32:64",
-        .amdgcn => "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128:128:48-p9:192:256:256:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7:8:9",
+        .amdgcn => "e-m:e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128:128:48-p9:192:256:256:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7:8:9",
         .riscv32 => if (target.cpu.has(.riscv, .e))
             "e-m:e-p:32:32-i64:64-n32-S32"
         else
@@ -1748,6 +1746,7 @@ pub const Object = struct {
                 if (name.eqlSlice("WinMainCRTStartup", ip)) flags.winmain_crt_startup = true;
                 if (name.eqlSlice("wWinMainCRTStartup", ip)) flags.wwinmain_crt_startup = true;
                 if (name.eqlSlice("DllMainCRTStartup", ip)) flags.dllmain_crt_startup = true;
+                if (name.eqlSlice("_DllMainCRTStartup", ip)) flags.dllmain_crt_startup = true;
             }
         }
 
@@ -4652,20 +4651,6 @@ fn toLlvmGlobalAddressSpace(wanted_address_space: std.builtin.AddressSpace, targ
 /// or if it produces miscompilations.
 pub fn backendSupportsF16(target: *const std.Target) bool {
     return switch (target.cpu.arch) {
-        // https://github.com/llvm/llvm-project/issues/97981
-        .csky,
-        // https://github.com/llvm/llvm-project/issues/97981
-        .powerpc,
-        .powerpcle,
-        .powerpc64,
-        .powerpc64le,
-        // https://github.com/llvm/llvm-project/issues/97981
-        .wasm32,
-        .wasm64,
-        // https://github.com/llvm/llvm-project/issues/97981
-        .sparc,
-        .sparc64,
-        => false,
         .arm,
         .armeb,
         .thumb,
@@ -4692,11 +4677,6 @@ pub fn backendSupportsF128(target: *const std.Target) bool {
     return switch (target.cpu.arch) {
         // https://github.com/llvm/llvm-project/issues/121122
         .amdgcn,
-        // Test failures all over the place.
-        .mips64,
-        .mips64el,
-        // https://github.com/llvm/llvm-project/issues/41838
-        .sparc,
         => false,
         .arm,
         .armeb,
@@ -4839,7 +4819,7 @@ pub fn initializeLLVMTarget(arch: std.Target.Cpu.Arch) void {
                 bindings.LLVMInitializeXtensaTarget();
                 bindings.LLVMInitializeXtensaTargetInfo();
                 bindings.LLVMInitializeXtensaTargetMC();
-                // There is no LLVMInitializeXtensaAsmPrinter function.
+                bindings.LLVMInitializeXtensaAsmPrinter();
                 bindings.LLVMInitializeXtensaAsmParser();
             }
         },
diff --git a/src/codegen/llvm/FuncGen.zig b/src/codegen/llvm/FuncGen.zig
index 21022543cb..e4f69d15ca 100644
--- a/src/codegen/llvm/FuncGen.zig
+++ b/src/codegen/llvm/FuncGen.zig
@@ -6630,11 +6630,11 @@ const ParamTypeIterator = struct {
                 } else if (isByRef(ty, zcu)) {
                     return .byref;
                 } else if (target.cpu.arch.isX86() and
-                    !target.cpu.has(.x86, .evex512) and
+                    !target.cpu.has(.x86, .avx512f) and
                     ty.totalVectorBits(zcu) >= 512)
                 {
                     // As of LLVM 18, passing a vector byval with fastcc that is 512 bits or more returns
-                    // "512-bit vector arguments require 'evex512' for AVX512"
+                    // "512-bit vector arguments require 'avx512f' for AVX512"
                     return .byref;
                 } else {
                     return .byval;
@@ -6903,11 +6903,11 @@ fn returnTypeByRef(zcu: *Zcu, target: *const std.Target, ty: Type) bool {
     if (isByRef(ty, zcu)) {
         return true;
     } else if (target.cpu.arch.isX86() and
-        !target.cpu.has(.x86, .evex512) and
+        !target.cpu.has(.x86, .avx512f) and
         ty.totalVectorBits(zcu) >= 512)
     {
         // As of LLVM 18, passing a vector byval with fastcc that is 512 bits or more returns
-        // "512-bit vector arguments require 'evex512' for AVX512"
+        // "512-bit vector arguments require 'avx512f' for AVX512"
         return true;
     } else {
         return false;
diff --git a/src/codegen/llvm/bindings.zig b/src/codegen/llvm/bindings.zig
index 9f7ea2ca41..38f0c300bc 100644
--- a/src/codegen/llvm/bindings.zig
+++ b/src/codegen/llvm/bindings.zig
@@ -279,6 +279,7 @@ pub extern fn LLVMInitializeSystemZAsmPrinter() void;
 pub extern fn LLVMInitializeWebAssemblyAsmPrinter() void;
 pub extern fn LLVMInitializeX86AsmPrinter() void;
 pub extern fn LLVMInitializeXCoreAsmPrinter() void;
+pub extern fn LLVMInitializeXtensaAsmPrinter() void;
 pub extern fn LLVMInitializeM68kAsmPrinter() void;
 pub extern fn LLVMInitializeVEAsmPrinter() void;
 pub extern fn LLVMInitializeARCAsmPrinter() void;
diff --git a/src/libs/libcxx.zig b/src/libs/libcxx.zig
index 83ba56ccf8..bed5edb95b 100644
--- a/src/libs/libcxx.zig
+++ b/src/libs/libcxx.zig
@@ -538,11 +538,20 @@ pub fn addCxxArgs(
     // Compilation.addCCArgs. This option makes it use serial backend which
     // is simple and works everywhere.
     try cflags.append("-D_LIBCPP_PSTL_BACKEND_SERIAL");
-    try cflags.append(switch (optimize_mode) {
-        .Debug => "-D_LIBCPP_HARDENING_MODE=_LIBCPP_HARDENING_MODE_DEBUG",
-        .ReleaseFast, .ReleaseSmall => "-D_LIBCPP_HARDENING_MODE=_LIBCPP_HARDENING_MODE_NONE",
-        .ReleaseSafe => "-D_LIBCPP_HARDENING_MODE=_LIBCPP_HARDENING_MODE_FAST",
-    });
+    switch (optimize_mode) {
+        .Debug => {
+            try cflags.append("-D_LIBCPP_HARDENING_MODE=_LIBCPP_HARDENING_MODE_DEBUG");
+            try cflags.append("-D_LIBCPP_ASSERTION_SEMANTIC_DEFAULT=_LIBCPP_ASSERTION_SEMANTIC_ENFORCE");
+        },
+        .ReleaseFast, .ReleaseSmall => {
+            try cflags.append("-D_LIBCPP_HARDENING_MODE=_LIBCPP_HARDENING_MODE_NONE");
+            try cflags.append("-D_LIBCPP_ASSERTION_SEMANTIC_DEFAULT=_LIBCPP_ASSERTION_SEMANTIC_IGNORE");
+        },
+        .ReleaseSafe => {
+            try cflags.append("-D_LIBCPP_HARDENING_MODE=_LIBCPP_HARDENING_MODE_FAST");
+            try cflags.append("-D_LIBCPP_ASSERTION_SEMANTIC_DEFAULT=_LIBCPP_ASSERTION_SEMANTIC_ENFORCE");
+        },
+    }
     if (target.isGnuLibC()) {
         // glibc 2.16 introduced aligned_alloc
         if (target.os.versionRange().gnuLibCVersion().?.order(.{ .major = 2, .minor = 16, .patch = 0 }) == .lt) {
diff --git a/src/link/Lld.zig b/src/link/Lld.zig
index ffb9020e7f..51d40f7097 100644
--- a/src/link/Lld.zig
+++ b/src/link/Lld.zig
@@ -637,17 +637,7 @@ fn coffLink(lld: *Lld, arena: Allocator) !void {
                             try argv.append("-ALTERNATENAME:__image_base__=__ImageBase");
                         }
 
-                        if (is_dyn_lib) {
-                            try argv.append(try comp.crtFileAsString(arena, "dllcrt2.obj"));
-                            if (target.cpu.arch == .x86) {
-                                try argv.append("-ALTERNATENAME:__DllMainCRTStartup@12=_DllMainCRTStartup@12");
-                            } else {
-                                try argv.append("-ALTERNATENAME:_DllMainCRTStartup=DllMainCRTStartup");
-                            }
-                        } else {
-                            try argv.append(try comp.crtFileAsString(arena, "crt2.obj"));
-                        }
-
+                        try argv.append(try comp.crtFileAsString(arena, if (is_dyn_lib) "dllcrt2.obj" else "crt2.obj"));
                         try argv.append(try comp.crtFileAsString(arena, "libmingw32.lib"));
                     } else {
                         try argv.append(switch (comp.config.link_mode) {
@@ -812,7 +802,8 @@ fn elfLink(lld: *Lld, arena: Allocator) !void {
             target.cpu.arch == .m68k or
             target.cpu.arch.isSPARC() or
             target.cpu.arch == .ve or
-            target.cpu.arch == .xcore))
+            target.cpu.arch == .xcore or
+            target.cpu.arch == .xtensa))
     {
         // In this case we must do a simple file copy
         // here. TODO: think carefully about how we can avoid this redundant operation when doing
diff --git a/src/link/Wasm.zig b/src/link/Wasm.zig
index 4a7ed1ea33..37c182c514 100644
--- a/src/link/Wasm.zig
+++ b/src/link/Wasm.zig
@@ -2829,6 +2829,7 @@ pub const Feature = packed struct(u8) {
         @"exception-handling",
         @"extended-const",
         fp16,
+        gc,
         memory64,
         multimemory,
         multivalue,
@@ -2852,6 +2853,7 @@ pub const Feature = packed struct(u8) {
                 .exception_handling => .@"exception-handling",
                 .extended_const => .@"extended-const",
                 .fp16 => .fp16,
+                .gc => .gc,
                 .multimemory => .multimemory,
                 .multivalue => .multivalue,
                 .mutable_globals => .@"mutable-globals",
@@ -2875,6 +2877,7 @@ pub const Feature = packed struct(u8) {
                 .@"exception-handling" => .exception_handling,
                 .@"extended-const" => .extended_const,
                 .fp16 => .fp16,
+                .gc => .gc,
                 .memory64 => null, // Linker-only feature.
                 .multimemory => .multimemory,
                 .multivalue => .multivalue,
diff --git a/src/main.zig b/src/main.zig
index 817e8d29dc..95ee9a82f1 100644
--- a/src/main.zig
+++ b/src/main.zig
@@ -977,6 +977,11 @@ fn buildOutputType(
     var cc_argv: std.ArrayList([]const u8) = .empty;
     var deps: std.ArrayList(CliModule.Dep) = .empty;
 
+    // We need to raise the FD limit *before* CLI parsing, because we open link inputs during CLI
+    // parsing (in `createModule`), so a large number of link inputs could push us past the limit on
+    // targets with a low soft limit (e.g. macOS has a default limit of 256).
+    process.raiseFileDescriptorLimit();
+
     // Contains every module specified via -M. The dependencies are added
     // after argument parsing is completed. We use a StringArrayHashMap to make
     // error output consistent. "root" is special.
@@ -3534,8 +3539,6 @@ fn buildOutputType(
         break :b .whole;
     };
 
-    process.raiseFileDescriptorLimit();
-
     var file_system_inputs: std.ArrayList(u8) = .empty;
     defer file_system_inputs.deinit(gpa);
 
diff --git a/src/target.zig b/src/target.zig
index 171b279125..47957d34a8 100644
--- a/src/target.zig
+++ b/src/target.zig
@@ -227,11 +227,11 @@ pub fn hasLlvmSupport(target: *const std.Target, ofmt: std.Target.ObjectFormat)
         .wasm32,
         .wasm64,
         .ve,
+        .xtensa,
         => true,
 
         // LLVM backend exists but can produce neither assembly nor object files.
         .csky,
-        .xtensa,
         => false,
 
         // Third-party LLVM backend exists.
@@ -388,6 +388,8 @@ pub fn hasDebugInfo(target: *const std.Target) bool {
             .ptx85,
             .ptx86,
             .ptx87,
+            .ptx88,
+            .ptx90,
         }),
         .bpfel, .bpfeb => false,
         else => true,
@@ -684,8 +686,8 @@ pub fn llvmMachineAbi(target: *const std.Target) ?[:0]const u8 {
             else => "lp64d",
         },
         .loongarch32 => switch (target.abi) {
-            .gnusf => "ilp32s",
-            .gnuf32 => "ilp32f",
+            .gnusf, .muslsf => "ilp32s",
+            .gnuf32, .muslf32 => "ilp32f",
             else => "ilp32d",
         },
         .mips, .mipsel => "o32",
diff --git a/src/zig_clang_cc1_main.cpp b/src/zig_clang_cc1_main.cpp
index 2c17f28621..1adb217014 100644
--- a/src/zig_clang_cc1_main.cpp
+++ b/src/zig_clang_cc1_main.cpp
@@ -12,19 +12,20 @@
 //
 //===----------------------------------------------------------------------===//
 
+#include "clang/Basic/DiagnosticFrontend.h"
 #include "clang/Basic/Stack.h"
 #include "clang/Basic/TargetOptions.h"
 #include "clang/CodeGen/ObjectFilePCHContainerWriter.h"
 #include "clang/Config/config.h"
+#include "clang/Driver/Driver.h"
 #include "clang/Driver/DriverDiagnostic.h"
-#include "clang/Driver/Options.h"
 #include "clang/Frontend/CompilerInstance.h"
 #include "clang/Frontend/CompilerInvocation.h"
-#include "clang/Frontend/FrontendDiagnostic.h"
 #include "clang/Frontend/TextDiagnosticBuffer.h"
 #include "clang/Frontend/TextDiagnosticPrinter.h"
 #include "clang/Frontend/Utils.h"
 #include "clang/FrontendTool/Utils.h"
+#include "clang/Options/Options.h"
 #include "clang/Serialization/ObjectFilePCHContainerReader.h"
 #include "llvm/ADT/Statistic.h"
 #include "llvm/ADT/StringExtras.h"
@@ -38,6 +39,7 @@
 #include "llvm/Support/BuryPointer.h"
 #include "llvm/Support/Compiler.h"
 #include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/IOSandbox.h"
 #include "llvm/Support/ManagedStatic.h"
 #include "llvm/Support/Path.h"
 #include "llvm/Support/Process.h"
@@ -217,7 +219,7 @@ static int PrintEnabledExtensions(const TargetOptions& TargetOpts) {
 int cc1_main(ArrayRef<const char *> Argv, const char *Argv0, void *MainAddr) {
   ensureSufficientStack();
 
-  IntrusiveRefCntPtr<DiagnosticIDs> DiagID(new DiagnosticIDs());
+  IntrusiveRefCntPtr<DiagnosticIDs> DiagID = DiagnosticIDs::create();
 
   // Register the support for object-file-wrapped Clang modules.
   auto PCHOps = std::make_shared<PCHContainerOperations>();
@@ -269,12 +271,17 @@ int cc1_main(ArrayRef<const char *> Argv, const char *Argv0, void *MainAddr) {
   if (Clang->getHeaderSearchOpts().UseBuiltinIncludes &&
       Clang->getHeaderSearchOpts().ResourceDir.empty())
     Clang->getHeaderSearchOpts().ResourceDir =
-      CompilerInvocation::GetResourcesPath(Argv0, MainAddr);
+        GetResourcesPath(Argv0, MainAddr);
+
+  /// Create the actual file system.
+  auto VFS = [] {
+    auto BypassSandbox = llvm::sys::sandbox::scopedDisable();
+    return llvm::vfs::getRealFileSystem();
+  }();
+  Clang->createVirtualFileSystem(std::move(VFS), DiagsBuffer);
 
   // Create the actual diagnostics engine.
-  Clang->createDiagnostics(*llvm::vfs::getRealFileSystem());
-  if (!Clang->hasDiagnostics())
-    return 1;
+  Clang->createDiagnostics();
 
   // Set an error handler, so that any LLVM backend diagnostics go through our
   // error handler.
@@ -299,29 +306,21 @@ int cc1_main(ArrayRef<const char *> Argv, const char *Argv0, void *MainAddr) {
 
   // If any timers were active but haven't been destroyed yet, print their
   // results now.  This happens in -disable-free mode.
-  std::unique_ptr<raw_ostream> IOFile = llvm::CreateInfoOutputFile();
-  if (Clang->getCodeGenOpts().TimePassesJson) {
-    *IOFile << "{\n";
-    llvm::TimerGroup::printAllJSONValues(*IOFile, "");
-    *IOFile << "\n}\n";
-  } else {
-    llvm::TimerGroup::printAll(*IOFile);
+  {
+    // This isn't a formal input or output of the compiler.
+    auto BypassSandbox = llvm::sys::sandbox::scopedDisable();
+    std::unique_ptr<raw_ostream> IOFile = llvm::CreateInfoOutputFile();
+    if (Clang->getCodeGenOpts().TimePassesJson) {
+      *IOFile << "{\n";
+      llvm::TimerGroup::printAllJSONValues(*IOFile, "");
+      *IOFile << "\n}\n";
+    } else if (!Clang->getCodeGenOpts().TimePassesStatsFile) {
+      llvm::TimerGroup::printAll(*IOFile);
+    }
+    llvm::TimerGroup::clearAll();
   }
-  llvm::TimerGroup::clearAll();
 
   if (llvm::timeTraceProfilerEnabled()) {
-    // It is possible that the compiler instance doesn't own a file manager here
-    // if we're compiling a module unit. Since the file manager are owned by AST
-    // when we're compiling a module unit. So the file manager may be invalid
-    // here.
-    //
-    // It should be fine to create file manager here since the file system
-    // options are stored in the compiler invocation and we can recreate the VFS
-    // from the compiler invocation.
-    if (!Clang->hasFileManager())
-      Clang->createFileManager(createVFSFromCompilerInvocation(
-          Clang->getInvocation(), Clang->getDiagnostics()));
-
     if (auto profilerOutput = Clang->createOutputFile(
             Clang->getFrontendOpts().TimeTracePath, /*Binary=*/false,
             /*RemoveFileOnSignal=*/false,
diff --git a/src/zig_clang_cc1as_main.cpp b/src/zig_clang_cc1as_main.cpp
index f938e7e404..339693e709 100644
--- a/src/zig_clang_cc1as_main.cpp
+++ b/src/zig_clang_cc1as_main.cpp
@@ -12,12 +12,12 @@
 //===----------------------------------------------------------------------===//
 
 #include "clang/Basic/Diagnostic.h"
+#include "clang/Basic/DiagnosticFrontend.h"
 #include "clang/Basic/DiagnosticOptions.h"
 #include "clang/Driver/DriverDiagnostic.h"
-#include "clang/Driver/Options.h"
-#include "clang/Frontend/FrontendDiagnostic.h"
 #include "clang/Frontend/TextDiagnosticPrinter.h"
 #include "clang/Frontend/Utils.h"
+#include "clang/Options/Options.h"
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/StringExtras.h"
 #include "llvm/ADT/StringSwitch.h"
@@ -45,6 +45,7 @@
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/FileSystem.h"
 #include "llvm/Support/FormattedStream.h"
+#include "llvm/Support/IOSandbox.h"
 #include "llvm/Support/MemoryBuffer.h"
 #include "llvm/Support/Path.h"
 #include "llvm/Support/Process.h"
@@ -59,8 +60,7 @@
 #include <optional>
 #include <system_error>
 using namespace clang;
-using namespace clang::driver;
-using namespace clang::driver::options;
+using namespace clang::options;
 using namespace llvm;
 using namespace llvm::opt;
 
@@ -71,8 +71,8 @@ struct AssemblerInvocation {
   /// @name Target Options
   /// @{
 
-  /// The name of the target triple to assemble for.
-  std::string Triple;
+  /// The target triple to assemble for.
+  llvm::Triple Triple;
 
   /// If given, the name of the target CPU to determine which instructions
   /// are legal.
@@ -163,6 +163,10 @@ struct AssemblerInvocation {
   LLVM_PREFERRED_TYPE(bool)
   unsigned EmitCompactUnwindNonCanonical : 1;
 
+  // Whether to emit sframe unwind sections.
+  LLVM_PREFERRED_TYPE(bool)
+  unsigned EmitSFrameUnwind : 1;
+
   LLVM_PREFERRED_TYPE(bool)
   unsigned Crel : 1;
   LLVM_PREFERRED_TYPE(bool)
@@ -192,9 +196,12 @@ struct AssemblerInvocation {
   std::string AsSecureLogFile;
   /// @}
 
+  void setTriple(llvm::StringRef Str) {
+    Triple = llvm::Triple(llvm::Triple::normalize(Str));
+  }
+
 public:
   AssemblerInvocation() {
-    Triple = "";
     NoInitialTextSection = 0;
     InputFile = "-";
     OutputPath = "-";
@@ -261,7 +268,7 @@ bool AssemblerInvocation::CreateFromArgs(AssemblerInvocation &Opts,
   // Construct the invocation.
 
   // Target Options
-  Opts.Triple = llvm::Triple::normalize(Args.getLastArgValue(OPT_triple));
+  Opts.setTriple(Args.getLastArgValue(OPT_triple));
   if (Arg *A = Args.getLastArg(options::OPT_darwin_target_variant_triple))
     Opts.DarwinTargetVariantTriple = llvm::Triple(A->getValue());
   if (Arg *A = Args.getLastArg(OPT_darwin_target_variant_sdk_version_EQ)) {
@@ -278,7 +285,7 @@ bool AssemblerInvocation::CreateFromArgs(AssemblerInvocation &Opts,
 
   // Use the default target triple if unspecified.
   if (Opts.Triple.empty())
-    Opts.Triple = llvm::sys::getDefaultTargetTriple();
+    Opts.setTriple(llvm::sys::getDefaultTargetTriple());
 
   // Language Options
   Opts.IncludePaths = Args.getAllArgValues(OPT_I);
@@ -385,6 +392,7 @@ bool AssemblerInvocation::CreateFromArgs(AssemblerInvocation &Opts,
 
   Opts.EmitCompactUnwindNonCanonical =
       Args.hasArg(OPT_femit_compact_unwind_non_canonical);
+  Opts.EmitSFrameUnwind = Args.hasArg(OPT_gsframe);
   Opts.Crel = Args.hasArg(OPT_crel);
   Opts.ImplicitMapsyms = Args.hasArg(OPT_mmapsyms_implicit);
   Opts.X86RelaxRelocations = !Args.hasArg(OPT_mrelax_relocations_no);
@@ -414,15 +422,19 @@ getOutputStream(StringRef Path, DiagnosticsEngine &Diags, bool Binary) {
 }
 
 static bool ExecuteAssemblerImpl(AssemblerInvocation &Opts,
-                                 DiagnosticsEngine &Diags) {
+                                 DiagnosticsEngine &Diags,
+                                 IntrusiveRefCntPtr<vfs::FileSystem> VFS) {
   // Get the target specific parser.
   std::string Error;
   const Target *TheTarget = TargetRegistry::lookupTarget(Opts.Triple, Error);
   if (!TheTarget)
-    return Diags.Report(diag::err_target_unknown_triple) << Opts.Triple;
+    return Diags.Report(diag::err_target_unknown_triple) << Opts.Triple.str();
 
-  ErrorOr<std::unique_ptr<MemoryBuffer>> Buffer =
-      MemoryBuffer::getFileOrSTDIN(Opts.InputFile, /*IsText=*/true);
+  ErrorOr<std::unique_ptr<MemoryBuffer>> Buffer = [&] {
+    // FIXME(sandboxing): Make this a proper input file.
+    auto BypassSandbox = sys::sandbox::scopedDisable();
+    return MemoryBuffer::getFileOrSTDIN(Opts.InputFile, /*IsText=*/true);
+  }();
 
   if (std::error_code EC = Buffer.getError()) {
     return Diags.Report(diag::err_fe_error_reading)
@@ -437,6 +449,7 @@ static bool ExecuteAssemblerImpl(AssemblerInvocation &Opts,
   // Record the location of the include directories so that the lexer can find
   // it later.
   SrcMgr.setIncludeDirs(Opts.IncludePaths);
+  SrcMgr.setVirtualFileSystem(VFS);
 
   std::unique_ptr<MCRegisterInfo> MRI(TheTarget->createMCRegInfo(Opts.Triple));
   assert(MRI && "Unable to create target register info!");
@@ -445,11 +458,13 @@ static bool ExecuteAssemblerImpl(AssemblerInvocation &Opts,
   MCOptions.MCRelaxAll = Opts.RelaxAll;
   MCOptions.EmitDwarfUnwind = Opts.EmitDwarfUnwind;
   MCOptions.EmitCompactUnwindNonCanonical = Opts.EmitCompactUnwindNonCanonical;
+  MCOptions.EmitSFrameUnwind = Opts.EmitSFrameUnwind;
   MCOptions.MCSaveTempLabels = Opts.SaveTemporaryLabels;
   MCOptions.Crel = Opts.Crel;
   MCOptions.ImplicitMapSyms = Opts.ImplicitMapsyms;
   MCOptions.X86RelaxRelocations = Opts.X86RelaxRelocations;
   MCOptions.X86Sse2Avx = Opts.X86Sse2Avx;
+  MCOptions.MCNoExecStack = Opts.NoExecStack;
   MCOptions.CompressDebugSections = Opts.CompressDebugSections;
   MCOptions.AsSecureLogFile = Opts.AsSecureLogFile;
 
@@ -477,7 +492,10 @@ static bool ExecuteAssemblerImpl(AssemblerInvocation &Opts,
 
   std::unique_ptr<MCSubtargetInfo> STI(
       TheTarget->createMCSubtargetInfo(Opts.Triple, Opts.CPU, FS));
-  assert(STI && "Unable to create subtarget info!");
+  if (!STI) {
+    return Diags.Report(diag::err_fe_unable_to_create_subtarget)
+           << Opts.CPU << FS.empty() << FS;
+  }
 
   MCContext Ctx(Triple(Opts.Triple), MAI.get(), MRI.get(), STI.get(), &SrcMgr,
                 &MCOptions);
@@ -509,9 +527,8 @@ static bool ExecuteAssemblerImpl(AssemblerInvocation &Opts,
     Ctx.setCompilationDir(Opts.DebugCompilationDir);
   else {
     // If no compilation dir is set, try to use the current directory.
-    SmallString<128> CWD;
-    if (!sys::fs::current_path(CWD))
-      Ctx.setCompilationDir(CWD);
+    if (auto CWD = VFS->getCurrentWorkingDirectory())
+      Ctx.setCompilationDir(*CWD);
   }
   if (!Opts.DebugPrefixMap.empty())
     for (const auto &KV : Opts.DebugPrefixMap)
@@ -577,7 +594,6 @@ static bool ExecuteAssemblerImpl(AssemblerInvocation &Opts,
     Triple T(Opts.Triple);
     Str.reset(TheTarget->createMCObjectStreamer(
         T, Ctx, std::move(MAB), std::move(OW), std::move(CE), *STI));
-    Str->initSections(Opts.NoExecStack, *STI);
     if (T.isOSBinFormatMachO() && T.isOSDarwin()) {
       Triple *TVT = Opts.DarwinTargetVariantTriple
                         ? &*Opts.DarwinTargetVariantTriple
@@ -605,7 +621,7 @@ static bool ExecuteAssemblerImpl(AssemblerInvocation &Opts,
   std::unique_ptr<MCTargetAsmParser> TAP(
       TheTarget->createMCAsmParser(*STI, *Parser, *MCII, MCOptions));
   if (!TAP)
-    Failed = Diags.Report(diag::err_target_unknown_triple) << Opts.Triple;
+    Failed = Diags.Report(diag::err_target_unknown_triple) << Opts.Triple.str();
 
   // Set values for symbols, if any.
   for (auto &S : Opts.SymbolDefs) {
@@ -627,8 +643,9 @@ static bool ExecuteAssemblerImpl(AssemblerInvocation &Opts,
 }
 
 static bool ExecuteAssembler(AssemblerInvocation &Opts,
-                             DiagnosticsEngine &Diags) {
-  bool Failed = ExecuteAssemblerImpl(Opts, Diags);
+                             DiagnosticsEngine &Diags,
+                             IntrusiveRefCntPtr<vfs::FileSystem> VFS) {
+  bool Failed = ExecuteAssemblerImpl(Opts, Diags, VFS);
 
   // Delete output file if there were errors.
   if (Failed) {
@@ -662,8 +679,12 @@ int cc1as_main(ArrayRef<const char *> Argv, const char *Argv0, void *MainAddr) {
   TextDiagnosticPrinter *DiagClient =
       new TextDiagnosticPrinter(errs(), DiagOpts);
   DiagClient->setPrefix("clang -cc1as");
-  IntrusiveRefCntPtr<DiagnosticIDs> DiagID(new DiagnosticIDs());
-  DiagnosticsEngine Diags(DiagID, DiagOpts, DiagClient);
+  DiagnosticsEngine Diags(DiagnosticIDs::create(), DiagOpts, DiagClient);
+
+  auto VFS = [] {
+    auto BypassSandbox = sys::sandbox::scopedDisable();
+    return vfs::getRealFileSystem();
+  }();
 
   // Set an error handler, so that any LLVM backend diagnostics go through our
   // error handler.
@@ -679,8 +700,7 @@ int cc1as_main(ArrayRef<const char *> Argv, const char *Argv0, void *MainAddr) {
     getDriverOptTable().printHelp(
         llvm::outs(), "clang -cc1as [options] file...",
         "Clang Integrated Assembler", /*ShowHidden=*/false,
-        /*ShowAllAliases=*/false,
-        llvm::opt::Visibility(driver::options::CC1AsOption));
+        /*ShowAllAliases=*/false, llvm::opt::Visibility(options::CC1AsOption));
 
     return 0;
   }
@@ -703,11 +723,12 @@ int cc1as_main(ArrayRef<const char *> Argv, const char *Argv0, void *MainAddr) {
     for (unsigned i = 0; i != NumArgs; ++i)
       Args[i + 1] = Asm.LLVMArgs[i].c_str();
     Args[NumArgs + 1] = nullptr;
-    llvm::cl::ParseCommandLineOptions(NumArgs + 1, Args.get());
+    llvm::cl::ParseCommandLineOptions(NumArgs + 1, Args.get(), /*Overview=*/"",
+                                      /*Errs=*/nullptr, /*VFS=*/VFS.get());
   }
 
   // Execute the invocation, unless there were parsing errors.
-  bool Failed = Diags.hasErrorOccurred() || ExecuteAssembler(Asm, Diags);
+  bool Failed = Diags.hasErrorOccurred() || ExecuteAssembler(Asm, Diags, VFS);
 
   // If any timers were active but haven't been destroyed yet, print their
   // results now.
diff --git a/src/zig_clang_driver.cpp b/src/zig_clang_driver.cpp
index 7f4c6034be..75d945d6de 100644
--- a/src/zig_clang_driver.cpp
+++ b/src/zig_clang_driver.cpp
@@ -18,13 +18,13 @@
 #include "clang/Config/config.h"
 #include "clang/Driver/Compilation.h"
 #include "clang/Driver/DriverDiagnostic.h"
-#include "clang/Driver/Options.h"
 #include "clang/Driver/ToolChain.h"
 #include "clang/Frontend/ChainedDiagnosticConsumer.h"
 #include "clang/Frontend/CompilerInvocation.h"
 #include "clang/Frontend/SerializedDiagnosticPrinter.h"
 #include "clang/Frontend/TextDiagnosticPrinter.h"
 #include "clang/Frontend/Utils.h"
+#include "clang/Options/Options.h"
 #include "llvm/ADT/ArrayRef.h"
 #include "llvm/ADT/SmallString.h"
 #include "llvm/ADT/SmallVector.h"
@@ -38,6 +38,7 @@
 #include "llvm/Support/CrashRecoveryContext.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/FileSystem.h"
+#include "llvm/Support/IOSandbox.h"
 #include "llvm/Support/LLVMDriver.h"
 #include "llvm/Support/Path.h"
 #include "llvm/Support/PrettyStackTrace.h"
@@ -201,7 +202,8 @@ static void FixupDiagPrefixExeName(TextDiagnosticPrinter *DiagClient,
 }
 
 static int ExecuteCC1Tool(SmallVectorImpl<const char *> &ArgV,
-                          const llvm::ToolContext &ToolContext) {
+                          const llvm::ToolContext &ToolContext,
+                          IntrusiveRefCntPtr<llvm::vfs::FileSystem> VFS) {
   // If we call the cc1 tool from the clangDriver library (through
   // Driver::CC1Main), we need to clean up the options usage count. The options
   // are currently global, and they might have been used previously by the
@@ -209,7 +211,8 @@ static int ExecuteCC1Tool(SmallVectorImpl<const char *> &ArgV,
   llvm::cl::ResetAllOptionOccurrences();
 
   llvm::BumpPtrAllocator A;
-  llvm::cl::ExpansionContext ECtx(A, llvm::cl::TokenizeGNUCommandLine);
+  llvm::cl::ExpansionContext ECtx(A, llvm::cl::TokenizeGNUCommandLine,
+                                  VFS.get());
   if (llvm::Error Err = ECtx.expandResponseFiles(ArgV)) {
     llvm::errs() << toString(std::move(Err)) << '\n';
     return 1;
@@ -249,14 +252,22 @@ static int clang_main(int Argc, char **Argv, const llvm::ToolContext &ToolContex
   bool ClangCLMode =
       IsClangCL(getDriverMode(ProgName, llvm::ArrayRef(Args).slice(1)));
 
-  if (llvm::Error Err = expandResponseFiles(Args, ClangCLMode, A)) {
+  auto VFS = llvm::vfs::getRealFileSystem();
+
+  if (llvm::Error Err = expandResponseFiles(Args, ClangCLMode, A, VFS.get())) {
     llvm::errs() << toString(std::move(Err)) << '\n';
     return 1;
   }
 
   // Handle -cc1 integrated tools.
-  if (Args.size() >= 2 && StringRef(Args[1]).starts_with("-cc1"))
-    return ExecuteCC1Tool(Args, ToolContext);
+  if (Args.size() >= 2 && StringRef(Args[1]).starts_with("-cc1")) {
+    // Note that this only enables the sandbox for direct -cc1 invocations and
+    // out-of-process -cc1 invocations launched by the driver. For in-process
+    // -cc1 invocations launched by the driver, the sandbox is enabled in
+    // CC1Command::Execute() for better crash recovery.
+    auto EnableSandbox = llvm::sys::sandbox::scopedEnable();
+    return ExecuteCC1Tool(Args, ToolContext, VFS);
+  }
 
   // Handle options that need handling before the real command line parsing in
   // Driver::BuildCompilation()
@@ -326,9 +337,7 @@ static int clang_main(int Argc, char **Argv, const llvm::ToolContext &ToolContex
       new TextDiagnosticPrinter(llvm::errs(), *DiagOpts);
   FixupDiagPrefixExeName(DiagClient, ProgName);
 
-  IntrusiveRefCntPtr<DiagnosticIDs> DiagID(new DiagnosticIDs());
-
-  DiagnosticsEngine Diags(DiagID, *DiagOpts, DiagClient);
+  DiagnosticsEngine Diags(DiagnosticIDs::create(), *DiagOpts, DiagClient);
 
   if (!DiagOpts->DiagnosticSerializationFile.empty()) {
     auto SerializedConsumer =
@@ -338,7 +347,6 @@ static int clang_main(int Argc, char **Argv, const llvm::ToolContext &ToolContex
         Diags.takeClient(), std::move(SerializedConsumer)));
   }
 
-  auto VFS = llvm::vfs::getRealFileSystem();
   ProcessWarningOptions(Diags, *DiagOpts, *VFS, /*ReportDiags=*/false);
 
   Driver TheDriver(Path, llvm::sys::getDefaultTargetTriple(), Diags,
@@ -358,10 +366,10 @@ static int clang_main(int Argc, char **Argv, const llvm::ToolContext &ToolContex
   if (!SetBackdoorDriverOutputsFromEnvVars(TheDriver))
     return 1;
 
-  auto ExecuteCC1WithContext =
-      [&ToolContext](SmallVectorImpl<const char *> &ArgV) {
-        return ExecuteCC1Tool(ArgV, ToolContext);
-      };
+  auto ExecuteCC1WithContext = [&ToolContext,
+                                &VFS](SmallVectorImpl<const char *> &ArgV) {
+    return ExecuteCC1Tool(ArgV, ToolContext, VFS);
+  };
   if (!UseNewCC1Process) {
     TheDriver.CC1Main = ExecuteCC1WithContext;
     // Ensure the CC1Command actually catches cc1 crashes
diff --git a/src/zig_llvm-ar.cpp b/src/zig_llvm-ar.cpp
index 50478e8dda..fd00a7be49 100644
--- a/src/zig_llvm-ar.cpp
+++ b/src/zig_llvm-ar.cpp
@@ -12,6 +12,7 @@
 //===----------------------------------------------------------------------===//
 
 #include "llvm/ADT/StringExtras.h"
+#include "llvm/ADT/StringMap.h"
 #include "llvm/ADT/StringSwitch.h"
 #include "llvm/BinaryFormat/Magic.h"
 #include "llvm/IR/LLVMContext.h"
diff --git a/src/zig_llvm.cpp b/src/zig_llvm.cpp
index 90e462860f..9bba8e96d5 100644
--- a/src/zig_llvm.cpp
+++ b/src/zig_llvm.cpp
@@ -439,7 +439,7 @@ ZIG_EXTERN_C bool ZigLLVMTargetMachineEmitToFile(LLVMTargetMachineRef targ_machi
 
 void ZigLLVMSetOptBisectLimit(LLVMContextRef context_ref, int limit) {
     static OptBisect opt_bisect;
-    opt_bisect.setLimit(limit);
+    opt_bisect.setIntervals({0, limit});
     unwrap(context_ref)->setOptPassGate(opt_bisect);
 }
 
diff --git a/stage1/zig.h b/stage1/zig.h
index 0b9c6e58ca..67158429fd 100644
--- a/stage1/zig.h
+++ b/stage1/zig.h
@@ -79,6 +79,9 @@
 #elif defined(__I86__)
 #define zig_x86_16
 #define zig_x86
+#elif defined (__ez80)
+#define zig_ez80
+#define zig_z80
 #endif
 
 #if defined(zig_msvc) || __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
@@ -409,6 +412,8 @@
 #define zig_trap() __asm__ volatile("int $0x3")
 #elif defined(zig_x86)
 #define zig_trap() __asm__ volatile("ud2")
+#elif defined(zig_z80)
+#define zig_trap() __asm__ volatile("rst 00h")
 #else
 #define zig_trap() zig_trap_unavailable
 #endif
@@ -511,7 +516,7 @@ zig_extern void *memcpy (void *zig_restrict, void const *zig_restrict, size_t);
 zig_extern void *memset (void *, int, size_t);
 zig_extern void *memmove (void *, void const *, size_t);
 
-/* ================ Bool and 8/16/32/64-bit Integer Support ================= */
+/* ================ Bool and 8/16/24/32/48/64-bit Integer Support ================= */
 
 #include <limits.h>
 
@@ -590,6 +595,16 @@ typedef   signed long long  int16_t;
 #define  INT16_MAX ( INT16_C(0x7FFF))
 #define UINT16_MAX ( INT16_C(0xFFFF))
 
+#if defined(zig_ez80) /* NOTE(review): assumes `int` is 24 bits wide on eZ80 - confirm */
+typedef unsigned       int uint24_t;
+typedef   signed       int  int24_t;
+#define  INT24_C(c) c
+#define UINT24_C(c) c##U
+#endif
+#define  INT24_MIN (~INT24_C(0x7FFFFF)) /* -2^23 */
+#define  INT24_MAX ( INT24_C(0x7FFFFF)) /* 2^23 - 1 */
+#define UINT24_MAX ( INT24_C(0xFFFFFF)) /* 2^24 - 1 */
+
 #if SCHAR_MIN == ~0x7FFFFFFF && SCHAR_MAX == 0x7FFFFFFF && UCHAR_MAX == 0xFFFFFFFF
 typedef unsigned      char uint32_t;
 typedef   signed      char  int32_t;
@@ -620,6 +635,17 @@ typedef   signed long long  int32_t;
 #define  INT32_MAX ( INT32_C(0x7FFFFFFF))
 #define UINT32_MAX ( INT32_C(0xFFFFFFFF))
 
+#if defined(zig_ez80)
+typedef unsigned   __int48 uint48_t;
+typedef   signed   __int48  int48_t;
+#define  INT48_C(c) c
+/* no suffix */
+#define UINT48_C(c) ((uint48_t)(c))
+#endif
+#define  INT48_MIN (~INT48_C(0x7FFFFFFFFFFF))
+#define  INT48_MAX ( INT48_C(0x7FFFFFFFFFFF))
+#define UINT48_MAX ( INT48_C(0xFFFFFFFFFFFF))
+
 #if SCHAR_MIN == ~0x7FFFFFFFFFFFFFFF && SCHAR_MAX == 0x7FFFFFFFFFFFFFFF && UCHAR_MAX == 0xFFFFFFFFFFFFFFFF
 typedef unsigned      char uint64_t;
 typedef   signed      char  int64_t;
@@ -663,10 +689,18 @@ typedef ptrdiff_t intptr_t;
 #define zig_maxInt_i16  INT16_MAX
 #define zig_minInt_u16 UINT16_C(0)
 #define zig_maxInt_u16 UINT16_MAX
+#define zig_minInt_i24  INT24_MIN
+#define zig_maxInt_i24  INT24_MAX
+#define zig_minInt_u24 UINT24_C(0)
+#define zig_maxInt_u24 UINT24_MAX
 #define zig_minInt_i32  INT32_MIN
 #define zig_maxInt_i32  INT32_MAX
 #define zig_minInt_u32 UINT32_C(0)
 #define zig_maxInt_u32 UINT32_MAX
+#define zig_minInt_i48  INT48_MIN
+#define zig_maxInt_i48  INT48_MAX
+#define zig_minInt_u48 UINT48_C(0)
+#define zig_maxInt_u48 UINT48_MAX
 #define zig_minInt_i64  INT64_MIN
 #define zig_maxInt_i64  INT64_MAX
 #define zig_minInt_u64 UINT64_C(0)
@@ -786,6 +820,17 @@ zig_int_helpers(16, unsigned long long)
 #else
 zig_int_helpers(16, uint16_t)
 #endif
+#if defined(zig_ez80)
+#if UINT24_MAX <= UINT_MAX
+zig_int_helpers(24, unsigned int)
+#elif UINT24_MAX <= ULONG_MAX
+zig_int_helpers(24, unsigned long)
+#elif UINT24_MAX <= ULLONG_MAX
+zig_int_helpers(24, unsigned long long)
+#else
+zig_int_helpers(24, uint24_t)
+#endif
+#endif
 #if UINT32_MAX <= UINT_MAX
 zig_int_helpers(32, unsigned int)
 #elif UINT32_MAX <= ULONG_MAX
@@ -795,6 +840,17 @@ zig_int_helpers(32, unsigned long long)
 #else
 zig_int_helpers(32, uint32_t)
 #endif
+#if defined(zig_ez80) /* pick the first standard unsigned type whose max covers UINT48_MAX */
+#if UINT48_MAX <= UINT_MAX
+zig_int_helpers(48, unsigned int)
+#elif UINT48_MAX <= ULONG_MAX
+zig_int_helpers(48, unsigned long)
+#elif UINT48_MAX <= ULLONG_MAX
+zig_int_helpers(48, unsigned long long)
+#else
+zig_int_helpers(48, uint48_t)
+#endif
+#endif
 #if UINT64_MAX <= UINT_MAX
 zig_int_helpers(64, unsigned int)
 #elif UINT64_MAX <= ULONG_MAX
@@ -909,6 +965,66 @@ static inline bool zig_addo_i16(int16_t *res, int16_t lhs, int16_t rhs, uint8_t
 #endif
 }
 
+#if defined(zig_ez80)
+static inline bool zig_addo_u24(uint24_t *res, uint24_t lhs, uint24_t rhs, uint8_t bits) {
+#if zig_has_builtin(add_overflow) || defined(zig_gcc)
+    uint24_t full_res;
+    bool overflow = __builtin_add_overflow(lhs, rhs, &full_res);
+    *res = zig_wrap_u24(full_res, bits);
+    return overflow || full_res < zig_minInt_u(24, bits) || full_res > zig_maxInt_u(24, bits);
+#else
+    uint32_t full_res;
+    bool overflow = zig_addo_u32(&full_res, lhs, rhs, bits);
+    *res = (uint24_t)full_res;
+    return overflow;
+#endif
+}
+
+static inline bool zig_addo_i24(int24_t *res, int24_t lhs, int24_t rhs, uint8_t bits) {
+#if zig_has_builtin(add_overflow) || defined(zig_gcc)
+    int24_t full_res;
+    bool overflow = __builtin_add_overflow(lhs, rhs, &full_res);
+    *res = zig_wrap_i24(full_res, bits);
+    return overflow || full_res < zig_minInt_i(24, bits) || full_res > zig_maxInt_i(24, bits);
+#else
+    int32_t full_res;
+    bool overflow = zig_addo_i32(&full_res, lhs, rhs, bits);
+    *res = (int24_t)full_res;
+    return overflow;
+#endif
+}
+#endif
+
+#if defined(zig_ez80)
+static inline bool zig_addo_u48(uint48_t *res, uint48_t lhs, uint48_t rhs, uint8_t bits) {
+#if zig_has_builtin(add_overflow) || defined(zig_gcc)
+    uint48_t full_res;
+    bool overflow = __builtin_add_overflow(lhs, rhs, &full_res);
+    *res = zig_wrap_u48(full_res, bits);
+    return overflow || full_res < zig_minInt_u(48, bits) || full_res > zig_maxInt_u(48, bits);
+#else
+    uint64_t full_res;
+    bool overflow = zig_addo_u64(&full_res, lhs, rhs, bits);
+    *res = (uint48_t)full_res;
+    return overflow;
+#endif
+}
+
+static inline bool zig_addo_i48(int48_t *res, int48_t lhs, int48_t rhs, uint8_t bits) {
+#if zig_has_builtin(add_overflow) || defined(zig_gcc)
+    int48_t full_res;
+    bool overflow = __builtin_add_overflow(lhs, rhs, &full_res);
+    *res = zig_wrap_i48(full_res, bits);
+    return overflow || full_res < zig_minInt_i(48, bits) || full_res > zig_maxInt_i(48, bits);
+#else
+    int64_t full_res;
+    bool overflow = zig_addo_i64(&full_res, lhs, rhs, bits);
+    *res = (int48_t)full_res;
+    return overflow;
+#endif
+}
+#endif
+
 static inline bool zig_subo_u32(uint32_t *res, uint32_t lhs, uint32_t rhs, uint8_t bits) {
 #if zig_has_builtin(sub_overflow) || defined(zig_gcc)
     uint32_t full_res;
@@ -933,6 +1049,7 @@ static inline bool zig_subo_i32(int32_t *res, int32_t lhs, int32_t rhs, uint8_t
     return overflow || full_res < zig_minInt_i(32, bits) || full_res > zig_maxInt_i(32, bits);
 }
 
+
 static inline bool zig_subo_u64(uint64_t *res, uint64_t lhs, uint64_t rhs, uint8_t bits) {
 #if zig_has_builtin(sub_overflow) || defined(zig_gcc)
     uint64_t full_res;
@@ -1013,6 +1130,66 @@ static inline bool zig_subo_i16(int16_t *res, int16_t lhs, int16_t rhs, uint8_t
 #endif
 }
 
+#if defined(zig_ez80)
+static inline bool zig_subo_u24(uint24_t *res, uint24_t lhs, uint24_t rhs, uint8_t bits) {
+#if zig_has_builtin(sub_overflow) || defined(zig_gcc)
+    uint24_t full_res;
+    bool overflow = __builtin_sub_overflow(lhs, rhs, &full_res);
+    *res = zig_wrap_u24(full_res, bits);
+    return overflow || full_res < zig_minInt_u(24, bits) || full_res > zig_maxInt_u(24, bits);
+#else
+    uint32_t full_res;
+    bool overflow = zig_subo_u32(&full_res, lhs, rhs, bits);
+    *res = (uint24_t)full_res;
+    return overflow;
+#endif
+}
+
+static inline bool zig_subo_i24(int24_t *res, int24_t lhs, int24_t rhs, uint8_t bits) {
+#if zig_has_builtin(sub_overflow) || defined(zig_gcc)
+    int24_t full_res;
+    bool overflow = __builtin_sub_overflow(lhs, rhs, &full_res);
+    *res = zig_wrap_i24(full_res, bits);
+    return overflow || full_res < zig_minInt_i(24, bits) || full_res > zig_maxInt_i(24, bits);
+#else
+    int32_t full_res;
+    bool overflow = zig_subo_i32(&full_res, lhs, rhs, bits);
+    *res = (int24_t)full_res;
+    return overflow;
+#endif
+}
+#endif
+
+#if defined(zig_ez80)
+static inline bool zig_subo_u48(uint48_t *res, uint48_t lhs, uint48_t rhs, uint8_t bits) {
+#if zig_has_builtin(sub_overflow) || defined(zig_gcc)
+    uint48_t full_res;
+    bool overflow = __builtin_sub_overflow(lhs, rhs, &full_res);
+    *res = zig_wrap_u48(full_res, bits);
+    return overflow || full_res < zig_minInt_u(48, bits) || full_res > zig_maxInt_u(48, bits);
+#else
+    uint64_t full_res;
+    bool overflow = zig_subo_u64(&full_res, lhs, rhs, bits);
+    *res = (uint48_t)full_res;
+    return overflow;
+#endif
+}
+
+static inline bool zig_subo_i48(int48_t *res, int48_t lhs, int48_t rhs, uint8_t bits) {
+#if zig_has_builtin(sub_overflow) || defined(zig_gcc)
+    int48_t full_res;
+    bool overflow = __builtin_sub_overflow(lhs, rhs, &full_res);
+    *res = zig_wrap_i48(full_res, bits);
+    return overflow || full_res < zig_minInt_i(48, bits) || full_res > zig_maxInt_i(48, bits);
+#else
+    int64_t full_res;
+    bool overflow = zig_subo_i64(&full_res, lhs, rhs, bits);
+    *res = (int48_t)full_res;
+    return overflow;
+#endif
+}
+#endif
+
 static inline bool zig_mulo_u32(uint32_t *res, uint32_t lhs, uint32_t rhs, uint8_t bits) {
 #if zig_has_builtin(mul_overflow) || defined(zig_gcc)
     uint32_t full_res;
@@ -1121,6 +1298,66 @@ static inline bool zig_mulo_i16(int16_t *res, int16_t lhs, int16_t rhs, uint8_t
 #endif
 }
 
+#if defined(zig_ez80)
+static inline bool zig_mulo_u24(uint24_t *res, uint24_t lhs, uint24_t rhs, uint8_t bits) {
+#if zig_has_builtin(mul_overflow) || defined(zig_gcc)
+    uint24_t full_res;
+    bool overflow = __builtin_mul_overflow(lhs, rhs, &full_res);
+    *res = zig_wrap_u24(full_res, bits);
+    return overflow || full_res < zig_minInt_u(24, bits) || full_res > zig_maxInt_u(24, bits);
+#else
+    uint32_t full_res;
+    bool overflow = zig_mulo_u32(&full_res, lhs, rhs, bits);
+    *res = (uint24_t)full_res;
+    return overflow;
+#endif
+}
+
+static inline bool zig_mulo_i24(int24_t *res, int24_t lhs, int24_t rhs, uint8_t bits) {
+#if zig_has_builtin(mul_overflow) || defined(zig_gcc)
+    int24_t full_res;
+    bool overflow = __builtin_mul_overflow(lhs, rhs, &full_res);
+    *res = zig_wrap_i24(full_res, bits);
+    return overflow || full_res < zig_minInt_i(24, bits) || full_res > zig_maxInt_i(24, bits);
+#else
+    int32_t full_res;
+    bool overflow = zig_mulo_i32(&full_res, lhs, rhs, bits);
+    *res = (int24_t)full_res;
+    return overflow;
+#endif
+}
+#endif
+
+#if defined(zig_ez80)
+static inline bool zig_mulo_u48(uint48_t *res, uint48_t lhs, uint48_t rhs, uint8_t bits) {
+#if zig_has_builtin(mul_overflow) || defined(zig_gcc)
+    uint48_t full_res;
+    bool overflow = __builtin_mul_overflow(lhs, rhs, &full_res);
+    *res = zig_wrap_u48(full_res, bits);
+    return overflow || full_res < zig_minInt_u(48, bits) || full_res > zig_maxInt_u(48, bits);
+#else
+    uint64_t full_res;
+    bool overflow = zig_mulo_u64(&full_res, lhs, rhs, bits);
+    *res = (uint48_t)full_res;
+    return overflow;
+#endif
+}
+
+static inline bool zig_mulo_i48(int48_t *res, int48_t lhs, int48_t rhs, uint8_t bits) {
+#if zig_has_builtin(mul_overflow) || defined(zig_gcc)
+    int48_t full_res;
+    bool overflow = __builtin_mul_overflow(lhs, rhs, &full_res);
+    *res = zig_wrap_i48(full_res, bits);
+    return overflow || full_res < zig_minInt_i(48, bits) || full_res > zig_maxInt_i(48, bits);
+#else
+    int64_t full_res;
+    bool overflow = zig_mulo_i64(&full_res, lhs, rhs, bits);
+    *res = (int48_t)full_res;
+    return overflow;
+#endif
+}
+#endif
+
 #define zig_int_builtins(w) \
     static inline bool zig_shlo_u##w(uint##w##_t *res, uint##w##_t lhs, uint8_t rhs, uint8_t bits) { \
         *res = zig_shlw_u##w(lhs, rhs, bits); \
@@ -1180,7 +1417,13 @@ static inline bool zig_mulo_i16(int16_t *res, int16_t lhs, int16_t rhs, uint8_t
     }
 zig_int_builtins(8)
 zig_int_builtins(16)
+#if defined(zig_ez80)
+zig_int_builtins(24)
+#endif
 zig_int_builtins(32)
+#if defined(zig_ez80)
+zig_int_builtins(48)
+#endif
 zig_int_builtins(64)
 
 #define zig_builtin8(name, val) __builtin_##name(val)
@@ -1189,6 +1432,11 @@ typedef unsigned int zig_Builtin8;
 #define zig_builtin16(name, val) __builtin_##name(val)
 typedef unsigned int zig_Builtin16;
 
+#if defined(zig_ez80) /* 24-bit variant, only defined for eZ80: forwards to the unsuffixed __builtin_<name> */
+#define zig_builtin24(name, val) __builtin_##name(val)
+typedef unsigned int zig_Builtin24; /* NOTE(review): assumes unsigned int is at least 24 bits wide on this target; confirm */
+#endif
+
 #if INT_MIN <= INT32_MIN
 #define zig_builtin32(name, val) __builtin_##name(val)
 typedef unsigned int zig_Builtin32;
@@ -1197,6 +1445,11 @@ typedef unsigned int zig_Builtin32;
 typedef unsigned long zig_Builtin32;
 #endif
 
+#if defined(zig_ez80) /* 48-bit variant, only defined for eZ80: forwards to the unsuffixed __builtin_<name> */
+#define zig_builtin48(name, val) __builtin_##name(val)
+typedef unsigned long long zig_Builtin48; /* NOTE(review): the unsuffixed builtin takes unsigned int; confirm it is wide enough for a 48-bit operand typed as unsigned long long */
+#endif
+
 #if INT_MIN <= INT64_MIN
 #define zig_builtin64(name, val) __builtin_##name(val)
 typedef unsigned int zig_Builtin64;
@@ -1231,6 +1484,23 @@ static inline int16_t zig_byte_swap_i16(int16_t val, uint8_t bits) {
     return zig_wrap_i16((int16_t)zig_byte_swap_u16((uint16_t)val, bits), bits);
 }
 
+#if defined(zig_ez80)
+static inline uint24_t zig_byte_swap_u24(uint24_t val, uint8_t bits) { /* reverses the byte order of val; returns uint24_t so the top byte is not truncated */
+    uint24_t full_res;
+#if zig_has_builtin(bswap24) || defined(zig_gcc) /* NOTE(review): stock GCC documents bswap16/32/64/128 only; confirm the eZ80 toolchain provides bswap24 */
+    full_res = __builtin_bswap24(val);
+#else /* fallback: low byte moves to the top, the upper two bytes are swapped as a 16-bit unit into the bottom */
+    full_res = (uint24_t)zig_byte_swap_u8((uint8_t)(val >>  0), 8) <<  16 |
+               (uint24_t)zig_byte_swap_u16((uint16_t)(val >>  8), 16) >>  0;
+#endif
+    return zig_wrap_u24(full_res >> (24 - bits), bits); /* shift out the low (24 - bits) bits, then wrap to `bits` */
+}
+
+static inline int24_t zig_byte_swap_i24(int24_t val, uint8_t bits) { /* signed wrapper: swaps via the unsigned helper, then wraps as a signed `bits`-wide value */
+    return zig_wrap_i24((int24_t)zig_byte_swap_u24((uint24_t)val, bits), bits);
+}
+#endif
+
 static inline uint32_t zig_byte_swap_u32(uint32_t val, uint8_t bits) {
     uint32_t full_res;
 #if zig_has_builtin(bswap32) || defined(zig_gcc)
@@ -1246,6 +1516,23 @@ static inline int32_t zig_byte_swap_i32(int32_t val, uint8_t bits) {
     return zig_wrap_i32((int32_t)zig_byte_swap_u32((uint32_t)val, bits), bits);
 }
 
+#if defined(zig_ez80)
+static inline uint48_t zig_byte_swap_u48(uint48_t val, uint8_t bits) { /* reverses the byte order of val; returns uint48_t so the upper 16 bits are not truncated */
+    uint48_t full_res;
+#if zig_has_builtin(bswap48) || defined(zig_gcc) /* NOTE(review): stock GCC documents bswap16/32/64/128 only; confirm the eZ80 toolchain provides bswap48 */
+    full_res = __builtin_bswap48(val);
+#else /* fallback: byte-swap each 24-bit half and exchange the halves */
+    full_res = (uint48_t)zig_byte_swap_u24((uint24_t)(val >>  0), 24) << 24 |
+               (uint48_t)zig_byte_swap_u24((uint24_t)(val >> 24), 24) >>  0;
+#endif
+    return zig_wrap_u48(full_res >> (48 - bits), bits); /* shift out the low (48 - bits) bits, then wrap to `bits` */
+}
+
+static inline int48_t zig_byte_swap_i48(int48_t val, uint8_t bits) { /* signed wrapper: swaps via the unsigned helper, then wraps as a signed `bits`-wide value */
+    return zig_wrap_i48((int48_t)zig_byte_swap_u48((uint48_t)val, bits), bits);
+}
+#endif
+
 static inline uint64_t zig_byte_swap_u64(uint64_t val, uint8_t bits) {
     uint64_t full_res;
 #if zig_has_builtin(bswap64) || defined(zig_gcc)
@@ -1294,6 +1581,23 @@ static inline int16_t zig_bit_reverse_i16(int16_t val, uint8_t bits) {
     return zig_wrap_i16((int16_t)zig_bit_reverse_u16((uint16_t)val, bits), bits);
 }
 
+#if defined(zig_ez80)
+static inline uint24_t zig_bit_reverse_u24(uint24_t val, uint8_t bits) { /* reverses the bit order of val */
+    uint24_t full_res;
+#if zig_has_builtin(bitreverse24)
+    full_res = __builtin_bitreverse24(val);
+#else /* fallback: reversed low byte moves to the top, the upper 16 bits are reversed into the bottom */
+    full_res = (uint24_t)zig_bit_reverse_u8((uint8_t)(val >>  0), 8) <<  16 |
+               (uint24_t)zig_bit_reverse_u16((uint16_t)(val >>  8), 16) >>  0;
+#endif
+    return zig_wrap_u24(full_res >> (24 - bits), bits); /* shift out the low (24 - bits) bits, then wrap to `bits` */
+}
+
+static inline int24_t zig_bit_reverse_i24(int24_t val, uint8_t bits) { /* signed wrapper: reverses via the unsigned helper, then wraps as a signed `bits`-wide value */
+    return zig_wrap_i24((int24_t)zig_bit_reverse_u24((uint24_t)val, bits), bits);
+}
+#endif
+
 static inline uint32_t zig_bit_reverse_u32(uint32_t val, uint8_t bits) {
     uint32_t full_res;
 #if zig_has_builtin(bitreverse32)
@@ -1309,6 +1613,23 @@ static inline int32_t zig_bit_reverse_i32(int32_t val, uint8_t bits) {
     return zig_wrap_i32((int32_t)zig_bit_reverse_u32((uint32_t)val, bits), bits);
 }
 
+#if defined(zig_ez80)
+static inline uint48_t zig_bit_reverse_u48(uint48_t val, uint8_t bits) { /* reverses the bit order of val; returns uint48_t so the upper 16 bits are not truncated */
+    uint48_t full_res;
+#if zig_has_builtin(bitreverse48)
+    full_res = __builtin_bitreverse48(val);
+#else /* fallback: bit-reverse each 24-bit half and exchange the halves */
+    full_res = (uint48_t)zig_bit_reverse_u24((uint24_t)(val >>  0), 24) << 24 |
+               (uint48_t)zig_bit_reverse_u24((uint24_t)(val >> 24), 24) >>  0;
+#endif
+    return zig_wrap_u48(full_res >> (48 - bits), bits); /* wrap with the 48-bit helper; the 32-bit one would drop the high bits */
+}
+
+static inline int48_t zig_bit_reverse_i48(int48_t val, uint8_t bits) { /* signed wrapper: reverses via the unsigned helper, then wraps as a signed `bits`-wide value */
+    return zig_wrap_i48((int48_t)zig_bit_reverse_u48((uint48_t)val, bits), bits);
+}
+#endif
+
 static inline uint64_t zig_bit_reverse_u64(uint64_t val, uint8_t bits) {
     uint64_t full_res;
 #if zig_has_builtin(bitreverse64)
@@ -1350,7 +1671,13 @@ static inline int64_t zig_bit_reverse_i64(int64_t val, uint8_t bits) {
 #endif
 zig_builtin_popcount(8)
 zig_builtin_popcount(16)
+#if defined(zig_ez80)
+zig_builtin_popcount(24)
+#endif
 zig_builtin_popcount(32)
+#if defined(zig_ez80)
+zig_builtin_popcount(48)
+#endif
 zig_builtin_popcount(64)
 
 #define zig_builtin_ctz_common(w) \
@@ -1375,7 +1702,13 @@ zig_builtin_popcount(64)
 #endif
 zig_builtin_ctz(8)
 zig_builtin_ctz(16)
+#if defined(zig_ez80)
+zig_builtin_ctz(24)
+#endif
 zig_builtin_ctz(32)
+#if defined(zig_ez80)
+zig_builtin_ctz(48)
+#endif
 zig_builtin_ctz(64)
 
 #define zig_builtin_clz_common(w) \
@@ -1400,7 +1733,13 @@ zig_builtin_ctz(64)
 #endif
 zig_builtin_clz(8)
 zig_builtin_clz(16)
+#if defined(zig_ez80)
+zig_builtin_clz(24)
+#endif
 zig_builtin_clz(32)
+#if defined(zig_ez80)
+zig_builtin_clz(48)
+#endif
 zig_builtin_clz(64)
 
 /* ======================== 128-bit Integer Support ========================= */
@@ -1981,6 +2320,20 @@ static inline zig_i128 zig_bit_reverse_i128(zig_i128 val, uint8_t bits) {
     return zig_bitCast_i128(zig_bit_reverse_u128(zig_bitCast_u128(val), bits));
 }
 
+#if zig_has_int128 /* with zig_has_int128 set, these macros expand to an ordinary switch / case / default */
+#define zig_switch_int128(operand) switch (operand)
+#define zig_switch_prong_begin_int128()
+#define zig_switch_case_int128(Type, operand, value) case value:
+#define zig_switch_prong_end_int128()
+#define zig_switch_default_int128() default:
+#else // !zig_has_int128: each prong expands to `if (0 || (zig_cmp_Type(operand, value) == 0) ...)`; the switch and default macros expand to nothing
+#define zig_switch_int128(operand)
+#define zig_switch_prong_begin_int128() if (0
+#define zig_switch_case_int128(Type, operand, value) || (zig_cmp_##Type(operand, value) == 0)
+#define zig_switch_prong_end_int128() )
+#define zig_switch_default_int128()
+#endif // zig_has_int128
+
 /* ========================== Big Integer Support =========================== */
 
 static inline uint16_t zig_int_bytes(uint16_t bits) {
diff --git a/stage1/zig1.wasm b/stage1/zig1.wasm
index 11942012c7..67ca7db75a 100644
Binary files a/stage1/zig1.wasm and b/stage1/zig1.wasm differ
diff --git a/test/behavior/align.zig b/test/behavior/align.zig
index 3c90c35d54..0bc7195d39 100644
--- a/test/behavior/align.zig
+++ b/test/behavior/align.zig
@@ -317,7 +317,6 @@ test "@alignCast functions" {
 
     // function alignment is a compile error on wasm
     if (native_arch.isWasm()) return error.SkipZigTest;
-    if (native_arch.isThumb()) return error.SkipZigTest;
 
     try expect(fnExpectsOnly1(simple4) == 0x19);
 }
diff --git a/test/behavior/asm.zig b/test/behavior/asm.zig
index 4e138574c6..8d7f3bcbca 100644
--- a/test/behavior/asm.zig
+++ b/test/behavior/asm.zig
@@ -67,7 +67,6 @@ test "alternative constraints" {
     if (builtin.zig_backend == .stage2_sparc64) return error.SkipZigTest; // TODO
     if (builtin.zig_backend == .stage2_spirv) return error.SkipZigTest;
     if (builtin.zig_backend == .stage2_riscv64) return error.SkipZigTest;
-    if (builtin.zig_backend == .stage2_llvm and builtin.cpu.arch.isLoongArch()) return error.SkipZigTest; // https://github.com/llvm/llvm-project/issues/159200
 
     if (builtin.zig_backend == .stage2_c and builtin.os.tag == .windows) return error.SkipZigTest; // MSVC doesn't support inline assembly
 
@@ -247,12 +246,9 @@ test "extern output types (x86_64)" {
     }
 }
 
-test "riscv abi register aliases as clobbers" {
+test "abi register aliases as clobbers (RISC-V)" {
     if (!builtin.target.cpu.arch.isRISCV()) return error.SkipZigTest;
     if (builtin.zig_backend == .stage2_riscv64) return error.SkipZigTest;
-    if (builtin.zig_backend == .stage2_c and builtin.os.tag == .windows) return error.SkipZigTest;
-
-    if (!comptime builtin.cpu.arch.isRISCV()) return error.SkipZigTest;
 
     // Verify that ABI alias names are accepted as clobbers for RISC-V.
     asm volatile ("" ::: .{ .ra = true, .sp = true, .gp = true, .tp = true });
diff --git a/test/behavior/basic.zig b/test/behavior/basic.zig
index 32df2f5199..60bd0d6933 100644
--- a/test/behavior/basic.zig
+++ b/test/behavior/basic.zig
@@ -1407,11 +1407,7 @@ test "allocation and looping over 3-byte integer" {
     if (builtin.zig_backend == .stage2_sparc64) return error.SkipZigTest; // TODO
     if (builtin.zig_backend == .stage2_spirv) return error.SkipZigTest;
     if (builtin.zig_backend == .stage2_riscv64) return error.SkipZigTest;
-
-    if (builtin.zig_backend == .stage2_llvm and builtin.os.tag.isDarwin()) {
-        return error.SkipZigTest; // TODO
-    }
-    if (builtin.cpu.arch == .s390x and builtin.zig_backend == .stage2_llvm) return error.SkipZigTest; // TODO
+    if (builtin.zig_backend == .stage2_llvm and builtin.os.tag.isDarwin()) return error.SkipZigTest; // TODO
 
     try expect(@sizeOf(u24) == 4);
     try expect(@sizeOf([1]u24) == 4);
diff --git a/test/behavior/field_parent_ptr.zig b/test/behavior/field_parent_ptr.zig
index 2749115fc0..edaccda098 100644
--- a/test/behavior/field_parent_ptr.zig
+++ b/test/behavior/field_parent_ptr.zig
@@ -1757,7 +1757,6 @@ test "@fieldParentPtr packed union" {
     if (builtin.zig_backend == .stage2_spirv) return error.SkipZigTest;
     if (builtin.zig_backend == .stage2_riscv64) return error.SkipZigTest;
     if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest;
-    if (builtin.target.cpu.arch.endian() == .big) return error.SkipZigTest; // TODO
 
     const C = packed union {
         a: packed struct(u32) {
diff --git a/test/behavior/floatop.zig b/test/behavior/floatop.zig
index bd23a6da14..38097b69d8 100644
--- a/test/behavior/floatop.zig
+++ b/test/behavior/floatop.zig
@@ -136,7 +136,6 @@ test "cmp f32" {
 test "cmp f64" {
     if (builtin.zig_backend == .stage2_spirv) return error.SkipZigTest;
     if (builtin.zig_backend == .stage2_sparc64) return error.SkipZigTest; // TODO
-    if (builtin.cpu.arch.isArm() and builtin.target.abi.float() == .soft) return error.SkipZigTest; // https://github.com/ziglang/zig/issues/21234
 
     try testCmp(f64);
     try comptime testCmp(f64);
@@ -155,7 +154,6 @@ test "cmp f128" {
 test "cmp f80/c_longdouble" {
     if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; // TODO
     if (builtin.zig_backend == .stage2_c and builtin.cpu.arch.isArm()) return error.SkipZigTest;
-    if (builtin.zig_backend == .stage2_llvm) return error.SkipZigTest;
     if (builtin.zig_backend == .stage2_spirv) return error.SkipZigTest;
     if (builtin.zig_backend == .stage2_riscv64) return error.SkipZigTest;
 
@@ -223,8 +221,7 @@ test "vector cmp f16" {
     if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest;
     if (builtin.zig_backend == .stage2_sparc64) return error.SkipZigTest; // TODO
     if (builtin.zig_backend == .stage2_riscv64) return error.SkipZigTest;
-    if (builtin.cpu.arch.isArm()) return error.SkipZigTest;
-    if (builtin.cpu.arch.isPowerPC64()) return error.SkipZigTest;
+    if (builtin.zig_backend == .stage2_llvm and builtin.cpu.arch.isArm()) return error.SkipZigTest;
     if (builtin.zig_backend == .stage2_llvm and builtin.cpu.arch == .hexagon) return error.SkipZigTest;
 
     try testCmpVector(f16);
@@ -236,8 +233,8 @@ test "vector cmp f32" {
     if (builtin.zig_backend == .stage2_spirv) return error.SkipZigTest;
     if (builtin.zig_backend == .stage2_sparc64) return error.SkipZigTest; // TODO
     if (builtin.zig_backend == .stage2_riscv64) return error.SkipZigTest;
-    if (builtin.cpu.arch.isArm()) return error.SkipZigTest;
-    if (builtin.cpu.arch.isPowerPC64()) return error.SkipZigTest;
+    if (builtin.zig_backend == .stage2_llvm and builtin.cpu.arch.isArm()) return error.SkipZigTest;
+    if (builtin.zig_backend == .stage2_llvm and builtin.cpu.arch.isPowerPC64()) return error.SkipZigTest;
     if (builtin.zig_backend == .stage2_llvm and builtin.cpu.arch == .hexagon) return error.SkipZigTest;
 
     try testCmpVector(f32);
@@ -248,8 +245,6 @@ test "vector cmp f64" {
     if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest;
     if (builtin.zig_backend == .stage2_sparc64) return error.SkipZigTest; // TODO
     if (builtin.zig_backend == .stage2_riscv64) return error.SkipZigTest;
-    if (builtin.cpu.arch.isArm()) return error.SkipZigTest;
-    if (builtin.cpu.arch.isPowerPC64()) return error.SkipZigTest;
     if (builtin.zig_backend == .stage2_llvm and builtin.cpu.arch == .hexagon) return error.SkipZigTest;
 
     try testCmpVector(f64);
@@ -262,8 +257,7 @@ test "vector cmp f128" {
     if (builtin.zig_backend == .stage2_c and builtin.cpu.arch.isArm()) return error.SkipZigTest;
     if (builtin.zig_backend == .stage2_spirv) return error.SkipZigTest;
     if (builtin.zig_backend == .stage2_riscv64) return error.SkipZigTest;
-    if (builtin.cpu.arch.isArm()) return error.SkipZigTest;
-    if (builtin.cpu.arch.isPowerPC64()) return error.SkipZigTest;
+    if (builtin.zig_backend == .stage2_llvm and builtin.cpu.arch == .powerpc64le) return error.SkipZigTest;
     if (builtin.zig_backend == .stage2_llvm and builtin.cpu.arch == .hexagon) return error.SkipZigTest;
 
     try testCmpVector(f128);
diff --git a/test/behavior/generics.zig b/test/behavior/generics.zig
index 89ff5764a8..b62199fea9 100644
--- a/test/behavior/generics.zig
+++ b/test/behavior/generics.zig
@@ -305,8 +305,6 @@ test "generic function instantiation non-duplicates" {
     if (builtin.zig_backend == .stage2_sparc64) return error.SkipZigTest; // TODO
     if (builtin.zig_backend == .stage2_spirv) return error.SkipZigTest;
 
-    if (builtin.os.tag == .wasi) return error.SkipZigTest;
-
     const S = struct {
         fn copy(comptime T: type, dest: []T, source: []const T) void {
             @export(&foo, .{ .name = "test_generic_instantiation_non_dupe" });
@@ -323,8 +321,6 @@ test "generic function instantiation non-duplicates" {
 test "generic instantiation of tagged union with only one field" {
     if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; // TODO
 
-    if (builtin.os.tag == .wasi) return error.SkipZigTest;
-
     const S = struct {
         const U = union(enum) {
             s: []const u8,
diff --git a/test/behavior/math.zig b/test/behavior/math.zig
index 5bea5142fd..f2143d1bb4 100644
--- a/test/behavior/math.zig
+++ b/test/behavior/math.zig
@@ -110,8 +110,6 @@ test "@clz vectors" {
 }
 
 fn testClzVectors() !void {
-    if (comptime builtin.cpu.has(.loongarch, .lsx)) return error.SkipZigTest; // https://github.com/llvm/llvm-project/issues/159529
-
     const Vu4 = @Vector(64, u4);
     const Vu8 = @Vector(64, u8);
     const Vu128 = @Vector(64, u128);
@@ -193,8 +191,6 @@ test "@ctz vectors" {
 }
 
 fn testCtzVectors() !void {
-    if (comptime builtin.cpu.has(.loongarch, .lsx)) return error.SkipZigTest; // https://github.com/llvm/llvm-project/issues/159529
-
     const Vu4 = @Vector(64, u4);
     const Vu8 = @Vector(64, u8);
     @setEvalBranchQuota(10_000);
@@ -2429,7 +2425,6 @@ test "runtime comparison to NaN is comptime-known" {
     if (builtin.zig_backend == .stage2_sparc64) return error.SkipZigTest; // TODO
     if (builtin.zig_backend == .stage2_spirv) return error.SkipZigTest;
     if (builtin.zig_backend == .stage2_riscv64) return error.SkipZigTest;
-    if (builtin.cpu.arch.isArm() and builtin.target.abi.float() == .soft) return error.SkipZigTest; // https://github.com/ziglang/zig/issues/21234
 
     const S = struct {
         fn doTheTest(comptime F: type, x: F) void {
@@ -2458,7 +2453,6 @@ test "runtime int comparison to inf is comptime-known" {
     if (builtin.zig_backend == .stage2_sparc64) return error.SkipZigTest; // TODO
     if (builtin.zig_backend == .stage2_spirv) return error.SkipZigTest;
     if (builtin.zig_backend == .stage2_riscv64) return error.SkipZigTest;
-    if (builtin.cpu.arch.isArm() and builtin.target.abi.float() == .soft) return error.SkipZigTest; // https://github.com/ziglang/zig/issues/21234
 
     const S = struct {
         fn doTheTest(comptime F: type, x: u32) void {
diff --git a/test/behavior/maximum_minimum.zig b/test/behavior/maximum_minimum.zig
index 1a261f224e..efe2a7f108 100644
--- a/test/behavior/maximum_minimum.zig
+++ b/test/behavior/maximum_minimum.zig
@@ -138,8 +138,6 @@ test "@min/max for floats" {
     };
 
     inline for (.{ f16, f32, f64, f80, f128, c_longdouble }) |T| {
-        if (T == c_longdouble and builtin.cpu.arch.isMIPS64()) return error.SkipZigTest; // https://github.com/ziglang/zig/issues/21090
-
         try S.doTheTest(T);
         try comptime S.doTheTest(T);
     }
diff --git a/test/behavior/packed-struct.zig b/test/behavior/packed-struct.zig
index 21538ba369..2414aec7b0 100644
--- a/test/behavior/packed-struct.zig
+++ b/test/behavior/packed-struct.zig
@@ -1196,7 +1196,6 @@ test "packed struct with signed field" {
 
 test "assign packed struct initialized with RLS to packed struct literal field" {
     if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest;
-    if (builtin.zig_backend == .stage2_llvm and builtin.cpu.arch.isWasm()) return error.SkipZigTest;
     if (builtin.zig_backend == .stage2_riscv64) return error.SkipZigTest;
     if (builtin.zig_backend == .stage2_spirv) return error.SkipZigTest;
 
diff --git a/test/behavior/saturating_arithmetic.zig b/test/behavior/saturating_arithmetic.zig
index 949ad2b460..0e2978bd9b 100644
--- a/test/behavior/saturating_arithmetic.zig
+++ b/test/behavior/saturating_arithmetic.zig
@@ -147,11 +147,6 @@ test "saturating multiplication <= 32 bits" {
     if (builtin.zig_backend == .stage2_c and builtin.cpu.arch.isArm()) return error.SkipZigTest;
     if (builtin.zig_backend == .stage2_riscv64) return error.SkipZigTest;
 
-    if (builtin.zig_backend == .stage2_llvm and builtin.cpu.arch.isWasm()) {
-        // https://github.com/ziglang/zig/issues/9660
-        return error.SkipZigTest;
-    }
-
     try testSatMul(u8, 0, maxInt(u8), 0);
     try testSatMul(u8, 1 << 7, 1 << 7, maxInt(u8));
     try testSatMul(u8, maxInt(u8) - 1, 2, maxInt(u8));
@@ -246,11 +241,6 @@ test "saturating multiplication" {
     if (builtin.zig_backend == .stage2_c and builtin.cpu.arch.isArm()) return error.SkipZigTest;
     if (builtin.zig_backend == .stage2_riscv64) return error.SkipZigTest;
 
-    if (builtin.zig_backend == .stage2_llvm and builtin.cpu.arch.isWasm()) {
-        // https://github.com/ziglang/zig/issues/9660
-        return error.SkipZigTest;
-    }
-
     const S = struct {
         fn doTheTest() !void {
             try testSatMul(i8, -3, 10, -30);
diff --git a/test/behavior/struct.zig b/test/behavior/struct.zig
index 1e0907d8f6..2295d06db1 100644
--- a/test/behavior/struct.zig
+++ b/test/behavior/struct.zig
@@ -404,7 +404,6 @@ test "packed struct 24bits" {
     if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest;
     if (builtin.zig_backend == .stage2_spirv) return error.SkipZigTest;
     if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; // TODO
-    if (builtin.cpu.arch.isArm()) return error.SkipZigTest; // TODO
     if (builtin.zig_backend == .stage2_sparc64) return error.SkipZigTest; // TODO
     if (builtin.zig_backend == .stage2_riscv64) return error.SkipZigTest; // TODO
 
@@ -921,7 +920,6 @@ test "tuple assigned to variable" {
 
 test "comptime struct field" {
     if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; // TODO
-    if (builtin.cpu.arch.isArm()) return error.SkipZigTest; // TODO
 
     const T = struct {
         a: i32,
diff --git a/test/behavior/switch.zig b/test/behavior/switch.zig
index c2a98de528..b832f1aeed 100644
--- a/test/behavior/switch.zig
+++ b/test/behavior/switch.zig
@@ -683,6 +683,7 @@ test "switch on pointer type" {
     if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest;
     if (builtin.zig_backend == .stage2_sparc64) return error.SkipZigTest; // TODO
     if (builtin.zig_backend == .stage2_spirv) return error.SkipZigTest;
+    if (builtin.zig_backend == .stage2_llvm) return error.SkipZigTest; // https://github.com/llvm/llvm-project/issues/176634
 
     const S = struct {
         const X = struct {
diff --git a/test/behavior/union.zig b/test/behavior/union.zig
index 0b7340d0ec..17a6238a0c 100644
--- a/test/behavior/union.zig
+++ b/test/behavior/union.zig
@@ -1592,7 +1592,6 @@ test "memset packed union" {
 
     try comptime S.doTheTest();
 
-    if (builtin.cpu.arch.isWasm()) return error.SkipZigTest; // TODO
     try S.doTheTest();
 }
 
@@ -1750,8 +1749,6 @@ test "reinterpret packed union" {
     try comptime S.doTheTest();
 
     if (builtin.zig_backend == .stage2_c) return error.SkipZigTest; // TODO
-    if (builtin.cpu.arch.isWasm()) return error.SkipZigTest; // TODO
-    if (builtin.cpu.arch.endian() == .big) return error.SkipZigTest; // https://github.com/ziglang/zig/issues/21050
     try S.doTheTest();
 }
 
diff --git a/test/behavior/vector.zig b/test/behavior/vector.zig
index 2e3e22440a..777730345e 100644
--- a/test/behavior/vector.zig
+++ b/test/behavior/vector.zig
@@ -7,7 +7,6 @@ const expect = std.testing.expect;
 const expectEqual = std.testing.expectEqual;
 
 test "implicit cast vector to array - bool" {
-    if (builtin.cpu.arch == .aarch64_be and builtin.zig_backend == .stage2_llvm) return error.SkipZigTest;
     if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest;
 
     const S = struct {
@@ -622,11 +621,6 @@ test "vector bitwise not operator" {
     if (builtin.zig_backend == .stage2_sparc64) return error.SkipZigTest; // TODO
     if (builtin.zig_backend == .stage2_riscv64) return error.SkipZigTest;
 
-    if (builtin.cpu.arch == .aarch64_be and builtin.zig_backend == .stage2_llvm) {
-        // https://github.com/ziglang/zig/issues/24061
-        return error.SkipZigTest;
-    }
-
     const S = struct {
         fn doTheTestNot(comptime T: type, x: @Vector(4, T)) !void {
             const y = ~x;
@@ -660,11 +654,6 @@ test "vector boolean not operator" {
     if (builtin.zig_backend == .stage2_sparc64) return error.SkipZigTest; // TODO
     if (builtin.zig_backend == .stage2_riscv64) return error.SkipZigTest;
 
-    if (builtin.cpu.arch == .aarch64_be and builtin.zig_backend == .stage2_llvm) {
-        // https://github.com/ziglang/zig/issues/24061
-        return error.SkipZigTest;
-    }
-
     const S = struct {
         fn doTheTestNot(comptime T: type, x: @Vector(4, T)) !void {
             const y = !x;
@@ -759,7 +748,6 @@ test "vector reduce operation" {
     if (builtin.zig_backend == .stage2_spirv) return error.SkipZigTest;
     if (builtin.zig_backend == .stage2_c and builtin.cpu.arch.isArm()) return error.SkipZigTest;
     if (builtin.zig_backend == .stage2_riscv64) return error.SkipZigTest;
-    if (builtin.zig_backend == .stage2_llvm and builtin.cpu.arch.isMIPS64()) return error.SkipZigTest; // https://github.com/ziglang/zig/issues/21091
     if (builtin.zig_backend == .stage2_llvm and builtin.cpu.arch.isSPARC()) return error.SkipZigTest; // https://github.com/ziglang/zig/issues/23719
 
     const S = struct {
@@ -1096,9 +1084,6 @@ test "saturating multiplication" {
     if (builtin.zig_backend == .stage2_spirv) return error.SkipZigTest;
     if (builtin.zig_backend == .stage2_riscv64) return error.SkipZigTest;
 
-    // TODO: once #9660 has been solved, remove this line
-    if (builtin.target.cpu.arch.isWasm()) return error.SkipZigTest;
-
     const S = struct {
         fn doTheTest() !void {
             // Broken out to avoid https://github.com/ziglang/zig/issues/11251
@@ -1165,7 +1150,6 @@ test "@addWithOverflow" {
     if (builtin.zig_backend == .stage2_sparc64) return error.SkipZigTest; // TODO
     if (builtin.zig_backend == .stage2_riscv64) return error.SkipZigTest;
     if (builtin.zig_backend == .stage2_spirv) return error.SkipZigTest;
-    if (builtin.cpu.arch == .aarch64_be and builtin.zig_backend == .stage2_llvm) return error.SkipZigTest;
 
     const S = struct {
         fn doTheTest() !void {
@@ -1213,7 +1197,6 @@ test "@subWithOverflow" {
     if (builtin.zig_backend == .stage2_sparc64) return error.SkipZigTest; // TODO
     if (builtin.zig_backend == .stage2_riscv64) return error.SkipZigTest;
     if (builtin.zig_backend == .stage2_spirv) return error.SkipZigTest;
-    if (builtin.cpu.arch == .aarch64_be and builtin.zig_backend == .stage2_llvm) return error.SkipZigTest;
 
     const S = struct {
         fn doTheTest() !void {
@@ -1244,7 +1227,6 @@ test "@mulWithOverflow" {
     if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; // TODO
     if (builtin.zig_backend == .stage2_sparc64) return error.SkipZigTest; // TODO
     if (builtin.zig_backend == .stage2_riscv64) return error.SkipZigTest;
-    if (builtin.cpu.arch == .aarch64_be and builtin.zig_backend == .stage2_llvm) return error.SkipZigTest;
 
     const S = struct {
         fn doTheTest() !void {
@@ -1265,7 +1247,6 @@ test "@shlWithOverflow" {
     if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; // TODO
     if (builtin.zig_backend == .stage2_sparc64) return error.SkipZigTest; // TODO
     if (builtin.zig_backend == .stage2_riscv64) return error.SkipZigTest;
-    if (builtin.cpu.arch == .aarch64_be and builtin.zig_backend == .stage2_llvm) return error.SkipZigTest;
 
     const S = struct {
         fn doTheTest() !void {
@@ -1347,7 +1328,6 @@ test "byte vector initialized in inline function" {
     if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; // TODO
     if (builtin.zig_backend == .stage2_sparc64) return error.SkipZigTest; // TODO
     if (builtin.zig_backend == .stage2_riscv64) return error.SkipZigTest;
-    if (builtin.cpu.arch == .aarch64_be and builtin.zig_backend == .stage2_llvm) return error.SkipZigTest;
     if (builtin.cpu.arch == .hexagon and builtin.zig_backend == .stage2_llvm) return error.SkipZigTest;
 
     if (builtin.zig_backend == .stage2_llvm and builtin.cpu.arch == .x86_64 and comptime builtin.cpu.has(.x86, .avx512f)) {
@@ -1463,7 +1443,6 @@ test "store packed vector element" {
     if (builtin.zig_backend == .stage2_sparc64) return error.SkipZigTest; // TODO
     if (builtin.zig_backend == .stage2_c) return error.SkipZigTest; // TODO
     if (builtin.zig_backend == .stage2_riscv64) return error.SkipZigTest;
-    if (builtin.cpu.arch == .aarch64_be and builtin.zig_backend == .stage2_llvm) return error.SkipZigTest;
 
     var v = @Vector(4, u1){ 1, 1, 1, 1 };
     try expectEqual(@Vector(4, u1){ 1, 1, 1, 1 }, v);
@@ -1496,7 +1475,6 @@ test "store vector with memset" {
     if (builtin.zig_backend == .stage2_spirv) return error.SkipZigTest; // TODO
     if (builtin.zig_backend == .stage2_c) return error.SkipZigTest; // TODO
     if (builtin.zig_backend == .stage2_riscv64) return error.SkipZigTest;
-    if (builtin.cpu.arch == .aarch64_be and builtin.zig_backend == .stage2_llvm) return error.SkipZigTest;
 
     var a: [5]@Vector(2, i1) = undefined;
     var b: [5]@Vector(2, u2) = undefined;
@@ -1596,7 +1574,6 @@ test "bitcast to vector with different child type" {
         }
     };
 
-    // Originally reported at https://github.com/ziglang/zig/issues/8184
     try S.doTheTest();
     try comptime S.doTheTest();
 }
diff --git a/test/behavior/wrapping_arithmetic.zig b/test/behavior/wrapping_arithmetic.zig
index 7c5d60cc41..247023c17a 100644
--- a/test/behavior/wrapping_arithmetic.zig
+++ b/test/behavior/wrapping_arithmetic.zig
@@ -82,9 +82,6 @@ test "wrapping multiplication" {
     if (builtin.zig_backend == .stage2_spirv) return error.SkipZigTest;
     if (builtin.zig_backend == .stage2_riscv64) return error.SkipZigTest;
 
-    // TODO: once #9660 has been solved, remove this line
-    if (builtin.cpu.arch.isWasm()) return error.SkipZigTest;
-
     const S = struct {
         fn doTheTest() !void {
             try testWrapMul(i8, -3, 10, -30);
diff --git a/test/c_abi/cfuncs.c b/test/c_abi/cfuncs.c
index fc708f343e..64e34fc0b9 100644
--- a/test/c_abi/cfuncs.c
+++ b/test/c_abi/cfuncs.c
@@ -65,6 +65,10 @@ static void assert_or_panic(bool ok) {
 #  define ZIG_NO_COMPLEX
 #endif
 
+#if defined(__loongarch__) && defined(__loongarch_soft_float)
+#  define ZIG_NO_COMPLEX
+#endif
+
 #ifdef __powerpc__
 #  define ZIG_NO_COMPLEX
 #endif
diff --git a/test/c_abi/main.zig b/test/c_abi/main.zig
index 1f221260b4..138e9700c7 100644
--- a/test/c_abi/main.zig
+++ b/test/c_abi/main.zig
@@ -187,7 +187,8 @@ extern fn c_cmultd(a: ComplexDouble, b: ComplexDouble) ComplexDouble;
 const complex_abi_compatible = builtin.cpu.arch != .x86 and !builtin.cpu.arch.isMIPS() and
     !builtin.cpu.arch.isArm() and !builtin.cpu.arch.isPowerPC32() and !builtin.cpu.arch.isRISCV() and
     builtin.cpu.arch != .hexagon and
-    builtin.cpu.arch != .s390x;
+    builtin.cpu.arch != .s390x and
+    !(builtin.cpu.arch.isLoongArch() and builtin.abi.float() == .soft);
 
 test "C ABI complex float" {
     if (!complex_abi_compatible) return error.SkipZigTest;
@@ -5676,6 +5677,7 @@ test "C ABI pointer sized float struct" {
     if (builtin.cpu.arch.isMIPS64()) return error.SkipZigTest;
     if (builtin.cpu.arch.isPowerPC32()) return error.SkipZigTest;
     if (builtin.cpu.arch.isArm() and builtin.abi.float() == .soft) return error.SkipZigTest;
+    if (builtin.cpu.arch.isLoongArch() and builtin.abi.float() == .soft) return error.SkipZigTest;
     if (builtin.cpu.arch == .s390x) return error.SkipZigTest;
 
     c_ptr_size_float_struct(.{ .x = 1, .y = 2 });
diff --git a/test/llvm_targets.zig b/test/llvm_targets.zig
index 140f4ab185..6084acb85e 100644
--- a/test/llvm_targets.zig
+++ b/test/llvm_targets.zig
@@ -52,6 +52,7 @@ const targets = [_]std.Target.Query{
     .{ .cpu_arch = .arm, .os_tag = .freebsd, .abi = .eabihf },
     .{ .cpu_arch = .arm, .os_tag = .freestanding, .abi = .eabi },
     .{ .cpu_arch = .arm, .os_tag = .freestanding, .abi = .eabihf },
+    .{ .cpu_arch = .arm, .os_tag = .fuchsia, .abi = .eabihf },
     .{ .cpu_arch = .arm, .os_tag = .haiku, .abi = .eabi },
     .{ .cpu_arch = .arm, .os_tag = .haiku, .abi = .eabihf },
     .{ .cpu_arch = .arm, .os_tag = .linux, .abi = .androideabi },
@@ -99,12 +100,19 @@ const targets = [_]std.Target.Query{
     // .{ .cpu_arch = .csky, .os_tag = .linux, .abi = .gnueabihf },
 
     .{ .cpu_arch = .hexagon, .os_tag = .freestanding, .abi = .none },
+    .{ .cpu_arch = .hexagon, .os_tag = .linux, .abi = .musl },
     .{ .cpu_arch = .hexagon, .os_tag = .linux, .abi = .none },
 
     .{ .cpu_arch = .lanai, .os_tag = .freestanding, .abi = .none },
 
-    // .{ .cpu_arch = .loongarch32, .os_tag = .freestanding, .abi = .none },
-    // .{ .cpu_arch = .loongarch32, .os_tag = .linux, .abi = .none },
+    .{ .cpu_arch = .loongarch32, .os_tag = .freestanding, .abi = .none },
+    // .{ .cpu_arch = .loongarch32, .os_tag = .linux, .abi = .gnu },
+    // .{ .cpu_arch = .loongarch32, .os_tag = .linux, .abi = .gnuf32 },
+    // .{ .cpu_arch = .loongarch32, .os_tag = .linux, .abi = .gnusf },
+    // .{ .cpu_arch = .loongarch32, .os_tag = .linux, .abi = .musl },
+    // .{ .cpu_arch = .loongarch32, .os_tag = .linux, .abi = .muslf32 },
+    // .{ .cpu_arch = .loongarch32, .os_tag = .linux, .abi = .muslsf },
+    .{ .cpu_arch = .loongarch32, .os_tag = .linux, .abi = .none },
     // .{ .cpu_arch = .loongarch32, .os_tag = .uefi, .abi = .none },
 
     .{ .cpu_arch = .loongarch64, .os_tag = .freestanding, .abi = .none },
@@ -172,6 +180,7 @@ const targets = [_]std.Target.Query{
 
     .{ .cpu_arch = .nvptx, .os_tag = .cuda, .abi = .none },
     .{ .cpu_arch = .nvptx, .os_tag = .nvcl, .abi = .none },
+
     .{ .cpu_arch = .nvptx64, .os_tag = .cuda, .abi = .none },
     .{ .cpu_arch = .nvptx64, .os_tag = .nvcl, .abi = .none },
 
@@ -255,6 +264,7 @@ const targets = [_]std.Target.Query{
 
     .{ .cpu_arch = .thumb, .os_tag = .freestanding, .abi = .eabi },
     .{ .cpu_arch = .thumb, .os_tag = .freestanding, .abi = .eabihf },
+    .{ .cpu_arch = .thumb, .os_tag = .fuchsia, .abi = .eabihf },
     .{ .cpu_arch = .thumb, .os_tag = .linux, .abi = .eabi },
     .{ .cpu_arch = .thumb, .os_tag = .linux, .abi = .eabihf },
     .{ .cpu_arch = .thumb, .os_tag = .linux, .abi = .musleabi },
@@ -342,8 +352,8 @@ const targets = [_]std.Target.Query{
 
     .{ .cpu_arch = .xcore, .os_tag = .freestanding, .abi = .none },
 
-    // .{ .cpu_arch = .xtensa, .os_tag = .freestanding, .abi = .none },
-    // .{ .cpu_arch = .xtensa, .os_tag = .linux, .abi = .none },
+    .{ .cpu_arch = .xtensa, .os_tag = .freestanding, .abi = .none },
+    .{ .cpu_arch = .xtensa, .os_tag = .linux, .abi = .none },
 };
 
 pub fn addCases(
diff --git a/test/tests.zig b/test/tests.zig
index 061c75e0e5..5e336a6650 100644
--- a/test/tests.zig
+++ b/test/tests.zig
@@ -410,6 +410,14 @@ const module_test_targets = blk: {
             .extra_target = true,
         },
 
+        .{
+            .target = .{
+                .cpu_arch = .loongarch32,
+                .os_tag = .linux,
+                .abi = .none,
+            },
+        },
+
         .{
             .target = .{
                 .cpu_arch = .loongarch64,
@@ -435,6 +443,25 @@ const module_test_targets = blk: {
             .link_libc = true,
             .extra_target = true,
         },
+        .{
+            .target = .{
+                .cpu_arch = .loongarch64,
+                .os_tag = .linux,
+                .abi = .muslsf,
+            },
+            .link_libc = true,
+            .extra_target = true,
+        },
+        .{
+            .target = .{
+                .cpu_arch = .loongarch64,
+                .os_tag = .linux,
+                .abi = .muslsf,
+            },
+            .linkage = .dynamic,
+            .link_libc = true,
+            .extra_target = true,
+        },
         .{
             .target = .{
                 .cpu_arch = .loongarch64,
@@ -443,6 +470,15 @@ const module_test_targets = blk: {
             },
             .link_libc = true,
         },
+        .{
+            .target = .{
+                .cpu_arch = .loongarch64,
+                .os_tag = .linux,
+                .abi = .gnusf,
+            },
+            .link_libc = true,
+            .extra_target = true,
+        },
 
         .{
             .target = .{
@@ -1713,14 +1749,13 @@ const c_abi_targets = blk: {
             },
         },
 
-        // https://gitlab.com/qemu-project/qemu/-/issues/3291
-        // .{
-        //     .target = .{
-        //         .cpu_arch = .hexagon,
-        //         .os_tag = .linux,
-        //         .abi = .musl,
-        //     },
-        // },
+        .{
+            .target = .{
+                .cpu_arch = .hexagon,
+                .os_tag = .linux,
+                .abi = .musl,
+            },
+        },
 
         .{
             .target = .{
@@ -1729,6 +1764,13 @@ const c_abi_targets = blk: {
                 .abi = .musl,
             },
         },
+        .{
+            .target = .{
+                .cpu_arch = .loongarch64,
+                .os_tag = .linux,
+                .abi = .muslsf,
+            },
+        },
 
         .{
             .target = .{
diff --git a/tools/update_clang_options.zig b/tools/update_clang_options.zig
index 49b8e72f58..af42e362a7 100644
--- a/tools/update_clang_options.zig
+++ b/tools/update_clang_options.zig
@@ -675,7 +675,7 @@ pub fn main(init: std.process.Init) !void {
     const child_args = [_][]const u8{
         llvm_tblgen_exe,
         "--dump-json",
-        try std.fmt.allocPrint(arena, "{s}/clang/include/clang/Driver/Options.td", .{llvm_src_root}),
+        try std.fmt.allocPrint(arena, "{s}/clang/include/clang/Options/Options.td", .{llvm_src_root}),
         try std.fmt.allocPrint(arena, "-I={s}/llvm/include", .{llvm_src_root}),
         try std.fmt.allocPrint(arena, "-I={s}/clang/include/clang/Driver", .{llvm_src_root}),
     };
diff --git a/tools/update_cpu_features.zig b/tools/update_cpu_features.zig
index 8a8e15558b..5eb2386181 100644
--- a/tools/update_cpu_features.zig
+++ b/tools/update_cpu_features.zig
@@ -203,6 +203,10 @@ const targets = [_]ArchTarget{
                 .llvm_name = "ampere1a",
                 .flatten = true,
             },
+            .{
+                .llvm_name = "ampere1c",
+                .flatten = true,
+            },
             .{
                 .llvm_name = "apple-a7",
                 .flatten = true,
@@ -247,6 +251,26 @@ const targets = [_]ArchTarget{
                 .llvm_name = "apple-m4",
                 .flatten = true,
             },
+            .{
+                .llvm_name = "apple-m5",
+                .flatten = true,
+            },
+            .{
+                .llvm_name = "c1-nano",
+                .flatten = true,
+            },
+            .{
+                .llvm_name = "c1-premium",
+                .flatten = true,
+            },
+            .{
+                .llvm_name = "c1-pro",
+                .flatten = true,
+            },
+            .{
+                .llvm_name = "c1-ultra",
+                .flatten = true,
+            },
             .{
                 .llvm_name = "carmel",
                 .flatten = true,
@@ -862,6 +886,10 @@ const targets = [_]ArchTarget{
                 .llvm_name = "armv9.6-a",
                 .zig_name = "v9_6a",
             },
+            .{
+                .llvm_name = "armv9.7-a",
+                .zig_name = "v9_7a",
+            },
             .{
                 .llvm_name = "armv9-a",
                 .zig_name = "v9a",
@@ -982,6 +1010,10 @@ const targets = [_]ArchTarget{
                 .llvm_name = "v9.6a",
                 .zig_name = "has_v9_6a",
             },
+            .{
+                .llvm_name = "v9.7a",
+                .zig_name = "has_v9_7a",
+            },
         },
         .extra_cpus = &.{
             .{
@@ -1249,6 +1281,24 @@ const targets = [_]ArchTarget{
             .td_name = "LoongArch",
         },
         .extra_cpus = &.{
+            .{
+                .llvm_name = null,
+                .zig_name = "la32v1_0",
+                .features = &.{
+                    "32bit",
+                    "32s",
+                    "d",
+                    "ual",
+                },
+            },
+            .{
+                .llvm_name = null,
+                .zig_name = "la32rv1_0",
+                .features = &.{
+                    "32bit",
+                    "ual",
+                },
+            },
             .{
                 .llvm_name = null,
                 .zig_name = "la64v1_0",
@@ -1276,6 +1326,7 @@ const targets = [_]ArchTarget{
         },
         .omit_cpus = &.{
             "generic",
+            "loongarch32",
             "loongarch64",
         },
     },
@@ -1557,26 +1608,17 @@ const targets = [_]ArchTarget{
                 .llvm_name = "64bit-mode",
                 .omit = true,
             },
-            // Remove these when LLVM removes AVX10.N-256 support.
-            .{
-                .llvm_name = "avx10.1-256",
-                .flatten = true,
-            },
-            .{
-                .llvm_name = "avx10.2-256",
-                .flatten = true,
-            },
             .{
                 .llvm_name = "avx10.1-512",
-                .zig_name = "avx10_1",
+                .omit = true,
             },
             .{
                 .llvm_name = "avx10.2-512",
-                .zig_name = "avx10_2",
+                .omit = true,
             },
             .{
-                .llvm_name = "avx512f",
-                .extra_deps = &.{"evex512"},
+                .llvm_name = "evex512",
+                .omit = true,
             },
             .{
                 .llvm_name = "alderlake",